jenkins-bot has submitted this change and it was merged.
Change subject: timestripper: match long names first for months
......................................................................
timestripper: match long names first for months
Month names are now sorted in reverse order, so that 'long' names are
matched before the 'short' ones they start with (e.g. 'février' before 'fév').
Change-Id: I62bc3099b97bbc1e3a04e7da1fc5240f175f356e
---
M pywikibot/textlib.py
M tests/timestripper_tests.py
2 files changed, 9 insertions(+), 7 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 03c2d0a..d7f1f87 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1196,7 +1196,9 @@
# removed; will be handled as digits in regex, adding d+{1,2}\.?
escaped_months = [_ for _ in self.origNames2monthNum if
not _.strip('.').isdigit()]
- escaped_months = [re.escape(_) for _ in escaped_months]
+ # match longest names first.
+ escaped_months = [re.escape(_) for
+ _ in sorted(escaped_months, reverse=True)]
# work around for cs wiki: if month are in digits, we assume
# that format is dd. mm. (with dot and spaces optional)
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index 9d12b04..c66e69f 100644
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -52,23 +52,23 @@
None)
)
- txtWithOneMatch = u'this string has XXX, YYY and fév in it'
- txtWithTwoMatch = u'this string has XXX, mars and fév in it'
- txtWithThreeMatch = u'this string has avr, mars and fév in it'
+ txtWithOneMatch = u'this string has XXX, YYY and février in it'
+ txtWithTwoMatch = u'this string has XXX, mars and février in it'
+ txtWithThreeMatch = u'this string has avr, mars and février in it'
txtWithNoMatch = u'this string has no match'
pat = self.ts.pmonthR
self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat),
(u'this string has XXX, YYY and @@ in it',
- {'month': u'fév'})
+ {'month': u'février'})
)
self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat),
(u'this string has XXX, @@ and @@ in it',
- {'month': u'fév'})
+ {'month': u'février'})
)
self.assertEqual(self.ts.last_match_and_replace(txtWithThreeMatch, pat),
(u'this string has @@, @@ and @@ in it',
- {'month': u'fév'})
+ {'month': u'février'})
)
self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat),
(txtWithNoMatch,
--
To view, visit
https://gerrit.wikimedia.org/r/154474
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I62bc3099b97bbc1e3a04e7da1fc5240f175f356e
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>