jenkins-bot has submitted this change and it was merged.
Change subject: timestripper: match long names first for months
timestripper: match long names first for months
Month names are now sorted in reverse order, so that 'long' names are matched before the 'short' names they begin with (e.g. 'février' is tried before 'fév').
Change-Id: I62bc3099b97bbc1e3a04e7da1fc5240f175f356e --- M pywikibot/textlib.py M tests/timestripper_tests.py 2 files changed, 9 insertions(+), 7 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 03c2d0a..d7f1f87 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -1196,7 +1196,9 @@ # removed; will be handled as digits in regex, adding d+{1,2}.? escaped_months = [_ for _ in self.origNames2monthNum if not _.strip('.').isdigit()] - escaped_months = [re.escape(_) for _ in escaped_months] + # match longest names first. + escaped_months = [re.escape(_) for + _ in sorted(escaped_months, reverse=True)]
# work around for cs wiki: if month are in digits, we assume # that format is dd. mm. (with dot and spaces optional) diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py index 9d12b04..c66e69f 100644 --- a/tests/timestripper_tests.py +++ b/tests/timestripper_tests.py @@ -52,23 +52,23 @@ None) )
- txtWithOneMatch = u'this string has XXX, YYY and fév in it' - txtWithTwoMatch = u'this string has XXX, mars and fév in it' - txtWithThreeMatch = u'this string has avr, mars and fév in it' + txtWithOneMatch = u'this string has XXX, YYY and février in it' + txtWithTwoMatch = u'this string has XXX, mars and février in it' + txtWithThreeMatch = u'this string has avr, mars and février in it' txtWithNoMatch = u'this string has no match' pat = self.ts.pmonthR
self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat), (u'this string has XXX, YYY and @@ in it', - {'month': u'fév'}) + {'month': u'février'}) ) self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat), (u'this string has XXX, @@ and @@ in it', - {'month': u'fév'}) + {'month': u'février'}) ) self.assertEqual(self.ts.last_match_and_replace(txtWithThreeMatch, pat), (u'this string has @@, @@ and @@ in it', - {'month': u'fév'}) + {'month': u'février'}) ) self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat), (txtWithNoMatch,
pywikibot-commits@lists.wikimedia.org