jenkins-bot has submitted this change and it was merged.
Change subject: Bug 69551-add Vietnamese to timestripper_tests.py plus bug fixing ......................................................................
Bug 69551-add Vietnamese to timestripper_tests.py plus bug fixing
Added Vietnamese language to timestripper_tests.py.
Also fixed a bug in TimeStripper.last_match_and_replace() that was found while debugging Vietnamese.
The month was not replaced correctly. Added logic to handle months expressed either as digits or as words.
Updated timestripper_tests.py to cover the logic in last_match_and_replace(), using one site for each of the two month formats (fr for months as words, cs for months as digits).
Change-Id: I148663b7c694c499c194e993526ea8f928e4c174 --- M pywikibot/textlib.py M tests/timestripper_tests.py 2 files changed, 130 insertions(+), 13 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index c6997a1..03c2d0a 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -1201,9 +1201,11 @@ # work around for cs wiki: if month are in digits, we assume # that format is dd. mm. (with dot and spaces optional) if any(_.isdigit() for _ in self.origNames2monthNum): + self.is_digit_month = True monthR = r'(?P<month>(%s)|\d{1,2}.?)' % u'|'.join(escaped_months) dayR = r'(?P<day>(3[01]|[12]\d|0?[1-9])).?\s*[01]?\d.?' else: + self.is_digit_month = False monthR = r'(?P<month>(%s))' % u'|'.join(escaped_months) dayR = r'(?P<day>(3[01]|[12]\d|0?[1-9])).?'
@@ -1244,8 +1246,11 @@ # replace all matches but the last two # (i.e. allow to search for dd. mm.) if pat == self.pmonthR: - if cnt > 2: - txt = pat.sub(marker, txt, cnt - 2) + if self.is_digit_month: + if cnt > 2: + txt = pat.sub(marker, txt, cnt - 2) + else: + txt = pat.sub(marker, txt) else: txt = pat.sub(marker, txt) return (txt, m.groupdict()) diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py index 1b3f171..9d12b04 100644 --- a/tests/timestripper_tests.py +++ b/tests/timestripper_tests.py @@ -16,13 +16,13 @@ from pywikibot.textlib import TimeStripper, tzoneFixedOffset
-class TestTimeStripper(PywikibotTestCase): - """Test cases for Link objects""" +class TestTimeStripperWithNoDigitsAsMonths(PywikibotTestCase): + """Test cases for TimeStripper methods"""
def setUp(self): site = pywikibot.Site('fr', 'wikipedia') self.ts = TimeStripper(site) - super(TestTimeStripper, self).setUp() + super(TestTimeStripperWithNoDigitsAsMonths, self).setUp()
def test_findmarker(self): """Test that string which is not part of text is found""" @@ -32,15 +32,43 @@ '@@@@@@')
def test_last_match_and_replace(self): - """Test that pattern matches the righmost item""" + """Test that pattern matches and removes items correctly."""
- txtWithMatch = u'this string has one 1998, 1999 and 3000 in it' + txtWithOneMatch = u'this string has 3000, 1999 and 3000 in it' + txtWithTwoMatch = u'this string has 1998, 1999 and 3000 in it' txtWithNoMatch = u'this string has no match' pat = self.ts.pyearR
- self.assertEqual(self.ts.last_match_and_replace(txtWithMatch, pat), - (u'this string has one @@, @@ and 3000 in it', + self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat), + (u'this string has 3000, @@ and 3000 in it', {'year': u'1999'}) + ) + self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat), + (u'this string has @@, @@ and 3000 in it', + {'year': u'1999'}) + ) + self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat), + (txtWithNoMatch, + None) + ) + + txtWithOneMatch = u'this string has XXX, YYY and fév in it' + txtWithTwoMatch = u'this string has XXX, mars and fév in it' + txtWithThreeMatch = u'this string has avr, mars and fév in it' + txtWithNoMatch = u'this string has no match' + pat = self.ts.pmonthR + + self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat), + (u'this string has XXX, YYY and @@ in it', + {'month': u'fév'}) + ) + self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat), + (u'this string has XXX, @@ and @@ in it', + {'month': u'fév'}) + ) + self.assertEqual(self.ts.last_match_and_replace(txtWithThreeMatch, pat), + (u'this string has @@, @@ and @@ in it', + {'month': u'fév'}) ) self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat), (txtWithNoMatch, @@ -62,8 +90,55 @@ self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+class TestTimeStripperWithDigitsAsMonths(PywikibotTestCase): + """Test cases for TimeStripper methods""" + + def setUp(self): + site = pywikibot.Site('cs', 'wikipedia') + self.ts = TimeStripper(site) + super(TestTimeStripperWithDigitsAsMonths, self).setUp() + + def test_last_match_and_replace(self): + """Test that pattern matches and removes items correctly.""" + + txtWithOneMatch = u'this string has XX. YY. 12. in it' + txtWithTwoMatch = u'this string has XX. 1. 12. in it' + txtWithThreeMatch = u'this string has 1. 1. 12. in it' + txtWithNoMatch = u'this string has no match' + pat = self.ts.pmonthR + + self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat), + (u'this string has XX. YY. 12. in it', + {'month': u'12.'}) + ) + self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat), + (u'this string has XX. 1. 12. in it', + {'month': u'12.'}) + ) + self.assertEqual(self.ts.last_match_and_replace(txtWithThreeMatch, pat), + (u'this string has @@ 1. 12. in it', + {'month': u'12.'}) + ) + self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat), + (txtWithNoMatch, + None) + ) + + def test_timestripper(self): + txtMatch = u'3. 2. 2010, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)' + txtNoMatch = u'3 March 2010 19:48 (UTC) 7 March 2010 19:48 (UTC)' + + tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'], + self.ts.site.siteinfo['timezone']) + + res = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone) + + self.assertEqual(self.ts.timestripper(txtMatch), res) + self.assertEqual(self.ts.timestripper(txtNoMatch), None) + + class TestEnglishTimeStripper(PywikibotTestCase): - """Test cases for Link objects""" + """Test cases for English language"""
def setUp(self): site = pywikibot.Site('en', 'wikipedia') @@ -86,7 +161,7 @@
class TestCzechTimeStripper(PywikibotTestCase): - """Test cases for Link objects""" + """Test cases for Czech language"""
def setUp(self): site = pywikibot.Site('cs', 'wikipedia') @@ -109,7 +184,7 @@
class TestPortugueseTimeStripper(PywikibotTestCase): - """Test cases for Link objects""" + """Test cases for Portuguese language"""
def setUp(self): site = pywikibot.Site('pt', 'wikipedia') @@ -132,7 +207,7 @@
class TestNorwegianTimeStripper(PywikibotTestCase): - """Test cases for Link objects""" + """Test cases for Norwegian language"""
def setUp(self): site = pywikibot.Site('no', 'wikipedia') @@ -154,6 +229,43 @@ self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+class TestVietnameseTimeStripper(PywikibotTestCase): + """Test cases for Vietnamese language""" + + def setUp(self): + site = pywikibot.Site('vi', 'wikipedia') + self.ts = TimeStripper(site) + super(TestVietnameseTimeStripper, self).setUp() + + def test_timestripper_01(self): + """Test that correct date is matched""" + + txtMatch = u'16:41, ngày 15 tháng 9 năm 2008 (UTC) 16:41, ngày 12 tháng 9 năm 2008 (UTC)' + txtNoMatch = u'16:41, ngày 15 March 9 năm 2008 (UTC) 16:41, ngày 12 March 9 năm 2008 (UTC)' + + tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'], + self.ts.site.siteinfo['timezone']) + + res = datetime.datetime(2008, 9, 12, 16, 41, tzinfo=tzone) + + self.assertEqual(self.ts.timestripper(txtMatch), res) + self.assertEqual(self.ts.timestripper(txtNoMatch), None) + + def test_timestripper_02(self): + """Test that correct date is matched""" + + txtMatch = u'21:18, ngày 13 tháng 8 năm 2014 (UTC) 21:18, ngày 14 tháng 8 năm 2014 (UTC)' + txtNoMatch = u'21:18, ngày 13 March 8 năm 2014 (UTC) 21:18, ngày 14 March 8 năm 2014 (UTC)' + + tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'], + self.ts.site.siteinfo['timezone']) + + res = datetime.datetime(2014, 8, 14, 21, 18, tzinfo=tzone) + + self.assertEqual(self.ts.timestripper(txtMatch), res) + self.assertEqual(self.ts.timestripper(txtNoMatch), None) + + if __name__ == '__main__': try: unittest.main()