jenkins-bot has submitted this change and it was merged.
Change subject: Bug 69551-add Vietnamese to timestripper_tests.py plus bug fixing
......................................................................
Bug 69551-add Vietnamese to timestripper_tests.py plus bug fixing
Added Vietnamese language to timestripper_tests.py.
Also fixed a bug in TimeStripper.last_match_and_replace() that was found
while debugging Vietnamese.
Month was not replaced correctly.
Added logic to handle months expressed either as digits or
as words.
Updated timestripper_tests.py to cover the logic in
last_match_and_replace(), using sites with the two formats.
Change-Id: I148663b7c694c499c194e993526ea8f928e4c174
---
M pywikibot/textlib.py
M tests/timestripper_tests.py
2 files changed, 130 insertions(+), 13 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index c6997a1..03c2d0a 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1201,9 +1201,11 @@
# work around for cs wiki: if month are in digits, we assume
# that format is dd. mm. (with dot and spaces optional)
if any(_.isdigit() for _ in self.origNames2monthNum):
+ self.is_digit_month = True
monthR = r'(?P<month>(%s)|\d{1,2}\.?)' %
u'|'.join(escaped_months)
dayR = r'(?P<day>(3[01]|[12]\d|0?[1-9]))\.?\s*[01]?\d\.?'
else:
+ self.is_digit_month = False
monthR = r'(?P<month>(%s))' %
u'|'.join(escaped_months)
dayR = r'(?P<day>(3[01]|[12]\d|0?[1-9]))\.?'
@@ -1244,8 +1246,11 @@
# replace all matches but the last two
# (i.e. allow to search for dd. mm.)
if pat == self.pmonthR:
- if cnt > 2:
- txt = pat.sub(marker, txt, cnt - 2)
+ if self.is_digit_month:
+ if cnt > 2:
+ txt = pat.sub(marker, txt, cnt - 2)
+ else:
+ txt = pat.sub(marker, txt)
else:
txt = pat.sub(marker, txt)
return (txt, m.groupdict())
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index 1b3f171..9d12b04 100644
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -16,13 +16,13 @@
from pywikibot.textlib import TimeStripper, tzoneFixedOffset
-class TestTimeStripper(PywikibotTestCase):
- """Test cases for Link objects"""
+class TestTimeStripperWithNoDigitsAsMonths(PywikibotTestCase):
+ """Test cases for TimeStripper methods"""
def setUp(self):
site = pywikibot.Site('fr', 'wikipedia')
self.ts = TimeStripper(site)
- super(TestTimeStripper, self).setUp()
+ super(TestTimeStripperWithNoDigitsAsMonths, self).setUp()
def test_findmarker(self):
"""Test that string which is not part of text is
found"""
@@ -32,15 +32,43 @@
'@@@@@@')
def test_last_match_and_replace(self):
- """Test that pattern matches the righmost item"""
+ """Test that pattern matches and removes items
correctly."""
- txtWithMatch = u'this string has one 1998, 1999 and 3000 in it'
+ txtWithOneMatch = u'this string has 3000, 1999 and 3000 in it'
+ txtWithTwoMatch = u'this string has 1998, 1999 and 3000 in it'
txtWithNoMatch = u'this string has no match'
pat = self.ts.pyearR
- self.assertEqual(self.ts.last_match_and_replace(txtWithMatch, pat),
- (u'this string has one @@, @@ and 3000 in it',
+ self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat),
+ (u'this string has 3000, @@ and 3000 in it',
{'year': u'1999'})
+ )
+ self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat),
+ (u'this string has @@, @@ and 3000 in it',
+ {'year': u'1999'})
+ )
+ self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat),
+ (txtWithNoMatch,
+ None)
+ )
+
+ txtWithOneMatch = u'this string has XXX, YYY and fév in it'
+ txtWithTwoMatch = u'this string has XXX, mars and fév in it'
+ txtWithThreeMatch = u'this string has avr, mars and fév in it'
+ txtWithNoMatch = u'this string has no match'
+ pat = self.ts.pmonthR
+
+ self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat),
+ (u'this string has XXX, YYY and @@ in it',
+ {'month': u'fév'})
+ )
+ self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat),
+ (u'this string has XXX, @@ and @@ in it',
+ {'month': u'fév'})
+ )
+ self.assertEqual(self.ts.last_match_and_replace(txtWithThreeMatch, pat),
+ (u'this string has @@, @@ and @@ in it',
+ {'month': u'fév'})
)
self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat),
(txtWithNoMatch,
@@ -62,8 +90,55 @@
self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+class TestTimeStripperWithDigitsAsMonths(PywikibotTestCase):
+ """Test cases for TimeStripper methods"""
+
+ def setUp(self):
+ site = pywikibot.Site('cs', 'wikipedia')
+ self.ts = TimeStripper(site)
+ super(TestTimeStripperWithDigitsAsMonths, self).setUp()
+
+ def test_last_match_and_replace(self):
+ """Test that pattern matches and removes items
correctly."""
+
+ txtWithOneMatch = u'this string has XX. YY. 12. in it'
+ txtWithTwoMatch = u'this string has XX. 1. 12. in it'
+ txtWithThreeMatch = u'this string has 1. 1. 12. in it'
+ txtWithNoMatch = u'this string has no match'
+ pat = self.ts.pmonthR
+
+ self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat),
+ (u'this string has XX. YY. 12. in it',
+ {'month': u'12.'})
+ )
+ self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat),
+ (u'this string has XX. 1. 12. in it',
+ {'month': u'12.'})
+ )
+ self.assertEqual(self.ts.last_match_and_replace(txtWithThreeMatch, pat),
+ (u'this string has @@ 1. 12. in it',
+ {'month': u'12.'})
+ )
+ self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat),
+ (txtWithNoMatch,
+ None)
+ )
+
+ def test_timestripper(self):
+ txtMatch = u'3. 2. 2010, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)'
+ txtNoMatch = u'3 March 2010 19:48 (UTC) 7 March 2010 19:48 (UTC)'
+
+ tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
+ self.ts.site.siteinfo['timezone'])
+
+ res = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone)
+
+ self.assertEqual(self.ts.timestripper(txtMatch), res)
+ self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+
+
class TestEnglishTimeStripper(PywikibotTestCase):
- """Test cases for Link objects"""
+ """Test cases for English language"""
def setUp(self):
site = pywikibot.Site('en', 'wikipedia')
@@ -86,7 +161,7 @@
class TestCzechTimeStripper(PywikibotTestCase):
- """Test cases for Link objects"""
+ """Test cases for Czech language"""
def setUp(self):
site = pywikibot.Site('cs', 'wikipedia')
@@ -109,7 +184,7 @@
class TestPortugueseTimeStripper(PywikibotTestCase):
- """Test cases for Link objects"""
+ """Test cases for Portuguese language"""
def setUp(self):
site = pywikibot.Site('pt', 'wikipedia')
@@ -132,7 +207,7 @@
class TestNorwegianTimeStripper(PywikibotTestCase):
- """Test cases for Link objects"""
+ """Test cases for Norwegian language"""
def setUp(self):
site = pywikibot.Site('no', 'wikipedia')
@@ -154,6 +229,43 @@
self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+class TestVietnameseTimeStripper(PywikibotTestCase):
+ """Test cases for Vietnamese language"""
+
+ def setUp(self):
+ site = pywikibot.Site('vi', 'wikipedia')
+ self.ts = TimeStripper(site)
+ super(TestVietnameseTimeStripper, self).setUp()
+
+ def test_timestripper_01(self):
+ """Test that correct date is matched"""
+
+ txtMatch = u'16:41, ngày 15 tháng 9 năm 2008 (UTC) 16:41, ngày 12 tháng 9 năm
2008 (UTC)'
+ txtNoMatch = u'16:41, ngày 15 March 9 năm 2008 (UTC) 16:41, ngày 12 March 9
năm 2008 (UTC)'
+
+ tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
+ self.ts.site.siteinfo['timezone'])
+
+ res = datetime.datetime(2008, 9, 12, 16, 41, tzinfo=tzone)
+
+ self.assertEqual(self.ts.timestripper(txtMatch), res)
+ self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+
+ def test_timestripper_02(self):
+ """Test that correct date is matched"""
+
+ txtMatch = u'21:18, ngày 13 tháng 8 năm 2014 (UTC) 21:18, ngày 14 tháng 8 năm
2014 (UTC)'
+ txtNoMatch = u'21:18, ngày 13 March 8 năm 2014 (UTC) 21:18, ngày 14 March 8
năm 2014 (UTC)'
+
+ tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
+ self.ts.site.siteinfo['timezone'])
+
+ res = datetime.datetime(2014, 8, 14, 21, 18, tzinfo=tzone)
+
+ self.assertEqual(self.ts.timestripper(txtMatch), res)
+ self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+
+
if __name__ == '__main__':
try:
unittest.main()
--
To view, visit
https://gerrit.wikimedia.org/r/154209
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I148663b7c694c499c194e993526ea8f928e4c174
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>