jenkins-bot submitted this change.
[bugfix] timestripper should skip HTML elements
Remove HTML elements before searching for a timestamp in the text.
Also fix isort check.
Bug: T302496
Change-Id: Iad70c4dd803fd40aac6f8d100c80512a876ea724
---
M pywikibot/textlib.py
M tests/timestripper_tests.py
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index cc94eaf..d6eab05 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -25,7 +25,7 @@
from pywikibot.exceptions import InvalidTitleError, SiteDefinitionError
from pywikibot.family import Family
from pywikibot.time import TZoneFixedOffset
-from pywikibot.tools import deprecated, ModuleDeprecationWrapper
+from pywikibot.tools import ModuleDeprecationWrapper, deprecated
from pywikibot.userinterfaces.transliteration import NON_LATIN_DIGITS
@@ -2016,6 +2016,7 @@
# Remove parts that are not supposed to contain the timestamp, in order
# to reduce false positives.
line = removeDisabledParts(line)
+ line = removeHTMLParts(line)
line = to_latin_digits(line)
for pat in self.patterns:
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index b178bac..4929296 100755
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -377,6 +377,16 @@
txt_match = self.date[:9] + '[[foo]]' + self.date[9:]
self.assertEqual(ts(txt_match), self.expected_date)
+ def test_timestripper_skip_html(self):
+ """Test dates in html are correctly skipped."""
+ ts = self.ts.timestripper
+
+ txt_match = '<div ' + self.fake_date + '>'
+ self.assertIsNone(ts(txt_match))
+
+ txt_match = self.date + '<div ' + self.fake_date + '>'
+ self.assertEqual(ts(txt_match), self.expected_date)
+
class TestTimeStripperDoNotArchiveUntil(TestTimeStripperCase):
To view, visit change 821799. To unsubscribe, or for help writing mail filters, visit settings.