jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/508473 )
Change subject: textlib: avoid infinite execution of regex
......................................................................
textlib: avoid infinite execution of regex
Bug: T222671
Change-Id: Iae491922d29c458f28810f4da23e4be254dd8bc5
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 17 insertions(+), 1 deletion(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 236be29..b0ee716 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -913,7 +913,7 @@
langlink_pattern = interwiki_regex.pattern.replace(':?', '')
last_section_content = sections[-1].content if sections else header
footer = re.search(
- r'(%s)*\Z' % r'|'.join((langlink_pattern, cat_regex.pattern, r'\s+')),
+ r'(%s)*\Z' % r'|'.join((langlink_pattern, cat_regex.pattern, r'\s')),
last_section_content).group().lstrip()
if footer:
if sections:
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index 990e937..6ae076d 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -1768,6 +1768,22 @@
'')
)
+ def test_long_comment(self):
+ r"""Test for text having a long expanse of white space.
+
+ This is to catch certain regex issues caused by patterns like
+ r'(\s+)*$' (as found in older versions of extract_section).
+ They may not halt.
+
+ c.f.
+ https://www.regular-expressions.info/catastrophic.html
+ """
+ text = '<!-- -->'
+ self.assertEqual(
+ extract_sections(text, self.site),
+ (text, [], '')
+ )
+
if __name__ == '__main__': # pragma: no cover
try:
--
To view, visit
https://gerrit.wikimedia.org/r/508473
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: Iae491922d29c458f28810f4da23e4be254dd8bc5
Gerrit-Change-Number: 508473
Gerrit-PatchSet: 1
Gerrit-Owner: Whym <whym(a)whym.org>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot (75)