jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/508473 )
Change subject: textlib: avoid infinite execution of regex
......................................................................
textlib: avoid infinite execution of regex
Bug: T222671
Change-Id: Iae491922d29c458f28810f4da23e4be254dd8bc5
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 17 insertions(+), 1 deletion(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 236be29..b0ee716 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -913,7 +913,7 @@ langlink_pattern = interwiki_regex.pattern.replace(':?', '') last_section_content = sections[-1].content if sections else header footer = re.search( - r'(%s)*\Z' % r'|'.join((langlink_pattern, cat_regex.pattern, r'\s+')), + r'(%s)*\Z' % r'|'.join((langlink_pattern, cat_regex.pattern, r'\s')), last_section_content).group().lstrip() if footer: if sections: diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 990e937..6ae076d 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -1768,6 +1768,22 @@ '') )
+ def test_long_comment(self): + r"""Test for text having a long expanse of white space. + + This is to catch certain regex issues caused by patterns like + r'(\s+)*$' (as found in older versions of extract_section). + They may not halt. + + c.f. + https://www.regular-expressions.info/catastrophic.html + """ + text = '<!-- -->' + self.assertEqual( + extract_sections(text, self.site), + (text, [], '') + ) +
if __name__ == '__main__': # pragma: no cover try:
pywikibot-commits@lists.wikimedia.org