jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] replaceExcept: Handle empty matches ......................................................................
[FIX] replaceExcept: Handle empty matches
When the regex can match the empty string, replaceExcept gets into an infinite loop as soon as it reaches the end of the string, because it matches the empty string at that position over and over.
This adopts the same behavior as re.sub(): an empty match is only replaced if it isn't adjacent to a previous match.
Change-Id: I479d6d5f9ac756b84d93bfd1fd81a2948677787f --- M pywikibot/textlib.py M tests/textlib_tests.py 2 files changed, 15 insertions(+), 0 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved XZise: Looks good to me, but someone else must approve jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 462afa2..cfca7e0 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -253,6 +253,8 @@ index = 0 markerpos = len(text) while True: + if index > len(text): + break match = old.search(text, index) if not match: # nothing left to replace @@ -318,6 +320,9 @@ index = match.start() + 1 else: index = match.start() + len(replacement) + if not match.group(): + # When the regex allows to match nothing, shift by one character + index += 1 markerpos = match.start() + len(replacement) text = text[:markerpos] + marker + text[markerpos:]
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 3974fd3..5edb23f 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -374,6 +374,16 @@ r'A(\d)2(\d)B', r'A\1x\2B', [], site=self.site), 'A1x3B') + self.assertEqual( + textlib.replaceExcept('', r'(a?)', r'\1B', [], site=self.site), + 'B') + self.assertEqual( + textlib.replaceExcept('abc', r'x*', r'-', [], site=self.site), + '-a-b-c-') + # This is different from re.sub() as re.sub() doesn't allow None groups + self.assertEqual( + textlib.replaceExcept('', r'(a)?', r'\1\1', [], site=self.site), + '')
def test_case_sensitive(self): self.assertEqual(textlib.replaceExcept('AxB', 'x', 'y', [],