jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] replaceExcept: Replace references iteratively
......................................................................
[FIX] replaceExcept: Replace references iteratively
Instead of searching for back references (\0 or \g<foo>) in the entire resulting string, it now searches only the original replacement string. So when back references appear in the resulting string itself (for example, because the matched text contains them), they won't be found and expanded a second time.
It also allows numeric group references in \g back references (e.g. \g<1>), so that they behave the same as in re.sub.
Bug: T99032
Change-Id: Ia4419804b83e67baada277e1493b58e4d2f73bf5
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 42 insertions(+), 12 deletions(-)
Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index cfca7e0..26b291f 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -296,23 +296,26 @@ # text = text[:match.start()] + replacement + text[match.end():]
# So we have to process the group references manually. - replacement = new + replacement = ''
- groupR = re.compile(r'\(?P<number>\d+)|\g<(?P<name>.+?)>') - while True: - groupMatch = groupR.search(replacement) - if not groupMatch: - break - groupID = (groupMatch.group('name') or - int(groupMatch.group('number'))) + group_regex = re.compile(r'\(\d+)|\g<(.+?)>') + last = 0 + for group_match in group_regex.finditer(new): + group_id = group_match.group(1) or group_match.group(2) try: - replacement = (replacement[:groupMatch.start()] + - (match.group(groupID) or '') + - replacement[groupMatch.end():]) + group_id = int(group_id) + except ValueError: + pass + try: + replacement += new[last:group_match.start()] + replacement += match.group(group_id) or '' except IndexError: - pywikibot.output('\nInvalid group reference: %s' % groupID) + pywikibot.output('\nInvalid group reference: %s' % group_id) pywikibot.output('Groups found:\n%s' % match.groups()) raise IndexError + last = group_match.end() + replacement += new[last:] + text = text[:match.start()] + replacement + text[match.end():]
# continue the search on the remaining text diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 5edb23f..468d3f2 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -384,6 +384,22 @@ self.assertEqual( textlib.replaceExcept('', r'(a)?', r'\1\1', [], site=self.site), '') + self.assertEqual( + textlib.replaceExcept('A123B', r'A(\d)2(\d)B', r'A\g<1>x\g<2>B', + [], site=self.site), + 'A1x3B') + self.assertEqual( + textlib.replaceExcept('A123B', r'A(?P<a>\d)2(?P<b>\d)B', + r'A\g<a>x\g<b>B', [], site=self.site), + 'A1x3B') + self.assertEqual( + textlib.replaceExcept('A123B', r'A(?P<a>\d)2(\d)B', + r'A\g<a>x\g<2>B', [], site=self.site), + 'A1x3B') + self.assertEqual( + textlib.replaceExcept('A123B', r'A(?P<a>\d)2(\d)B', + r'A\g<a>x\2B', [], site=self.site), + 'A1x3B')
def test_case_sensitive(self): self.assertEqual(textlib.replaceExcept('AxB', 'x', 'y', [], @@ -513,6 +529,17 @@ ['template'], site=self.site), template_sample)
+ def test_replace_source_reference(self): + """Test replacing in text which contains back references.""" + # Don't use a valid reference number in the original string, in case it + # tries to apply that as a reference. + self.assertEqual(textlib.replaceExcept(r'\42', r'^(.*)$', r'X\1X', + [], site=self.site), + r'X\42X') + self.assertEqual(textlib.replaceExcept(r'\g<bar>', r'^(?P<foo>.*)$', + r'X\g<foo>X', [], site=self.site), + r'X\g<bar>X') +
if __name__ == '__main__': try:
pywikibot-commits@lists.wikimedia.org