jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/458636 )
Change subject: textlib._tag_pattern: Do not mistake self-closing tags with start tag
textlib._tag_pattern: Do not mistake self-closing tags with start tag
Bug: T203568
Change-Id: I58ac107691b2c6866d2b7568983e8760f7e6683a
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 14 insertions(+), 1 deletion(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index c9d416b..b58ae27 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -258,7 +258,11 @@
def _tag_pattern(tag_name): """Return a tag pattern for the given tag name.""" - return r'<{0}[ >][\s\S]*?</{0}\s*>'.format(_ignore_case(tag_name)) + return ( + r'<{0}(?:>|\s+[^>]*(?<!/)>)' # start tag + r'[\s\S]*?' # contents + r'</{0}\s*>' # end tag + .format(_ignore_case(tag_name)))
def _tag_regex(tag_name): diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 67db849..0e56cc8 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -1216,6 +1216,15 @@ self.assertEqual(textlib.replaceExcept('{{#invoke:x}}', 'x', 'y', ['invoke'], site=self.site), '{{#invoke:x}}') + self.assertEqual( + textlib.replaceExcept( + '<ref name=etwa /> not_in_ref <ref> in_ref </ref>', + 'not_in_ref', 'text', ['ref'], site=self.site), + '<ref name=etwa /> text <ref> in_ref </ref>') + self.assertEqual( + textlib.replaceExcept( + '<ab> content </a>', 'content', 'text', ['a'], site=self.site), + '<ab> text </a>')
def test_replace_with_count(self): """Test replacing with count argument."""
pywikibot-commits@lists.wikimedia.org