jenkins-bot merged this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified

textlib._tag_pattern: Do not mistake self-closing tags with start tag

Bug: T203568
Change-Id: I58ac107691b2c6866d2b7568983e8760f7e6683a
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index c9d416b..b58ae27 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -258,7 +258,11 @@

def _tag_pattern(tag_name):
"""Return a tag pattern for the given tag name."""
- return r'<{0}[ >][\s\S]*?</{0}\s*>'.format(_ignore_case(tag_name))
+ return (
+ r'<{0}(?:>|\s+[^>]*(?<!/)>)' # start tag
+ r'[\s\S]*?' # contents
+ r'</{0}\s*>' # end tag
+ .format(_ignore_case(tag_name)))


def _tag_regex(tag_name):
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index 67db849..0e56cc8 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -1216,6 +1216,15 @@
self.assertEqual(textlib.replaceExcept('{{#invoke:x}}', 'x', 'y',
['invoke'], site=self.site),
'{{#invoke:x}}')
+ self.assertEqual(
+ textlib.replaceExcept(
+ '<ref name=etwa /> not_in_ref <ref> in_ref </ref>',
+ 'not_in_ref', 'text', ['ref'], site=self.site),
+ '<ref name=etwa /> text <ref> in_ref </ref>')
+ self.assertEqual(
+ textlib.replaceExcept(
+ '<ab> content </a>', 'content', 'text', ['a'], site=self.site),
+ '<ab> text </a>')

def test_replace_with_count(self):
"""Test replacing with count argument."""

To view, visit change 458636. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I58ac107691b2c6866d2b7568983e8760f7e6683a
Gerrit-Change-Number: 458636
Gerrit-PatchSet: 3
Gerrit-Owner: Dalba <dalba.wiki@gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot (75)