jenkins-bot has submitted this change and it was merged.
Change subject: Fix bug 54568 ......................................................................
Fix bug 54568
I changed the regex to a more complex pattern to handle the problem of catching ")" at the end of a URL when the URL is enclosed in brackets
I tested it on several different texts and it worked correctly
Change-Id: I6f3addcaf93d4d7499e3ec169255f284ab70a526 --- M pywikibot/textlib.py M weblinkchecker.py 2 files changed, 8 insertions(+), 11 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index bb6fb9a..41c61ab 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -871,18 +871,18 @@ # not allowed inside links. For example, in this wiki text: # ''Please see http://www.example.org.'' # .'' shouldn't be considered as part of the link. - regex = r'(?P<url>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \ + regex = r'https?://[^%(notInside)s]*?[^%(notAtEnd)s]' \ r'(?=[%(notAtEnd)s]*'')|http[s]?://[^%(notInside)s]*' \ - r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEnd} - regexb = r'(?P<urlb>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \ + r'[^%(notAtEnd)s]' % {'notInside': notInside, 'notAtEnd': notAtEnd} + regexb = r'https?://[^%(notInside)s]*?[^%(notAtEnd)s]' \ r'(?=[%(notAtEnd)s]*'')|http[s]?://[^%(notInside)s]*' \ - r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEndb} + r'[^%(notAtEnd)s]' % {'notInside': notInside, 'notAtEnd': notAtEndb} if withoutBracketed: - regex = r'(?<![)' + regex + regex = r'(?<![)(?P<url>%s)' % regex elif onlyBracketed: - regex = r'[' + regexb + regex = r'[(?P<url>%s)' % regexb else: - regex=r'(?:(?<![)'+ regex+r'|['+regexb+')' + regex = r'(?P<url>(?<![)%s|[%s)' % (regex, regexb) linkR = re.compile(regex) return linkR
diff --git a/weblinkchecker.py b/weblinkchecker.py index eafd7ed..ca41986 100644 --- a/weblinkchecker.py +++ b/weblinkchecker.py @@ -175,10 +175,7 @@ text = pywikibot.removeDisabledParts(text) linkR = pywikibot.compileLinkR(withoutBracketed, onlyBracketed) for m in linkR.finditer(text): - if m.group('url'): - yield m.group('url') - else: - yield m.group('urlb') + yield m.group('url')
class XmlDumpPageGenerator:
pywikibot-commits@lists.wikimedia.org