jenkins-bot has submitted this change and it was merged.
Change subject: Fix bug 54568
......................................................................
Fix bug 54568
I replaced the regex with a more complex one to fix the
problem of a trailing ")" being captured as part of the URL
when the link is used inside brackets.
I tested it on several different texts and it worked correctly.
Change-Id: I6f3addcaf93d4d7499e3ec169255f284ab70a526
---
M pywikibot/textlib.py
M weblinkchecker.py
2 files changed, 8 insertions(+), 11 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index bb6fb9a..41c61ab 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -871,18 +871,18 @@
# not allowed inside links. For example, in this wiki text:
# ''Please see http://www.example.org.''
# .'' shouldn't be considered as part of the link.
- regex = r'(?P<url>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
+ regex = r'https?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
r'(?=[%(notAtEnd)s]*\'\')|http[s]?://[^%(notInside)s]*' \
- r'[^%(notAtEnd)s])' % {'notInside': notInside,
'notAtEnd': notAtEnd}
- regexb = r'(?P<urlb>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
+ r'[^%(notAtEnd)s]' % {'notInside': notInside,
'notAtEnd': notAtEnd}
+ regexb = r'https?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
r'(?=[%(notAtEnd)s]*\'\')|http[s]?://[^%(notInside)s]*' \
- r'[^%(notAtEnd)s])' % {'notInside': notInside,
'notAtEnd': notAtEndb}
+ r'[^%(notAtEnd)s]' % {'notInside': notInside,
'notAtEnd': notAtEndb}
if withoutBracketed:
- regex = r'(?<!\[)' + regex
+ regex = r'(?<!\[)(?P<url>%s)' % regex
elif onlyBracketed:
- regex = r'\[' + regexb
+ regex = r'\[(?P<url>%s)' % regexb
else:
- regex=r'(?:(?<!\[)'+ regex+r'|\['+regexb+')'
+ regex = r'(?P<url>(?<!\[)%s|\[%s)' % (regex, regexb)
linkR = re.compile(regex)
return linkR
diff --git a/weblinkchecker.py b/weblinkchecker.py
index eafd7ed..ca41986 100644
--- a/weblinkchecker.py
+++ b/weblinkchecker.py
@@ -175,10 +175,7 @@
text = pywikibot.removeDisabledParts(text)
linkR = pywikibot.compileLinkR(withoutBracketed, onlyBracketed)
for m in linkR.finditer(text):
- if m.group('url'):
- yield m.group('url')
- else:
- yield m.group('urlb')
+ yield m.group('url')
class XmlDumpPageGenerator:
--
To view, visit
https://gerrit.wikimedia.org/r/119964
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I6f3addcaf93d4d7499e3ec169255f284ab70a526
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>