jenkins-bot has submitted this change and it was merged.
Change subject: Fix bug 54568 ......................................................................
Fix bug 54568
I changed the regex to a more complex pattern to handle the problem of catching ")" at the end of a URL when the URL is enclosed in brackets
I tested it on several different texts and it worked correctly
Change-Id: I6f3addcaf93d4d7499e3ec169255f284ab70a526 --- M pywikibot/textlib.py M weblinkchecker.py 2 files changed, 8 insertions(+), 11 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index bb6fb9a..41c61ab 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -871,18 +871,18 @@ # not allowed inside links. For example, in this wiki text: # ''Please see http://www.example.org.'' # .'' shouldn't be considered as part of the link. - regex = r'(?P<url>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \ + regex = r'https?://[^%(notInside)s]*?[^%(notAtEnd)s]' \ r'(?=[%(notAtEnd)s]*'')|http[s]?://[^%(notInside)s]*' \ - r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEnd} - regexb = r'(?P<urlb>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \ + r'[^%(notAtEnd)s]' % {'notInside': notInside, 'notAtEnd': notAtEnd} + regexb = r'https?://[^%(notInside)s]*?[^%(notAtEnd)s]' \ r'(?=[%(notAtEnd)s]*'')|http[s]?://[^%(notInside)s]*' \ - r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEndb} + r'[^%(notAtEnd)s]' % {'notInside': notInside, 'notAtEnd': notAtEndb} if withoutBracketed: - regex = r'(?<![)' + regex + regex = r'(?<![)(?P<url>%s)' % regex elif onlyBracketed: - regex = r'[' + regexb + regex = r'[(?P<url>%s)' % regexb else: - regex=r'(?:(?<![)'+ regex+r'|['+regexb+')' + regex = r'(?P<url>(?<![)%s|[%s)' % (regex, regexb) linkR = re.compile(regex) return linkR
diff --git a/weblinkchecker.py b/weblinkchecker.py index eafd7ed..ca41986 100644 --- a/weblinkchecker.py +++ b/weblinkchecker.py @@ -175,10 +175,7 @@ text = pywikibot.removeDisabledParts(text) linkR = pywikibot.compileLinkR(withoutBracketed, onlyBracketed) for m in linkR.finditer(text): - if m.group('url'): - yield m.group('url') - else: - yield m.group('urlb') + yield m.group('url')
class XmlDumpPageGenerator:
pywikibot-commits@lists.wikimedia.org