http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11440
Revision: 11440
Author: amir
Date: 2013-04-23 16:02:18 +0000 (Tue, 23 Apr 2013)
Log Message:
-----------
fixing bug #3610818 in a way that doesn't cause bug #3455789. It's not very good coding though
Modified Paths:
--------------
    trunk/pywikipedia/pywikibot/textlib.py
Modified: trunk/pywikipedia/pywikibot/textlib.py
===================================================================
--- trunk/pywikipedia/pywikibot/textlib.py 2013-04-23 15:20:48 UTC (rev 11439)
+++ trunk/pywikipedia/pywikibot/textlib.py 2013-04-23 16:02:18 UTC (rev 11440)
@@ -844,7 +844,9 @@
     # Note: While allowing dots inside URLs, MediaWiki will regard
     # dots at the end of the URL as not part of that URL.
     # The same applies to comma, colon and some other characters.
-    notAtEnd = ']\s.:;,<>"|'
+    notAtEnd = ']\s.:;,<>"|)'
+    #This is specially set for brackted link
+    notAtEndb = ']\s.:;,<>"|'
     # So characters inside the URL can be anything except whitespace,
     # closing squared brackets, quotation marks, greater than and less
     # than, and the last character also can't be parenthesis or another
@@ -857,11 +859,15 @@
     regex = r'(?P<url>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
             r'(?=[%(notAtEnd)s]*'')|http[s]?://[^%(notInside)s]*' \
             r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEnd}
-
+    regexb = r'(?P<url>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
+            r'(?=[%(notAtEnd)s]*'')|http[s]?://[^%(notInside)s]*' \
+            r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEndb}
     if withoutBracketed:
         regex = r'(?<![)' + regex
     elif onlyBracketed:
-        regex = r'[' + regex
+        regex = r'[' + regexb
+    else:
+        regex=r'(?:(?<![)'+ regex+r'|['+regexb=')'
     linkR = re.compile(regex)
     return linkR
pywikipedia-svn@lists.wikimedia.org