Revision: 8022 Author: xqt Date: 2010-03-18 06:46:50 +0000 (Thu, 18 Mar 2010)
Log Message: ----------- patch #2972270 for bug #2970428 (not stripping | following link). Thanks masti.
Modified Paths: -------------- branches/rewrite/pywikibot/textlib.py trunk/pywikipedia/pywikibot/textlib.py
Modified: branches/rewrite/pywikibot/textlib.py =================================================================== --- branches/rewrite/pywikibot/textlib.py 2010-03-17 17:55:22 UTC (rev 8021) +++ branches/rewrite/pywikibot/textlib.py 2010-03-18 06:46:50 UTC (rev 8022) @@ -7,7 +7,7 @@
""" # -# (C) Pywikipedia bot team, 2008 +# (C) Pywikipedia bot team, 2008-2010 # # Distributed under the terms of the MIT license. # @@ -92,7 +92,8 @@ # this matches internal wikilinks, but also interwiki, categories, and # images. 'link': re.compile(r'[[[^]|]*(|[^]]*)?]]'), - 'interwiki': re.compile(r'(?i)[[(%s)\s?:[^]]*]][\s]*' + # also finds links to foreign sites with preleading ":" + 'interwiki': re.compile(r'(?i)[[:?(%s)\s?:[^]]*]][\s]*' % '|'.join(site.validLanguageLinks() + site.family.obsolete.keys()) ), @@ -678,7 +679,7 @@ # Note: While allowing parenthesis inside URLs, MediaWiki will regard # right parenthesis at the end of the URL as not part of that URL. # The same applies to dot, comma, colon and some other characters. - notAtEnd = ']\s).:;,<>"' + notAtEnd = ']\s).:;,<>"|' # So characters inside the URL can be anything except whitespace, # closing squared brackets, quotation marks, greater than and less # than, and the last character also can't be parenthesis or another
Modified: trunk/pywikipedia/pywikibot/textlib.py =================================================================== --- trunk/pywikipedia/pywikibot/textlib.py 2010-03-17 17:55:22 UTC (rev 8021) +++ trunk/pywikipedia/pywikibot/textlib.py 2010-03-18 06:46:50 UTC (rev 8022) @@ -680,7 +680,7 @@ # Note: While allowing parenthesis inside URLs, MediaWiki will regard # right parenthesis at the end of the URL as not part of that URL. # The same applies to dot, comma, colon and some other characters. - notAtEnd = ']\s).:;,<>"' + notAtEnd = ']\s).:;,<>"|' # So characters inside the URL can be anything except whitespace, # closing squared brackets, quotation marks, greater than and less # than, and the last character also can't be parenthesis or another