Revision: 8022
Author: xqt
Date: 2010-03-18 06:46:50 +0000 (Thu, 18 Mar 2010)
Log Message:
-----------
Patch #2972270 for bug #2970428 (not stripping the "|" that follows a link). Thanks, masti.
Modified Paths:
--------------
branches/rewrite/pywikibot/textlib.py
trunk/pywikipedia/pywikibot/textlib.py
Modified: branches/rewrite/pywikibot/textlib.py
===================================================================
--- branches/rewrite/pywikibot/textlib.py 2010-03-17 17:55:22 UTC (rev 8021)
+++ branches/rewrite/pywikibot/textlib.py 2010-03-18 06:46:50 UTC (rev 8022)
@@ -7,7 +7,7 @@
"""
#
-# (C) Pywikipedia bot team, 2008
+# (C) Pywikipedia bot team, 2008-2010
#
# Distributed under the terms of the MIT license.
#
@@ -92,7 +92,8 @@
# this matches internal wikilinks, but also interwiki, categories, and
# images.
'link': re.compile(r'\[\[[^\]\|]*(\|[^\]]*)?\]\]'),
- 'interwiki': re.compile(r'(?i)\[\[(%s)\s?:[^\]]*\]\][\s]*'
+ # also finds links to foreign sites with preleading ":"
+ 'interwiki': re.compile(r'(?i)\[\[:?(%s)\s?:[^\]]*\]\][\s]*'
% '|'.join(site.validLanguageLinks()
+ site.family.obsolete.keys())
),
@@ -678,7 +679,7 @@
# Note: While allowing parenthesis inside URLs, MediaWiki will regard
# right parenthesis at the end of the URL as not part of that URL.
# The same applies to dot, comma, colon and some other characters.
- notAtEnd = '\]\s\)\.:;,<>"'
+ notAtEnd = '\]\s\)\.:;,<>"\|'
# So characters inside the URL can be anything except whitespace,
# closing squared brackets, quotation marks, greater than and less
# than, and the last character also can't be parenthesis or another
Modified: trunk/pywikipedia/pywikibot/textlib.py
===================================================================
--- trunk/pywikipedia/pywikibot/textlib.py 2010-03-17 17:55:22 UTC (rev 8021)
+++ trunk/pywikipedia/pywikibot/textlib.py 2010-03-18 06:46:50 UTC (rev 8022)
@@ -680,7 +680,7 @@
# Note: While allowing parenthesis inside URLs, MediaWiki will regard
# right parenthesis at the end of the URL as not part of that URL.
# The same applies to dot, comma, colon and some other characters.
- notAtEnd = '\]\s\)\.:;,<>"'
+ notAtEnd = '\]\s\)\.:;,<>"\|'
# So characters inside the URL can be anything except whitespace,
# closing squared brackets, quotation marks, greater than and less
# than, and the last character also can't be parenthesis or another
Show replies by date