Revision: 5258
Author: wikipedian
Date: 2008-04-23 12:46:19 +0000 (Wed, 23 Apr 2008)
Log Message:
-----------
Bugfix: }} and | were regarded as part of the link when a URL appeared inside
a nested template. See, for example:
http://de.wikipedia.org/w/index.php?title=Benutzer:Head/Spielwiese&acti…
Modified Paths:
--------------
trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2008-04-23 08:51:40 UTC (rev 5257)
+++ trunk/pywikipedia/weblinkchecker.py 2008-04-23 12:46:19 UTC (rev 5258)
@@ -203,7 +203,13 @@
# MediaWiki parses templates before parsing external links. Thus, there
# might be a | or a } directly after a URL which does not belong to
# the URL itself.
- # Blow up templates with spaces to avoid these problems.
+
+ # First, remove the curly braces of inner templates:
+ nestedTemplateR = re.compile(r'{{([^}]*?){{(.*?)}}(.*?)}}')
+ while nestedTemplateR.search(text):
+ text = nestedTemplateR.sub(r'{{\1 \2 \3}}', text)
+
+ # Then blow up the templates with spaces so that the | and }} will not be regarded as part of the link:
templateWithParamsR = re.compile(r'{{([^}]*?[^ ])\|([^ ][^}]*?)}}',
re.DOTALL)
while templateWithParamsR.search(text):
text = templateWithParamsR.sub(r'{{ \1 | \2 }}', text)