Revision: 5258 Author: wikipedian Date: 2008-04-23 12:46:19 +0000 (Wed, 23 Apr 2008)
Log Message: ----------- Bugfix: }} and | were regarded as part of the link when there was a URL in a nested template. See for example: http://de.wikipedia.org/w/index.php?title=Benutzer:Head/Spielwiese&actio...
Modified Paths: -------------- trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2008-04-23 08:51:40 UTC (rev 5257)
+++ trunk/pywikipedia/weblinkchecker.py 2008-04-23 12:46:19 UTC (rev 5258)
@@ -203,7 +203,13 @@
  # MediaWiki parses templates before parsing external links. Thus, there
  # might be a | or a } directly after a URL which does not belong to
  # the URL itself.
- # Blow up templates with spaces to avoid these problems.
+
+ # First, remove the curly braces of inner templates:
+ nestedTemplateR = re.compile(r'{{([^}]*?){{(.*?)}}(.*?)}}')
+ while nestedTemplateR.search(text):
+     text = nestedTemplateR.sub(r'{{\1 \2 \3}}', text)
+
+ # Then blow up the templates with spaces so that the | and }} will not be regarded as part of the link:.
  templateWithParamsR = re.compile(r'{{([^}]*?[^ ])|([^ ][^}]*?)}}', re.DOTALL)
  while templateWithParamsR.search(text):
      text = templateWithParamsR.sub(r'{{ \1 | \2 }}', text)
pywikipedia-l@lists.wikimedia.org