[Pywikipedia-l] SVN: [5957] trunk/pywikipedia/checkimages.py
filnik at svn.wikimedia.org
filnik at svn.wikimedia.org
Sat Oct 11 16:03:06 UTC 2008
Revision: 5957
Author: filnik
Date: 2008-10-11 16:03:06 +0000 (Sat, 11 Oct 2008)
Log Message:
-----------
Little bugfix in the regex to detect the templates
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-10-11 14:07:54 UTC (rev 5956)
+++ trunk/pywikipedia/checkimages.py 2008-10-11 16:03:06 UTC (rev 5957)
@@ -1034,7 +1034,7 @@
def smartDetection(self, image_text):
seems_ok = False
license_found = None
- regex_find_licenses = re.compile(r'\{\{(?:[Tt]emplate:|)(.*?)(?:[|\n].*?|)\}\}', re.DOTALL)
+ regex_find_licenses = re.compile(r'\{\{(?:[Tt]emplate:|)(.*?)(?:[|\n<].*?|)\}\}', re.DOTALL)
licenses_found = regex_find_licenses.findall(image_text)
second_round = False
@@ -1399,7 +1399,7 @@
# If there are {{ use regex, otherwise no (if there's not the {{ may not be a template
# and the regex will be wrong)
if '{{' in i:
- regexP = re.compile('\{\{(?:template|)%s ?(?:\||\n|\}) ?' % i.split('{{')[1].replace(' ', '[ _]'), re.I)
+ regexP = re.compile('\{\{(?:template|)%s ?(?:\||\n|\}|<) ?' % i.split('{{')[1].replace(' ', '[ _]'), re.I)
result = regexP.findall(g)
if result != []:
tagged = True
@@ -1412,7 +1412,7 @@
for l in hiddentemplate:
if tagged == False:
# why creator? Because on commons there's a template such as {{creator:name}} that.. works
- res = re.findall(r'\{\{(?:[Tt]emplate:|)%s(?:[ \n]*?(?:\n|\||\}|creator:))' % l.lower(), g.lower())
+ res = re.findall(r'\{\{(?:[Tt]emplate:|)(?:%s(?:[ \n]*?(?:\n|\||\}|<)|creator:)' % l.lower(), g.lower())
if res != []:
white_template_found += 1
if l != '' and l != ' ': # Check that l is not nothing or a space
More information about the Pywikipedia-l
mailing list