[Pywikipedia-l] SVN: [5950] trunk/pywikipedia/checkimages.py
filnik at svn.wikimedia.org
filnik at svn.wikimedia.org
Fri Oct 10 14:33:40 UTC 2008
Revision: 5950
Author: filnik
Date: 2008-10-10 14:33:40 +0000 (Fri, 10 Oct 2008)
Log Message:
-----------
Fixing smartDetection again; the Commons testing phase was successful. Let's see if there's anything else to add.
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-10-10 14:14:04 UTC (rev 5949)
+++ trunk/pywikipedia/checkimages.py 2008-10-10 14:33:40 UTC (rev 5950)
@@ -342,7 +342,7 @@
'ta':[u'information'],
'zh':[u'information'],
}
-
+# A page that lists the templates to skip.
PageWithHiddenTemplates = {
'commons': u'User:Filbot/White_templates#White_templates',
'en':None,
@@ -350,6 +350,14 @@
'ko': u'User:Kwjbot_IV/whitetemplates/list',
}
+# A page that lists the templates to be considered as licenses.
+PageWithAllowedTemplates = {
+ 'commons': u'User:Filbot/Allowed templates',
+ 'en':None,
+ 'it':u'Progetto:Coordinamento/Immagini/Bot/AllowedTemplates',
+ 'ko': u'User:Kwjbot_IV/whitetemplates/list',
+ }
+
# Template added when the bot finds only a hidden template and nothing else.
# Note: every __botnick__ will be replaced with your bot's nickname (feel free not to use it if you don't need it)
HiddenTemplateNotification = {
@@ -497,6 +505,7 @@
self.com = wikipedia.translate(self.site, comm10)
self.hiddentemplate = wikipedia.translate(self.site, HiddenTemplate)
self.pageHidden = wikipedia.translate(self.site, PageWithHiddenTemplates)
+ self.pageAllowed = wikipedia.translate(self.site, PageWithAllowedTemplates)
# Commento = "summary" in Italian
self.commento = wikipedia.translate(self.site, comm)
# Adding the bot's nickname at the notification text if needed.
@@ -992,6 +1001,20 @@
gen = pagegenerators.CategorizedPageGenerator(cat)
pages = [page for page in gen]
list_licenses.extend(pages)
+
+ # Add the licenses set in the default page as licenses
+ # to check
+ if self.pageAllowed != None:
+ try:
+ pageAllowedText = wikipedia.Page(self.site, self.pageAllowed).get()
+ except (wikipedia.NoPage, wikipedia.IsRedirectPage):
+ pageAllowedText = ''
+ for nameLicense in self.load(pageAllowedText):
+ if not 'template:' in nameLicense.lower():
+ nameLicense = 'Template:%s' % nameLicense
+ pageLicense = wikipedia.Page(self.site, nameLicense)
+ if pageLicense not in list_licenses:
+ list_licenses.append(pageLicense) # the list has wiki-pages
return list_licenses
def smartDetection(self, image_text):
@@ -1000,6 +1023,7 @@
regex_find_licenses = re.compile(r'\{\{(?:[Tt]emplate:|)(.*?)(?:[|\n].*?|)\}\}', re.DOTALL)
licenses_found = regex_find_licenses.findall(image_text)
second_round = False
+
exit_cicle = False # howTo exit from both the for and the while cicle
while 1:
if exit_cicle: # howTo exit from the while
@@ -1033,6 +1057,8 @@
seems_ok = False # Empty template (maybe deleted while the script's running)
exit_cicle = True
break
+ regex_noinclude = re.compile(r'<noinclude>(.*?)</noinclude>', re.DOTALL)
+ template_text = regex_noinclude.sub('', template_text)
if second_round == False:
licenses_found = regex_find_licenses.findall(template_text)
second_round = True
More information about the Pywikipedia-l
mailing list