Revision: 5950 Author: filnik Date: 2008-10-10 14:33:40 +0000 (Fri, 10 Oct 2008)
Log Message: ----------- Fixing smart detection again; the Commons testing phase was successful. Let's see if there's anything else to add.
Modified Paths: -------------- trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py =================================================================== --- trunk/pywikipedia/checkimages.py 2008-10-10 14:14:04 UTC (rev 5949) +++ trunk/pywikipedia/checkimages.py 2008-10-10 14:33:40 UTC (rev 5950) @@ -342,7 +342,7 @@ 'ta':[u'information'], 'zh':[u'information'], } - +# A page where there's a list of template to skip. PageWithHiddenTemplates = { 'commons': u'User:Filbot/White_templates#White_templates', 'en':None, @@ -350,6 +350,14 @@ 'ko': u'User:Kwjbot_IV/whitetemplates/list', }
+# A page where there's a list of template to consider as licenses. +PageWithAllowedTemplates = { + 'commons': u'User:Filbot/Allowed templates', + 'en':None, + 'it':u'Progetto:Coordinamento/Immagini/Bot/AllowedTemplates', + 'ko': u'User:Kwjbot_IV/whitetemplates/list', + } + # Template added when the bot finds only an hidden template and nothing else. # Note: every __botnick__ will be repleaced with your bot's nickname (feel free not to use if you don't need it) HiddenTemplateNotification = { @@ -497,6 +505,7 @@ self.com = wikipedia.translate(self.site, comm10) self.hiddentemplate = wikipedia.translate(self.site, HiddenTemplate) self.pageHidden = wikipedia.translate(self.site, PageWithHiddenTemplates) + self.pageAllowed = wikipedia.translate(self.site, PageWithAllowedTemplates) # Commento = Summary in italian self.commento = wikipedia.translate(self.site, comm) # Adding the bot's nickname at the notification text if needed. @@ -992,6 +1001,20 @@ gen = pagegenerators.CategorizedPageGenerator(cat) pages = [page for page in gen] list_licenses.extend(pages) + + # Add the licenses set in the default page as licenses + # to check + if self.pageAllowed != None: + try: + pageAllowedText = wikipedia.Page(self.site, self.pageAllowed).get() + except (wikipedia.NoPage, wikipedia.IsRedirectPage): + pageAllowedText = '' + for nameLicense in self.load(pageAllowedText): + if not 'template:' in nameLicense.lower(): + nameLicense = 'Template:%s' % nameLicense + pageLicense = wikipedia.Page(self.site, nameLicense) + if pageLicense not in list_licenses: + list_licenses.append(pageLicense) # the list has wiki-pages return list_licenses
def smartDetection(self, image_text): @@ -1000,6 +1023,7 @@ regex_find_licenses = re.compile(r'{{(?:[Tt]emplate:|)(.*?)(?:[|\n].*?|)}}', re.DOTALL) licenses_found = regex_find_licenses.findall(image_text) second_round = False + exit_cicle = False # howTo exit from both the for and the while cicle while 1: if exit_cicle: # howTo exit from the while @@ -1033,6 +1057,8 @@ seems_ok = False # Empty template (maybe deleted while the script's running) exit_cicle = True break + regex_noinclude = re.compile(r'<noinclude>(.*?)</noinclude>', re.DOTALL) + template_text = regex_noinclude.sub('', template_text) if second_round == False: licenses_found = regex_find_licenses.findall(template_text) second_round = True