http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11339
Revision: 11339
Author:   xqt
Date:     2013-04-04 17:30:35 +0000 (Thu, 04 Apr 2013)

Log Message:
-----------
PEP8, some code improvements
Modified Paths:
--------------
    trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py =================================================================== --- trunk/pywikipedia/checkimages.py 2013-04-04 14:41:33 UTC (rev 11338) +++ trunk/pywikipedia/checkimages.py 2013-04-04 17:30:35 UTC (rev 11339) @@ -2,7 +2,8 @@ # -*- coding: utf-8 -*- """ Script to check recently uploaded files. This script checks if a file -description is present and if there are other problems in the image's description. +description is present and if there are other problems in the image's +description.
This script will have to be configured for each language. Please submit translations as addition to the pywikipediabot framework. @@ -89,10 +90,17 @@ __version__ = '$Id$' #
-import re, time, urllib, urllib2, os, locale, sys, datetime
+import re
+import time
+import datetime
+import locale
+import urllib
 import wikipedia as pywikibot
 import pagegenerators as pg
-import config, catlib, query, userlib
+import catlib
+import config
+import query
+import userlib
locale.setlocale(locale.LC_ALL, '')
@@ -128,20 +136,22 @@ # '{{no license' --> '{{(?:template:|)no[ _]license ?(?:||\n|}) ?' (case # insensitive). # If there's not a {{ it will work as usual (if x in Text) -txt_find = { - 'commons': [u'{{no license', u'{{no license/en', u'{{nld', u'{{no permission', u'{{no permission since'], +txt_find = { + 'commons': [u'{{no license', u'{{no license/en', + u'{{nld', u'{{no permission', u'{{no permission since'], 'ar': [u'{{لت', u'{{لا ترخيص'], 'de': [u'{{DÜP', u'{{Düp', u'{{Dateiüberprüfung'], 'en': [u'{{nld', u'{{no license'], 'fa': [u'{{حق تکثیر تصویر نامعلوم'], 'ga': [u'{{Ceadúnas de dhíth', u'{{Ceadúnas de dhíth'], - 'hu': [u'{{nincsforrás',u'{{nincslicenc'], + 'hu': [u'{{nincsforrás', u'{{nincslicenc'], 'it': [u'{{unverdata', u'{{unverified'], - 'ja': [u'{{no source', u'{{unknown', u'{{non free', u'<!--削除についての議論が終了するまで',], + 'ja': [u'{{no source', u'{{unknown', + u'{{non free', u'<!--削除についての議論が終了するまで'], 'ta': [u'{{no source', u'{{nld', u'{{no license'], - 'ko': [u'{{출처 없음', u'{{라이선스 없음',u'{{Unknown',], - 'ur': [u'{{ناحوالہ', u'{{اجازہ نامعلوم',u'{{Di-no',], - 'zh': [u'{{no source', u'{{unknown', u'{{No license',], + 'ko': [u'{{출처 없음', u'{{라이선스 없음', u'{{Unknown'], + 'ur': [u'{{ناحوالہ', u'{{اجازہ نامعلوم', u'{{Di-no'], + 'zh': [u'{{no source', u'{{unknown', u'{{No license'], }
# Summary for when the will add the no source @@ -155,7 +165,7 @@ 'fa': u'ربات: حق تکثیر تصویر تازه بارگذاری شده نامعلوم است.', 'ga': u'Róbó: Ag márcáil comhad nua-uaslódáilte gan ceadúnas', 'hu': u'Robot: Frissen feltöltött licencsablon nélküli fájl megjelölése', - 'it':u"Bot: Aggiungo unverified", + 'it': u"Bot: Aggiungo unverified", 'ja': u'ロボットによる:著作権情報なしの画像をタグ', 'ko': u'로봇:라이선스 없음', 'ta': u'தானியங்கி:காப்புரிமை வழங்கப்படா படிமத்தை சுட்டுதல்', @@ -202,7 +212,7 @@ # if the file has an unknown extension it will be tagged with this template. # In reality, there aren't unknown extension, they are only not allowed... delete_immediately = { - 'commons':u"{{speedy|The file has .%s as extension. Is it ok? Please check.}}", + 'commons': u"{{speedy|The file has .%s as extension. Is it ok? Please check.}}", 'ar': u"{{شطب|الملف له .%s كامتداد.}}", 'en': u"{{db-meta|The file has .%s as extension.}}", 'fa': u"{{حذف سریع|تصویر %s اضافی است.}}", @@ -218,7 +228,7 @@
# The header of the Unknown extension's message. delete_immediately_head = { - 'commons':u"\n== Unknown extension! ==\n", + 'commons': u"\n== Unknown extension! ==\n", 'ar': u"\n== امتداد غير معروف! ==\n", 'en': u"\n== Unknown extension! ==\n", 'fa': u"\n==بارگذاری تصاویر موجود در انبار==\n", @@ -245,7 +255,7 @@ 'ko': u'[[:그림:%s]]의 파일 형식이 잘못되었습니다. 확인 바랍니다.--~~~~', 'ta': u'[[:படிமம்:%s]] இனங்காணப்படாத கோப்பு நீட்சியை கொண்டுள்ளது தயவு செய்து ஒரு முறை சரி பார்க்கவும் ~~~~', 'ur': u'ملف [[:File:%s]] کی توسیع شاید درست نہیں ہے، براہ کرم جانچ لیں۔ ~~~~', - 'zh' :u'您好,你上傳的[[:File:%s]]無法被識別,請檢查您的檔案,謝謝。--~~~~', + 'zh': u'您好,你上傳的[[:File:%s]]無法被識別,請檢查您的檔案,謝謝。--~~~~', }
# Summary of the delete immediately. @@ -278,14 +288,14 @@ 'hu': u"\n== Licenc nélküli kép ==\n", 'it': u"\n\n== File senza licenza ==\n", 'ur': u"\n== تصویر بدون اجازہ ==\n", - } +} # That's the text that the bot will add if it doesn't find the license. # Note: every __botnick__ will be repleaced with your bot's nickname (feel free not to use if you don't need it) nothing_notification = { 'commons': u"\n{{subst:User:Filnik/untagged|File:%s}}\n\n''This message was '''added automatically by " + \ - "__botnick__''', if you need some help about it, please read the text above again and follow the links in it," + \ - "if you still need help ask at the [[File:Human-help-browser.svg|18px|link=Commons:Help desk|?]] '''[[Commons:Help desk|->]]" + \ - "[[Commons:Help desk]]''' in any language you like to use.'' --__botnick__ ~~~~~""", + u"__botnick__''', if you need some help about it, please read the text above again and follow the links in it," + \ + u"if you still need help ask at the [[File:Human-help-browser.svg|18px|link=Commons:Help desk|?]] '''[[Commons:Help desk|->]]" + \ + u"[[Commons:Help desk]]''' in any language you like to use.'' --__botnick__ ~~~~~""", 'ar': u"{{subst:مصدر الصورة|File:%s}} --~~~~", 'en': u"{{subst:image source|File:%s}} --~~~~", 'fa': u"{{جا:اخطار نگاره|%s}}", @@ -358,7 +368,7 @@ 'ar': u"\n*[[:ملف:%s]] " + timeselected, 'de': u"\n*[[:Datei:%s]] " + timeselected, 'en': u"\n*[[:File:%s]] " + timeselected, - 'fa': u"n*[[:پرونده:%s]] "+ timeselected, + 'fa': u"n*[[:پرونده:%s]] " + timeselected, 'ga': u"\n*[[:File:%s]] " + timeselected, 'hu': u"\n*[[:Kép:%s]] " + timeselected, 'it': u"\n*[[:File:%s]] " + timeselected, @@ -397,7 +407,7 @@ # Warning 3: the part that use this regex is case-insensitive (just to let you # know..) 
HiddenTemplate = { - 'commons': [u'Template:Information'], # Put the other in the page on the project defined below + 'commons': [u'Template:Information'], # Put the other in the page on the project defined below 'ar': [u'Template:معلومات'], 'de': [u'Template:Information'], 'en': [u'Template:Information'], @@ -405,7 +415,9 @@ 'fr': [u'Template:Information'], 'ga': [u'Template:Information'], 'hu': [u'Template:Információ', u'Template:Enwiki', u'Template:Azonnali'], - 'it': [u'Template:EDP', u'Template:Informazioni file', u'Template:Information', u'Template:Trademark', u'Template:Permissionotrs'], # Put the other in the page on the project defined below + 'it': [u'Template:EDP', u'Template:Informazioni file', + u'Template:Information', u'Template:Trademark', + u'Template:Permissionotrs'], # Put the other in the page on the project defined below 'ja': [u'Template:Information'], 'ko': [u'Template:그림 정보'], 'ta': [u'Template:Information'], @@ -429,7 +441,8 @@ }
# Template added when the bot finds only an hidden template and nothing else. -# Note: every __botnick__ will be repleaced with your bot's nickname (feel free not to use if you don't need it) +# Note: every __botnick__ will be repleaced with your bot's nickname +# (feel free not to use if you don't need it) HiddenTemplateNotification = { 'commons': u"""\n{{subst:User:Filnik/whitetemplate|File:%s}}\n\n''This message was added automatically by __botnick__, if you need some help about it please read the text above again and follow the links in it, if you still need help ask at the [[File:Human-help-browser.svg|18px|link=Commons:Help desk|?]] '''[[Commons:Help desk|→]] [[Commons:Help desk]]''' in any language you like to use.'' --__botnick__ ~~~~~""", 'it': u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Template_insufficiente|%s|__botnick__}} --~~~~", @@ -454,7 +467,7 @@
# Message to put in the talk duplicates_user_talk_text = { - 'commons': u'{{subst:User:Filnik/duplicates|File:%s|File:%s}}', # FIXME: it doesn't exist + 'commons': u'{{subst:User:Filnik/duplicates|File:%s|File:%s}}', # FIXME: it doesn't exist 'it': u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Duplicati|%s|%s|__botnick__}} --~~~~", }
@@ -482,7 +495,8 @@ 'it': r'{{(?:[Tt]emplate:|)[Pp]rogetto:[Cc]oordinamento/Immagini/Bot/Template duplicati[|}]', }
-# Category with the licenses and / or with subcategories with the other licenses. +# Category with the licenses and / or with subcategories with the other +# licenses. category_with_licenses = { 'commons': 'Category:License tags', 'ar': 'تصنيف:قوالب حقوق الصور', @@ -510,7 +524,8 @@
# Seems that uploaderBots aren't interested to get messages regarding the # files that they upload.. strange, uh? -# Format: [[user,regex], [user,regex]...] the regex is needed to match the user where to send the warning-msg +# Format: [[user,regex], [user,regex]...] the regex is needed to match the user +# where to send the warning-msg uploadBots = { 'commons': [['File Upload Bot (Magnus Manske)', r'|[Ss]ource=Transferred from .*?; transferred to Commons by [[User:(.*?)]]']], @@ -561,7 +576,6 @@ pywikibot.output(u"%s%s" % (message, time_zone))
- class checkImagesBot(object): def __init__(self, site, logFulNumber=25000, sendemailActive=False, duplicatesReport=False, logFullError=True): @@ -674,7 +688,8 @@ luser = results[0] return luser else: - return upBotArray[0] # we can't find the user, report the problem to the bot + # we can't find the user, report the problem to the bot + return upBotArray[0]
def tag_image(self, put=True): """ Function to add the template in the image and to find out @@ -741,7 +756,7 @@ # wikipedia.py's version. try: testoattuale = self.talk_page.get() - history = self.talk_page.getLatestEditors(limit = 10) + history = self.talk_page.getLatestEditors(limit=10) latest_user = history[0]["user"] pywikibot.output( u'The latest user that has written something is: %s' @@ -761,8 +776,8 @@ testoattuale = self.talk_page.get() except pywikibot.NoPage: second_text = False - testoattuale = pywikibot.translate(self.site, empty, - fallback=False) + testoattuale = pywikibot.translate(self.site, empty, + fallback=False) except pywikibot.NoPage: pywikibot.output(u'The user page is blank') second_text = False @@ -808,12 +823,14 @@
URL = u'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?' if lang == 'commons': - link = URL + 'wikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=100&order=img_timestamp&format=html' + link = URL + \ + 'wikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=100&order=img_timestamp&format=html' else: - link = URL + 'wikilang=%s&wikifam=%s&order=img_timestamp&max=%s&ofs=0&max=%s' \ + link = URL + \ + 'wikilang=%s&wikifam=%s&order=img_timestamp&max=%s&ofs=0&max=%s' \ % (lang, project, limit, limit) - text = self.site.getUrl(link, no_hostname = True) - results = re.findall(r"""<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>""", + text = self.site.getUrl(link, no_hostname=True) + results = re.findall(r"<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>", text) if results: for result in results: @@ -830,7 +847,7 @@ results
""" - regex = re.compile(r'%s' % regexp, re.UNICODE|re.DOTALL) + regex = re.compile(r'%s' % regexp, re.UNICODE | re.DOTALL) results = regex.findall(textrun) for image in results: yield pywikibot.ImagePage(self.site, image) @@ -842,7 +859,6 @@ for langK in pywikibot.Family(u'wikipedia').langs.keys(): self.hiddentemplates.add(pywikibot.Page(self.site, u'Template:%s' % langK)) - # Hidden template loading if self.pageHidden: try: @@ -857,10 +873,6 @@
def returnOlderTime(self, listGiven, timeListGiven): """ Get some time and return the oldest of them """ - # print listGiven; print timeListGiven - # -- Output: -- - # [[1210596312.0, u'Autoritratto.png'], [1210590240.0, u'Duplicato.png'], [1210592052.0, u'Duplicato_2.png']] - # [1210596312.0, 1210590240.0, 1210592052.0] usage = False num = 0 num_older = None @@ -926,65 +938,53 @@ % re.escape(self.imageName) hash_found = self.image.getHash() if not hash_found: - return False # Image deleted, no hash found. Skip the image. - else: - commons_image_with_this_hash = commons_site.getFilesFromAnHash(hash_found) - if commons_image_with_this_hash and \ - commons_image_with_this_hash != 'None': - servTMP = pywikibot.translate(self.site, serviceTemplates, - fallback=False) - templatesInTheImage = self.image.getTemplates() - if servTMP != None: - for template in servTMP: - if pywikibot.Page(self.site, - template) in templatesInTheImage: - pywikibot.output( - u"%s is on commons but it's a service image." - % self.imageName) - return True # Problems? No, return True and continue with the check-part + return False # Image deleted, no hash found. Skip the image.
- pywikibot.output(u'%s is on commons!' % self.imageName) - on_commons_text = self.image.getImagePageHtml() - if u"<div class='sharedUploadNotice'>" in on_commons_text: - pywikibot.output( - u"But, the file doesn't exist on your project! Skip...") - # Problems? Yes! We have to skip the check part for that image - # Because it's on commons but someone has added something on your project. - return False + commons_image_with_this_hash = commons_site.getFilesFromAnHash(hash_found) + if commons_image_with_this_hash and \ + commons_image_with_this_hash is not 'None': + servTMP = pywikibot.translate(self.site, serviceTemplates, + fallback=False) + templatesInTheImage = self.image.getTemplates() + if servTMP is not None: + for template in servTMP: + if pywikibot.Page(self.site, + template) in templatesInTheImage: + pywikibot.output( + u"%s is on commons but it's a service image." + % self.imageName) + return True # continue with the check-part
- elif re.findall(r'\bstemma\b', - self.imageName.lower()) and \ - self.site.lang == 'it': - pywikibot.output( - u'%s has "stemma" inside, means that it's ok.' - % self.imageName) - return True # Problems? No, it's only not on commons but the image needs a check - - else: - # the second usually is a url or something like that. - # Compare the two in equal way, both url. - if self.convert_to_url(self.imageName) \ - == self.convert_to_url(commons_image_with_this_hash[0]): - repme = u"\n*[[:File:%s]] is also on '''Commons''': [[commons:File:%s]] (same name)" \ - % (self.imageName, - commons_image_with_this_hash[0]) - else: - repme = u"\n*[[:File:%s]] is also on '''Commons''': [[commons:File:%s]]" \ - % (self.imageName, - commons_image_with_this_hash[0]) - self.report_image(self.imageName, - self.rep_page, self.com, repme, - addings=False, regex=regexOnCommons) - return True + pywikibot.output(u'%s is on commons!' % self.imageName) + on_commons_text = self.image.getImagePageHtml() + if u"<div class='sharedUploadNotice'>" in on_commons_text: + pywikibot.output( + u"But, the file doesn't exist on your project! Skip...") + # We have to skip the check part for that image because + # it's on commons but someone has added something on your + # project. + return False + if re.findall(r'\bstemma\b', self.imageName.lower()) and \ + self.site.lang == 'it': + pywikibot.output( + u'%s has "stemma" inside, means that it's ok.' + % self.imageName) + return True # It's not only on commons but the image needs a check + # the second usually is a url or something like that. + # Compare the two in equal way, both url. 
+ if self.convert_to_url(self.imageName) \ + == self.convert_to_url(commons_image_with_this_hash[0]): + repme = u"\n*[[:File:%s]] is also on '''Commons''': [[commons:File:%s]] (same name)" \ + % (self.imageName, commons_image_with_this_hash[0]) else: - return True + repme = u"\n*[[:File:%s]] is also on '''Commons''': [[commons:File:%s]]" \ + % (self.imageName, commons_image_with_this_hash[0]) + self.report_image(self.imageName, self.rep_page, self.com, repme, + addings=False, regex=regexOnCommons) + return True
def checkImageDuplicated(self, duplicates_rollback): """ Function to check the duplicated files. """ - # {{Dupe|File:Blanche_Montel.jpg}} - # Skip the stub images - #if 'stub' in self.imageName.lower() and self.project == 'wikipedia' and self.site.lang == 'it': - # return True # Skip the stub, ok dupText = pywikibot.translate(self.site, duplicatesText, fallback=False) dupRegex = pywikibot.translate(self.site, duplicatesRegex, fallback=False) @@ -992,7 +992,8 @@ fallback=False) dupTalkText = pywikibot.translate(self.site, duplicates_user_talk_text, fallback=False) - dupComment_talk = pywikibot.translate(self.site, duplicates_comment_talk, + dupComment_talk = pywikibot.translate(self.site, + duplicates_comment_talk, fallback=False) dupComment_image = pywikibot.translate(self.site, duplicates_comment_image, @@ -1004,7 +1005,7 @@ duplicates = self.site.getFilesFromAnHash(hash_found)
if not duplicates: - return False # Error, image deleted, no hash found. Skip the image. + return False # Error, image deleted, no hash found. Skip the image.
if len(duplicates) > 1: if len(duplicates) == 2: @@ -1037,13 +1038,13 @@ for duplicate in duplicates: if pywikibot.ImagePage(self.site, duplicate) \ == pywikibot.ImagePage(self.site, older_image): - continue # the older image, not report also this as duplicate + continue # the older image, not report also this as duplicate DupePage = pywikibot.ImagePage(self.site, duplicate) try: DupPageText = DupePage.get() older_page_text = Page_oder_image.get() except pywikibot.NoPage: - continue # The page doesn't exists + continue # The page doesn't exists
if not (re.findall(dupRegex, DupPageText) or re.findall(dupRegex, older_page_text)): @@ -1060,10 +1061,11 @@ pywikibot.output( u"Already put the dupe-template in the files's page" u" or in the dupe's page. Skip.") - return False # Ok - No problem. Let's continue the checking phase - older_image_ns = u'%s%s' % (self.image_namespace, older_image) # adding the namespace - only_report = False # true if the image are not to be tagged as dupes + return False # Ok - Let's continue the checking phase
+ older_image_ns = u'%s%s' % (self.image_namespace, older_image) + only_report = False # true if the image are not to be tagged as dupes + # put only one image or the whole list according to the request if u'__images__' in dupText: text_for_the_report = re.sub(r'__images__', @@ -1075,12 +1077,14 @@ r'%s' % older_image_ns, dupText)
- # Two iteration: report the "problem" to the user only once (the last) + # Two iteration: report the "problem" to the user only once + # (the last) if len(images_to_tag_list) > 1: for image_to_tag in images_to_tag_list[:-1]: already_reported_in_past = self.countEdits( u'File:%s' % image_to_tag, self.botolist) - # if you want only one edit, the edit found should be more than 0 -> num - 1 + # if you want only one edit, the edit found should be + # more than 0 -> num - 1 if already_reported_in_past > duplicates_rollback - 1: only_report = True break @@ -1093,22 +1097,22 @@ commImage=dupComment_image, unver=True)
if len(images_to_tag_list) != 0 and not only_report: - already_reported_in_past = self.countEdits(u'File:%s' - % images_to_tag_list[-1], - self.botolist) + already_reported_in_past = self.countEdits( + u'File:%s' % images_to_tag_list[-1], self.botolist) image_to_resub = images_to_tag_list[-1] from_regex = r'\n*[[:File:%s]]' \ % re.escape(self.convert_to_url(self.imageName)) # Delete the image in the list where we're write on text_for_the_report = re.sub(from_regex, '', text_for_the_report) - # if you want only one edit, the edit found should be more than 0 -> num - 1 + # if you want only one edit, the edit found should be more + # than 0 -> num - 1 if already_reported_in_past > duplicates_rollback - 1: only_report = True else: self.report(text_for_the_report, images_to_tag_list[-1], dupTalkText % (older_image_ns, string), - dupTalkHead, commTalk = dupComment_talk, + dupTalkHead, commTalk=dupComment_talk, commImage=dupComment_image, unver=True)
if self.duplicatesReport or only_report: @@ -1120,16 +1124,21 @@ % self.convert_to_url(self.imageName)
for duplicate in duplicates: - if self.convert_to_url(duplicate) == self.convert_to_url(self.imageName): - continue # the image itself, not report also this as duplicate - repme += u"\n**[[:File:%s]]" % self.convert_to_url(duplicate) - result = self.report_image(self.imageName, self.rep_page, self.com, repme, addings = False, regex = duplicateRegex) + if self.convert_to_url(duplicate) == \ + self.convert_to_url(self.imageName): + continue # the image itself, not report also this as duplicate + repme += u"\n**[[:File:%s]]" \ + % self.convert_to_url(duplicate) + + result = self.report_image(self.imageName, self.rep_page, + self.com, repme, addings=False, + regex=duplicateRegex) if not result: - return True # If Errors, exit (but continue the check) + return True # If Errors, exit (but continue the check)
if older_image != self.imageName: - return False # The image is a duplicate, it will be deleted. So skip the check-part, useless - return True # Ok - No problem. Let's continue the checking phase + return False # The image is a duplicate, it will be deleted. So skip the check-part, useless + return True # Ok - No problem. Let's continue the checking phase
def report_image(self, image_to_report, rep_page=None, com=None, rep_text=None, addings=True, regex=None): @@ -1163,9 +1172,11 @@ pywikibot.output( u"The log page (%s) is full! Please delete the old files " u" reported. Skip!" % another_page.title()) - return True # Don't report, but continue with the check (we don't now if this is the first time we check this file or not) - # The talk page includes "_" between the two names, in this way i replace them to " " - n = re.compile(regex, re.UNICODE|re.DOTALL) + return True # Don't report, but continue with the check (we don't now if this is the first time we check this file or not) + + # The talk page includes "_" between the two names, in this way I + # replace them to " " + n = re.compile(regex, re.UNICODE | re.DOTALL) y = n.findall(text_get)
if y: @@ -1204,7 +1215,7 @@ "*[Hh]ead=['"](.*?)['"]\n" "*[Tt]ext ?= ?['"](.*?)['"]\n" "*[Mm]ex ?= ?['"]?([^\n]*?)['"]?\n", - re.UNICODE|re.DOTALL) + re.UNICODE | re.DOTALL) number = 1
for m in r.finditer(testo): @@ -1216,7 +1227,8 @@ head = str(m.group(6)) text = str(m.group(7)) mexcatched = str(m.group(8)) - tupla = [number, name, find_tipe, find, imagechanges, summary, head, text, mexcatched] + tupla = [number, name, find_tipe, find, imagechanges, + summary, head, text, mexcatched] self.settingsData += [tupla] number += 1
@@ -1242,7 +1254,7 @@ pywikibot.output(u'>> Loaded the real-time page... <<') else: pywikibot.output(u'>> No additional settings found! <<') - return self.settingsData # Useless, but it doesn't harm.. + return self.settingsData # Useless, but it doesn't harm..
def load_licenses(self): """ Load the list of the licenses """ @@ -1260,7 +1272,9 @@ catName = pywikibot.translate(self.site, category_with_licenses, fallback=False) if not catName: - raise pywikibot.Error(u'No licenses allowed provided, add that option to the code to make the script working correctly') + raise pywikibot.Error( + u'No licenses allowed provided, add that option to the code to ' + u'make the script working correctly') pywikibot.output(u'\nLoading the allowed licenses...\n') list_licenses = catlib.categoryAllPageObjectsAPI(catName) if self.site.lang == 'commons': @@ -1282,7 +1296,7 @@ for nameLicense in self.load(pageAllowedText): pageLicense = pywikibot.Page(self.site, nameLicense) if pageLicense not in list_licenses: - list_licenses.append(pageLicense) # the list has wiki-pages + list_licenses.append(pageLicense) # the list has wiki-pages return list_licenses
def miniTemplateCheck(self, template): @@ -1291,10 +1305,13 @@ licenses to skip.
""" - if template in self.list_licenses: # the list_licenses are loaded in the __init__ (not to load them multimple times) + # the list_licenses are loaded in the __init__ + # (not to load them multimple times) + if template in self.list_licenses: self.license_selected = template.title(withNamespace=False) self.seems_ok = True - self.license_found = self.license_selected # let the last "fake" license normally detected + # let the last "fake" license normally detected + self.license_found = self.license_selected return True
if template in self.hiddentemplates: @@ -1396,8 +1413,8 @@
if self.allLicenses: self.license_found = self.allLicenses[0].title() - self.some_problem = False # If it has "some_problem" it must check - # the additional settings. + self.some_problem = False # If it has "some_problem" it must check + # the additional settings. # if self.settingsData, use addictional settings if self.settingsData: self.findAdditionalProblems() @@ -1460,7 +1477,8 @@
def skipImages(self, skip_number, limit): """ Given a number of files, skip the first -number- files. """ - # If the images to skip are more the images to check, make them the same number + # If the images to skip are more the images to check, make them the + # same number if skip_number == 0: pywikibot.output(u'\t\t>> No files to skip...<<') return False @@ -1498,18 +1516,20 @@ % waitTime) imagesToSkip = 0 while True: - loadOtherImages = True # ensure that all the images loaded aren't to skip! + # ensure that all the images loaded aren't to skip! + loadOtherImages = True for image in generator: try: timestamp = image.getLatestUploader()[1] except pywikibot.NoPage: continue + # not relative to localtime img_time = datetime.datetime.strptime(timestamp, - u"%Y-%m-%dT%H:%M:%SZ") #not relative to localtime + u"%Y-%m-%dT%H:%M:%SZ")
now = datetime.datetime.strptime( str(datetime.datetime.utcnow()).split('.')[0], - "%Y-%m-%d %H:%M:%S") #timezones are UTC + "%Y-%m-%d %H:%M:%S") # timezones are UTC # + seconds to be sure that now > img_time while now < img_time: now = (now + datetime.timedelta(seconds=1)) @@ -1530,12 +1550,12 @@ self.site.newimages(number=limit, lestart=timestamp)) imagesToSkip = 0 - # continue to load images! continue + # continue to load images! continue else: - break # ok some other images, go below + break # ok some other images, go below newGen = list() - imagesToSkip += 1 # some calcs, better add 1 + imagesToSkip += 1 # some calcs, better add 1 # Add new images, instead of the images skipped newImages = self.site.newimages(number=imagesToSkip, lestart=timestamp) @@ -1554,8 +1574,8 @@ """ Understand if a file is already tagged or not. """ # Is the image already tagged? If yes, no need to double-check, skip for i in pywikibot.translate(self.site, txt_find, fallback=False): - # If there are {{ use regex, otherwise no (if there's not the {{ may not be a template - # and the regex will be wrong) + # If there are {{ use regex, otherwise no (if there's not the + # {{ may not be a template and the regex will be wrong) if '{{' in i: regexP = re.compile(r'{{(?:template|)%s ?(?:||\n|}|<) ?' 
% i.split('{{')[1].replace(u' ', u'[ _]'), @@ -1591,7 +1611,7 @@ if find_tipe.lower() == 'findonly': searchResults = re.findall(r'%s' % k.lower(), self.imageCheckText.lower()) - if searchResults != []: + if searchResults: if searchResults[0] == self.imageCheckText.lower(): self.some_problem = True self.text_used = text @@ -1602,7 +1622,8 @@ self.mex_used = mexCatched break elif find_tipe.lower() == 'find': - if re.findall(r'%s' % k.lower(), self.imageCheckText.lower()) != []: + if re.findall(r'%s' % k.lower(), + self.imageCheckText.lower()): self.some_problem = True self.text_used = text self.head_used = head_2 @@ -1617,9 +1638,12 @@ nothing = ['', ' ', ' ', ' ', '\n', '\n ', '\n ', '\n\n', '\n \n', ' \n', ' \n ', ' \n \n'] # something = Minimal requirements for an image description. - # If this fits, no tagging will take place (if there aren't other issues) + # If this fits, no tagging will take place + # (if there aren't other issues) # MIT license is ok on italian wikipedia, let also this here - something = ['{{'] # Don't put "}}" here, please. Useless and can give problems. + + # Don't put "}}" here, please. Useless and can give problems. + something = ['{{'] # Unused file extensions. Does not contain PDF. notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd") brackets = False @@ -1667,9 +1691,11 @@ if self.isTagged(): printWithTimeZone(u'%s is already tagged...' % self.imageName) return True - for a_word in something: # something is the array with {{, MIT License and so on. + + # something is the array with {{, MIT License and so on. + for a_word in something: if a_word in self.imageCheckText: - # There's a template, probably a license (or I hope so) + # There's a template, probably a license brackets = True # Is the extension allowed? (is it an image or f.e. a .xls file?) for parl in notallowed: @@ -1723,20 +1749,20 @@ def main(): """ Main function """ # Command line configurable parameters - repeat = True # Restart after having check all the images? 
- limit = 80 # How many images check? - time_sleep = 30 # How many time sleep after the check? - skip_number = 0 # How many images to skip before checking? - waitTime = 0 # How many time sleep before the check? - commonsActive = False # Check if on commons there's an image with the same name? - normal = False # Check the new images or use another generator? - urlUsed = False # Use the url-related function instead of the new-pages generator - regexGen = False # Use the regex generator - untagged = False # Use the untagged generator - duplicatesActive = False # Use the duplicate option - duplicatesReport = False # Use the duplicate-report option - sendemailActive = False # Use the send-email - logFullError = True # Raise an error when the log is full + repeat = True # Restart after having check all the images? + limit = 80 # How many images check? + time_sleep = 30 # How many time sleep after the check? + skip_number = 0 # How many images to skip before checking? + waitTime = 0 # How many time sleep before the check? + commonsActive = False # Is there's an image with the same name at commons? + normal = False # Check the new images or use another generator? + urlUsed = False # Use the url-related function instead of the new-pages + regexGen = False # Use the regex generator + untagged = False # Use the untagged generator + duplicatesActive = False # Use the duplicate option + duplicatesReport = False # Use the duplicate-report option + sendemailActive = False # Use the send-email + logFullError = True # Raise an error when the log is full generator = None
# Here below there are the parameters. @@ -1771,20 +1797,16 @@ sendemailActive = True elif arg.startswith('-skip'): if len(arg) == 5: - skip = True skip_number = int(pywikibot.input( u'How many files do you want to skip?')) elif len(arg) > 5: - skip = True skip_number = int(arg[6:]) elif arg.startswith('-wait'): if len(arg) == 5: - wait = True waitTime = int(pywikibot.input( u'How many time do you want to wait before checking the ' u'files?')) elif len(arg) > 5: - wait = True waitTime = int(arg[6:]) elif arg.startswith('-start'): if len(arg) == 6: @@ -1850,16 +1872,9 @@ if not generator: normal = True
- # Define the site. site = pywikibot.getSite() + skip = skip_number > 0
- # If the images to skip are 0, set the skip variable to False (the same for - # the wait time) - if skip_number == 0: - skip = False - if waitTime == 0: - wait = False - # A little block-statement to ensure that the bot will not start with # en-parameters if site.lang not in project_inserted: @@ -1879,11 +1894,9 @@ Bot = checkImagesBot(site, sendemailActive=sendemailActive, duplicatesReport=duplicatesReport, logFullError=logFullError) - # Untagged is True? Let's take that generator if untagged: - generator = Bot.untaggedGenerator(projectUntagged, limit) - normal = False # Ensure that normal is False - # Normal True? Take the default generator + generator = Bot.untaggedGenerator(projectUntagged, limit) + normal = False if normal: generator = pg.NewimagesPageGenerator(number=limit, site=site) # if urlUsed and regexGen, get the source for the generator @@ -1896,20 +1909,19 @@ textRegex = pageRegex.get() except pywikibot.NoPage: pywikibot.output(u"%s doesn't exist!" % pageRegex.title()) - textRegex = '' # No source, so the bot will quit later. + textRegex = '' # No source, so the bot will quit later. # If generator is the regex' one, use your own Generator using an url # or page and a regex. if generator == 'regex' and regexGen: generator = Bot.regexGenerator(regexpToUse, textRegex)
Bot.takesettings() - if wait: + if waitTime: generator = Bot.wait(waitTime, generator, normal, limit) generator = pg.NamespaceFilterPageGenerator(generator, 6, site) for image in generator: # Setting the image for the main class Bot.setParameters(image.title(withNamespace=False)) - # Skip block if skip: skip = Bot.skipImages(skip_number, limit) if skip:
pywikipedia-svn@lists.wikimedia.org