[Pywikipedia-l] SVN: [5294] trunk/pywikipedia/checkimages.py
filnik at svn.wikimedia.org
filnik at svn.wikimedia.org
Fri May 2 11:05:19 UTC 2008
Revision: 5294
Author: filnik
Date: 2008-05-02 11:05:19 +0000 (Fri, 02 May 2008)
Log Message:
-----------
Bugfix: deleted images have no hash, so skip them instead of raising an error.
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-05-02 00:13:27 UTC (rev 5293)
+++ trunk/pywikipedia/checkimages.py 2008-05-02 11:05:19 UTC (rev 5294)
@@ -59,7 +59,6 @@
* Add the "catch the language" function for commons.
* Fix and reorganise the new documentation
* Add a report for the image tagged.
-* Implement: Special:FileDuplicateSearch/Image.jpg
"""
#
@@ -77,25 +76,20 @@
locale.setlocale(locale.LC_ALL, '')
-class NoHash(wikipedia.Error):
- """ The APIs don't return any Hash for the image searched.
- Really Strange, better to raise an error.
- """
-
#########################################################################################################################
# <------------------------------------------- Change only below! ----------------------------------------------------->#
#########################################################################################################################
# That's what you want that will be added. (i.e. the {{no source}} with the right day/month/year )
n_txt = {
- 'commons':'\n{{subst:nld}}',
+ 'commons':u'\n{{subst:nld}}',
'de' :u'{{Benutzer:ABF/D|~~~~}} {{Dateiüberprüfung/benachrichtigt (Kategorie)|{{subst:LOCALYEAR}}|{{subst:LOCALMONTH}}|{{subst:LOCALDAY}}}} {{Dateiüberprüfung/benachrichtigt (Text)|Lizenz|||||}} --This was added by ~~~~-- ',
- 'en' :'\n{{subst:nld}}',
- 'it' :'\n{{subst:unverdata}}',
- 'ja' :'{{subst:Nsd}}',
+ 'en' :u'\n{{subst:nld}}',
+ 'it' :u'\n{{subst:unverdata}}',
+ 'ja' :u'{{subst:Nsd}}',
'hu' :u'\n{{nincslicenc|~~~~~}}',
- 'ta' :'\n{{subst:nld}}',
- 'zh' :'{{subst:No license/auto}}',
+ 'ta' :u'\n{{subst:nld}}',
+ 'zh' :u'{{subst:No license/auto}}',
}
# Text that the bot will try to see if there's already or not. If there's a
@@ -129,22 +123,22 @@
# When the Bot find that the usertalk is empty is not pretty to put only the no source without the welcome, isn't it?
empty = {
- 'commons':'{{subst:welcome}}\n~~~~\n',
- 'de':'{{subst:willkommen}} ~~~~',
- 'en' :'{{welcome}}\n~~~~\n',
- 'it' :'<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n~~~~\n<!-- fine template di benvenuto -->',
- 'ja':'{{welcome}}\n--~~~~\n',
+ 'commons':u'{{subst:welcome}}\n~~~~\n',
+ 'de' :u'{{subst:willkommen}} ~~~~',
+ 'en' :u'{{welcome}}\n~~~~\n',
+ 'it' :u'<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n~~~~\n<!-- fine template di benvenuto -->',
+ 'ja' :u'{{welcome}}\n--~~~~\n',
'hu' :u'{{subst:Üdvözlet|~~~~}}\n',
- 'zh':'{{subst:welcome|sign=~~~~}}',
+ 'zh' :u'{{subst:welcome|sign=~~~~}}',
}
# Summary that the bot use when it notify the problem with the image's license
comm2 = {
- 'ar' :"بوت: طلب معلومات المصدر." ,
- 'commons':"Bot: Requesting source information." ,
+ 'ar' :u"بوت: طلب معلومات المصدر." ,
+ 'commons':u"Bot: Requesting source information." ,
'de' :u'Bot:Notify User',
- 'en' :"Bot: Requesting source information." ,
- 'it' :"Bot: Notifico l'unverified",
+ 'en' :u"Bot: Requesting source information." ,
+ 'it' :u"Bot: Notifico l'unverified",
'ja' :u"ロボットによる:出典とライセンス明記のお願い",
'hu' :u'Robot: Forrásinformáció kérése',
'ja' :u'{{welcome}}\n--~~~~\n',
@@ -159,7 +153,7 @@
'commons':u"{{speedy|The file has .%s as extension. Is it ok? Please check.}}",
'en' :u"{{db-meta|The file has .%s as extension.}}",
'it' :u'{{cancella subito|motivo=Il file ha come estensione ".%s"}}',
- 'ja':u'{{db|知らないファイルフォーマット%s}}',
+ 'ja' :u'{{db|知らないファイルフォーマット%s}}',
'hu' :u'{{azonnali|A fájlnak .%s a kiterjesztése}}',
'ta' :u'{{delete|இந்தக் கோப்பு .%s என்றக் கோப்பு நீட்சியைக் கொண்டுள்ளது.}}',
'zh' :u'{{delete|未知檔案格式%s}}',
@@ -228,7 +222,7 @@
# NOTE: YOUR Botnick is automatically added. It's not required to add it twice.
bot_list = {
'commons':[u'Siebot', u'CommonsDelinker', u'Filbot', u'John Bot', u'Sz-iwbot', u'ABFbot'],
- 'de' :['ABFbot'],
+ 'de' :[u'ABFbot'],
'en' :[u'OrphanBot'],
'it' :[u'Filbot', u'Nikbot', u'.snoopyBot.'],
'ja' :[u'alexbot'],
@@ -287,14 +281,14 @@
# The summary of the report
comm10 = {
'commons':u'Bot: Updating the log',
- 'ar':u'بوت: تحديث السجل',
- 'de': u'Bot:schreibe Log',
- 'en':u'Bot: Updating the log',
- 'it':u'Bot: Aggiorno il log',
- 'ja': u'ロボットによる:更新',
- 'hu': u'Robot: A napló frissítése',
- 'ta': u'தானியங்கி:பட்டியலை இற்றைப்படுத்தல்',
- 'zh': u'機器人:更新記錄',
+ 'ar' :u'بوت: تحديث السجل',
+ 'de' :u'Bot:schreibe Log',
+ 'en' :u'Bot: Updating the log',
+ 'it' :u'Bot: Aggiorno il log',
+ 'ja' :u'ロボットによる:更新',
+ 'hu' :u'Robot: A napló frissítése',
+ 'ta' :u'தானியங்கி:பட்டியலை இற்றைப்படுத்தல்',
+ 'zh' :u'機器人:更新記錄',
}
# If a template isn't a license but it's included on a lot of images, that can be skipped to
@@ -303,12 +297,12 @@
# Warning 2: The bot will use regex, make the names compatible, please (don't add "Template:" or {{
# because they are already put in the regex).
HiddenTemplate = {
- 'commons':['information', 'trademarked', 'trademark'],
+ 'commons':[u'information', u'trademarked', u'trademark'],
'de':[u'information'],
- 'en':['information'],
- 'it':['edp', 'informazioni[ _]file', 'information', 'trademark'],
+ 'en':[u'information'],
+ 'it':[u'edp', u'informazioni[ _]file', u'information', u'trademark'],
'ja':[u'Information'],
- 'hu':[u'információ','enwiki', 'azonnali'],
+ 'hu':[u'információ', u'enwiki', u'azonnali'],
'ta':[u'information'],
'zh':[u'information'],
}
@@ -316,7 +310,7 @@
# Template added when the bot finds only an hidden template and nothing else.
# Note: every __botnick__ will be repleaced with your bot's nickname (feel free not to use if you don't need it)
HiddenTemplateNotification = {
- 'commons': """\n{{subst:User:Filnik/whitetemplate|Image:%s}}\n\n''This message was '''added automatically by [[User:__botnick__|__botnick__]]''', if you need some help about it, ask its master (~~~) or go to the [[Commons:Help desk]]''. --~~~~""",
+ 'commons': u"""\n{{subst:User:Filnik/whitetemplate|Image:%s}}\n\n''This message was '''added automatically by [[User:__botnick__|__botnick__]]''', if you need some help about it, ask its master (~~~) or go to the [[Commons:Help desk]]''. --~~~~""",
'de': None,
'en': None,
'it': u"{{subst:Utente:Filbot/Template_insufficiente|%s}} --~~~~",
@@ -324,19 +318,25 @@
}
# Add your project (in alphabetical order) if you want that the bot start
-project_inserted = ['ar', 'commons', 'de', 'en', 'ja', 'hu', 'it', 'ta', 'zh']
+project_inserted = [u'ar', u'commons', u'de', u'en', u'ja', u'hu', u'it', u'ta', u'zh']
# Ok, that's all. What is below, is the rest of code, now the code is fixed and it will run correctly in your project.
#########################################################################################################################
# <------------------------------------------- Change only above! ----------------------------------------------------> #
#########################################################################################################################
+# Error Classes
class LogIsFull(wikipedia.Error):
"""An exception indicating that the log is full and the Bot cannot add other data to prevent Errors."""
class NothingFound(wikipedia.Error):
""" An exception indicating that a regex has return [] instead of results."""
+class NoHash(wikipedia.Error):
+ """ The APIs don't return any Hash for the image searched.
+ Really Strange, better to raise an error. """
+
+# Other common useful functions
def printWithTimeZone(message):
""" Function to print the messages followed by the TimeZone encoded correctly. """
if message[-1] != ' ':
@@ -578,7 +578,11 @@
if hash_found_list != []:
hash_found = hash_found_list[0]
else:
- raise NoHash('No Hash found in the APIs! Maybe the regex to catch it is wrong or someone has changed the APIs structure.')
+ if imagePage.exists():
+ raise NoHash('No Hash found in the APIs! Maybe the regex to catch it is wrong or someone has changed the APIs structure.')
+ else:
+ wikipedia.output(u'Image deleted before getting the Hash. Skipping...')
+ return False # Error, we need to skip the page.
get_duplicates = self.site.getUrl('/w/api.php?action=query&format=xml&list=allimages&aisha1=%s' % hash_found)
duplicates = re.findall(r'<img name="(.*?)".*?/>', get_duplicates)
if len(duplicates) > 1:
@@ -591,7 +595,8 @@
if duplicate == self.image:
continue # the image itself, not report also this as duplicate
repme += "\n**[[:Image:%s]]" % duplicate
- self.report_image(self.image, self.rep_page, self.com, repme + '\n', addings = False, regex = duplicateRegex)
+ self.report_image(self.image, self.rep_page, self.com, repme, addings = False, regex = duplicateRegex)
+ return True # Ok - No problem. Let's continue the checking phase
def report_image(self, image, rep_page = None, com = None, rep_text = None, addings = True, regex = None):
""" Function to report the images in the report page when needed. """
@@ -984,13 +989,6 @@
# the user has set 0 as images to skip
wikipedia.output(u'\t\t>> No images to skip...<<')
skip_list.append('skip = Off') # Only to print it once
- # Check on commons if there's already an image with the same name
- if commonsActive == True:
- response = mainClass.checkImageOnCommons(imageName)
- if response == False:
- continue
- if duplicatesActive == True:
- mainClass.checkImageDuplicated(imageName)
parentesi = False # parentesi are these in italian: { ( ) } []
delete = False
tagged = False
@@ -1006,6 +1004,16 @@
except wikipedia.IsRedirectPage:
wikipedia.output(u"The file description for %s is a redirect?!" % imageName )
continue
+ # Check on commons if there's already an image with the same name
+ if commonsActive == True:
+ response = mainClass.checkImageOnCommons(imageName)
+ if response == False:
+ continue
+ # Check if there are duplicates of the image on the project selected
+ if duplicatesActive == True:
+ response2 = mainClass.checkImageDuplicated(imageName)
+ if response2 == False:
+ continue
# Is the image already tagged? If yes, no need to double-check, skip
for i in TextFind:
# If there are {{ use regex, otherwise no (if there's not the {{ may not be a template
More information about the Pywikipedia-l
mailing list