Revision: 5280 Author: filnik Date: 2008-04-27 16:05:02 +0000 (Sun, 27 Apr 2008)
Log Message: ----------- Yuppi! I've written a script that dedents text, so now this script has the same indentation (4 spaces) as all the others! Also, a bugfix for when the script takes an object that in reality is not an image: added a workaround, and added support for redirects in talk pages. Great diff! :-)
Modified Paths: -------------- trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py =================================================================== --- trunk/pywikipedia/checkimages.py 2008-04-27 09:37:06 UTC (rev 5279) +++ trunk/pywikipedia/checkimages.py 2008-04-27 16:05:02 UTC (rev 5280) @@ -13,30 +13,30 @@
This script understands the following command-line arguments:
- -limit - The number of images to check (default: 80) + -limit - The number of images to check (default: 80)
- -commons - The Bot will check if an image on Commons has the same name - and if true it report the image. + -commons - The Bot will check if an image on Commons has the same name + and if true it report the image.
- -break - To break the bot after the first check (default: recursive) + -break - To break the bot after the first check (default: recursive)
- -time[:#] - Time in seconds between repeat runs (default: 30) + -time[:#] - Time in seconds between repeat runs (default: 30)
- -wait[:#] - Wait x second before check the images (default: 0) + -wait[:#] - Wait x second before check the images (default: 0)
- -skip[:#] - The bot skip the first [:#] images (default: 0) + -skip[:#] - The bot skip the first [:#] images (default: 0)
- -start[:#] - Use allpages() as generator (it starts already form Image:[:#]) + -start[:#] - Use allpages() as generator (it starts already form Image:[:#])
- -cat[:#] - Use a category as generator + -cat[:#] - Use a category as generator
- -regex[:#] - Use regex, must be used with -url or -page + -regex[:#] - Use regex, must be used with -url or -page
- -page[:#] - Define the name of the wikipage where are the images + -page[:#] - Define the name of the wikipage where are the images
- -url[:#] - Define the url where are the images + -url[:#] - Define the url where are the images
- -untagged[:#] - Use daniel's tool as generator ( http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php ) + -untagged[:#] - Use daniel's tool as generator ( http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php )
---- Istructions for the real-time settings ---- * For every new block you have to add: @@ -81,13 +81,13 @@
# That's what you want that will be added. (i.e. the {{no source}} with the right day/month/year ) n_txt = { - 'commons':'\n{{subst:nld}}', - 'en' :'\n{{subst:nld}}', - 'it' :'\n{{subst:unverdata}}', - 'ja' :'{{subst:Nsd}}', - 'hu' :u'\n{{nincslicenc|~~~~~}}', - 'ta' :'\n{{subst:nld}}', - 'zh' :'{{subst:No license/auto}}', + 'commons':'\n{{subst:nld}}', + 'en' :'\n{{subst:nld}}', + 'it' :'\n{{subst:unverdata}}', + 'ja' :'{{subst:Nsd}}', + 'hu' :u'\n{{nincslicenc|~~~~~}}', + 'ta' :'\n{{subst:nld}}', + 'zh' :'{{subst:No license/auto}}', }
# Text that the bot will try to see if there's already or not. If there's a @@ -96,184 +96,184 @@ # '{{nld' --> '{{(?:template:|)no[ _]license ?(?:||\n|}) ?' (case insensitive). # If there's not a {{ it will work as usual (if x in Text) txt_find = { - 'commons':[u'{{no license', u'{{nld', u'{{no permission since'], - 'en':[u'{{nld', u'{{no license'], - 'hu':[u'{{nincsforrás',u'{{nincslicenc'], - 'it':[u'{{unverdata', u'{{unverified'], - 'ja':[u'{{no source', u'{{unknown', u'{{non free', u'<!--削除についての議論が終了するまで',], - 'ta':[u'{{no source', u'{{nld', u'{{no license'], - 'zh':[u'{{no source', u'{{unknown', u'{{No license',], - } + 'commons':[u'{{no license', u'{{nld', u'{{no permission since'], + 'en':[u'{{nld', u'{{no license'], + 'hu':[u'{{nincsforrás',u'{{nincslicenc'], + 'it':[u'{{unverdata', u'{{unverified'], + 'ja':[u'{{no source', u'{{unknown', u'{{non free', u'<!--削除についての議論が終了するまで',], + 'ta':[u'{{no source', u'{{nld', u'{{no license'], + 'zh':[u'{{no source', u'{{unknown', u'{{No license',], + }
# Summary for when the will add the no source comm = { - 'commons':u'Bot: Marking newly uploaded untagged file', - 'ar' :u'بوت: التعليم على ملف مرفوع حديثا غير موسوم', - 'en' :u'Bot: Marking newly uploaded untagged file', - 'hu' :u'Robot: Frissen feltöltött licencsablon nélküli fájl megjelölése', - 'it' :u"Bot: Aggiungo unverified", - 'ja' :u'ロボットによる:出典やライセンスなしの画像をタグ', - 'ta' :u'தானியங்கி:காப்புரிமை வழங்கப்படா படிமத்தை சுட்டுதல்', - 'zh' :u'機器人:標示新上傳且未包含必要資訊的檔案', - } + 'commons':u'Bot: Marking newly uploaded untagged file', + 'ar' :u'بوت: التعليم على ملف مرفوع حديثا غير موسوم', + 'en' :u'Bot: Marking newly uploaded untagged file', + 'hu' :u'Robot: Frissen feltöltött licencsablon nélküli fájl megjelölése', + 'it' :u"Bot: Aggiungo unverified", + 'ja' :u'ロボットによる:出典やライセンスなしの画像をタグ', + 'ta' :u'தானியங்கி:காப்புரிமை வழங்கப்படா படிமத்தை சுட்டுதல்', + 'zh' :u'機器人:標示新上傳且未包含必要資訊的檔案', + }
# When the Bot find that the usertalk is empty is not pretty to put only the no source without the welcome, isn't it? empty = { - 'commons':'{{subst:welcome}}\n~~~~\n', - 'en' :'{{welcome}}\n~~~~\n', - 'it' :'<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n~~~~\n<!-- fine template di benvenuto -->', - 'ja':'{{welcome}}\n--~~~~\n', - 'hu' :u'{{subst:Üdvözlet|~~~~}}\n', - 'zh':'{{subst:welcome|sign=~~~~}}', - } + 'commons':'{{subst:welcome}}\n~~~~\n', + 'en' :'{{welcome}}\n~~~~\n', + 'it' :'<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n~~~~\n<!-- fine template di benvenuto -->', + 'ja':'{{welcome}}\n--~~~~\n', + 'hu' :u'{{subst:Üdvözlet|~~~~}}\n', + 'zh':'{{subst:welcome|sign=~~~~}}', + }
# Summary that the bot use when it notify the problem with the image's license comm2 = { - 'commons':"Bot: Requesting source information." , - 'ar' :"بوت: طلب معلومات المصدر." , - 'en' :"Bot: Requesting source information." , - 'it' :"Bot: Notifico l'unverified", - 'ja' :u"ロボットによる:出典とライセンス明記のお願い", - 'hu' :u'Robot: Forrásinformáció kérése', - 'ja' :u'{{welcome}}\n--~~~~\n', - 'hu' :u'{{subst:Üdvözlet|~~~~}}\n', - 'ta' :u'தானியங்கி:மூலம் வழங்கப்படா படிமத்தை சுட்டுதல்', - 'zh' :u'{{subst:welcome|sign=~~~~}}', - } + 'commons':"Bot: Requesting source information." , + 'ar' :"بوت: طلب معلومات المصدر." , + 'en' :"Bot: Requesting source information." , + 'it' :"Bot: Notifico l'unverified", + 'ja' :u"ロボットによる:出典とライセンス明記のお願い", + 'hu' :u'Robot: Forrásinformáció kérése', + 'ja' :u'{{welcome}}\n--~~~~\n', + 'hu' :u'{{subst:Üdvözlet|~~~~}}\n', + 'ta' :u'தானியங்கி:மூலம் வழங்கப்படா படிமத்தை சுட்டுதல்', + 'zh' :u'{{subst:welcome|sign=~~~~}}', + }
# if the file has an unknown extension it will be tagged with this template. # In reality, there aren't unknown extension, they are only not allewed... ^__^ delete_immediately = { - 'commons':u"{{speedy|The file has .%s as extension. Is it ok? Please check.}}", - 'en' :u"{{db-meta|The file has .%s as extension.}}", - 'it' :u'{{cancella subito|motivo=Il file ha come estensione ".%s"}}', - 'ja':u'{{db|知らないファイルフォーマット%s}}', - 'hu' :u'{{azonnali|A fájlnak .%s a kiterjesztése}}', - 'ta' :u'{{delete|இந்தக் கோப்பு .%s என்றக் கோப்பு நீட்சியைக் கொண்டுள்ளது.}}', - 'zh' :u'{{delete|未知檔案格式%s}}', - } + 'commons':u"{{speedy|The file has .%s as extension. Is it ok? Please check.}}", + 'en' :u"{{db-meta|The file has .%s as extension.}}", + 'it' :u'{{cancella subito|motivo=Il file ha come estensione ".%s"}}', + 'ja':u'{{db|知らないファイルフォーマット%s}}', + 'hu' :u'{{azonnali|A fájlnak .%s a kiterjesztése}}', + 'ta' :u'{{delete|இந்தக் கோப்பு .%s என்றக் கோப்பு நீட்சியைக் கொண்டுள்ளது.}}', + 'zh' :u'{{delete|未知檔案格式%s}}', + }
# The header of the Unknown extension's message. delete_immediately_head = { - 'commons':u"\n== Unknown extension! ==\n", - 'en' :u"\n== Unknown extension! ==\n", - 'it' :u'\n\n== File non specificato ==\n', - 'hu' :u'\n== Ismeretlen kiterjesztésű fájl ==\n', - 'ta' :u'\n== இனங்காணப்படாத கோப்பு நீட்சி! ==\n', - 'zh' :u'\n==您上載的檔案格式可能有誤==\n', - } + 'commons':u"\n== Unknown extension! ==\n", + 'en' :u"\n== Unknown extension! ==\n", + 'it' :u'\n\n== File non specificato ==\n', + 'hu' :u'\n== Ismeretlen kiterjesztésű fájl ==\n', + 'ta' :u'\n== இனங்காணப்படாத கோப்பு நீட்சி! ==\n', + 'zh' :u'\n==您上載的檔案格式可能有誤==\n', + }
# Text that will be add if the bot find a unknown extension. delete_immediately_notification = { - 'commons':u'The [[:Image:%s]] file seems to have a wrong extension, please check. ~~~~', - 'ar' :u'الملف [[:Image:%s]] يبدو أن امتداده خاطيء, من فضلك تحقق. ~~~~', - 'en' :u'The [[:Image:%s]] file seems to have a wrong extension, please check. ~~~~', - 'it' :u'{{subst:Utente:Filbot/Ext|%s}} --~~~~', - 'hu' :u'A [[:Kép:%s]] fájlnak rossz a kiterjesztése, kérlek ellenőrízd. ~~~~', - 'ta' :u'[[:படிமம்:%s]] இனங்காணப்படாத கோப்பு நீட்சியை கொண்டுள்ளது தயவு செய்து ஒரு முறை சரி பார்க்கவும் ~~~~', - 'zh' :u'您好,你上傳的[[:Image:%s]]無法被識別,請檢查您的檔案,謝謝。--~~~~', - } + 'commons':u'The [[:Image:%s]] file seems to have a wrong extension, please check. ~~~~', + 'ar' :u'الملف [[:Image:%s]] يبدو أن امتداده خاطيء, من فضلك تحقق. ~~~~', + 'en' :u'The [[:Image:%s]] file seems to have a wrong extension, please check. ~~~~', + 'it' :u'{{subst:Utente:Filbot/Ext|%s}} --~~~~', + 'hu' :u'A [[:Kép:%s]] fájlnak rossz a kiterjesztése, kérlek ellenőrízd. ~~~~', + 'ta' :u'[[:படிமம்:%s]] இனங்காணப்படாத கோப்பு நீட்சியை கொண்டுள்ளது தயவு செய்து ஒரு முறை சரி பார்க்கவும் ~~~~', + 'zh' :u'您好,你上傳的[[:Image:%s]]無法被識別,請檢查您的檔案,謝謝。--~~~~', + } # Summary of the delate immediately. (f.e: Adding {{db-meta|The file has .%s as extension.}}) del_comm = { - 'commons':u'Bot: Adding %s', - 'ar' :u'بوت: إضافة %s', - 'en' :u'Bot: Adding %s', - 'it' :u'Bot: Aggiungo %s', - 'ja' :u'ロボットによる: 追加 %s', - 'hu' :u'Robot:"%s" hozzáadása', - 'ta' :u'Bot: Adding %s', - 'zh' :u'機器人: 正在新增 %s', - } + 'commons':u'Bot: Adding %s', + 'ar' :u'بوت: إضافة %s', + 'en' :u'Bot: Adding %s', + 'it' :u'Bot: Aggiungo %s', + 'ja' :u'ロボットによる: 追加 %s', + 'hu' :u'Robot:"%s" hozzáadása', + 'ta' :u'Bot: Adding %s', + 'zh' :u'機器人: 正在新增 %s', + }
# This is the most important header, because it will be used a lot. That's the header that the bot # will add if the image hasn't the license. nothing_head = { - 'commons':u"",# Nothing, the template has already the header inside. - 'ar' :u"\n== صورة بدون ترخيص ==\n", - 'en' :u"\n== Image without license ==\n", - 'ja' :u'', - 'it' :u"\n\n== Immagine senza licenza ==\n", - 'hu' :u"\n== Licenc nélküli kép ==\n", - 'ta' :u'', - 'zh' :u'', - } + 'commons':u"",# Nothing, the template has already the header inside. + 'ar' :u"\n== صورة بدون ترخيص ==\n", + 'en' :u"\n== Image without license ==\n", + 'ja' :u'', + 'it' :u"\n\n== Immagine senza licenza ==\n", + 'hu' :u"\n== Licenc nélküli kép ==\n", + 'ta' :u'', + 'zh' :u'', + } # That's the text that the bot will add if it doesn't find the license. nothing_notification = { - 'commons':u"\n{{subst:User:Filnik/untagged|Image:%s}}\n\n''This message was '''added automatically by [[User:Filbot|Filbot]]''', if you need some help about it, ask [[User:Filnik|its master]] or go to the [[Commons:Help desk]]''. --~~~~", - 'en' :u"{{subst:image source|Image:%s}} --~~~~", - 'it' :u"{{subst:Utente:Filbot/Senza licenza|%s}} --~~~~", - 'ja' :u"\n{{subst:image source|Image:%s}}--~~~~", - 'hu' :u"{{subst:adjforrást|Kép:%s}} \n Ezt az üzenetet ~~~ automatikusan helyezte el a vitalapodon, kérdéseddel fordulj a gazdájához, vagy a [[WP:KF|Kocsmafalhoz]]. --~~~~", - 'ta' :u'\n{{subst:Di-no license-notice|படிமம்:%s}} ~~~~ ', - 'zh' :u'\n{{subst:Uploadvionotice|Image:%s}} ~~~~ ', - } + 'commons':u"\n{{subst:User:Filnik/untagged|Image:%s}}\n\n''This message was '''added automatically by [[User:Filbot|Filbot]]''', if you need some help about it, ask [[User:Filnik|its master]] or go to the [[Commons:Help desk]]''. 
--~~~~", + 'en' :u"{{subst:image source|Image:%s}} --~~~~", + 'it' :u"{{subst:Utente:Filbot/Senza licenza|%s}} --~~~~", + 'ja' :u"\n{{subst:image source|Image:%s}}--~~~~", + 'hu' :u"{{subst:adjforrást|Kép:%s}} \n Ezt az üzenetet ~~~ automatikusan helyezte el a vitalapodon, kérdéseddel fordulj a gazdájához, vagy a [[WP:KF|Kocsmafalhoz]]. --~~~~", + 'ta' :u'\n{{subst:Di-no license-notice|படிமம்:%s}} ~~~~ ', + 'zh' :u'\n{{subst:Uploadvionotice|Image:%s}} ~~~~ ', + }
# This is a list of what bots used this script in your project. # NOTE: YOUR Botnick is automatically added. It's not required to add it twice. bot_list = { - 'commons':['Siebot', 'CommonsDelinker'], - 'en' :['OrphanBot'], - 'it' :['Filbot', 'Nikbot', '.snoopyBot.'], - 'ja' :['alexbot'], - 'ta' :['TrengarasuBOT'], - 'zh' :['alexbot'], - } + 'commons':['Siebot', 'CommonsDelinker'], + 'en' :['OrphanBot'], + 'it' :['Filbot', 'Nikbot', '.snoopyBot.'], + 'ja' :['alexbot'], + 'ta' :['TrengarasuBOT'], + 'zh' :['alexbot'], + }
# The message that the bot will add the second time that find another license problem. second_message_without_license = { - 'commons':None, - 'en': None, - 'it':u':{{subst:Utente:Filbot/Senza licenza2|%s}} --~~~~', - 'hu':u'\nSzia! Úgy tűnik a [[:Kép:%s]] képpel is hasonló a probléma, mint az előbbivel. Kérlek olvasd el a [[WP:KÉPLIC|feltölthető képek]]ről szóló oldalunk, és segítségért fordulj a [[WP:KF-JO|Jogi kocsmafalhoz]]. Köszönöm --~~~~', - 'ja':None, - 'ta':None, - 'zh':None, - } + 'commons':None, + 'en': None, + 'it':u':{{subst:Utente:Filbot/Senza licenza2|%s}} --~~~~', + 'hu':u'\nSzia! Úgy tűnik a [[:Kép:%s]] képpel is hasonló a probléma, mint az előbbivel. Kérlek olvasd el a [[WP:KÉPLIC|feltölthető képek]]ről szóló oldalunk, és segítségért fordulj a [[WP:KF-JO|Jogi kocsmafalhoz]]. Köszönöm --~~~~', + 'ja':None, + 'ta':None, + 'zh':None, + } # You can add some settings to wikipedia. In this way, you can change them without touch the code. # That's useful if you are running the bot on Toolserver. page_with_settings = { - 'commons':u'User:Filbot/Settings', - 'en':None, - 'hu':None, - 'it':u'Progetto:Coordinamento/Immagini/Bot/Settings#Settings', - 'ja':None, - 'ta':None, - 'zh':u"User:Alexbot/cisettings#Settings", - } + 'commons':u'User:Filbot/Settings', + 'en':None, + 'hu':None, + 'it':u'Progetto:Coordinamento/Immagini/Bot/Settings#Settings', + 'ja':None, + 'ta':None, + 'zh':u"User:Alexbot/cisettings#Settings", + } # The bot can report some images (like the images that have the same name of an image on commons) # This is the page where the bot will store them. 
report_page = { - 'commons':u'User:Filbot/Report', - 'en' :u'User:Filnik/Report', - 'it' :u'Progetto:Coordinamento/Immagini/Bot/NowCommons', - 'ja' :u'User:Alexbot/report', - 'hu' :u'User:Bdamokos/Report', - 'ta' :u'Trengarasu/commonsimages', - 'zh' :u'User:Alexsh/checkimagereport', - } + 'commons':u'User:Filbot/Report', + 'en' :u'User:Filnik/Report', + 'it' :u'Progetto:Coordinamento/Immagini/Bot/NowCommons', + 'ja' :u'User:Alexbot/report', + 'hu' :u'User:Bdamokos/Report', + 'ta' :u'Trengarasu/commonsimages', + 'zh' :u'User:Alexsh/checkimagereport', + } # Adding the date after the signature. timeselected = u' ~~~~~' # The text added in the report report_text = { - 'commons':u"\n*[[:Image:%s]] " + timeselected, - 'en':u"\n*[[:Image:%s]] " + timeselected, - 'it':u"\n*[[:Immagine:%s]] " + timeselected, - 'ja':u"\n*[[:Immagine:%s]] " + timeselected, - 'hu':u"\n*[[:Kép:%s]] " + timeselected, - 'ta':u"\n*[[:படிமம்:%s]] " + timeselected, - 'zh':u"\n*[[:Image:%s]] " + timeselected, - } + 'commons':u"\n*[[:Image:%s]] " + timeselected, + 'en':u"\n*[[:Image:%s]] " + timeselected, + 'it':u"\n*[[:Immagine:%s]] " + timeselected, + 'ja':u"\n*[[:Immagine:%s]] " + timeselected, + 'hu':u"\n*[[:Kép:%s]] " + timeselected, + 'ta':u"\n*[[:படிமம்:%s]] " + timeselected, + 'zh':u"\n*[[:Image:%s]] " + timeselected, + } # The summary of the report comm10 = { - 'commons':u'Bot: Updating the log', - 'ar':u'بوت: تحديث السجل', - 'en':u'Bot: Updating the log', - 'it':u'Bot: Aggiorno il log', - 'ja': u'ロボットによる:更新', - 'hu': u'Robot: A napló frissítése', - 'ta': u'தானியங்கி:பட்டியலை இற்றைப்படுத்தல்', - 'zh': u'機器人:更新記錄', - } + 'commons':u'Bot: Updating the log', + 'ar':u'بوت: تحديث السجل', + 'en':u'Bot: Updating the log', + 'it':u'Bot: Aggiorno il log', + 'ja': u'ロボットによる:更新', + 'hu': u'Robot: A napló frissítése', + 'ta': u'தானியங்கி:பட்டியலை இற்றைப்படுத்தல்', + 'zh': u'機器人:更新記錄', + }
# If a template isn't a license but it's included on a lot of images, that can be skipped to # analise the image without taking care of it. (the template must be in a list) @@ -281,22 +281,22 @@ # Warning 2: The bot will use regex, make the names compatible, please (don't add "Template:" or {{ # because they are already put in the regex). HiddenTemplate = { - 'commons':['information', 'trademarked', 'trademark'], - 'en':['information'], - 'it':['edp', 'informazioni[ _]file', 'information', 'trademark'], - 'ja':[u'Information'], - 'hu':[u'információ','enwiki', 'azonnali'], - 'ta':[u'information'], - 'zh':[u'information'], - } + 'commons':['information', 'trademarked', 'trademark'], + 'en':['information'], + 'it':['edp', 'informazioni[ _]file', 'information', 'trademark'], + 'ja':[u'Information'], + 'hu':[u'információ','enwiki', 'azonnali'], + 'ta':[u'information'], + 'zh':[u'information'], + }
# Template added when the bot finds only an hidden template and nothing else. HiddenTemplateNotification = { - 'commons': """\n{{subst:User:Filnik/whitetemplate|Image:%s}}\n\n''This message was '''added automatically by [[User:Filbot|Filbot]]''', if you need some help about it, ask [[User:Filnik|its master]] or go to the [[Commons:Help desk]]''. --~~~~""", - 'en': None, - 'it': u"{{subst:Utente:Filbot/Template_insufficiente|%s}} --~~~~", - 'ta': None, - } + 'commons': """\n{{subst:User:Filnik/whitetemplate|Image:%s}}\n\n''This message was '''added automatically by [[User:Filbot|Filbot]]''', if you need some help about it, ask [[User:Filnik|its master]] or go to the [[Commons:Help desk]]''. --~~~~""", + 'en': None, + 'it': u"{{subst:Utente:Filbot/Template_insufficiente|%s}} --~~~~", + 'ta': None, + }
# Add your project (in alphabetical order) if you want that the bot start project_inserted = ['commons', 'en', 'ja', 'hu', 'it', 'ta', 'zh'] @@ -307,762 +307,786 @@ #########################################################################################################################
class LogIsFull(wikipedia.Error): - """An exception indicating that the log is full and the Bot cannot add other data to prevent Errors.""" + """An exception indicating that the log is full and the Bot cannot add other data to prevent Errors."""
class NothingFound(wikipedia.Error): - """ An exception indicating that a regex has return [] instead of results.""" + """ An exception indicating that a regex has return [] instead of results."""
def printWithTimeZone(message): - """ Function to print the messages followed by the TimeZone encoded correctly. """ - if message[-1] != ' ': - message = '%s ' % unicode(message) - time_zone = time.strftime("%d %b %Y %H:%M:%S (UTC)", time.localtime()) - if locale.getlocale()[1]: - time_zone = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime()), locale.getlocale()[1]) - else: - time_zone = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime())) - wikipedia.output(u"%s%s" % (message, time_zone)) + """ Function to print the messages followed by the TimeZone encoded correctly. """ + if message[-1] != ' ': + message = '%s ' % unicode(message) + time_zone = time.strftime("%d %b %Y %H:%M:%S (UTC)", time.localtime()) + if locale.getlocale()[1]: + time_zone = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime()), locale.getlocale()[1]) + else: + time_zone = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime())) + wikipedia.output(u"%s%s" % (message, time_zone))
def pageText(url): - """ Function used to get HTML text from every reachable URL """ - # When the page is not a wiki-page (as for untagged generator) you need that function - try: - request = urllib2.Request(url) - user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7' - request.add_header("User-Agent", user_agent) - response = urllib2.urlopen(request) - text = response.read() - response.close() - # When you load to many users, urllib2 can give this error. - except urllib2.HTTPError: - printWithTimeZone(u"Server error. Pausing for 10 seconds... ") - time.sleep(10) - request = urllib2.Request(url) - user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7' - request.add_header("User-Agent", user_agent) - response = urllib2.urlopen(request) - text = response.read() - response.close() - return text + """ Function used to get HTML text from every reachable URL """ + # When the page is not a wiki-page (as for untagged generator) you need that function + try: + request = urllib2.Request(url) + user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7' + request.add_header("User-Agent", user_agent) + response = urllib2.urlopen(request) + text = response.read() + response.close() + # When you load to many users, urllib2 can give this error. + except urllib2.HTTPError: + printWithTimeZone(u"Server error. Pausing for 10 seconds... ") + time.sleep(10) + request = urllib2.Request(url) + user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7' + request.add_header("User-Agent", user_agent) + response = urllib2.urlopen(request) + text = response.read() + response.close() + return text
# Here there is the main class. class main: - def __init__(self, site, logFulNumber = 25000): - """ Constructor, define some global variable """ - self.site = site - self.logFulNumber = logFulNumber - self.settings = wikipedia.translate(site, page_with_settings) - self.rep_page = wikipedia.translate(site, report_page) - self.rep_text = wikipedia.translate(site, report_text) - self.com = wikipedia.translate(site, comm10) - # Commento = Summary in italian - self.commento = wikipedia.translate(self.site, comm) - def general(self, newtext, image, notification, head, botolist): - """ This class can be called for two reason. So I need two different constructors, one with common data - and another with the data that I required... maybe it can be added on the other function, but in this way - seems more clear what parameters I need """ - self.newtext = newtext - self.image = image - self.head = head - self.notification = notification - self.botolist = botolist - def put_mex(self, put = True): - """ Function to add the template in the image and to find out - who's the user that has uploaded the image. """ - # Defing the image's Page Object - p = wikipedia.ImagePage(self.site, 'Image:%s' % self.image) - # Get the image's description - try: - testoa = p.get() - except wikipedia.NoPage: - wikipedia.output(u'%s has been deleted...' % p.title()) - # We have a problem! Report and exit! - return False - # You can use this function also to find only the user that - # has upload the image (FixME: Rewrite a bit this part) - if put: - p.put(testoa + self.newtext, comment = self.commento, minorEdit = True) - image_n = self.site.image_namespace() - image_namespace = "%s:" % image_n # Example: "User_talk:" - # paginetta it's the image page object. 
- paginetta = wikipedia.ImagePage(self.site, image_namespace + self.image) - # I take the data of the latest uploader and I take only the name - imagedata = paginetta.getFileVersionHistory() - #print imagedata # Let it so for de-buggin porpuse (wikipedia.output gives error) - # When an Image is deleted from Commons and someone has add something in the wikipedia page - # The bot doesn't catch the data properly :-) - if imagedata == list(): - wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image) - repme = "\n*[[:Image:%s]] seems to have problems ('''no data found in the image''')" - self.report_image(self.image, self.rep_page, self.com, repme) - # We have a problem! Report and exit! - return False - try: - nick = paginetta.getFileVersionHistory()[-1][1] - except IndexError: - wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image) - repme = "\n*[[:Image:%s]] seems to have problems ('''no data found in the image''')" - # We have a problem! Report and exit! - self.report_image(self.image, self.rep_page, self.com, repme) - return False - luser = wikipedia.url2link(nick, self.site, self.site) - pagina_discussione = "%s:%s" % (self.site.namespace(3), luser) - # Defing the talk page (pagina_discussione = talk_page ^__^ ) - talk_page = wikipedia.Page(self.site, pagina_discussione) - self.talk_page = talk_page - return True - # There is the function to put the advise in talk page. - def put_talk(self, notification, head, notification2 = None, commx = None): - commento2 = wikipedia.translate(self.site, comm2) - talk_page = self.talk_page - notification = self.notification - if notification2 == None: - notification2 = notification - else: - notification2 = notification2 % self.image - head = self.head - second_text = False - # Getting the talk page's history, to check if there is another advise... 
- # The try block is used to prevent error if you use an old wikipedia.py's version. - edit_to_load = 10 - if talk_page.exists(): - try: - history = talk_page.getVersionHistory(False, False, False, edit_to_load) - except TypeError: - history = talk_page.getVersionHistory(False, False, False) - latest_edit = history[0] - latest_user = latest_edit[2] - wikipedia.output(u'The latest user that has written something is: %s' % latest_user) - else: - wikipedia.output(u'The user page is blank') + def __init__(self, site, logFulNumber = 25000): + """ Constructor, define some global variable """ + self.site = site + self.logFulNumber = logFulNumber + self.settings = wikipedia.translate(site, page_with_settings) + self.rep_page = wikipedia.translate(site, report_page) + self.rep_text = wikipedia.translate(site, report_text) + self.com = wikipedia.translate(site, comm10) + # Commento = Summary in italian + self.commento = wikipedia.translate(self.site, comm) + def general(self, newtext, image, notification, head, botolist): + """ This class can be called for two reason. So I need two different constructors, one with common data + and another with the data that I required... maybe it can be added on the other function, but in this way + seems more clear what parameters I need """ + self.newtext = newtext + self.image = image + self.head = head + self.notification = notification + self.botolist = botolist + def put_mex(self, put = True): + """ Function to add the template in the image and to find out + who's the user that has uploaded the image. """ + # Defing the image's Page Object + p = wikipedia.ImagePage(self.site, 'Image:%s' % self.image) + # Get the image's description + try: + testoa = p.get() + except wikipedia.NoPage: + wikipedia.output(u'%s has been deleted...' % p.title()) + # We have a problem! Report and exit! 
+ return False + # You can use this function also to find only the user that + # has upload the image (FixME: Rewrite a bit this part) + if put: + p.put(testoa + self.newtext, comment = self.commento, minorEdit = True) + image_n = self.site.image_namespace() + image_namespace = "%s:" % image_n # Example: "User_talk:" + # paginetta it's the image page object. + paginetta = wikipedia.ImagePage(self.site, image_namespace + self.image) + # I take the data of the latest uploader and I take only the name + imagedata = paginetta.getFileVersionHistory() + #print imagedata # Let it so for de-buggin porpuse (wikipedia.output gives error) + # When an Image is deleted from Commons and someone has add something in the wikipedia page + # The bot doesn't catch the data properly :-) + if imagedata == list(): + wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image) + repme = "\n*[[:Image:%s]] seems to have problems ('''no data found in the image''')" + self.report_image(self.image, self.rep_page, self.com, repme) + # We have a problem! Report and exit! + return False + try: + nick = paginetta.getFileVersionHistory()[-1][1] + except IndexError: + wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image) + repme = "\n*[[:Image:%s]] seems to have problems ('''no data found in the image''')" + # We have a problem! Report and exit! + self.report_image(self.image, self.rep_page, self.com, repme) + return False + luser = wikipedia.url2link(nick, self.site, self.site) + pagina_discussione = "%s:%s" % (self.site.namespace(3), luser) + # Defing the talk page (pagina_discussione = talk_page ^__^ ) + talk_page = wikipedia.Page(self.site, pagina_discussione) + self.talk_page = talk_page + return True + # There is the function to put the advise in talk page. 
+ def put_talk(self, notification, head, notification2 = None, commx = None): + commento2 = wikipedia.translate(self.site, comm2) + talk_page = self.talk_page + notification = self.notification + if notification2 == None: + notification2 = notification + else: + notification2 = notification2 % self.image + head = self.head + second_text = False + # Getting the talk page's history, to check if there is another advise... + # The try block is used to prevent error if you use an old wikipedia.py's version. + edit_to_load = 10 + if talk_page.exists(): + try: + history = talk_page.getVersionHistory(False, False, False, edit_to_load) + except TypeError: + history = talk_page.getVersionHistory(False, False, False) + latest_edit = history[0] + latest_user = latest_edit[2] + wikipedia.output(u'The latest user that has written something is: %s' % latest_user) + else: + wikipedia.output(u'The user page is blank')
- if talk_page.exists(): - testoattuale = talk_page.get() # Actual text - project = self.site.family.name - bot = config.usernames[project] - botnick = bot[self.site.lang] - botolist = self.botolist + [botnick] - for i in botolist: - if latest_user == i: - second_text = True - # A block to prevent the second message if the bot also welcomed users... - if latest_edit == history[-1]: - second_text = False - else: - second_text = False - ti_es_ti = wikipedia.translate(self.site, empty) - testoattuale = ti_es_ti - if commx == None: - commentox = commento2 - else: - commentox = commx - if second_text == True: - talk_page.put("%s\n\n%s" % (testoattuale, notification2), comment = commentox, minorEdit = False) - elif second_text == False: - talk_page.put(testoattuale + head + notification, comment = commentox, minorEdit = False) + if talk_page.exists(): + try: + testoattuale = talk_page.get() # Actual text + except wikipedia.IsRedirectPage: + wikipedia.output(u'The user talk is a redirect, trying to get the right talk...') + try: + talk_page = talk_page.getRedirectTarget() + testoattuale = talk_page.get() + except wikipedia.NoPage: + second_text = False + ti_es_ti = wikipedia.translate(self.site, empty) + testoattuale = ti_es_ti + project = self.site.family.name + bot = config.usernames[project] + botnick = bot[self.site.lang] + botolist = self.botolist + [botnick] + for i in botolist: + if latest_user == i: + second_text = True + # A block to prevent the second message if the bot also welcomed users... 
+ if latest_edit == history[-1]: + second_text = False + else: + second_text = False + ti_es_ti = wikipedia.translate(self.site, empty) + testoattuale = ti_es_ti + if commx == None: + commentox = commento2 + else: + commentox = commx + if second_text == True: + talk_page.put("%s\n\n%s" % (testoattuale, notification2), comment = commentox, minorEdit = False) + elif second_text == False: + talk_page.put(testoattuale + head + notification, comment = commentox, minorEdit = False) - def untaggedGenerator(self, untaggedProject, limit): - lang = untaggedProject.split('.', 1)[0] - project = '.%s' % untaggedProject.split('.', 1)[1] - if lang == 'commons': - link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikifam=commo...' - else: - link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikilang=%s&a...' % (lang, project, limit, limit) - text = pageText(link) - #print text - regexp = r"""<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>""" - results = re.findall(regexp, text) - if results == []: - print link - raise NothingFound('Nothing found! Try to use the tool by yourself to be sure that it works!') - else: - for result in results: - wikiPage = wikipedia.Page(self.site, result) - yield wikiPage + def untaggedGenerator(self, untaggedProject, limit): + lang = untaggedProject.split('.', 1)[0] + project = '.%s' % untaggedProject.split('.', 1)[1] + if lang == 'commons': + link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikifam=commo...' + else: + link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikilang=%s&a...' % (lang, project, limit, limit) + text = pageText(link) + regexp = r"""<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>""" + results = re.findall(regexp, text) + if results == []: + wikipedia.output(link) + raise NothingFound('Nothing found! 
Try to use the tool by yourself to be sure that it works!') + else: + for result in results: + wikiPage = wikipedia.Page(self.site, result) + yield wikiPage - def regexGenerator(self, regexp, textrun): - pos = 0 - done = list() - ext_list = list() - r = re.compile(r'%s' % regexp, re.UNICODE|re.M) - while 1: - m = r.search(textrun, pos) - if m == None: - wikipedia.output(u"\t\t>> All images checked. <<") - break - pos = m.end() - image = m.group(1) - if image not in done: - done.append(image) - yield image - #continue + def regexGenerator(self, regexp, textrun): + pos = 0 + done = list() + ext_list = list() + r = re.compile(r'%s' % regexp, re.UNICODE|re.M) + while 1: + m = r.search(textrun, pos) + if m == None: + wikipedia.output(u"\t\t>> All images checked. <<") + break + pos = m.end() + image = m.group(1) + if image not in done: + done.append(image) + yield image + #continue
# NOTE: the four definitions below take ``self`` and are invoked elsewhere in
# this file as ``mainClass.<name>(...)`` on a ``main`` instance.  The class
# header lies outside this excerpt, so re-indent them to method level when
# splicing them back into the class body.
def checkImage(self, image):
    """Check whether *image* also exists on Commons and report it if so.

    image -- the file name without the namespace prefix.

    Returns False only when the local description page shows the shared
    upload notice (the caller then skips the image); True in every other
    case, whether or not a report was written.
    """
    self.image = image
    wikipedia.output(u'Checking if %s is on commons...' % image)
    commons = wikipedia.getSite('commons', 'commons')
    if not wikipedia.Page(commons, u'Image:%s' % image).exists():
        # Not on Commons at all: nothing to report.
        return True
    wikipedia.output(u'%s is on commons!' % image)
    imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % image)
    on_commons_text = imagePage.getImagePageHtml()
    if "<div class='sharedUploadNotice'>" in on_commons_text:
        wikipedia.output(u"But, the image doesn't exist on your project! Skip...")
        # The file lives on Commons but somebody added a local description
        # page: the normal checks must be skipped for it.
        return False
    if 'stemma' in image.lower() and self.site.lang == 'it':
        # The quote inside the message has to be escaped, otherwise the
        # literal ends at "it" and the module does not even parse.
        wikipedia.output(u'%s has "stemma" inside, means that it\'s ok.' % image)
        return True
    # Two placeholders: report_image() fills both with the image name.
    repme = "\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]]"
    self.report_image(self.image, self.rep_page, self.com, repme)
    return True

def report_image(self, image, rep_page = None, com = None, rep_text = None):
    """Append an entry for *image* to the report page, unless already listed.

    image    -- the file name to report.
    rep_page -- title of the report page (default: self.rep_page).
    com      -- edit summary (default: self.com).
    rep_text -- %-template of the entry (default: self.rep_text).  It gets
                the image name once, or twice when it contains the
                "'''Commons'''" marker.

    Returns True when the entry has been written, False when the image was
    already mentioned on the page.  Raises LogIsFull when the page text has
    reached self.logFulNumber characters.
    """
    if rep_page is None:
        rep_page = self.rep_page
    if com is None:
        com = self.com
    if rep_text is None:
        rep_text = self.rep_text
    another_page = wikipedia.Page(self.site, rep_page)

    if another_page.exists():
        text_get = another_page.get()
    else:
        text_get = str()
    if len(text_get) >= self.logFulNumber:
        raise LogIsFull("The log page (%s) is full! Please delete the old images reported." % another_page.title())
    # The name is searched literally: escaping is required because file
    # names routinely contain regex metacharacters such as "(", "+" or ".".
    already_listed = re.search(re.escape(image), text_get, re.UNICODE)
    if already_listed is not None:
        wikipedia.output(u"%s is already in the report page." % image)
        return False
    # Adding the log :)
    if "'''Commons'''" in rep_text:
        rep_text = rep_text % (image, image)
    else:
        rep_text = rep_text % image
    another_page.put(text_get + rep_text, comment = com, minorEdit = False)
    wikipedia.output(u"...Reported...")
    return True

def takesettings(self):
    """Load the additional per-project settings from the wiki.

    Returns None when self.settings is None, when the settings page does
    not exist, or when no settings block could be parsed.  Otherwise it
    returns a list with one entry per block:
    [number, name, find_tipe, find, imagechanges, summary, head, text,
    mexcatched], where number counts the blocks starting from 1.
    """
    if self.settings is None:
        return None
    settings_page = wikipedia.Page(self.site, self.settings)
    try:
        testo = settings_page.get()
    except wikipedia.NoPage:
        wikipedia.output(u"The settings' page doesn't exist!")
        return None
    # NOTE(review): the archived copy of this pattern had lost the escaping
    # of its quote characters; "\n*" may likewise have been "\n\*" (a
    # literal wiki list bullet) -- confirm against the repository.
    rxp = r"""<------- ------->\n*[Nn]ame ?= ?['"](.*?)['"]\n*([Ff]ind|[Ff]indonly)=(.*?)\n*[Ii]magechanges=(.*?)\n*[Ss]ummary=['"](.*?)['"]\n*[Hh]ead=['"](.*?)['"]\n*[Tt]ext ?= ?['"](.*?)['"]\n*[Mm]ex ?= ?['"]?(.*?)['"]?$"""
    r = re.compile(rxp, re.UNICODE|re.M)
    lista = list()
    number = 1
    for m in r.finditer(testo):
        # NOTE(review): str() presumably fails on non-ASCII settings under
        # Python 2 if get() returns unicode -- verify before relying on it.
        tupla = [number] + [str(m.group(index)) for index in range(1, 9)]
        lista.append(tupla)
        number += 1
    if not lista:
        wikipedia.output(u"You've set wrongly your settings, please take a look to the relative page. (run without them)")
        return None
    return lista

def load(self, raw):
    """Extract the quoted items of a list written as text.

    raw -- text such as "['foo', 'bar']".

    Returns the items in order of appearance, without duplicates; the list
    is empty when nothing matches, so callers can always iterate it.
    """
    list_loaded = list()
    # An item is a quoted string followed by ", " or by the closing "]".
    # Compiled once, outside the loop.
    pl = re.compile(r"""("|')(.*?)("|')(, |\])""", re.UNICODE)
    for xl in pl.finditer(raw):
        word = xl.group(2)
        if word not in list_loaded:
            list_loaded.append(word)
    return list_loaded
# The report class (main) takes too many steps to be driven by hand every
# time, so this function wraps the whole sequence and can be called once ^__^
def report(newtext, image, notification, head, notification2 = None, unver = True, commx = None, bot_list = bot_list):
    """Tag an image and notify its uploader, in a single call.

    newtext, image, notification, head -- passed on to main.general().
    notification2 -- passed to put_talk() together with notification/head.
    unver         -- when True put_mex() is called with its default
                     argument, otherwise put_mex(False) is used.
    commx         -- passed to put_talk() as well.
    bot_list      -- per-language table; the entry for the current site is
                     obtained with wikipedia.translate().

    Nothing is returned.  A missing page or an edit conflict while tagging
    makes the function give up on the image; an edit conflict on the talk
    page is retried once.
    """
    botolist = wikipedia.translate(wikipedia.getSite(), bot_list)
    run = main(site = wikipedia.getSite())
    run.general(newtext, image, notification, head, botolist)
    try:
        if unver == True:
            resPutMex = run.put_mex()
        else:
            resPutMex = run.put_mex(False)
    except wikipedia.NoPage:
        if unver == True:
            wikipedia.output(u"The page has been deleted! Skip!")
        else:
            wikipedia.output(u"The page has been deleted!")
        return
    except wikipedia.EditConflict:
        wikipedia.output(u"Edit conflict! Skip!")
        return
    # Explicit comparison on purpose: only a real False stops the process,
    # a None result must still lead to the talk page notification.
    if resPutMex == False:
        return
    try:
        run.put_talk(notification, head, notification2, commx)
    except wikipedia.EditConflict:
        wikipedia.output(u"Edit Conflict! Retrying...")
        try:
            run.put_talk(notification, head, notification2, commx)
        except Exception:
            # Best effort: the second failure is only logged.  Catching
            # Exception (not everything) keeps Ctrl-C and sys.exit() working.
            wikipedia.output(u"Another error... skipping the user..")
def checkbot(): - # Command line configurable parameters - repeat = True # Restart after having check all the images? - limit = 80 # How many images check? - time_sleep = 30 # How many time sleep after the check? - skip_number = 0 # How many images to skip before checking? - wait_number = 0 # How many time sleep before the check? - commonsActive = False # Check if on commons there's an image with the same name? - normal = False # Check the new images or use another generator? - urlUsed = False # Use the url-related function instead of the new-pages generator - regexGen = False # Use the regex generator - untagged = False # Use the untagged generator - skip_list = list() # Inizialize the skip list used below + # Command line configurable parameters + repeat = True # Restart after having check all the images? + limit = 80 # How many images check? + time_sleep = 30 # How many time sleep after the check? + skip_number = 0 # How many images to skip before checking? + wait_number = 0 # How many time sleep before the check? + commonsActive = False # Check if on commons there's an image with the same name? + normal = False # Check the new images or use another generator? + urlUsed = False # Use the url-related function instead of the new-pages generator + regexGen = False # Use the regex generator + untagged = False # Use the untagged generator + skip_list = list() # Inizialize the skip list used below
- # Here below there are the parameters. - for arg in wikipedia.handleArgs(): - if arg.startswith('-limit'): - if len(arg) == 7: - limit = int(wikipedia.input(u'How many images do you want to check?')) - else: - limit = int(arg[7:]) - if arg.startswith('-time'): - if len(arg) == 5: - time_sleep = int(wikipedia.input(u'How many seconds do you want runs to be apart?')) - else: - time_sleep = int(arg[6:]) - elif arg == '-break': - repeat = False - elif arg == '-commons': - commonsActive = True - elif arg.startswith('-skip'): - if len(arg) == 5: - skip = True - skip_number = int(wikipedia.input(u'How many images do you want to skip?')) - elif len(arg) > 5: - skip = True - skip_number = int(arg[6:]) - elif arg.startswith('-wait'): - if len(arg) == 5: - wait = True - wait_number = int(wikipedia.input(u'How many time do you want to wait before checking the images?')) - elif len(arg) > 5: - wait = True - wait_number = int(arg[6:]) - elif arg.startswith('-start'): - if len(arg) == 6: - firstPageTitle = str(wikipedia.input(u'From witch page do you want to start?')) - elif len(arg) > 6: - firstPageTitle = str(arg[7:]) - generator = wikipedia.getSite().allpages(start='Image:%s' % firstPageTitle) - repeat = False - elif arg.startswith('-page'): - if len(arg) == 5: - regexPageName = str(wikipedia.input(u'Which page do you want to use for the regex?')) - elif len(arg) > 5: - regexPageName = str(arg[6:]) - repeat = False - regexGen = True - elif arg.startswith('-url'): - if len(arg) == 4: - regexPageUrl = str(wikipedia.input(u'Which url do you want to use for the regex?')) - elif len(arg) > 4: - regexPageUrl = str(arg[5:]) - urlUsed = True - repeat = False - regexGen = True - elif arg.startswith('-regex'): - if len(arg) == 6: - regexpToUse = str(wikipedia.input(u'Which regex do you want to use?')) - elif len(arg) > 6: - regexpToUse = str(arg[7:]) - generator = 'regex' - repeat = False - elif arg.startswith('-cat'): - if len(arg) == 4: - catName = str(wikipedia.input(u'In which 
category do I work?')) - elif len(arg) > 4: - catName = str(arg[5:]) - catSelected = catlib.Category(wikipedia.getSite(), 'Category:%s' % catName) - generator = pagegenerators.CategorizedPageGenerator(catSelected) - repeat = False - elif arg.startswith('-ref'): - if len(arg) == 4: - refName = str(wikipedia.input(u'The references of what page should I parse?')) - elif len(arg) > 4: - refName = str(arg[5:]) - generator = pagegenerators.ReferringPageGenerator(wikipedia.Page(wikipedia.getSite(), refName)) - repeat = False - elif arg.startswith('-untagged'): - untagged = True - if len(arg) == 9: - projectUntagged = str(wikipedia.input(u'In which project should I work?')) - elif len(arg) > 9: - projectUntagged = str(arg[10:]) + # Here below there are the parameters. + for arg in wikipedia.handleArgs(): + if arg.startswith('-limit'): + if len(arg) == 7: + limit = int(wikipedia.input(u'How many images do you want to check?')) + else: + limit = int(arg[7:]) + if arg.startswith('-time'): + if len(arg) == 5: + time_sleep = int(wikipedia.input(u'How many seconds do you want runs to be apart?')) + else: + time_sleep = int(arg[6:]) + elif arg == '-break': + repeat = False + elif arg == '-commons': + commonsActive = True + elif arg.startswith('-skip'): + if len(arg) == 5: + skip = True + skip_number = int(wikipedia.input(u'How many images do you want to skip?')) + elif len(arg) > 5: + skip = True + skip_number = int(arg[6:]) + elif arg.startswith('-wait'): + if len(arg) == 5: + wait = True + wait_number = int(wikipedia.input(u'How many time do you want to wait before checking the images?')) + elif len(arg) > 5: + wait = True + wait_number = int(arg[6:]) + elif arg.startswith('-start'): + if len(arg) == 6: + firstPageTitle = str(wikipedia.input(u'From witch page do you want to start?')) + elif len(arg) > 6: + firstPageTitle = str(arg[7:]) + generator = wikipedia.getSite().allpages(start='Image:%s' % firstPageTitle) + repeat = False + elif arg.startswith('-page'): + if len(arg) == 
5: + regexPageName = str(wikipedia.input(u'Which page do you want to use for the regex?')) + elif len(arg) > 5: + regexPageName = str(arg[6:]) + repeat = False + regexGen = True + elif arg.startswith('-url'): + if len(arg) == 4: + regexPageUrl = str(wikipedia.input(u'Which url do you want to use for the regex?')) + elif len(arg) > 4: + regexPageUrl = str(arg[5:]) + urlUsed = True + repeat = False + regexGen = True + elif arg.startswith('-regex'): + if len(arg) == 6: + regexpToUse = str(wikipedia.input(u'Which regex do you want to use?')) + elif len(arg) > 6: + regexpToUse = str(arg[7:]) + generator = 'regex' + repeat = False + elif arg.startswith('-cat'): + if len(arg) == 4: + catName = str(wikipedia.input(u'In which category do I work?')) + elif len(arg) > 4: + catName = str(arg[5:]) + catSelected = catlib.Category(wikipedia.getSite(), 'Category:%s' % catName) + generator = pagegenerators.CategorizedPageGenerator(catSelected) + repeat = False + elif arg.startswith('-ref'): + if len(arg) == 4: + refName = str(wikipedia.input(u'The references of what page should I parse?')) + elif len(arg) > 4: + refName = str(arg[5:]) + generator = pagegenerators.ReferringPageGenerator(wikipedia.Page(wikipedia.getSite(), refName)) + repeat = False + elif arg.startswith('-untagged'): + untagged = True + if len(arg) == 9: + projectUntagged = str(wikipedia.input(u'In which project should I work?')) + elif len(arg) > 9: + projectUntagged = str(arg[10:])
- # Understand if the generator it's the default or not. - try: - generator - except NameError: - normal = True + # Understand if the generator it's the default or not. + try: + generator + except NameError: + normal = True
- # Define the site. - site = wikipedia.getSite() + # Define the site. + site = wikipedia.getSite()
- # Block of text to translate the parameters set above. - image_n = site.image_namespace() - image_namespace = "%s:" % image_n # Example: "User_talk:" - unvertext = wikipedia.translate(site, n_txt) - di = wikipedia.translate(site, delete_immediately) - dih = wikipedia.translate(site, delete_immediately_head) - din = wikipedia.translate(site, delete_immediately_notification) - nh = wikipedia.translate(site, nothing_head) - nn = wikipedia.translate(site, nothing_notification) - dels = wikipedia.translate(site, del_comm) - smwl = wikipedia.translate(site, second_message_without_license) - TextFind = wikipedia.translate(site, txt_find) - hiddentemplate = wikipedia.translate(site, HiddenTemplate) - # If there's an hidden template, change the used - HiddenTN = wikipedia.translate(site, HiddenTemplateNotification) - # A template as {{en is not a license! Adding also them in the whitelist template... - for langK in wikipedia.Family('wikipedia').langs.keys(): - hiddentemplate.append('%s' % langK) + # Block of text to translate the parameters set above. + image_n = site.image_namespace() + image_namespace = "%s:" % image_n # Example: "User_talk:" + unvertext = wikipedia.translate(site, n_txt) + di = wikipedia.translate(site, delete_immediately) + dih = wikipedia.translate(site, delete_immediately_head) + din = wikipedia.translate(site, delete_immediately_notification) + nh = wikipedia.translate(site, nothing_head) + nn = wikipedia.translate(site, nothing_notification) + dels = wikipedia.translate(site, del_comm) + smwl = wikipedia.translate(site, second_message_without_license) + TextFind = wikipedia.translate(site, txt_find) + hiddentemplate = wikipedia.translate(site, HiddenTemplate) + # If there's an hidden template, change the used + HiddenTN = wikipedia.translate(site, HiddenTemplateNotification) + # A template as {{en is not a license! Adding also them in the whitelist template... 
+ for langK in wikipedia.Family('wikipedia').langs.keys(): + hiddentemplate.append('%s' % langK)
- # If the images to skip are 0, set the skip variable to False (the same for the wait time) - if skip_number == 0: - skip = False - if wait_number == 0: - wait = False - # nothing = Defining an empty image description - nothing = ['', ' ', ' ', ' ', '\n', '\n ', '\n ', '\n\n', '\n \n', ' \n', ' \n ', ' \n \n'] - # something = Minimal requirements for an image description. - # If this fits, no tagging will take place (if there aren't other issues) - # MIT license is ok on italian wikipedia, let also this here - something = ['{{', "'''MIT license'''"] # Don't put "}}" here, please. Useless and can give problems. - # Unused file extensions. Does not contain PDF. - notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd") + # If the images to skip are 0, set the skip variable to False (the same for the wait time) + if skip_number == 0: + skip = False + if wait_number == 0: + wait = False + # nothing = Defining an empty image description + nothing = ['', ' ', ' ', ' ', '\n', '\n ', '\n ', '\n\n', '\n \n', ' \n', ' \n ', ' \n \n'] + # something = Minimal requirements for an image description. + # If this fits, no tagging will take place (if there aren't other issues) + # MIT license is ok on italian wikipedia, let also this here + something = ['{{', "'''MIT license'''"] # Don't put "}}" here, please. Useless and can give problems. + # Unused file extensions. Does not contain PDF. + notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd")
- # A little block-statement to ensure that the bot will not start with en-parameters - if site.lang not in project_inserted: - wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!") - wikipedia.stopme() - # Some formatting for delete immediately template - di = '\n%s' % di - dels = dels % di + # A little block-statement to ensure that the bot will not start with en-parameters + if site.lang not in project_inserted: + wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!") + wikipedia.stopme() + # Some formatting for delete immediately template + di = '\n%s' % di + dels = dels % di
- # Reading the log of the new images if another generator is not given. + # Reading the log of the new images if another generator is not given. + if normal == True: + if limit == 1: + wikipedia.output(u"Retrieving the latest file for checking...") + else: + wikipedia.output(u"Retrieving the latest %d files for checking..." % limit) + # Main Loop + while 1: + # Defing the Main Class. + mainClass = main(site) + # Untagged is True? Let's take that generator + if untagged == True: + generator = mainClass.untaggedGenerator(projectUntagged, limit) + normal = False # Ensure that normal is False + # Normal True? Take the default generator if normal == True: - if limit == 1: - wikipedia.output(u"Retrieving the latest file for checking...") + generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site) + # if urlUsed and regexGen, get the source for the generator + if urlUsed == True and regexGen == True: + textRegex = pagetext(regexPageUrl) + # Not an url but a wiki page as "source" for the regex + elif regexGen == True: + pageRegex = wikipedia.Page(site, regexPageName) + try: + textRegex = pageRegex.get() + except wikipedia.NoPage: + wikipedia.output(u"%s doesn't exist!" % page.title()) + textRegex = '' # No source, so the bot will quit later. + # If generator is the regex' one, use your own Generator using an url or page and a regex. + if generator == 'regex' and regexGen == True: + generator = mainClass.regexGenerator(regexpToUse, textRegex) + # Ok, We (should) have a generator, so let's go on. + try: + # Take the additional settings for the Project + tupla_written = mainClass.takesettings() + except wikipedia.Error: + # Error? 
Settings = None + wikipedia.output(u'Problems with loading the settigs, run without them.') + tupla_written = None + some_problem = False + # Ensure that if the list given is empty it will be converted to "None" + # (but it should be already done in the takesettings() function) + if tupla_written == []: tupla_written = None + # Real-Time page loaded + if tupla_written != None: wikipedia.output(u'\t >> Loaded the real-time page... <<') + # No settings found, No problem, continue. + else: wikipedia.output(u'\t >> No additional settings found! <<') + # Not the main, but the most important loop. + #parsed = False + for image in generator: + + # When you've a lot of image to skip before working use this workaround, otherwise + # let this commented, thanks. [ decoment also parsed = False if you want to use it + # + #if image.title() != u'Immagine:Nytlogo379x64.gif' and not parsed: + # wikipedia.output(u"%s already parsed." % image.title()) + # continue + #else: + # parsed = True + + # If I don't inizialize the generator, wait part and skip part are useless + if wait: + printWithTimeZone(u'Waiting %s seconds before checking the images,' % wait_number) + # Let's sleep... + time.sleep(wait_number) + # Never sleep again (we are in a loop) + wait = False + # If the generator returns something that is not an image, simply skip it. + if normal == False and regexGen == False: + if image_namespace.lower() not in image.title().lower() and \ + 'image:' not in image.title().lower(): + wikipedia.output(u'%s seems not an image, skip it...' % image.title()) + continue + try: + imageName = image.title().split(image_namespace)[1] # Deleting the namespace (useless here) + except IndexError:# Namespace image not found, that's not an image! Let's skip... + wikipedia.output(u"%s is not an image, skipping..." 
% image.title()) + continue + # Skip block + if skip == True: + # If the images to skip are more the images to check, make them the same number + if skip_number > limit: skip_number = limit + # Print a starting message only if no images has been skipped + if skip_list == []: + if skip_number == 1: + wikipedia.output(u'Skipping the first image:\n') + else: + wikipedia.output(u'Skipping the first %s images:\n' % skip_number) + # If we still have pages to skip: + if len(skip_list) < skip_number: + wikipedia.output(u'Skipping %s...' % imageName) + skip_list.append(imageName) + if skip_number == 1: + wikipedia.output('') + skip = False + continue else: - wikipedia.output(u"Retrieving the latest %d files for checking..." % limit) - # Main Loop - while 1: - # Defing the Main Class. - mainClass = main(site) - # Untagged is True? Let's take that generator - if untagged == True: - generator = mainClass.untaggedGenerator(projectUntagged, limit) - normal = False # Ensure that normal is False - # Normal True? Take the default generator - if normal == True: - generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site) - # if urlUsed and regexGen, get the source for the generator - if urlUsed == True and regexGen == True: - textRegex = pagetext(regexPageUrl) - # Not an url but a wiki page as "source" for the regex - elif regexGen == True: - pageRegex = wikipedia.Page(site, regexPageName) - try: - textRegex = pageRegex.get() - except wikipedia.NoPage: - wikipedia.output(u"%s doesn't exist!" % page.title()) - textRegex = '' # No source, so the bot will quit later. - # If generator is the regex' one, use your own Generator using an url or page and a regex. - if generator == 'regex' and regexGen == True: - generator = mainClass.regexGenerator(regexpToUse, textRegex) - # Ok, We (should) have a generator, so let's go on. - try: - # Take the additional settings for the Project - tupla_written = mainClass.takesettings() - except wikipedia.Error: - # Error? 
Settings = None - wikipedia.output(u'Problems with loading the settigs, run without them.') + wikipedia.output('') # Print a blank line. + skip = False + elif skip_list == []: # Skip must be false if we are here but + # the user has set 0 as images to skip + wikipedia.output(u'\t\t>> No images to skip...<<') + skip_list.append('skip = Off') # Only to print it once + # Check on commons if there's already an image with the same name + if commonsActive == True: + response = mainClass.checkImage(imageName) + if response == False: + continue + parentesi = False # parentesi are these in italian: { ( ) } [] + delete = False + tagged = False + extension = imageName.split('.')[-1] # get the extension from the image's name + # Page => ImagePage + p = wikipedia.ImagePage(site, image.title()) + # Get the text in the image (called g) + try: + g = p.get() + except wikipedia.NoPage: + wikipedia.output(u"Skipping %s because it has been deleted." % imageName) + continue + except wikipedia.IsRedirectPage: + wikipedia.output(u"The file description for %s is a redirect?!" % imageName ) + continue + # Is the image already tagged? If yes, no need to double-check, skip + for i in TextFind: + # If there are {{ use regex, otherwise no (if there's not the {{ may not be a template + # and the regex will be wrong) + if '{{' in i: + regexP = re.compile('{{(?:template|)%s ?(?:||\n|}) ?' % i.split('{{')[1].replace(' ', '[ _]'), re.I) + result = regexP.findall(g) + if result != []: + tagged = True + elif i.lower() in g: + tagged = True + # Deleting the useless template from the description (before adding something + # in the image the original text will be reloaded, don't worry). 
+ hiddenTemplateFound = False + for l in hiddentemplate: + if tagged == False: + res = re.findall(r'{{(?:[Tt]emplate:|)%s(?: \n|||\n|})' % l.lower(), g.lower()) + if res != []: + wikipedia.output(u'A white template found, skipping the template...') + if l != '' and l != ' ': # Check that l is not nothing or a space + # Deleting! (replace the template with nothing) + g = re.sub(r'{{(?:template:|)%s' % l.lower(), r'', g.lower()) + hiddenTemplateFound = True + for a_word in something: # something is the array with {{, MIT License and so on. + if a_word in g: + # There's a template, probably a license (or I hope so) + parentesi = True + # Is the extension allowed? (is it an image or f.e. a .xls file?) + for parl in notallowed: + if parl.lower() in extension.lower(): + delete = True + some_problem = False # If it has "some_problem" it must check + # the additional settings. + # if tupla_writte, use addictional settings + if tupla_written != None: + # In every tupla there's a setting configuration + for tupla in tupla_written: + name = tupla[1] + find_tipe = tupla[2] + find = tupla[3] + find_list = mainClass.load(find) + imagechanges = tupla[4] + if imagechanges.lower() == 'false': + imagestatus = False + elif imagechanges.lower() == 'true': + imagestatus = True + else: + wikipedia.output(u"Error! Imagechanges set wrongly!") tupla_written = None - some_problem = False - # Ensure that if the list given is empty it will be converted to "None" - # (but it should be already done in the takesettings() function) - if tupla_written == []: tupla_written = None - # Real-Time page loaded - if tupla_written != None: wikipedia.output(u'\t >> Loaded the real-time page... <<') - # No settings found, No problem, continue. - else: wikipedia.output(u'\t >> No additional settings found! <<') - # Not the main, but the most important loop. 
- for image in generator: - # If I don't inizialize the generator, wait part and skip part are useless - if wait: - printWithTimeZone(u'Waiting %s seconds before checking the images,' % wait_number) - # Let's sleep... - time.sleep(wait_number) - # Never sleep again (we are in a loop) - wait = False - # If the generator returns something that is not an image, simply skip it. - if normal == False and regexGen == False: - if image_namespace.lower() not in image.title().lower() and \ - 'image:' not in image.title().lower(): - wikipedia.output(u'%s seems not an image, skip it...' % image.title()) - continue - imageName = image.title().split(image_namespace)[1] # Deleting the namespace (useless here) - # Skip block - if skip == True: - # If the images to skip are more the images to check, make them the same number - if skip_number > limit: skip_number = limit - # Print a starting message only if no images has been skipped - if skip_list == []: - if skip_number == 1: - wikipedia.output(u'Skipping the first image:\n') - else: - wikipedia.output(u'Skipping the first %s images:\n' % skip_number) - # If we still have pages to skip: - if len(skip_list) < skip_number: - wikipedia.output(u'Skipping %s...' % imageName) - skip_list.append(imageName) - if skip_number == 1: - wikipedia.output('') - skip = False - continue - else: - wikipedia.output('') # Print a blank line. 
- skip = False - elif skip_list == []: # Skip must be false if we are here but - # the user has set 0 as images to skip - wikipedia.output(u'\t\t>> No images to skip...<<') - skip_list.append('skip = Off') # Only to print it once - # Check on commons if there's already an image with the same name - if commonsActive == True: - response = mainClass.checkImage(imageName) - if response == False: - continue - parentesi = False # parentesi are these in italian: { ( ) } [] - delete = False - tagged = False - extension = imageName.split('.')[-1] # get the extension from the image's name - # Page => ImagePage - p = wikipedia.ImagePage(site, image.title()) - # Get the text in the image (called g) - try: - g = p.get() - except wikipedia.NoPage: - wikipedia.output(u"Skipping %s because it has been deleted." % imageName) + break + summary = tupla[5] + head_2 = tupla[6] + text = tupla[7] + text = text % imageName + mexCatched = tupla[8] + wikipedia.setAction(summary) + for k in find_list: + if find_tipe.lower() == 'findonly': + if k.lower() == g.lower(): + some_problem = True + text_used = text + head_used = head_2 + imagestatus_used = imagestatus + name_used = name + summary_used = summary + mex_used = mexCatched + break + elif find_tipe.lower() == 'find': + if k.lower() in g.lower(): + some_problem = True + text_used = text + head_used = head_2 + imagestatus_used = imagestatus + name_used = name + summary_used = summary + mex_used = mexCatched continue - except wikipedia.IsRedirectPage: - wikipedia.output(u"The file description for %s is a redirect?!" % imageName ) - continue - # Is the image already tagged? If yes, no need to double-check, skip - for i in TextFind: - # If there are {{ use regex, otherwise no (if there's not the {{ may not be a template - # and the regex will be wrong) - if '{{' in i: - regexP = re.compile('{{(?:template|)%s ?(?:||\n|}) ?' 
% i.split('{{')[1].replace(' ', '[ _]'), re.I) - result = regexP.findall(g) - if result != []: - tagged = True - elif i.lower() in g: - tagged = True - # Deleting the useless template from the description (before adding something - # in the image the original text will be reloaded, don't worry). - hiddenTemplateFound = False - for l in hiddentemplate: - if tagged == False: - res = re.findall(r'{{(?:[Tt]emplate:|)%s(?: \n|||\n|})' % l.lower(), g.lower()) - if res != []: - wikipedia.output(u'A white template found, skipping the template...') - if l != '' and l != ' ': # Check that l is not nothing or a space - # Deleting! (replace the template with nothing) - g = re.sub(r'{{(?:template:|)%s' % l.lower(), r'', g.lower()) - hiddenTemplateFound = True - for a_word in something: # something is the array with {{, MIT License and so on. - if a_word in g: - # There's a template, probably a license (or I hope so) - parentesi = True - # Is the extension allowed? (is it an image or f.e. a .xls file?) - for parl in notallowed: - if parl.lower() in extension.lower(): - delete = True - some_problem = False # If it has "some_problem" it must check - # the additional settings. - # if tupla_writte, use addictional settings - if tupla_written != None: - # In every tupla there's a setting configuration - for tupla in tupla_written: - name = tupla[1] - find_tipe = tupla[2] - find = tupla[3] - find_list = mainClass.load(find) - imagechanges = tupla[4] - if imagechanges.lower() == 'false': - imagestatus = False - elif imagechanges.lower() == 'true': - imagestatus = True - else: - wikipedia.output(u"Error! 
Imagechanges set wrongly!") - tupla_written = None - break - summary = tupla[5] - head_2 = tupla[6] - text = tupla[7] - text = text % imageName - mexCatched = tupla[8] - wikipedia.setAction(summary) - for k in find_list: - if find_tipe.lower() == 'findonly': - if k.lower() == g.lower(): - some_problem = True - text_used = text - head_used = head_2 - imagestatus_used = imagestatus - name_used = name - summary_used = summary - mex_used = mexCatched - break - elif find_tipe.lower() == 'find': - if k.lower() in g.lower(): - some_problem = True - text_used = text - head_used = head_2 - imagestatus_used = imagestatus - name_used = name - summary_used = summary - mex_used = mexCatched - continue - # If the image exists (maybe it has been deleting during the oder - # checking parts or something, who knows? ;-)) - if p.exists(): - # Here begins the check block. - if tagged == True: - # Tagged? Yes, skip. - printWithTimeZone(u'%s is already tagged...' % imageName) - continue - if some_problem == True: - if mex_used in g: - wikipedia.output(u'Image already fixed. Skip.') - continue - wikipedia.output(u"The image description for %s contains %s..." % (imageName, name_used)) - if mex_used.lower() == 'default': - mex_used = unvertext - if imagestatus_used == False: - reported = mainClass.report_image(imageName) - else: - reported = True - if reported == True: - #if imagestatus_used == True: - report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used) - else: - wikipedia.output(u"Skipping the image...") - some_problem = False - continue - elif parentesi == True: - printWithTimeZone(u"%s seems ok," % imageName) - # It works also without this... but i want only to be sure ^^ - parentesi = False - continue - elif delete == True: - wikipedia.output(u"%s is not a file!" 
% imageName) - # Modify summary text - wikipedia.setAction(dels) - canctext = di % extension - notification = din % imageName - head = dih - report(canctext, imageName, notification, head) - delete = False - continue - elif g in nothing: - wikipedia.output(u"The image description for %s does not contain a license template!" % imageName) - if hiddenTemplateFound and HiddenTN != None and HiddenTN != '' and HiddenTN != ' ': - notification = HiddenTN % imageName - else: - notification = nn % imageName - head = nh - report(unvertext, imageName, notification, head, smwl) - continue - else: - wikipedia.output(u"%s has only text and not the specific license..." % imageName) - if hiddenTemplateFound and HiddenTN != None and HiddenTN != '' and HiddenTN != ' ': - notification = HiddenTN % imageName - else: - notification = nn % imageName - head = nh - report(unvertext, imageName, notification, head, smwl) - continue - # A little block to perform the repeat or to break. - if repeat == True: - printWithTimeZone(u"Waiting for %s seconds," % time_sleep) - time.sleep(time_sleep) - elif repeat == False: - wikipedia.output(u"\t\t\t>> STOP! <<") - return True # Exit + # If the image exists (maybe it has been deleting during the oder + # checking parts or something, who knows? ;-)) + if p.exists(): + # Here begins the check block. + if tagged == True: + # Tagged? Yes, skip. + printWithTimeZone(u'%s is already tagged...' % imageName) + continue + if some_problem == True: + if mex_used in g: + wikipedia.output(u'Image already fixed. Skip.') + continue + wikipedia.output(u"The image description for %s contains %s..." 
% (imageName, name_used)) + if mex_used.lower() == 'default': + mex_used = unvertext + if imagestatus_used == False: + reported = mainClass.report_image(imageName) + else: + reported = True + if reported == True: + #if imagestatus_used == True: + report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used) + else: + wikipedia.output(u"Skipping the image...") + some_problem = False + continue + elif parentesi == True: + printWithTimeZone(u"%s seems ok," % imageName) + # It works also without this... but i want only to be sure ^^ + parentesi = False + continue + elif delete == True: + wikipedia.output(u"%s is not a file!" % imageName) + # Modify summary text + wikipedia.setAction(dels) + canctext = di % extension + notification = din % imageName + head = dih + report(canctext, imageName, notification, head) + delete = False + continue + elif g in nothing: + wikipedia.output(u"The image description for %s does not contain a license template!" % imageName) + if hiddenTemplateFound and HiddenTN != None and HiddenTN != '' and HiddenTN != ' ': + notification = HiddenTN % imageName + else: + notification = nn % imageName + head = nh + report(unvertext, imageName, notification, head, smwl) + continue + else: + wikipedia.output(u"%s has only text and not the specific license..." % imageName) + if hiddenTemplateFound and HiddenTN != None and HiddenTN != '' and HiddenTN != ' ': + notification = HiddenTN % imageName + else: + notification = nn % imageName + head = nh + report(unvertext, imageName, notification, head, smwl) + continue + # A little block to perform the repeat or to break. + if repeat == True: + printWithTimeZone(u"Waiting for %s seconds," % time_sleep) + time.sleep(time_sleep) + elif repeat == False: + wikipedia.output(u"\t\t\t>> STOP! <<") + return True # Exit
 # Here there is the main loop. I'll take all the (name of the) images and then i'll check them.
 if __name__ == "__main__":
+    try:
         try:
-                try:
-                        checkbot()
-                except wikipedia.BadTitle:
-                        wikipedia.output(u"Wikidown or server's problem, quit")
-                        wikipedia.stopme()
-        finally:
-                wikipedia.stopme()
+            checkbot()
+        except wikipedia.BadTitle:
+            wikipedia.output(u"Wikidown or server's problem, quit")
+            wikipedia.stopme()
+    finally:
+        wikipedia.stopme()
pywikipedia-l@lists.wikimedia.org