[Gerrit] Add checkimages from compat to core - change (pywikibot/core) - Pywikibot-commits

21 Apr 2014

jenkins-bot has submitted this change and it was merged.
Change subject: Add checkimages from compat to core
......................................................................
Add checkimages from compat to core
Note: -wait option is not implemented yet. It needs additional
methods from compat. Coming soon...
Change-Id: I637a6b55bf0a0336df642552053337ba8368bb20
---
A scripts/checkimages.py
1 file changed, 1,996 insertions(+), 0 deletions(-)
Approvals:
  Merlijn van Deen: Looks good to me, approved
  jenkins-bot: Verified

diff --git a/scripts/checkimages.py b/scripts/checkimages.py
new file mode 100644
index 0000000..2e69333
--- /dev/null
+++ b/scripts/checkimages.py
@@ -0,0 +1,1996 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Script to check recently uploaded files. This script checks if a file
+description is present and if there are other problems in the image's
+description.
+
+This script will have to be configured for each language. Please submit
+translations as addition to the pywikipediabot framework.
+
+Everything that needs customisation is indicated by comments.
+
+This script understands the following command-line arguments:
+
+-limit              The number of images to check (default: 80)
+
+-commons            The Bot will check if an image on Commons has the same name
+                    and if true it reports the image.
+
+-duplicates[:#]     Checking if the image has duplicates (if arg, set how many
+                    rollback wait before reporting the image in the report
+                    instead of tag the image) default: 1 rollback.
+
+-duplicatesreport   Report the duplicates in a log *AND* put the template in
+                    the images.
+
+-sendemail          Send an email after tagging.
+
+-break              To break the bot after the first check (default: recursive)
+
+-time[:#]           Time in seconds between repeat runs (default: 30)
+
+-wait[:#]           Wait x seconds before checking the images (default: 0)
+                    NOT YET IMPLEMENTED
+
+-skip[:#]           The bot skip the first [:#] images (default: 0)
+
+-start[:#]          Use allpages() as generator
+                    (it starts already from File:[:#])
+
+-cat[:#]            Use a category as generator
+
+-regex[:#]          Use regex, must be used with -url or -page
+
+-page[:#]           Define the name of the wikipage where are the images
+
+-url[:#]            Define the url where are the images
+
+-untagged[:#]       Use daniel's tool as generator:
+                    http://toolserver.org/~daniel/WikiSense/UntaggedImages.php
+
+-nologerror         If given, this option will disable the error that is
+                    raised when the log is full.
+
+---- Instructions for the real-time settings  ----
+* For every new block you have to add:
+
+<------- ------->
+
+In this way the Bot can understand where the block starts in order to take the
+right parameter.
+
+* Name=     Set the name of the block
+* Find=     Use it to define what search in the text of the image's description,
+            while
+  Findonly= search only if the exactly text that you give is in the image's
+            description.
+* Summary=  That's the summary that the bot will use when it will notify the
+            problem.
+* Head=     That's the incipit that the bot will use for the message.
+* Text=     This is the template that the bot will use when it will report the
+            image's problem.
+
+---- Known issues/FIXMEs: ----
+* Clean the code, some passages are pretty difficult to understand if you're not
+  the coder.
+* Add the "catch the language" function for commons.
+* Fix and reorganise the new documentation
+* Add a report for the image tagged.
+
+"""
+
+#
+# (C) Kyle/Orgullomoore, 2006-2007 (newimage.py)
+# (C) Siebrand Mazeland, 2007-2010
+# (C) Filnik, 2007-2011
+# (C) Pywikipedia team, 2007-2014
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import re
+import time
+import datetime
+import locale
+import urllib
+import pywikibot
+from pywikibot import pagegenerators as pg
+from pywikibot import config
+
+locale.setlocale(locale.LC_ALL, '')
+
+###############################################################################
+# <--------------------------- Change only below! --------------------------->#
+###############################################################################
+
+# NOTE: if you put __botnick__ in the text of a message used by the Bot, it
+# will automatically be replaced with the bot's nickname.
+
+# That's what you want that will be added. (i.e. the {{no source}} with the
+# right day/month/year )
+n_txt = {
+    'commons': u'{{subst:nld}}',
+    'ar': u'{{subst:لم}}',
+    'de': u'{{Dateiüberprüfung}}',
+    'en': u'{{subst:nld}}',
+    'fa': u'{{جا:حق تکثیر تصویر نامعلوم}}',
+    'fr': u'{{subst:lid}}',
+    'ga': u'{{subst:Ceadúnas de dhíth}}',
+    'hu': u'{{nincslicenc|~~~~~}}',
+    'it': u'{{subst:unverdata}}',
+    'ja': u'{{subst:Nld}}',
+    'ko': u'{{subst:nld}}',
+    'ta': u'{{subst:nld}}',
+    'ur': u'{{subst:حقوق نسخہ تصویر نامعلوم}}',
+    'zh': u'{{subst:No license/auto}}',
+}
+
+# Text that the bot will try to see if there's already or not. If there's a
+# {{ I'll use a regex to make a better check.
+# This will work so:
+# '{{no license' --> '\{\{(?:template:)?no[ _]license ?(?:\||\n|\}|/) ?' (case
+# insensitive).
+# If there's not a {{ it will work as usual (if x in Text)
+txt_find = {
+    'commons': [u'{{no license', u'{{no license/en',
+                u'{{nld', u'{{no permission', u'{{no permission since'],
+    'ar': [u'{{لت', u'{{لا ترخيص'],
+    'de': [u'{{DÜP', u'{{Düp', u'{{Dateiüberprüfung'],
+    'en': [u'{{nld', u'{{no license'],
+    'fa': [u'{{حق تکثیر تصویر نامعلوم۲'],
+    'ga': [u'{{Ceadúnas de dhíth', u'{{Ceadúnas de dhíth'],
+    'hu': [u'{{nincsforrás', u'{{nincslicenc'],
+    'it': [u'{{unverdata', u'{{unverified'],
+    'ja': [u'{{no source', u'{{unknown',
+           u'{{non free', u'<!--削除についての議論が終了するまで'],
+    'ta': [u'{{no source', u'{{nld', u'{{no license'],
+    'ko': [u'{{출처 없음', u'{{라이선스 없음', u'{{Unknown'],
+    'ur': [u'{{ناحوالہ', u'{{اجازہ نامعلوم', u'{{Di-no'],
+    'zh': [u'{{no source', u'{{unknown', u'{{No license'],
+}
+
+# Summary used when the bot adds the "no source" template
+msg_comm = {
+    'ar': u'بوت: التعليم على ملف مرفوع حديثا غير موسوم',
+    'commons': u'Bot: Marking newly uploaded untagged file',
+    'de': u'Bot: Markiere mit {{[[Wikipedia:Dateiüberprüfung/Anleitung|DÜP]]}},'
+          u' da keine Lizenzvorlage gefunden — bitte nicht entfernen,'
+          u' Informationen bald auf der Benutzerdiskussion des Uploaders.',
+    'en': u'Bot: Marking newly uploaded untagged file',
+    'fa': u'ربات: حق تکثیر تصویر تازه بارگذاری شده نامعلوم است.',
+    'ga': u'Róbó: Ag márcáil comhad nua-uaslódáilte gan ceadúnas',
+    'hu': u'Robot: Frissen feltöltött licencsablon nélküli fájl megjelölése',
+    'it': u"Bot: Aggiungo unverified",
+    'ja': u'ロボットによる:著作権情報なしの画像をタグ',
+    'ko': u'로봇:라이선스 없음',
+    'ta': u'தானியங்கி:காப்புரிமை வழங்கப்படா படிமத்தை சுட்டுதல்',
+    'ur': u'روبالہ:نشان زدگی جدید زبراثقال شدہ املاف',
+    'zh': u'機器人:標示新上傳且未包含必要資訊的檔案',
+}
+
+# When the Bot find that the usertalk is empty is not pretty to put only the
+# no source without the welcome, isn't it?
+empty = {
+    'commons': u'{{subst:welcome}}\n~~~~\n',
+    'ar': u'{{ترحيب}}\n~~~~\n',
+    'de': u'{{subst:willkommen}} ~~~~',
+    'en': u'{{welcome}}\n~~~~\n',
+    'fa': u'{{جا:خوشامدید|%s}}',
+    'fr': u'{{Bienvenue nouveau\n~~~~\n',
+    'ga': u'{{subst:Fáilte}} - ~~~~\n',
+    'hu': u'{{subst:Üdvözlet|~~~~}}\n',
+    'it': u'<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n~~~~\n<!-- fine template di benvenuto -->',
+    'ja': u'{{subst:Welcome/intro}}\n{{subst:welcome|--~~~~}}\n',
+    'ko': u'{{환영}}--~~~~\n',
+    'ta': u'{{welcome}}\n~~~~\n',
+    'ur': u'{{خوش آمدید}}\n~~~~\n',
+    'zh': u'{{subst:welcome|sign=~~~~}}',
+}
+
+# Summary that the bot use when it notify the problem with the image's license
+msg_comm2 = {
+    'ar': u'بوت: طلب معلومات المصدر.',
+    'commons': u'Bot: Requesting source information.',
+    'de': u'Bot:Notify User',
+    'en': u'Robot: Requesting source information.',
+    'fa': u'ربات: درخواست منبع تصویر',
+    'ga': u'Róbó: Ag iarraidh eolais foinse.',
+    'it': u"Bot: Notifico l'unverified",
+    'hu': u'Robot: Forrásinformáció kérése',
+    'ja': u'ロボットによる:著作権情報明記のお願い',
+    'ko': u'로봇:라이선스 정보 요청',
+    'ta': u'தானியங்கி:மூலம் வழங்கப்படா படிமத்தை சுட்டுதல்',
+    'ur': u'روبالہ:درخواست ماخذ تصویر',
+    'zh': u'機器人：告知用戶',
+}
+
+# if the file has an unknown extension it will be tagged with this template.
+# In reality, there aren't unknown extension, they are only not allowed...
+delete_immediately = {
+    'commons': u"{{speedy|The file has .%s as extension. Is it ok? Please check.}}",
+    'ar': u"{{شطب|الملف له .%s كامتداد.}}",
+    'en': u"{{db-meta|The file has .%s as extension.}}",
+    'fa': u"{{حذف سریع|تصویر %s اضافی است.}}",
+    'ga': u"{{scrios|Tá iarmhír .%s ar an comhad seo.}}",
+    'hu': u'{{azonnali|A fájlnak .%s a kiterjesztése}}',
+    'it': u'{{cancella subito|motivo=Il file ha come estensione ".%s"}}',
+    'ja': u'{{db|知らないファイルフォーマット %s}}',
+    'ko': u'{{delete|잘못된 파일 형식 (.%s)}}',
+    'ta': u'{{delete|இந்தக் கோப்பு .%s என்றக் கோப்பு நீட்சியைக் கொண்டுள்ளது.}}',
+    'ur': u"{{سریع حذف شدگی|اس ملف میں .%s بطور توسیع موجود ہے۔ }}",
+    'zh': u'{{delete|未知檔案格式%s}}',
+}
+
+# The header of the Unknown extension's message.
+delete_immediately_head = {
+    'commons': u"\n== Unknown extension! ==\n",
+    'ar': u"\n== امتداد غير معروف! ==\n",
+    'en': u"\n== Unknown extension! ==\n",
+    'fa': u"\n==بارگذاری تصاویر موجود در انبار==\n",
+    'ga': u"\n== Iarmhír neamhaithnid! ==\n",
+    'fr': u'\n== Extension inconnue ==\n',
+    'hu': u'\n== Ismeretlen kiterjesztésű fájl ==\n',
+    'it': u'\n\n== File non specificato ==\n',
+    'ko': u'\n== 잘못된 파일 형식 ==\n',
+    'ta': u'\n== இனங்காணப்படாத கோப்பு நீட்சி! ==\n',
+    'ur': u"\n== نامعلوم توسیع! ==\n",
+    'zh': u'\n==您上載的檔案格式可能有誤==\n',
+}
+
+# Text that will be add if the bot find a unknown extension.
+delete_immediately_notification = {
+    # Every value contains exactly one %s placeholder (the file name).
+    'ar': u'الملف [[:File:%s]] يبدو أن امتداده خاطيء, من فضلك تحقق. ~~~~',
+    'commons': u'The [[:File:%s]] file seems to have a wrong extension, please check. ~~~~',
+    'en': u'The [[:File:%s]] file seems to have a wrong extension, please check. ~~~~',
+    'fa': u'به نظر می‌آید تصویر [[:تصویر:%s]] مسیر نادرستی داشته باشد لطفا بررسی کنید.~~~~',
+    # Double quotes: the text itself contains an apostrophe (d'thoil).
+    'ga': u"Tá iarmhír mícheart ar an comhad [[:File:%s]], scrúdaigh le d'thoil. ~~~~",
+    'fr': u'Le fichier [[:File:%s]] semble avoir une mauvaise extension, veuillez vérifier. ~~~~',
+    'hu': u'A [[:Kép:%s]] fájlnak rossz a kiterjesztése, kérlek ellenőrízd. ~~~~',
+    'it': u'{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Ext|%s|__botnick__}} --~~~~',
+    'ko': u'[[:그림:%s]]의 파일 형식이 잘못되었습니다. 확인 바랍니다.--~~~~',
+    'ta': u'[[:படிமம்:%s]] இனங்காணப்படாத கோப்பு நீட்சியை கொண்டுள்ளது தயவு செய்து ஒரு முறை சரி பார்க்கவும் ~~~~',
+    'ur': u'ملف [[:File:%s]] کی توسیع شاید درست نہیں ہے، براہ کرم جانچ لیں۔ ~~~~',
+    'zh': u'您好，你上傳的[[:File:%s]]無法被識別，請檢查您的檔案，謝謝。--~~~~',
+}
+
+# Summary of the delete immediately.
+# (e.g: Adding {{db-meta|The file has .%s as extension.}})
+msg_del_comm = {
+    'ar': u'بوت: إضافة %s',
+    'commons': u'Bot: Adding %s',
+    'en': u'Bot: Adding %s',
+    'fa': u'ربات: اضافه کردن %s',
+    'ga': u'Róbó: Cuir %s leis',
+    'fr': u'Robot : Ajouté %s',
+    'hu': u'Robot:"%s" hozzáadása',
+    'it': u'Bot: Aggiungo %s',
+    'ja': u'ロボットによる: 追加 %s',
+    'ko': u'로봇 : %s 추가',
+    'ta': u'Bot: Adding %s',
+    'ur': u'روبالہ: اضافہ %s',
+    'zh': u'機器人: 正在新增 %s',
+}
+
+# This is the most important header, because it will be used a lot. That's the
+# header that the bot will add if the image hasn't the license.
+nothing_head = {
+    'ar': u"\n== صورة بدون ترخيص ==\n",
+    'de': u"\n== Bild ohne Lizenz ==\n",
+    'en': u"\n== Image without license ==\n",
+    'fa': u"\n== تصویر بدون اجازہ ==\n",
+    'ga': u"\n== Comhad gan ceadúnas ==\n",
+    'fr': u"\n== Fichier sans licence ==\n",
+    'hu': u"\n== Licenc nélküli kép ==\n",
+    'it': u"\n\n== File senza licenza ==\n",
+    'ur': u"\n== تصویر بدون اجازہ ==\n",
+}
+# That's the text that the bot will add if it doesn't find the license.
+# Note: every __botnick__ will be replaced with your bot's nickname
+# (feel free not to use if you don't need it)
+nothing_notification = {
+    'commons': (u"\n{{subst:User:Filnik/untagged|File:%s}}\n\n''This message "
+                u"was '''added automatically by __botnick__''', if you need "
+                u"some help about it, please read the text above again and "
+                u"follow the links in it, if you still need help ask at the "
+                u"[[File:Human-help-browser.svg|18px|link=Commons:Help desk|?]] "
+                u"'''[[Commons:Help desk|->]][[Commons:Help desk]]''' in any "
+                u"language you like to use.'' --__botnick__ ~~~~~"""),
+    'ar': u"{{subst:مصدر الصورة|File:%s}} --~~~~",
+    'en': u"{{subst:image source|File:%s}} --~~~~",
+    'fa': u"{{جا:اخطار نگاره|%s}}",
+    'ga': u"{{subst:Foinse na híomhá|File:%s}} --~~~~",
+    'hu': u"{{subst:adjforrást|Kép:%s}} \n Ezt az üzenetet ~~~ automatikusan helyezte el a vitalapodon, kérdéseddel fordulj a gazdájához, vagy a [[WP:KF|Kocsmafalhoz]]. --~~~~",
+    'it': u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Senza licenza|%s|__botnick__}} --~~~~",
+    'ja': u"\n{{subst:Image copyright|File:%s}}--~~~~",
+    'ko': u'\n{{subst:User:Kwjbot IV/untagged|%s}} --~~~~',
+    'ta': u'\n{{subst:Di-no license-notice|படிமம்:%s}} ~~~~ ',
+    'ur': u"{{subst:ماخذ تصویر|File:%s}}--~~~~",
+    'zh': u'\n{{subst:Uploadvionotice|File:%s}} ~~~~ ',
+}
+
+# This is a list of what bots used this script in your project.
+# NOTE: YOUR Botnick is automatically added. It's not required to add it twice.
+bot_list = {
+    'commons': [u'Siebot', u'CommonsDelinker', u'Filbot', u'John Bot',
+                u'Sz-iwbot', u'ABFbot'],
+    'de': [u'Xqbot'],
+    'en': [u'OrphanBot'],
+    'fa': [u'Amirobot'],
+    'ga': [u'AllieBot'],
+    'it': [u'Filbot', u'Nikbot', u'.snoopyBot.'],
+    'ja': [u'Alexbot'],
+    'ko': [u'Kwjbot IV'],
+    'ta': [u'TrengarasuBOT'],
+    'ur': [u'Shuaib-bot', u'Tahir-bot', u'SAMI.bot'],
+    'zh': [u'Alexbot'],
+}
+
+# The message that the bot will add the second time that find another license
+# problem.
+second_message_without_license = {
+    'hu': u'\nSzia! Úgy tűnik a [[:Kép:%s]] képpel is hasonló a probléma, mint az előbbivel. Kérlek olvasd el a [[WP:KÉPLIC|feltölthető képek]]ről szóló oldalunk, és segítségért fordulj a [[WP:KF-JO|Jogi kocsmafalhoz]]. Köszönöm --~~~~',
+    'it': u':{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Senza licenza2|%s|__botnick__}} --~~~~',
+}
+
+# You can add some settings to wikipedia. In this way, you can change them
+# without touching the code. That's useful if you are running the bot on
+# Toolserver.
+page_with_settings = {
+    'commons': u'User:Filbot/Settings',
+    'it': u'Progetto:Coordinamento/Immagini/Bot/Settings#Settings',
+    'zh': u"User:Alexbot/cisettings#Settings",
+}
+
+# The bot can report some images (like the images that have the same name of an
+# image on commons) This is the page where the bot will store them.
+report_page = {
+    'commons': u'User:Filbot/Report',
+    'de': u'Benutzer:Xqbot/Report',
+    'en': u'User:Filnik/Report',
+    'fa': u'کاربر:Amirobot/گزارش تصویر',
+    'ga': u'User:AllieBot/ReportImages',
+    'hu': u'User:Bdamokos/Report',
+    'it': u'Progetto:Coordinamento/Immagini/Bot/Report',
+    'ja': u'User:Alexbot/report',
+    'ko': u'User:Kwjbot IV/Report',
+    'ta': u'User:Trengarasu/commonsimages',
+    'ur': u'صارف:محمد شعیب/درخواست تصویر',
+    'zh': u'User:Alexsh/checkimagereport',
+}
+
+# Adding the date after the signature.
+timeselected = u' ~~~~~'
+
+# The text added in the report. Every entry starts a new list item ("\n*")
+# that links to the file and ends with the date signature defined above.
+report_text = {
+    'commons': u"\n*[[:File:%s]] " + timeselected,
+    'ar': u"\n*[[:ملف:%s]] " + timeselected,
+    'de': u"\n*[[:Datei:%s]] " + timeselected,
+    'en': u"\n*[[:File:%s]] " + timeselected,
+    # The backslash of the leading "\n" was missing here, which produced a
+    # literal "n" and no new list item.
+    'fa': u"\n*[[:پرونده:%s]] " + timeselected,
+    'ga': u"\n*[[:File:%s]] " + timeselected,
+    'hu': u"\n*[[:Kép:%s]] " + timeselected,
+    'it': u"\n*[[:File:%s]] " + timeselected,
+    'ja': u"\n*[[:File:%s]] " + timeselected,
+    'ko': u"\n*[[:그림:%s]] " + timeselected,
+    'ta': u"\n*[[:படிமம்:%s]] " + timeselected,
+    'ur': u"\n*[[:تصویر:%s]] " + timeselected,
+    'zh': u"\n*[[:File:%s]] " + timeselected,
+}
+
+# The summary of the report
+msg_comm10 = {
+    'commons': u'Bot: Updating the log',
+    'ar': u'بوت: تحديث السجل',
+    'de': u'Bot: schreibe Log',
+    'en': u'Bot: Updating the log',
+    'fa': u'ربات: به‌روزرسانی سیاهه',
+    'fr': u'Robot: Mise à jour du journal',
+    'ga': u'Róbó: Log a thabhairt suas chun dáta',
+    'hu': u'Robot: A napló frissítése',
+    'it': u'Bot: Aggiorno il log',
+    'ja': u'ロボットによる:更新',
+    'ko': u'로봇:로그 업데이트',
+    'ta': u'தானியங்கி:பட்டியலை இற்றைப்படுத்தல்',
+    'ur': u'روبالہ: تجدید نوشتہ',
+    'zh': u'機器人:更新記錄',
+}
+
+# If a template isn't a license but it's included on a lot of images, that can
+# be skipped to analyze the image without taking care of it. (the template must
+# be in a list)
+# Warning:   Don't add templates like "en, de, it" because they are already
+#            included (added in the code, below).
+# Warning 2: The bot will use regex, make the names compatible, please (don't
+#            add "Template:" or "{{" because they are already put in the regex).
+# Warning 3: the part that use this regex is case-insensitive (just to let you
+#            know..)
+HiddenTemplate = {
+    # Put the other in the page on the project defined below
+    'commons': [u'Template:Information'],
+
+    'ar': [u'Template:معلومات'],
+    'de': [u'Template:Information'],
+    'en': [u'Template:Information'],
+    'fa': [u'الگو:اطلاعات'],
+    'fr': [u'Template:Information'],
+    'ga': [u'Template:Information'],
+    'hu': [u'Template:Információ', u'Template:Enwiki', u'Template:Azonnali'],
+    # Put the other in the page on the project defined below
+    'it': [u'Template:EDP', u'Template:Informazioni file',
+           u'Template:Information', u'Template:Trademark',
+           u'Template:Permissionotrs'],
+
+    'ja': [u'Template:Information'],
+    'ko': [u'Template:그림 정보'],
+    'ta': [u'Template:Information'],
+    'ur': [u'Template:معلومات'],
+    'zh': [u'Template:Information'],
+}
+
+# A page where there's a list of template to skip.
+PageWithHiddenTemplates = {
+    'commons': u'User:Filbot/White_templates#White_templates',
+    'it': u'Progetto:Coordinamento/Immagini/Bot/WhiteTemplates',
+    'ko': u'User:Kwjbot_IV/whitetemplates/list',
+}
+
+# A page where there's a list of template to consider as licenses.
+PageWithAllowedTemplates = {
+    'commons': u'User:Filbot/Allowed templates',
+    'de': u'Benutzer:Xqbot/Lizenzvorlagen',
+    'it': u'Progetto:Coordinamento/Immagini/Bot/AllowedTemplates',
+    'ko': u'User:Kwjbot_IV/AllowedTemplates',
+}
+
+# Template added when the bot finds only an hidden template and nothing else.
+# Note: every __botnick__ will be replaced with your bot's nickname
+# (feel free not to use if you don't need it)
+HiddenTemplateNotification = {
+    'commons': (u"\n{{subst:User:Filnik/whitetemplate|File:%s}}\n\n''This "
+                u"message was added automatically by __botnick__, if you need "
+                u"some help about it please read the text above again and "
+                u"follow the links in it, if you still need help ask at the "
+                u"[[File:Human-help-browser.svg|18px|link=Commons:Help desk|?]]"
+                u" '''[[Commons:Help desk|→]] [[Commons:Help desk]]''' in any "
+                u"language you like to use.'' --__botnick__ ~~~~~"),
+    'it': u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Template_insufficiente|%s|__botnick__}} --~~~~",
+    'ko': u"\n{{subst:User:Kwj2772/whitetemplates|%s}} --~~~~",
+}
+
+# In this part there are the parameters for the dupe images.
+
+# Put here the template that you want to put in the image to warn that it's a
+# dupe. put __image__ if you want only one image, __images__ if you want the
+# whole list
+duplicatesText = {
+    'commons': u'\n{{Dupe|__image__}}',
+    'de': u'{{NowCommons}}',
+    'it': u'\n{{Progetto:Coordinamento/Immagini/Bot/Template duplicati|__images__}}',
+}
+
+# Head of the message given to the author
+duplicate_user_talk_head = {
+    'it': u'\n\n== File doppio ==\n',
+}
+
+# Message to put in the talk
+duplicates_user_talk_text = {
+    'commons': u'{{subst:User:Filnik/duplicates|File:%s|File:%s}}',  # FIXME: it doesn't exist
+    'it': u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Duplicati|%s|%s|__botnick__}} --~~~~",
+}
+
+# Comment used by the bot while it reports the problem in the uploader's talk
+duplicates_comment_talk = {
+    'commons': u'Bot: Dupe file found',
+    'ar': u'بوت: ملف مكرر تم العثور عليه',
+    'fa': u'ربات: تصویر تکراری یافت شد',
+    'it': u"Bot: Notifico il file doppio trovato",
+}
+
+# Comment used by the bot while it reports the problem in the image
+duplicates_comment_image = {
+    'commons': u'Bot: Tagging dupe file',
+    'de': u'Bot: Datei liegt auf Commons',
+    'ar': u'بوت: وسم ملف مكرر',
+    'fa': u'ربات: برچسب زدن بر تصویر تکراری',
+    'it': u'Bot: File doppio, da cancellare',
+}
+
+# Regex to detect the template put in the image's description to find the dupe
+duplicatesRegex = {
+    # Each pattern matches the opening of the duplicate template up to the
+    # first "|" or "}". Braces and pipes are escaped so they are always read
+    # as literals; the 'de' pattern previously had an unterminated "[" set.
+    'commons': r'\{\{(?:[Tt]emplate:|)(?:[Dd]up(?:licat|)e|[Bb]ad[ _][Nn]ame)[\|\}]',
+    'de': r'\{\{[nN](?:C|ow(?: c|[cC])ommons)[\|\}]',
+    'it': r'\{\{(?:[Tt]emplate:|)[Pp]rogetto:[Cc]oordinamento/Immagini/Bot/Template duplicati[\|\}]',
+}
+
+# Category with the licenses and / or with subcategories with the other
+# licenses.
+category_with_licenses = {
+    'commons': u'Category:License tags',
+    'ar': u'تصنيف:قوالب حقوق الصور',
+    'de': u'Kategorie:Vorlage:Lizenz für Bilder',
+    'en': u'Category:Wikipedia image copyright templates',
+    'fa': u'رده:الگو:حق تکثیر پرونده',
+    # Double quotes: the title itself contains an apostrophe (d'íomhánna).
+    'ga': u"Catagóir:Clibeanna cóipchirt d'íomhánna",
+    'it': u'Categoria:Template Licenze copyright',
+    'ja': u'Category:画像の著作権表示テンプレート',
+    'ko': u'분류:위키백과 그림 저작권 틀',
+    'ta': u'Category:காப்புரிமை வார்ப்புருக்கள்',
+    'ur': u'زمرہ:ویکیپیڈیا سانچہ جات حقوق تصاویر',
+    'zh': u'Category:版權申告模板',
+}
+
+# Page where is stored the message to send as email to the users
+emailPageWithText = {
+    #'de': 'Benutzer:ABF/D3',
+}
+
+# Title of the email
+emailSubject = {
+    #'de': 'Problemen mit Deinem Bild auf der Deutschen Wikipedia',
+}
+
+# Seems that uploaderBots aren't interested to get messages regarding the
+# files that they upload.. strange, uh?
+# Format: [[user,regex], [user,regex]...] the regex is needed to match the user
+#         where to send the warning-msg
+uploadBots = {
+    # The regex captures the user name from the "|Source=" line of the file
+    # description. The literal "|", "[[" and "]]" have to be escaped: left
+    # unescaped, the leading "|" is an empty alternative and "[[User:...]]"
+    # is read as a character class, so the user is never captured.
+    'commons': [['File Upload Bot (Magnus Manske)',
+                 r'\|[Ss]ource=Transferred from .*?; transferred to Commons by \[\[User:(.*?)\]\]']],
+}
+
+# Service images that don't have to be deleted and/or reported has a template
+# inside them (you can let this param as None)
+serviceTemplates = {
+    'it': ['Template:Immagine di servizio'],
+}
+
+# Add your project (in alphabetical order) if you want that the bot starts
+project_inserted = ['ar', 'commons', 'de', 'en', 'fa', 'ga', 'hu', 'it', 'ja',
+                    'ko', 'ta', 'ur', 'zh']
+
+################################################################################
+# <--------------------------- Change only above! ---------------------------> #
+################################################################################
+
+
+class LogIsFull(pywikibot.Error):
+    """Exception raised when the log is full.
+
+    It indicates that the Bot cannot add other data to the log.
+    """
+
+
+class NothingFound(pywikibot.Error):
+    """Exception raised when a regex search yields no results.
+
+    That is, the regex has returned [] instead of the expected matches.
+    """
+
+
+def printWithTimeZone(message):
+    """Output *message* followed by the current UTC date and time.
+
+    A single space is appended to the message when it does not already end
+    with one. The timestamp comes from time.strftime() and is decoded with
+    the encoding reported by locale.getlocale() when there is one.
+
+    @param message: the text to print in front of the timestamp; an empty
+        message is accepted
+    """
+    # endswith() instead of message[-1], which raised IndexError for ''.
+    if not message.endswith(' '):
+        message = '%s ' % unicode(message)
+    # Format once; only the decoding differs between the two cases.
+    stamp = time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime())
+    encoding = locale.getlocale()[1]
+    if encoding:
+        time_zone = unicode(stamp, encoding)
+    else:
+        time_zone = unicode(stamp)
+    pywikibot.output(u"%s%s" % (message, time_zone))
+
+
+class checkImagesBot(object):
+
+    def __init__(self, site, logFulNumber=25000, sendemailActive=False,
+                 duplicatesReport=False, logFullError=True):
+        """Set up the bot for *site*.
+
+        Looks up the per-site settings in the module-level translation
+        tables, reads the bot's own user name from config.usernames and
+        loads the list of licenses once.
+
+        @param site: the site whose images are checked
+        @param logFulNumber: stored as self.logFulNumber
+        @param sendemailActive: stored as self.sendemailActive
+        @param duplicatesReport: stored as self.duplicatesReport
+        @param logFullError: stored as self.logFullError
+        @raise pywikibot.NoUsername: if config.usernames has no entry for the
+            language of *site*
+        """
+        self.site = site
+        self.logFullError = logFullError
+        self.logFulNumber = logFulNumber
+        self.rep_page = pywikibot.translate(self.site, report_page,
+                                            fallback=False)
+        self.rep_text = pywikibot.translate(self.site, report_text,
+                                            fallback=False)
+        self.com = pywikibot.translate(self.site, msg_comm10)
+        # "or []": presumably translate() yields nothing for a site that is
+        # missing from the table (cf. the "if upBots:" guard in tag_image).
+        hiddentemplatesRaw = pywikibot.translate(self.site, HiddenTemplate,
+                                                 fallback=False) or []
+        self.hiddentemplates = set([pywikibot.Page(self.site, tmp)
+                                    for tmp in hiddentemplatesRaw])
+        self.pageHidden = pywikibot.translate(self.site,
+                                              PageWithHiddenTemplates,
+                                              fallback=False)
+        self.pageAllowed = pywikibot.translate(self.site,
+                                               PageWithAllowedTemplates,
+                                               fallback=False)
+        self.comment = pywikibot.translate(self.site, msg_comm)
+        # Adding the bot's nickname at the notification text if needed.
+        botolist = pywikibot.translate(self.site, bot_list, fallback=False)
+        # NOTE(review): the family is taken from the default site, not from
+        # self.site -- confirm that this is intended.
+        project = pywikibot.Site().family.name
+        self.project = project
+        bot = config.usernames[project]
+        try:
+            botnick = bot[self.site.lang]
+        except KeyError:
+            raise pywikibot.NoUsername(
+                u"You have to specify an username for your bot in this project "
+                u"in the user-config.py file.")
+
+        self.botnick = botnick
+        # Build a new list instead of appending: presumably the translated
+        # value is the module-level bot_list entry itself, so append() would
+        # add the nick to that shared list on every construction.
+        self.botolist = list(botolist or []) + [botnick]
+
+        self.sendemailActive = sendemailActive
+        self.skip_list = []
+        self.duplicatesReport = duplicatesReport
+
+        self.image_namespace = u"File:"
+        # Load the licenses only once, so do it once
+        self.list_licenses = self.load_licenses()
+
+    def setParameters(self, imageName):
+        """Select the image the bot works on next.
+
+        Stores the name, builds the matching ImagePage on self.site and
+        resets the stored timestamp and uploader to None.
+        """
+        self.imageName = imageName
+        self.image = pywikibot.ImagePage(self.site, imageName)
+        self.timestamp = self.uploader = None
+
+    def report(self, newtext, image_to_report, notification=None, head=None,
+               notification2=None, unver=True, commTalk=None, commImage=None):
+        """Tag an image and, when requested, notify its uploader.
+
+        The arguments are stored on the instance; tag_image() and
+        put_mex_in_talk() then do the actual work.
+
+        @param newtext: stored as self.newtext (the text tag_image() adds)
+        @param image_to_report: stored as self.image_to_report
+        @param notification: message for the uploader; put_mex_in_talk() is
+            only called when it is not empty. __botnick__ is replaced by
+            self.botnick.
+        @param head: stored as self.head (u'' when not given)
+        @param notification2: second message, __botnick__ is replaced too
+        @param unver: passed to tag_image() as its "put" argument
+        @param commTalk: stored as self.commTalk
+        @param commImage: stored as self.commImage, defaults to self.comment
+        """
+        self.image_to_report = image_to_report
+        self.newtext = newtext
+        self.head = head or u''
+        # Plain string replacement: with re.sub() the nick was interpreted as
+        # a replacement template (backslashes, group references).
+        if notification:
+            notification = notification.replace('__botnick__', self.botnick)
+        if notification2:
+            notification2 = notification2.replace('__botnick__', self.botnick)
+        self.notification = notification
+        self.notification2 = notification2
+        self.commTalk = commTalk
+        self.commImage = commImage or self.comment
+
+        try:
+            resPutMex = self.tag_image(unver)
+        except pywikibot.NoPage:
+            pywikibot.output(u"The page has been deleted! Skip!")
+            return
+        except pywikibot.EditConflict:
+            pywikibot.output(u"Edit conflict! Skip!")
+            return
+        if not resPutMex or not self.notification:
+            return
+
+        try:
+            self.put_mex_in_talk()
+        except pywikibot.EditConflict:
+            pywikibot.output(u"Edit Conflict! Retrying...")
+            # One retry only. The former "while True" loop did not stop after
+            # a successful retry and tagged the image a second time.
+            try:
+                self.put_mex_in_talk()
+            except Exception:
+                # Exception instead of a bare except, so that
+                # KeyboardInterrupt and SystemExit still get through.
+                pywikibot.output(
+                    u"Another error... skipping the user..")
+
+    def uploadBotChangeFunction(self, reportPageText, upBotArray):
+        """Return the user who uploaded a file through an upload bot.
+
+        upBotArray holds the bot name at index 0 and a regex at index 1. The
+        first result of the regex in reportPageText is returned; when nothing
+        is found the bot name is returned instead.
+        """
+        matches = re.findall(upBotArray[1], reportPageText)
+        if not matches:
+            # we can't find the user, report the problem to the bot
+            return upBotArray[0]
+        return matches[0]
+
+    def tag_image(self, put=True):
+        """Add the template to the image and find out who uploaded the file.
+
+        Works on self.image_to_report and sets self.talk_page and self.luser.
+
+        @param put: when False the description page is not edited and only
+            the uploader is looked up
+        @return: True on success; None when the description page does not
+            exist, the file is locked or the uploader cannot be retrieved
+        """
+        # Get the image's description
+        reportPageObject = pywikibot.ImagePage(self.site, self.image_to_report)
+
+        try:
+            reportPageText = reportPageObject.get()
+        except pywikibot.NoPage:
+            # Name the page that was actually requested; self.imageName is
+            # only set by setParameters() and may be a different file.
+            pywikibot.output(u'%s has been deleted...' % self.image_to_report)
+            return
+        # You can use this function also to find only the user that
+        # has upload the image (FixME: Rewrite a bit this part)
+        if put:
+            pywikibot.showDiff(reportPageText,
+                               self.newtext + "\n" + reportPageText)
+            pywikibot.output(self.commImage)
+            try:
+                reportPageObject.put(self.newtext + "\n" + reportPageText,
+                                     comment=self.commImage)
+            except pywikibot.LockedPage:
+                pywikibot.output(u'File is locked. Skipping.')
+                return
+        # Use the stored uploader when the reported page is the current
+        # image, otherwise ask the page for its latest uploader.
+        try:
+            if reportPageObject == self.image and self.uploader:
+                nick = self.uploader
+            else:
+                nick = reportPageObject.getLatestUploader()[0]
+        except pywikibot.NoPage:
+            pywikibot.output(
+                u"Seems that %s has only the description and not the file..."
+                % self.image_to_report)
+            repme = u"\n*[[:File:%s]] problems '''with the APIs'''"
+            self.report_image(self.image_to_report, self.rep_page, self.com,
+                              repme)
+            return
+        upBots = pywikibot.translate(self.site, uploadBots, fallback=False)
+        luser = pywikibot.url2link(nick, self.site, self.site)
+
+        if upBots:
+            # The file came through an upload bot: look for the real user in
+            # the description text.
+            for upBot in upBots:
+                if upBot[0] == luser:
+                    luser = self.uploadBotChangeFunction(reportPageText, upBot)
+        talk_page = pywikibot.Page(self.site,
+                                   u"%s:%s" % (self.site.namespace(3), luser))
+        self.talk_page = talk_page
+        self.luser = luser
+        return True
+
+    def put_mex_in_talk(self):
+        """Put the warning on the uploader's talk page, optionally mailing too.
+
+        Normally self.head + self.notification is appended to the talk page.
+        If the latest edit of the talk page was made by one of the accounts
+        in self.botolist, self.notification2 is appended instead, unless the
+        newest and the oldest fetched revision carry the same timestamp.
+        A redirected talk page is followed once; a missing page is started
+        from the translated ``empty`` text.
+
+        When self.sendemailActive is set and both the e-mail page and the
+        e-mail subject are configured for the site, the text of that page is
+        also mailed to self.luser.
+
+        """
+        commento2 = pywikibot.translate(self.site, msg_comm2)
+        emailPageName = pywikibot.translate(self.site, emailPageWithText,
+                                            fallback=False)
+        emailSubj = pywikibot.translate(self.site, emailSubject, fallback=False)
+        if self.notification2:
+            self.notification2 = self.notification2 % self.image_to_report
+        else:
+            self.notification2 = self.notification
+        second_text = False
+        # Look at the latest editors of the talk page to find out whether
+        # the bot already left a message there.
+        try:
+            testoattuale = self.talk_page.get()
+            history = self.talk_page.getLatestEditors(limit=10)
+            latest_user = history[0]["user"]
+            pywikibot.output(
+                u'The latest user that has written something is: %s'
+                % latest_user)
+            if latest_user in self.botolist:
+                # No second message if the bot also welcomed the user: then
+                # the newest and the oldest entry have the same timestamp.
+                second_text = (history[0]['timestamp'] !=
+                               history[-1]['timestamp'])
+        except pywikibot.IsRedirectPage:
+            pywikibot.output(
+                u'The user talk is a redirect, trying to get the right talk...')
+            try:
+                self.talk_page = self.talk_page.getRedirectTarget()
+                testoattuale = self.talk_page.get()
+            except pywikibot.NoPage:
+                second_text = False
+                testoattuale = pywikibot.translate(self.site, empty,
+                                                   fallback=False)
+        except pywikibot.NoPage:
+            pywikibot.output(u'The user page is blank')
+            second_text = False
+            testoattuale = pywikibot.translate(self.site, empty, fallback=False)
+
+        # An explicitly given summary wins over the translated default one.
+        commentox = self.commTalk if self.commTalk else commento2
+
+        if second_text:
+            newText = u"%s\n\n%s" % (testoattuale, self.notification2)
+        else:
+            newText = testoattuale + self.head + self.notification
+
+        try:
+            self.talk_page.put(newText, comment=commentox, minorEdit=False)
+        except pywikibot.LockedPage:
+            pywikibot.output(u'Talk page blocked, skip.')
+
+        # Nothing to mail: do not even fetch the page with the e-mail text.
+        if not (emailPageName and emailSubj and self.sendemailActive):
+            return
+        try:
+            emailText = pywikibot.Page(self.site, emailPageName).get()
+        except (pywikibot.NoPage, pywikibot.IsRedirectPage):
+            return
+        # Plain replacement: re.sub() would interpret backslashes and group
+        # references contained in the user name.
+        text_to_send = emailText.replace(u'__user-nickname__', self.luser)
+        emailClass = pywikibot.User(self.site, self.luser)
+        try:
+            emailClass.sendMail(emailSubj, text_to_send)
+        except pywikibot.UserActionRefuse:
+            pywikibot.output("User is not mailable, aborted")
+
+    def untaggedGenerator(self, untaggedProject, limit):
+        """Yield the files without a license found by a toolserver tool.
+
+        untaggedProject is a dotted project name: the part before the first
+        dot is sent as the language, the rest (with its leading dot) as the
+        family. limit is sent as the maximum number of results; for commons
+        a fixed query is used instead. Raises NothingFound when no file title
+        can be extracted from the answer.
+
+        """
+        parts = untaggedProject.split('.', 1)
+        lang = parts[0]
+        project = '.%s' % parts[1]
+
+        URL = u'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?'
+        if lang != 'commons':
+            query = (
+                'wikilang=%s&wikifam=%s&order=img_timestamp&max=%s&ofs=0&max=%s'
+                % (lang, project, limit, limit)
+            )
+        else:
+            query = 'wikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=100&order=img_timestamp&format=html'
+        link = URL + query
+
+        text = self.site.getUrl(link, no_hostname=True)
+        titles = re.findall(
+            r"<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>",
+            text)
+        if not titles:
+            # Show the queried address so that it can be tried by hand.
+            pywikibot.output(link)
+            raise NothingFound(
+                u'Nothing found! Try to use the tool by yourself to be sure '
+                u'that it works!')
+        for title in titles:
+            yield pywikibot.ImagePage(self.site, title)
+
+    def regexGenerator(self, regexp, textrun):
+        """Yield a file page for every match of a user-given regex.
+
+        regexp is compiled with re.UNICODE and re.DOTALL and applied to
+        textrun; each findall() result is used as the title of an ImagePage.
+
+        """
+        flags = re.UNICODE | re.DOTALL
+        pattern = re.compile(r'%s' % regexp, flags)
+        for title in pattern.findall(textrun):
+            yield pywikibot.ImagePage(self.site, title)
+
+    def loadHiddenTemplates(self):
+        """Add the whitelisted templates to self.hiddentemplates, return it.
+
+        One template per language code of the wikipedia family is added,
+        followed by the templates listed on the page named by
+        self.pageHidden, if that is set. A missing or redirected page counts
+        as an empty list.
+
+        """
+        # A template such as {{en}} is not a license, so the language
+        # templates are whitelisted as well.
+        language_codes = pywikibot.site.Family('wikipedia').langs
+        for code in language_codes:
+            title = u'Template:%s' % code
+            self.hiddentemplates.add(pywikibot.Page(self.site, title))
+
+        if not self.pageHidden:
+            return self.hiddentemplates
+
+        # Templates listed on the wiki page of hidden templates
+        try:
+            listing = pywikibot.Page(self.site, self.pageHidden).get()
+        except (pywikibot.NoPage, pywikibot.IsRedirectPage):
+            listing = ''
+        for name in self.load(listing):
+            self.hiddentemplates.add(pywikibot.Page(self.site, name))
+        return self.hiddentemplates
+
+    def returnOlderTime(self, listGiven, timeListGiven):
+        """Return the name of the file to keep among the given ones.
+
+        listGiven holds [time, file name] entries, timeListGiven the times
+        alone. The file used on the most pages is returned. When none of the
+        files is used on any page, the first file whose time is not greater
+        than any time in timeListGiven (i.e. the oldest one) is returned.
+        Returns None when listGiven is empty.
+
+        """
+        most_used = None
+        max_usage = 0
+        for num, element in enumerate(listGiven):
+            imagePage = pywikibot.ImagePage(self.site, element[1])
+            usage_count = len(list(imagePage.usingPages()))
+            if usage_count > max_usage:
+                max_usage = usage_count
+                most_used = num
+
+        # Compare with None: index 0 is a valid result but a false value.
+        if most_used is not None:
+            return listGiven[most_used][1]
+
+        for element in listGiven:
+            upload_time = element[0]
+            if not any(upload_time > other for other in timeListGiven):
+                return element[1]
+
+    def convert_to_url(self, page):
+        """Return the given page title in a form suitable for a URL.
+
+        Spaces are replaced by underscores, the title is encoded with the
+        encoding of self.site and the result is percent-quoted.
+        (Function taken from wikipedia.py.)
+
+        """
+        underscored = page.replace(u" ", u"_")
+        return urllib.quote(underscored.encode(self.site.encoding()))
+
+    def countEdits(self, pagename, userlist):
+        """Count the edits of a user or of a list of users in a page.
+
+        pagename is the title of the page, userlist either a single user
+        name or an iterable of user names. Returns how many entries of the
+        version history have one of these names in their third field.
+
+        """
+        # basestring rather than str: a single name given as unicode must
+        # not be iterated character by character.
+        if isinstance(userlist, basestring):
+            userlist = [userlist]
+        page = pywikibot.Page(self.site, pagename)
+        editors = [data[2] for data in page.getVersionHistory()]
+        return sum(editors.count(username) for username in userlist)
+
+    def checkImageOnCommons(self):
+        """Check whether the file is also on Commons and report it if so.
+
+        The file is looked up on Commons by its hash. Service images and,
+        on the Italian site, files with "stemma" in the name are not
+        reported. Returns True when the remaining checks should go on and
+        None when the file has to be skipped (no hash found, or the file
+        itself is the shared one and only something was added locally).
+
+        """
+        pywikibot.output(u'Checking if [[%s]] is on commons...'
+                         % self.imageName)
+        commons_site = pywikibot.Site('commons', 'commons')
+        # Brackets and parentheses are escaped so that they match the
+        # literal wikitext of the report line.
+        regexOnCommons = r"\[\[:File:%s\]\] is also on '''Commons''': \[\[commons:File:.*?\]\](?: \(same name\)|)$" \
+                         % re.escape(self.imageName)
+        hash_found = self.image.getHash()
+        if not hash_found:
+            return  # Image deleted, no hash found. Skip the image.
+
+        commons_image_with_this_hash = commons_site.getFilesFromAnHash(
+            hash_found)
+        # Compared by value: identity with a string literal is not a
+        # reliable test.
+        if not commons_image_with_this_hash or \
+           commons_image_with_this_hash == 'None':
+            return True
+
+        servTMP = pywikibot.translate(self.site, serviceTemplates,
+                                      fallback=False)
+        templatesInTheImage = self.image.getTemplates()
+        if servTMP is not None:
+            for template in servTMP:
+                if pywikibot.Page(self.site,
+                                  template) in templatesInTheImage:
+                    pywikibot.output(
+                        u"%s is on commons but it's a service image."
+                        % self.imageName)
+                    return True  # continue with the check-part
+
+        pywikibot.output(u'%s is on commons!' % self.imageName)
+        on_commons_text = self.image.getImagePageHtml()
+        if re.search(ur"<div class=(?:'|\")sharedUploadNotice(?:'|\")>",
+                     on_commons_text):
+            pywikibot.output(
+                u"But, the file doesn't exist on your project! Skip...")
+            # We have to skip the check part for that image because
+            # it's on commons but someone has added something on your
+            # project.
+            return
+        if re.findall(r'\bstemma\b', self.imageName.lower()) and \
+           self.site.lang == 'it':
+            pywikibot.output(
+                u'%s has "stemma" inside, means that it\'s ok.'
+                % self.imageName)
+            return True
+        # It's not only on commons but the image needs a check.
+        # The name found on Commons may be in another form, so both names
+        # are compared in their URL form.
+        commons_name = commons_image_with_this_hash[0]
+        repme = u"\n*[[:File:%s]] is also on '''Commons''': [[commons:File:%s]]" \
+                % (self.imageName, commons_name)
+        if self.convert_to_url(self.imageName) \
+           == self.convert_to_url(commons_name):
+            repme += u" (same name)"
+        self.report_image(self.imageName, self.rep_page, self.com, repme,
+                          addings=False, regex=regexOnCommons)
+        return True
+
+    def checkImageDuplicated(self, duplicates_rollback):
+        """Check whether the file has duplicates on the wiki and handle them.
+
+        The duplicates are found through the hash of the file. If the site
+        has a duplicate template and regex configured, the file chosen by
+        returnOlderTime() is kept and the other ones are tagged; the uploader
+        is notified only together with the last tagged file. A file whose
+        page already has at least duplicates_rollback edits by the accounts
+        in self.botolist is not tagged again but written to the report page
+        ("forced mode"). With self.duplicatesReport the duplicates are
+        written to the report page in any case.
+
+        Returns True when the remaining checks should go on and None when
+        the file has to be skipped.
+
+        """
+        dupText = pywikibot.translate(self.site, duplicatesText, fallback=False)
+        dupRegex = pywikibot.translate(self.site, duplicatesRegex,
+                                       fallback=False)
+        dupTalkHead = pywikibot.translate(self.site, duplicate_user_talk_head,
+                                          fallback=False)
+        dupTalkText = pywikibot.translate(self.site, duplicates_user_talk_text,
+                                          fallback=False)
+        dupComment_talk = pywikibot.translate(self.site,
+                                              duplicates_comment_talk,
+                                              fallback=False)
+        dupComment_image = pywikibot.translate(self.site,
+                                               duplicates_comment_image,
+                                               fallback=False)
+        # The brackets are escaped so that they match the literal wikitext
+        # of the report line instead of opening a character class.
+        duplicateRegex = r'\[\[:File:%s\]\] has the following duplicates' \
+                         % re.escape(self.convert_to_url(self.imageName))
+        imagePage = pywikibot.ImagePage(self.site, self.imageName)
+        hash_found = imagePage.getHash()
+        duplicates = self.site.getFilesFromAnHash(hash_found)
+
+        if not duplicates:
+            return  # Error, image deleted, no hash found. Skip the image.
+
+        if len(duplicates) > 1:
+            # Defaults for sites without duplicate template or regex: the
+            # tagging part is skipped there, so no other file is picked as
+            # the one to keep and nothing forces the report-only mode.
+            older_image = self.imageName
+            # True if the images are not to be tagged as dupes
+            only_report = False
+
+            if len(duplicates) == 2:
+                pywikibot.output(u'%s has a duplicate! Reporting it...'
+                                 % self.imageName)
+            else:
+                pywikibot.output(u'%s has %s duplicates! Reporting them...'
+                                 % (self.imageName, len(duplicates) - 1))
+
+            if dupText and dupRegex:
+                time_image_list = []
+                time_list = []
+
+                for duplicate in duplicates:
+                    DupePage = pywikibot.ImagePage(self.site, duplicate)
+
+                    # NOTE(review): this overwrites self.timestamp with the
+                    # upload time of a duplicate; confirm that no later step
+                    # expects the timestamp of the checked file itself.
+                    if DupePage.urlname() != self.image.urlname() or \
+                       self.timestamp is None:
+                        self.timestamp = DupePage.getLatestUploader()[1]
+                    data = time.strptime(self.timestamp, u"%Y-%m-%dT%H:%M:%SZ")
+                    data_seconds = time.mktime(data)
+                    time_image_list.append([data_seconds, duplicate])
+                    time_list.append(data_seconds)
+                older_image = self.returnOlderTime(time_image_list, time_list)
+                # And if the images are more than two?
+                Page_oder_image = pywikibot.ImagePage(self.site, older_image)
+                string = ''
+                images_to_tag_list = []
+
+                for duplicate in duplicates:
+                    DupePage = pywikibot.ImagePage(self.site, duplicate)
+                    if DupePage == Page_oder_image:
+                        # the older image, not report also this as duplicate
+                        continue
+                    try:
+                        DupPageText = DupePage.get()
+                        older_page_text = Page_oder_image.get()
+                    except pywikibot.NoPage:
+                        continue  # The page doesn't exist
+
+                    if not (re.findall(dupRegex, DupPageText) or
+                            re.findall(dupRegex, older_page_text)):
+                        pywikibot.output(
+                            u'%s is a duplicate and has to be tagged...'
+                            % duplicate)
+                        images_to_tag_list.append(duplicate)
+                        string += u"*[[:%s%s]]\n" % (self.image_namespace,
+                                                     duplicate)
+                    else:
+                        pywikibot.output(
+                            u"Already put the dupe-template in the files's page"
+                            u" or in the dupe's page. Skip.")
+                        return  # Already tagged, nothing more to do here
+
+                older_image_ns = u'%s%s' % (self.image_namespace, older_image)
+
+                # put only one image or the whole list according to the request
+                if u'__images__' in dupText:
+                    text_for_the_report = re.sub(r'__images__',
+                                                 r'\n%s*[[:%s]]\n'
+                                                 % (string, older_image_ns),
+                                                 dupText)
+                else:
+                    text_for_the_report = re.sub(r'__image__',
+                                                 r'%s' % older_image_ns,
+                                                 dupText)
+
+                # Two iteration: report the "problem" to the user only once
+                # (the last)
+                if len(images_to_tag_list) > 1:
+                    for image_to_tag in images_to_tag_list[:-1]:
+                        already_reported_in_past = self.countEdits(
+                            u'File:%s' % image_to_tag, self.botolist)
+                        # if you want only one edit, the edit found should be
+                        # more than 0 -> num - 1
+                        if already_reported_in_past > duplicates_rollback - 1:
+                            only_report = True
+                            break
+                        # Delete the image in the list where we're write on
+                        text_for_the_report = re.sub(
+                            r'\n\*\[\[:%s\]\]'
+                            % re.escape(self.image_namespace + image_to_tag),
+                            '', text_for_the_report)
+                        self.report(text_for_the_report, image_to_tag,
+                                    commImage=dupComment_image, unver=True)
+
+                if len(images_to_tag_list) != 0 and not only_report:
+                    already_reported_in_past = self.countEdits(
+                        u'File:%s' % images_to_tag_list[-1], self.botolist)
+                    from_regex = r'\n\*\[\[:File:%s\]\]' \
+                                 % re.escape(self.convert_to_url(
+                                     self.imageName))
+                    # Delete the image in the list where we're write on
+                    text_for_the_report = re.sub(from_regex, '',
+                                                 text_for_the_report)
+                    # if you want only one edit, the edit found should be more
+                    # than 0 -> num - 1
+                    if already_reported_in_past > duplicates_rollback - 1:
+                        only_report = True
+                    else:
+                        self.report(text_for_the_report, images_to_tag_list[-1],
+                                    dupTalkText % (older_image_ns, string),
+                                    dupTalkHead, commTalk=dupComment_talk,
+                                    commImage=dupComment_image, unver=True)
+
+            if self.duplicatesReport or only_report:
+                if only_report:
+                    repme = u"\n*[[:File:%s]] has the following duplicates ('''forced mode'''):" \
+                            % self.convert_to_url(self.imageName)
+                else:
+                    repme = u"\n*[[:File:%s]] has the following duplicates:" \
+                            % self.convert_to_url(self.imageName)
+
+                for duplicate in duplicates:
+                    if self.convert_to_url(duplicate) == \
+                       self.convert_to_url(self.imageName):
+                        # the image itself, not report also this as duplicate
+                        continue
+                    repme += u"\n**[[:File:%s]]" \
+                             % self.convert_to_url(duplicate)
+
+                result = self.report_image(self.imageName, self.rep_page,
+                                           self.com, repme, addings=False,
+                                           regex=duplicateRegex)
+                if not result:
+                    return True  # If Errors, exit (but continue the check)
+
+            if older_image != self.imageName:
+                # The image is a duplicate, it will be deleted. So skip the
+                # check-part, useless
+                return
+        return True  # Ok - No problem. Let's continue the checking phase
+
+    def report_image(self, image_to_report, rep_page=None, com=None,
+                     rep_text=None, addings=True, regex=None):
+        """Add a file to the report page unless it is already listed there.
+
+        image_to_report -- name of the file
+        rep_page -- title of the report page (default: self.rep_page)
+        com -- edit summary (default: self.com)
+        rep_text -- text to append (default: self.rep_text)
+        addings -- if true, rep_text is a format string that receives the
+                   name of the file
+        regex -- pattern telling that the file is already in the report
+                 (default: the name of the file, taken literally)
+
+        Returns True if the text was appended, or if the report page is full
+        and self.logFullError is not set; returns False if the file was
+        already listed. Raises LogIsFull if the report page is full and
+        self.logFullError is set.
+
+        """
+        if not rep_page:
+            rep_page = self.rep_page
+
+        if not com:
+            com = self.com
+
+        if not rep_text:
+            rep_text = self.rep_text
+
+        another_page = pywikibot.Page(self.site, rep_page)
+
+        if not regex:
+            # The name is data, not a pattern: escape it so that characters
+            # such as '(' or '+' are matched literally.
+            regex = re.escape(image_to_report)
+        try:
+            text_get = another_page.get()
+        except pywikibot.NoPage:
+            text_get = ''
+        except pywikibot.IsRedirectPage:
+            text_get = another_page.getRedirectTarget().get()
+
+        if len(text_get) >= self.logFulNumber:
+            if self.logFullError:
+                raise LogIsFull(
+                    u"The log page (%s) is full! Please delete the old files "
+                    u"reported." % another_page.title())
+            else:
+                pywikibot.output(
+                    u"The log page (%s) is full! Please delete the old files "
+                    u" reported. Skip!" % another_page.title())
+                # Don't report, but continue with the check
+                # (we don't know if this is the first time we check this
+                # file or not)
+                return True
+
+        if re.search(regex, text_get, re.UNICODE | re.DOTALL):
+            pywikibot.output(u"%s is already in the report page."
+                             % image_to_report)
+            return False
+
+        # Adding the log
+        if addings:
+            # Adding the name of the image in the report if not done already
+            rep_text = rep_text % image_to_report
+        another_page.put(text_get + rep_text, comment=com, force=True,
+                         minorEdit=False)
+        pywikibot.output(u"...Reported...")
+        return True
+
+    def takesettings(self):
+        """Load the additional settings from the wiki into self.settingsData.
+
+        Every block of the settings page that matches the expected layout
+        becomes a list [number, name, find type, find, imagechanges,
+        summary, head, text, mex], numbered from 1. self.settingsData is
+        None when the site has no settings page, the page does not exist,
+        no block matches or loading fails. Returns self.settingsData.
+
+        """
+        settingsPage = pywikibot.translate(self.site, page_with_settings,
+                                           fallback=False)
+        try:
+            if not settingsPage:
+                self.settingsData = None
+            else:
+                wikiPage = pywikibot.Page(self.site, settingsPage)
+                self.settingsData = list()
+                try:
+                    testo = wikiPage.get()
+                    # Every line of the pattern is a raw string, so '\*' is
+                    # a literal asterisk and '\"' a literal double quote.
+                    r = re.compile(
+                        r"<------- ------->\n"
+                        r"\*[Nn]ame ?= ?['\"](.*?)['\"]\n"
+                        r"\*([Ff]ind|[Ff]indonly)=(.*?)\n"
+                        r"\*[Ii]magechanges=(.*?)\n"
+                        r"\*[Ss]ummary=['\"](.*?)['\"]\n"
+                        r"\*[Hh]ead=['\"](.*?)['\"]\n"
+                        r"\*[Tt]ext ?= ?['\"](.*?)['\"]\n"
+                        r"\*[Mm]ex ?= ?['\"]?([^\n]*?)['\"]?\n",
+                        re.UNICODE | re.DOTALL)
+
+                    # NOTE(review): str() presumably fails on non-ASCII
+                    # settings if get() returns unicode -- confirm.
+                    for number, m in enumerate(r.finditer(testo), 1):
+                        tupla = [number] + [str(group) for group in m.groups()]
+                        self.settingsData.append(tupla)
+
+                    if not self.settingsData:
+                        pywikibot.output(
+                            u"You've set wrongly your settings, please take a "
+                            u"look to the relative page. (run without them)")
+                        self.settingsData = None
+                except pywikibot.NoPage:
+                    pywikibot.output(u"The settings' page doesn't exist!")
+                    self.settingsData = None
+        except pywikibot.Error:
+            pywikibot.output(
+                u'Problems with loading the settigs, run without them.')
+            self.settingsData = None
+            self.some_problem = False
+
+        # Real-Time page loaded
+        if self.settingsData:
+            pywikibot.output(u'>> Loaded the real-time page... <<')
+        else:
+            pywikibot.output(u'>> No additional settings found! <<')
+        return self.settingsData  # Useless, but it doesn't harm..
+
+    def load_licenses(self):
+        """Return the list of the pages that count as allowed licenses.
+
+        The list starts with the articles of the license category translated
+        for the site; on commons the articles of 'License-related tags' are
+        taken out again. The pages listed on the page named by
+        self.pageAllowed, if set, are appended when not yet present.
+        Raises pywikibot.Error if no license category is configured.
+
+        """
+        license_category = pywikibot.translate(self.site,
+                                               category_with_licenses,
+                                               fallback=False)
+        if not license_category:
+            raise pywikibot.Error(
+                u'No licenses allowed provided, add that option to the code to '
+                u'make the script working correctly')
+        pywikibot.output(u'\nLoading the allowed licenses...\n')
+        cat = pywikibot.Category(self.site, license_category)
+        allowed = list(cat.articles())
+        if self.site.lang == 'commons':
+            related_tags = pywikibot.Category(self.site,
+                                              'License-related tags')
+            for tag_page in related_tags.articles():
+                if tag_page in allowed:
+                    allowed.remove(tag_page)
+        pywikibot.output('')
+
+        if not self.pageAllowed:
+            return allowed
+
+        # Add the licenses set in the default page as licenses to check
+        try:
+            listing = pywikibot.Page(self.site, self.pageAllowed).get()
+        except (pywikibot.NoPage, pywikibot.IsRedirectPage):
+            listing = ''
+
+        for license_name in self.load(listing):
+            license_page = pywikibot.Page(self.site, license_name)
+            if license_page not in allowed:
+                allowed.append(license_page)  # the list has wiki-pages
+        return allowed
+
+    def miniTemplateCheck(self, template):
+        """
+        Tell whether the given template is one of the allowed licenses.
+
+        For an allowed license its title (without namespace) is stored as
+        the selected and the found license, self.seems_ok is set and True is
+        returned. A hidden template is taken out of self.allLicenses and, if
+        it was in there, self.whiteTemplatesFound is set. None is returned
+        for everything that is not an allowed license.
+
+        """
+        # self.list_licenses is loaded once in __init__, not on every call
+        if template in self.list_licenses:
+            name = template.title(withNamespace=False)
+            self.license_selected = name
+            self.seems_ok = True
+            # let the last "fake" license normally detected
+            self.license_found = name
+            return True
+
+        if template not in self.hiddentemplates:
+            return
+
+        # A white template that is not in the description does not matter.
+        if template in self.allLicenses:
+            self.allLicenses.remove(template)
+            self.whiteTemplatesFound = True
+
+    def templateInList(self):
+        """
+        Look for an allowed license among the templates in
+        self.licenses_found.
+
+        Calls to the MediaWiki system can be pretty slow while searching in
+        a list of objects is really fast. So the templates are first checked
+        as they are; only if no license was found that way each template is
+        queried, and the target of every redirected template is checked too.
+
+        """
+        for candidate in self.licenses_found:
+            if self.miniTemplateCheck(candidate):
+                break
+
+        if self.license_found:
+            return
+
+        # Deeper check: follow the templates that are redirects.
+        for candidate in self.licenses_found:
+            try:
+                candidate.pageAPInfo()
+            except pywikibot.IsRedirectPage:
+                target = candidate.getRedirectTarget()
+                if self.miniTemplateCheck(target):
+                    break
+            except pywikibot.NoPage:
+                continue
+
+    def smartDetection(self):
+        """The bot instead of checking if there's a simple template in the
+        image's description, checks also if that template is a license or
+        something else. In this sense this type of check is smart.
+
+        """
+        self.seems_ok = False
+        self.license_found = None
+        self.whiteTemplatesFound = False
+        regex_find_licenses = re.compile(
+            r'(?<!{){{(?:[Tt]emplate:|)([^{]+?)[|\n<}]', re.DOTALL)
+        regex_are_licenses = re.compile(
+            r'(?<!{){{(?:[Tt]emplate:|)([^{]+?)}}', re.DOTALL)
+        while True:
+            self.hiddentemplates = self.loadHiddenTemplates()
+            self.licenses_found = self.image.templates()
+            templatesInTheImageRaw = regex_find_licenses.findall(
+                self.imageCheckText)
+
+            if not self.licenses_found and templatesInTheImageRaw:
+                # {{nameTemplate|something <- this is not a template, be sure
+                # that we haven't catch something like that.
+                licenses_TEST = regex_are_licenses.findall(self.imageCheckText)
+                if not self.licenses_found and licenses_TEST:
+                    raise pywikibot.Error(
+                        "APIs seems down. No templates found with them but "
+                        "actually there are templates used in the image's "
+                        "page!")
+            self.allLicenses = []
+
+            if not self.list_licenses:
+                raise pywikibot.Error(
+                    u'No licenses allowed provided, add that option to the '
+                    u'code to make the script working correctly')
+
+            # Found the templates ONLY in the image's description
+            for template_selected in templatesInTheImageRaw:
+                for templateReal in self.licenses_found:
+                    if self.convert_to_url(
+                        template_selected).lower().replace('template%3a', '') \
+                        == self.convert_to_url(
+                            templateReal.title()).lower().replace('template%3a',
+                                                                  ''):
+                        if templateReal not in self.allLicenses:
+                            self.allLicenses.append(templateReal)
+            break
+
+        if self.licenses_found:
+            self.templateInList()
+
+            if not self.license_found and self.allLicenses:
+                # If only iterlist = self.AllLicenses if I remove something
+                # from iterlist it will be remove from self.AllLicenses too
+                iterlist = list(self.allLicenses)
+
+                for template in iterlist:
+                    try:
+                        template.pageAPInfo()
+                    except pywikibot.IsRedirectPage:
+                        template = template.getRedirectTarget()
+                    except pywikibot.NoPage:
+                        self.allLicenses.remove(template)
+
+                if self.allLicenses:
+                    self.license_found = self.allLicenses[0].title()
+        self.some_problem = False  # If it has "some_problem" it must check
+                                   # the additional settings.
+        # if self.settingsData, use addictional settings
+        if self.settingsData:
+            self.findAdditionalProblems()
+
+        if self.some_problem:
+            if self.mex_used in self.imageCheckText:
+                pywikibot.output(u'File already fixed. Skipping.')
+            else:
+                pywikibot.output(
+                    u"The file's description for %s contains %s..."
+                    % (self.imageName, self.name_used))
+                if self.mex_used.lower() == 'default':
+                    self.mex_used = self.unvertext
+                if self.imagestatus_used:
+                    reported = True
+                else:
+                    reported = self.report_image(self.imageName)
+                if reported:
+                    #if self.imagestatus_used:
+                    self.report(self.mex_used, self.imageName, self.text_used,
+                                u"\n%s\n" % self.head_used, None,
+                                self.imagestatus_used, self.summary_used)
+                else:
+                    pywikibot.output(u"Skipping the file...")
+                self.some_problem = False
+        else:
+            if not self.seems_ok and self.license_found:
+                rep_text_license_fake = u"\n*[[:File:%s]] seems to have " \
+                                        % self.imageName + \
+                    "a ''fake license'', license detected: <nowiki>%s</nowiki>" \
+                                        % self.license_found
+                regexFakeLicense = r"* ?[[:File:%s]] seems to have " \
+                                   % (re.escape(self.imageName)) + \
+                    "a ''fake license'', license detected: <nowiki>%s</nowiki>$" \
+                                   % (re.escape(self.license_found))
+                printWithTimeZone(
+                    u"%s seems to have a fake license: %s, reporting..."
+                    % (self.imageName, self.license_found))
+                self.report_image(self.imageName,
+                                  rep_text=rep_text_license_fake,
+                                  addings=False, regex=regexFakeLicense)
+            elif self.license_found:
+                pywikibot.output(u"[[%s]] seems ok, license found: {{%s}}..."
+                                 % (self.imageName, self.license_found))
+        return (self.license_found, self.whiteTemplatesFound)
+
+    def load(self, raw):
+        """ Load a list of unique quoted items from a string using regex.
+
+        An item is any text enclosed in matching single or double quotes
+        that is directly followed by a comma or a closing square bracket,
+        e.g. the two items of ``['foo', "bar"]``. A doubled backslash
+        inside an item is collapsed to a single backslash.
+
+        raw -- the text containing the quoted items.
+
+        Returns the items as a list, in order of first appearance and
+        without duplicates.
+        """
+        # The quote character that opens an item must also close it (\1),
+        # and the closing quote has to be followed by "," or "]".
+        item_regex = re.compile(r"(\"|\')(.*?)\1(?:,|\])", re.UNICODE)
+        list_loaded = []
+        for match in item_regex.finditer(raw):
+            word = match.group(2).replace(u'\\\\', u'\\')
+            if word not in list_loaded:
+                list_loaded.append(word)
+        return list_loaded
+
+    def skipImages(self, skip_number, limit):
+        """ Decide whether the current file belongs to the files to skip.
+
+        skip_number -- how many files should be skipped in total.
+        limit       -- how many files are checked; caps skip_number.
+
+        Returns True when the current file (self.imageName) has been added
+        to self.skip_list and must be skipped; returns None otherwise.
+        """
+        if skip_number == 0:
+            pywikibot.output(u'\t\t>> No files to skip...<<')
+            return
+        # Never skip more files than there are files to check.
+        skip_number = min(skip_number, limit)
+        # Announce the skipping only once, before the first file is skipped.
+        if not self.skip_list:
+            announcement = pywikibot.translate(
+                'en',
+                u'Skipping the first {{PLURAL:num|file|%(num)s files}}:\n',
+                {'num': skip_number})
+            pywikibot.output(announcement)
+        # Enough files skipped already: close the list with an empty line.
+        if len(self.skip_list) >= skip_number:
+            pywikibot.output('')
+            return
+        pywikibot.output(u'Skipping %s...' % self.imageName)
+        self.skip_list.append(self.imageName)
+        if skip_number == 1:
+            pywikibot.output('')
+        return True
+
+    def wait(self, waitTime, generator, normal, limit):
+        """ Skip the images uploaded before x seconds to let
+            the users to fix the image's problem alone in the
+            first x seconds.
+
+            NOTE: the skipping logic is switched off at the moment (its
+            guard is "normal and False"), so this method only prints a
+            notice and returns the generator it was given.
+
+            waitTime  -- minimum age in seconds a file must have.
+            generator -- iterable of the files to check.
+            normal    -- whether the standard new-files generator is in use.
+            limit     -- number of files requested from the new-files log.
+
+            Returns the generator unchanged (or, in the disabled branch, a
+            list of files).
+        """
+        imagesToSkip = 0
+        # if normal, we can take as many images as "limit" has told us,
+        # otherwise, sorry, nope.
+        # "and False" deliberately disables this branch: it is never run.
+        if normal and False:
+            printWithTimeZone(
+                u'Skipping the files uploaded less than %s seconds ago..'
+                % waitTime)
+            imagesToSkip = 0
+            while True:
+                # ensure that all the images loaded aren't to skip!
+                loadOtherImages = True
+                for image in generator:
+                    try:
+                        timestamp = image.getLatestUploader()[1]
+                    except pywikibot.NoPage:
+                        continue
+                    # not relative to localtime
+                    img_time = datetime.datetime.strptime(timestamp,
+                                                          u"%Y-%m-%dT%H:%M:%SZ")
+
+                    # Current UTC time, cut down to whole seconds.
+                    now = datetime.datetime.strptime(
+                        str(datetime.datetime.utcnow()).split('.')[0],
+                        "%Y-%m-%d %H:%M:%S")  # timezones are UTC
+                    # + seconds to be sure that now > img_time
+                    while now < img_time:
+                        now = (now + datetime.timedelta(seconds=1))
+                    delta = now - img_time
+                    # NOTE(review): timedelta.seconds leaves out whole days,
+                    # so a file older than 24 hours can look recent here.
+                    secs_of_diff = delta.seconds
+                    if waitTime > secs_of_diff:
+                        pywikibot.output(
+                            u'Skipping %s, uploaded %s seconds ago..'
+                            % (image.title(), int(secs_of_diff)))
+                        imagesToSkip += 1
+                        continue  # Still wait
+                    else:
+                        loadOtherImages = False
+                        break  # Not ok, continue
+                # if yes, we have skipped all the images given!
+                if loadOtherImages:
+                    generator = (x[0] for x in
+                                 self.site.newimages(number=limit,
+                                                     lestart=timestamp))
+                    imagesToSkip = 0
+                    # continue to load images!
+                    continue
+                else:
+                    break  # ok some other images, go below
+            newGen = list()
+            imagesToSkip += 1  # some calcs, better add 1
+            # Add new images, instead of the images skipped
+            # NOTE(review): "timestamp" is only bound inside the loop above;
+            # it would be undefined here if the generator yielded nothing.
+            newImages = self.site.newimages(number=imagesToSkip,
+                                            lestart=timestamp)
+            for image in generator:
+                newGen.append(image)
+            for imageData in newImages:
+                newGen.append(imageData[0])
+            return newGen
+        else:
+##            pywikibot.output(
+##                u"The wait option is available only with the standard "
+##                u"generator.")
+            pywikibot.output(
+                u"The wait option is not available at core yet.")
+            return generator
+
+    def isTagged(self):
+        """ Understand if a file is already tagged or not.
+
+        Every entry of the translated txt_find list is looked up in
+        self.imageCheckText: entries containing "{{" are matched as a
+        template call, all other entries as plain text.
+
+        Returns True as soon as one entry is found, False otherwise.
+        """
+        # Is the image already tagged? If yes, no need to double-check, skip
+        for i in pywikibot.translate(self.site, txt_find, fallback=False):
+            # If there are {{ use regex, otherwise no (if there's not the
+            # {{ may not be a template and the regex will be wrong)
+            if '{{' in i:
+                # The template name must be followed by "|", a line break,
+                # "}", "<" or "/", so that a longer template name which
+                # merely starts with the same text is not taken as a match.
+                regexP = re.compile(
+                    r'\{\{(?:template)?%s ?(?:\||\r?\n|\}|<|/) ?'
+                    % i.split('{{')[1].replace(u' ', u'[ _]'),
+                    re.I)
+                if regexP.search(self.imageCheckText):
+                    return True
+            # NOTE(review): the entry is lowercased but the text is not, so
+            # this only matches text that is already lowercase - confirm.
+            elif i.lower() in self.imageCheckText:
+                return True
+        return False
+
+    def findAdditionalProblems(self):
+        """ Check the file description against the additional settings.
+
+        Every entry of self.settingsData describes one rule: a list of
+        regex patterns, a search mode ('find' or 'findonly') and the texts
+        to use when the rule applies. When a rule applies,
+        self.some_problem is set and the *_used attributes are filled from
+        that rule. A rule whose "imagechanges" field is neither 'true' nor
+        'false' is reported, self.settingsData is reset to None and the
+        check stops.
+        """
+        # In every tuple there's a setting configuration
+        for setting in self.settingsData:
+            rule_name = setting[1]
+            mode = setting[2]
+            patterns = self.load(setting[3])
+            changes = setting[4]
+            if changes.lower() == 'false':
+                changes_image = False
+            elif changes.lower() == 'true':
+                changes_image = True
+            else:
+                pywikibot.error(u"Imagechanges set wrongly!")
+                self.settingsData = None
+                break
+            rule_summary = setting[5]
+            heading = setting[6]
+            notice = setting[7] % self.imageName
+            marker = setting[8]
+            for pattern in patterns:
+                whole_text_only = mode.lower() == 'findonly'
+                if not whole_text_only and mode.lower() != 'find':
+                    continue
+                hits = re.findall(r'%s' % pattern.lower(),
+                                  self.imageCheckText.lower())
+                if not hits:
+                    continue
+                # 'findonly' requires the first hit to be the entire text.
+                if whole_text_only and \
+                   hits[0] != self.imageCheckText.lower():
+                    continue
+                self.some_problem = True
+                self.text_used = notice
+                self.head_used = heading
+                self.imagestatus_used = changes_image
+                self.name_used = rule_name
+                self.summary_used = rule_summary
+                self.mex_used = marker
+                # 'findonly' stops at its first hit, 'find' tries the
+                # remaining patterns as well.
+                if whole_text_only:
+                    break
+
+    def checkStep(self):
+        """ Check the description of the current file and report problems.
+
+        The description of self.image is loaded, <nowiki> and <pre>
+        sections are removed and the license detection is run. The file is
+        reported when it has a not allowed extension, an empty description
+        or a description without a detected license.
+
+        Every visible exit of this method returns True.
+        """
+        # nothing = Defining an empty image description
+        nothing = ['', ' ', '  ', '   ', '\n', '\n ', '\n  ', '\n\n', '\n \n',
+                   ' \n', ' \n ', ' \n \n']
+        # something = Minimal requirements for an image description.
+        # If this fits, no tagging will take place
+        # (if there aren't other issues)
+        # MIT license is ok on italian wikipedia, let also this here
+
+        # Don't put "}}" here, please. Useless and can give problems.
+        something = ['{{']
+        # Unused file extensions. Does not contain PDF.
+        notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd")
+        brackets = False
+        delete = False
+        notification = None
+        # get the extension from the image's name
+        extension = self.imageName.split('.')[-1]
+        # Load the notification messages
+        HiddenTN = pywikibot.translate(self.site, HiddenTemplateNotification,
+                                       fallback=False)
+        self.unvertext = pywikibot.translate(self.site, n_txt, fallback=False)
+        di = pywikibot.translate(self.site, delete_immediately, fallback=False)
+        dih = pywikibot.translate(self.site, delete_immediately_head,
+                                  fallback=False)
+        din = pywikibot.translate(self.site, delete_immediately_notification,
+                                  fallback=False)
+        nh = pywikibot.translate(self.site, nothing_head, fallback=False)
+        nn = pywikibot.translate(self.site, nothing_notification,
+                                 fallback=False)
+        dels = pywikibot.translate(self.site, msg_del_comm)
+        smwl = pywikibot.translate(self.site, second_message_without_license,
+                                   fallback=False)
+
+        # Some formatting for delete immediately template
+        di = u'\n%s' % di
+        dels = dels % di
+
+        try:
+            self.imageCheckText = self.image.get()
+        except pywikibot.NoPage:
+            pywikibot.output(u"Skipping %s because it has been deleted."
+                             % self.imageName)
+            return True
+        except pywikibot.IsRedirectPage:
+            pywikibot.output(u"Skipping %s because it's a redirect."
+                             % self.imageName)
+            return True
+        # Delete the fields where the templates cannot be loaded
+        regex_nowiki = re.compile(r'<nowiki>(.*?)</nowiki>', re.DOTALL)
+        regex_pre = re.compile(r'<pre>(.*?)</pre>', re.DOTALL)
+        self.imageCheckText = regex_nowiki.sub('', self.imageCheckText)
+        self.imageCheckText = regex_pre.sub('', self.imageCheckText)
+        # Deleting the useless template from the description (before adding sth
+        # in the image the original text will be reloaded, don't worry).
+        if self.isTagged():
+            printWithTimeZone(u'%s is already tagged...' % self.imageName)
+            return True
+
+        # something is the array with {{, MIT License and so on.
+        for a_word in something:
+            if a_word in self.imageCheckText:
+                # There's a template, probably a license
+                brackets = True
+        # Is the extension allowed? (is it an image or f.e. a .xls file?)
+        # This is a substring test: "xls" also flags e.g. an "xlsx" file.
+        for parl in notallowed:
+            if parl.lower() in extension.lower():
+                delete = True
+        (license_found, hiddenTemplateFound) = self.smartDetection()
+        # If the image exists (maybe it has been deleting during the oder
+        # checking parts or something, who knows? ;-))
+        #if p.exists(): <-- improve thebot, better to make as
+        #                   less call to the server as possible
+        # Here begins the check block.
+        if brackets and license_found:
+            # It works also without this... but i want only to be sure ^^
+            brackets = False
+            return True
+        elif delete:
+            pywikibot.output(u"%s is not a file!" % self.imageName)
+            # Modify summary text
+            pywikibot.setAction(dels)
+            canctext = di % extension
+            notification = din % self.imageName
+            head = dih
+            self.report(canctext, self.imageName, notification, head)
+            delete = False
+            return True
+        elif self.imageCheckText in nothing:
+            pywikibot.output(
+                u"The file's description for %s does not contain a license "
+                u" template!" % self.imageName)
+            # Prefer the hidden-template notification when such a template
+            # was found; without any translation, notification stays None.
+            if hiddenTemplateFound and HiddenTN:
+                notification = HiddenTN % self.imageName
+            elif nn:
+                notification = nn % self.imageName
+            head = nh
+            self.report(self.unvertext, self.imageName, notification, head,
+                        smwl)
+            return True
+        else:
+            # Same report as for an empty description; only the message
+            # printed to the operator differs.
+            pywikibot.output(u"%s has only text and not the specific license..."
+                             % self.imageName)
+            if hiddenTemplateFound and HiddenTN:
+                notification = HiddenTN % self.imageName
+            elif nn:
+                notification = nn % self.imageName
+            head = nh
+            self.report(self.unvertext, self.imageName, notification, head,
+                        smwl)
+            return True
+
+
+def main():
+    """ Main function
+
+    Parse the command line arguments, build the generator of the files to
+    check and run the checks on every file. Unless a non-repeating option
+    (e.g. -break, -start, -cat, -ref, -page, -url, -regex) is given, the
+    run is repeated forever with a pause of "-time" seconds in between.
+    """
+    # Command line configurable parameters
+    repeat = True  # Restart after having check all the images?
+    limit = 80  # How many images check?
+    time_sleep = 30  # How many time sleep after the check?
+    skip_number = 0  # How many images to skip before checking?
+    waitTime = 0  # How many time sleep before the check?
+    commonsActive = False  # Is there's an image with the same name at commons?
+    normal = False  # Check the new images or use another generator?
+    urlUsed = False  # Use the url-related function instead of the new-pages
+    regexGen = False  # Use the regex generator
+    untagged = False  # Use the untagged generator
+    duplicatesActive = False  # Use the duplicate option
+    duplicatesReport = False  # Use the duplicate-report option
+    sendemailActive = False  # Use the send-email
+    logFullError = True  # Raise an error when the log is full
+    generator = None
+
+    # Here below there are the parameters.
+    for arg in pywikibot.handleArgs():
+        if arg.startswith('-limit'):
+            # A bare "-limit" is 6 characters long: ask for the number.
+            if len(arg) == 6:
+                limit = int(pywikibot.input(
+                    u'How many files do you want to check?'))
+            else:
+                limit = int(arg[7:])
+        elif arg.startswith('-time'):
+            if len(arg) == 5:
+                time_sleep = int(pywikibot.input(
+                    u'How many seconds do you want runs to be apart?'))
+            else:
+                time_sleep = int(arg[6:])
+        elif arg == '-break':
+            repeat = False
+        elif arg == '-nologerror':
+            logFullError = False
+        elif arg == '-commons':
+            commonsActive = True
+        # Has to be tested before the "-duplicates" prefix below, which
+        # would otherwise swallow the documented "-duplicatesreport".
+        # The former spelling "-duplicatereport" is still accepted.
+        elif arg in ('-duplicatesreport', '-duplicatereport'):
+            duplicatesReport = True
+        elif arg.startswith('-duplicates'):
+            duplicatesActive = True
+            if len(arg) == 11:
+                duplicates_rollback = 1
+            elif len(arg) > 11:
+                duplicates_rollback = int(arg[12:])
+        elif arg == '-sendemail':
+            sendemailActive = True
+        elif arg.startswith('-skip'):
+            if len(arg) == 5:
+                skip_number = int(pywikibot.input(
+                    u'How many files do you want to skip?'))
+            elif len(arg) > 5:
+                skip_number = int(arg[6:])
+        elif arg.startswith('-wait'):
+            pywikibot.warning(
+                u'"-wait" option is not implemented yet in core. Sorry!\n')
+##            if len(arg) == 5:
+##                waitTime = int(pywikibot.input(
+##                    u'How many time do you want to wait before checking the '
+##                    u'files?'))
+##            elif len(arg) > 5:
+##                waitTime = int(arg[6:])
+        elif arg.startswith('-start'):
+            if len(arg) == 6:
+                firstPageTitle = pywikibot.input(
+                    u'From witch page do you want to start?')
+            elif len(arg) > 6:
+                firstPageTitle = arg[7:]
+            # Drop a leading namespace prefix such as "File:". The start
+            # title has to remain a string, not the list a slice of
+            # split() would give.
+            if ':' in firstPageTitle:
+                firstPageTitle = firstPageTitle.split(':', 1)[1]
+            generator = pywikibot.Site().allpages(start=firstPageTitle,
+                                                     namespace=6)
+            repeat = False
+        elif arg.startswith('-page'):
+            if len(arg) == 5:
+                regexPageName = str(pywikibot.input(
+                    u'Which page do you want to use for the regex?'))
+            elif len(arg) > 5:
+                regexPageName = str(arg[6:])
+            repeat = False
+            regexGen = True
+        elif arg.startswith('-url'):
+            if len(arg) == 4:
+                regexPageUrl = str(pywikibot.input(
+                    u'Which url do you want to use for the regex?'))
+            elif len(arg) > 4:
+                regexPageUrl = str(arg[5:])
+            urlUsed = True
+            repeat = False
+            regexGen = True
+        elif arg.startswith('-regex'):
+            if len(arg) == 6:
+                regexpToUse = str(pywikibot.input(
+                    u'Which regex do you want to use?'))
+            elif len(arg) > 6:
+                regexpToUse = str(arg[7:])
+            # Placeholder, replaced by the real generator further below.
+            generator = 'regex'
+            repeat = False
+        elif arg.startswith('-cat'):
+            if len(arg) == 4:
+                catName = str(pywikibot.input(u'In which category do I work?'))
+            elif len(arg) > 4:
+                catName = str(arg[5:])
+            catSelected = pywikibot.Category(pywikibot.Site(),
+                                             'Category:%s' % catName)
+            generator = pg.CategorizedPageGenerator(catSelected)
+            repeat = False
+        elif arg.startswith('-ref'):
+            if len(arg) == 4:
+                refName = str(pywikibot.input(
+                    u'The references of what page should I parse?'))
+            elif len(arg) > 4:
+                refName = str(arg[5:])
+            generator = pg.ReferringPageGenerator(
+                pywikibot.Page(pywikibot.Site(), refName))
+            repeat = False
+        elif arg.startswith('-untagged'):
+            untagged = True
+            if len(arg) == 9:
+                projectUntagged = str(pywikibot.input(
+                    u'In which project should I work?'))
+            elif len(arg) > 9:
+                projectUntagged = str(arg[10:])
+
+    # Without any other generator the log of the new files is used.
+    if not generator:
+        normal = True
+
+    site = pywikibot.Site()
+    skip = skip_number > 0
+
+    # A little block-statement to ensure that the bot will not start with
+    # en-parameters
+    if site.lang not in project_inserted:
+        pywikibot.output(u"Your project is not supported by this script.\n"
+                         u"You have to edit the script and add it!")
+        return
+
+    # Reading the log of the new images if another generator is not given.
+    if normal:
+        if limit == 1:
+            pywikibot.output(u"Retrieving the latest file for checking...")
+        else:
+            pywikibot.output(u"Retrieving the latest %d files for checking..."
+                             % limit)
+    while True:
+        # Defing the Main Class.
+        Bot = checkImagesBot(site, sendemailActive=sendemailActive,
+                             duplicatesReport=duplicatesReport,
+                             logFullError=logFullError)
+        if untagged:
+            generator = Bot.untaggedGenerator(projectUntagged, limit)
+            normal = False
+        if normal:
+            generator = pg.NewimagesPageGenerator(total=limit, site=site)
+        # if urlUsed and regexGen, get the source for the generator
+        if urlUsed and regexGen:
+            textRegex = site.getUrl(regexPageUrl, no_hostname=True)
+        # Not an url but a wiki page as "source" for the regex
+        elif regexGen:
+            pageRegex = pywikibot.Page(site, regexPageName)
+            try:
+                textRegex = pageRegex.get()
+            except pywikibot.NoPage:
+                pywikibot.output(u"%s doesn't exist!" % pageRegex.title())
+                textRegex = ''  # No source, so the bot will quit later.
+        # If generator is the regex' one, use your own Generator using an url
+        # or page and a regex.
+        if generator == 'regex' and regexGen:
+            generator = Bot.regexGenerator(regexpToUse, textRegex)
+
+        Bot.takesettings()
+        if waitTime:
+            generator = Bot.wait(waitTime, generator, normal, limit)
+        # Only pages of the file namespace (6) are checked.
+        generator = pg.NamespaceFilterPageGenerator(generator, 6, site)
+        for image in generator:
+            # Setting the image for the main class
+            Bot.setParameters(image.title(withNamespace=False))
+            if skip:
+                skip = Bot.skipImages(skip_number, limit)
+                if skip:
+                    continue
+            # Check on commons if there's already an image with the same name
+            if commonsActive and site.family.name != "commons":
+                if not Bot.checkImageOnCommons():
+                    continue
+            # Check if there are duplicates of the image on the project selected
+            if duplicatesActive:
+                if not Bot.checkImageDuplicated(duplicates_rollback):
+                    continue
+            if Bot.checkStep():
+                continue
+
+        if repeat:
+            pywikibot.output(u"Waiting for %s seconds," % time_sleep)
+            time.sleep(time_sleep)
+        else:
+            break
+
+
+if __name__ == "__main__":
+    # Report the run time in whole seconds, even when main() fails or is
+    # interrupted. Start and end are UTC, cut down to whole seconds.
+    old = datetime.datetime.utcnow().replace(microsecond=0)
+    try:
+        main()
+    finally:
+        final = datetime.datetime.utcnow().replace(microsecond=0)
+        delta = final - old
+        # timedelta.seconds alone leaves out whole days; add them so that
+        # runs longer than 24 hours are reported correctly.
+        secs_of_diff = delta.days * 86400 + delta.seconds
+        pywikibot.output("Execution time: %s seconds\n" % secs_of_diff)
-- 
To view, visit https://gerrit.wikimedia.org/r/127615
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I637a6b55bf0a0336df642552053337ba8368bb20
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt info@gno.de
Gerrit-Reviewer: Ladsgroup ladsgroup@gmail.com
Gerrit-Reviewer: Merlijn van Deen valhallasw@arctus.nl
Gerrit-Reviewer: jenkins-bot <>