SVN: [5989] trunk/pywikipedia/checkimages.py - Pywikipedia-l

18 Oct 2008

Revision: 5989
Author:   filnik
Date:     2008-10-18 12:39:26 +0000 (Sat, 18 Oct 2008)
Log Message:
-----------
Some minor changes, rewriting, adding comments somewhere..
Modified Paths:
--------------
    trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================

--- trunk/pywikipedia/checkimages.py	2008-10-18 12:01:49 UTC (rev 5988)
+++ trunk/pywikipedia/checkimages.py	2008-10-18 12:39:26 UTC (rev 5989)
@@ -362,49 +362,58 @@
 # Note: every __botnick__ will be replaced with your bot's nickname (feel free not to use it if you don't need it)
 HiddenTemplateNotification = {
         'commons': u"""\n{{subst:User:Filnik/whitetemplate|Image:%s}}\n\n''This message was '''added automatically by [[User:__botnick__|__botnick__]]''', if you need some help about it, ask its master (~~~) or go to the [[Commons:Help desk]]''. --~~~~""",
-        'de': None,
-        'en': None,
-        'it': u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Template_insufficiente|%s|__botnick__}} --~~~~",
-        'ko': u"\n{{subst:User:김우진1/BotRFL|%s}} --~~~~",
-        'ta': None,
+        'de'     : None,
+        'en'     : None,
+        'it'     : u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Template_insufficiente|%s|__botnick__}} --~~~~",
+        'ko'     : u"\n{{subst:User:김우진1/BotRFL|%s}} --~~~~",
+        'ta'     : None,
         }
-# Stub - will make it better in future, work in progress.
+
+# In this part there are the parameters for the dupe images.
+
+# Put here the template that you want to put in the image to warn that it's a dupe
 # put __image__ if you want only one image, __images__ if you want the whole list
 duplicatesText = {
-        'commons':u'\n{{Dupe|__image__}}',
-        'en':None,
-        'it':u'\n{{Progetto:Coordinamento/Immagini/Bot/Template duplicati|__images__}}',
-        'ko':'분류:그림 저작권 틀',
+        'commons': u'\n{{Dupe|__image__}}',
+        'en'     : None,
+        'it'     : u'\n{{Progetto:Coordinamento/Immagini/Bot/Template duplicati|__images__}}',
+        'ko'     :'분류:그림 저작권 틀',
         }
+# Head of the message given to the author
 duplicate_user_talk_head = {
-        'commons':None,
-        'it': u'\n\n== Immagine doppia ==\n',
+        'commons': None,
+        'en'     : None, 
+        'it'     : u'\n\n== Immagine doppia ==\n',
         }
+# Message to put in the talk
 duplicates_user_talk_text = {
-        'commons':u'{{subst:User:Filnik/duplicates|Image:%s|Image:%s}}',
-        'en':None,
-        'it':u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Duplicati|%s|%s|__botnick__}} --~~~~",
+        'commons': u'{{subst:User:Filnik/duplicates|Image:%s|Image:%s}}',
+        'en'     : None,
+        'it'     : u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Duplicati|%s|%s|__botnick__}} --~~~~",
         }
-
+# Comment used by the bot while it reports the problem in the uploader's talk
 duplicates_comment_talk = {
-        'commons':u'Bot: Dupe image found',
-        'en':None,
-        'it':u"Bot: Notifico l'immagine doppia trovata",
+        'commons': u'Bot: Dupe image found',
+        'en'     : None,
+        'it'     : u"Bot: Notifico l'immagine doppia trovata",
         }
+# Comment used by the bot while it reports the problem in the image
 duplicates_comment_image = {
-        'commons':u'Bot: Tagging dupe image',
-        'en':None,
-        'it':u'Bot: Immagine doppia, da cancellare',
+        'commons': u'Bot: Tagging dupe image',
+        'en'     : None,
+        'it'     : u'Bot: Immagine doppia, da cancellare',
         }
+# Regex to detect the template put in the image's description to find the dupe
 duplicatesRegex = {
-        'commons':r'{{(?:[Tt]emplate:|)[Dd]upe[|}]',
-        'en':None,
-        'it':r'{{(?:[Tt]emplate:|)[Pp]rogetto:[Cc]oordinamento/Immagini/Bot/Template duplicati[|}]',
+        'commons': r'{{(?:[Tt]emplate:|)[Dd]upe[|}]',
+        'en'     : None,
+        'it'     : r'{{(?:[Tt]emplate:|)[Pp]rogetto:[Cc]oordinamento/Immagini/Bot/Template duplicati[|}]',
         }
-
+# Category with the licenses and / or with subcategories with the other licenses.
 category_with_licenses = {
-        'commons':'Category:License tags',
-        'it':'Categoria:Template Licenze copyright',
+        'commons': 'Category:License tags',
+        'en'     : None,
+        'it'     : 'Categoria:Template Licenze copyright',
         }
## Put None if you don't use this option or simply add nothing if en
@@ -521,7 +530,7 @@
         self.skip_list = list() # Inizialize the skip list used below
         self.duplicatesReport = duplicatesReport
         image_n = self.site.image_namespace()
-        self.image_namespace = "%s:" % image_n # Example: "Image:"
+        self.image_namespace = u"%s:" % image_n # Example: "Image:"
         # Load the licenses only once, so do it once
         self.smartdetection = smartdetection
         if self.smartdetection:
@@ -530,7 +539,7 @@
         """ Function to set parameters, now only image but maybe it can be used for others in "future" """
         self.imageName = imageName
         # Defining the image's Page Object
-        self.image = wikipedia.ImagePage(self.site, '%s%s' % (self.image_namespace, self.imageName))
+        self.image = wikipedia.ImagePage(self.site, u'%s%s' % (self.image_namespace, self.imageName))
     def report(self, newtext, image_to_report, notification = None, head = None,
                notification2 = None, unver = True, commTalk = None, commImage = None):
         """ Function to make the reports easier. """
@@ -541,9 +550,9 @@
         self.notification = notification
         self.notification2 = notification2
         if self.notification != None:
-            self.notification = re.sub('__botnick__', self.botnick, notification)
+            self.notification = re.sub(r'__botnick__', self.botnick, notification)
         if self.notification2 != None:
-            self.notification2 = re.sub('__botnick__', self.botnick, notification2)
+            self.notification2 = re.sub(r'__botnick__', self.botnick, notification2)
         self.commTalk = commTalk
         if commImage == None:
             self.commImage = self.commento
@@ -611,12 +620,12 @@
             nick = reportPageObject.getLatestUploader()[0]
         except wikipedia.NoPage:
             wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image_to_report)
-            repme = "\n*[[:Image:%s]] problems '''with the APIs'''"
+            repme = u"\n*[[:Image:%s]] problems '''with the APIs'''"
             # We have a problem! Report and exit!
             self.report_image(self.image_to_report, self.rep_page, self.com, repme)
             return False
         luser = wikipedia.url2link(nick, self.site, self.site)
-        talk_page = wikipedia.Page(self.site, "%s:%s" % (self.site.namespace(3), luser))
+        talk_page = wikipedia.Page(self.site, u"%s:%s" % (self.site.namespace(3), luser))
         self.talk_page = talk_page
         self.luser = luser
         return True
@@ -667,7 +676,7 @@
         else:
             commentox = self.commTalk
         if second_text == True:
-            self.talk_page.put("%s\n\n%s" % (testoattuale, self.notification2), comment = commentox, minorEdit = False)
+            self.talk_page.put(u"%s\n\n%s" % (testoattuale, self.notification2), comment = commentox, minorEdit = False)
         elif second_text == False:
             self.talk_page.put(testoattuale + self.head + self.notification, comment = commentox, minorEdit = False)
         if emailPageName != None and emailSubj != None:
@@ -677,7 +686,7 @@
             except (wikipedia.NoPage, wikipedia.IsRedirectPage):
                 return # Exit
             if self.sendemailActive:
-                text_to_send = re.sub(r'__user-nickname__', '%s' % self.luser, emailText)
+                text_to_send = re.sub(r'__user-nickname__', r'%s' % self.luser, emailText)
                 emailClass = EmailSender(self.site, self.luser)
                 emailClass.send(emailSubj, text_to_send)
@@ -694,7 +703,7 @@
         results = re.findall(regexp, text)
         if results == []:
             wikipedia.output(link)
-            raise NothingFound('Nothing found! Try to use the tool by yourself to be sure that it works!')
+            raise NothingFound(u'Nothing found! Try to use the tool by yourself to be sure that it works!')
         else:
             for result in results:
                 wikiPage = wikipedia.Page(self.site, result)
@@ -721,10 +730,10 @@
     def loadHiddenTemplates(self):
         """ Function to load the white templates """
         # A template as {{en is not a license! Adding also them in the whitelist template...
-        for langK in wikipedia.Family('wikipedia').langs.keys():
-            self.hiddentemplate.append('%s' % langK)
+        for langK in wikipedia.Family(u'wikipedia').langs.keys():
+            self.hiddentemplate.append(u'%s' % langK)
         # The template #if: and #switch: aren't something to care about
-        self.hiddentemplate.extend(['#if:', '#switch:'])
+        self.hiddentemplate.extend([u'#if:', u'#switch:'])
         # Hidden template loading
         if self.pageHidden != None:
             try:
@@ -746,7 +755,7 @@
         max_usage = 0
         for element in listGiven:
             imageName = element[1]
-            imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % imageName)
+            imagePage = wikipedia.ImagePage(self.site, u'Image:%s' % imageName)
             imageUsage = [page for page in imagePage.usingPages()]
             if len(imageUsage) > 0 and len(imageUsage) > max_usage:
                 max_usage = len(imageUsage)
@@ -768,7 +777,7 @@
     def convert_to_url(self, page):
         # Function stolen from wikipedia.py
         """The name of the page this Page refers to, in a form suitable for the URL of the page."""
-        title = page.replace(" ", "_")
+        title = page.replace(u" ", u"_")
         encodedTitle = title.encode(self.site.encoding())
         return urllib.quote(encodedTitle)
@@ -792,7 +801,7 @@
         wikipedia.output(u'Checking if %s is on commons...' % self.imageName)
         commons_site = wikipedia.getSite('commons', 'commons')
         regexOnCommons = r"\n\*\[\[:Image:%s\]\] is also on '''Commons''': \[\[commons:Image:.*?\]\](?: \(same name\)|)$" % self.imageName
-        imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % self.imageName)
+        imagePage = wikipedia.ImagePage(self.site, u'Image:%s' % self.imageName)
         hash_found = imagePage.getHash()
         if hash_found == None:
             return False # Problems? Yes! Image deleted, no hash found. Skip the image.
@@ -800,9 +809,9 @@
             commons_image_with_this_hash = commons_site.getImagesFromAnHash(hash_found)
             if commons_image_with_this_hash != []:
                 wikipedia.output(u'%s is on commons!' % self.imageName)
-                imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % self.imageName)
+                imagePage = wikipedia.ImagePage(self.site, u'Image:%s' % self.imageName)
                 on_commons_text = imagePage.getImagePageHtml()
-                if "<div class='sharedUploadNotice'>" in on_commons_text:
+                if u"<div class='sharedUploadNotice'>" in on_commons_text:
                     wikipedia.output(u"But, the image doesn't exist on your project! Skip...")
                     # Problems? Yes! We have to skip the check part for that image!
                     # Because it's on commons but someone has added something on your project.
@@ -813,9 +822,9 @@
                 else:
                     # the second usually is a url or something like that. Compare the two in equal way, both url.
                     if self.convert_to_url(self.imageName) == self.convert_to_url(commons_image_with_this_hash[0]):
-                        repme = "\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]] (same name)" % (self.imageName, commons_image_with_this_hash[0])
+                        repme = u"\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]] (same name)" % (self.imageName, commons_image_with_this_hash[0])
                     else:
-                        repme = "\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]]" % (self.imageName, commons_image_with_this_hash[0])
+                        repme = u"\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]]" % (self.imageName, commons_image_with_this_hash[0])
                     self.report_image(self.imageName, self.rep_page, self.com, repme, addings = False, regex = regexOnCommons)
                     # Problems? No, return True
                     return True
@@ -836,7 +845,7 @@
         dupComment_talk = wikipedia.translate(self.site, duplicates_comment_talk)
         dupComment_image = wikipedia.translate(self.site, duplicates_comment_image)
         duplicateRegex = r'\n*(?:[[:Image:%s]] has the following duplicates(?: ('''forced mode''')|):|*[[:Image:%s]])$' % (self.convert_to_url(self.imageName), self.convert_to_url(self.imageName))
-        imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % self.imageName)
+        imagePage = wikipedia.ImagePage(self.site, u'Image:%s' % self.imageName)
         hash_found = imagePage.getHash()
         duplicates = self.site.getImagesFromAnHash(hash_found)
         if duplicates == None:
@@ -853,7 +862,7 @@
                     DupePage = wikipedia.ImagePage(self.site, u'Image:%s' % duplicate)
                     imagedata = DupePage.getLatestUploader()[1]
                     # '2008-06-18T08:04:29Z'
-                    data = time.strptime(imagedata, "%Y-%m-%dT%H:%M:%SZ")
+                    data = time.strptime(imagedata, u"%Y-%m-%dT%H:%M:%SZ")
                     data_seconds = time.mktime(data)
                     time_image_list.append([data_seconds, duplicate])
                     time_list.append(data_seconds)
@@ -876,24 +885,24 @@
                         wikipedia.output(u'%s is a duplicate and has to be tagged...' % duplicate)
                         images_to_tag_list.append(duplicate)
                         #if duplicate != duplicates[-1]:
-                        string += "*[[:%s%s]]\n" % (self.image_namespace, duplicate)
+                        string += u"*[[:%s%s]]\n" % (self.image_namespace, duplicate)
                         #else:
                         #    string += "*[[:%s%s]]" % (self.image_namespace, duplicate)
                     else:
                         wikipedia.output(u"Already put the dupe-template in the image's page or in the dupe's page. Skip.")
                         return True # Ok - No problem. Let's continue the checking phase
-                older_image_ns = '%s%s' % (self.image_namespace, older_image) # adding the namespace
+                older_image_ns = u'%s%s' % (self.image_namespace, older_image) # adding the namespace
                 only_report = False # true if the image are not to be tagged as dupes
# put only one image or the whole list according to the request
-                if '__images__' in dupText:
+                if u'__images__' in dupText:
                     text_for_the_report = re.sub(r'__images__', r'\n%s*[[:%s]]\n' % (string, older_image_ns), dupText)
                 else:
                     text_for_the_report = re.sub(r'__image__', r'%s' % older_image_ns, dupText)
                 # Two iteration: report the "problem" to the user only once (the last)
                 if len(images_to_tag_list) > 1:
                     for image_to_tag in images_to_tag_list[:-1]:
-                        already_reported_in_past = self.countEdits('Image:%s' % image_to_tag, self.botolist)
+                        already_reported_in_past = self.countEdits(u'Image:%s' % image_to_tag, self.botolist)
                         # if you want only one edit, the edit found should be more than 0 -> num - 1
                         if already_reported_in_past > duplicates_rollback - 1:
                             only_report = True
@@ -903,7 +912,7 @@
                         self.report(text_for_the_report, image_to_tag,
                                     commImage = dupComment_image, unver = True)
                 if len(images_to_tag_list) != 0 and not only_report:
-                    already_reported_in_past = self.countEdits('Image:%s' % images_to_tag_list[-1], self.botolist)
+                    already_reported_in_past = self.countEdits(u'Image:%s' % images_to_tag_list[-1], self.botolist)
                     # It's a regex, we need to fix the name in order to make it regex-compatible.
                     replaces_to_perform = [[' ', '_'], ['(', '\('], [')', '\)'], ['.', '\.'], ['[', '\['], [']', '\]'],
                                            ['{', '\{'], ['}', '\}']]
@@ -921,13 +930,13 @@
                                 commImage = dupComment_image, unver = True)
             if self.duplicatesReport or only_report:
                 if only_report:
-                    repme = "\n*[[:Image:%s]] has the following duplicates ('''forced mode'''):" % self.convert_to_url(self.imageName)
+                    repme = u"\n*[[:Image:%s]] has the following duplicates ('''forced mode'''):" % self.convert_to_url(self.imageName)
                 else:
-                    repme = "\n*[[:Image:%s]] has the following duplicates:" % self.convert_to_url(self.imageName)
+                    repme = u"\n*[[:Image:%s]] has the following duplicates:" % self.convert_to_url(self.imageName)
                 for duplicate in duplicates:
                     if self.convert_to_url(duplicate) == self.convert_to_url(self.imageName):
                         continue # the image itself, not report also this as duplicate
-                    repme += "\n**[[:Image:%s]]" % self.convert_to_url(duplicate)
+                    repme += u"\n**[[:Image:%s]]" % self.convert_to_url(duplicate)
                 result = self.report_image(self.imageName, self.rep_page, self.com, repme, addings = False, regex = duplicateRegex)
                 if not result:
                     return True # If Errors, exit (but continue the check)                
@@ -949,7 +958,7 @@
         except wikipedia.IsRedirectPage:            
             text_get = another_page.getRedirectTarget().get()
         if len(text_get) >= self.logFulNumber:
-            raise LogIsFull("The log page (%s) is full! Please delete the old images reported." % another_page.title())
+            raise LogIsFull(u"The log page (%s) is full! Please delete the old images reported." % another_page.title())
         pos = 0
         # The talk page includes "_" between the two names, in this way i replace them to " "
         n = re.compile(regex, re.UNICODE|re.M)
@@ -1021,7 +1030,7 @@
                 pageAllowedText = ''
             for nameLicense in self.load(pageAllowedText):
                 if not 'template:' in nameLicense.lower():
-                    nameLicense = 'Template:%s' % nameLicense
+                    nameLicense = u'Template:%s' % nameLicense
                 pageLicense = wikipedia.Page(self.site, nameLicense)
                 if pageLicense not in list_licenses:
                     list_licenses.append(pageLicense) # the list has wiki-pages
@@ -1033,7 +1042,7 @@
             gets the real page, if there's a NoPage, return None.
         """
         #print template.exists()
-        template = wikipedia.Page(self.site, 'Template:%s' % license_selected)
+        template = wikipedia.Page(self.site, u'Template:%s' % license_selected)
         try:
             template.pageAPInfo()
         except wikipedia.NoPage:
@@ -1101,7 +1110,7 @@
                         exit_cicle = True
                         break
         if not seems_ok:
-            rep_text_license_fake = "\n*[[:Image:%s]] seems to have a ''fake license'', license detected: {{tl|%s}}." % (self.imageName, license_found)
+            rep_text_license_fake = u"\n*[[:Image:%s]] seems to have a ''fake license'', license detected: {{tl|%s}}." % (self.imageName, license_found)
             regexFakeLicense = r"* ?[[:Image:%s]] seems to have a ''fake license'', license detected: {{tl|%s}}.$" % (self.imageName, license_found)
             printWithTimeZone(u"%s seems to have a fake license: %s, reporting..." % (self.imageName, license_found))
             self.report_image(self.imageName, rep_text = rep_text_license_fake,
@@ -1120,7 +1129,7 @@
         regl = r"(\"|\')(.*?)\1(?:,|\])"
         pl = re.compile(regl, re.UNICODE)
         for xl in pl.finditer(raw):
-            word = xl.group(2).replace('\\\\', '\\')
+            word = xl.group(2).replace(u'\\\\', u'\\')
             if word not in list_loaded:
                 list_loaded.append(word)
         return list_loaded
@@ -1158,7 +1167,7 @@
         os.environ['TZ'] = 'EST+01EDT,M4.1.0,M10.5.0'
         time.tzset()
         # '2008-06-18T08:04:29Z'
-        data = time.strptime(imagedata, "%Y-%m-%dT%H:%M:%SZ")
+        data = time.strptime(imagedata, u"%Y-%m-%dT%H:%M:%SZ")
         data_seconds = time.mktime(data)
         current_time = time.time()
         secs_of_diff = current_time - data_seconds
@@ -1290,7 +1299,7 @@
# Block of text to translate the parameters set above.
     image_n = site.image_namespace()
-    image_namespace = "%s:" % image_n # Example: "User_talk:"
+    image_namespace = u"%s:" % image_n # Example: "User_talk:"
     unvertext = wikipedia.translate(site, n_txt)
     di = wikipedia.translate(site, delete_immediately)
     dih = wikipedia.translate(site, delete_immediately_head)
@@ -1320,7 +1329,7 @@
         wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
         return
     # Some formatting for delete immediately template
-    di = '\n%s' % di
+    di = u'\n%s' % di
     dels = dels % di
# Reading the log of the new images if another generator is not given.
@@ -1448,7 +1457,7 @@
                 # If there are {{ use regex, otherwise no (if there's not the {{ may not be a template
                 # and the regex will be wrong)
                 if '{{' in i:
-                    regexP = re.compile('{{(?:template|)%s ?(?:||\n|}|<) ?' % i.split('{{')[1].replace(' ', '[ _]'), re.I)
+                    regexP = re.compile(r'{{(?:template|)%s ?(?:||\n|}|<) ?' % i.split('{{')[1].replace(u' ', u'[ _]'), re.I)
                     result = regexP.findall(imageCheckText)
                     if result != []:
                         tagged = True
@@ -1551,7 +1560,7 @@
                     reported = True
                 if reported == True:
                     #if imagestatus_used == True:
-                    mainClass.report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used)
+                    mainClass.report(mex_used, imageName, text_used, u"\n%s\n" % head_used, None, imagestatus_used, summary_used)
                 else:
                     wikipedia.output(u"Skipping the image...")
                 some_problem = False