SVN: [6146] trunk/pywikipedia/checkimages.py - Pywikipedia-l

14 Dec 2008

Revision: 6146
Author:   filnik
Date:     2008-12-14 11:16:08 +0000 (Sun, 14 Dec 2008)

Log Message:
-----------
Fixed more or less all the problems due to the change of namespace

Modified Paths:
--------------
    trunk/pywikipedia/checkimages.py

Modified: trunk/pywikipedia/checkimages.py
===================================================================

--- trunk/pywikipedia/checkimages.py	2008-12-12 20:24:40 UTC (rev 6145)
+++ trunk/pywikipedia/checkimages.py	2008-12-14 11:16:08 UTC (rev 6146)
@@ -387,15 +387,15 @@
         }
 # Comment used by the bot while it reports the problem in the uploader's talk
 duplicates_comment_talk = {
-        'commons': u'Bot: Dupe image found',
+        'commons': u'Bot: Dupe file found',
         'en'     : None,
-        'it'     : u"Bot: Notifico l'immagine doppia trovata",
+        'it'     : u"Bot: Notifico il file doppio trovato",
         }
 # Comment used by the bot while it reports the problem in the image
 duplicates_comment_image = {
-        'commons': u'Bot: Tagging dupe image',
+        'commons': u'Bot: Tagging dupe file',
         'en'     : None,
-        'it'     : u'Bot: Immagine doppia, da cancellare',
+        'it'     : u'Bot: File doppio, da cancellare',
         }
 # Regex to detect the template put in the image's decription to find the dupe
 duplicatesRegex = {
@@ -650,7 +650,7 @@
 
     def tag_image(self, put = True):
         """ Function to add the template in the image and to find out
-        who's the user that has uploaded the image. """
+        who's the user that has uploaded the file. """
         # Get the image's description
         reportPageObject = wikipedia.ImagePage(self.site, self.image_namespace +
self.image_to_report)
         try:
@@ -670,7 +670,7 @@
             else:
                 nick = reportPageObject.getLatestUploader()[0]
         except wikipedia.NoPage:
-            wikipedia.output(u"Seems that %s hasn't the image at all, but there
is something in the description..." % self.image_to_report)
+            wikipedia.output(u"Seems that %s has only the description and not the
file..." % self.image_to_report)
             repme = u"\n*[[:File:%s]] problems '''with the
APIs'''"
             # We have a problem! Report and exit!
             self.report_image(self.image_to_report, self.rep_page, self.com, repme)
@@ -742,7 +742,7 @@
                 emailClass.send(emailSubj, text_to_send)
 
     def untaggedGenerator(self, untaggedProject, limit):
-        """ Generator that yield the images without license. It's
based on a tool of the toolserver. """
+        """ Generator that yield the files without license. It's based
on a tool of the toolserver. """
         lang = untaggedProject.split('.', 1)[0]
         project = '.%s' % untaggedProject.split('.', 1)[1]
         if lang == 'commons':
@@ -849,7 +849,7 @@
         return number_edits
 
     def checkImageOnCommons(self):
-        """ Checking if the image is on commons """
+        """ Checking if the file is on commons """
         wikipedia.output(u'Checking if %s is on commons...' % self.imageName)
         commons_site = wikipedia.getSite('commons', 'commons')
         regexOnCommons = r"\n\*\[\[:File:%s\]\] is also on
'''Commons''': \[\[commons:File:.*?\]\](?: \(same name\)|)$"
% re.escape(self.imageName)
@@ -864,7 +864,7 @@
                 imagePage = wikipedia.ImagePage(self.site, u'File:%s' %
self.imageName)
                 on_commons_text = imagePage.getImagePageHtml()
                 if u"<div class='sharedUploadNotice'>" in
on_commons_text:
-                    wikipedia.output(u"But, the image doesn't exist on your
project! Skip...")
+                    wikipedia.output(u"But, the file doesn't exist on your
project! Skip...")
                     # Problems? Yes! We have to skip the check part for that image!
                     # Because it's on commons but someone has added something on your
project.
                     return False
@@ -885,7 +885,7 @@
                 return True
 
     def checkImageDuplicated(self, duplicates_rollback):
-        """ Function to check the duplicated images. """
+        """ Function to check the duplicated files. """
         # {{Dupe|File:Blanche_Montel.jpg}}
         # Skip the stub images
         #if 'stub' in self.imageName.lower() and self.project ==
'wikipedia' and self.site.lang == 'it':
@@ -944,7 +944,7 @@
                         #else:
                         #    string += "*[[:%s%s]]" % (self.image_namespace,
duplicate)
                     else:
-                        wikipedia.output(u"Already put the dupe-template in the
image's page or in the dupe's page. Skip.")
+                        wikipedia.output(u"Already put the dupe-template in the
files's page or in the dupe's page. Skip.")
                         return True # Ok - No problem. Let's continue the checking
phase
                 older_image_ns = u'%s%s' % (self.image_namespace, older_image) #
adding the namespace
                 only_report = False # true if the image are not to be tagged as dupes
@@ -996,7 +996,7 @@
         return True # Ok - No problem. Let's continue the checking phase
 
     def report_image(self, image_to_report, rep_page = None, com = None, rep_text = None,
addings = True, regex = None):
-        """ Function to report the images in the report page when needed.
"""
+        """ Function to report the files in the report page when needed.
"""
         if rep_page == None: rep_page = self.rep_page
         if com == None: com = self.com
         if rep_text == None: rep_text = self.rep_text
@@ -1009,7 +1009,7 @@
         except wikipedia.IsRedirectPage:            
             text_get = another_page.getRedirectTarget().get()
         if len(text_get) >= self.logFulNumber:
-            raise LogIsFull(u"The log page (%s) is full! Please delete the old
images reported." % another_page.title())
+            raise LogIsFull(u"The log page (%s) is full! Please delete the old files
reported." % another_page.title())
         pos = 0
         # The talk page includes "_" between the two names, in this way i
replace them to " "
         n = re.compile(regex, re.UNICODE|re.M)
@@ -1211,10 +1211,10 @@
         return list_loaded
 
     def skipImages(self, skip_number, limit):
-        """ Given a number of images, skip the first -number- images.
"""
+        """ Given a number of files, skip the first -number- files.
"""
         # If the images to skip are more the images to check, make them the same number
         if skip_number == 0:
-            wikipedia.output(u'\t\t>> No images to skip...<<')
+            wikipedia.output(u'\t\t>> No files to skip...<<')
             return False
         if skip_number > limit: skip_number = limit
         # Print a starting message only if no images has been skipped
@@ -1240,62 +1240,67 @@
             first x seconds.
         """
         imagesToSkip = 0
-        while 1:            
-            loadOtherImages = True # ensure that all the images loaded aren't to
skip!
-            for image in generator:
-                if normal:
-                    imageData = image
-                    image = imageData[0]
-                    timestamp = imageData[1]
-                else:
-                    timestamp = image.getLatestUploader()[1]
-                #http://pytz.sourceforge.net/ <- maybe useful?
-                # '2008-06-18T08:04:29Z'
-                img_time = datetime.datetime.strptime(timestamp,
u"%Y-%m-%dT%H:%M:%SZ") #not relative to localtime
-                now =
datetime.datetime.strptime(str(datetime.datetime.utcnow()).split('.')[0],
"%Y-%m-%d %H:%M:%S") #timezones are UTC
-                # + seconds to be sure that now > img_time
-                while now < img_time:
-                    now = (now + datetime.timedelta(seconds=1))
-                delta = now - img_time
-                secs_of_diff = delta.seconds
-                if waitTime > secs_of_diff:
-                    wikipedia.output(u'Skipping %s, uploaded %s seconds ago..' %
(image.title(), int(secs_of_diff)))
-                    imagesToSkip += 1
-                    continue # Still wait
-                else:
-                    loadOtherImages = False
-                    break # No ok, continue
-            # if yes, we have skipped all the images given!
-            if loadOtherImages:
-                generator = self.site.newimages(number = limit, lestart = timestamp)
-                imagesToSkip = 0
-                # continue to load images! continue
-                continue
-            else:
-                break # ok some other images, go below
         # if normal, we can take as many images as "limit" has told us,
otherwise, sorry, nope.
         if normal:
-            newGen = list()
-            imagesToSkip += 1 # some calcs, better add 1
-            # Add new images, instead of the images skipped
-            newImages = self.site.newimages(number = imagesToSkip, lestart = timestamp)
-            for imageData in generator:
-                if normal:
-                    image = imageData[0]
-                    timestamp = imageData[1]
-                    uploader = imageData[2]
-                    comment = imageData[3]
-                    newGen.append([image, timestamp, uploader, comment])
+            printWithTimeZone(u'Skipping the files uploaded less than %s seconds
ago..' % waitTime)
+            while 1:            
+                loadOtherImages = True # ensure that all the images loaded aren't to
skip!
+                for image in generator:
+                    image = wikipedia.ImagePage(self.site, image.title())
+                    if normal:
+                        imageData = image
+                        image = imageData[0]
+                        timestamp = imageData[1]
+                    else:
+                        timestamp = image.getLatestUploader()[1]
+                    #http://pytz.sourceforge.net/ <- maybe useful?
+                    # '2008-06-18T08:04:29Z'
+                    img_time = datetime.datetime.strptime(timestamp,
u"%Y-%m-%dT%H:%M:%SZ") #not relative to localtime
+                    now =
datetime.datetime.strptime(str(datetime.datetime.utcnow()).split('.')[0],
"%Y-%m-%d %H:%M:%S") #timezones are UTC
+                    # + seconds to be sure that now > img_time
+                    while now < img_time:
+                        now = (now + datetime.timedelta(seconds=1))
+                    delta = now - img_time
+                    secs_of_diff = delta.seconds
+                    if waitTime > secs_of_diff:
+                        wikipedia.output(u'Skipping %s, uploaded %s seconds
ago..' % (image.title(), int(secs_of_diff)))
+                        imagesToSkip += 1
+                        continue # Still wait
+                    else:
+                        loadOtherImages = False
+                        break # No ok, continue
+                # if yes, we have skipped all the images given!
+                if loadOtherImages:
+                    generator = self.site.newimages(number = limit, lestart = timestamp)
+                    imagesToSkip = 0
+                    # continue to load images! continue
+                    continue
                 else:
-                    image = imageData
-                    newGen.append(image)
-            num = 0
-            for imageData in newImages:
-                newGen.append(imageData)
-        return newGen
+                    break # ok some other images, go below
+                newGen = list()
+                imagesToSkip += 1 # some calcs, better add 1
+                # Add new images, instead of the images skipped
+                newImages = self.site.newimages(number = imagesToSkip, lestart =
timestamp)
+                for imageData in generator:
+                    if normal:
+                        image = imageData[0]
+                        timestamp = imageData[1]
+                        uploader = imageData[2]
+                        comment = imageData[3]
+                        newGen.append([image, timestamp, uploader, comment])
+                    else:
+                        image = imageData
+                        newGen.append(image)
+                num = 0
+                for imageData in newImages:
+                    newGen.append(imageData)
+            return newGen
+        else:
+            wikipedia.output(u"The wait option is available only with the standard
generator.")
+            return generator
      
     def isTagged(self):
-        """ Understand if an image is already tagged or not.
"""
+        """ Understand if a file is already tagged or not.
"""
         TextFind = wikipedia.translate(self.site, txt_find)
         # Is the image already tagged? If yes, no need to double-check, skip
         for i in TextFind:
@@ -1427,9 +1432,9 @@
         # Here begins the check block.
         if self.some_problem == True:
             if self.mex_used in self.imageCheckText:
-                wikipedia.output(u'Image already fixed. Skip.')
+                wikipedia.output(u'File already fixed. Skip.')
                 return True
-            wikipedia.output(u"The image description for %s contains %s..." %
(self.imageName, self.name_used))
+            wikipedia.output(u"The file's description for %s contains
%s..." % (self.imageName, self.name_used))
             if self.mex_used.lower() == 'default':
                 self.mex_used = unvertext
             if self.imagestatus_used == False:
@@ -1440,7 +1445,7 @@
                 #if self.imagestatus_used == True:
                 self.report(self.mex_used, self.imageName, self.text_used,
u"\n%s\n" % self.head_used, None, self.imagestatus_used, self.summary_used)
             else:
-                wikipedia.output(u"Skipping the image...")
+                wikipedia.output(u"Skipping the file...")
             self.some_problem = False
             return True
         elif brackets == True and license_found != None:
@@ -1458,7 +1463,7 @@
             delete = False
             return True
         elif self.imageCheckText in nothing:
-            wikipedia.output(u"The image description for %s does not contain a
license template!" % self.imageName)
+            wikipedia.output(u"The file's description for %s does not contain a
license template!" % self.imageName)
             if hiddenTemplateFound and HiddenTN != None and HiddenTN != '' and
HiddenTN != ' ':
                 notification = HiddenTN % self.imageName
             else:
@@ -1483,7 +1488,7 @@
     limit = 80 # How many images check?
     time_sleep = 30 # How many time sleep after the check?
     skip_number = 0 # How many images to skip before checking?
-    wait_number = 0 # How many time sleep before the check?
+    waitTime = 0 # How many time sleep before the check?
     commonsActive = False # Check if on commons there's an image with the same name?
     normal = False # Check the new images or use another generator?
     urlUsed = False # Use the url-related function instead of the new-pages generator
@@ -1497,7 +1502,7 @@
     for arg in wikipedia.handleArgs():
         if arg.startswith('-limit'):
             if len(arg) == 7:
-                limit = int(wikipedia.input(u'How many images do you want to
check?'))
+                limit = int(wikipedia.input(u'How many files do you want to
check?'))
             else:
                 limit = int(arg[7:])
         if arg.startswith('-time'):
@@ -1522,22 +1527,23 @@
         elif arg.startswith('-skip'):
             if len(arg) == 5:
                 skip = True
-                skip_number = int(wikipedia.input(u'How many images do you want to
skip?'))
+                skip_number = int(wikipedia.input(u'How many files do you want to
skip?'))
             elif len(arg) > 5:
                 skip = True
                 skip_number = int(arg[6:])
         elif arg.startswith('-wait'):
             if len(arg) == 5:
                 wait = True
-                wait_number = int(wikipedia.input(u'How many time do you want to wait
before checking the images?'))
+                waitTime = int(wikipedia.input(u'How many time do you want to wait
before checking the files?'))
             elif len(arg) > 5:
                 wait = True
-                wait_number = int(arg[6:])
+                waitTime = int(arg[6:])
         elif arg.startswith('-start'):
             if len(arg) == 6:
                 firstPageTitle = wikipedia.input(u'From witch page do you want to
start?')
             elif len(arg) > 6:
                 firstPageTitle = arg[7:]
+            firstPageTitle = firstPageTitle.replace("File:",
'').replace("file:", "")
             generator = wikipedia.getSite().allpages(start=firstPageTitle, namespace=6)
             repeat = False
         elif arg.startswith('-page'):
@@ -1594,12 +1600,13 @@
     site = wikipedia.getSite()
 
     # Block of text to translate the parameters set above.
+    image_old_namespace = u"%s:" % site.image_namespace()
     image_namespace = u"File:"
 
     # If the images to skip are 0, set the skip variable to False (the same for the wait
time)
     if skip_number == 0:
         skip = False
-    if wait_number == 0:
+    if waitTime == 0:
         wait = False
 
     # A little block-statement to ensure that the bot will not start with en-parameters
@@ -1644,9 +1651,8 @@
         # Not the main, but the most important loop.
         #parsed = False
         if wait:
-            printWithTimeZone(u'Skipping the images uploaded less than %s seconds
ago..' % wait_number)
             # Let's sleep...
-            generator = mainClass.wait(wait_number, generator, normal, limit)
+            generator = mainClass.wait(waitTime, generator, normal, limit)
         for image in generator:
             # When you've a lot of image to skip before working use this workaround,
otherwise
             # let this commented, thanks. [ decoment also parsed = False if you want to
use it
@@ -1659,8 +1665,8 @@
             # If the generator returns something that is not an image, simply skip it.
             if normal == False and regexGen == False:
                 if image_namespace.lower() not in image.title().lower() and \
-                'file:' not in image.title().lower():
-                    wikipedia.output(u'%s seems not an image, skip it...' %
image.title())
+                image_old_namespace.lower() not in image.title().lower() and
'file:' not in image.title().lower():
+                    wikipedia.output(u'%s seems not an file, skip it...' %
image.title())
                     continue
             if normal:
                 imageData = image
@@ -1675,8 +1681,11 @@
             try:
                 imageName = image.title().split(image_namespace)[1] # Deleting the
namespace (useless here)
             except IndexError:# Namespace image not found, that's not an image!
Let's skip...
-                wikipedia.output(u"%s is not an image, skipping..." %
image.title())
-                continue
+                try:
+                    imageName = image.title().split(image_old_namespace)[1]
+                except IndexError:
+                    wikipedia.output(u"%s is not a file, skipping..." %
image.title())
+                    continue
             mainClass.setParameters(imageName, timestamp, uploader) # Setting the image
for the main class         
             # Skip block
             if skip == True: