[Pywikipedia-l] SVN: [6003] trunk/pywikipedia/checkimages.py - pywikibot

20 Oct 2008

Revision: 6003
Author:   filnik
Date:     2008-10-20 13:44:30 +0000 (Mon, 20 Oct 2008)

Log Message:
-----------
Bug fix in the whitelistEraser function (the regex was not fully working);
the wait() function has been substantially improved.

Modified Paths:
--------------
    trunk/pywikipedia/checkimages.py

Modified: trunk/pywikipedia/checkimages.py
===================================================================

--- trunk/pywikipedia/checkimages.py	2008-10-20 03:11:06 UTC (rev 6002)
+++ trunk/pywikipedia/checkimages.py	2008-10-20 13:44:30 UTC (rev 6003)
@@ -1177,27 +1177,65 @@
             wikipedia.output('') # Print a blank line.
             return False
         
-    def wait(self, waitTime):
+    def wait(self, waitTime, generator, normal, limit):
         """ Skip the images uploaded before x seconds to let
             the users to fix the image's problem alone in the
             first x seconds.
         """
-        #http://pytz.sourceforge.net/ <- maybe useful?
-        imagedata = self.timestamp
-        # '2008-06-18T08:04:29Z'
-        img_time = datetime.datetime.strptime(imagedata, u"%Y-%m-%dT%H:%M:%SZ") #not relative to localtime
-        now = datetime.datetime.strptime(str(datetime.datetime.utcnow()).split('.')[0], "%Y-%m-%d %H:%M:%S") #timezones are UTC
-        # + seconds to be sure that now > img_time
-        while now < img_time:
-            now = (now + datetime.timedelta(seconds=1))
-        delta = now - img_time
-        secs_of_diff = delta.seconds
-        if waitTime > secs_of_diff:
-            wikipedia.output(u'Skipping %s, uploaded %s seconds ago..' % (self.imageName, int(secs_of_diff)))
-            return True # Still wait
-        else:
-            return False # No ok, continue
-
+        imagesToSkip = 0
+        while 1:            
+            loadOtherImages = True # ensure that all the images loaded aren't to skip!
+            for image in generator:
+                if normal:
+                    imageData = image
+                    image = imageData[0]
+                    timestamp = imageData[1]
+                else:
+                    timestamp = image.getLatestUploader()[1]
+                #http://pytz.sourceforge.net/ <- maybe useful?
+                # '2008-06-18T08:04:29Z'
+                img_time = datetime.datetime.strptime(timestamp, u"%Y-%m-%dT%H:%M:%SZ") #not relative to localtime
+                now = datetime.datetime.strptime(str(datetime.datetime.utcnow()).split('.')[0], "%Y-%m-%d %H:%M:%S") #timezones are UTC
+                # + seconds to be sure that now > img_time
+                while now < img_time:
+                    now = (now + datetime.timedelta(seconds=1))
+                delta = now - img_time
+                secs_of_diff = delta.seconds
+                if waitTime > secs_of_diff:
+                    wikipedia.output(u'Skipping %s, uploaded %s seconds ago..' % (image.title(), int(secs_of_diff)))
+                    imagesToSkip += 1
+                    continue # Still wait
+                else:
+                    loadOtherImages = False
+                    break # No ok, continue
+            # if yes, we have skipped all the images given!
+            if loadOtherImages:
+                generator = self.site.newimages(number = limit, lestart = timestamp)
+                imagesToSkip = 0
+                # continue to load images! continue
+                continue
+            else:
+                break # ok some other images, go below
+        # if normal, we can take as many images as "limit" has told us, otherwise, sorry, nope.
+        if normal:
+            newGen = list()
+            imagesToSkip += 1 # some calcs, better add 1
+            # Add new images, instead of the images skipped
+            newImages = self.site.newimages(number = imagesToSkip, lestart = timestamp)
+            for imageData in generator:
+                if normal:
+                    image = imageData[0]
+                    timestamp = imageData[1]
+                    uploader = imageData[2]
+                    comment = imageData[3]
+                    newGen.append([image, timestamp, uploader, comment])
+                else:
+                    image = imageData
+                    newGen.append(image)
+            num = 0
+            for imageData in newImages:
+                newGen.append(imageData)
+        return newGen
      
     def isTagged(self):
         """ Understand if an image is already tagged or not. """
@@ -1220,15 +1258,17 @@
         # Load the white templates(hidden template is the same as white template, regarding the meaning)
         white_templates_found = 0
         hiddentemplate = self.loadHiddenTemplates()
-        for l in hiddentemplate:
+        for regexWhiteLicense in hiddentemplate:
+            fullRegexWL = r'\{\{(?:template:|)(?:%s[ \n]*?(?:\n|\||\}|<)|creator:)' % regexWhiteLicense.lower()
             if self.tagged == False:
                 # why creator? Because on commons there's a template such as {{creator:name}} that.. works
-                res = re.findall(r'\{\{(?:[Tt]emplate:|)(?:%s[ \n]*?(?:\n|\||\}|<)|creator:)' % l.lower(), self.imageCheckText.lower())
+                res = re.findall(fullRegexWL, self.imageCheckText.lower())
                 if res != []:
-                    white_templates_found += 1
-                    if l != '' and l != ' ': # Check that l is not nothing or a space
+                    for element in res: # if a regex gives more than 1 results, are more than 1 template found.
+                        white_templates_found += 1
+                    if regexWhiteLicense != '' and regexWhiteLicense != ' ': # Check that regexWhiteLicense is not nothing or a space
                         # Deleting! (replace the template with nothing)
-                        regex_white_template = re.compile(r'\{\{(?:template:|)(?:%s|creator)' % l, re.IGNORECASE)
+                        regex_white_template = re.compile(fullRegexWL, re.IGNORECASE)
                         self.imageCheckText = regex_white_template.sub(r'', self.imageCheckText)
         if white_templates_found == 1:
             wikipedia.output(u'A white template found, skipping the template...')
@@ -1348,7 +1388,7 @@
             self.findAdditionalProblems()
         # If the image exists (maybe it has been deleting during the oder
         # checking parts or something, who knows? ;-))
-        #if p.exists(): <-- improve the bot, better to make as
+        #if p.exists(): <-- improve thebot, better to make as
         #                   less call to the server as possible
         # Here begins the check block.
         if self.tagged == True:
@@ -1585,6 +1625,8 @@
         #parsed = False
         if wait:
             printWithTimeZone(u'Skipping the images uploaded less than %s seconds ago..' % wait_number)
+            # Let's sleep...
+            generator = mainClass.wait(wait_number, generator, normal, limit)
         for image in generator:
             # When you've a lot of image to skip before working use this workaround, otherwise
             # let this commented, thanks. [ decoment also parsed = False if you want to use it
@@ -1615,13 +1657,7 @@
             except IndexError:# Namespace image not found, that's not an image! Let's skip...
                 wikipedia.output(u"%s is not an image, skipping..." % image.title())
                 continue
-            mainClass.setParameters(imageName, timestamp, uploader) # Setting the image for the main class
-            # If I don't inizialize the generator, wait part and skip part are useless
-            if wait:
-                # Let's sleep...
-                wait = mainClass.wait(wait_number)
-                if wait:
-                    continue          
+            mainClass.setParameters(imageName, timestamp, uploader) # Setting the image for the main class         
             # Skip block
             if skip == True:
                 skip = mainClass.skipImages(skip_number, limit)