Revision: 6003 Author: filnik Date: 2008-10-20 13:44:30 +0000 (Mon, 20 Oct 2008)
Log Message: ----------- Bug fix in the whitelistEraser function (its regex was not fully working); the wait() function has been substantially improved.
Modified Paths: -------------- trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py =================================================================== --- trunk/pywikipedia/checkimages.py 2008-10-20 03:11:06 UTC (rev 6002) +++ trunk/pywikipedia/checkimages.py 2008-10-20 13:44:30 UTC (rev 6003) @@ -1177,27 +1177,65 @@ wikipedia.output('') # Print a blank line. return False
- def wait(self, waitTime): + def wait(self, waitTime, generator, normal, limit): """ Skip the images uploaded before x seconds to let the users to fix the image's problem alone in the first x seconds. """ - #http://pytz.sourceforge.net/ <- maybe useful? - imagedata = self.timestamp - # '2008-06-18T08:04:29Z' - img_time = datetime.datetime.strptime(imagedata, u"%Y-%m-%dT%H:%M:%SZ") #not relative to localtime - now = datetime.datetime.strptime(str(datetime.datetime.utcnow()).split('.')[0], "%Y-%m-%d %H:%M:%S") #timezones are UTC - # + seconds to be sure that now > img_time - while now < img_time: - now = (now + datetime.timedelta(seconds=1)) - delta = now - img_time - secs_of_diff = delta.seconds - if waitTime > secs_of_diff: - wikipedia.output(u'Skipping %s, uploaded %s seconds ago..' % (self.imageName, int(secs_of_diff))) - return True # Still wait - else: - return False # No ok, continue - + imagesToSkip = 0 + while 1: + loadOtherImages = True # ensure that all the images loaded aren't to skip! + for image in generator: + if normal: + imageData = image + image = imageData[0] + timestamp = imageData[1] + else: + timestamp = image.getLatestUploader()[1] + #http://pytz.sourceforge.net/ <- maybe useful? + # '2008-06-18T08:04:29Z' + img_time = datetime.datetime.strptime(timestamp, u"%Y-%m-%dT%H:%M:%SZ") #not relative to localtime + now = datetime.datetime.strptime(str(datetime.datetime.utcnow()).split('.')[0], "%Y-%m-%d %H:%M:%S") #timezones are UTC + # + seconds to be sure that now > img_time + while now < img_time: + now = (now + datetime.timedelta(seconds=1)) + delta = now - img_time + secs_of_diff = delta.seconds + if waitTime > secs_of_diff: + wikipedia.output(u'Skipping %s, uploaded %s seconds ago..' % (image.title(), int(secs_of_diff))) + imagesToSkip += 1 + continue # Still wait + else: + loadOtherImages = False + break # No ok, continue + # if yes, we have skipped all the images given! 
+ if loadOtherImages: + generator = self.site.newimages(number = limit, lestart = timestamp) + imagesToSkip = 0 + # continue to load images! continue + continue + else: + break # ok some other images, go below + # if normal, we can take as many images as "limit" has told us, otherwise, sorry, nope. + if normal: + newGen = list() + imagesToSkip += 1 # some calcs, better add 1 + # Add new images, instead of the images skipped + newImages = self.site.newimages(number = imagesToSkip, lestart = timestamp) + for imageData in generator: + if normal: + image = imageData[0] + timestamp = imageData[1] + uploader = imageData[2] + comment = imageData[3] + newGen.append([image, timestamp, uploader, comment]) + else: + image = imageData + newGen.append(image) + num = 0 + for imageData in newImages: + newGen.append(imageData) + return newGen
def isTagged(self): """ Understand if an image is already tagged or not. """ @@ -1220,15 +1258,17 @@ # Load the white templates(hidden template is the same as white template, regarding the meaning) white_templates_found = 0 hiddentemplate = self.loadHiddenTemplates() - for l in hiddentemplate: + for regexWhiteLicense in hiddentemplate: + fullRegexWL = r'{{(?:template:|)(?:%s[ \n]*?(?:\n|||}|<)|creator:)' % regexWhiteLicense.lower() if self.tagged == False: # why creator? Because on commons there's a template such as {{creator:name}} that.. works - res = re.findall(r'{{(?:[Tt]emplate:|)(?:%s[ \n]*?(?:\n|||}|<)|creator:)' % l.lower(), self.imageCheckText.lower()) + res = re.findall(fullRegexWL, self.imageCheckText.lower()) if res != []: - white_templates_found += 1 - if l != '' and l != ' ': # Check that l is not nothing or a space + for element in res: # if a regex gives more than 1 results, are more than 1 template found. + white_templates_found += 1 + if regexWhiteLicense != '' and regexWhiteLicense != ' ': # Check that regexWhiteLicense is not nothing or a space # Deleting! (replace the template with nothing) - regex_white_template = re.compile(r'{{(?:template:|)(?:%s|creator)' % l, re.IGNORECASE) + regex_white_template = re.compile(fullRegexWL, re.IGNORECASE) self.imageCheckText = regex_white_template.sub(r'', self.imageCheckText) if white_templates_found == 1: wikipedia.output(u'A white template found, skipping the template...') @@ -1348,7 +1388,7 @@ self.findAdditionalProblems() # If the image exists (maybe it has been deleting during the oder # checking parts or something, who knows? ;-)) - #if p.exists(): <-- improve the bot, better to make as + #if p.exists(): <-- improve thebot, better to make as # less call to the server as possible # Here begins the check block. if self.tagged == True: @@ -1585,6 +1625,8 @@ #parsed = False if wait: printWithTimeZone(u'Skipping the images uploaded less than %s seconds ago..' % wait_number) + # Let's sleep... 
+ generator = mainClass.wait(wait_number, generator, normal, limit) for image in generator: # When you've a lot of image to skip before working use this workaround, otherwise # let this commented, thanks. [ decoment also parsed = False if you want to use it @@ -1615,13 +1657,7 @@ except IndexError:# Namespace image not found, that's not an image! Let's skip... wikipedia.output(u"%s is not an image, skipping..." % image.title()) continue - mainClass.setParameters(imageName, timestamp, uploader) # Setting the image for the main class - # If I don't inizialize the generator, wait part and skip part are useless - if wait: - # Let's sleep... - wait = mainClass.wait(wait_number) - if wait: - continue + mainClass.setParameters(imageName, timestamp, uploader) # Setting the image for the main class # Skip block if skip == True: skip = mainClass.skipImages(skip_number, limit)
pywikipedia-l@lists.wikimedia.org