Revision: 6146 Author: filnik Date: 2008-12-14 11:16:08 +0000 (Sun, 14 Dec 2008)
Log Message: ----------- Fixed more or less all the problems due to the change of namespace
Modified Paths: -------------- trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py =================================================================== --- trunk/pywikipedia/checkimages.py 2008-12-12 20:24:40 UTC (rev 6145) +++ trunk/pywikipedia/checkimages.py 2008-12-14 11:16:08 UTC (rev 6146) @@ -387,15 +387,15 @@ } # Comment used by the bot while it reports the problem in the uploader's talk duplicates_comment_talk = { - 'commons': u'Bot: Dupe image found', + 'commons': u'Bot: Dupe file found', 'en' : None, - 'it' : u"Bot: Notifico l'immagine doppia trovata", + 'it' : u"Bot: Notifico il file doppio trovato", } # Comment used by the bot while it reports the problem in the image duplicates_comment_image = { - 'commons': u'Bot: Tagging dupe image', + 'commons': u'Bot: Tagging dupe file', 'en' : None, - 'it' : u'Bot: Immagine doppia, da cancellare', + 'it' : u'Bot: File doppio, da cancellare', } # Regex to detect the template put in the image's decription to find the dupe duplicatesRegex = { @@ -650,7 +650,7 @@
def tag_image(self, put = True): """ Function to add the template in the image and to find out - who's the user that has uploaded the image. """ + who's the user that has uploaded the file. """ # Get the image's description reportPageObject = wikipedia.ImagePage(self.site, self.image_namespace + self.image_to_report) try: @@ -670,7 +670,7 @@ else: nick = reportPageObject.getLatestUploader()[0] except wikipedia.NoPage: - wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image_to_report) + wikipedia.output(u"Seems that %s has only the description and not the file..." % self.image_to_report) repme = u"\n*[[:File:%s]] problems '''with the APIs'''" # We have a problem! Report and exit! self.report_image(self.image_to_report, self.rep_page, self.com, repme) @@ -742,7 +742,7 @@ emailClass.send(emailSubj, text_to_send)
def untaggedGenerator(self, untaggedProject, limit): - """ Generator that yield the images without license. It's based on a tool of the toolserver. """ + """ Generator that yield the files without license. It's based on a tool of the toolserver. """ lang = untaggedProject.split('.', 1)[0] project = '.%s' % untaggedProject.split('.', 1)[1] if lang == 'commons': @@ -849,7 +849,7 @@ return number_edits
def checkImageOnCommons(self): - """ Checking if the image is on commons """ + """ Checking if the file is on commons """ wikipedia.output(u'Checking if %s is on commons...' % self.imageName) commons_site = wikipedia.getSite('commons', 'commons') regexOnCommons = r"\n*[[:File:%s]] is also on '''Commons''': [[commons:File:.*?]](?: (same name)|)$" % re.escape(self.imageName) @@ -864,7 +864,7 @@ imagePage = wikipedia.ImagePage(self.site, u'File:%s' % self.imageName) on_commons_text = imagePage.getImagePageHtml() if u"<div class='sharedUploadNotice'>" in on_commons_text: - wikipedia.output(u"But, the image doesn't exist on your project! Skip...") + wikipedia.output(u"But, the file doesn't exist on your project! Skip...") # Problems? Yes! We have to skip the check part for that image! # Because it's on commons but someone has added something on your project. return False @@ -885,7 +885,7 @@ return True
def checkImageDuplicated(self, duplicates_rollback): - """ Function to check the duplicated images. """ + """ Function to check the duplicated files. """ # {{Dupe|File:Blanche_Montel.jpg}} # Skip the stub images #if 'stub' in self.imageName.lower() and self.project == 'wikipedia' and self.site.lang == 'it': @@ -944,7 +944,7 @@ #else: # string += "*[[:%s%s]]" % (self.image_namespace, duplicate) else: - wikipedia.output(u"Already put the dupe-template in the image's page or in the dupe's page. Skip.") + wikipedia.output(u"Already put the dupe-template in the files's page or in the dupe's page. Skip.") return True # Ok - No problem. Let's continue the checking phase older_image_ns = u'%s%s' % (self.image_namespace, older_image) # adding the namespace only_report = False # true if the image are not to be tagged as dupes @@ -996,7 +996,7 @@ return True # Ok - No problem. Let's continue the checking phase
def report_image(self, image_to_report, rep_page = None, com = None, rep_text = None, addings = True, regex = None): - """ Function to report the images in the report page when needed. """ + """ Function to report the files in the report page when needed. """ if rep_page == None: rep_page = self.rep_page if com == None: com = self.com if rep_text == None: rep_text = self.rep_text @@ -1009,7 +1009,7 @@ except wikipedia.IsRedirectPage: text_get = another_page.getRedirectTarget().get() if len(text_get) >= self.logFulNumber: - raise LogIsFull(u"The log page (%s) is full! Please delete the old images reported." % another_page.title()) + raise LogIsFull(u"The log page (%s) is full! Please delete the old files reported." % another_page.title()) pos = 0 # The talk page includes "_" between the two names, in this way i replace them to " " n = re.compile(regex, re.UNICODE|re.M) @@ -1211,10 +1211,10 @@ return list_loaded
def skipImages(self, skip_number, limit): - """ Given a number of images, skip the first -number- images. """ + """ Given a number of files, skip the first -number- files. """ # If the images to skip are more the images to check, make them the same number if skip_number == 0: - wikipedia.output(u'\t\t>> No images to skip...<<') + wikipedia.output(u'\t\t>> No files to skip...<<') return False if skip_number > limit: skip_number = limit # Print a starting message only if no images has been skipped @@ -1240,62 +1240,67 @@ first x seconds. """ imagesToSkip = 0 - while 1: - loadOtherImages = True # ensure that all the images loaded aren't to skip! - for image in generator: - if normal: - imageData = image - image = imageData[0] - timestamp = imageData[1] - else: - timestamp = image.getLatestUploader()[1] - #http://pytz.sourceforge.net/ <- maybe useful? - # '2008-06-18T08:04:29Z' - img_time = datetime.datetime.strptime(timestamp, u"%Y-%m-%dT%H:%M:%SZ") #not relative to localtime - now = datetime.datetime.strptime(str(datetime.datetime.utcnow()).split('.')[0], "%Y-%m-%d %H:%M:%S") #timezones are UTC - # + seconds to be sure that now > img_time - while now < img_time: - now = (now + datetime.timedelta(seconds=1)) - delta = now - img_time - secs_of_diff = delta.seconds - if waitTime > secs_of_diff: - wikipedia.output(u'Skipping %s, uploaded %s seconds ago..' % (image.title(), int(secs_of_diff))) - imagesToSkip += 1 - continue # Still wait - else: - loadOtherImages = False - break # No ok, continue - # if yes, we have skipped all the images given! - if loadOtherImages: - generator = self.site.newimages(number = limit, lestart = timestamp) - imagesToSkip = 0 - # continue to load images! continue - continue - else: - break # ok some other images, go below # if normal, we can take as many images as "limit" has told us, otherwise, sorry, nope. 
if normal: - newGen = list() - imagesToSkip += 1 # some calcs, better add 1 - # Add new images, instead of the images skipped - newImages = self.site.newimages(number = imagesToSkip, lestart = timestamp) - for imageData in generator: - if normal: - image = imageData[0] - timestamp = imageData[1] - uploader = imageData[2] - comment = imageData[3] - newGen.append([image, timestamp, uploader, comment]) + printWithTimeZone(u'Skipping the files uploaded less than %s seconds ago..' % waitTime) + while 1: + loadOtherImages = True # ensure that all the images loaded aren't to skip! + for image in generator: + image = wikipedia.ImagePage(self.site, image.title()) + if normal: + imageData = image + image = imageData[0] + timestamp = imageData[1] + else: + timestamp = image.getLatestUploader()[1] + #http://pytz.sourceforge.net/ <- maybe useful? + # '2008-06-18T08:04:29Z' + img_time = datetime.datetime.strptime(timestamp, u"%Y-%m-%dT%H:%M:%SZ") #not relative to localtime + now = datetime.datetime.strptime(str(datetime.datetime.utcnow()).split('.')[0], "%Y-%m-%d %H:%M:%S") #timezones are UTC + # + seconds to be sure that now > img_time + while now < img_time: + now = (now + datetime.timedelta(seconds=1)) + delta = now - img_time + secs_of_diff = delta.seconds + if waitTime > secs_of_diff: + wikipedia.output(u'Skipping %s, uploaded %s seconds ago..' % (image.title(), int(secs_of_diff))) + imagesToSkip += 1 + continue # Still wait + else: + loadOtherImages = False + break # No ok, continue + # if yes, we have skipped all the images given! + if loadOtherImages: + generator = self.site.newimages(number = limit, lestart = timestamp) + imagesToSkip = 0 + # continue to load images! 
continue + continue else: - image = imageData - newGen.append(image) - num = 0 - for imageData in newImages: - newGen.append(imageData) - return newGen + break # ok some other images, go below + newGen = list() + imagesToSkip += 1 # some calcs, better add 1 + # Add new images, instead of the images skipped + newImages = self.site.newimages(number = imagesToSkip, lestart = timestamp) + for imageData in generator: + if normal: + image = imageData[0] + timestamp = imageData[1] + uploader = imageData[2] + comment = imageData[3] + newGen.append([image, timestamp, uploader, comment]) + else: + image = imageData + newGen.append(image) + num = 0 + for imageData in newImages: + newGen.append(imageData) + return newGen + else: + wikipedia.output(u"The wait option is available only with the standard generator.") + return generator
def isTagged(self): - """ Understand if an image is already tagged or not. """ + """ Understand if a file is already tagged or not. """ TextFind = wikipedia.translate(self.site, txt_find) # Is the image already tagged? If yes, no need to double-check, skip for i in TextFind: @@ -1427,9 +1432,9 @@ # Here begins the check block. if self.some_problem == True: if self.mex_used in self.imageCheckText: - wikipedia.output(u'Image already fixed. Skip.') + wikipedia.output(u'File already fixed. Skip.') return True - wikipedia.output(u"The image description for %s contains %s..." % (self.imageName, self.name_used)) + wikipedia.output(u"The file's description for %s contains %s..." % (self.imageName, self.name_used)) if self.mex_used.lower() == 'default': self.mex_used = unvertext if self.imagestatus_used == False: @@ -1440,7 +1445,7 @@ #if self.imagestatus_used == True: self.report(self.mex_used, self.imageName, self.text_used, u"\n%s\n" % self.head_used, None, self.imagestatus_used, self.summary_used) else: - wikipedia.output(u"Skipping the image...") + wikipedia.output(u"Skipping the file...") self.some_problem = False return True elif brackets == True and license_found != None: @@ -1458,7 +1463,7 @@ delete = False return True elif self.imageCheckText in nothing: - wikipedia.output(u"The image description for %s does not contain a license template!" % self.imageName) + wikipedia.output(u"The file's description for %s does not contain a license template!" % self.imageName) if hiddenTemplateFound and HiddenTN != None and HiddenTN != '' and HiddenTN != ' ': notification = HiddenTN % self.imageName else: @@ -1483,7 +1488,7 @@ limit = 80 # How many images check? time_sleep = 30 # How many time sleep after the check? skip_number = 0 # How many images to skip before checking? - wait_number = 0 # How many time sleep before the check? + waitTime = 0 # How many time sleep before the check? commonsActive = False # Check if on commons there's an image with the same name? 
normal = False # Check the new images or use another generator? urlUsed = False # Use the url-related function instead of the new-pages generator @@ -1497,7 +1502,7 @@ for arg in wikipedia.handleArgs(): if arg.startswith('-limit'): if len(arg) == 7: - limit = int(wikipedia.input(u'How many images do you want to check?')) + limit = int(wikipedia.input(u'How many files do you want to check?')) else: limit = int(arg[7:]) if arg.startswith('-time'): @@ -1522,22 +1527,23 @@ elif arg.startswith('-skip'): if len(arg) == 5: skip = True - skip_number = int(wikipedia.input(u'How many images do you want to skip?')) + skip_number = int(wikipedia.input(u'How many files do you want to skip?')) elif len(arg) > 5: skip = True skip_number = int(arg[6:]) elif arg.startswith('-wait'): if len(arg) == 5: wait = True - wait_number = int(wikipedia.input(u'How many time do you want to wait before checking the images?')) + waitTime = int(wikipedia.input(u'How many time do you want to wait before checking the files?')) elif len(arg) > 5: wait = True - wait_number = int(arg[6:]) + waitTime = int(arg[6:]) elif arg.startswith('-start'): if len(arg) == 6: firstPageTitle = wikipedia.input(u'From witch page do you want to start?') elif len(arg) > 6: firstPageTitle = arg[7:] + firstPageTitle = firstPageTitle.replace("File:", '').replace("file:", "") generator = wikipedia.getSite().allpages(start=firstPageTitle, namespace=6) repeat = False elif arg.startswith('-page'): @@ -1594,12 +1600,13 @@ site = wikipedia.getSite()
# Block of text to translate the parameters set above. + image_old_namespace = u"%s:" % site.image_namespace() image_namespace = u"File:"
# If the images to skip are 0, set the skip variable to False (the same for the wait time) if skip_number == 0: skip = False - if wait_number == 0: + if waitTime == 0: wait = False
# A little block-statement to ensure that the bot will not start with en-parameters @@ -1644,9 +1651,8 @@ # Not the main, but the most important loop. #parsed = False if wait: - printWithTimeZone(u'Skipping the images uploaded less than %s seconds ago..' % wait_number) # Let's sleep... - generator = mainClass.wait(wait_number, generator, normal, limit) + generator = mainClass.wait(waitTime, generator, normal, limit) for image in generator: # When you've a lot of image to skip before working use this workaround, otherwise # let this commented, thanks. [ decoment also parsed = False if you want to use it @@ -1659,8 +1665,8 @@ # If the generator returns something that is not an image, simply skip it. if normal == False and regexGen == False: if image_namespace.lower() not in image.title().lower() and \ - 'file:' not in image.title().lower(): - wikipedia.output(u'%s seems not an image, skip it...' % image.title()) + image_old_namespace.lower() not in image.title().lower() and 'file:' not in image.title().lower(): + wikipedia.output(u'%s seems not an file, skip it...' % image.title()) continue if normal: imageData = image @@ -1675,8 +1681,11 @@ try: imageName = image.title().split(image_namespace)[1] # Deleting the namespace (useless here) except IndexError:# Namespace image not found, that's not an image! Let's skip... - wikipedia.output(u"%s is not an image, skipping..." % image.title()) - continue + try: + imageName = image.title().split(image_old_namespace)[1] + except IndexError: + wikipedia.output(u"%s is not a file, skipping..." % image.title()) + continue mainClass.setParameters(imageName, timestamp, uploader) # Setting the image for the main class # Skip block if skip == True: