[Pywikipedia-l] SVN: [4886] trunk/pywikipedia/checkimages.py

filnik at svn.wikimedia.org filnik at svn.wikimedia.org
Mon Jan 14 14:49:19 UTC 2008


Revision: 4886
Author:   filnik
Date:     2008-01-14 14:49:18 +0000 (Mon, 14 Jan 2008)

Log Message:
-----------
A bit of rewrite.

Modified Paths:
--------------
    trunk/pywikipedia/checkimages.py

Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py	2008-01-14 14:12:46 UTC (rev 4885)
+++ trunk/pywikipedia/checkimages.py	2008-01-14 14:49:18 UTC (rev 4886)
@@ -71,7 +71,7 @@
 #
 
 import re, time, urllib2
-import wikipedia, config, os, locale
+import wikipedia, config, os, locale, sys
 import cPickle, pagegenerators, catlib
 
 locale.setlocale(locale.LC_ALL, '')
@@ -81,7 +81,7 @@
 
 # That's what you want that will be added. (i.e. the {{no source}} with the right day/month/year )
 n_txt = {
-    'commons':'\n{{subst:nld}}',
+        'commons':'\n{{subst:nld}}',
     'en'     :'\n{{subst:nld}}',
     'it'     :'\n{{subst:unverdata}}',
     'ja':'{{subst:Nsd}}',
@@ -91,7 +91,7 @@
 
 txt_find =  {
     'commons':['{{no license', '{{nld'],
-    'en':['{{nld', '{{no license'],
+        'en':['{{nld', '{{no license'],
     'hu':[u'{{nincsforrás',u'{{nincslicenc'],
     'it':[u'{{unverdata', u'{{unverified'],
     'ja':[u'{{no source', u'{{unknown', u'{{non free', u'<!--削除についての議論が終了するまで',],
@@ -160,7 +160,7 @@
 
 # Text that will be add if the bot find a unknown extension.
 delete_immediately_notification = {
-				'commons':'The [[:Image:%s]] file has a wrong extension, please check. ~~~~',
+                                'commons':'The [[:Image:%s]] file has a wrong extension, please check. ~~~~',
 				'en'     :'The [[:Image:%s]] file has a wrong extension, please check. ~~~~',
 				'it'     :'{{subst:Utente:Filbot/Ext|%s}}',
 				'hu'     :u'A [[:Kép:%s]] fájlnak rossz a kiterjesztése, kérlek ellenőrízd. ~~~~',
@@ -208,7 +208,7 @@
 # The message that the bot will add the second time that find another license problem.
 second_message_without_license = {
 				'commons':None,
-				'en': None,
+                                'en': None,
 				'it':':{{subst:Utente:Filbot/Senza licenza2|%s}} --~~~~',
 				'hu':u'\nSzia! Úgy tűnik a [[:Kép:%s]] képpel is hasonló a probléma, mint az előbbivel. Kérlek olvasd el a [[WP:KÉPLIC|feltölthető képek]]ről szóló oldalunk, és segítségért fordulj a [[WP:KF-JO|Jogi kocsmafalhoz]]. Köszönöm --~~~~',
 				'ja':None,
@@ -218,8 +218,8 @@
 # That's useful if you are running the bot on Toolserver.
 page_with_settings = {
 					'commons':u'User:Filbot/Settings',
-					'en':None,
-					'hu':None,
+                                        'en':None,
+                                        'hu':None,
 					'it':u'Utente:Nikbot/Settings#Settings',
 					'ja':None,
 					'zh':u"User:Alexbot/cisettings#Settings",
@@ -228,7 +228,7 @@
 # This is the page where the bot will store them.
 report_page = {
 				'commons':'User:Filbot/Report',
-				'en'     :'User:Filnik/Report',
+                                'en'     :'User:Filnik/Report',
 				'it'     :'Utente:Nikbot/Report',
 				'ja':'User:Alexbot/report',
 				'hu'     :'User:Bdamokos/Report',
@@ -608,403 +608,405 @@
 				break
 		break
                         
-# Here there is the main loop. I'll take all the (name of the) images and then i'll check them.
-if __name__ == "__main__":
-	try:
+def checkbot():
+        # Command line configurable parameters
+        repeat = True # Restart after having check all the images?
+        limit = 80 # How many images check?
+        time_sleep = 30 # How many time sleep after the check?
+        skip_number = 0 # How many images to skip before checking?
+        wait_number = 0 # How many time sleep before the check?
+        commonsActive = False # Check if on commons there's an image with the same name?
+        normal = False # Check the new images or use another generator?
+        urlUsed = False # Use the url-related function instead of the new-pages generator
+        regexGen = False # Use the regex generator
+        untagged = False # Use the untagged generator
+        skip_list = list() # Inizialize the skip list used below
+        
+        # Here below there are the parameters.
+        for arg in wikipedia.handleArgs():
+                if arg.startswith('-limit'):
+                        if len(arg) == 7:
+                                limit = int(wikipedia.input(u'How many images do you want to check?'))
+                        else:
+                                limit = int(arg[7:])
+                if arg.startswith('-time'):
+                        if len(arg) == 5:
+                                time_sleep = int(wikipedia.input(u'How many seconds do you want runs to be apart?'))
+                        else:
+                                time_sleep = int(arg[6:])
+                elif arg == '-break':
+                        repeat = False
+                elif arg == '-commons':
+                        commonsActive = True
+                elif arg.startswith('-skip'):
+                        if len(arg) == 5:
+                                skip = True
+                                skip_number = int(wikipedia.input(u'How many images do you want to skip?'))
+                        elif len(arg) > 5:
+                                skip = True
+                                skip_number = int(arg[6:])
+                elif arg.startswith('-wait'):
+                        if len(arg) == 5:
+                                wait = True
+                                wait_number = int(wikipedia.input(u'How many time do you want to wait before checking the images?'))
+                        elif len(arg) > 5:
+                                wait = True
+                                wait_number = int(arg[6:])
+                elif arg.startswith('-start'):
+                        if len(arg) == 6:
+                                firstPageTitle = str(wikipedia.input(u'From witch page do you want to start?'))
+                        elif len(arg) > 6:
+                                firstPageTitle = str(arg[7:])
+                        generator = wikipedia.getSite().allpages(start='Image:%s' % firstPageTitle)
+                        repeat = False
+                elif arg.startswith('-page'):
+                        if len(arg) == 5:
+                                regexPageName = str(wikipedia.input(u'Which page do you want to use for the regex?'))
+                        elif len(arg) > 5:
+                                regexPageName = str(arg[6:])
+                        repeat = False
+                        regexGen = True
+                elif arg.startswith('-url'):
+                        if len(arg) == 4:
+                                regexPageUrl = str(wikipedia.input(u'Which url do you want to use for the regex?'))
+                        elif len(arg) > 4:
+                                regexPageUrl = str(arg[5:])
+                        urlUsed = True
+                        repeat = False
+                        regexGen = True
+                elif arg.startswith('-regex'):
+                        if len(arg) == 6:
+                                regexpToUse = str(wikipedia.input(u'Which regex do you want to use?'))
+                        elif len(arg) > 6:
+                                regexpToUse = str(arg[7:])
+                        generator = 'regex'
+                        repeat = False
+                elif arg.startswith('-cat'):
+                        if len(arg) == 4:
+                                catName = str(wikipedia.input(u'In which category do I work?'))
+                        elif len(arg) > 4:
+                                catName = str(arg[5:])
+                        catSelected = catlib.Category(wikipedia.getSite(), 'Category:%s' % catName)
+                        generator = pagegenerators.CategorizedPageGenerator(catSelected)
+                        repeat = False
+                elif arg.startswith('-untagged'):
+                        untagged = True    
+                        if len(arg) == 9:
+                                projectUntagged = str(wikipedia.input(u'In which project should I work?'))
+                        elif len(arg) > 9:
+                                projectUntagged = str(arg[10:])          
 
+        # Understand if the generator it's the default or not.
+        try:
+                generator
+        except NameError:
+                normal = True
+                
+        # Define the site.
+        site = wikipedia.getSite()
 
-		# Command line configurable parameters
-		repeat = True # Restart after having check all the images?
-		limit = 80 # How many images check?
-		time_sleep = 30 # How many time sleep after the check?
-		skip_number = 0 # How many images to skip before checking?
-		wait_number = 0 # How many time sleep before the check?
-		commonsActive = False # Check if on commons there's an image with the same name?
-		normal = False # Check the new images or use another generator?
-		urlUsed = False # Use the url-related function instead of the new-pages generator
-		regexGen = False # Use the regex generator
-		untagged = False # Use the untagged generator
-                skip_list = list() # Inizialize the skip list used below
+        # In this way i find what language, project and what bot do you use.
+        lang = config.mylang
+        project = config.family
+        
+        # Block of text to translate the parameters set above.
+        image_n = site.image_namespace()
+        image_namespace = "%s:" % image_n # Example: "User_talk:"
+        unvertext = wikipedia.translate(site, n_txt)
+        commento = wikipedia.translate(site, comm)
+        commento2 = wikipedia.translate(site, comm2)
+        ti_es_ti = wikipedia.translate(site, empty)
+        unverf = wikipedia.translate(site, unver)
+        di = wikipedia.translate(site, delete_immediately)
+        dih = wikipedia.translate(site, delete_immediately_head)
+        din = wikipedia.translate(site, delete_immediately_notification)
+        nh = wikipedia.translate(site, nothing_head)
+        nn = wikipedia.translate(site, nothing_notification)
+        dels = wikipedia.translate(site, del_comm)
+        botolist = wikipedia.translate(site, bot_list)
+        smwl = wikipedia.translate(site, second_message_without_license)
+        settings = wikipedia.translate(site, page_with_settings)
+        rep_page = wikipedia.translate(site, report_page)
+        rep_text = wikipedia.translate(site, report_text)
+        com = wikipedia.translate(site, comm10)
+        TextFind = wikipedia.translate(site, txt_find)
+        hiddentemplate = wikipedia.translate(site, HiddenTemplate)
+        # A template as {{en is not a license! Adding also them in the whitelist template...
+        for langK in wikipedia.Family('wikipedia').knownlanguages:
+                hiddentemplate.append('%s' % langK)
                 
-		# Here below there are the parameters.
-		for arg in wikipedia.handleArgs():
-			if arg.startswith('-limit'):
-				if len(arg) == 7:
-					limit = int(wikipedia.input(u'How many images do you want to check?'))
-				else:
-					limit = int(arg[7:])
-			if arg.startswith('-time'):
-				if len(arg) == 5:
-					time_sleep = int(wikipedia.input(u'How many seconds do you want runs to be apart?'))
-				else:
-					time_sleep = int(arg[6:])
-			elif arg == '-break':
-				repeat = False
-			elif arg == '-commons':
-				commonsActive = True
-			elif arg.startswith('-skip'):
-				if len(arg) == 5:
-					skip = True
-					skip_number = int(wikipedia.input(u'How many images do you want to skip?'))
-				elif len(arg) > 5:
-					skip = True
-					skip_number = int(arg[6:])
-			elif arg.startswith('-wait'):
-				if len(arg) == 5:
-					wait = True
-					wait_number = int(wikipedia.input(u'How many time do you want to wait before checking the images?'))
-				elif len(arg) > 5:
-					wait = True
-					wait_number = int(arg[6:])
-			elif arg.startswith('-start'):
-				if len(arg) == 6:
-					firstPageTitle = str(wikipedia.input(u'From witch page do you want to start?'))
-				elif len(arg) > 6:
-					firstPageTitle = str(arg[7:])
-				generator = wikipedia.getSite().allpages(start='Image:%s' % firstPageTitle)
-				repeat = False
-			elif arg.startswith('-page'):
-				if len(arg) == 5:
-					regexPageName = str(wikipedia.input(u'Which page do you want to use for the regex?'))
-				elif len(arg) > 5:
-					regexPageName = str(arg[6:])
-				repeat = False
-				regexGen = True
-			elif arg.startswith('-url'):
-				if len(arg) == 4:
-					regexPageUrl = str(wikipedia.input(u'Which url do you want to use for the regex?'))
-				elif len(arg) > 4:
-					regexPageUrl = str(arg[5:])
-				urlUsed = True
-				repeat = False
-				regexGen = True
-			elif arg.startswith('-regex'):
-				if len(arg) == 6:
-					regexpToUse = str(wikipedia.input(u'Which regex do you want to use?'))
-				elif len(arg) > 6:
-					regexpToUse = str(arg[7:])
-				generator = 'regex'
-				repeat = False
-			elif arg.startswith('-cat'):
-				if len(arg) == 4:
-					catName = str(wikipedia.input(u'In which category do I work?'))
-				elif len(arg) > 4:
-					catName = str(arg[5:])
-				catSelected = catlib.Category(wikipedia.getSite(), 'Category:%s' % catName)
-				generator = pagegenerators.CategorizedPageGenerator(catSelected)
-				repeat = False
-			elif arg.startswith('-untagged'):
-				untagged = True    
-				if len(arg) == 9:
-					projectUntagged = str(wikipedia.input(u'In which project should I work?'))
-				elif len(arg) > 9:
-					projectUntagged = str(arg[10:])          
+        # If the images to skip are 0, set the skip variable to False (the same for the wait time)
+        if skip_number == 0:
+                skip = False
+        if wait_number == 0:
+                wait = False
+        # nothing = Defining an empty image description
+        nothing = ['', ' ', '  ', '   ', '\n', '\n ', '\n  ', '\n\n', '\n \n', ' \n', ' \n ', ' \n \n']
+        # something = Minimal requirements for an image description.
+        # If this fits, no tagging will take place (if there aren't other issues)
+        # MIT license is ok on italian wikipedia, let also this here
+        something = ['{{', "'''MIT&nbsp;license'''"] # Don't put "}}" here, please. Useless and can give problems.
+        # Unused file extensions. Does not contain PDF.
+        notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd", "djvu")
 
-		# Understand if the generator it's the default or not.
-		try:
-			generator
-		except NameError:
-			normal = True
-			
-		# Define the site.
-		site = wikipedia.getSite()
-
-		# In this way i find what language, project and what bot do you use.
-		lang = config.mylang
-		project = config.family
+        # A little block-statement to ensure that the bot will not start with en-parameters
+        if lang not in project_inserted:
+                wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
+                wikipedia.stopme()
+        # Some formatting for delete immediately template
+        di = '\n%s' % di
+        dels = dels % di
+        
+        # Reading the log of the new images if another generator is not given.
+        if normal == True:
+                if limit == 1:
+                        wikipedia.output(u"Retrieving the latest file for checking...")
+                else:
+                        wikipedia.output(u"Retrieving the latest %d files for checking..." % limit)
+        # Main Loop
+        while 1:
+                # Defing the Main Class.
+                mainClass = main(site)
+                # Untagged is True? Let's take that generator
+                if untagged == True:
+                        generator =  mainClass.untaggedGenerator(projectUntagged, rep_page, com)
+                        normal = False # Ensure that normal is False
+                # Normal True? Take the default generator
+                if normal == True:
+                        generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site)
+                # if urlUsed and regexGen, get the source for the generator
+                if urlUsed == True and regexGen == True:
+                        textRegex = pagetext(regexPageUrl)
+                # Not an url but a wiki page as "source" for the regex
+                elif regexGen == True:
+                        pageRegex = wikipedia.Page(site, regexPageName)
+                        try:
+                                textRegex = pageRegex.get()
+                        except wikipedia.NoPage:
+                                wikipedia.output(u"%s doesn't exist!" % page.title())
+                                textRegex = '' # No source, so the bot will quit later.
+                # If generator is the regex' one, use your own Generator using an url or page and a regex.
+                if generator == 'regex' and regexGen == True:
+                        generator = mainClass.regexGenerator(regexpToUse, textRegex)
+                # Ok, We (should) have a generator, so let's go on.
+                try:
+                        # Take the additional settings for the Project
+                        tupla_written = mainClass.takesettings(settings)
+                except wikipedia.Error:
+                        # Error? Settings = None
+                        wikipedia.output(u'Problems with loading the settigs, run without them.')
+                        tupla_written = None
+                        some_problem = False
+                # Ensure that if the list given is empty it will be converted to "None"
+                # (but it should be already done in the takesettings() function)
+                if tupla_written == []:
+                        tupla_written = None
+                if tupla_written != None:
+                        wikipedia.output(u'\t   >> Loaded the real-time page... <<')
+                        # Save the settings not to lose them (FixMe: Make that part better)
+                        filename = "settings.data"
+                        f = file(filename, 'w')
+                        cPickle.dump(tupla_written, f)
+                        f.close()
+                else:
+                        # No settings found, No problem, continue.
+                        wikipedia.output(u'\t   >> No additional settings found! <<')
+                for image in generator:
+                        # If I don't inizialize the generator, wait part and skip part are useless
+                        if wait:
+                                printWithTimeZone(u'Waiting %s seconds before checking the images,' % wait_number)
+                                # Let's sleep...
+                                time.sleep(wait_number)
+                                # Never sleep again (we are in a loop)
+                                wait = False
+                        # If the generator returns something that is not an image, simply skip it.
+                        if normal == False and regexGen == False:
+                                if image_namespace.lower() not in image.title().lower() and \
+                                'image:' not in image.title().lower():
+                                        wikipedia.output(u'%s seems not an image, skip it...' % image.title())
+                                        continue
+                        imageName = image.title().split(image_namespace)[1] # Deleting the namespace (useless here)
+                        # Skip block
+                        if skip == True:
+                                # If the images to skip are more the images to check, make them the same number
+                                if skip_number > limit: skip_number = limit
+                                if skip_list == []:
+                                        if skip_number == 1:
+                                                wikipedia.output(u'Skipping the first image:\n')
+                                        else:
+                                                wikipedia.output(u'Skipping the first %s images:\n' % skip_number)
+                                if len(skip_list) < skip_number:
+                                        wikipedia.output(u'Skipping %s...' % imageName)
+                                        skip_list.append(imageName)
+                                        if skip_number == 1:
+                                                wikipedia.output('')
+                                                skip = False 
+                                        continue
+                                else:
+                                        wikipedia.output('1\n')
+                                        skip = False					                                               
+                        elif skip_list == []:
+                                wikipedia.output(u'\t\t>> No images to skip...<<')
+                                skip_list.append('skip = Off') # Only to print it once
+                        if commonsActive == True:
+                                response = mainClass.checkImage(imageName)
+                                if response == False:
+                                        continue
+                        if tupla_written != None:
+                                f = file(filename)
+                                tuplaList = cPickle.load(f)
+                        parentesi = False
+                        delete = False
+                        tagged = False
+                        extension = imageName.split('.')[-1]
+                        # Page => ImagePage
+                        p = wikipedia.ImagePage(site, image.title())
+                        # Skip deleted images
+                        try:
+                                g = p.get()
+                        except wikipedia.NoPage:
+                                wikipedia.output(u"Skipping %s because it has been deleted." % imageName)
+                                continue
+                        except wikipedia.IsRedirectPage:
+                                wikipedia.output(u"The file description for %s is a redirect?!" % imageName )
+                                continue
+                        for i in TextFind:
+                                if i.lower() in g:
+                                        tagged = True				
+                        for l in hiddentemplate:
+                                if tagged == False:
+                                        res = re.findall(r'\{\{(?:[Tt]emplate:|)%s(?: \n|\||\n)' % l.lower(), g.lower())
+                                        if res != []:
+                                                #print res
+                                                wikipedia.output(u'A white template found, skipping the template...')
+                                                # I don't delete the template, because if there is something to change the image page
+                                                # will be reloaded. I delete it only for the next check part.
+                                                if l != '' and l != ' ':
+                                                        g = g.lower().replace('{{%s' % l, '')
+                        for a_word in something:
+                                if a_word in g:
+                                        parentesi = True
+                        for parl in notallowed:
+                                if parl.lower() in extension.lower():
+                                        delete = True
+                        some_problem = False
+                        if tupla_written != None:                 
+                                for tupla in tuplaList:
+                                        name = tupla[1]
+                                        find_tipe = tupla[2]
+                                        find = tupla[3]
+                                        find_list = mainClass.load(find)
+                                        imagechanges = tupla[4]
+                                        if imagechanges.lower() == 'false':
+                                                imagestatus = False
+                                        elif imagechanges.lower() == 'true':
+                                                imagestatus = True
+                                        else:
+                                                wikipedia.output(u"Error! Imagechanges set wrongly!")
+                                                tupla_written = None
+                                                break
+                                        summary = tupla[5]
+                                        head_2 = tupla[6]
+                                        text = tupla[7]
+                                        text = text % imageName
+                                        mexCatched = tupla[8]
+                                        wikipedia.setAction(summary)
+                                        del tupla[0:8]
+                                        for k in find_list:
+                                                if find_tipe.lower() == 'findonly':
+                                                        if k.lower() == g.lower():
+                                                                some_problem = True
+                                                                text_used = text
+                                                                head_used = head_2
+                                                                imagestatus_used = imagestatus
+                                                                name_used = name
+                                                                summary_used = summary
+                                                                mex_used = mexCatched                                    
+                                                                break
+                                                elif find_tipe.lower() == 'find':
+                                                        if k.lower() in g.lower():
+                                                                some_problem = True
+                                                                text_used = text
+                                                                head_used = head_2
+                                                                imagestatus_used = imagestatus
+                                                                name_used = name
+                                                                summary_used = summary
+                                                                mex_used = mexCatched
+                                                                continue
+                        if p.exists():
+                                # Here begins the check block.
+                                if tagged == True:
+                                        printWithTimeZone(u'%s is already tagged...' % imageName)
+                                        continue
+                                if some_problem == True:
+                                        if mex_used in g:
+                                                wikipedia.output(u'Image already fixed. Skip.')
+                                                continue
+                                        wikipedia.output(u"The image description for %s contains %s..." % (imageName, name_used))
+                                        if mex_used.lower() == 'default':
+                                                mex_used = unvertext
+                                        if imagestatus_used == False:
+                                                reported = mainClass.report_image(rep_page, imageName, com, rep_text)
+                                        else:
+                                                reported = True
+                                        if reported == True:
+                                                #if imagestatus_used == True:
+                                                report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used)
+                                        else:
+                                                wikipedia.output(u"Skipping the image...")
+                                        some_problem = False
+                                        continue
+                                elif parentesi == True:
+                                        printWithTimeZone(u"%s seems ok," % imageName)
+                                        # It works also without this... but i want only to be sure ^^
+                                        parentesi = False
+                                        continue
+                                elif delete == True:
+                                        wikipedia.output(u"%s is not a file!" % imageName)
+                                        # Modify summary text
+                                        wikipedia.setAction(dels)
+                                        canctext = di % extension
+                                        notification = din % imageName
+                                        head = dih
+                                        report(canctext, imageName, notification, head)
+                                        delete = False
+                                        continue
+                                elif g in nothing:
+                                        wikipedia.output(u"The image description for %s does not contain a license template!" % imageName)
+                                        if lang == 'commons':
+                                                head = nh % imageName
+                                                notification = nn
+                                        else:
+                                                notification = nn % imageName
+                                                head = nh 
+                                        report(unvertext, imageName, notification, head, smwl)
+                                        continue
+                                else:
+                                        wikipedia.output(u"%s has only text and not the specific license..." % imageName)
+                                        if lang == 'commons':
+                                                head = nh % imageName
+                                                notification = nn
+                                        else:
+                                                notification = nn % imageName
+                                                head = nh 
+                                        report(unvertext, imageName, notification, head, smwl)
+                                        continue
+        # A little block to perform the repeat or to break.
+                if repeat == True:
+                        printWithTimeZone(u"Waiting for %s seconds," % time_sleep)
+                        time.sleep(time_sleep)
+                elif repeat == False:
+                        wikipedia.output(u"\t\t\t>> STOP! <<")
+                        break
                 
-		# Block of text to translate the parameters set above.
-		image_n = site.image_namespace()
-		image_namespace = "%s:" % image_n # Example: "User_talk:"
-		unvertext = wikipedia.translate(site, n_txt)
-		commento = wikipedia.translate(site, comm)
-		commento2 = wikipedia.translate(site, comm2)
-		ti_es_ti = wikipedia.translate(site, empty)
-		unverf = wikipedia.translate(site, unver)
-		di = wikipedia.translate(site, delete_immediately)
-		dih = wikipedia.translate(site, delete_immediately_head)
-		din = wikipedia.translate(site, delete_immediately_notification)
-		nh = wikipedia.translate(site, nothing_head)
-		nn = wikipedia.translate(site, nothing_notification)
-		dels = wikipedia.translate(site, del_comm)
-		botolist = wikipedia.translate(site, bot_list)
-		smwl = wikipedia.translate(site, second_message_without_license)
-		settings = wikipedia.translate(site, page_with_settings)
-		rep_page = wikipedia.translate(site, report_page)
-		rep_text = wikipedia.translate(site, report_text)
-		com = wikipedia.translate(site, comm10)
-		TextFind = wikipedia.translate(site, txt_find)
-		hiddentemplate = wikipedia.translate(site, HiddenTemplate)
-		# A template as {{en is not a license! Adding also them in the whitelist template...
-		for langK in wikipedia.Family('wikipedia').knownlanguages:
-                        hiddentemplate.append('%s' % langK)
-                        
-                # If the images to skip are 0, set the skip variable to False (the same for the wait time)
-		if skip_number == 0:
-			skip = False
-		if wait_number == 0:
-                        wait = False
-		# nothing = Defining an empty image description
-		nothing = ['', ' ', '  ', '   ', '\n', '\n ', '\n  ', '\n\n', '\n \n', ' \n', ' \n ', ' \n \n']
-		# something = Minimal requirements for an image description.
-		# If this fits, no tagging will take place (if there aren't other issues)
-		# MIT license is ok on italian wikipedia, let also this here
-		something = ['{{', "'''MIT&nbsp;license'''"] # Don't put "}}" here, please. Useless and can give problems.
-		# Unused file extensions. Does not contain PDF.
-		notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd", "djvu")
-
-		# A little block-statement to ensure that the bot will not start with en-parameters
-		if lang not in project_inserted:
-			wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
-			wikipedia.stopme()
-		# Some formatting for delete immediately template
-		di = '\n%s' % di
-		dels = dels % di
-		
-		# Reading the log of the new images if another generator is not given.
-		if normal == True:
-                        if limit == 1:
-                                wikipedia.output(u"Retrieving the latest file for checking...")
-                        else:
-                                wikipedia.output(u"Retrieving the latest %d files for checking..." % limit)
-                # Main Loop
-		while 1:
-                        # Defing the Main Class.
-			mainClass = main(site)
-			# Untagged is True? Let's take that generator
-			if untagged == True:
-				generator =  mainClass.untaggedGenerator(projectUntagged, rep_page, com)
-				normal = False # Ensure that normal is False
-                        # Normal True? Take the default generator
-			if normal == True:
-				generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site)
-			# if urlUsed and regexGen, get the source for the generator
-			if urlUsed == True and regexGen == True:
-				textRegex = pagetext(regexPageUrl)
-			# Not an url but a wiki page as "source" for the regex
-			elif regexGen == True:
-				pageRegex = wikipedia.Page(site, regexPageName)
-				try:
-					textRegex = pageRegex.get()
-				except wikipedia.NoPage:
-					wikipedia.output(u"%s doesn't exist!" % page.title())
-					textRegex = '' # No source, so the bot will quit later.
-			# If generator is the regex' one, use your own Generator using an url or page and a regex.
-			if generator == 'regex' and regexGen == True:
-				generator = mainClass.regexGenerator(regexpToUse, textRegex)
-			# Ok, We (should) have a generator, so let's go on.
-			try:
-                                # Take the additional settings for the Project
-				tupla_written = mainClass.takesettings(settings)
-			except wikipedia.Error:
-                                # Error? Settings = None
-				wikipedia.output(u'Problems with loading the settigs, run without them.')
-				tupla_written = None
-				some_problem = False
-                        # Ensure that if the list given is empty it will be converted to "None"
-                        # (but it should be already done in the takesettings() function)
-			if tupla_written == []:
-                                tupla_written = None
-			if tupla_written != None:
-				wikipedia.output(u'\t   >> Loaded the real-time page... <<')
-				# Save the settings not to lose them (FixMe: Make that part better)
-				filename = "settings.data"
-				f = file(filename, 'w')
-				cPickle.dump(tupla_written, f)
-				f.close()
-			else:
-                                # No settings found, No problem, continue.
-                                wikipedia.output(u'\t   >> No additional settings found! <<')
-			for image in generator:
-                                # If I don't inizialize the generator, wait part and skip part are useless
-                                if wait:
-                                        printWithTimeZone(u'Waiting %s seconds before checking the images,' % wait_number)
-                                        # Let's sleep...
-                                        time.sleep(wait_number)
-                                        # Never sleep again (we are in a loop)
-                                        wait = False
-                                # If the generator returns something that is not an image, simply skip it.
-				if normal == False and regexGen == False:
-					if image_namespace.lower() not in image.title().lower() and \
-                                        'image:' not in image.title().lower():
-                                                wikipedia.output(u'%s seems not an image, skip it...' % image.title())
-						continue
-                                imageName = image.title().split(image_namespace)[1] # Deleting the namespace (useless here)
-                                # Skip block
-				if skip == True:
-                                        # If the images to skip are more the images to check, make them the same number
-                                        if skip_number > limit: skip_number = limit
-                                        if skip_list == []:
-                                                if skip_number == 1:
-                                                        wikipedia.output(u'Skipping the first image:\n')
-                                                else:
-                                                        wikipedia.output(u'Skipping the first %s images:\n' % skip_number)
-					if len(skip_list) < skip_number:
-						wikipedia.output(u'Skipping %s...' % imageName)
-						skip_list.append(imageName)
-						if skip_number == 1:
-                                                        wikipedia.output('')
-                                                        skip = False 
-						continue
-					else:
-						wikipedia.output('1\n')
-						skip = False					                                               
-				elif skip_list == []:
-                                        wikipedia.output(u'\t\t>> No images to skip...<<')
-                                        skip_list.append('skip = Off') # Only to print it once
-				if commonsActive == True:
-					response = mainClass.checkImage(imageName)
-					if response == False:
-						continue
-				if tupla_written != None:
-					f = file(filename)
-					tuplaList = cPickle.load(f)
-				parentesi = False
-				delete = False
-				tagged = False
-				extension = imageName.split('.')[-1]
-				# Page => ImagePage
-				p = wikipedia.ImagePage(site, image.title())
-				# Skip deleted images
-				try:
-					g = p.get()
-				except wikipedia.NoPage:
-					wikipedia.output(u"Skipping %s because it has been deleted." % imageName)
-					continue
-				except wikipedia.IsRedirectPage:
-					wikipedia.output(u"The file description for %s is a redirect?!" % imageName )
-					continue
-				for i in TextFind:
-					if i.lower() in g:
-						tagged = True				
-				for l in hiddentemplate:
-                                        if tagged == False:
-                                                res = re.findall(r'\{\{(?:[Tt]emplate:|)%s(?: \n|\||\n)' % l.lower(), g.lower())
-                                                if res != []:
-                                                        #print res
-                                                        wikipedia.output(u'A white template found, skipping the template...')
-                                                        # I don't delete the template, because if there is something to change the image page
-                                                        # will be reloaded. I delete it only for the next check part.
-                                                        if l != '' and l != ' ':
-                                                                g = g.lower().replace('{{%s' % l, '')
-				for a_word in something:
-					if a_word in g:
-						parentesi = True
-				for parl in notallowed:
-					if parl.lower() in extension.lower():
-						delete = True
-				some_problem = False
-				if tupla_written != None:                 
-					for tupla in tuplaList:
-						name = tupla[1]
-						find_tipe = tupla[2]
-						find = tupla[3]
-						find_list = mainClass.load(find)
-						imagechanges = tupla[4]
-						if imagechanges.lower() == 'false':
-							imagestatus = False
-						elif imagechanges.lower() == 'true':
-							imagestatus = True
-						else:
-							wikipedia.output(u"Error! Imagechanges set wrongly!")
-							tupla_written = None
-							break
-						summary = tupla[5]
-						head_2 = tupla[6]
-						text = tupla[7]
-						text = text % imageName
-						mexCatched = tupla[8]
-						wikipedia.setAction(summary)
-						del tupla[0:8]
-						for k in find_list:
-							if find_tipe.lower() == 'findonly':
-								if k.lower() == g.lower():
-									some_problem = True
-									text_used = text
-									head_used = head_2
-									imagestatus_used = imagestatus
-									name_used = name
-									summary_used = summary
-									mex_used = mexCatched                                    
-									break
-							elif find_tipe.lower() == 'find':
-								if k.lower() in g.lower():
-									some_problem = True
-									text_used = text
-									head_used = head_2
-									imagestatus_used = imagestatus
-									name_used = name
-									summary_used = summary
-									mex_used = mexCatched
-									continue
-				if p.exists():
-					# Here begins the check block.
-					if tagged == True:
-                                                printWithTimeZone(u'%s is already tagged...' % imageName)
-						continue
-					if some_problem == True:
-						if mex_used in g:
-							wikipedia.output(u'Image already fixed. Skip.')
-							continue
-						wikipedia.output(u"The image description for %s contains %s..." % (imageName, name_used))
-						if mex_used.lower() == 'default':
-							mex_used = unvertext
-						if imagestatus_used == False:
-							reported = mainClass.report_image(rep_page, imageName, com, rep_text)
-						else:
-							reported = True
-						if reported == True:
-							#if imagestatus_used == True:
-							report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used)
-						else:
-							wikipedia.output(u"Skipping the image...")
-						some_problem = False
-						continue
-					elif parentesi == True:
-                                                printWithTimeZone(u"%s seems ok," % imageName)
-						# It works also without this... but i want only to be sure ^^
-						parentesi = False
-						continue
-					elif delete == True:
-						wikipedia.output(u"%s is not a file!" % imageName)
-						# Modify summary text
-						wikipedia.setAction(dels)
-						canctext = di % extension
-						notification = din % imageName
-						head = dih
-						report(canctext, imageName, notification, head)
-						delete = False
-						continue
-					elif g in nothing:
-						wikipedia.output(u"The image description for %s does not contain a license template!" % imageName)
-						if lang == 'commons':
-							head = nh % imageName
-							notification = nn
-						else:
-							notification = nn % imageName
-							head = nh 
-						report(unvertext, imageName, notification, head, smwl)
-						continue
-					else:
-						wikipedia.output(u"%s has only text and not the specific license..." % imageName)
-						if lang == 'commons':
-							head = nh % imageName
-							notification = nn
-						else:
-							notification = nn % imageName
-							head = nh 
-						report(unvertext, imageName, notification, head, smwl)
-						continue
-		# A little block to perform the repeat or to break.
-			if repeat == True:
-                                printWithTimeZone(u"Waiting for %s seconds," % time_sleep)
-				time.sleep(time_sleep)
-			elif repeat == False:
-				wikipedia.output(u"\t\t\t>> STOP! <<")
-				wikipedia.stopme()
-				break
-	except wikipedia.BadTitle:
-		wikipedia.output(u"Wikidown or server's problem, quit")
-		wikipedia.stopme()
+# Here there is the main loop. I'll take all the (name of the) images and then i'll check them.
+if __name__ == "__main__":
+        try:
+                try:
+                        checkbot()
+                except wikipedia.BadTitle:
+                        wikipedia.output(u"Wikidown or server's problem, quit")
+                        wikipedia.stopme()
 	finally:
 		wikipedia.stopme()
+		sys.exit() # Be sure that the Bot will stop





More information about the Pywikipedia-l mailing list