[Pywikipedia-l] SVN: [4886] trunk/pywikipedia/checkimages.py
filnik at svn.wikimedia.org
filnik at svn.wikimedia.org
Mon Jan 14 14:49:19 UTC 2008
Revision: 4886
Author: filnik
Date: 2008-01-14 14:49:18 +0000 (Mon, 14 Jan 2008)
Log Message:
-----------
A bit of a rewrite: the script's main body moves into a new checkbot() function, which the __main__ block now calls.
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-01-14 14:12:46 UTC (rev 4885)
+++ trunk/pywikipedia/checkimages.py 2008-01-14 14:49:18 UTC (rev 4886)
@@ -71,7 +71,7 @@
#
import re, time, urllib2
-import wikipedia, config, os, locale
+import wikipedia, config, os, locale, sys
import cPickle, pagegenerators, catlib
locale.setlocale(locale.LC_ALL, '')
@@ -81,7 +81,7 @@
# That's what you want that will be added. (i.e. the {{no source}} with the right day/month/year )
n_txt = {
- 'commons':'\n{{subst:nld}}',
+ 'commons':'\n{{subst:nld}}',
'en' :'\n{{subst:nld}}',
'it' :'\n{{subst:unverdata}}',
'ja':'{{subst:Nsd}}',
@@ -91,7 +91,7 @@
txt_find = {
'commons':['{{no license', '{{nld'],
- 'en':['{{nld', '{{no license'],
+ 'en':['{{nld', '{{no license'],
'hu':[u'{{nincsforrás',u'{{nincslicenc'],
'it':[u'{{unverdata', u'{{unverified'],
'ja':[u'{{no source', u'{{unknown', u'{{non free', u'<!--削除についての議論が終了するまで',],
@@ -160,7 +160,7 @@
# Text that will be add if the bot find a unknown extension.
delete_immediately_notification = {
- 'commons':'The [[:Image:%s]] file has a wrong extension, please check. ~~~~',
+ 'commons':'The [[:Image:%s]] file has a wrong extension, please check. ~~~~',
'en' :'The [[:Image:%s]] file has a wrong extension, please check. ~~~~',
'it' :'{{subst:Utente:Filbot/Ext|%s}}',
'hu' :u'A [[:Kép:%s]] fájlnak rossz a kiterjesztése, kérlek ellenőrízd. ~~~~',
@@ -208,7 +208,7 @@
# The message that the bot will add the second time that find another license problem.
second_message_without_license = {
'commons':None,
- 'en': None,
+ 'en': None,
'it':':{{subst:Utente:Filbot/Senza licenza2|%s}} --~~~~',
'hu':u'\nSzia! Úgy tűnik a [[:Kép:%s]] képpel is hasonló a probléma, mint az előbbivel. Kérlek olvasd el a [[WP:KÉPLIC|feltölthető képek]]ről szóló oldalunk, és segítségért fordulj a [[WP:KF-JO|Jogi kocsmafalhoz]]. Köszönöm --~~~~',
'ja':None,
@@ -218,8 +218,8 @@
# That's useful if you are running the bot on Toolserver.
page_with_settings = {
'commons':u'User:Filbot/Settings',
- 'en':None,
- 'hu':None,
+ 'en':None,
+ 'hu':None,
'it':u'Utente:Nikbot/Settings#Settings',
'ja':None,
'zh':u"User:Alexbot/cisettings#Settings",
@@ -228,7 +228,7 @@
# This is the page where the bot will store them.
report_page = {
'commons':'User:Filbot/Report',
- 'en' :'User:Filnik/Report',
+ 'en' :'User:Filnik/Report',
'it' :'Utente:Nikbot/Report',
'ja':'User:Alexbot/report',
'hu' :'User:Bdamokos/Report',
@@ -608,403 +608,405 @@
break
break
-# Here there is the main loop. I'll take all the (name of the) images and then i'll check them.
-if __name__ == "__main__":
- try:
+def checkbot():
+ # Command line configurable parameters
+ repeat = True # Restart after having check all the images?
+ limit = 80 # How many images check?
+ time_sleep = 30 # How many time sleep after the check?
+ skip_number = 0 # How many images to skip before checking?
+ wait_number = 0 # How many time sleep before the check?
+ commonsActive = False # Check if on commons there's an image with the same name?
+ normal = False # Check the new images or use another generator?
+ urlUsed = False # Use the url-related function instead of the new-pages generator
+ regexGen = False # Use the regex generator
+ untagged = False # Use the untagged generator
+ skip_list = list() # Inizialize the skip list used below
+
+ # Here below there are the parameters.
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-limit'):
+ if len(arg) == 7:
+ limit = int(wikipedia.input(u'How many images do you want to check?'))
+ else:
+ limit = int(arg[7:])
+ if arg.startswith('-time'):
+ if len(arg) == 5:
+ time_sleep = int(wikipedia.input(u'How many seconds do you want runs to be apart?'))
+ else:
+ time_sleep = int(arg[6:])
+ elif arg == '-break':
+ repeat = False
+ elif arg == '-commons':
+ commonsActive = True
+ elif arg.startswith('-skip'):
+ if len(arg) == 5:
+ skip = True
+ skip_number = int(wikipedia.input(u'How many images do you want to skip?'))
+ elif len(arg) > 5:
+ skip = True
+ skip_number = int(arg[6:])
+ elif arg.startswith('-wait'):
+ if len(arg) == 5:
+ wait = True
+ wait_number = int(wikipedia.input(u'How many time do you want to wait before checking the images?'))
+ elif len(arg) > 5:
+ wait = True
+ wait_number = int(arg[6:])
+ elif arg.startswith('-start'):
+ if len(arg) == 6:
+ firstPageTitle = str(wikipedia.input(u'From witch page do you want to start?'))
+ elif len(arg) > 6:
+ firstPageTitle = str(arg[7:])
+ generator = wikipedia.getSite().allpages(start='Image:%s' % firstPageTitle)
+ repeat = False
+ elif arg.startswith('-page'):
+ if len(arg) == 5:
+ regexPageName = str(wikipedia.input(u'Which page do you want to use for the regex?'))
+ elif len(arg) > 5:
+ regexPageName = str(arg[6:])
+ repeat = False
+ regexGen = True
+ elif arg.startswith('-url'):
+ if len(arg) == 4:
+ regexPageUrl = str(wikipedia.input(u'Which url do you want to use for the regex?'))
+ elif len(arg) > 4:
+ regexPageUrl = str(arg[5:])
+ urlUsed = True
+ repeat = False
+ regexGen = True
+ elif arg.startswith('-regex'):
+ if len(arg) == 6:
+ regexpToUse = str(wikipedia.input(u'Which regex do you want to use?'))
+ elif len(arg) > 6:
+ regexpToUse = str(arg[7:])
+ generator = 'regex'
+ repeat = False
+ elif arg.startswith('-cat'):
+ if len(arg) == 4:
+ catName = str(wikipedia.input(u'In which category do I work?'))
+ elif len(arg) > 4:
+ catName = str(arg[5:])
+ catSelected = catlib.Category(wikipedia.getSite(), 'Category:%s' % catName)
+ generator = pagegenerators.CategorizedPageGenerator(catSelected)
+ repeat = False
+ elif arg.startswith('-untagged'):
+ untagged = True
+ if len(arg) == 9:
+ projectUntagged = str(wikipedia.input(u'In which project should I work?'))
+ elif len(arg) > 9:
+ projectUntagged = str(arg[10:])
+ # Understand if the generator it's the default or not.
+ try:
+ generator
+ except NameError:
+ normal = True
+
+ # Define the site.
+ site = wikipedia.getSite()
- # Command line configurable parameters
- repeat = True # Restart after having check all the images?
- limit = 80 # How many images check?
- time_sleep = 30 # How many time sleep after the check?
- skip_number = 0 # How many images to skip before checking?
- wait_number = 0 # How many time sleep before the check?
- commonsActive = False # Check if on commons there's an image with the same name?
- normal = False # Check the new images or use another generator?
- urlUsed = False # Use the url-related function instead of the new-pages generator
- regexGen = False # Use the regex generator
- untagged = False # Use the untagged generator
- skip_list = list() # Inizialize the skip list used below
+ # In this way i find what language, project and what bot do you use.
+ lang = config.mylang
+ project = config.family
+
+ # Block of text to translate the parameters set above.
+ image_n = site.image_namespace()
+ image_namespace = "%s:" % image_n # Example: "User_talk:"
+ unvertext = wikipedia.translate(site, n_txt)
+ commento = wikipedia.translate(site, comm)
+ commento2 = wikipedia.translate(site, comm2)
+ ti_es_ti = wikipedia.translate(site, empty)
+ unverf = wikipedia.translate(site, unver)
+ di = wikipedia.translate(site, delete_immediately)
+ dih = wikipedia.translate(site, delete_immediately_head)
+ din = wikipedia.translate(site, delete_immediately_notification)
+ nh = wikipedia.translate(site, nothing_head)
+ nn = wikipedia.translate(site, nothing_notification)
+ dels = wikipedia.translate(site, del_comm)
+ botolist = wikipedia.translate(site, bot_list)
+ smwl = wikipedia.translate(site, second_message_without_license)
+ settings = wikipedia.translate(site, page_with_settings)
+ rep_page = wikipedia.translate(site, report_page)
+ rep_text = wikipedia.translate(site, report_text)
+ com = wikipedia.translate(site, comm10)
+ TextFind = wikipedia.translate(site, txt_find)
+ hiddentemplate = wikipedia.translate(site, HiddenTemplate)
+ # A template as {{en is not a license! Adding also them in the whitelist template...
+ for langK in wikipedia.Family('wikipedia').knownlanguages:
+ hiddentemplate.append('%s' % langK)
- # Here below there are the parameters.
- for arg in wikipedia.handleArgs():
- if arg.startswith('-limit'):
- if len(arg) == 7:
- limit = int(wikipedia.input(u'How many images do you want to check?'))
- else:
- limit = int(arg[7:])
- if arg.startswith('-time'):
- if len(arg) == 5:
- time_sleep = int(wikipedia.input(u'How many seconds do you want runs to be apart?'))
- else:
- time_sleep = int(arg[6:])
- elif arg == '-break':
- repeat = False
- elif arg == '-commons':
- commonsActive = True
- elif arg.startswith('-skip'):
- if len(arg) == 5:
- skip = True
- skip_number = int(wikipedia.input(u'How many images do you want to skip?'))
- elif len(arg) > 5:
- skip = True
- skip_number = int(arg[6:])
- elif arg.startswith('-wait'):
- if len(arg) == 5:
- wait = True
- wait_number = int(wikipedia.input(u'How many time do you want to wait before checking the images?'))
- elif len(arg) > 5:
- wait = True
- wait_number = int(arg[6:])
- elif arg.startswith('-start'):
- if len(arg) == 6:
- firstPageTitle = str(wikipedia.input(u'From witch page do you want to start?'))
- elif len(arg) > 6:
- firstPageTitle = str(arg[7:])
- generator = wikipedia.getSite().allpages(start='Image:%s' % firstPageTitle)
- repeat = False
- elif arg.startswith('-page'):
- if len(arg) == 5:
- regexPageName = str(wikipedia.input(u'Which page do you want to use for the regex?'))
- elif len(arg) > 5:
- regexPageName = str(arg[6:])
- repeat = False
- regexGen = True
- elif arg.startswith('-url'):
- if len(arg) == 4:
- regexPageUrl = str(wikipedia.input(u'Which url do you want to use for the regex?'))
- elif len(arg) > 4:
- regexPageUrl = str(arg[5:])
- urlUsed = True
- repeat = False
- regexGen = True
- elif arg.startswith('-regex'):
- if len(arg) == 6:
- regexpToUse = str(wikipedia.input(u'Which regex do you want to use?'))
- elif len(arg) > 6:
- regexpToUse = str(arg[7:])
- generator = 'regex'
- repeat = False
- elif arg.startswith('-cat'):
- if len(arg) == 4:
- catName = str(wikipedia.input(u'In which category do I work?'))
- elif len(arg) > 4:
- catName = str(arg[5:])
- catSelected = catlib.Category(wikipedia.getSite(), 'Category:%s' % catName)
- generator = pagegenerators.CategorizedPageGenerator(catSelected)
- repeat = False
- elif arg.startswith('-untagged'):
- untagged = True
- if len(arg) == 9:
- projectUntagged = str(wikipedia.input(u'In which project should I work?'))
- elif len(arg) > 9:
- projectUntagged = str(arg[10:])
+ # If the images to skip are 0, set the skip variable to False (the same for the wait time)
+ if skip_number == 0:
+ skip = False
+ if wait_number == 0:
+ wait = False
+ # nothing = Defining an empty image description
+ nothing = ['', ' ', ' ', ' ', '\n', '\n ', '\n ', '\n\n', '\n \n', ' \n', ' \n ', ' \n \n']
+ # something = Minimal requirements for an image description.
+ # If this fits, no tagging will take place (if there aren't other issues)
+ # MIT license is ok on italian wikipedia, let also this here
+ something = ['{{', "'''MIT license'''"] # Don't put "}}" here, please. Useless and can give problems.
+ # Unused file extensions. Does not contain PDF.
+ notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd", "djvu")
- # Understand if the generator it's the default or not.
- try:
- generator
- except NameError:
- normal = True
-
- # Define the site.
- site = wikipedia.getSite()
-
- # In this way i find what language, project and what bot do you use.
- lang = config.mylang
- project = config.family
+ # A little block-statement to ensure that the bot will not start with en-parameters
+ if lang not in project_inserted:
+ wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
+ wikipedia.stopme()
+ # Some formatting for delete immediately template
+ di = '\n%s' % di
+ dels = dels % di
+
+ # Reading the log of the new images if another generator is not given.
+ if normal == True:
+ if limit == 1:
+ wikipedia.output(u"Retrieving the latest file for checking...")
+ else:
+ wikipedia.output(u"Retrieving the latest %d files for checking..." % limit)
+ # Main Loop
+ while 1:
+ # Defing the Main Class.
+ mainClass = main(site)
+ # Untagged is True? Let's take that generator
+ if untagged == True:
+ generator = mainClass.untaggedGenerator(projectUntagged, rep_page, com)
+ normal = False # Ensure that normal is False
+ # Normal True? Take the default generator
+ if normal == True:
+ generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site)
+ # if urlUsed and regexGen, get the source for the generator
+ if urlUsed == True and regexGen == True:
+ textRegex = pagetext(regexPageUrl)
+ # Not an url but a wiki page as "source" for the regex
+ elif regexGen == True:
+ pageRegex = wikipedia.Page(site, regexPageName)
+ try:
+ textRegex = pageRegex.get()
+ except wikipedia.NoPage:
+ wikipedia.output(u"%s doesn't exist!" % page.title())
+ textRegex = '' # No source, so the bot will quit later.
+ # If generator is the regex' one, use your own Generator using an url or page and a regex.
+ if generator == 'regex' and regexGen == True:
+ generator = mainClass.regexGenerator(regexpToUse, textRegex)
+ # Ok, We (should) have a generator, so let's go on.
+ try:
+ # Take the additional settings for the Project
+ tupla_written = mainClass.takesettings(settings)
+ except wikipedia.Error:
+ # Error? Settings = None
+ wikipedia.output(u'Problems with loading the settigs, run without them.')
+ tupla_written = None
+ some_problem = False
+ # Ensure that if the list given is empty it will be converted to "None"
+ # (but it should be already done in the takesettings() function)
+ if tupla_written == []:
+ tupla_written = None
+ if tupla_written != None:
+ wikipedia.output(u'\t >> Loaded the real-time page... <<')
+ # Save the settings not to lose them (FixMe: Make that part better)
+ filename = "settings.data"
+ f = file(filename, 'w')
+ cPickle.dump(tupla_written, f)
+ f.close()
+ else:
+ # No settings found, No problem, continue.
+ wikipedia.output(u'\t >> No additional settings found! <<')
+ for image in generator:
+ # If I don't inizialize the generator, wait part and skip part are useless
+ if wait:
+ printWithTimeZone(u'Waiting %s seconds before checking the images,' % wait_number)
+ # Let's sleep...
+ time.sleep(wait_number)
+ # Never sleep again (we are in a loop)
+ wait = False
+ # If the generator returns something that is not an image, simply skip it.
+ if normal == False and regexGen == False:
+ if image_namespace.lower() not in image.title().lower() and \
+ 'image:' not in image.title().lower():
+ wikipedia.output(u'%s seems not an image, skip it...' % image.title())
+ continue
+ imageName = image.title().split(image_namespace)[1] # Deleting the namespace (useless here)
+ # Skip block
+ if skip == True:
+ # If the images to skip are more the images to check, make them the same number
+ if skip_number > limit: skip_number = limit
+ if skip_list == []:
+ if skip_number == 1:
+ wikipedia.output(u'Skipping the first image:\n')
+ else:
+ wikipedia.output(u'Skipping the first %s images:\n' % skip_number)
+ if len(skip_list) < skip_number:
+ wikipedia.output(u'Skipping %s...' % imageName)
+ skip_list.append(imageName)
+ if skip_number == 1:
+ wikipedia.output('')
+ skip = False
+ continue
+ else:
+ wikipedia.output('1\n')
+ skip = False
+ elif skip_list == []:
+ wikipedia.output(u'\t\t>> No images to skip...<<')
+ skip_list.append('skip = Off') # Only to print it once
+ if commonsActive == True:
+ response = mainClass.checkImage(imageName)
+ if response == False:
+ continue
+ if tupla_written != None:
+ f = file(filename)
+ tuplaList = cPickle.load(f)
+ parentesi = False
+ delete = False
+ tagged = False
+ extension = imageName.split('.')[-1]
+ # Page => ImagePage
+ p = wikipedia.ImagePage(site, image.title())
+ # Skip deleted images
+ try:
+ g = p.get()
+ except wikipedia.NoPage:
+ wikipedia.output(u"Skipping %s because it has been deleted." % imageName)
+ continue
+ except wikipedia.IsRedirectPage:
+ wikipedia.output(u"The file description for %s is a redirect?!" % imageName )
+ continue
+ for i in TextFind:
+ if i.lower() in g:
+ tagged = True
+ for l in hiddentemplate:
+ if tagged == False:
+ res = re.findall(r'\{\{(?:[Tt]emplate:|)%s(?: \n|\||\n)' % l.lower(), g.lower())
+ if res != []:
+ #print res
+ wikipedia.output(u'A white template found, skipping the template...')
+ # I don't delete the template, because if there is something to change the image page
+ # will be reloaded. I delete it only for the next check part.
+ if l != '' and l != ' ':
+ g = g.lower().replace('{{%s' % l, '')
+ for a_word in something:
+ if a_word in g:
+ parentesi = True
+ for parl in notallowed:
+ if parl.lower() in extension.lower():
+ delete = True
+ some_problem = False
+ if tupla_written != None:
+ for tupla in tuplaList:
+ name = tupla[1]
+ find_tipe = tupla[2]
+ find = tupla[3]
+ find_list = mainClass.load(find)
+ imagechanges = tupla[4]
+ if imagechanges.lower() == 'false':
+ imagestatus = False
+ elif imagechanges.lower() == 'true':
+ imagestatus = True
+ else:
+ wikipedia.output(u"Error! Imagechanges set wrongly!")
+ tupla_written = None
+ break
+ summary = tupla[5]
+ head_2 = tupla[6]
+ text = tupla[7]
+ text = text % imageName
+ mexCatched = tupla[8]
+ wikipedia.setAction(summary)
+ del tupla[0:8]
+ for k in find_list:
+ if find_tipe.lower() == 'findonly':
+ if k.lower() == g.lower():
+ some_problem = True
+ text_used = text
+ head_used = head_2
+ imagestatus_used = imagestatus
+ name_used = name
+ summary_used = summary
+ mex_used = mexCatched
+ break
+ elif find_tipe.lower() == 'find':
+ if k.lower() in g.lower():
+ some_problem = True
+ text_used = text
+ head_used = head_2
+ imagestatus_used = imagestatus
+ name_used = name
+ summary_used = summary
+ mex_used = mexCatched
+ continue
+ if p.exists():
+ # Here begins the check block.
+ if tagged == True:
+ printWithTimeZone(u'%s is already tagged...' % imageName)
+ continue
+ if some_problem == True:
+ if mex_used in g:
+ wikipedia.output(u'Image already fixed. Skip.')
+ continue
+ wikipedia.output(u"The image description for %s contains %s..." % (imageName, name_used))
+ if mex_used.lower() == 'default':
+ mex_used = unvertext
+ if imagestatus_used == False:
+ reported = mainClass.report_image(rep_page, imageName, com, rep_text)
+ else:
+ reported = True
+ if reported == True:
+ #if imagestatus_used == True:
+ report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used)
+ else:
+ wikipedia.output(u"Skipping the image...")
+ some_problem = False
+ continue
+ elif parentesi == True:
+ printWithTimeZone(u"%s seems ok," % imageName)
+ # It works also without this... but i want only to be sure ^^
+ parentesi = False
+ continue
+ elif delete == True:
+ wikipedia.output(u"%s is not a file!" % imageName)
+ # Modify summary text
+ wikipedia.setAction(dels)
+ canctext = di % extension
+ notification = din % imageName
+ head = dih
+ report(canctext, imageName, notification, head)
+ delete = False
+ continue
+ elif g in nothing:
+ wikipedia.output(u"The image description for %s does not contain a license template!" % imageName)
+ if lang == 'commons':
+ head = nh % imageName
+ notification = nn
+ else:
+ notification = nn % imageName
+ head = nh
+ report(unvertext, imageName, notification, head, smwl)
+ continue
+ else:
+ wikipedia.output(u"%s has only text and not the specific license..." % imageName)
+ if lang == 'commons':
+ head = nh % imageName
+ notification = nn
+ else:
+ notification = nn % imageName
+ head = nh
+ report(unvertext, imageName, notification, head, smwl)
+ continue
+ # A little block to perform the repeat or to break.
+ if repeat == True:
+ printWithTimeZone(u"Waiting for %s seconds," % time_sleep)
+ time.sleep(time_sleep)
+ elif repeat == False:
+ wikipedia.output(u"\t\t\t>> STOP! <<")
+ break
- # Block of text to translate the parameters set above.
- image_n = site.image_namespace()
- image_namespace = "%s:" % image_n # Example: "User_talk:"
- unvertext = wikipedia.translate(site, n_txt)
- commento = wikipedia.translate(site, comm)
- commento2 = wikipedia.translate(site, comm2)
- ti_es_ti = wikipedia.translate(site, empty)
- unverf = wikipedia.translate(site, unver)
- di = wikipedia.translate(site, delete_immediately)
- dih = wikipedia.translate(site, delete_immediately_head)
- din = wikipedia.translate(site, delete_immediately_notification)
- nh = wikipedia.translate(site, nothing_head)
- nn = wikipedia.translate(site, nothing_notification)
- dels = wikipedia.translate(site, del_comm)
- botolist = wikipedia.translate(site, bot_list)
- smwl = wikipedia.translate(site, second_message_without_license)
- settings = wikipedia.translate(site, page_with_settings)
- rep_page = wikipedia.translate(site, report_page)
- rep_text = wikipedia.translate(site, report_text)
- com = wikipedia.translate(site, comm10)
- TextFind = wikipedia.translate(site, txt_find)
- hiddentemplate = wikipedia.translate(site, HiddenTemplate)
- # A template as {{en is not a license! Adding also them in the whitelist template...
- for langK in wikipedia.Family('wikipedia').knownlanguages:
- hiddentemplate.append('%s' % langK)
-
- # If the images to skip are 0, set the skip variable to False (the same for the wait time)
- if skip_number == 0:
- skip = False
- if wait_number == 0:
- wait = False
- # nothing = Defining an empty image description
- nothing = ['', ' ', ' ', ' ', '\n', '\n ', '\n ', '\n\n', '\n \n', ' \n', ' \n ', ' \n \n']
- # something = Minimal requirements for an image description.
- # If this fits, no tagging will take place (if there aren't other issues)
- # MIT license is ok on italian wikipedia, let also this here
- something = ['{{', "'''MIT license'''"] # Don't put "}}" here, please. Useless and can give problems.
- # Unused file extensions. Does not contain PDF.
- notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd", "djvu")
-
- # A little block-statement to ensure that the bot will not start with en-parameters
- if lang not in project_inserted:
- wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
- wikipedia.stopme()
- # Some formatting for delete immediately template
- di = '\n%s' % di
- dels = dels % di
-
- # Reading the log of the new images if another generator is not given.
- if normal == True:
- if limit == 1:
- wikipedia.output(u"Retrieving the latest file for checking...")
- else:
- wikipedia.output(u"Retrieving the latest %d files for checking..." % limit)
- # Main Loop
- while 1:
- # Defing the Main Class.
- mainClass = main(site)
- # Untagged is True? Let's take that generator
- if untagged == True:
- generator = mainClass.untaggedGenerator(projectUntagged, rep_page, com)
- normal = False # Ensure that normal is False
- # Normal True? Take the default generator
- if normal == True:
- generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site)
- # if urlUsed and regexGen, get the source for the generator
- if urlUsed == True and regexGen == True:
- textRegex = pagetext(regexPageUrl)
- # Not an url but a wiki page as "source" for the regex
- elif regexGen == True:
- pageRegex = wikipedia.Page(site, regexPageName)
- try:
- textRegex = pageRegex.get()
- except wikipedia.NoPage:
- wikipedia.output(u"%s doesn't exist!" % page.title())
- textRegex = '' # No source, so the bot will quit later.
- # If generator is the regex' one, use your own Generator using an url or page and a regex.
- if generator == 'regex' and regexGen == True:
- generator = mainClass.regexGenerator(regexpToUse, textRegex)
- # Ok, We (should) have a generator, so let's go on.
- try:
- # Take the additional settings for the Project
- tupla_written = mainClass.takesettings(settings)
- except wikipedia.Error:
- # Error? Settings = None
- wikipedia.output(u'Problems with loading the settigs, run without them.')
- tupla_written = None
- some_problem = False
- # Ensure that if the list given is empty it will be converted to "None"
- # (but it should be already done in the takesettings() function)
- if tupla_written == []:
- tupla_written = None
- if tupla_written != None:
- wikipedia.output(u'\t >> Loaded the real-time page... <<')
- # Save the settings not to lose them (FixMe: Make that part better)
- filename = "settings.data"
- f = file(filename, 'w')
- cPickle.dump(tupla_written, f)
- f.close()
- else:
- # No settings found, No problem, continue.
- wikipedia.output(u'\t >> No additional settings found! <<')
- for image in generator:
- # If I don't inizialize the generator, wait part and skip part are useless
- if wait:
- printWithTimeZone(u'Waiting %s seconds before checking the images,' % wait_number)
- # Let's sleep...
- time.sleep(wait_number)
- # Never sleep again (we are in a loop)
- wait = False
- # If the generator returns something that is not an image, simply skip it.
- if normal == False and regexGen == False:
- if image_namespace.lower() not in image.title().lower() and \
- 'image:' not in image.title().lower():
- wikipedia.output(u'%s seems not an image, skip it...' % image.title())
- continue
- imageName = image.title().split(image_namespace)[1] # Deleting the namespace (useless here)
- # Skip block
- if skip == True:
- # If the images to skip are more the images to check, make them the same number
- if skip_number > limit: skip_number = limit
- if skip_list == []:
- if skip_number == 1:
- wikipedia.output(u'Skipping the first image:\n')
- else:
- wikipedia.output(u'Skipping the first %s images:\n' % skip_number)
- if len(skip_list) < skip_number:
- wikipedia.output(u'Skipping %s...' % imageName)
- skip_list.append(imageName)
- if skip_number == 1:
- wikipedia.output('')
- skip = False
- continue
- else:
- wikipedia.output('1\n')
- skip = False
- elif skip_list == []:
- wikipedia.output(u'\t\t>> No images to skip...<<')
- skip_list.append('skip = Off') # Only to print it once
- if commonsActive == True:
- response = mainClass.checkImage(imageName)
- if response == False:
- continue
- if tupla_written != None:
- f = file(filename)
- tuplaList = cPickle.load(f)
- parentesi = False
- delete = False
- tagged = False
- extension = imageName.split('.')[-1]
- # Page => ImagePage
- p = wikipedia.ImagePage(site, image.title())
- # Skip deleted images
- try:
- g = p.get()
- except wikipedia.NoPage:
- wikipedia.output(u"Skipping %s because it has been deleted." % imageName)
- continue
- except wikipedia.IsRedirectPage:
- wikipedia.output(u"The file description for %s is a redirect?!" % imageName )
- continue
- for i in TextFind:
- if i.lower() in g:
- tagged = True
- for l in hiddentemplate:
- if tagged == False:
- res = re.findall(r'\{\{(?:[Tt]emplate:|)%s(?: \n|\||\n)' % l.lower(), g.lower())
- if res != []:
- #print res
- wikipedia.output(u'A white template found, skipping the template...')
- # I don't delete the template, because if there is something to change the image page
- # will be reloaded. I delete it only for the next check part.
- if l != '' and l != ' ':
- g = g.lower().replace('{{%s' % l, '')
- for a_word in something:
- if a_word in g:
- parentesi = True
- for parl in notallowed:
- if parl.lower() in extension.lower():
- delete = True
- some_problem = False
- if tupla_written != None:
- for tupla in tuplaList:
- name = tupla[1]
- find_tipe = tupla[2]
- find = tupla[3]
- find_list = mainClass.load(find)
- imagechanges = tupla[4]
- if imagechanges.lower() == 'false':
- imagestatus = False
- elif imagechanges.lower() == 'true':
- imagestatus = True
- else:
- wikipedia.output(u"Error! Imagechanges set wrongly!")
- tupla_written = None
- break
- summary = tupla[5]
- head_2 = tupla[6]
- text = tupla[7]
- text = text % imageName
- mexCatched = tupla[8]
- wikipedia.setAction(summary)
- del tupla[0:8]
- for k in find_list:
- if find_tipe.lower() == 'findonly':
- if k.lower() == g.lower():
- some_problem = True
- text_used = text
- head_used = head_2
- imagestatus_used = imagestatus
- name_used = name
- summary_used = summary
- mex_used = mexCatched
- break
- elif find_tipe.lower() == 'find':
- if k.lower() in g.lower():
- some_problem = True
- text_used = text
- head_used = head_2
- imagestatus_used = imagestatus
- name_used = name
- summary_used = summary
- mex_used = mexCatched
- continue
- if p.exists():
- # Here begins the check block.
- if tagged == True:
- printWithTimeZone(u'%s is already tagged...' % imageName)
- continue
- if some_problem == True:
- if mex_used in g:
- wikipedia.output(u'Image already fixed. Skip.')
- continue
- wikipedia.output(u"The image description for %s contains %s..." % (imageName, name_used))
- if mex_used.lower() == 'default':
- mex_used = unvertext
- if imagestatus_used == False:
- reported = mainClass.report_image(rep_page, imageName, com, rep_text)
- else:
- reported = True
- if reported == True:
- #if imagestatus_used == True:
- report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used)
- else:
- wikipedia.output(u"Skipping the image...")
- some_problem = False
- continue
- elif parentesi == True:
- printWithTimeZone(u"%s seems ok," % imageName)
- # It works also without this... but i want only to be sure ^^
- parentesi = False
- continue
- elif delete == True:
- wikipedia.output(u"%s is not a file!" % imageName)
- # Modify summary text
- wikipedia.setAction(dels)
- canctext = di % extension
- notification = din % imageName
- head = dih
- report(canctext, imageName, notification, head)
- delete = False
- continue
- elif g in nothing:
- wikipedia.output(u"The image description for %s does not contain a license template!" % imageName)
- if lang == 'commons':
- head = nh % imageName
- notification = nn
- else:
- notification = nn % imageName
- head = nh
- report(unvertext, imageName, notification, head, smwl)
- continue
- else:
- wikipedia.output(u"%s has only text and not the specific license..." % imageName)
- if lang == 'commons':
- head = nh % imageName
- notification = nn
- else:
- notification = nn % imageName
- head = nh
- report(unvertext, imageName, notification, head, smwl)
- continue
- # A little block to perform the repeat or to break.
- if repeat == True:
- printWithTimeZone(u"Waiting for %s seconds," % time_sleep)
- time.sleep(time_sleep)
- elif repeat == False:
- wikipedia.output(u"\t\t\t>> STOP! <<")
- wikipedia.stopme()
- break
- except wikipedia.BadTitle:
- wikipedia.output(u"Wikidown or server's problem, quit")
- wikipedia.stopme()
+# Script entry point: checkbot() holds the main loop that takes all the image names and checks them; wikipedia.stopme() always runs afterwards.
+if __name__ == "__main__":
+ try:
+ try:
+ checkbot()
+ except wikipedia.BadTitle:
+ wikipedia.output(u"Wikidown or server's problem, quit")
+ wikipedia.stopme()
finally:
wikipedia.stopme()
+ sys.exit() # Be sure that the Bot will stop
More information about the Pywikipedia-l mailing list