Revision: 4602
Author: filnik
Date: 2007-11-27 15:01:26 +0000 (Tue, 27 Nov 2007)
Log Message:
-----------
Adding a new script to check the new images but also the old ones
Added Paths:
-----------
trunk/pywikipedia/checkimages.py
Added: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py (rev 0)
+++ trunk/pywikipedia/checkimages.py 2007-11-27 15:01:26 UTC (rev 4602)
@@ -0,0 +1,922 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Script to check recently uploaded files. This script checks if a file
+description is present and if there is only a {{PD}} tag in the description.
+It will tag a file "no source" in the former case, and request the uploader
+to choose a more specific license in the latter case.
+
+This script will have to be configured for each language. Please submit
+translations as addition to the pywikipediabot framework.
+
+Everything that needs customisation is indicated by comments.
+
+This script understands the following command-line arguments:
+
+ -limit - The number of images to check (default: 80)
+
+ -commons - The Bot will check if an image on Commons has the same name
+ and if true it report the image.
+
+ -break - To break the bot after the first check (default: recursive)
+
+ -time[:#] - Time in seconds between repeat runs (default: 30)
+
+ -skip[:#] - The bot skips the first [:#] images (default: 0)
+
+ -start[:#] - Use allpages() as generator (it starts already from Image:[:#])
+
+ -cat[:#] - Use a category as generator
+
+ -regex[:#] - Use regex, must be used with -url or -page
+
+ -page[:#] - Define the name of the wikipage where are the images
+
+ -url[:#] - Define the url where are the images
+
+ -untagged[:#] - Use daniel's tool as generator (
http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php )
+
+---- Instructions for the real-time settings ----
+* For every new block you have to add:
+
+<------- ------->
+
+In this way the Bot can understand where the block starts and take the right parameters.
+
+* Name= Set the name of the block
+* Find= Use it to define what search in the text of the image's description,
+while Findonly= search only if the exactly text that you give is in the image's
description.
+* Summary= That's the summary that the bot will use when it will notify the problem.
+* Head= That's the incipit that the bot will use for the message.
+* Text= This is the template that the bot will use when it will report the image's
problem.
+
+---- Known issues/FIXMEs: ----
+* In repeat mode, skip images already checked. (critical for use on Commons - too many
uploads there)
+* Fix the "real-time" regex and function
+* Add the "catch the language" function for commons.
+* see /home/daniel/public_html/WikiSense/UntaggedImages.php
+* Add new documentation
+* Add a report for the image tagged.
+"""
+
+#
+# (C) Kyle/Orgullomoore, 2006-2007 (newimage.py)
+# (C) Siebrand Mazeland, 2007
+# (C) Filnik, 2007
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id: checkimages.py,v 1.0 2007/11/27 16:00:25 filnik Exp$'
+#
+
+import re, time, urllib2
+import wikipedia, config, os
+import cPickle, pagegenerators, catlib
+
+#########################################################################################################################
+# <------------------------------------------- Change only below!
----------------------------------------------------->#
+#########################################################################################################################
+
# Template to append when a file lacks source/license information (e.g. the
# {{no source}} tag with the right day/month/year).
n_txt = {
    'commons': '\n{{subst:nld}}',
    'en': '\n{{subst:nld}}',
    'it': '\n{{subst:unverdata}}',
    }

# Substrings whose presence in the description means the image is already
# tagged as unverified.
txt_find = {
    'commons': ['{{no license', '{{nld'],
    'en': ['{{nld', '{{no license'],
    'it': ['{{unverdata', '{{unverified'],
    }

# Edit summary used when the bot adds the "no source" template.
comm = {
    'commons': 'Bot: Marking newly uploaded untagged file',
    'en': 'Bot: Marking newly uploaded untagged file',
    'it': "Bot: Aggiungo unverified",
    }

# Edit summary used when the bot notifies the uploader of a license problem.
comm2 = {
    'commons': "Bot: Requesting source information.",
    'en': "Bot: Requesting source information.",
    'it': "Bot: Notifico l'unverified",
    }

# Welcome text used when the uploader's talk page is empty; it is not nice
# to leave only a "no source" warning without a welcome, is it?
empty = {
    'commons': '{{subst:welcome}}\n~~~~\n',
    'en': '{{welcome}}\n~~~~\n',
    'it': '{{benvenuto}}\n~~~~\n',
    }

# General edit summary.
unver = {
    'commons': 'Bot: no source',
    'en': 'Bot: no source',
    'it': 'Bot: Unverified!',
    }

# Template used to tag files whose extension is unknown.
# In reality there are no unknown extensions, only disallowed ones... ^__^
delete_immediately = {
    'commons': "{{db-meta|The file has .%s as extension.}}",
    'en': "{{db-meta|The file has .%s as extension.}}",
    'it': '{{cancella subito|motivo=Il file ha come estensione ".%s"}}',
    }

# Section header for the unknown-extension message.
delete_immediately_head = {
    'commons': "\n== Unknown extension! ==\n",
    'en': "\n== Unknown extension! ==\n",
    'it': '\n== File non specificato ==\n',
    }

# Talk-page text added when the bot finds a disallowed extension.
delete_immediately_notification = {
    'commons': 'The [[:Image:%s]] file has a wrong extension, please check. ~~~~',
    'en': 'The [[:Image:%s]] file has a wrong extension, please check. ~~~~',
    'it': '{{subst:Utente:Filbot/Ext|%s}}',
    }

# Edit summary for the immediate-deletion tagging
# (e.g. "Bot: Adding {{db-meta|The file has .%s as extension.}}").
del_comm = {
    'commons': 'Bot: Adding %s',
    'en': 'Bot: Adding %s',
    'it': 'Bot: Aggiungo %s',
    }

# This is the most used header: it is added when an image has no license.
nothing_head = {
    'commons': "",  # Nothing; the template already contains the header.
    'en': "\n== Image without license ==\n",
    'it': "\n== Immagine senza licenza ==\n",
    }

# Talk-page text added when no license is found.
# Fixed: the commons entry contained a stray duplicated "Image:%s}}" that
# produced broken wikitext with unbalanced braces.
nothing_notification = {
    'commons': "{{subst:User:Filnik/untagged|Image:%s}}\n\n''This message was '''added automatically by [[User:Filbot|Filbot]]''', if you need some help about it, ask [[User:Filnik|its master]] or go to the [[Commons:Help desk]]''. --~~~~",
    'en': "{{subst:image source|Image:%s}} --~~~~",
    'it': "{{subst:Utente:Filbot/Senza licenza|%s}} --~~~~",
    }

# Bots that already run this task on each project.
# NOTE: YOUR bot nick is added automatically; no need to list it twice.
bot_list = {
    'commons': ['Siebot', 'CommonsDelinker'],
    'en': ['OrphanBot'],
    'it': ['Filbot', 'Nikbot', '.snoopyBot.'],
    }

# Shorter message used the second time another license problem is found for
# the same uploader.
second_message_without_license = {
    'commons': None,
    'en': None,
    'it': '{{subst:Utente:Filbot/Senza licenza2|%s}} --~~~~',
    }

# Optional on-wiki settings page, so the behaviour can be tuned without
# touching the code (useful when running the bot on the Toolserver).
page_with_settings = {
    'commons': None,
    'en': None,
    'it': 'Utente:Nikbot/Settings#Settings',
    }

# The bot can report some images (e.g. images that share a name with an
# image on Commons); this is the page where they are stored.
report_page = {
    'commons': 'User:Filbot/Report',
    'en': 'User:Filnik/Report',
    'it': 'Utente:Nikbot/Report',
    }

# Appended after the signature to record the date.
timeselected = u' ~~~~~'

# Line format for entries on the report page.
report_text = {
    'commons': "\n*[[:Image:%s]] " + timeselected,
    'en': "\n*[[:Image:%s]] " + timeselected,
    'it': "\n*[[:Immagine:%s]] " + timeselected,
    }

# Edit summary used when updating the report page.
comm10 = {
    'commons': 'Bot: Updating the log',
    'en': 'Bot: Updating the log',
    'it': 'Bot: Aggiorno il log',
    }

# Templates that are not licenses but appear on many images; they are
# stripped before the description is analysed so they cannot interfere.
HiddenTemplate = {
    'commons': ['{{information'],
    'en': ['{{information'],
    'it': ['{{edp', '{{informazioni file', '{{information'],
    }

# Projects (in alphabetical order) on which the bot is allowed to start.
project_inserted = ['commons', 'en', 'it']
+
+# Ok, that's all. What is below, is the rest of code, now the code is fixed and it
will run correctly in your project.
+#########################################################################################################################
+# <------------------------------------------- Change only above!
----------------------------------------------------->#
+#########################################################################################################################
+
class LogIsFull(wikipedia.Error):
    """Raised when the report log page has grown past the configured size
    limit, so no further entries can be appended safely."""

class NothingFound(wikipedia.Error):
    """Raised when a regular expression search returned no results at all
    (i.e. [] instead of matches)."""
+
def pageText(url):
    """Fetch *url* with a browser-like User-Agent and return the raw body.

    On an HTTP error (urllib2.HTTPError, which the server may return when
    it is overloaded) the function pauses ten seconds and retries exactly
    once; a second failure propagates to the caller.  The original code
    duplicated the whole request sequence for the retry path and could
    leak the response object if read() failed; both fixed here.
    """
    user_agent = ('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; '
                  'rv:1.7.12) Gecko/20050915 Firefox/1.0.7')

    def _fetch():
        # One attempt: build the request, download and always close.
        request = urllib2.Request(url)
        request.add_header("User-Agent", user_agent)
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            response.close()

    try:
        return _fetch()
    except urllib2.HTTPError:
        # When too many users hit the server, urllib2 can raise this error.
        wikipedia.output(u"Server error. Pausing for 10 seconds... " +
                         time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
        time.sleep(10)
        return _fetch()
+
# Here there is the main class.
class main:
    """Core worker: checks image description pages, tags problem images,
    notifies uploaders and keeps a report log.

    NOTE(review): several methods rely on module-level globals defined in
    the __main__ section (p, commento, image_namespace, rep_page, com,
    ti_es_ti, commento2, limit) -- confirm before reusing this class from
    another script.
    """

    def __init__(self, site):
        self.site = site
        # Maximum size (in characters) of the report page before LogIsFull
        # is raised.
        self.logFulNumber = 25000

    def general(self, newtext, image, notification, head, botolist):
        """Store the per-image data used by put_mex()/put_talk().

        Kept as a separate initialisation step (rather than __init__)
        because the instance is created once but re-used for every image.
        """
        self.newtext = newtext
        self.image = image
        self.head = head
        self.notification = notification
        self.botolist = botolist

    def put_mex(self, put=True):
        """Tag the image page with self.newtext and resolve the uploader's
        talk page into self.talk_page.

        Returns False when the image page is gone or carries no file data
        (the problem is reported), True on success.
        """
        # Double check that the image page still exists; it may have been
        # deleted between listing and processing.
        try:
            testoa = p.get()
        except wikipedia.NoPage:
            wikipedia.output(u'%s has been deleted...' % p.title())
            # Fixed: the original fell through and used the undefined
            # name 'testoa' below, raising NameError.
            return False
        if put:
            p.put(testoa + self.newtext, comment=commento, minorEdit=True)
        # paginetta is the ImagePage object for the image itself.
        paginetta = wikipedia.ImagePage(self.site, image_namespace + self.image)
        # Take the upload history once (the original fetched it twice);
        # the latest uploader's name is all we need from it.
        imagedata = paginetta.getFileVersionHistory()
        # When an image was deleted on Commons but somebody edited the
        # local description page, no file data can be found at all.
        if imagedata == list():
            wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image)
            repme = "\n*[[:Image:%s]] seems to have problems ('''no data found in the image''')"
            self.report_image(rep_page, self.image, com, repme)
            # We have a problem! Report and exit!
            return False
        try:
            nick = imagedata[-1][1]
        except IndexError:
            wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image)
            repme = "\n*[[:Image:%s]] seems to have problems ('''no data found in the image''')"
            # We have a problem! Report and exit!
            self.report_image(rep_page, self.image, com, repme)
            return False
        luser = wikipedia.url2link(nick, self.site, self.site)
        pagina_discussione = self.site.namespace(3) + ':' + luser
        # pagina_discussione is the uploader's talk page.
        self.talk_page = wikipedia.Page(self.site, pagina_discussione)
        return True

    def put_talk(self, notification, head, notification2=None, commx=None):
        """Leave a notification on the uploader's talk page.

        If the latest editor of the talk page is one of the known bots, a
        shorter follow-up message (notification2) is appended instead of
        a fresh section with a header.
        """
        talk_page = self.talk_page
        # NOTE(review): notification/head parameters are immediately
        # overwritten with the values stored by general(); the parameters
        # are kept only for signature compatibility with callers.
        notification = self.notification
        if notification2 == None:
            notification2 = notification
        else:
            notification2 = notification2 % self.image
        head = self.head
        second_text = False
        # Load the talk page history to check whether a bot already left a
        # message; the try block keeps compatibility with old wikipedia.py
        # versions whose getVersionHistory() takes fewer arguments.
        edit_to_load = 10
        if talk_page.exists():
            try:
                history = talk_page.getVersionHistory(False, False, False, edit_to_load)
            except TypeError:
                history = talk_page.getVersionHistory(False, False, False)
            latest_edit = history[0]
            latest_user = latest_edit[2]
            wikipedia.output(u'The latest user that has written something is: ' + latest_user)
            testoattuale = talk_page.get()
            # Build the list of bots that add "no source" tags; the
            # running bot's own nick is added automatically.
            lang = config.mylang
            # Standard language
            self.lang = lang
            project = config.family
            bot = config.usernames[project]
            botnick = bot[lang]
            botolist = self.botolist + [botnick]
            for i in botolist:
                if latest_user == i:
                    second_text = True
                    # If the bot's edit is also the oldest one loaded it
                    # probably welcomed the user, so send a full message.
                    if latest_edit == history[-1]:
                        second_text = False
        else:
            wikipedia.output(u'The user page is blank')
            second_text = False
            testoattuale = ti_es_ti
        if commx == None:
            commentox = commento2
        else:
            commentox = commx
        if second_text == True:
            talk_page.put(testoattuale + "\n\n:" + notification2, comment=commentox, minorEdit=False)
        elif second_text == False:
            talk_page.put(testoattuale + head + notification, comment=commentox, minorEdit=False)

    def run_bot(self, textrun, rep_page, com):
        """Yield image names parsed from the HTML of the upload log.

        The links look like (the class="new" attribute is optional and
        marks already-deleted images):
          class="new" title="Immagine:Soldatino2.jpg">Immagine:Soldatino2.jpg</a>" <span class="comment">
        """
        regexp = r'(class=\"new\" |)title=\"' + image_namespace + '(.*?)\.(\w\w\w|jpeg)\">.*?</a>\".*?<span class=\"comment\">'
        pos = 0
        done = list()
        r = re.compile(regexp, re.UNICODE)
        while 1:
            m = r.search(textrun, pos)
            if m == None:
                wikipedia.output(u"\t\t>> All images checked. <<")
                break
            pos = m.end()
            new = m.group(1)
            im = m.group(2)
            ext = m.group(3)
            image = im + "." + ext
            if new != '':
                # class="new" -> red link: the image has been deleted.
                wikipedia.output(u"Skipping %s because it has been deleted." % image)
                done.append(image)
            if image not in done:
                done.append(image)
                yield image

    def untaggedGenerator(self, untaggedProject, rep_page, com):
        """Yield pages listed by Daniel's UntaggedImages Toolserver tool."""
        lang = untaggedProject.split('.', 1)[0]
        project = '.' + untaggedProject.split('.', 1)[1]
        if lang == 'commons':
            link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=100&order=img_timestamp&format=html'
        else:
            link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikilang=' + lang + '&wikifam=' + project + '&order=img_timestamp&max=' + str(limit) + '&ofs=0&max=' + str(limit)
        text = pageText(link)
        # NOTE(review): this regex was truncated in the committed revision;
        # reconstructed to capture the page title from the tool's HTML --
        # confirm against the live tool output.
        regexp = r"""<td valign='top' title='Name'><a href='http://.*?\..*?\.org/w/index\.php\?title=(.*?)'>.*?</a>"""
        results = re.findall(regexp, text)
        if results == []:
            print(link)
            raise NothingFound('Nothing found! Try to use the tool by yourself to be sure that it works!')
        else:
            for result in results:
                yield wikipedia.Page(self.site, result)

    def regexGenerator(self, regexp, textrun):
        """Yield the first capture group of every match of a user-supplied
        regex over textrun, without duplicates."""
        pos = 0
        done = list()
        r = re.compile(r'%s' % regexp, re.UNICODE | re.M)
        while 1:
            m = r.search(textrun, pos)
            if m == None:
                wikipedia.output(u"\t\t>> All images checked. <<")
                break
            pos = m.end()
            image = m.group(1)
            if image not in done:
                done.append(image)
                yield image

    def checkImage(self, image):
        """Return False when the local check must be skipped (the image
        only lives on Commons, or is a known-good "stemma" on it.wiki);
        True otherwise.  Name clashes with Commons are reported."""
        wikipedia.output(u'Checking if %s is on commons...' % image)
        commons = wikipedia.getSite('commons', 'commons')
        if wikipedia.Page(commons, u'Image:' + image).exists():
            wikipedia.output(u'%s is on commons!' % image)
            imagePage = wikipedia.ImagePage(self.site, 'Image:' + image)
            on_commons_text = imagePage.getImagePageHtml()
            if "<div class='sharedUploadNotice'>" in on_commons_text:
                wikipedia.output(u"But, the image doesn't exist on your project! Skip...")
                # It is on Commons but somebody only edited the local
                # description page: nothing to check here.
                return False
            elif 'stemma' in image.lower() and self.site.lang == 'it':
                # "stemma" (coat of arms) images are treated as fine on it.wiki.
                wikipedia.output(u'%s has "stemma" inside, means that it\'s ok.' % image)
                return False
            else:
                repme = "\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]]"
                self.report_image(rep_page, image, com, repme)
                # Problems? No, return True
                return True
        else:
            # Problems? No, return True
            return True

    def report_image(self, rep_page, image, com, rep):
        """Append an entry for *image* to the report page unless it is
        already listed; return True when a new entry was written.

        Raises LogIsFull when the page exceeds self.logFulNumber chars.
        """
        another_page = wikipedia.Page(self.site, rep_page)
        if another_page.exists():
            text_get = another_page.get()
        else:
            text_get = str()
        if len(text_get) >= self.logFulNumber:
            raise LogIsFull("The log page (%s) is full! Please delete the old images reported." % another_page.title())
        # NOTE(review): the raw image name is used as a regex; names
        # containing regex metacharacters may match unexpectedly.
        n = re.compile(image, re.UNICODE)
        y = n.search(text_get, 0)
        if y == None:
            # Reports mentioning Commons carry the image name twice.
            if "\'\'\'Commons\'\'\'" in rep:
                rep_text = rep % (image, image)
            else:
                rep_text = rep % image
            another_page.put(text_get + rep_text, comment=com, minorEdit=False)
            wikipedia.output(u"...Reported...")
            return True
        else:
            wikipedia.output(u"%s is already in the report page." % image)
            return False

    def takesettings(self, settings):
        """Parse the on-wiki real-time settings page.

        Returns a list of [number, name, find_type, find, imagechanges,
        summary, head, text, mexcatched] entries, or None when the page is
        missing or malformed.
        """
        x = wikipedia.Page(self.site, settings)
        lista = list()
        try:
            testo = x.get()
            rxp = "<------- ------->\n\*[Nn]ame=['\"](.*?)['\"]\n\*([Ff]ind|[Ff]indonly)=(.*?)\n\*[Ii]magechanges=(.*?)\n\*[Ss]ummary=['\"](.*?)['\"]\n\*[Hh]ead=['\"](.*?)['\"]\n\*[Tt]ext ?= ?['\"](.*?)['\"]\n\*[Mm]ex ?= ?['\"]?(.*?)['\"]?$"
            r = re.compile(rxp, re.UNICODE | re.M)
            pos = 0
            number = 1
            while 1:
                m = r.search(testo, pos)
                if m == None:
                    if lista == list():
                        wikipedia.output(u"You've set wrongly your settings, please take a look to the relative page. (run without them)")
                        lista = None
                    # Fixed: the original needed a second loop iteration
                    # to fall out after setting lista = None.
                    break
                pos = m.end()
                name = str(m.group(1))
                find_tipe = str(m.group(2))
                find = str(m.group(3))
                imagechanges = str(m.group(4))
                summary = str(m.group(5))
                head = str(m.group(6))
                text = str(m.group(7))
                mexcatched = str(m.group(8))
                tupla = [number, name, find_tipe, find, imagechanges, summary, head, text, mexcatched]
                lista += [tupla]
                number += 1
        except wikipedia.NoPage:
            lista = None
        return lista

    def load(self, raw):
        """Extract the quoted words from a bracketed, comma-separated list
        literal (as found on the settings page) and return them without
        duplicates; return None when nothing is found."""
        list_loaded = list()
        pos = 0
        # Compile once, outside the loop.
        regl = "(\"|\')(.*?)(\"|\')(, |\])"
        pl = re.compile(regl, re.UNICODE)
        while 1:
            xl = pl.search(raw, pos)
            if xl == None:
                # Fixed: the original had an unreachable branch testing the
                # undefined name 'done' (NameError on any match); simply
                # stop scanning and return what was collected, if anything.
                if len(list_loaded) >= 1:
                    return list_loaded
                break
            pos = xl.end()
            word = xl.group(2)
            if word not in list_loaded:
                list_loaded.append(word)
+
# Bundles the whole notification workflow (tagging the image page and
# warning the uploader) into one call, so the main loop does not have to
# drive the main class step by step.
def report(newtext, image, notification, head, notification2 = None, unver = True, commx = None):
    global botolist
    run = main(site = wikipedia.getSite())
    run.general(newtext, image, notification, head, botolist)
    # Tag the image page (or just resolve the talk page when unver is False).
    if unver == True:
        try:
            resPutMex = run.put_mex()
        except wikipedia.NoPage:
            wikipedia.output(u"The page has been deleted! Skip!")
            return
        except wikipedia.EditConflict:
            wikipedia.output(u"Edit conflict! Skip!")
            return
    else:
        try:
            resPutMex = run.put_mex(False)
        except wikipedia.NoPage:
            wikipedia.output(u"The page has been deleted!")
            return
        except wikipedia.EditConflict:
            wikipedia.output(u"Edit conflict! Skip!")
            return
    if resPutMex == False:
        return
    # Notify the uploader, retrying once on an edit conflict.
    try:
        run.put_talk(notification, head, notification2, commx)
    except wikipedia.EditConflict:
        wikipedia.output(u"Edit Conflict! Retrying...")
        try:
            run.put_talk(notification, head, notification2, commx)
        except:
            wikipedia.output(u"Another error... skipping the user..")
+
# Main loop: fetch the (names of the) newest images and check each of them.
if __name__ == "__main__":
    try:
        # Command line configurable parameters
        repeat = True            # keep running forever unless -break is given
        limit = 80               # number of images to check per run
        time_sleep = 30          # seconds between two runs
        skip_number = 0          # images to skip at the start of the run
        commonsActive = False    # also check for name clashes with Commons
        normal = False           # True when the default generator is used
        urlUsed = False          # regex source is a URL rather than a page
        regexGen = False         # use the regex-based generator
        untagged = False         # use Daniel's UntaggedImages tool

        # Parse the command line arguments.
        for arg in wikipedia.handleArgs():
            if arg.startswith('-limit'):
                if len(arg) == 7:
                    limit = int(wikipedia.input(u'How many images do you want to check?'))
                else:
                    limit = int(arg[7:])
            if arg.startswith('-time'):
                if len(arg) == 5:
                    time_sleep = int(wikipedia.input(u'How many seconds do you want runs to be apart?'))
                else:
                    time_sleep = int(arg[6:])
            elif arg == '-break':
                repeat = False
            elif arg == '-commons':
                commonsActive = True
            elif arg.startswith('-skip'):
                if len(arg) == 5:
                    skip = True
                    skip_number = int(wikipedia.input(u'How many images do you want to skip?'))
                elif len(arg) > 5:
                    skip = True
                    skip_number = int(arg[6:])
            elif arg.startswith('-start'):
                if len(arg) == 6:
                    firstPageTitle = str(wikipedia.input(u'From witch page do you want to start?'))
                elif len(arg) > 6:
                    firstPageTitle = str(arg[7:])
                generator = wikipedia.getSite().allpages(start='Image:' + firstPageTitle)
                repeat = False
            elif arg.startswith('-page:'):
                if len(arg) == 6:
                    regexPageName = str(wikipedia.input(u'Which page do you want to use for the regex?'))
                elif len(arg) > 6:
                    regexPageName = str(arg[6:])
                repeat = False
                regexGen = True
            elif arg.startswith('-url:'):
                if len(arg) == 5:
                    regexPageUrl = str(wikipedia.input(u'Which url do you want to use for the regex?'))
                elif len(arg) > 5:
                    regexPageUrl = str(arg[5:])
                urlUsed = True
                repeat = False
                regexGen = True
            elif arg.startswith('-regex:'):
                if len(arg) == 7:
                    regexpToUse = str(wikipedia.input(u'Which regex do you want to use?'))
                elif len(arg) > 7:
                    regexpToUse = str(arg[7:])
                generator = 'regex'
                repeat = False
            elif arg.startswith('-cat'):
                if len(arg) == 4:
                    catName = str(wikipedia.input(u'In which category do I work?'))
                elif len(arg) > 4:
                    catName = str(arg[5:])
                catSelected = catlib.Category(wikipedia.getSite(), 'Category:' + catName)
                generator = pagegenerators.CategorizedPageGenerator(catSelected)
                repeat = False
            elif arg.startswith('-untagged'):
                untagged = True
                if len(arg) == 9:
                    projectUntagged = str(wikipedia.input(u'In which project do I work?'))
                elif len(arg) > 9:
                    projectUntagged = str(arg[10:])

        # If no generator was chosen above, fall back to the default one.
        try:
            generator
        except NameError:
            normal = True

        # URL of the upload log (the http://en.wikipedia.org/ part is added
        # by the framework according to the project; you won't see it here).
        url = "/w/index.php?title=Special:Log&type=upload&user=&page=&pattern=&limit=%d&offset=0" % limit

        # Define the site.
        site = wikipedia.getSite()

        # Language and project in use.
        lang = config.mylang
        project = config.family

        # Translate the message tables defined above for the current site.
        image_n = site.image_namespace()
        image_namespace = image_n + ":"
        unvertext = wikipedia.translate(site, n_txt)
        commento = wikipedia.translate(site, comm)
        commento2 = wikipedia.translate(site, comm2)
        ti_es_ti = wikipedia.translate(site, empty)
        unverf = wikipedia.translate(site, unver)
        di = wikipedia.translate(site, delete_immediately)
        dih = wikipedia.translate(site, delete_immediately_head)
        din = wikipedia.translate(site, delete_immediately_notification)
        nh = wikipedia.translate(site, nothing_head)
        nn = wikipedia.translate(site, nothing_notification)
        dels = wikipedia.translate(site, del_comm)
        botolist = wikipedia.translate(site, bot_list)
        smwl = wikipedia.translate(site, second_message_without_license)
        settings = wikipedia.translate(site, page_with_settings)
        rep_page = wikipedia.translate(site, report_page)
        rep_text = wikipedia.translate(site, report_text)
        com = wikipedia.translate(site, comm10)
        TextFind = wikipedia.translate(site, txt_find)
        hiddentemplate = wikipedia.translate(site, HiddenTemplate)

        if skip_number == 0:
            skip = False
        # nothing = descriptions that count as empty.
        # NOTE(review): exact whitespace variants reconstructed from a
        # wrapped source -- confirm.
        nothing = ['', ' ', '  ', '   ', '\n', '\n ', '\n  ', '\n\n', '\n \n', ' \n', ' \n ', ' \n \n']
        # something = minimal requirements for an image description;
        # if any of these is present, no tagging will take place.
        something = ['{{', 'MIT']
        # Unused file extensions. Does not contain PDF.
        notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd", "djvu")

        # Refuse to run with the default (en) parameters on an unknown project.
        if lang not in project_inserted:
            wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
            wikipedia.stopme()
            # NOTE(review): stopme() does not terminate the script here;
            # execution falls through to the main loop -- confirm intent.

        di = '\n' + di
        dels = dels % di

        # Announce how many new images will be read.
        if normal == True:
            if limit == 1:
                wikipedia.output(u"Retrieving the lastest file for checking...")
            else:
                wikipedia.output(u"Retrieving the lastest %d files for checking..." % limit)

        while 1:
            # The standard generator parses the raw HTML of the upload log.
            if normal == True:
                textrun = site.getUrl(url)

            mainClass = main(site)
            if untagged == True:
                generator = mainClass.untaggedGenerator(projectUntagged, rep_page, com)
                normal = False
            if normal == True:
                generator = mainClass.run_bot(textrun, rep_page, com)

            if urlUsed == True and regexGen == True:
                # Fixed: was pagetext() (NameError); the helper is pageText().
                textRegex = pageText(regexPageUrl)
            elif regexGen == True:
                pageRegex = wikipedia.Page(site, regexPageName)
                try:
                    textRegex = pageRegex.get()
                except wikipedia.NoPage:
                    # Fixed: was page.title(), but no name 'page' exists yet
                    # at this point.
                    wikipedia.output(u"%s doesn't exist!" % pageRegex.title())
                    textRegex = ''
            if generator == 'regex' and regexGen == True:
                generator = mainClass.regexGenerator(regexpToUse, textRegex)

            # Load the optional real-time settings from the wiki.
            try:
                tupla_written = mainClass.takesettings(settings)
            except wikipedia.Error:
                wikipedia.output(u'Problems with loading the settigs, run without them.')
                tupla_written = None
            some_problem = False
            if tupla_written != None:
                wikipedia.output(u'\t   >> Loaded the real-time page... <<')
                filename = "settings.data"
                f = file(filename, 'w')
                cPickle.dump(tupla_written, f)
                f.close()
            else:
                wikipedia.output(u"No additional settings found!")

            # skipok tracks whether the blank separator line after the
            # skipped images has been printed yet.
            # Fixed: it was initialised only in the no-skip branch, so
            # running with -skip raised NameError on the first kept image.
            skipok = False
            if skip == True:
                skip_list = list()
                wikipedia.output(u'Skipping the first ' + str(skip_number) + u' images:\n')
            else:
                wikipedia.output(u'\t\t>> No images to skip...<<')

            for image in generator:
                # Normalise the generator's output to a bare image name.
                if normal == False and regexGen == False:
                    if image_namespace not in image.title():
                        continue
                    image = image.title().split(image_namespace)[1]
                elif regexGen == True:
                    image = image.split(image_namespace)[1]
                if skip == True:
                    if len(skip_list) < skip_number:
                        wikipedia.output(u'Skipping %s...' % image)
                        skip_list.append(image)
                        continue
                    else:
                        if skipok == False:
                            wikipedia.output('')
                            skipok = True
                if commonsActive == True:
                    response = mainClass.checkImage(image)
                    if response == False:
                        continue
                if tupla_written != None:
                    f = file(filename)
                    tuplaList = cPickle.load(f)
                    # Fixed: the pickle file handle was never closed.
                    f.close()
                parentesi = False
                delete = False
                tagged = False
                extension = image.split('.')[-1]
                page = image_namespace + image
                p = wikipedia.ImagePage(site, page)
                # Skip images deleted in the meantime.
                try:
                    g = p.get()
                except wikipedia.NoPage:
                    wikipedia.output(u"Skipping %s because it has been deleted." % image)
                    continue
                except wikipedia.IsRedirectPage:
                    wikipedia.output(u"The file description for %s is a redirect?!" % image)
                    continue
                # Strip the known non-license ("white") templates so they do
                # not interfere with the license detection below.
                for l in hiddentemplate:
                    if l.lower() in g.lower():
                        wikipedia.output(u'A white template found, skipping the template...')
                        # NOTE(review): this also lowercases the whole text,
                        # which the TextFind check below then relies on.
                        g = g.lower().replace(l, '')
                for a_word in something:
                    if a_word in g:
                        parentesi = True
                for parl in notallowed:
                    if parl.lower() in extension.lower():
                        delete = True
                for i in TextFind:
                    if i.lower() in g:
                        tagged = True
                some_problem = False
                if tupla_written != None:
                    # Apply the real-time settings rules to the description.
                    for tupla in tuplaList:
                        name = tupla[1]
                        find_tipe = tupla[2]
                        find = tupla[3]
                        find_list = mainClass.load(find)
                        imagechanges = tupla[4]
                        if imagechanges.lower() == 'false':
                            imagestatus = False
                        elif imagechanges.lower() == 'true':
                            imagestatus = True
                        else:
                            wikipedia.output(u"Error! Imagechanges set wrongly!")
                            tupla_written = None
                            break
                        summary = tupla[5]
                        head_2 = tupla[6]
                        text = tupla[7] % image
                        mexCatched = tupla[8]
                        wikipedia.setAction(summary)
                        # Drop the consumed fields (legacy behaviour).
                        del tupla[0:8]
                        for k in find_list:
                            if find_tipe.lower() == 'findonly':
                                if k.lower() == g.lower():
                                    some_problem = True
                                    text_used = text
                                    head_used = head_2
                                    imagestatus_used = imagestatus
                                    name_used = name
                                    summary_used = summary
                                    mex_used = mexCatched
                                    break
                            elif find_tipe.lower() == 'find':
                                if k.lower() in g.lower():
                                    some_problem = True
                                    text_used = text
                                    head_used = head_2
                                    imagestatus_used = imagestatus
                                    name_used = name
                                    summary_used = summary
                                    mex_used = mexCatched
                                    continue
                if p.exists():
                    # The actual decision tree.
                    if tagged == True:
                        wikipedia.output(image + u' is already tagged... ' + time.strftime("%H:%M:%S", time.localtime()))
                        continue
                    if some_problem == True:
                        if mex_used in g:
                            wikipedia.output(u'Image already fixed. Skip.')
                            continue
                        wikipedia.output(u"The image description for %s contains %s..." % (image, name_used))
                        if mex_used.lower() == 'default':
                            mex_used = unvertext
                        if imagestatus_used == False:
                            reported = mainClass.report_image(rep_page, image, com, rep_text)
                        else:
                            reported = True
                        if reported == True:
                            report(mex_used, image, text_used, "\n" + head_used + "\n", None, imagestatus_used, summary_used)
                        else:
                            wikipedia.output(u"Skipping the image...")
                        some_problem = False
                        continue
                    elif parentesi == True:
                        wikipedia.output(image + u" seems ok, " + time.strftime("%H:%M:%S", time.localtime()))
                        # It works also without this... but only to be sure ^^
                        parentesi = False
                        continue
                    elif delete == True:
                        wikipedia.output(u"%s is not a file!" % image)
                        # Modify summary text
                        wikipedia.setAction(dels)
                        canctext = di % extension
                        notification = din % image
                        head = dih
                        report(canctext, image, notification, head)
                        delete = False
                        continue
                    elif g in nothing:
                        wikipedia.output(u"The image description for %s does not contain a license template!" % image)
                        # Fixed: the head was formatted with "nh % image" on
                        # commons, but no nothing_head entry contains a %s
                        # placeholder, so that always raised TypeError.
                        head = nh
                        if lang == 'commons':
                            notification = nn
                        else:
                            notification = nn % image
                        report(unvertext, image, notification, head, smwl)
                        continue
                    else:
                        wikipedia.output(u"%s has only text and not the specific license..." % image)
                        head = nh
                        if lang == 'commons':
                            notification = nn
                        else:
                            notification = nn % image
                        report(unvertext, image, notification, head, smwl)
                        continue

            # A little block to perform the repeat or to break.
            if repeat == True:
                wikipedia.output(u"Waiting for " + str(time_sleep) + u" seconds, " + time.strftime("%d %b %Y %H:%M:%S (UTC)", time.localtime()))
                time.sleep(time_sleep)
            elif repeat == False:
                wikipedia.output(u"\t\t\t>> STOP! <<")
                wikipedia.stopme()
                break
    except wikipedia.BadTitle:
        wikipedia.output(u"Wikidown or server's problem, quit")
        wikipedia.stopme()
    finally:
        wikipedia.stopme()