[Pywikipedia-l] SVN: [5369] trunk/pywikipedia/checkimages.py
filnik at svn.wikimedia.org
filnik at svn.wikimedia.org
Wed May 14 13:07:37 UTC 2008
Revision: 5369
Author: filnik
Date: 2008-05-14 13:07:37 +0000 (Wed, 14 May 2008)
Log Message:
-----------
Updating the dupe-function
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-05-14 11:33:04 UTC (rev 5368)
+++ trunk/pywikipedia/checkimages.py 2008-05-14 13:07:37 UTC (rev 5369)
@@ -59,10 +59,14 @@
* Text= This is the template that the bot will use when it will report the image's problem.
---- Known issues/FIXMEs: ----
+* Clean the code, some passages are pretty difficult to understand if you're not the coder.
* Fix the "real-time" regex and function
* Add the "catch the language" function for commons.
* Fix and reorganise the new documentation
* Add a report for the image tagged.
+* Duplicates: check the usage, find out which image is most usued and "delete" the other ones.
+* -> if the other ones are used, advise it in the message!
+
"""
#
@@ -322,9 +326,9 @@
}
# Stub - will make it better in future, work in progress.
duplicatesText = {
- 'commons':u'\n{{Dupe|Image:__image__}}',
+ 'commons':u'\n{{Dupe|__image__}}',
'en':None,
- 'it':u'\n{{Cancella subito|Immagine doppia di [[:Immagine:__image__]]}}',
+ 'it':u'\n{{Cancella subito|Immagine doppia di [[:__image__]]}}',
}
duplicate_user_talk_head = {
'commons':None,
@@ -336,11 +340,16 @@
'it':u"{{subst:Utente:Filbot/duplicati|%s|%s}} --~~~~",
}
-duplicates_comment = {
+duplicates_comment_talk = {
'commons':u'Bot: Dupe image found',
'en':None,
- 'it':u'Bot: Trovata immagine doppia',
+ 'it':u"Bot: Notifico l'immagine doppia trovata",
}
+duplicates_comment_image = {
+ 'commons':u'Bot: Tagging dupe image',
+ 'en':None,
+ 'it':u'Bot: Immagine doppia, da cancellare',
+ }
duplicatesRegex = {
'commons':r'\{\{(?:[Tt]emplate:|)[Dd]upe[|}]',
'en':None,
@@ -415,6 +424,10 @@
def returnOlderTime(listGiven, timeListGiven):
""" Get some time and return the oldest of them """
+ #print listGiven; print timeListGiven
+ #Output:
+ #[[1210596312.0, u'Autoritratto.png'], [1210590240.0, u'Duplicato.png'], [1210592052.0, u'Duplicato_2.png']]
+ #[1210596312.0, 1210590240.0, 1210592052.0]
for element in listGiven:
time = element[0]
imageName = element[1]
@@ -494,7 +507,8 @@
self.duplicatesReport = duplicatesReport
image_n = self.site.image_namespace()
self.image_namespace = "%s:" % image_n # Example: "User_talk:"
- def report(self, newtext, image, notification = None, head = None, notification2 = None, unver = True, commx = None):
+ def report(self, newtext, image, notification = None, head = None,
+ notification2 = None, unver = True, commTalk = None, commImage = None):
""" Function to make the reports easier (or I hope so). """
# Defining some useful variable for next...
self.newtext = newtext
@@ -506,12 +520,16 @@
self.notification = re.sub('__botnick__', self.botnick, notification)
if self.notification2 != None:
self.notification2 = re.sub('__botnick__', self.botnick, notification2)
- self.commx = commx
+ self.commTalk = commTalk
+ if commImage == None:
+ self.commImage = self.commento
+ else:
+ self.commImage = commImage
# Ok, done, let's loop.
while 1:
if unver == True:
try:
- resPutMex = self.put_mex()
+ resPutMex = self.tag_image()
except wikipedia.NoPage:
wikipedia.output(u"The page has been deleted! Skip!")
break
@@ -523,7 +541,7 @@
break
else:
try:
- resPutMex = self.put_mex(False)
+ resPutMex = self.tag_image(False)
except wikipedia.NoPage:
wikipedia.output(u"The page has been deleted!")
break
@@ -535,11 +553,11 @@
break
if self.notification != None and self.head != None:
try:
- self.put_talk()
+ self.put_mex_in_talk()
except wikipedia.EditConflict:
wikipedia.output(u"Edit Conflict! Retrying...")
try:
- self.put_talk()
+ self.put_mex_in_talk()
except:
wikipedia.output(u"Another error... skipping the user..")
break
@@ -548,7 +566,7 @@
else:
break
- def put_mex(self, put = True):
+ def tag_image(self, put = True):
""" Function to add the template in the image and to find out
who's the user that has uploaded the image. """
# Defing the image's Page Object
@@ -563,7 +581,7 @@
# You can use this function also to find only the user that
# has upload the image (FixME: Rewrite a bit this part)
if put:
- p.put(testoa + self.newtext, comment = self.commento, minorEdit = True)
+ p.put(testoa + self.newtext, comment = self.commImage, minorEdit = True)
# paginetta it's the image page object.
paginetta = wikipedia.ImagePage(self.site, self.image_namespace + self.image)
# I take the data of the latest uploader and I take only the name
@@ -592,7 +610,7 @@
self.talk_page = talk_page
self.luser = luser
return True
- def put_talk(self):
+ def put_mex_in_talk(self):
""" Function to put the warning in talk page of the uploader."""
commento2 = wikipedia.translate(self.site, comm2)
emailPageName = wikipedia.translate(self.site, emailPageWithText)
@@ -638,10 +656,10 @@
second_text = False
ti_es_ti = wikipedia.translate(self.site, empty)
testoattuale = ti_es_ti
- if self.commx == None:
+ if self.commTalk == None:
commentox = commento2
else:
- commentox = self.commx
+ commentox = self.commTalk
if second_text == True:
self.talk_page.put("%s\n\n%s" % (testoattuale, self.notification2), comment = commentox, minorEdit = False)
elif second_text == False:
@@ -731,12 +749,12 @@
def checkImageDuplicated(self, image):
""" Function to check the duplicated images. """
# {{Dupe|Image:Blanche_Montel.jpg}}
- # report(unvertext, imageName, notification, head)
dupText = wikipedia.translate(self.site, duplicatesText)
dupRegex = wikipedia.translate(self.site, duplicatesRegex)
dupTalkHead = wikipedia.translate(self.site, duplicate_user_talk_head)
dupTalkText = wikipedia.translate(self.site, duplicates_user_talk_text)
- dupComment = wikipedia.translate(self.site, duplicates_comment)
+ dupComment_talk = wikipedia.translate(self.site, duplicates_comment_talk)
+ dupComment_image = wikipedia.translate(self.site, duplicates_comment_image)
self.image = image
duplicateRegex = r'\n\*(?:\[\[:Image:%s\]\] has the following duplicates:|\*\[\[:Image:%s\]\])$' % (self.convert_to_url(self.image), self.convert_to_url(self.image))
@@ -774,17 +792,20 @@
for duplicate in duplicates:
DupePage = wikipedia.ImagePage(self.site, u'Image:%s' % duplicate)
imagedata = DupePage.getFileVersionHistory()[-1][0]
- # Example: 21:15, 5 ott 2005
try:
+ # Example: 21:15, 5 ott 2005
data = time.strptime(imagedata, "%H:%M, %d %b %Y")
except ValueError:
+ # Example: 21:15, 5 Ottobre 2005
data = time.strptime(imagedata, "%H:%M, %d %B %Y")
data_seconds = time.mktime(data)
- time_image_list.append([data_seconds, self.image])
+ time_image_list.append([data_seconds, duplicate])
time_list.append(data_seconds)
older_image = returnOlderTime(time_image_list, time_list)
# And if the images are more than two?
Page_oder_image = wikipedia.ImagePage(self.site, u'Image:%s' % older_image)
+ string = ''
+ images_to_tag_list = []
for duplicate in duplicates:
if wikipedia.ImagePage(self.site, u'%s:%s' % (self.image_namespace, duplicate)) == \
wikipedia.ImagePage(self.site, u'%s:%s' % (self.image_namespace, older_image)):
@@ -796,12 +817,24 @@
except wikipedia.NoPage:
continue # The page doesn't exists
if re.findall(dupRegex, DupPageText) == [] and re.findall(dupRegex, older_page_text) == []:
- wikipedia.output(u'Adding the duplicate template in the image...')
- self.report(re.sub(r'__image__', r'%s' % older_image, dupText), duplicate,
- dupTalkText % (duplicate, older_image), dupTalkHead, commx = dupComment, unver = True)
+ wikipedia.output(u'%s is a duplicate and has to be tagged...' % duplicate)
+ images_to_tag_list.append(duplicate)
+ if duplicate != duplicates[-1]:
+ string += "[[:%s%s]], " % (self.image_namespace, duplicate)
+ else:
+ string += "[[:%s%s]]" % (self.image_namespace, duplicate)
else:
wikipedia.output(u"Already put the dupe-template in the image's page or in the dupe's page. Skip.")
- break
+ return True # Ok - No problem. Let's continue the checking phase
+ older_image_ns = '%s%s' % (self.image_namespace, older_image) # adding the namespace
+ if len(images_to_tag_list) > 1:
+ for image_to_tag in images_to_tag_list[:-1]:
+ self.report(re.sub(r'__image__', r'%s' % older_image_ns, dupText), image_to_tag,
+ commImage = dupComment_image, unver = True)
+ if len(images_to_tag_list) != 0:
+ self.report(re.sub(r'__image__', r'%s' % older_image_ns, dupText), images_to_tag_list[-1],
+ dupTalkText % (older_image_ns, string), dupTalkHead, commTalk = dupComment_talk,
+ commImage = dupComment_image, unver = True)
return True # Ok - No problem. Let's continue the checking phase
def report_image(self, image, rep_page = None, com = None, rep_text = None, addings = True, regex = None):
More information about the Pywikipedia-l mailing list