[Pywikipedia-l] SVN: [5989] trunk/pywikipedia/checkimages.py
filnik at svn.wikimedia.org
filnik at svn.wikimedia.org
Sat Oct 18 12:39:26 UTC 2008
Revision: 5989
Author: filnik
Date: 2008-10-18 12:39:26 +0000 (Sat, 18 Oct 2008)
Log Message:
-----------
Some minor changes and rewriting; added comments in several places.
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-10-18 12:01:49 UTC (rev 5988)
+++ trunk/pywikipedia/checkimages.py 2008-10-18 12:39:26 UTC (rev 5989)
@@ -362,49 +362,58 @@
# Note: every __botnick__ will be replaced with your bot's nickname (feel free not to use it if you don't need it)
HiddenTemplateNotification = {
'commons': u"""\n{{subst:User:Filnik/whitetemplate|Image:%s}}\n\n''This message was '''added automatically by [[User:__botnick__|__botnick__]]''', if you need some help about it, ask its master (~~~) or go to the [[Commons:Help desk]]''. --~~~~""",
- 'de': None,
- 'en': None,
- 'it': u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Template_insufficiente|%s|__botnick__}} --~~~~",
- 'ko': u"\n{{subst:User:김우진1/BotRFL|%s}} --~~~~",
- 'ta': None,
+ 'de' : None,
+ 'en' : None,
+ 'it' : u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Template_insufficiente|%s|__botnick__}} --~~~~",
+ 'ko' : u"\n{{subst:User:김우진1/BotRFL|%s}} --~~~~",
+ 'ta' : None,
}
-# Stub - will make it better in future, work in progress.
+
+# In this part there are the parameters for the dupe images.
+
+# Put here the template that you want to put in the image to warn that it's a dupe
# put __image__ if you want only one image, __images__ if you want the whole list
duplicatesText = {
- 'commons':u'\n{{Dupe|__image__}}',
- 'en':None,
- 'it':u'\n{{Progetto:Coordinamento/Immagini/Bot/Template duplicati|__images__}}',
- 'ko':'분류:그림 저작권 틀',
+ 'commons': u'\n{{Dupe|__image__}}',
+ 'en' : None,
+ 'it' : u'\n{{Progetto:Coordinamento/Immagini/Bot/Template duplicati|__images__}}',
+ 'ko' :'분류:그림 저작권 틀',
}
+# Head of the message given to the author
duplicate_user_talk_head = {
- 'commons':None,
- 'it': u'\n\n== Immagine doppia ==\n',
+ 'commons': None,
+ 'en' : None,
+ 'it' : u'\n\n== Immagine doppia ==\n',
}
+# Message to put in the talk
duplicates_user_talk_text = {
- 'commons':u'{{subst:User:Filnik/duplicates|Image:%s|Image:%s}}',
- 'en':None,
- 'it':u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Duplicati|%s|%s|__botnick__}} --~~~~",
+ 'commons': u'{{subst:User:Filnik/duplicates|Image:%s|Image:%s}}',
+ 'en' : None,
+ 'it' : u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Duplicati|%s|%s|__botnick__}} --~~~~",
}
-
+# Comment used by the bot while it reports the problem in the uploader's talk
duplicates_comment_talk = {
- 'commons':u'Bot: Dupe image found',
- 'en':None,
- 'it':u"Bot: Notifico l'immagine doppia trovata",
+ 'commons': u'Bot: Dupe image found',
+ 'en' : None,
+ 'it' : u"Bot: Notifico l'immagine doppia trovata",
}
+# Comment used by the bot while it reports the problem in the image
duplicates_comment_image = {
- 'commons':u'Bot: Tagging dupe image',
- 'en':None,
- 'it':u'Bot: Immagine doppia, da cancellare',
+ 'commons': u'Bot: Tagging dupe image',
+ 'en' : None,
+ 'it' : u'Bot: Immagine doppia, da cancellare',
}
+# Regex to detect the template put in the image's description to find the dupe
duplicatesRegex = {
- 'commons':r'\{\{(?:[Tt]emplate:|)[Dd]upe[|}]',
- 'en':None,
- 'it':r'\{\{(?:[Tt]emplate:|)[Pp]rogetto:[Cc]oordinamento/Immagini/Bot/Template duplicati[|}]',
+ 'commons': r'\{\{(?:[Tt]emplate:|)[Dd]upe[|}]',
+ 'en' : None,
+ 'it' : r'\{\{(?:[Tt]emplate:|)[Pp]rogetto:[Cc]oordinamento/Immagini/Bot/Template duplicati[|}]',
}
-
+# Category with the licenses and / or with subcategories with the other licenses.
category_with_licenses = {
- 'commons':'Category:License tags',
- 'it':'Categoria:Template Licenze copyright',
+ 'commons': 'Category:License tags',
+ 'en' : None,
+ 'it' : 'Categoria:Template Licenze copyright',
}
## Put None if you don't use this option or simply add nothing if en
@@ -521,7 +530,7 @@
self.skip_list = list() # Inizialize the skip list used below
self.duplicatesReport = duplicatesReport
image_n = self.site.image_namespace()
- self.image_namespace = "%s:" % image_n # Example: "Image:"
+ self.image_namespace = u"%s:" % image_n # Example: "Image:"
# Load the licenses only once, so do it once
self.smartdetection = smartdetection
if self.smartdetection:
@@ -530,7 +539,7 @@
""" Function to set parameters, now only image but maybe it can be used for others in "future" """
self.imageName = imageName
# Defining the image's Page Object
- self.image = wikipedia.ImagePage(self.site, '%s%s' % (self.image_namespace, self.imageName))
+ self.image = wikipedia.ImagePage(self.site, u'%s%s' % (self.image_namespace, self.imageName))
def report(self, newtext, image_to_report, notification = None, head = None,
notification2 = None, unver = True, commTalk = None, commImage = None):
""" Function to make the reports easier. """
@@ -541,9 +550,9 @@
self.notification = notification
self.notification2 = notification2
if self.notification != None:
- self.notification = re.sub('__botnick__', self.botnick, notification)
+ self.notification = re.sub(r'__botnick__', self.botnick, notification)
if self.notification2 != None:
- self.notification2 = re.sub('__botnick__', self.botnick, notification2)
+ self.notification2 = re.sub(r'__botnick__', self.botnick, notification2)
self.commTalk = commTalk
if commImage == None:
self.commImage = self.commento
@@ -611,12 +620,12 @@
nick = reportPageObject.getLatestUploader()[0]
except wikipedia.NoPage:
wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image_to_report)
- repme = "\n*[[:Image:%s]] problems '''with the APIs'''"
+ repme = u"\n*[[:Image:%s]] problems '''with the APIs'''"
# We have a problem! Report and exit!
self.report_image(self.image_to_report, self.rep_page, self.com, repme)
return False
luser = wikipedia.url2link(nick, self.site, self.site)
- talk_page = wikipedia.Page(self.site, "%s:%s" % (self.site.namespace(3), luser))
+ talk_page = wikipedia.Page(self.site, u"%s:%s" % (self.site.namespace(3), luser))
self.talk_page = talk_page
self.luser = luser
return True
@@ -667,7 +676,7 @@
else:
commentox = self.commTalk
if second_text == True:
- self.talk_page.put("%s\n\n%s" % (testoattuale, self.notification2), comment = commentox, minorEdit = False)
+ self.talk_page.put(u"%s\n\n%s" % (testoattuale, self.notification2), comment = commentox, minorEdit = False)
elif second_text == False:
self.talk_page.put(testoattuale + self.head + self.notification, comment = commentox, minorEdit = False)
if emailPageName != None and emailSubj != None:
@@ -677,7 +686,7 @@
except (wikipedia.NoPage, wikipedia.IsRedirectPage):
return # Exit
if self.sendemailActive:
- text_to_send = re.sub(r'__user-nickname__', '%s' % self.luser, emailText)
+ text_to_send = re.sub(r'__user-nickname__', r'%s' % self.luser, emailText)
emailClass = EmailSender(self.site, self.luser)
emailClass.send(emailSubj, text_to_send)
@@ -694,7 +703,7 @@
results = re.findall(regexp, text)
if results == []:
wikipedia.output(link)
- raise NothingFound('Nothing found! Try to use the tool by yourself to be sure that it works!')
+ raise NothingFound(u'Nothing found! Try to use the tool by yourself to be sure that it works!')
else:
for result in results:
wikiPage = wikipedia.Page(self.site, result)
@@ -721,10 +730,10 @@
def loadHiddenTemplates(self):
""" Function to load the white templates """
# A template as {{en is not a license! Adding also them in the whitelist template...
- for langK in wikipedia.Family('wikipedia').langs.keys():
- self.hiddentemplate.append('%s' % langK)
+ for langK in wikipedia.Family(u'wikipedia').langs.keys():
+ self.hiddentemplate.append(u'%s' % langK)
# The template #if: and #switch: aren't something to care about
- self.hiddentemplate.extend(['#if:', '#switch:'])
+ self.hiddentemplate.extend([u'#if:', u'#switch:'])
# Hidden template loading
if self.pageHidden != None:
try:
@@ -746,7 +755,7 @@
max_usage = 0
for element in listGiven:
imageName = element[1]
- imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % imageName)
+ imagePage = wikipedia.ImagePage(self.site, u'Image:%s' % imageName)
imageUsage = [page for page in imagePage.usingPages()]
if len(imageUsage) > 0 and len(imageUsage) > max_usage:
max_usage = len(imageUsage)
@@ -768,7 +777,7 @@
def convert_to_url(self, page):
# Function stolen from wikipedia.py
"""The name of the page this Page refers to, in a form suitable for the URL of the page."""
- title = page.replace(" ", "_")
+ title = page.replace(u" ", u"_")
encodedTitle = title.encode(self.site.encoding())
return urllib.quote(encodedTitle)
@@ -792,7 +801,7 @@
wikipedia.output(u'Checking if %s is on commons...' % self.imageName)
commons_site = wikipedia.getSite('commons', 'commons')
regexOnCommons = r"\n\*\[\[:Image:%s\]\] is also on '''Commons''': \[\[commons:Image:.*?\]\](?: \(same name\)|)$" % self.imageName
- imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % self.imageName)
+ imagePage = wikipedia.ImagePage(self.site, u'Image:%s' % self.imageName)
hash_found = imagePage.getHash()
if hash_found == None:
return False # Problems? Yes! Image deleted, no hash found. Skip the image.
@@ -800,9 +809,9 @@
commons_image_with_this_hash = commons_site.getImagesFromAnHash(hash_found)
if commons_image_with_this_hash != []:
wikipedia.output(u'%s is on commons!' % self.imageName)
- imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % self.imageName)
+ imagePage = wikipedia.ImagePage(self.site, u'Image:%s' % self.imageName)
on_commons_text = imagePage.getImagePageHtml()
- if "<div class='sharedUploadNotice'>" in on_commons_text:
+ if u"<div class='sharedUploadNotice'>" in on_commons_text:
wikipedia.output(u"But, the image doesn't exist on your project! Skip...")
# Problems? Yes! We have to skip the check part for that image!
# Because it's on commons but someone has added something on your project.
@@ -813,9 +822,9 @@
else:
# the second usually is a url or something like that. Compare the two in equal way, both url.
if self.convert_to_url(self.imageName) == self.convert_to_url(commons_image_with_this_hash[0]):
- repme = "\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]] (same name)" % (self.imageName, commons_image_with_this_hash[0])
+ repme = u"\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]] (same name)" % (self.imageName, commons_image_with_this_hash[0])
else:
- repme = "\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]]" % (self.imageName, commons_image_with_this_hash[0])
+ repme = u"\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]]" % (self.imageName, commons_image_with_this_hash[0])
self.report_image(self.imageName, self.rep_page, self.com, repme, addings = False, regex = regexOnCommons)
# Problems? No, return True
return True
@@ -836,7 +845,7 @@
dupComment_talk = wikipedia.translate(self.site, duplicates_comment_talk)
dupComment_image = wikipedia.translate(self.site, duplicates_comment_image)
duplicateRegex = r'\n\*(?:\[\[:Image:%s\]\] has the following duplicates(?: \(\'\'\'forced mode\'\'\'\)|):|\*\[\[:Image:%s\]\])$' % (self.convert_to_url(self.imageName), self.convert_to_url(self.imageName))
- imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % self.imageName)
+ imagePage = wikipedia.ImagePage(self.site, u'Image:%s' % self.imageName)
hash_found = imagePage.getHash()
duplicates = self.site.getImagesFromAnHash(hash_found)
if duplicates == None:
@@ -853,7 +862,7 @@
DupePage = wikipedia.ImagePage(self.site, u'Image:%s' % duplicate)
imagedata = DupePage.getLatestUploader()[1]
# '2008-06-18T08:04:29Z'
- data = time.strptime(imagedata, "%Y-%m-%dT%H:%M:%SZ")
+ data = time.strptime(imagedata, u"%Y-%m-%dT%H:%M:%SZ")
data_seconds = time.mktime(data)
time_image_list.append([data_seconds, duplicate])
time_list.append(data_seconds)
@@ -876,24 +885,24 @@
wikipedia.output(u'%s is a duplicate and has to be tagged...' % duplicate)
images_to_tag_list.append(duplicate)
#if duplicate != duplicates[-1]:
- string += "*[[:%s%s]]\n" % (self.image_namespace, duplicate)
+ string += u"*[[:%s%s]]\n" % (self.image_namespace, duplicate)
#else:
# string += "*[[:%s%s]]" % (self.image_namespace, duplicate)
else:
wikipedia.output(u"Already put the dupe-template in the image's page or in the dupe's page. Skip.")
return True # Ok - No problem. Let's continue the checking phase
- older_image_ns = '%s%s' % (self.image_namespace, older_image) # adding the namespace
+ older_image_ns = u'%s%s' % (self.image_namespace, older_image) # adding the namespace
only_report = False # true if the image are not to be tagged as dupes
# put only one image or the whole list according to the request
- if '__images__' in dupText:
+ if u'__images__' in dupText:
text_for_the_report = re.sub(r'__images__', r'\n%s*[[:%s]]\n' % (string, older_image_ns), dupText)
else:
text_for_the_report = re.sub(r'__image__', r'%s' % older_image_ns, dupText)
# Two iteration: report the "problem" to the user only once (the last)
if len(images_to_tag_list) > 1:
for image_to_tag in images_to_tag_list[:-1]:
- already_reported_in_past = self.countEdits('Image:%s' % image_to_tag, self.botolist)
+ already_reported_in_past = self.countEdits(u'Image:%s' % image_to_tag, self.botolist)
# if you want only one edit, the edit found should be more than 0 -> num - 1
if already_reported_in_past > duplicates_rollback - 1:
only_report = True
@@ -903,7 +912,7 @@
self.report(text_for_the_report, image_to_tag,
commImage = dupComment_image, unver = True)
if len(images_to_tag_list) != 0 and not only_report:
- already_reported_in_past = self.countEdits('Image:%s' % images_to_tag_list[-1], self.botolist)
+ already_reported_in_past = self.countEdits(u'Image:%s' % images_to_tag_list[-1], self.botolist)
# It's a regex, we need to fix the name in order to make it regex-compatible.
replaces_to_perform = [[' ', '_'], ['(', '\('], [')', '\)'], ['.', '\.'], ['[', '\['], [']', '\]'],
['{', '\{'], ['}', '\}']]
@@ -921,13 +930,13 @@
commImage = dupComment_image, unver = True)
if self.duplicatesReport or only_report:
if only_report:
- repme = "\n*[[:Image:%s]] has the following duplicates ('''forced mode'''):" % self.convert_to_url(self.imageName)
+ repme = u"\n*[[:Image:%s]] has the following duplicates ('''forced mode'''):" % self.convert_to_url(self.imageName)
else:
- repme = "\n*[[:Image:%s]] has the following duplicates:" % self.convert_to_url(self.imageName)
+ repme = u"\n*[[:Image:%s]] has the following duplicates:" % self.convert_to_url(self.imageName)
for duplicate in duplicates:
if self.convert_to_url(duplicate) == self.convert_to_url(self.imageName):
continue # the image itself, not report also this as duplicate
- repme += "\n**[[:Image:%s]]" % self.convert_to_url(duplicate)
+ repme += u"\n**[[:Image:%s]]" % self.convert_to_url(duplicate)
result = self.report_image(self.imageName, self.rep_page, self.com, repme, addings = False, regex = duplicateRegex)
if not result:
return True # If Errors, exit (but continue the check)
@@ -949,7 +958,7 @@
except wikipedia.IsRedirectPage:
text_get = another_page.getRedirectTarget().get()
if len(text_get) >= self.logFulNumber:
- raise LogIsFull("The log page (%s) is full! Please delete the old images reported." % another_page.title())
+ raise LogIsFull(u"The log page (%s) is full! Please delete the old images reported." % another_page.title())
pos = 0
# The talk page includes "_" between the two names, in this way i replace them to " "
n = re.compile(regex, re.UNICODE|re.M)
@@ -1021,7 +1030,7 @@
pageAllowedText = ''
for nameLicense in self.load(pageAllowedText):
if not 'template:' in nameLicense.lower():
- nameLicense = 'Template:%s' % nameLicense
+ nameLicense = u'Template:%s' % nameLicense
pageLicense = wikipedia.Page(self.site, nameLicense)
if pageLicense not in list_licenses:
list_licenses.append(pageLicense) # the list has wiki-pages
@@ -1033,7 +1042,7 @@
gets the real page, if there's a NoPage, return None.
"""
#print template.exists()
- template = wikipedia.Page(self.site, 'Template:%s' % license_selected)
+ template = wikipedia.Page(self.site, u'Template:%s' % license_selected)
try:
template.pageAPInfo()
except wikipedia.NoPage:
@@ -1101,7 +1110,7 @@
exit_cicle = True
break
if not seems_ok:
- rep_text_license_fake = "\n*[[:Image:%s]] seems to have a ''fake license'', license detected: {{tl|%s}}." % (self.imageName, license_found)
+ rep_text_license_fake = u"\n*[[:Image:%s]] seems to have a ''fake license'', license detected: {{tl|%s}}." % (self.imageName, license_found)
regexFakeLicense = r"\* ?\[\[:Image:%s\]\] seems to have a ''fake license'', license detected: \{\{tl\|%s\}\}\.$" % (self.imageName, license_found)
printWithTimeZone(u"%s seems to have a fake license: %s, reporting..." % (self.imageName, license_found))
self.report_image(self.imageName, rep_text = rep_text_license_fake,
@@ -1120,7 +1129,7 @@
regl = r"(\"|\')(.*?)\1(?:,|\])"
pl = re.compile(regl, re.UNICODE)
for xl in pl.finditer(raw):
- word = xl.group(2).replace('\\\\', '\\')
+ word = xl.group(2).replace(u'\\\\', u'\\')
if word not in list_loaded:
list_loaded.append(word)
return list_loaded
@@ -1158,7 +1167,7 @@
os.environ['TZ'] = 'EST+01EDT,M4.1.0,M10.5.0'
time.tzset()
# '2008-06-18T08:04:29Z'
- data = time.strptime(imagedata, "%Y-%m-%dT%H:%M:%SZ")
+ data = time.strptime(imagedata, u"%Y-%m-%dT%H:%M:%SZ")
data_seconds = time.mktime(data)
current_time = time.time()
secs_of_diff = current_time - data_seconds
@@ -1290,7 +1299,7 @@
# Block of text to translate the parameters set above.
image_n = site.image_namespace()
- image_namespace = "%s:" % image_n # Example: "User_talk:"
+ image_namespace = u"%s:" % image_n # Example: "User_talk:"
unvertext = wikipedia.translate(site, n_txt)
di = wikipedia.translate(site, delete_immediately)
dih = wikipedia.translate(site, delete_immediately_head)
@@ -1320,7 +1329,7 @@
wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
return
# Some formatting for delete immediately template
- di = '\n%s' % di
+ di = u'\n%s' % di
dels = dels % di
# Reading the log of the new images if another generator is not given.
@@ -1448,7 +1457,7 @@
# If there are {{ use regex, otherwise no (if there's not the {{ may not be a template
# and the regex will be wrong)
if '{{' in i:
- regexP = re.compile('\{\{(?:template|)%s ?(?:\||\n|\}|<) ?' % i.split('{{')[1].replace(' ', '[ _]'), re.I)
+ regexP = re.compile(r'\{\{(?:template|)%s ?(?:\||\n|\}|<) ?' % i.split('{{')[1].replace(u' ', u'[ _]'), re.I)
result = regexP.findall(imageCheckText)
if result != []:
tagged = True
@@ -1551,7 +1560,7 @@
reported = True
if reported == True:
#if imagestatus_used == True:
- mainClass.report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used)
+ mainClass.report(mex_used, imageName, text_used, u"\n%s\n" % head_used, None, imagestatus_used, summary_used)
else:
wikipedia.output(u"Skipping the image...")
some_problem = False
More information about the Pywikipedia-l
mailing list