http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10985
Revision: 10985
Author: xqt
Date: 2013-01-25 15:11:32 +0000 (Fri, 25 Jan 2013)
Log Message:
-----------
Remove obsolete code, obsolete comments, and commented-out code;
some PEP 8 cleanup.
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2013-01-25 14:33:32 UTC (rev 10984)
+++ trunk/pywikipedia/checkimages.py 2013-01-25 15:11:32 UTC (rev 10985)
@@ -541,25 +541,7 @@
pywikibot.output(u"%s%s" % (message, time_zone))
-class Global(object):
- # default environment settings
- # Command line configurable parameters
- repeat = True # Restart after having check all the images?
- limit = 80 # How many images check?
- time_sleep = 30 # How many time sleep after the check?
- skip_number = 0 # How many images to skip before checking?
- waitTime = 0 # How many time sleep before the check?
- commonsActive = False # Check if on commons there's an image with the same
name?
- normal = False # Check the new images or use another generator?
- urlUsed = False # Use the url-related function instead of the new-pages
generator
- regexGen = False # Use the regex generator
- untagged = False # Use the untagged generator
- duplicatesActive = False # Use the duplicate option
- duplicatesReport = False # Use the duplicate-report option
- sendemailActive = False # Use the send-email
- logFullError = True # Raise an error when the log is full
-
class checkImagesBot(object):
def __init__(self, site, logFulNumber=25000, sendemailActive=False,
duplicatesReport=False, logFullError=True):
@@ -706,7 +688,6 @@
u"Seems that %s has only the description and not the file..."
% self.image_to_report)
repme = u"\n*[[:File:%s]] problems '''with the
APIs'''"
- # We have a problem! Report and exit!
self.report_image(self.image_to_report, self.rep_page,
self.com,
repme)
return False
@@ -786,7 +767,7 @@
try:
emailText = emailPage.get()
except (pywikibot.NoPage, pywikibot.IsRedirectPage):
- return # Exit
+ return
if self.sendemailActive:
text_to_send = re.sub(r'__user-nickname__', r'%s'
% self.luser, emailText)
@@ -795,7 +776,7 @@
emailClass.sendMail(emailSubj, text_to_send)
except userlib.UserActionRefuse:
pywikibot.output("User is not mailable, aborted")
- return # exit
+ return
def untaggedGenerator(self, untaggedProject, limit):
""" Generator that yield the files without license. It's based
on a
@@ -842,9 +823,6 @@
self.hiddentemplates.add(pywikibot.Page(self.site,
u'Template:%s' % langK))
- # The template #if: and #switch: aren't something to care about
- #self.hiddentemplates.extend([u'#if:', u'#switch:']) FIXME
-
# Hidden template loading
if self.pageHidden:
try:
@@ -921,7 +899,8 @@
def checkImageOnCommons(self):
""" Checking if the file is on commons """
- pywikibot.output(u'Checking if [[%s]] is on commons...' %
self.imageName)
+ pywikibot.output(u'Checking if [[%s]] is on commons...'
+ % self.imageName)
commons_site = pywikibot.getSite('commons', 'commons')
regexOnCommons = r"\[\[:File:%s\]\] is also on
'''Commons''': \[\[commons:File:.*?\]\](?: \(same name\)|)$"
\
% re.escape(self.imageName)
@@ -1079,7 +1058,8 @@
# Two iteration: report the "problem" to the user only once
(the last)
if len(images_to_tag_list) > 1:
for image_to_tag in images_to_tag_list[:-1]:
- already_reported_in_past = self.countEdits(u'File:%s' %
image_to_tag, self.botolist)
+ already_reported_in_past = self.countEdits(
+ u'File:%s' % image_to_tag, self.botolist)
# if you want only one edit, the edit found should be more than 0
-> num - 1
if already_reported_in_past > duplicates_rollback - 1:
only_report = True
@@ -1100,7 +1080,8 @@
from_regex = r'\n\*\[\[:File:%s\]\]' \
% re.escape(self.convert_to_url(self.imageName))
# Delete the image in the list where we're write on
- text_for_the_report = re.sub(from_regex, '',
text_for_the_report)
+ text_for_the_report = re.sub(from_regex, '',
+ text_for_the_report)
# if you want only one edit, the edit found should be more than 0
-> num - 1
if already_reported_in_past > duplicates_rollback - 1:
only_report = True
@@ -1227,7 +1208,6 @@
pywikibot.output(u"The settings' page doesn't
exist!")
self.settingsData = None
except pywikibot.Error:
- # Error? Settings = None
pywikibot.output(
u'Problems with loading the settigs, run without them.')
self.settingsData = None
@@ -1297,7 +1277,8 @@
return True
if template in self.hiddentemplates:
- # if the whitetemplate is not in the images description, we don't care
+ # if the whitetemplate is not in the images description, we don't
+ # care
try:
self.allLicenses.remove(template)
except ValueError:
@@ -1341,10 +1322,8 @@
self.whiteTemplatesFound = False
regex_find_licenses = re.compile(
r'(?<!\{)\{\{(?:[Tt]emplate:|)([^{]+?)[|\n<}]', re.DOTALL)
- # see below to understand the use of this regex
regex_are_licenses = re.compile(
r'(?<!\{)\{\{(?:[Tt]emplate:|)([^{]+?)\}\}', re.DOTALL)
- #dummy_edit = False
while True:
self.hiddentemplates = self.loadHiddenTemplates()
self.licenses_found = self.image.getTemplates()
@@ -1374,20 +1353,8 @@
== self.convert_to_url(
templateReal.title()).lower().replace('template%3a',
''):
- if templateReal not in self.allLicenses: # don't put the same
template, twice.
+ if templateReal not in self.allLicenses:
self.allLicenses.append(templateReal)
- # perform a dummy edit, sometimes there are problems with the Job queue
- # it happends that there is listed only the template used and not all the
template that are in the templates used in the page
- # for example: there's only self, and not GFDL and the other licenses.
- #if self.allLicenses == self.licenses_found and not dummy_edit and
self.licenses_found != []:
- # pywikibot.output(u"Seems that there's a problem regarding the
Job queue, trying with a dummy edit to solve the problem.")
- # try:
- # self.imageCheckText = self.image.get()
- # self.image.put(self.imageCheckText, 'Bot: Dummy edit,if you see
this comment write [[User talk:%s|here]].' % self.botnick)
- # except (pywikibot.NoPage, pywikibot.IsRedirectPage):
- # return (None, list())
- # dummy_edit = True
- #else:
break
if self.licenses_found:
@@ -1482,7 +1449,8 @@
if skip_number == 1:
pywikibot.output(u'Skipping the first file:\n')
else:
- pywikibot.output(u'Skipping the first %s files:\n' %
skip_number)
+ pywikibot.output(u'Skipping the first %s files:\n'
+ % skip_number)
# If we still have pages to skip:
if len(self.skip_list) < skip_number:
pywikibot.output(u'Skipping %s...' % self.imageName)
@@ -1572,7 +1540,6 @@
return True
elif i.lower() in self.imageCheckText:
return True
-
return False # Nothing Found
def findAdditionalProblems(self):
@@ -1654,11 +1621,7 @@
di = u'\n%s' % di
dels = dels % di
- # Page => ImagePage
- # Get the text in the image (called imageCheckText)
try:
- # the checkText will be modified in order to make the check phase
- # easier
self.imageCheckText = self.image.get()
except pywikibot.NoPage:
pywikibot.output(u"Skipping %s because it has been deleted."
@@ -1673,7 +1636,7 @@
regex_pre = re.compile(r'<pre>(.*?)</pre>', re.DOTALL)
self.imageCheckText = regex_nowiki.sub('', self.imageCheckText)
self.imageCheckText = regex_pre.sub('', self.imageCheckText)
- # Deleting the useless template from the description (before adding something
+ # Deleting the useless template from the description (before adding sth
# in the image the original text will be reloaded, don't worry).
if self.isTagged():
printWithTimeZone(u'%s is already tagged...' % self.imageName)
@@ -1731,9 +1694,7 @@
return True
-gbv = Global()
-
-def checkbot():
+def main():
""" Main function """
# Command line configurable parameters
repeat = True # Restart after having check all the images?
@@ -1866,13 +1827,15 @@
# Define the site.
site = pywikibot.getSite()
- # If the images to skip are 0, set the skip variable to False (the same for the wait
time)
+ # If the images to skip are 0, set the skip variable to False (the same for
+ # the wait time)
if skip_number == 0:
skip = False
if waitTime == 0:
wait = False
- # A little block-statement to ensure that the bot will not start with en-parameters
+ # A little block-statement to ensure that the bot will not start with
+ # en-parameters
if site.lang not in project_inserted:
pywikibot.output(u"Your project is not supported by this script.\n"
u"You have to edit the script and add it!")
@@ -1885,7 +1848,6 @@
else:
pywikibot.output(u"Retrieving the latest %d files for checking..."
% limit)
- # Main Loop
while True:
# Defing the Main Class.
Bot = checkImagesBot(site, sendemailActive=sendemailActive,
@@ -1909,16 +1871,13 @@
except pywikibot.NoPage:
pywikibot.output(u"%s doesn't exist!" % pageRegex.title())
textRegex = '' # No source, so the bot will quit later.
- # If generator is the regex' one, use your own Generator using an url or page
and a regex.
+ # If generator is the regex' one, use your own Generator using an url
+ # or page and a regex.
if generator == 'regex' and regexGen:
generator = Bot.regexGenerator(regexpToUse, textRegex)
- # Ok, We (should) have a generator, so let's go on.
- # Take the additional settings for the Project
+
Bot.takesettings()
- # Not the main, but the most important loop.
- #parsed = False
if wait:
- # Let's sleep...
generator = Bot.wait(waitTime, generator, normal, limit)
generator = pg.NamespaceFilterPageGenerator(generator, 6, site)
for image in generator:
@@ -1939,26 +1898,23 @@
continue
if Bot.checkStep():
continue
- # A little block to perform the repeat or to break.
+
if repeat:
- printWithTimeZone(u"Waiting for %s seconds," % time_sleep)
+ pywikibot.output(u"Waiting for %s seconds," % time_sleep)
time.sleep(time_sleep)
else:
- pywikibot.output(u"\t\t\t>> STOP! <<")
- break # Exit
+ break
-# Main loop will take all the (name of the) images and then i'll check them.
if __name__ == "__main__":
- #timezones are UTC
old = datetime.datetime.strptime(
str(datetime.datetime.utcnow()).split('.')[0], "%Y-%m-%d
%H:%M:%S")
try:
- checkbot()
+ main()
finally:
final = datetime.datetime.strptime(
str(datetime.datetime.utcnow()).split('.')[0], "%Y-%m-%d
%H:%M:%S")
delta = final - old
secs_of_diff = delta.seconds
- pywikibot.output("Execution time: %s" % secs_of_diff)
+ pywikibot.output("Execution time: %s seconds\n" % secs_of_diff)
pywikibot.stopme()