Revision: 5331 Author: filnik Date: 2008-05-08 15:25:11 +0000 (Thu, 08 May 2008)
Log Message: ----------- Committing these changes because I'm tired of applying the new changes manually... I'll finish the code in a few seconds, don't worry
Modified Paths: -------------- trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py =================================================================== --- trunk/pywikipedia/checkimages.py 2008-05-08 00:07:39 UTC (rev 5330) +++ trunk/pywikipedia/checkimages.py 2008-05-08 15:25:11 UTC (rev 5331) @@ -20,6 +20,8 @@
-duplicates - Checking if the image has duplicates.
+ -duplicatesreport - Report the duplicates in a log *AND* put the template in the images. + -break - To break the bot after the first check (default: recursive)
-time[:#] - Time in seconds between repeat runs (default: 30) @@ -320,7 +322,7 @@ duplicatesText = { 'commons':u'\n{{Dupe|__image__}}', 'en':None, - 'it':u'\n{{Cancella subito|Immagine doppia di __image__}}', + 'it':u'\n{{Cancella subito|Immagine doppia di [[:__image__]]}}', } duplicatesRegex = { 'commons':r'{{(?:[Tt]emplate:|)[Dd]upe[|}]', @@ -328,6 +330,12 @@ 'it':r'{{(?:[Tt]emplate:|)[Cc]ancella[ _]subito[|}]', }
+# Another stub +emailPageWithText = { + 'de':'Benutzer:ABF/D3', + 'en':None, + } + # Add your project (in alphabetical order) if you want that the bot start project_inserted = [u'ar', u'commons', u'de', u'en', u'ja', u'hu', u'it', u'ta', u'zh']
@@ -381,9 +389,14 @@ response.close() return text
+def sendEmail(): + """ Function that let you send email trough the Wikipedia system """ + pass # Empty, need work + + # Here there is the main class. class main: - def __init__(self, site, logFulNumber = 25000): + def __init__(self, site, logFulNumber = 25000, sendemailActive = False, duplicatesReport = False): """ Constructor, define some global variable """ self.site = site self.logFulNumber = logFulNumber @@ -393,15 +406,67 @@ self.com = wikipedia.translate(site, comm10) # Commento = Summary in italian self.commento = wikipedia.translate(self.site, comm) - def general(self, newtext, image, notification, head, botolist): - """ This class can be called for two reason. So I need two different constructors, one with common data - and another with the data that I required... maybe it can be added on the other function, but in this way - seems more clear what parameters I need """ + # Adding the bot's nickname at the notification text if needed. + botolist = wikipedia.translate(wikipedia.getSite(), bot_list) + project = wikipedia.getSite().family.name + bot = config.usernames[project] + botnick = bot[wikipedia.getSite().lang] + self.botnick = botnick + self.botolist = botolist.append(botnick) + self.sendemailActive = sendemailActive + def report(self, newtext, image, notification = None, head = None, notification2 = None, unver = True, commx = None): + """ Function to make the reports easier (or I hope so). """ + # Defining some useful variable for next... self.newtext = newtext self.image = image self.head = head self.notification = notification - self.botolist = botolist + if self.notification != None: + self.notification = re.sub('__botnick__', self.botnick, notification) + if self.notification2 != None: + self.notification2 = re.sub('__botnick__', self.botnick, notification2) + self.commx = commx + # Ok, done, let's loop. + while 1: + if unver == True: + try: + resPutMex = run.put_mex() + except wikipedia.NoPage: + wikipedia.output(u"The page has been deleted! 
Skip!") + break + except wikipedia.EditConflict: + wikipedia.output(u"Edit conflict! Skip!") + break + else: + if resPutMex == False: + break + else: + try: + resPutMex = run.put_mex(False) + except wikipedia.NoPage: + wikipedia.output(u"The page has been deleted!") + break + except wikipedia.EditConflict: + wikipedia.output(u"Edit conflict! Skip!") + break + else: + if resPutMex == False: + break + if self.notification != None and self.head != None: + try: + run.put_talk() + except wikipedia.EditConflict: + wikipedia.output(u"Edit Conflict! Retrying...") + try: + run.put_talk() + except: + wikipedia.output(u"Another error... skipping the user..") + break + else: + break + else: + break + def put_mex(self, put = True): """ Function to add the template in the image and to find out who's the user that has uploaded the image. """ @@ -446,49 +511,44 @@ # Defing the talk page (pagina_discussione = talk_page ^__^ ) talk_page = wikipedia.Page(self.site, pagina_discussione) self.talk_page = talk_page + self.luser = luser return True - def put_talk(self, notification, head, notification2 = None, commx = None): + def put_talk(self): """ Function to put the warning in talk page of the uploader.""" commento2 = wikipedia.translate(self.site, comm2) - talk_page = self.talk_page - notification = self.notification - if notification2 == None: - notification2 = notification + emailPageName = wikipedia.translate(self.site, emailPageWithText) + if self.notification2 == None: + self.notification2 = self.notification else: - notification2 = notification2 % self.image - head = self.head + self.notification2 = self.notification2 % self.image second_text = False # Getting the talk page's history, to check if there is another advise... # The try block is used to prevent error if you use an old wikipedia.py's version. 
edit_to_load = 10 - if talk_page.exists(): + if self.talk_page.exists(): try: - history = talk_page.getVersionHistory(False, False, False, edit_to_load) + history = self.talk_page.getVersionHistory(False, False, False, edit_to_load) except TypeError: - history = talk_page.getVersionHistory(False, False, False) + history = self.talk_page.getVersionHistory(False, False, False) latest_edit = history[0] latest_user = latest_edit[2] wikipedia.output(u'The latest user that has written something is: %s' % latest_user) else: wikipedia.output(u'The user page is blank')
- if talk_page.exists(): + if self.talk_page.exists(): try: - testoattuale = talk_page.get() # Actual text + testoattuale = self.talk_page.get() # Actual text except wikipedia.IsRedirectPage: wikipedia.output(u'The user talk is a redirect, trying to get the right talk...') try: - talk_page = talk_page.getRedirectTarget() - testoattuale = talk_page.get() + self.talk_page = self.talk_page.getRedirectTarget() + testoattuale = self.talk_page.get() except wikipedia.NoPage: second_text = False ti_es_ti = wikipedia.translate(self.site, empty) testoattuale = ti_es_ti - project = self.site.family.name - bot = config.usernames[project] - botnick = bot[self.site.lang] - botolist = self.botolist + [botnick] - for i in botolist: + for i in self.botolist: if latest_user == i: second_text = True # A block to prevent the second message if the bot also welcomed users... @@ -498,14 +558,22 @@ second_text = False ti_es_ti = wikipedia.translate(self.site, empty) testoattuale = ti_es_ti - if commx == None: + if self.commx == None: commentox = commento2 else: - commentox = commx + commentox = self.commx if second_text == True: - talk_page.put("%s\n\n%s" % (testoattuale, notification2), comment = commentox, minorEdit = False) + self.talk_page.put("%s\n\n%s" % (testoattuale, self.notification2), comment = commentox, minorEdit = False) elif second_text == False: - talk_page.put(testoattuale + head + notification, comment = commentox, minorEdit = False) + self.talk_page.put(testoattuale + self.head + self.notification, comment = commentox, minorEdit = False) + if emailPageName != None: + emailPage = wikipedia.Page(self.site, emailPageName) + try: + emailText = emailPage.get() + except (wikipedia.NoPage, wikipedia.IsRedirectPage): + return # Exit + if self.sendemailActive: + sendEmail(self.luser, re.sub(r'__user-nickname__', '%s' % self.luser, emailText)) def untaggedGenerator(self, untaggedProject, limit): """ Generator that yield the images without license. 
It's based on a tool of the toolserver. """ @@ -605,16 +673,17 @@ wikipedia.output(u'%s has a duplicate! Reporting it...' % self.image) else: wikipedia.output(u'%s has %s duplicates! Reporting them...' % (self.image, len(duplicates) - 1)) + if duplicatesReport: repme = "\n*[[:Image:%s]] has the following duplicates:" % self.convert_to_url(self.image) - for duplicate in duplicates: - if self.convert_to_url(duplicate) == self.convert_to_url(self.image): - continue # the image itself, not report also this as duplicate - repme += "\n**[[:Image:%s]]" % self.convert_to_url(duplicate) - result = self.report_image(self.image, self.rep_page, self.com, repme, addings = False, regex = duplicateRegex) - if result and not dupText == None and not dupRegex == None: for duplicate in duplicates: if self.convert_to_url(duplicate) == self.convert_to_url(self.image): continue # the image itself, not report also this as duplicate + repme += "\n**[[:Image:%s]]" % self.convert_to_url(duplicate) + result = self.report_image(self.image, self.rep_page, self.com, repme, addings = False, regex = duplicateRegex) + if result and not dupText == None and not dupRegex == None: + for duplicate in duplicates: + if wikipedia.Page(self.site, u'Image:%s' % duplicate) == wikipedia.Page(self.site, u'Image:%s' % self.image): + continue # the image itself, not report also this as duplicate DupePage = wikipedia.Page(self.site, u'Image:%s' % duplicate) try: DupPageText = DupePage.get() @@ -622,7 +691,7 @@ continue # The page doesn't exists if re.findall(dupRegex, DupPageText) == []: wikipedia.output(u'Adding the duplicate template in the image...') - report(re.sub(r'__image__', r'%s' % self.image, dupText), duplicate) + self.report(re.sub(r'__image__', r'%s' % self.image, dupText), duplicate) return True # Ok - No problem. Let's continue the checking phase
def report_image(self, image, rep_page = None, com = None, rep_text = None, addings = True, regex = None): @@ -714,61 +783,6 @@ word = xl.group(2) if word not in list_loaded: list_loaded.append(word) - -# I've seen that the report class before (the main) was to long to be called so, -# here there is a function that has all the settings, so i can call it once ^__^ -def report(newtext, image, notification = None, head = None, notification2 = None, unver = True, commx = None, bot_list = bot_list): - # Adding the bot's nickname at the notification text if needed. - botolist = wikipedia.translate(wikipedia.getSite(), bot_list) - project = wikipedia.getSite().family.name - bot = config.usernames[project] - botnick = bot[wikipedia.getSite().lang] - if notification != None: - notification = re.sub('__botnick__', botnick, notification) - if notification2 != None: - notification2 = re.sub('__botnick__', botnick, notification2) - # Ok, done, let's loop. - while 1: - run = main(site = wikipedia.getSite()) - secondrun = run.general(newtext, image, notification, head, botolist) - if unver == True: - try: - resPutMex = run.put_mex() - except wikipedia.NoPage: - wikipedia.output(u"The page has been deleted! Skip!") - break - except wikipedia.EditConflict: - wikipedia.output(u"Edit conflict! Skip!") - break - else: - if resPutMex == False: - break - else: - try: - resPutMex = run.put_mex(False) - except wikipedia.NoPage: - wikipedia.output(u"The page has been deleted!") - break - except wikipedia.EditConflict: - wikipedia.output(u"Edit conflict! Skip!") - break - else: - if resPutMex == False: - break - if notification != None and head != None: - try: - run.put_talk(notification, head, notification2, commx) - except wikipedia.EditConflict: - wikipedia.output(u"Edit Conflict! Retrying...") - try: - run.put_talk(notification, head, notification2, commx) - except: - wikipedia.output(u"Another error... skipping the user..") - break - else: - break - else: - break
def checkbot(): # Command line configurable parameters @@ -783,8 +797,10 @@ regexGen = False # Use the regex generator untagged = False # Use the untagged generator skip_list = list() # Inizialize the skip list used below - duplicatesActive = False - + duplicatesActive = False # Use the duplicate option + duplicatesReport = False # Use the duplicate-report option + sendemailActive = False # Use the send-email option + # Here below there are the parameters. for arg in wikipedia.handleArgs(): if arg.startswith('-limit'): @@ -803,6 +819,10 @@ commonsActive = True elif arg == '-duplicates': duplicatesActive = True + elif arg == '-duplicatereport': + duplicatesReport = True + elif arg == '-sendemail': + sendemailActive = True elif arg.startswith('-skip'): if len(arg) == 5: skip = True @@ -927,7 +947,7 @@ # Main Loop while 1: # Defing the Main Class. - mainClass = main(site) + mainClass = main(site, sendemailActive = sendemailActive, duplicatesReport = duplicatesReport) # Untagged is True? Let's take that generator if untagged == True: generator = mainClass.untaggedGenerator(projectUntagged, limit) @@ -1150,7 +1170,7 @@ reported = True if reported == True: #if imagestatus_used == True: - report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used) + self.report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used) else: wikipedia.output(u"Skipping the image...") some_problem = False @@ -1167,7 +1187,7 @@ canctext = di % extension notification = din % imageName head = dih - report(canctext, imageName, notification, head) + self.report(canctext, imageName, notification, head) delete = False continue elif g in nothing: @@ -1177,7 +1197,7 @@ else: notification = nn % imageName head = nh - report(unvertext, imageName, notification, head, smwl) + self.report(unvertext, imageName, notification, head, smwl) continue else: wikipedia.output(u"%s has only text and not the specific license..." 
% imageName) @@ -1186,7 +1206,7 @@ else: notification = nn % imageName head = nh - report(unvertext, imageName, notification, head, smwl) + self.report(unvertext, imageName, notification, head, smwl) continue # A little block to perform the repeat or to break. if repeat == True: