[Pywikipedia-l] SVN: [5331] trunk/pywikipedia/checkimages.py

filnik at svn.wikimedia.org filnik at svn.wikimedia.org
Thu May 8 15:25:11 UTC 2008


Revision: 5331
Author:   filnik
Date:     2008-05-08 15:25:11 +0000 (Thu, 08 May 2008)

Log Message:
-----------
Committing these changes because I'm tired of applying the new changes manually... I'll finish coding in a few seconds, don't worry

Modified Paths:
--------------
    trunk/pywikipedia/checkimages.py

Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py	2008-05-08 00:07:39 UTC (rev 5330)
+++ trunk/pywikipedia/checkimages.py	2008-05-08 15:25:11 UTC (rev 5331)
@@ -20,6 +20,8 @@
 
     -duplicates     - Checking if the image has duplicates.
 
+    -duplicatesreport - Report the duplicates in a log *AND* put the template in the images.
+
     -break	        - To break the bot after the first check (default: recursive)
 
     -time[:#]	- Time in seconds between repeat runs (default: 30)
@@ -320,7 +322,7 @@
 duplicatesText = {
         'commons':u'\n{{Dupe|__image__}}',
         'en':None,
-        'it':u'\n{{Cancella subito|Immagine doppia di __image__}}',
+        'it':u'\n{{Cancella subito|Immagine doppia di [[:__image__]]}}',
         }
 duplicatesRegex = {
         'commons':r'\{\{(?:[Tt]emplate:|)[Dd]upe[|}]',
@@ -328,6 +330,12 @@
         'it':r'\{\{(?:[Tt]emplate:|)[Cc]ancella[ _]subito[|}]',
         }
 
+# Another stub
+emailPageWithText = {
+        'de':'Benutzer:ABF/D3',
+        'en':None,
+        }   
+
 # Add your project (in alphabetical order) if you want that the bot start
 project_inserted = [u'ar', u'commons', u'de', u'en', u'ja', u'hu', u'it', u'ta', u'zh']
 
@@ -381,9 +389,14 @@
         response.close()
     return text
 
+def sendEmail():
+    """ Function that let you send email trough the Wikipedia system """
+    pass # Empty, need work
+
+
 # Here there is the main class.
 class main:
-    def __init__(self, site, logFulNumber = 25000):
+    def __init__(self, site, logFulNumber = 25000, sendemailActive = False, duplicatesReport = False):
         """ Constructor, define some global variable """
         self.site = site
         self.logFulNumber = logFulNumber
@@ -393,15 +406,67 @@
         self.com = wikipedia.translate(site, comm10)
         # Commento = Summary in italian
         self.commento = wikipedia.translate(self.site, comm)
-    def general(self, newtext, image, notification, head, botolist):
-        """ This class can be called for two reason. So I need two different constructors, one with common data
-        and another with the data that I required... maybe it can be added on the other function, but in this way
-        seems more clear what parameters I need """
+        # Adding the bot's nickname at the notification text if needed.
+        botolist = wikipedia.translate(wikipedia.getSite(), bot_list)
+        project = wikipedia.getSite().family.name
+        bot = config.usernames[project]
+        botnick = bot[wikipedia.getSite().lang]
+        self.botnick = botnick
+        self.botolist = botolist.append(botnick)
+        self.sendemailActive = sendemailActive
+    def report(self, newtext, image, notification = None, head = None, notification2 = None, unver = True, commx = None):
+        """ Function to make the reports easier (or I hope so). """
+        # Defining some useful variable for next...
         self.newtext = newtext
         self.image = image
         self.head = head
         self.notification = notification
-        self.botolist = botolist
+        if self.notification != None:
+            self.notification = re.sub('__botnick__', self.botnick, notification)
+        if self.notification2 != None:
+            self.notification2 = re.sub('__botnick__', self.botnick, notification2)
+        self.commx = commx        
+        # Ok, done, let's loop.
+        while 1:
+            if unver == True:
+                try:
+                    resPutMex = run.put_mex()
+                except wikipedia.NoPage:
+                    wikipedia.output(u"The page has been deleted! Skip!")
+                    break
+                except wikipedia.EditConflict:
+                    wikipedia.output(u"Edit conflict! Skip!")
+                    break
+                else:
+                    if resPutMex == False:
+                        break
+            else:
+                try:
+                    resPutMex = run.put_mex(False)
+                except wikipedia.NoPage:
+                    wikipedia.output(u"The page has been deleted!")
+                    break
+                except wikipedia.EditConflict:
+                    wikipedia.output(u"Edit conflict! Skip!")
+                    break
+                else:
+                    if resPutMex == False:
+                        break
+            if self.notification != None and self.head != None:
+                try:
+                    run.put_talk()
+                except wikipedia.EditConflict:
+                    wikipedia.output(u"Edit Conflict! Retrying...")
+                    try:
+                        run.put_talk()
+                    except:
+                        wikipedia.output(u"Another error... skipping the user..")
+                        break
+                else:
+                    break
+            else:
+                break
+
     def put_mex(self, put = True):
         """ Function to add the template in the image and to find out
         who's the user that has uploaded the image. """
@@ -446,49 +511,44 @@
         # Defing the talk page (pagina_discussione = talk_page ^__^ )
         talk_page = wikipedia.Page(self.site, pagina_discussione)
         self.talk_page = talk_page
+        self.luser = luser
         return True
-    def put_talk(self, notification, head, notification2 = None, commx = None):
+    def put_talk(self):
         """ Function to put the warning in talk page of the uploader."""
         commento2 = wikipedia.translate(self.site, comm2)
-        talk_page = self.talk_page
-        notification = self.notification
-        if notification2 == None:
-            notification2 = notification
+        emailPageName = wikipedia.translate(self.site, emailPageWithText)
+        if self.notification2 == None:
+            self.notification2 = self.notification
         else:
-            notification2 = notification2 % self.image
-        head = self.head
+            self.notification2 = self.notification2 % self.image
         second_text = False
         # Getting the talk page's history, to check if there is another advise...
         # The try block is used to prevent error if you use an old wikipedia.py's version.
         edit_to_load = 10
-        if talk_page.exists():
+        if self.talk_page.exists():
             try:
-                history = talk_page.getVersionHistory(False, False, False, edit_to_load)
+                history = self.talk_page.getVersionHistory(False, False, False, edit_to_load)
             except TypeError:
-                history = talk_page.getVersionHistory(False, False, False)
+                history = self.talk_page.getVersionHistory(False, False, False)
             latest_edit = history[0]
             latest_user = latest_edit[2]
             wikipedia.output(u'The latest user that has written something is: %s' % latest_user)
         else:
             wikipedia.output(u'The user page is blank')
 
-        if talk_page.exists():
+        if self.talk_page.exists():
             try:
-                testoattuale = talk_page.get() # Actual text
+                testoattuale = self.talk_page.get() # Actual text
             except wikipedia.IsRedirectPage:
                 wikipedia.output(u'The user talk is a redirect, trying to get the right talk...')
                 try:
-                    talk_page = talk_page.getRedirectTarget()
-                    testoattuale = talk_page.get()
+                    self.talk_page = self.talk_page.getRedirectTarget()
+                    testoattuale = self.talk_page.get()
                 except wikipedia.NoPage:
                     second_text = False
                     ti_es_ti = wikipedia.translate(self.site, empty)
                     testoattuale = ti_es_ti                               
-            project = self.site.family.name
-            bot = config.usernames[project]
-            botnick = bot[self.site.lang]
-            botolist = self.botolist + [botnick]
-            for i in botolist:
+            for i in self.botolist:
                 if latest_user == i:
                     second_text = True
                     # A block to prevent the second message if the bot also welcomed users...
@@ -498,14 +558,22 @@
             second_text = False
             ti_es_ti = wikipedia.translate(self.site, empty)
             testoattuale = ti_es_ti
-        if commx == None:
+        if self.commx == None:
             commentox = commento2
         else:
-            commentox = commx
+            commentox = self.commx
         if second_text == True:
-            talk_page.put("%s\n\n%s" % (testoattuale, notification2), comment = commentox, minorEdit = False)
+            self.talk_page.put("%s\n\n%s" % (testoattuale, self.notification2), comment = commentox, minorEdit = False)
         elif second_text == False:
-            talk_page.put(testoattuale + head + notification, comment = commentox, minorEdit = False)
+            self.talk_page.put(testoattuale + self.head + self.notification, comment = commentox, minorEdit = False)
+        if emailPageName != None:
+            emailPage = wikipedia.Page(self.site, emailPageName)
+            try:
+                emailText = emailPage.get()
+            except (wikipedia.NoPage, wikipedia.IsRedirectPage):
+                return # Exit
+            if self.sendemailActive:
+                sendEmail(self.luser, re.sub(r'__user-nickname__', '%s' % self.luser, emailText))            
 			
     def untaggedGenerator(self, untaggedProject, limit):
         """ Generator that yield the images without license. It's based on a tool of the toolserver. """
@@ -605,16 +673,17 @@
                 wikipedia.output(u'%s has a duplicate! Reporting it...' % self.image)
             else:
                 wikipedia.output(u'%s has %s duplicates! Reporting them...' % (self.image, len(duplicates) - 1))
+            if duplicatesReport:
             repme = "\n*[[:Image:%s]] has the following duplicates:" % self.convert_to_url(self.image)
-            for duplicate in duplicates:
-                if self.convert_to_url(duplicate) == self.convert_to_url(self.image):
-                    continue # the image itself, not report also this as duplicate
-                repme += "\n**[[:Image:%s]]" % self.convert_to_url(duplicate)    
-            result = self.report_image(self.image, self.rep_page, self.com, repme, addings = False, regex = duplicateRegex)
-            if result and not dupText == None and not dupRegex == None:
                 for duplicate in duplicates:
                     if self.convert_to_url(duplicate) == self.convert_to_url(self.image):
                         continue # the image itself, not report also this as duplicate
+                    repme += "\n**[[:Image:%s]]" % self.convert_to_url(duplicate)    
+                result = self.report_image(self.image, self.rep_page, self.com, repme, addings = False, regex = duplicateRegex)
+            if result and not dupText == None and not dupRegex == None:
+                for duplicate in duplicates:
+                    if wikipedia.Page(self.site, u'Image:%s' % duplicate) == wikipedia.Page(self.site, u'Image:%s' % self.image):
+                        continue # the image itself, not report also this as duplicate
                     DupePage = wikipedia.Page(self.site, u'Image:%s' % duplicate)
                     try:
                         DupPageText = DupePage.get()
@@ -622,7 +691,7 @@
                         continue # The page doesn't exists
                     if re.findall(dupRegex, DupPageText) == []:
                         wikipedia.output(u'Adding the duplicate template in the image...')
-                        report(re.sub(r'__image__', r'%s' % self.image, dupText), duplicate)                
+                        self.report(re.sub(r'__image__', r'%s' % self.image, dupText), duplicate)                
         return True # Ok - No problem. Let's continue the checking phase
         
     def report_image(self, image, rep_page = None, com = None, rep_text = None, addings = True, regex = None):
@@ -714,61 +783,6 @@
             word = xl.group(2)
             if word not in list_loaded:
                 list_loaded.append(word)  
-
-# I've seen that the report class before (the main) was to long to be called so,
-# here there is a function that has all the settings, so i can call it once ^__^
-def report(newtext, image, notification = None, head = None, notification2 = None, unver = True, commx = None, bot_list = bot_list):
-    # Adding the bot's nickname at the notification text if needed.
-    botolist = wikipedia.translate(wikipedia.getSite(), bot_list)
-    project = wikipedia.getSite().family.name
-    bot = config.usernames[project]
-    botnick = bot[wikipedia.getSite().lang]
-    if notification != None:
-        notification = re.sub('__botnick__', botnick, notification)
-    if notification2 != None:
-        notification2 = re.sub('__botnick__', botnick, notification2)
-    # Ok, done, let's loop.
-    while 1:
-        run = main(site = wikipedia.getSite())
-        secondrun = run.general(newtext, image, notification, head, botolist)
-        if unver == True:
-            try:
-                resPutMex = run.put_mex()
-            except wikipedia.NoPage:
-                wikipedia.output(u"The page has been deleted! Skip!")
-                break
-            except wikipedia.EditConflict:
-                wikipedia.output(u"Edit conflict! Skip!")
-                break
-            else:
-                if resPutMex == False:
-                    break
-        else:
-            try:
-                resPutMex = run.put_mex(False)
-            except wikipedia.NoPage:
-                wikipedia.output(u"The page has been deleted!")
-                break
-            except wikipedia.EditConflict:
-                wikipedia.output(u"Edit conflict! Skip!")
-                break
-            else:
-                if resPutMex == False:
-                    break
-        if notification != None and head != None:
-            try:
-                run.put_talk(notification, head, notification2, commx)
-            except wikipedia.EditConflict:
-                wikipedia.output(u"Edit Conflict! Retrying...")
-                try:
-                    run.put_talk(notification, head, notification2, commx)
-                except:
-                    wikipedia.output(u"Another error... skipping the user..")
-                    break
-            else:
-                break
-        else:
-            break
                         
 def checkbot():
     # Command line configurable parameters
@@ -783,8 +797,10 @@
     regexGen = False # Use the regex generator
     untagged = False # Use the untagged generator
     skip_list = list() # Inizialize the skip list used below
-    duplicatesActive = False
-        
+    duplicatesActive = False # Use the duplicate option
+    duplicatesReport = False # Use the duplicate-report option
+    sendemailActive = False # Use the send-email option
+    
     # Here below there are the parameters.
     for arg in wikipedia.handleArgs():
         if arg.startswith('-limit'):
@@ -803,6 +819,10 @@
             commonsActive = True
         elif arg == '-duplicates':
             duplicatesActive = True
+        elif arg == '-duplicatereport':
+            duplicatesReport = True
+        elif arg == '-sendemail':
+            sendemailActive = True                   
         elif arg.startswith('-skip'):
             if len(arg) == 5:
                 skip = True
@@ -927,7 +947,7 @@
     # Main Loop
     while 1:
         # Defing the Main Class.
-        mainClass = main(site)
+        mainClass = main(site, sendemailActive = sendemailActive, duplicatesReport = duplicatesReport)
         # Untagged is True? Let's take that generator
         if untagged == True:
             generator =  mainClass.untaggedGenerator(projectUntagged, limit)
@@ -1150,7 +1170,7 @@
                         reported = True
                     if reported == True:
                         #if imagestatus_used == True:
-                        report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used)
+                        self.report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used)
                     else:
                         wikipedia.output(u"Skipping the image...")
                     some_problem = False
@@ -1167,7 +1187,7 @@
                     canctext = di % extension
                     notification = din % imageName
                     head = dih
-                    report(canctext, imageName, notification, head)
+                    self.report(canctext, imageName, notification, head)
                     delete = False
                     continue
                 elif g in nothing:
@@ -1177,7 +1197,7 @@
                     else:
                         notification = nn % imageName
                     head = nh 
-                    report(unvertext, imageName, notification, head, smwl)
+                    self.report(unvertext, imageName, notification, head, smwl)
                     continue
                 else:
                     wikipedia.output(u"%s has only text and not the specific license..." % imageName)
@@ -1186,7 +1206,7 @@
                     else:
                         notification = nn % imageName
                     head = nh
-                    report(unvertext, imageName, notification, head, smwl)
+                    self.report(unvertext, imageName, notification, head, smwl)
                     continue
     # A little block to perform the repeat or to break.
         if repeat == True:





More information about the Pywikipedia-l mailing list