[Pywikipedia-l] SVN: [6043] trunk/pywikipedia/checkimages.py

filnik at svn.wikimedia.org filnik at svn.wikimedia.org
Wed Oct 29 19:21:05 UTC 2008


Revision: 6043
Author:   filnik
Date:     2008-10-29 19:21:05 +0000 (Wed, 29 Oct 2008)

Log Message:
-----------
Spare some time in checking phase, better now

Modified Paths:
--------------
    trunk/pywikipedia/checkimages.py

Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py	2008-10-29 19:06:07 UTC (rev 6042)
+++ trunk/pywikipedia/checkimages.py	2008-10-29 19:21:05 UTC (rev 6043)
@@ -1100,19 +1100,62 @@
                     list_licenses.append(pageLicense) # the list has wiki-pages
         return list_licenses
 
+    def miniTemplateCheck(self, template):
+        """
+        Return True if the given template is one of the allowed licenses, else False.
+        A hidden template found in allLicenses is removed and sets whiteTemplatesFound.
+        """
+        if template in self.list_licenses: # the list_licenses are loaded in the __init__ (not to load them multiple times)
+            self.seems_ok = True
+            self.license_found = self.license_selected # let the last "fake" license normally detected
+            return True
+        if template in self.hiddentemplates:
+            # if the whitetemplate is not in the images description, we don't care
+            try:
+                self.allLicenses.remove(template)
+            except ValueError:
+                pass
+            else:
+                self.whiteTemplatesFound = True
+        return False # every non-license path returns an explicit False (never an implicit None)
+
+    def templateInList(self):
+        """
+        Calls to the MediaWiki system can be pretty slow, while searching in a list of
+        objects is really fast. So first see if we can find something in the info that
+        we already have, and only if no license was found make the deeper (API) check.
+        """
+        for template in self.licenses_found:
+            self.license_selected = template.title().replace('Template:', '')
+            result = self.miniTemplateCheck(template)
+            if result:
+                break
+        if self.license_found is None:
+            for template in self.licenses_found:
+                try:
+                    template.pageAPInfo()
+                except wikipedia.IsRedirectPage:
+                    template = template.getRedirectTarget()
+                except wikipedia.NoPage:
+                    continue
+                self.license_selected = template.title().replace('Template:', '')
+                result = self.miniTemplateCheck(template)
+                if result:
+                    break
+                
     def smartDetection(self):
         """ The bot instead of checking if there's a simple template in the
             image's description, checks also if that template is a license or
             something else. In this sense this type of check is smart.
             """
-        seems_ok = False
-        license_found = None
+        self.seems_ok = False
+        self.license_found = None
         self.hiddentemplates = self.loadHiddenTemplates()      
         self.licenses_found = self.image.getTemplates()
-        whiteTemplatesFound = False
+        self.whiteTemplatesFound = False
         regex_find_licenses = re.compile(r'(?<!\{)\{\{(?:[Tt]emplate:|)([^{]*?)[|\n<}]', re.DOTALL)
         templatesInTheImageRaw = regex_find_licenses.findall(self.imageCheckText)
-        allLicenses = list()
+        self.allLicenses = list()
         if self.list_licenses == []:
             raise wikipedia.Error(u'No licenses allowed provided, add that option to the code to make the script working correctly')
         # Found the templates ONLY in the image's description
@@ -1120,41 +1163,23 @@
             for templateReal in self.licenses_found:
                 if self.convert_to_url(template_selected).lower().replace('template:', '') == \
                        self.convert_to_url(templateReal.title().lower().replace('template:', '')):
-                    if templateReal not in allLicenses: # don't put the same template, twice.
-                        allLicenses.append(templateReal)
+                    if templateReal not in self.allLicenses: # don't put the same template, twice.
+                        self.allLicenses.append(templateReal)
         if self.licenses_found != []:
-            for template in self.licenses_found:
-                try:
-                    template.pageAPInfo()
-                except wikipedia.IsRedirectPage:
-                    template = template.getRedirectTarget()
-                except wikipedia.NoPage:
-                    continue
-                license_selected = template.title().replace('Template:', '')
-                if template in self.list_licenses: # the list_licenses are loaded in the __init__ (not to load them multimple times)
-                    seems_ok = True
-                    license_found = license_selected # let the last "fake" license normally detected
-                    break
-                if template in self.hiddentemplates:
-                    # if the whitetemplate is not in the images description, we don't care
-                    try:
-                        allLicenses.remove(template)
-                    except ValueError:
-                        continue
-                    else:
-                        whiteTemplatesFound = True
-                        continue
-            if license_found == None and allLicenses != list():
-                license_found = license_selected
-        if not seems_ok and license_found != None:
-            rep_text_license_fake = u"\n*[[:Image:%s]] seems to have a ''fake license'', license detected: <nowiki>%s</nowiki>" % (self.imageName, license_found)
-            regexFakeLicense = r"\* ?\[\[:Image:%s\]\] seems to have a ''fake license'', license detected: <nowiki>%s</nowiki>$" % (re.escape(self.imageName), license_found)
-            printWithTimeZone(u"%s seems to have a fake license: %s, reporting..." % (self.imageName, license_found))
+            self.templateInList()
+            if self.license_found == None and self.allLicenses != list():
+                self.license_found = self.license_selected
+        if not self.seems_ok and self.license_found != None:
+            rep_text_license_fake = (u"\n*[[:Image:%s]] seems to have "
+                    u"a ''fake license'', license detected: <nowiki>%s</nowiki>") % (self.imageName, self.license_found)
+            regexFakeLicense = (r"\* ?\[\[:Image:%s\]\] seems to have "
+                    r"a ''fake license'', license detected: <nowiki>%s</nowiki>$") % (re.escape(self.imageName), self.license_found)
+            printWithTimeZone(u"%s seems to have a fake license: %s, reporting..." % (self.imageName, self.license_found))
             self.report_image(self.imageName, rep_text = rep_text_license_fake,
                                    addings = False, regex = regexFakeLicense)
-        elif license_found != None:
-            printWithTimeZone(u"%s seems ok, license found: %s..." % (self.imageName, license_found))
-        return (license_found, whiteTemplatesFound)
+        elif self.license_found != None:
+            printWithTimeZone(u"%s seems ok, license found: %s..." % (self.imageName, self.license_found))
+        return (self.license_found, self.whiteTemplatesFound)
 
     def load(self, raw):
         """ Load a list of object from a string using regex. """
@@ -1405,7 +1430,6 @@
             self.some_problem = False
             return True
         elif brackets == True and license_found != None:
-            seems_ok = False
             # It works also without this... but i want only to be sure ^^
             brackets = False
             return True





More information about the Pywikipedia-l mailing list