[Pywikipedia-l] SVN: [4648] trunk/pywikipedia/blockpageschecker.py

filnik at svn.wikimedia.org filnik at svn.wikimedia.org
Sun Dec 9 11:41:27 UTC 2007


Revision: 4648
Author:   filnik
Date:     2007-12-09 11:41:22 +0000 (Sun, 09 Dec 2007)

Log Message:
-----------
Adding comments in the code and adding two examples of how to use the script

Modified Paths:
--------------
    trunk/pywikipedia/blockpageschecker.py

Modified: trunk/pywikipedia/blockpageschecker.py
===================================================================
--- trunk/pywikipedia/blockpageschecker.py	2007-12-09 11:40:15 UTC (rev 4647)
+++ trunk/pywikipedia/blockpageschecker.py	2007-12-09 11:41:22 UTC (rev 4648)
@@ -10,8 +10,14 @@
 -always         Doesn't ask every time if the bot should make the change or not, do it always.
 -page           Work only on one page
 
-Note: This script uses also genfactory, you can use these generator as default.
+Note: This script also uses genfactory, so you can use those generators by default.
 
+Examples of how to use the script:
+
+python blockpageschecker.py -always
+
+python blockpageschecker.py -cat:Geography -always
+
 """
 #
 # (C) Wikihermit, 2007
@@ -26,7 +32,6 @@
 import wikipedia, catlib, pagegenerators
 
 # Use only regex!
-#fr regexes added by Darkoneko 09 oct 07, THEY ARE UNTESTED at the moment, please check !
 templateToRemove = {
             'en':[r'\{\{(?:[Tt]emplate:|)[Pp]p-protected\}\}', r'{\{([Tt]emplate:|)[Pp]p-dispute\}\}',
                   r'{\{(?:[Tt]emplate:|)[Pp]p-template\}\}', r'{\{([Tt]emplate:|)[Pp]p-usertalk\}\}'],
@@ -49,12 +54,11 @@
             }
 
 def main():
-    global templateToRemove
-    global categoryToCheck
-    global comment
-    always = False
-    generator = False
-    genFactory = pagegenerators.GeneratorFactory()
+    # Declare the module-level settings used below as globals
+    global templateToRemove; global categoryToCheck; global comment
+    # Defaults: don't always apply changes, no generator set by the user yet; create genFactory
+    always = False; generator = False; genFactory = pagegenerators.GeneratorFactory()
+    # To prevent Infinite loops
     errorCount = 0
     # Loading the default options.
     for arg in wikipedia.handleArgs():
@@ -73,16 +77,19 @@
     TTR = wikipedia.translate(site, templateToRemove)
     category = wikipedia.translate(site, categoryToCheck)
     commentUsed = wikipedia.translate(site, comment)
-    # Define the category
     if not generator:
+        # Define the category if no other generator has been set
         for CAT in category:
             cat = catlib.Category(site, CAT)
             # Define the generator
             generator = pagegenerators.CategorizedPageGenerator(cat)
+    # Main Loop
     for page in generator:
         pagename = page.title()
         wikipedia.output('Loading %s...' % pagename)
         try:
+            # Same as .get(), but it also loads the editRestriction value, which is what
+            # we need to tell whether the page is protected or not.
             (text, useless, editRestriction) = page._getEditPage()
         except wikipedia.NoPage:
             wikipedia.output("%s doesn't exist! Skipping..." % pagename)
@@ -98,9 +105,11 @@
             wikipedia.output(u'The page is editable for all, deleting the template...')
             # Only to see if the text is the same or not...
             oldtext = text
+            # Deleting the template because the page doesn't need it.
             for replaceToPerform in TTR:
                 text = re.sub(replaceToPerform, '', text)
             if oldtext != text:
+                # Ask whether the change should be performed and, if so, do it.
                 wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
                 wikipedia.showDiff(oldtext, text)
                 choice = ''
@@ -118,12 +127,15 @@
                             wikipedia.output(u'Edit conflict! skip!')
                             break
                         except wikipedia.ServerError:
+                            # This error shows up occasionally and is quite annoying, because
+                            # it can block the whole process for nothing.
                             errorCount += 1
                             if errorCount < 5:
                                 wikipedia.output(u'Server Error! Wait..')
                                 time.sleep(3)
                                 continue
                             else:
+                                # Prevent Infinite Loops
                                 raise wikipedia.ServerError(u'Fifth Server Error!')
                         except wikipedia.SpamfilterError, e:
                             wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
@@ -135,7 +147,7 @@
                             wikipedia.output(u'The page is still protected. Skipping...')
                             break
                         else:
-                            # Break only if the errors are one after the other...
+                            # Reset the counter so that only consecutive errors are counted
                             errorCount = 0
                             break
 if __name__ == "__main__":





More information about the Pywikipedia-l mailing list