[Pywikipedia-l] SVN: [5086] trunk/pywikipedia/redirect.py - pywikibot

27 Feb 2008

Revision: 5086
Author:   russblau
Date:     2008-02-27 15:22:51 +0000 (Wed, 27 Feb 2008)
Log Message:
-----------
style edits only; break long lines and cleanup whitespace
Modified Paths:
--------------
    trunk/pywikipedia/redirect.py
Modified: trunk/pywikipedia/redirect.py
===================================================================

--- trunk/pywikipedia/redirect.py	2008-02-27 13:46:49 UTC (rev 5085)
+++ trunk/pywikipedia/redirect.py	2008-02-27 15:22:51 UTC (rev 5086)
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 """
-Script to resolve double redirects, and to delete broken redirects.
-Requires access to MediaWiki's maintenance pages or to a XML dump file. Delete function requires
-adminship.
+Script to resolve double redirects, and to delete broken redirects. Requires
+access to MediaWiki's maintenance pages or to a XML dump file. Delete
+function requires adminship.
 Syntax:
@@ -106,9 +106,9 @@
     def get_redirects_from_dump(self, alsoGetPageTitles = False):
         '''
-        Loads a local XML dump file, looks at all pages which have the redirect flag
-        set, and finds out where they're pointing at.
-        Returns a dictionary where the redirect names are the keys and the redirect
+        Load a local XML dump file, look at all pages which have the
+        redirect flag set, and find out where they're pointing at. Return
+        a dictionary where the redirect names are the keys and the redirect
         targets are the values.
         '''
         xmlFilename = self.xmlFilename
@@ -126,7 +126,8 @@
             if readPagesCount % 10000 == 0:
                 wikipedia.output(u'%i pages read...' % readPagesCount)
             if len(self.namespaces) > 0:
-                if wikipedia.Page(site, entry.title).namespace() not in self.namespaces:
+                if wikipedia.Page(site, entry.title).namespace() \
+                        not in self.namespaces:
                     continue
             if alsoGetPageTitles:
                 pageTitles.add(entry.title.replace(' ', '_'))
@@ -136,8 +137,11 @@
                 target = m.group(1)
                 # There might be redirects to another wiki. Ignore these.
                 for code in site.family.langs.keys():
-                    if target.startswith('%s:' % code) or target.startswith(':%s:' % code):
-                        wikipedia.output(u'NOTE: Ignoring %s which is a redirect to %s:' % (entry.title, code))
+                    if target.startswith('%s:' % code) \
+                            or target.startswith(':%s:' % code):
+                        wikipedia.output(
+                            u'NOTE: Ignoring %s which is a redirect to %s:'
+                            % (entry.title, code))
                         target = None
                         break
                 # if the redirect does not link to another wiki
@@ -153,7 +157,9 @@
                     if '#' in target:
                         target = target[:target.index('#')]
                     if '|' in target:
-                        wikipedia.output(u'HINT: %s is a redirect with a pipelink.' % entry.title)
+                        wikipedia.output(
+                            u'HINT: %s is a redirect with a pipelink.'
+                            % entry.title)
                         target = target[:target.index('|')]
                     dict[source] = target
         if alsoGetPageTitles:
@@ -170,17 +176,21 @@
             wikipedia.output(u'Retrieving special page...')
             maintenance_txt = mysite.getUrl(path)
-            # regular expression which finds redirects which point to a non-existing page inside the HTML
+            # regular expression which finds redirects which point to a
+            # non-existing page inside the HTML
             Rredir = re.compile('<li><a href=".+?" title="(.*?)"')
             redir_names = Rredir.findall(maintenance_txt)
-            wikipedia.output(u'Retrieved %d redirects from special page.\n' % len(redir_names))
+            wikipedia.output(u'Retrieved %d redirects from special page.\n'
+                             % len(redir_names))
             for redir_name in redir_names:
                 yield redir_name
         else:
             # retrieve information from XML dump
-            wikipedia.output(u'Getting a list of all redirects and of all page titles...')
-            redirs, pageTitles = self.get_redirects_from_dump(alsoGetPageTitles = True)
+            wikipedia.output(
+                u'Getting a list of all redirects and of all page titles...')
+            redirs, pageTitles = self.get_redirects_from_dump(
+                                            alsoGetPageTitles=True)
             for (key, value) in redirs.iteritems():
                 if value not in pageTitles:
                     yield key
@@ -195,10 +205,12 @@
             wikipedia.output(u'Retrieving special page...')
             maintenance_txt = mysite.getUrl(path)
-            # regular expression which finds redirects which point to another redirect inside the HTML
+            # regular expression which finds redirects which point to
+            # another redirect inside the HTML
             Rredir = re.compile('<li><a href=".+?" title="(.*?)">')
             redir_names = Rredir.findall(maintenance_txt)
-            wikipedia.output(u'Retrieved %i redirects from special page.\n' % len(redir_names))
+            wikipedia.output(u'Retrieved %i redirects from special page.\n'
+                             % len(redir_names))
             for redir_name in redir_names:
                 yield redir_name
         else:
@@ -210,7 +222,8 @@
                 # redirect as well
                 if num > self.offset and dict.has_key(value):
                     yield key
-                    wikipedia.output(u'\nChecking redirect %i of %i...' % (num + 1, len(dict)))
+                    wikipedia.output(u'\nChecking redirect %i of %i...'
+                                     % (num + 1, len(dict)))
 class RedirectRobot:
     def __init__(self, action, generator, always = False):
@@ -220,7 +233,8 @@
     def prompt(self, question):
         if not self.always:
-            choice = wikipedia.inputChoice(question, ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
+            choice = wikipedia.inputChoice(question, ['Yes', 'No', 'All'],
+                                           ['y', 'N', 'a'], 'N')
             if choice == 'n':
                 return False
             elif choice == 'a':
@@ -235,7 +249,8 @@
             redir_page = wikipedia.Page(wikipedia.getSite(), redir_name)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
-            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % redir_page.title())
+            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+                             % redir_page.title())
             try:
                 targetPage = redir_page.getRedirectTarget()
             except wikipedia.IsNotRedirectPage:
@@ -246,12 +261,15 @@
                 try:
                     targetPage.get()
                 except wikipedia.NoPage:
-                    if self.prompt(u'Do you want to delete %s?' % redir_page.aslink()):
+                    if self.prompt(u'Do you want to delete %s?'
+                                   % redir_page.aslink()):
                         redir_page.delete(reason, prompt = False)
                 except wikipedia.IsRedirectPage:
-                    wikipedia.output(u'Redirect target is also a redirect! Won\'t delete anything.')
+                    wikipedia.output(
+            u'Redirect target is also a redirect! Won\'t delete anything.')
                 else:
-                    wikipedia.output(u'Redirect target does exist! Won\'t delete anything.')
+                    wikipedia.output(
+            u'Redirect target does exist! Won\'t delete anything.')
                 # idle for 1 minute
             wikipedia.output(u'')
             wikipedia.put_throttle()
@@ -262,27 +280,29 @@
             redir = wikipedia.Page(mysite, redir_name)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
-            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % redir.title())
-            newRedir   = redir
+            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+                             % redir.title())
+            newRedir = redir
             redirList = []  # bookkeeping to detect loops
             while True:
-                redirList.append(u'%s:%s' % (newRedir  .site().lang, newRedir  .sectionFreeTitle()))
+                redirList.append(u'%s:%s' % (newRedir.site().lang,
+                                             newRedir.sectionFreeTitle()))
                 try:
                     targetPage = newRedir.getRedirectTarget()
                 except wikipedia.IsNotRedirectPage:
                     if len(redirList) == 1:
-                        wikipedia.output(u'Skipping: Page %s is not a redirect.' % redir.aslink())
+                        wikipedia.output(u'Skipping: Page %s is not a redirect.'
+                                         % redir.aslink())
                         break  #do nothing
                     elif len(redirList) == 2:
                         wikipedia.output(
-                            u'Skipping: Redirect target %s is not a redirect.' % redir.aslink())
+                            u'Skipping: Redirect target %s is not a redirect.'
+                            % redir.aslink())
                         break  # do nothing
-                except wikipedia.NoPage:
-                    wikipedia.output(u'Warning: %s doesn\'t exist.' % newRedir  .aslink())
                 except wikipedia.SectionError:
                     wikipedia.output(
                         u'Warning: Redirect target section %s doesn\'t exist.'
-                          % newRedir  .aslink())
+                          % newRedir.aslink())
                 except wikipedia.BadTitle, e:
                     # str(e) is in the format 'BadTitle: [[Foo]]'
                     wikipedia.output(
@@ -291,22 +311,24 @@
                 except wikipedia.NoPage:
                     wikipedia.output(
                         u'Warning: Redirect target %s doesn\'t exist.'
-                          % newRedir  .aslink())
+                          % newRedir.aslink())
                 else:
                     wikipedia.output(
                         u'   Links to: %s.'
                           % targetPage.aslink())
                     if targetPage.site() != mysite:
                         wikipedia.output(
-                            u'Warning: redirect target (%s) is on a different site.'
-                          % (targetPage.aslink()))
+                        u'Warning: redirect target (%s) is on a different site.'
+                             % (targetPage.aslink()))
                         if self.always:
                             break  # skip if automatic 
                     # watch out for redirect loops
-                    if redirList.count((u'%s:%s' 
-                           % (targetPage.site().lang, targetPage.sectionFreeTitle()))) > 0:
+                    if redirList.count(u'%s:%s' 
+                                       % (targetPage.site().lang,
+                                          targetPage.sectionFreeTitle())
+                                      ) > 0:
                         wikipedia.output(
-                            u'Warning: Redirect target %s forms a redirect loop.'
+                           u'Warning: Redirect target %s forms a redirect loop.'
                               % targetPage.aslink())
                         break  #TODO: deal with loop
                     else:
@@ -346,23 +368,26 @@
     def run(self):
         if self.action == 'double':
             # get summary text
-            wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg_double))
+            wikipedia.setAction(
+                wikipedia.translate(wikipedia.getSite(), msg_double))
             self.fix_double_redirects()
         elif self.action == 'broken':
             self.delete_broken_redirects()
 def main():
     # read command line parameters
-    # what the bot should do (either resolve double redirs, or delete broken redirs)
+    # what the bot should do (either resolve double redirs, or delete broken
+    # redirs)
     action = None
-    # where the bot should get his infos from (either None to load the maintenance
-    # special page from the live wiki, or the filename of a local XML dump file)
+    # where the bot should get his infos from (either None to load the
+    # maintenance special page from the live wiki, or the filename of a
+    # local XML dump file)
     xmlFilename = None
     # Which namespace should be processed when using a XML dump
     # default to -1 which means all namespaces will be processed
     namespaces = []
-    # at which redirect shall we start searching double redirects again (only with dump)
-    # default to -1 which means all redirects are checked
+    # at which redirect shall we start searching double redirects again
+    # (only with dump); default to -1 which means all redirects are checked
     offset = -1
     always = False
     for arg in wikipedia.handleArgs():
@@ -372,7 +397,8 @@
             action = 'broken'
         elif arg.startswith('-xml'):
             if len(arg) == 4:
-                xmlFilename = wikipedia.input(u'Please enter the XML dump\'s filename: ')
+                xmlFilename = wikipedia.input(
+                                u'Please enter the XML dump\'s filename: ')
             else:
                 xmlFilename = arg[5:]
         elif arg.startswith('-namespace:'):