Revision: 4400
Author: russblau
Date: 2007-10-02 21:57:44 +0000 (Tue, 02 Oct 2007)
Log Message:
-----------
Major docstring cleanup (in progress); see PEP 8 and PEP 257. In the process, some minor
code changes, including (1) removed unused NoSuchEntity exception; (2) moved
ignore_bot_templates to config.py; (3) changed defaults for several methods to
throttle=True; (4) removed redundant getFileLinks method (ImagePage.usingPages does the
same thing better); (5) fixed a remaining regex bug in replaceCategoryInPlace(); (6) split
Page.previousVersion() method into separate .previousRevision() and .getOldVersion()
methods. Made conforming changes in other files.
Modified Paths:
--------------
trunk/pywikipedia/config.py
trunk/pywikipedia/nowcommons.py
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2007-10-02 12:46:59 UTC (rev 4399)
+++ trunk/pywikipedia/config.py 2007-10-02 21:57:44 UTC (rev 4400)
@@ -78,6 +78,9 @@
sysopnames[familyName] = {}
disambiguation_comment[familyName] = {}
+# Set to True to override the {{bots}} exclusion protocol (at your own risk!)
+ignore_bot_templates = False
+
############## USER INTERFACE SETTINGS ##############
# The encoding that's used in the user's console, i.e. how strings are encoded
Modified: trunk/pywikipedia/nowcommons.py
===================================================================
--- trunk/pywikipedia/nowcommons.py 2007-10-02 12:46:59 UTC (rev 4399)
+++ trunk/pywikipedia/nowcommons.py 2007-10-02 21:57:44 UTC (rev 4400)
@@ -134,17 +134,23 @@
if not filenameOnCommons:
wikipedia.output(u'NowCommons template not found.')
continue
- commonsImagePage = wikipedia.ImagePage(commons, 'Image:%s' %
filenameOnCommons)
+ commonsImagePage = wikipedia.ImagePage(commons,
+ 'Image:%s' % filenameOnCommons)
if len(localImagePage.getFileVersionHistory()) > 1:
- wikipedia.output(u'This image has a version history. Please
manually delete it after making sure that the old versions aren\'t worth
keeping.')
+ wikipedia.output(u"""\
+This image has a version history. Please delete it manually after making sure
+that the old versions aren't worth keeping.""")
continue
if localImagePage.titleWithoutNamespace() !=
commonsImagePage.titleWithoutNamespace():
- usingPages = localImagePage.usingPages()
+ usingPages = list(localImagePage.usingPages())
if usingPages and usingPages != [localImagePage]:
- wikipedia.output('%s is still used in %i pages. Please change
them manually.' % (localImagePage.title(), len(localImagePage.usingPages())))
+ wikipedia.output(
+ '%s is still used in %i pages. Please change them manually.'
+ % (localImagePage.title(), len(usingPages)))
continue
else:
- wikipedia.output('No page is using %s anymore.' %
localImagePage.title())
+ wikipedia.output('No page is using %s anymore.'
+ % localImagePage.title())
commonsText = commonsImagePage.get()
if md5 == commonsImagePage.getFileMd5Sum():
wikipedia.output(u'The image is identical to the one on
Commons.')
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2007-10-02 12:46:59 UTC (rev 4399)
+++ trunk/pywikipedia/pagegenerators.py 2007-10-02 21:57:44 UTC (rev 4400)
@@ -127,8 +127,8 @@
for page in site.newpages(number=number, get_redirect=get_redirect, repeat=repeat):
yield page[0]
-def FileLinksGenerator(referredPage):
- for page in referredPage.getFileLinks():
+def FileLinksGenerator(referredImagePage):
+ for page in referredImagePage.usingPages():
yield page
def ImagesPageGenerator(pageWithImages):
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-10-02 12:46:59 UTC (rev 4399)
+++ trunk/pywikipedia/wikipedia.py 2007-10-02 21:57:44 UTC (rev 4400)
@@ -6,72 +6,30 @@
late August 2004)
Classes:
-Page: A MediaWiki page
- __init__ : Page(Site, Title) - the page with title Title on wikimedia
site Site
- title : The name of the page, in a form suitable for an interwiki
link
- urlname : The name of the page, in a form suitable for a URL
- titleWithoutNamespace : The name of the page, with the namespace part removed
- section : The section of the page (the part of the name after
'#')
- sectionFreeTitle : The name without the section part
- aslink : The name of the page in the form [[Title]] or [[lang:Title]]
- site : The wiki this page is in
- encoding : The encoding of the page
- isAutoTitle : If the title is a well known, auto-translatable title
- autoFormat : Returns (dictName, value), where value can be a year, date,
etc.,
- and dictName is 'YearBC', 'December', etc.
- isCategory : True if the page is a category, false otherwise
- isImage : True if the page is an image, false otherwise
+ Page(site, title): A page on a MediaWiki site
+ ImagePage(site, title): An image descriptor Page
+ Site(lang, fam): A MediaWiki site
+ Throttle: Limits reading and writing rates
- get (*) : The text of the page
- exists (*) : True if the page actually exists, false otherwise
- isRedirectPage (*) : True if the page is a redirect, false otherwise
- isEmpty (*) : True if the page has 4 characters or less content, not
- counting interwiki and category links
- botMayEdit (*) : True if bot is allowed to edit page
- interwiki (*) : The interwiki links from the page (list of Pages)
- categories (*) : The categories the page is in (list of Pages)
- linkedPages (*) : The normal pages linked from the page (list of Pages)
- imagelinks (*) : The pictures on the page (list of ImagePages)
- templates (*) : All templates referenced on the page (list of strings)
- getRedirectTarget (*) : The page the page redirects to
- isDisambig (*) : True if the page is a disambiguation page
- getReferences : List of pages linking to the page
- namespace : The namespace in which the page is
- permalink (*) : The url of the permalink of the current version
- move : Move the page to another title
- put(newtext) : Saves the page
- put_async(newtext) : Queues the page to be saved asynchronously
- delete : Deletes the page (requires being logged in)
-
- (*) : This loads the page if it has not been loaded before; permalink might
- even reload it if it has been loaded before
-
-Site: a MediaWiki site
- messages : There are new messages on the site
- forceLogin() : Does not continue until the user has logged in to
- the site
- getUrl() : Retrieve an URL from the site
- mediawiki_message(key): Retrieve the text of the MediaWiki message with
- the key "key"
- has_mediawiki_message(key) : True if this site defines a MediaWiki message
- with the key "key"
- Special pages:
- Dynamic pages:
- allpages(): Special:Allpages
- newpages(): Special:Newpages
- longpages(): Special:Longpages
- shortpages(): Special:Shortpages
- categories(): Special:Categories
-
- Cached pages:
- deadendpages(): Special:Deadendpages
- ancientpages(): Special:Ancientpages
- lonelypages(): Special:Lonelypages
- uncategorizedcategories(): Special:Uncategorizedcategories
- uncategorizedpages(): Special:Uncategorizedpages
- uncategorizedimages(): Special:Uncategorizedimages
- unusedcategories(): Special:Unusuedcategories
-
+Exceptions:
+ Error: Base class for all exceptions in this module
+ NoUsername: Username is not in user-config.py
+ NoPage: Page does not exist on the wiki
+ IsRedirectPage: Page is a redirect page
+ IsNotRedirectPage: Page is not a redirect page
+ LockedPage: Page is locked
+ LockedNoPage: Page does not exist, and creating it is not
+ possible because of a lock (subclass of NoPage and
+ LockedPage)
+ SectionError: The section specified in the Page title does not exist
+ PageNotSaved: Saving the page has failed
+ EditConflict: PageNotSaved due to edit conflict while uploading
+ SpamfilterError: PageNotSaved due to MediaWiki spam filter
+ ServerError: Got unexpected response from wiki server
+ BadTitle: Server responded with BadTitle.
+ UserBlocked: Client's username or IP has been blocked
+ PageNotFound: Page not found in list
+
Other functions:
getall(): Load pages via Special:Export
setAction(text): Use 'text' instead of "Wikipedia python library" in
@@ -140,7 +98,6 @@
except NameError:
from sets import Set as set
-
# Check Unicode support (is this a wide or narrow python build?)
# See
http://www.python.org/doc/peps/pep-0261/
try:
@@ -149,16 +106,6 @@
except ValueError:
WIDEBUILD = False
-
-# Local settings
-
-# If ignore_bot_templates is True, the bot will always ignore {{bots}}
-# and {{nobots}} templates - botMayEdit() will always return True.
-# In the default (False) state, it will honor these directives and
-# refuse to save pages that forbid it from editing.
-ignore_bot_templates = False
-
-
# Local exceptions
class Error(Exception):
@@ -182,9 +129,6 @@
class LockedNoPage(NoPage, LockedPage):
"""Page does not exist, and creating it is not possible because of a
lock."""
-class NoSuchEntity(ValueError):
- """No entity exist for this character"""
-
class SectionError(Error):
"""The section specified by # does not exist"""
@@ -206,13 +150,13 @@
class BadTitle(Error):
"""Server responded with BadTitle."""
-# UserBlocked exceptions should in general not be catched. If the bot has been
-# blocked, the bot operator has possibly done a mistake and should take care of
-# the issue before continuing.
+# UserBlocked exceptions should in general not be caught. If the bot has
+# been blocked, the bot operator should address the reason for the block
+# before continuing.
class UserBlocked(Error):
"""Your username or IP has been blocked"""
-class PageNotFound(Exception):
+class PageNotFound(Error):
"""Page not found in list"""
SaxError = xml.sax._exceptions.SAXParseException
@@ -220,19 +164,89 @@
# Pre-compile re expressions
reNamespace = re.compile("^(.+?) *: *(.*)$")
-# The most important thing in this whole module: The Page class
+
class Page(object):
- """A page on the wiki."""
- def __init__(self, site, title, insite = None, defaultNamespace = 0):
- """
- Constructor. Normally called with two arguments:
- Parameters:
- 1) The wikimedia site on which the page resides
- 2) The title of the page as a unicode string
+ """Page: A MediaWiki page
- The argument insite can be specified to help decode
- the name; it is the wikimedia site where this link was found.
- """
+ Constructor has two required parameters:
+ 1) The wikimedia Site on which the page resides
+ 2) The title of the page as a unicode string
+
+ Optional parameters:
+ insite - the wikimedia Site where this link was found (to help decode
+ interwiki links)
+ defaultNamespace - A namespace to use if the link does not contain one
+
+ Methods available:
+
+ title : The name of the page, including namespace and
+ section if any
+ urlname : Title, in a form suitable for a URL
+ namespace : The namespace in which the page is found
+ titleWithoutNamespace : Title, with the namespace part removed
+ section : The section of the page (the part of the title
+ after '#', if any)
+ sectionFreeTitle : Title, without the section part
+ aslink : Title in the form [[Title]] or [[lang:Title]]
+ site : The wiki this page is in
+ encoding : The encoding of the page
+ isAutoTitle : Title can be translated using the autoFormat method
+ autoFormat : Auto-format certain dates and other standard
+ format page titles
+ isCategory : True if the page is a category
+ isDisambig (*) : True if the page is a disambiguation page
+ isImage : True if the page is an image
+ isRedirectPage (*) : True if the page is a redirect, false otherwise
+ getRedirectTarget (*) : The page the page redirects to
+ isTalkPage : True if the page is in any "talk" namespace
+ toggleTalkPage : Return the talk page (if this is one, return the
+ non-talk page)
+ get (*) : The text of the page
+ latestRevision (*) : The page's current revision id
+ userName : Last user to edit page
+ isIpEdit : True if last editor was unregistered
+ editTime : Timestamp of the last revision to the page
+ previousRevision (*) : The revision id of the previous version
+ permalink (*) : The url of the permalink of the current version
+ getOldVersion(id) (*) : The text of a previous version of the page
+ getVersionHistory : Load the version history information from wiki
+ getVersionHistoryTable: Create a wiki table from the history data
+ fullVersionHistory : Return all past versions including wikitext
+ contributingUsers : Return set of users who have edited page
+ exists (*) : True if the page actually exists, false otherwise
+ isEmpty (*) : True if the page has 4 characters or less content,
+ not counting interwiki and category links
+ interwiki (*) : The interwiki links from the page (list of Pages)
+ categories (*) : The categories the page is in (list of Pages)
+ linkedPages (*) : The normal pages linked from the page (list of
+ Pages)
+ imagelinks (*) : The pictures on the page (list of ImagePages)
+ templates (*) : All templates referenced on the page (list of
+ strings)
+ templatesWithParams(*): All templates on the page, with list of parameters
+ templatePages (*) : Page objects for all templates used on this page
+ isDisambig (*) : True if the page is a disambiguation page
+ getReferences : List of pages linking to the page
+ canBeEdited (*) : True if page is unprotected or user has edit
+ privileges
+ botMayEdit (*) : True if bot is allowed to edit page
+ put(newtext) : Saves the page
+ put_async(newtext) : Queues the page to be saved asynchronously
+ move : Move the page to another title
+ delete : Deletes the page (requires being logged in)
+ protect : Protect or unprotect a page (requires sysop status)
+ removeImage : Remove all instances of an image from this page
+ replaceImage : Replace all instances of an image with another
+ loadDeletedRevisions : Load all deleted versions of this page
+ getDeletedRevision : Return a particular deleted revision
+ markDeletedRevision : Mark a version to be undeleted, or not
+ undelete : Undelete past version(s) of the page
+
+ (*) : This loads the page if it has not been loaded before; permalink might
+ even reload it if it has been loaded before
+
+ """
+ def __init__(self, site, title, insite=None, defaultNamespace=0):
try:
# if _editrestriction is True, it means that the page has been found
# to have an edit restriction, but we do not know yet whether the
@@ -362,134 +376,136 @@
raise
def site(self):
- """The site of the page this Page refers to,
- without :"""
+ """Return the Site object for the wiki on which this Page
resides."""
return self._site
def encoding(self):
- """
- Returns the character encoding used on this page's wiki.
- """
+ """Return the character encoding used on this Page's wiki
Site."""
return self._site.encoding()
- def urlname(self):
- """The name of the page this Page refers to, in a form suitable
- for the URL of the page."""
- title = self.title(underscore = True)
- encodedTitle = title.encode(self.site().encoding())
- return urllib.quote(encodedTitle)
+ def title(self, underscore = False, savetitle = False):
+ """Return the title of this Page, as a Unicode string.
- def title(self, underscore = False, savetitle = False):
- """The name of this Page, as a Unicode string"""
+ If underscore is True, replace all ' ' characters with '_'.
+ If savetitle is True, try to quote all non-ASCII characters.
+ """
title = self._title
if savetitle: # Ensure there's no wiki syntax in the title
if title.find("''") > -1:
try:
title = urllib.quote(title).replace('%20',' ')
except KeyError:
- # We can't encode everything; to be on the safe side, we encode
nothing
+ # We can't encode everything; to be on the safe side,
+ # we encode nothing
pass
if underscore:
title = title.replace(' ', '_')
return title
- def titleWithoutNamespace(self, underscore = False):
- """
- Returns the name of the page without the namespace and without section.
- """
+ def titleWithoutNamespace(self, underscore=False):
+ """Return title of Page without namespace and without
section."""
if self.namespace() == 0:
- return self.title(underscore = underscore)
+ return self.sectionFreeTitle(underscore=underscore)
else:
- return self.sectionFreeTitle(underscore = underscore).split(':',
1)[1]
+ return self.sectionFreeTitle(underscore=underscore).split(':', 1)[1]
def section(self, underscore = False):
- """The name of the section this Page refers to. Sections are
- denominated by a # in the title(). If no section is referenced,
- None is returned."""
+ """Return the name of the section this Page refers to.
+
+ The section is the part of the title following a '#' character, if any.
+ If no section is present, return None.
+ """
return self._section
- # ln = self.title(underscore = underscore)
- # ln = re.sub('&#', '&hash;', ln)
- # if not '#' in ln:
- # return None
- # else:
- # hn = ln[ln.find('#') + 1:]
- # hn = re.sub('&hash;', '&#', hn)
- # return hn
- def sectionFreeTitle(self, underscore = False):
- sectionName = self.section(underscore = underscore)
- title = self.title(underscore = underscore)
+ def sectionFreeTitle(self, underscore=False):
+ """Return the title of this Page, without the section (if
any)."""
+ sectionName = self.section(underscore=underscore)
+ title = self.title(underscore=underscore)
if sectionName:
return title[:-len(sectionName)-1]
else:
return title
+ def urlname(self):
+ """Return the Page title encoded for use in an
URL."""
+ title = self.title(underscore = True)
+ encodedTitle = title.encode(self.site().encoding())
+ return urllib.quote(encodedTitle)
+
def __str__(self):
- """A console representation of the pagelink"""
+ """Return a console representation of the
pagelink."""
return self.aslink().encode(config.console_encoding, 'replace')
def __repr__(self):
- """A more complete string representation"""
+ """Return a more complete string
representation."""
return "%s{%s}" % (self.__class__.__name__, str(self))
- def aslink(self, forceInterwiki = False, textlink=False):
- """
- A string representation in the form of a link. The link will
- be an interwiki link if needed.
+ def aslink(self, forceInterwiki=False, textlink=False):
+ """Return a string representation in the form of a wikilink.
- If you set forceInterwiki to True, the link will have the format
- of an interwiki link even if it points to the home wiki.
+ If forceInterwiki is True, return an interwiki link even if it
+ points to the home wiki. If False, return an interwiki link only if
+ needed.
- If you set textlink to True, the link will always appear in text
- form (that is, links to the Category: and Image: namespaces will
- be preceded by a : character).
-
- Note that the family is never included.
+ If textlink is True, always return a link in text form (that
+ is, links to the Category: and Image: namespaces will be preceded by
+ a : character). (Not needed if forceInterwiki is True.)
+
"""
if forceInterwiki or self.site() != getSite():
if self.site().family != getSite().family:
- return '[[%s:%s:%s]]' % (self.site().family.name,
self.site().lang, self.title(savetitle=True))
+ return u'[[%s:%s:%s]]' % (self.site().family.name,
self.site().lang, self.title(savetitle=True))
else:
- return '[[%s:%s]]' % (self.site().lang,
self.title(savetitle=True))
- elif textlink and self.namespace() in (6, 14): # Image: or Category:
- return '[[:%s]]' % self.title()
+ return u'[[%s:%s]]' % (self.site().lang,
self.title(savetitle=True))
+ elif textlink and (self.isImage() or self.isCategory()):
+ return u'[[:%s]]' % self.title()
else:
- return '[[%s]]' % self.title()
+ return u'[[%s]]' % self.title()
- def isAutoTitle(self):
- """If the title is a well known, auto-translatable title
+ def autoFormat(self):
+ """Return (dictName, value) if title is in date.autoFormat
dictionary.
+
+ Value can be a year, date, etc., and dictName is 'YearBC',
+ 'Year_December', or another dictionary name. Please note that two
+ entries may have exactly the same autoFormat, but be in two
+ different namespaces, as some sites have categories with the
+ same names. Regular titles return (None, None).
+
"""
- return self.autoFormat()[0] is not None
-
- def autoFormat(self):
- """Returns (dictName, value), where value can be a year, date,
etc.,
- and dictName is 'YearBC', 'Year_December', or another
dictionary name.
- Please note that two entries may have exactly the same autoFormat,
- but be in two different namespaces, as some sites have categories with the
same names.
- Regular titles return (None,None)."""
if not hasattr(self, '_autoFormat'):
import date
- _autoFormat = date.getAutoFormat(self.site().language(),
self.titleWithoutNamespace())
+ _autoFormat = date.getAutoFormat(self.site().language(),
+ self.titleWithoutNamespace())
return _autoFormat
+ def isAutoTitle(self):
+ """Return True if title of this Page is in the autoFormat
dictionary."""
+ return self.autoFormat()[0] is not None
- def get(self, force = False, get_redirect=False, throttle = True, sysop = False,
nofollow_redirects=False, change_edit_time = True):
- """The wiki-text of the page. This will retrieve the page if it
has not
- been retrieved yet. This can raise the following exceptions that
- should be caught by the calling code:
+ def get(self, force=False, get_redirect=False, throttle=True,
+ sysop=False, nofollow_redirects=False, change_edit_time=True):
+ """Return the wiki-text of the page.
+ This will retrieve the page from the server if it has not been
+ retrieved yet, or if force is True. This can raise the following
+ exceptions that should be caught by the calling code:
+
NoPage: The page does not exist
-
IsRedirectPage: The page is a redirect. The argument of the
exception is the title of the page it redirects to.
-
SectionError: The subject does not exist on a page with a # link
- Set get_redirect to True to follow redirects rather than raise an exception.
- Set force to True to force a reload of all page attributes, including
errors.
- Set nofollow_redirects to True to not follow redirects but obey all other
exceptions.
- Set change_version_date to False if you have already loaded the page before
and
- do not check this version for changes before saving
+ If get_redirect is True, return the redirect text and save the
+ target of the redirect, do not raise an exception.
+ If force is True, reload all page attributes, including
+ errors.
+ If nofollow_redirects is True, ignore redirects entirely (do not
+ raise an exception for redirects but do not mark the page as a
+ redirect or save the redirect target page).
+ If change_edit_time is False, do not check this version for changes
+ before saving. This should be used only if the page has been loaded
+ previously.
+
"""
# NOTE: The following few NoPage exceptions could already be thrown at
# the Page() constructor. They are raised here instead for convenience,
@@ -497,7 +513,7 @@
# get(), but not for such raised by the constructor.
# \ufffd represents a badly encoded character, the other characters are
# disallowed by MediaWiki.
- for illegalChar in ['#', '<', '>', '[',
']', '|', '{', '}', '\n', u'\ufffd']:
+ for illegalChar in u'#<>[]|{}\n\ufffd':
if illegalChar in self.sectionFreeTitle():
if verbose:
output(u'Illegal character in %s!' % self.aslink())
@@ -505,11 +521,12 @@
if self.namespace() == -1:
raise NoPage('%s is in the Special namespace!' % self.aslink())
if self.site().isInterwikiLink(self.title()):
- raise NoPage('%s is not a local page on %s!' % (self.aslink(),
self.site()))
+ raise NoPage('%s is not a local page on %s!'
+ % (self.aslink(), self.site()))
if force:
# When forcing, we retry the page no matter what. Old exceptions
# and contents do not apply any more.
- for attr in
['_redirarg','_getexception','_contents']:
+ for attr in ['_redirarg', '_getexception',
'_contents']:
if hasattr(self, attr):
delattr(self,attr)
else:
@@ -526,7 +543,7 @@
# Make sure we did try to get the contents once
if not hasattr(self, '_contents'):
try:
- self._contents, self._isWatched, self.editRestriction =
self.getEditPage(get_redirect = get_redirect, throttle = throttle, sysop = sysop,
nofollow_redirects=nofollow_redirects)
+ self._contents, self._isWatched, self.editRestriction =
self._getEditPage(get_redirect = get_redirect, throttle = throttle, sysop = sysop,
nofollow_redirects=nofollow_redirects)
hn = self.section()
if hn:
m = re.search("=+ *%s *=+" % hn, self._contents)
@@ -548,15 +565,19 @@
raise
return self._contents
- def getEditPage(self, get_redirect=False, throttle = True, sysop = False, oldid =
None, nofollow_redirects = False, change_edit_time = True):
- """
- Get the contents of the Page via the edit page.
+ def _getEditPage(self, get_redirect=False, throttle=True, sysop=False,
+ oldid=None, nofollow_redirects=False,
+ change_edit_time=True):
+ """Get the contents of the Page via the edit page.
+
Do not use this directly, use get() instead.
Arguments:
+ oldid - Retrieve an old revision (by id), not the current one
get_redirect - Get the contents, even if it is a redirect page
- This routine returns a unicode string containing the wiki text.
+ This method returns a 3-tuple containing the raw wiki text as a
+ unicode string, the watchlist status, and any edit restrictions.
"""
isWatched = False
editRestriction = None
@@ -657,14 +678,15 @@
if matchWatching:
isWatched = True
# Now process the contents of the textarea
- m = self.site().redirectRegex().match(text[i1:i2])
if self._editTime == "0":
if verbose:
output(u"DBG> page may be locked?!")
editRestriction = 'sysop'
+ m = self.site().redirectRegex().match(text[i1:i2])
if m:
+ # page text matches the redirect pattern
if self.section():
- redirtarget = "%s#%s"%(m.group(1),self.section())
+ redirtarget = "%s#%s" % (m.group(1), self.section())
else:
redirtarget = m.group(1)
if get_redirect:
@@ -686,26 +708,45 @@
return x, isWatched, editRestriction
+ def getOldVersion(self, oldid, force=False, get_redirect=False,
+ throttle=True, sysop=False, nofollow_redirects=False,
+ change_edit_time=True):
+ """Return text of an old revision of this page; same options as
get()."""
+ # TODO: should probably check for bad pagename, NoPage, and other
+ # exceptions that would prevent retrieving text, as get() does
+ return self._getEditPage(
+ get_redirect=get_redirect, throttle=throttle,
+ sysop=sysop, oldid=oldid,
+ nofollow_redirects=nofollow_redirects,
+ change_edit_time=change_edit_time
+ )[0]
+
def permalink(self):
- """
- Get the permalink page for this page
- """
- return "%s://%s%s&oldid=%i"%(self.site().protocol(),
self.site().hostname(), self.site().get_address(self.title()), self.latestRevision())
+ """Return the permalink URL for current revision of this
page."""
+ return "%s://%s%s&oldid=%i" % (self.site().protocol(),
+ self.site().hostname(),
+ self.site().get_address(self.title()),
+ self.latestRevision())
def latestRevision(self):
- """
- Get the latest revision for this page
- """
+ """Return the latest revision id for this page."""
if not self._permalink:
- # When we get the page with getall, the permalink is received automatically
+ # When we get the page with getall, the permalink is received
+ # automatically
getall(self.site(),[self],force=True)
return int(self._permalink)
+ def previousRevision(self):
+ """Return the revision id for the previous revision of this
Page."""
+ vh = self.getVersionHistory(revCount=2)
+ return vh[1][0]
+
def exists(self):
- """
- True if the page exists, even if it's a redirect.
+ """Return True if page exists on the wiki, even if it's a
redirect.
- If the title includes a section, False if this section isn't found.
+ If the title includes a section, return False if this section isn't
+ found.
+
"""
try:
self.get()
@@ -718,7 +759,7 @@
return True
def isRedirectPage(self):
- """True if the page is a redirect page, False if not or not
existing"""
+ """Return True if this is a redirect, False if not or not
existing."""
try:
self.get()
except NoPage:
@@ -730,11 +771,12 @@
return False
def isEmpty(self):
+ """Return True if the page text has less than 4 characters.
+
+ Character count ignores language links and category links.
+ Can raise the same exceptions as get().
+
"""
- True if the page has less than 4 characters, except for
- language links and category links, False otherwise.
- Can raise the same exceptions as get()
- """
txt = self.get()
txt = removeLanguageLinks(txt)
txt = removeCategoryLinks(txt, site = self.site())
@@ -744,21 +786,25 @@
return False
def isTalkPage(self):
+ """Return True if this page is in any talk
namespace."""
ns = self.namespace()
return ns >= 0 and ns % 2 == 1
def botMayEdit(self):
- """
- True if page doesn't contain {{bots}} or {{nobots}} or
- contains them and active bot is allowed or not allowed
- to edit said page
+ """Return True if this page allows bots to edit it.
+
+ This will be True if the page doesn't contain {{bots}} or
+ {{nobots}}, or it contains them and the active bot is allowed to
+ edit this page. (This method is only useful on those sites that
+ recognize the bot-exclusion protocol; on other sites, it will always
+ return True.)
- The framework enforces this restriction by default. It is possible to
- override this by setting wikipedia.ignore_bot_templates=True or using
- page.put(force=True).
+        The framework enforces this restriction by default. It is possible
+        to override this by setting ignore_bot_templates=True in
+        user-config.py, or using page.put(force=True).
+
"""
- global ignore_bot_templates
- if ignore_bot_templates: #Check the "master ignore switch"
+ if config.ignore_bot_templates: #Check the "master ignore switch"
return True
try:
@@ -798,41 +844,53 @@
return True
def userName(self):
+ """Return name or IP address of last user to edit page.
+
+ Returns None unless page was retrieved with getAll().
+
+ """
return self._userName
def isIpEdit(self):
+ """Return True if last editor was unregistered.
+
+ Returns None unless page was retrieved with getAll().
+
+ """
return self._ipedit
def editTime(self):
+ """Return timestamp (in MediaWiki format) of last revision to
page.
+
+ Returns None if last edit time is unknown.
+
+ """
return self._editTime
def namespace(self):
- """Gives the number of the namespace of the page. Does not work
for
- all namespaces in all languages, only when defined in family.py.
- If not defined, it will return 0 (the main namespace)"""
+ """Return the number of the namespace of the page.
+
+ Only recognizes those namespaces defined in family.py.
+ If not defined, it will return 0 (the main namespace).
+
+ """
return self._namespace
- # t=self.sectionFreeTitle()
- # p=t.split(':')
- # if p[1:]==[]:
- # return 0
- # for namespaceNumber in self.site().family.namespaces.iterkeys():
- # if p[0]==self.site().namespace(namespaceNumber):
- # return namespaceNumber
- # return 0
def isCategory(self):
- """
- True if the page is a Category, false otherwise.
- """
+ """Return True if the page is a Category, False
otherwise."""
return self.namespace() == 14
def isImage(self):
- """
- True if the page is an image description page, false otherwise.
- """
+ """Return True if this is an image description page, False
otherwise."""
return self.namespace() == 6
def isDisambig(self):
+ """Return True if this is a disambiguation page, False otherwise.
+
+ Relies on the presence of specific templates, identified in the Family
+ file, to identify disambiguation pages.
+
+ """
if not hasattr(self, '_isDisambig'):
locdis = self.site().family.disambig( self._site.lang )
@@ -853,10 +911,9 @@
def getReferences(self,
follow_redirects=True, withTemplateInclusion=True,
onlyTemplateInclusion=False, redirectsOnly=False):
- """
- Yield all pages that link to the page. If you need a full list of
- referring pages, use this:
+ """Yield all pages that link to the page.
+ If you need a full list of referring pages, use this:
pages = [page for page in s.getReferences()]
Parameters:
@@ -867,6 +924,7 @@
* onlyTemplateInclusion - if True, only returns pages where self is
used as a template.
* redirectsOnly - if True, only returns redirects to self.
+
"""
# Temporary bug-fix while researching more robust solution:
if config.special_page_limit > 999:
@@ -913,12 +971,10 @@
def _parse_reflist(self, reflist,
follow_redirects=True, withTemplateInclusion=True,
onlyTemplateInclusion=False, redirectsOnly=False):
- """
- For internal use only
+ """For internal use only
Parse a "Special:Whatlinkshere" list of references and yield Page
- objects that meet the criteria
- (used by getReferences)
+ objects that meet the criteria (used by getReferences)
"""
for link in reflist("li", recursive=False):
title = link.a.string
@@ -953,92 +1009,23 @@
onlyTemplateInclusion, redirectsOnly):
yield p
-
- def getFileLinks(self):
- """
- Yield all pages that link to the page. If you need a full list of
- referring pages, use this:
-
- pages = [page for page in s.getReferences()]
-
- """
- site = self.site()
- #path = site.references_address(self.urlname())
- path = site.get_address(self.urlname())
-
- delay = 1
-
- # NOTE: this code relies on the way MediaWiki 1.6 formats the
- # "Whatlinkshere" special page; if future versions change the
- # format, they may break this code.
- if self.site().versionnumber() >= 5:
- startmarker = u"<!-- start content -->"
- endmarker = u"<!-- end content -->"
- else:
- startmarker = u"<body "
- endmarker = "printfooter"
- listitempattern = re.compile(r"<li><a
href=.*>(?P<title>.*)</a></li>")
- # to tell the previous and next link apart, we rely on the closing ) at the end
of the "previous" label.
- more = True
-
- while more:
- more = False #Kill after one loop because MediaWiki will only display up to
the first 500 File links.
- fileLinks = set() # use a set to avoid duplications
- output(u'Getting references to %s' % self.aslink())
- while True:
- txt = site.getUrl(path)
- # trim irrelevant portions of page
- try:
- start = txt.index(startmarker) + len(startmarker)
- end = txt.index(endmarker)
- except ValueError:
- output(u"Invalid page received from server.... Retrying in %i
minutes." % delay)
- time.sleep(delay * 60.)
- delay *= 2
- if delay > 30:
- delay = 30
- continue
- txt = txt[start:end]
- break
- try:
- start = txt.index(u"<ul>")
- end = txt.rindex(u"</ul>")
- except ValueError:
- # No incoming links found on page
- continue
- txt = txt[start:end+5]
-
- txtlines = txt.split(u"\n")
- for num, line in enumerate(txtlines):
- if line == u"</ul>":
- # end of list of references to redirect page
- continue
- if line == u"</li>":
- continue
- lmatch = listitempattern.search(line)
- if lmatch:
- fileLinks.add(lmatch.group("title"))
- if lmatch is None:
- output(u"DBG> Unparsed line:")
- output(u"(%i) %s" % (num, line))
- fileLinks = list(fileLinks)
- fileLinks.sort()
- for fileLink in fileLinks:
- # create Page objects
- yield Page(site, fileLink)
-
def put_async(self, newtext,
comment=None, watchArticle=None, minorEdit=True, force=False,
callback=None):
- """Asynchronous version of put (takes the same arguments), which
- places pages on a queue to be saved by a daemon thread.
- All arguments are the same as for .put(), except --
- callback: a callable object that will be called after the page put
- operation; this object must take two arguments:
- (1) a Page object, and (2) an exception instance, which
- will be None if the page was saved successfully.
- The callback is intended to be used by bots that need to keep track
- of which saves were successful.
+ """Put page on queue to be saved to wiki asynchronously.
+
+ Asynchronous version of put (takes the same arguments), which places
+ pages on a queue to be saved by a daemon thread. All arguments are
+ the same as for .put(), except --
+
+ callback: a callable object that will be called after the page put
+ operation; this object must take two arguments:
+ (1) a Page object, and (2) an exception instance, which
+ will be None if the page was saved successfully.
+
+ The callback is intended to be used by bots that need to keep track
+ of which saves were successful.
+
"""
try:
page_put_queue.mutex.acquire()
@@ -1053,11 +1040,16 @@
def put(self, newtext, comment=None, watchArticle=None, minorEdit=True,
force=False):
- """Replace the new page with the contents of the first argument.
- The second argument is a string that is to be used as the
- summary for the modification
+ """Save the page with the contents of the first argument as the
text.
- If watchArticle is None, leaves the watchlist status unchanged.
+ Optional parameters:
+ comment: a unicode string that is to be used as the summary for
+ the modification.
+ watchArticle: a bool, add or remove this Page to/from bot user's
+ watchlist (if None, leave watchlist status unchanged)
+ minorEdit: mark this edit as minor if True
+ force: ignore botMayEdit() setting
+
"""
# Fetch a page to get an edit token. If we already have
# fetched a page, this will do nothing, because get() is cached.
@@ -1119,17 +1111,15 @@
# of Bordeaux
if self.site().lang == 'eo':
newtext = doubleXForEsperanto(newtext)
- return self.putPage(newtext, comment, watchArticle, minorEdit, newPage,
self.site().getToken(sysop = sysop), sysop = sysop)
+ return self._putPage(newtext, comment, watchArticle, minorEdit, newPage,
self.site().getToken(sysop = sysop), sysop = sysop)
- def putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
+ def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
newPage=False, token=None, gettoken=False, sysop=False):
- """
- Upload 'text' as new contents for this Page by filling out the edit
- page.
+ """Upload 'text' as new content of Page by filling out the
edit form.
Don't use this directly, use put() instead.
+
"""
-
newTokenRetrieved = False
if self.site().versionnumber() >= 4:
if gettoken or not token:
@@ -1267,7 +1257,7 @@
if not sysop:
self.site().forceLogin(sysop = True)
output(u'Page is locked, retrying using sysop
account.')
- return self.putPage(text, comment, watchArticle,
+ return self._putPage(text, comment, watchArticle,
minorEdit, newPage, token=None,
gettoken=True, sysop=True)
except NoUsername:
@@ -1275,7 +1265,7 @@
elif not newTokenRetrieved and "<textarea" in data:
# We might have been using an outdated token
output(u"Changing page has failed. Retrying.")
- return self.putPage(text = text, comment = comment,
+ return self._putPage(text = text, comment = comment,
watchArticle = watchArticle, minorEdit = minorEdit, newPage =
newPage,
token = None, gettoken = True, sysop = sysop)
else:
@@ -1290,11 +1280,12 @@
return response.status, response.reason, data
def canBeEdited(self):
+ """Return bool indicating whether this page can be edited.
+
+ This returns True if and only if:
+ * page is unprotected, and bot has an account for this site, or
+ * page is protected, and bot has a sysop account for this site.
"""
- Returns True iff:
- * the page is unprotected, and we have an account for this site, or
- * the page is protected, and we have a sysop account for this site.
- """
if self.editRestriction:
userdict = config.sysopnames
else:
@@ -1308,10 +1299,12 @@
return False
def toggleTalkPage(self):
- """
+ """Return the other member of the article-talk page pair for this
Page.
+
If self is a talk page, returns the associated content page; otherwise,
- returns the associated talk page. Returns None if self is a special
- page.
+ returns the associated talk page.
+ Returns None if self is a special page.
+
"""
ns = self.namespace()
if ns < 0: # Special page
@@ -1325,36 +1318,45 @@
return Page(self.site(), self.site().namespace(ns + 1) + ':' +
self.titleWithoutNamespace())
def interwiki(self):
- """A list of interwiki links in the page. This will retrieve
- the page text to do its work, so it can raise the same exceptions
- that are raised by the get() method.
+ """Return a list of interwiki links in the page text.
- The return value is a list of Page objects for each of the
- interwiki links in the page text.
+ This will retrieve the page to do its work, so it can raise
+ the same exceptions that are raised by the get() method.
+
+ The return value is a list of Page objects for each of the
+ interwiki links in the page text.
+
"""
result = []
- ll = getLanguageLinks(self.get(), insite = self.site(), pageLink =
self.aslink())
+ ll = getLanguageLinks(self.get(), insite=self.site(),
+ pageLink=self.aslink())
for newSite, newPage in ll.iteritems():
- for pagenametext in
self.site().family.pagenamecodes(self.site().language()):
- newTitle = newPage.title().replace("{{" + pagenametext +
"}}", self.title())
+ for pagenametext in self.site().family.pagenamecodes(
+ self.site().language()):
+ newTitle = newPage.title().replace(
+ "{{" + pagenametext + "}}",
self.title())
try:
- result.append(self.__class__(newSite, newTitle, insite = self.site()))
+ result.append(
+ self.__class__(newSite, newTitle, insite=self.site()))
except UnicodeError:
- output(u"ERROR: link from %s to [[%s:%s]] is invalid
encoding?!" % (self.aslink(), newSite, newTitle))
- except NoSuchEntity:
- output(u"ERROR: link from %s to [[%s:%s]] contains invalid
character?!" % (self.aslink(), newSite, newTitle))
+ output(
+ u"ERROR: link from %s to [[%s:%s]] is in an invalid encoding?!"
+ % (self.aslink(), newSite, newTitle))
except ValueError:
- output(u"ERROR: link from %s to [[%s:%s]] contains invalid unicode
reference?!" % (self.aslink(), newSite, newTitle))
+ output(
+ u"ERROR: link from %s to [[%s:%s]] contains invalid unicode reference?!"
+ % (self.aslink(), newSite, newTitle))
return result
def categories(self, nofollow_redirects=False):
- """
- A list of categories that the article is in. This will retrieve
- the page text to do its work, so it can raise the same exceptions
- that are raised by the get() method.
+ """Return a list of categories that the article is in.
+ This will retrieve the page text to do its work, so it can raise
+ the same exceptions that are raised by the get() method.
+
The return value is a list of Category objects, one for each of the
category links in the page text.
+
"""
try:
category_links_to_return =
getCategoryLinks(self.get(nofollow_redirects=nofollow_redirects), self.site())
@@ -1363,8 +1365,7 @@
return category_links_to_return
def __cmp__(self, other):
- """Pseudo method to be able to use equality and inequality tests
on
- Page objects"""
+ """Test for equality and inequality of Page
objects"""
if not isinstance(other, Page):
# especially, return -1 if other is None
return -1
@@ -1375,19 +1376,20 @@
return cmp(owntitle, othertitle)
def __hash__(self):
- """Pseudo method that makes it possible to store Page objects as
- keys in hash-tables. This relies on the fact that the string
- representation of an instance can not change after the construction.
- """
+ # Pseudo method that makes it possible to store Page objects as keys
+ # in hash-tables. This relies on the fact that the string
+ # representation of an instance can not change after the construction.
return hash(str(self))
def linkedPages(self):
- """Gives the normal (not-interwiki, non-category) pages the page
- links to, as a list of Page objects
+ """Return a list of Pages that this Page links to.
+
+ Excludes interwiki and category links.
"""
result = []
try:
- thistxt = removeLanguageLinks(self.get(get_redirect=True), self.site())
+ thistxt = removeLanguageLinks(self.get(get_redirect=True),
+ self.site())
except NoPage:
raise
#return []
@@ -1416,13 +1418,15 @@
result.append(page)
return result
- def imagelinks(self, followRedirects = False, loose = False):
+ def imagelinks(self, followRedirects=False, loose=False):
+ """Return a list of ImagePage objects for images displayed on this
Page.
+
+ Includes images in galleries.
+ If loose is True, this will find anything that looks like it
+ could be an image. This is useful for finding, say, images that are
+ passed as parameters to templates.
+
"""
- Gives the images the page shows, as a list of ImagePage objects.
- This includes images in galleries.
- If loose is set to true, this will find anything that looks like it could be an
image.
- This is useful for finding, say, images that are passed as parameters to
templates.
- """
results = []
# Find normal images
for page in self.linkedPages():
@@ -1446,18 +1450,21 @@
return list(set(results))
def templates(self):
- """
- Gives a list of template names used on a page, as a list of strings.
+ """Return a list of strings containing template names used on this
Page.
+
Template parameters are ignored.
+
"""
return [template for (template, param) in self.templatesWithParams()]
def templatesWithParams(self):
+ """Return a list of templates used on this Page.
+
+ Return value is a list of tuples. There is one tuple for each use of
+ a template in the page, with the template name as the first entry
+ and a list of parameters as the second entry.
+
"""
- Gives a list of tuples. There is one tuple for each use of a template
- in the page, with the template name as the first entry and a list
- of parameters as the second entry.
- """
try:
thistxt = self.get()
except (IsRedirectPage, NoPage):
@@ -1481,17 +1488,19 @@
return result
def templatePages(self):
+ """Return a list of Page objects for templates used on the page.
+
+ Template parameters are ignored.
"""
- Gives a list of Page objects containing the templates used on the page. Template
parameters are ignored.
- """
- return [Page(self.site(), template, self.site(), 10) for template in
self.templates()]
+ return [Page(self.site(), template, self.site(), 10)
+ for template in self.templates()]
def getRedirectTarget(self):
- """
- If the page is a redirect page, gives the page it redirects to.
- Otherwise it will raise an IsNotRedirectPage exception.
+ """Return a Page object for the target this Page redirects to.
- This function can raise a NoPage exception.
+ If this page is not a redirect page, will raise an IsNotRedirectPage
+ exception. This method also can raise a NoPage exception.
+
"""
try:
self.get()
@@ -1499,22 +1508,23 @@
raise
except IsRedirectPage, arg:
if '|' in arg:
- warnings.warn("%s has a | character, this makes no sense",
Warning)
+ warnings.warn("%s has a | character, this makes no sense",
+ Warning)
return Page(self.site(), arg[0])
else:
raise IsNotRedirectPage(self)
- def getPreviousVersion(self):
- vh = self.getVersionHistory(revCount=2)
- oldid = vh[1][0]
- return self.getEditPage(oldid=oldid)[0]
+ def getVersionHistory(self, forceReload=False, reverseOrder=False,
+ getAll=False, revCount=500):
+ """Load the version history page and return history information.
- def getVersionHistory(self, forceReload = False, reverseOrder = False, getAll =
False, revCount = 500):
+ Return value is a list of tuples, where each tuple represents one
+ edit and is built of revision id, edit date/time, user name, and
+ edit summary. Starts with the most current revision, unless
+ reverseOrder is True. Defaults to getting the first revCount edits,
+ unless getAll is True.
+
"""
- Loads the version history page and returns a list of tuples, where each
- tuple represents one edit and is built of edit date/time, user name, and edit
- summary. Defaults to getting the first revCount edits.
- """
site = self.site()
# regular expression matching one edit in the version history.
@@ -1682,10 +1692,9 @@
return self._versionhistory[0:revCount]
return self._versionhistory
- def getVersionHistoryTable(self, forceReload = False, reverseOrder = False, getAll =
False, revCount = 500):
- """
- Returns the version history as a wiki table.
- """
+ def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
+ getAll=False, revCount=500):
+ """Return the version history as a wiki table."""
result = '{| border="1"\n'
result += '! oldid || date/time || username || edit summary\n'
for oldid, time, username, summary in self.getVersionHistory(forceReload =
forceReload, reverseOrder = reverseOrder, getAll = getAll, revCount = revCount):
@@ -1696,9 +1705,12 @@
def fullVersionHistory(self):
"""
- Returns all previous versions. Gives a list of tuples consisting of
- edit date/time, user name and content
+ Return all previous versions including wikitext.
+
+ Gives a list of tuples consisting of edit date/time, user name and
+ content
"""
+ # TODO: probably should return revision id, as well.
address = self.site().export_address()
predata = {
'action': 'submit',
@@ -1717,27 +1729,29 @@
data = data.encode(self.site().encoding())
get_throttle.setDelay(time.time() - now)
output = []
+ # TODO: parse XML using an actual XML parser instead of regex!
r =
re.compile("\<revision\>.*?\<timestamp\>(.*?)\<\/timestamp\>.*?\<(?:ip|username)\>(.*?)\</(?:ip|username)\>.*?\<text.*?\>(.*?)\<\/text\>",re.DOTALL)
#r =
re.compile("\<revision\>.*?\<timestamp\>(.*?)\<\/timestamp\>.*?\<(?:ip|username)\>(.*?)\<",re.DOTALL)
- return [(match.group(1), unescape(match.group(2)), unescape(match.group(3))) for
match in r.finditer(data)]
+ return [ (match.group(1),
+ unescape(match.group(2)),
+ unescape(match.group(3)))
+ for match in r.finditer(data) ]
def contributingUsers(self):
- """
- Returns a set of all user names (including anonymous IPs) of those who
- edited the page.
- """
+ """Return a set of usernames (or IPs) of users who edited this
page."""
edits = self.getVersionHistory()
- users = set()
- for edit in edits:
- users.add(edit[2])
+ users = set([edit[2] for edit in edits])
return users
- def move(self, newtitle, reason = None, movetalkpage = True, sysop = False, throttle
= False):
+ def move(self, newtitle, reason=None, movetalkpage=True, sysop=False,
+ throttle=True):
+ """Move this page to new title given by
newtitle."""
if throttle:
put_throttle()
if reason == None:
- reason = "Pagemove by bot"
- if self.namespace() // 2 == 1:
+ reason = input(u'Please enter a reason for the move:')
+ reason = reason.encode(self.site().encoding())
+ if self.isTalkPage():
movetalkpage = False
host = self.site().hostname()
address = self.site().move_address()
@@ -1767,7 +1781,9 @@
output(u'Page %s moved to %s' % (self.title(), newtitle))
return True
elif self.site().mediawiki_message('articleexists') in data:
- output(u'Page moved failed: Target page [[%s]] already exists.' %
newtitle)
+ output(u'Page move failed: Target page [[%s]] already exists.'
+ % newtitle)
+ return False
else:
output(u'Page move failed for unknown reason.')
try:
@@ -1782,10 +1798,13 @@
output(data)
return False
- def delete(self, reason = None, prompt = True, throttle = False):
- """Deletes the page from the wiki. Requires administrator status.
If
- reason is None, asks for a reason. If prompt is True, asks the user
- if he wants to delete the page.
+ def delete(self, reason=None, prompt=True, throttle=True):
+ """Deletes the page from the wiki.
+
+ Requires administrator status. If reason is None, asks for a
+ reason. If prompt is True, asks the user if he wants to delete the
+ page.
+
"""
if throttle:
put_throttle()
@@ -1841,9 +1860,12 @@
return False
def loadDeletedRevisions(self):
- """Loads up Special/Undelete for the page and stores all
revisions'
- timestamps, dates, editors and comments.
- Returns list of timestamps (which are used to refer to revisions later on).
+ """Retrieve all deleted revisions for this Page from
Special/Undelete.
+
+ Stores all revisions' timestamps, dates, editors and comments.
+ Returns list of timestamps (which can be used to retrieve revisions
+ later on).
+
"""
#TODO: Handle image file revisions too.
output(u'Loading list of deleted revisions for [[%s]]...' %
self.title())
@@ -1868,8 +1890,12 @@
return self._deletedRevs.keys()
def getDeletedRevision(self, timestamp, retrieveText=False):
- """Returns a deleted revision [date, editor, comment, text,
restoration marker].
- text will be None, unless retrieveText is True (or has been retrieved
earlier).
+ """Return a particular deleted revision by timestamp.
+
+ Return value is a list of [date, editor, comment, text, restoration
+ marker]. text will be None, unless retrieveText is True (or has been
+ retrieved earlier).
+
"""
if self._deletedRevs == None:
self.loadDeletedRevisions()
@@ -1889,8 +1915,10 @@
return self._deletedRevs[timestamp]
def markDeletedRevision(self, timestamp, undelete=True):
- """Marks revision (identified by timestamp) for undeletion
(default)
- or to remain as deleted (if undelete=False).
+ """Mark the revision identified by timestamp for undeletion.
+
+ If undelete is False, mark the revision to remain deleted.
+
"""
if self._deletedRevs == None:
self.loadDeletedRevisions()
@@ -1900,20 +1928,23 @@
self._deletedRevs[timestamp][4] = undelete
self._deletedRevsModified = True
- def undelete(self, comment='', throttle=False):
+ def undelete(self, comment='', throttle=True):
"""Undeletes page based on the undeletion markers set by previous
calls.
- If no calls have been made since loadDeletedRevisions(), everything will be
restored.
- Simplest case:
- wikipedia.Page(...).undelete('This will restore all revisions')
+ If no calls have been made since loadDeletedRevisions(), everything
+ will be restored.
- More complex:
- pg = wikipedia.Page(...)
- revs = pg.loadDeletedRevsions()
- for rev in revs:
- if ... #decide whether to undelete a revision
- pg.markDeletedRevision(rev) #mark for undeletion
- pg.undelete('This will restore only selected revisions.')
+ Simplest case:
+ wikipedia.Page(...).undelete('This will restore all revisions')
+
+ More complex:
+ pg = wikipedia.Page(...)
+ revs = pg.loadDeletedRevisions()
+ for rev in revs:
+ if ... #decide whether to undelete a revision
+ pg.markDeletedRevision(rev) #mark for undeletion
+ pg.undelete('This will restore only selected revisions.')
+
"""
if throttle:
put_throttle()
@@ -1939,13 +1970,17 @@
#TODO: Check for errors below (have we succeeded? etc):
return self.site().postForm(address,formdata,sysop=True)
- def protect(self, edit = 'sysop', move = 'sysop', unprotect = False,
reason = None, prompt = True, throttle = False):
- """(Un)protects a wiki page. Requires administrator status. If
reason is None,
- asks for a reason. If prompt is True, asks the user if he wants to protect the
page.
- Valid values for edit and move are:
+ def protect(self, edit='sysop', move='sysop', unprotect=False,
+ reason=None, prompt=True, throttle=True):
+ """(Un)protect a wiki page. Requires administrator status.
+
+ If reason is None, asks for a reason. If prompt is True, asks the
+ user if he wants to protect the page. Valid values for edit and move
+ are:
* '' (equivalent to 'none')
* 'autoconfirmed'
* 'sysop'
+
"""
address = self.site().protect_address(self.urlname())
if unprotect:
@@ -1994,23 +2029,30 @@
output(data)
return False
- def removeImage(self, image, put = False, summary = None, safe = True):
+ def removeImage(self, image, put=False, summary=None, safe=True):
+ """Remove all occurrences of an image from this
Page."""
+ # TODO: this should be grouped with other functions that operate on
+ # wiki-text rather than the Page object
return self.replaceImage(image, None, put, summary, safe)
- def replaceImage(self, image, replacement = None, put = False, summary = None, safe =
True):
+ def replaceImage(self, image, replacement=None, put=False, summary=None,
+ safe=True):
"""Replace all occurences of an image by another image.
- Giving None as argument for replacement will delink
- instead of replace.
- The argument image must be without namespace and all
- spaces replaced by underscores.
+ Giving None as argument for replacement will delink instead of
+ replace.
- If put is false, the new text will be returned.
+ The argument image must be without namespace and all spaces replaced
+ by underscores.
- If put is true, the edits will be saved to the wiki
- and True will be returned on succes, and otherwise
- False. Edit errors propagate."""
+ If put is False, the new text will be returned. If put is True, the
edits will be saved to the wiki and True will be returned on success,
+ and otherwise False. Edit errors propagate.
+ """
+ # TODO: this should be grouped with other functions that operate on
+ # wiki-text rather than the Page object
+
# Copyright (c) Orgullomoore, Bryan
# TODO: document and simplify the code
@@ -2024,7 +2066,9 @@
Creates a pattern that matches the string case-insensitively.
"""
s = re.escape(s)
- return ur'(?:%s)' % u''.join([u'[%s%s]' % (c.upper(),
c.lower()) for c in s])
+ return ur'(?:%s)' % u''.join([u'[%s%s]'
+ % (c.upper(), c.lower())
+ for c in s])
def capitalizationPattern(s):
"""
@@ -2088,18 +2132,20 @@
else:
return new_text
+
class ImagePage(Page):
# a Page in the Image namespace
def __init__(self, site, title = None, insite = None):
+ # TODO: raise an exception if title is not in Image: namespace
Page.__init__(self, site, title, insite)
self._imagePageHtml = None
def getImagePageHtml(self):
"""
- Downloads the image page, and returns the HTML, as a unicode string.
+ Download the image page, and return the HTML, as a unicode string.
Caches the HTML code, so that if you run this method twice on the
- same ImagePage object, the page only will be downloaded once.
+ same ImagePage object, the page will only be downloaded once.
"""
if not self._imagePageHtml:
path = self.site().get_address(self.urlname())
@@ -2168,13 +2214,15 @@
return u'{| border="1"\n! date/time || username || resolution ||
size || edit summary\n|----\n' + u'\n|----\n'.join(lines) + '\n|}'
def usingPages(self):
- result = []
- titleList = re.search('(?s)<h2
id="filelinks">.+?</ul>', self.getImagePageHtml()).group()
- lineR = re.compile('<li><a href=".+?"
title=".+?">(?P<title>.+?)</a></li>')
+ """Yield Pages on which this ImagePage is
displayed."""
+ titleList = re.search('(?s)<h2
id="filelinks">.+?</ul>',
+ self.getImagePageHtml()).group()
+ lineR = re.compile(
+ '<li><a href=".+?"
title=".+?">(?P<title>.+?)</a></li>')
for match in lineR.finditer(titleList):
- result.append(Page(self.site(), match.group('title')))
- return result
+ yield Page(self.site(), match.group('title'))
+
class GetAll(object):
def __init__(self, site, pages, throttle, force):
self.site = site
@@ -2295,7 +2343,6 @@
output(u'Expected one of: %s' %
u','.join([page2.aslink(forceInterwiki=True) for page2 in self.pages]))
raise PageNotFound
-
def headerDone(self, header):
# Verify our family data
lang = self.site.lang
@@ -2364,9 +2411,9 @@
output(u'Getting %d pages from %s...' % (len(pages), site))
return GetAll(site, pages, throttle, force).run()
+
# Library functions
-
def unescape(s):
"""Replace escaped HTML-special characters by their
originals"""
if '&' not in s:
@@ -2519,9 +2566,12 @@
f.close()
def __call__(self, requestsize=1):
- """This is called from getEditPage without arguments. It will make
sure
- that if there are no 'ignores' left, there are at least delay seconds
- since the last time it was called before it returns."""
+ """
+ Block the calling program if the throttle time has not expired.
+
+ Parameter requestsize is the number of Pages to be read/written;
+ multiply delay time by an appropriate factor.
+ """
self.lock.acquire()
try:
waittime = self.waittime()
@@ -2896,8 +2946,8 @@
text = replaceExcept(text, categoryR, '', ['nowiki',
'comment', 'math', 'pre'], marker = marker)
return normalWhitespace(text)
-def replaceCategoryInPlace(oldtext, oldcat, newcat, site = None):
- """Replaces the category oldcat with the category newcat and then
returns
+def replaceCategoryInPlace(oldtext, oldcat, newcat, site=None):
+ """Replace the category oldcat with the category newcat and then
return
the modified Wiki source.
"""
#Note that this doesn't work yet and it has some very strange side-effects.
@@ -2909,21 +2959,13 @@
title = oldcat.titleWithoutNamespace()
if not title:
return
- # title might not be formatted correctly on the wiki
+ # title might contain regex special characters
+ title = re.escape(title)
+ # title might not be capitalized correctly on the wiki
if title[0].isalpha() and not site.nocapitalize:
title = "[%s%s]" % (title[0].upper(), title[0].lower()) + title[1:]
- # title might also contain regex special characters
- title = title.replace(" ", "[ _]+")\
- .replace("(", r"\(")\
- .replace(")", r"\)")\
- .replace(".", r"\.")\
- .replace("^", r"\^")\
- .replace("$", r"\$")\
- .replace("*", r"\*")\
- .replace("+", r"\+")\
- .replace("?", r"\?")
- # note: | [ ] { } not escaped here because they are not legal in
- # MW page titles
+ # spaces and underscores in page titles are interchangeable, and collapsible
+ title = title.replace(" ", "[ _]+")
categoryR = re.compile(r'\[\[\s*(%s)\s*:\s*%s\s*((?:\|[^]]+)?\]\])'
% (catNamespace, title))
if newcat is None:
@@ -3192,6 +3234,35 @@
return myfamily.Family()
class Site(object):
+ """A MediaWiki site.
+
+ messages : There are new messages on the site
+ forceLogin() : Does not continue until the user has logged in to
+ the site
+ getUrl() : Retrieve an URL from the site
+ mediawiki_message(key): Retrieve the text of the MediaWiki message with
+ the key "key"
+ has_mediawiki_message(key) : True if this site defines a MediaWiki message
+ with the key "key"
+
+ Special pages:
+ Dynamic pages:
+ allpages(): Special:Allpages
+ newpages(): Special:Newpages
+ longpages(): Special:Longpages
+ shortpages(): Special:Shortpages
+ categories(): Special:Categories
+
+ Cached pages:
+ deadendpages(): Special:Deadendpages
+ ancientpages(): Special:Ancientpages
+ lonelypages(): Special:Lonelypages
+ uncategorizedcategories(): Special:Uncategorizedcategories
+ uncategorizedpages(): Special:Uncategorizedpages
+ uncategorizedimages(): Special:Uncategorizedimages
+ unusedcategories(): Special:Unusedcategories
+
+ """
def __init__(self, code, fam=None, user=None, persistent_http = None):
"""Constructor takes four arguments: