Revision: 5088
Author: russblau
Date: 2008-02-27 20:08:48 +0000 (Wed, 27 Feb 2008)
Log Message:
-----------
Committing page and site modules, related tests
Modified Paths:
--------------
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/config.py
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/family.py
branches/rewrite/pywikibot/login.py
branches/rewrite/pywikibot/tests/api_tests.py
Added Paths:
-----------
branches/rewrite/pywikibot/exceptions.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-02-27 20:05:28 UTC (rev 5087)
+++ branches/rewrite/pywikibot/__init__.py 2008-02-27 20:08:48 UTC (rev 5088)
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+"""
+The initialization file for the Pywikibot framework.
+"""
+#
+# (C) Pywikipedia bot team, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id: $'
+
+
+from exceptions import *
+
+from page import Page, ImagePage, Category
+
+import config
+
+_sites = {}
+default_family = config.family
+default_code = config.mylang
+
+def Site(code=None, fam=None, user=None, interface=None):
+ """Return the specified Site object.
+
+ Returns a cached object if possible, otherwise instantiates a new one.
+
+ @param code: language code
+ @type code: string
+ @param fam: family name or object
+ @type fam: string or Family
+ @param user: bot user name to use on this site
+ @type user: unicode
+
+ """
+ if code is None:
+ code = default_code
+ if fam is None:
+ fam = default_family
+ if user is None:
+ try:
+ user = config.usernames[fam][code]
+ except KeyError:
+ user = None
+ if interface is None:
+ interface = config.site_interface
+ try:
+ exec "from site import %s as __Site" % interface
+ except ImportError:
+ raise ValueError("Invalid interface name '%s'" % interface)
+ key = '%s:%s:%s' % (fam, code, user)
+ if not _sites.has_key(key):
+ _sites[key] = __Site(code=code, fam=fam, user=user)
+ return _sites[key]
+
+getSite = Site # alias for backwards-compatibility
+
+# DEBUG
+
+def output(text):
+ print text
+
+def input(prompt, password=False):
+ if password:
+ import getpass
+ return getpass.getpass(prompt)
+ return raw_input(prompt)
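
The Site() factory above caches one Site object per (family, code, user) triple. A minimal usage sketch (the "en"/"wikipedia" values are illustrative; omitted arguments fall back to the config defaults):

    import pywikibot

    site = pywikibot.Site("en", "wikipedia")
    # a second call with the same family, code and user returns the cached object
    assert site is pywikibot.Site("en", "wikipedia")
    # getSite() is the backwards-compatible alias
    assert site is pywikibot.getSite("en", "wikipedia")
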
Modified: branches/rewrite/pywikibot/config.py
===================================================================
--- branches/rewrite/pywikibot/config.py 2008-02-27 20:05:28 UTC (rev 5087)
+++ branches/rewrite/pywikibot/config.py 2008-02-27 20:08:48 UTC (rev 5088)
@@ -26,7 +26,9 @@
family = 'wikipedia'
# The language code of the site we're working on.
mylang = 'language'
-
+# The default interface for communicating with the site
+# currently the only defined interface is 'APISite', so don't change this!
+site_interface = 'APISite'
# The dictionary usernames should contain a username for each site where you
# have a bot account. Please set your usernames by adding such lines to your
# user-config.py:
@@ -71,8 +73,49 @@
# Get the names of all known families, and initialize
# with empty dictionaries
-import wikipediatools as _wt
-_base_dir = _wt.get_base_dir()
+def _get_base_dir():
+ """Return the directory in which user-specific information is stored.
+
+ This is determined in the following order -
+ 1. If the script was called with a -dir: argument, use the directory
+ provided in this argument
+ 2. If the user has a PYWIKIBOT_DIR environment variable, use the value
+ of it
+ 3. If the script was started from a directory that contains a
+ user-config.py file, use this directory as the base
+ 4. If all else fails, use the directory from which this module was
+ loaded.
+
+ """
+ for arg in __sys.argv[1:]:
+ if arg.startswith("-dir:"):
+ base_dir = arg[5:]
+ __sys.argv.remove(arg)
+ break
+ else:
+ if os.environ.has_key("PYWIKIBOT_DIR"):
+ base_dir = os.environ["PYWIKIBOT_DIR"]
+ else:
+ if os.path.exists('user-config.py'):
+ base_dir = '.'
+ else:
+ try:
+ base_dir = os.path.split(
+ __sys.modules['wikipediatools'].__file__)[0]
+ except KeyError:
+ print __sys.modules
+ base_dir = '.'
+ if not os.path.isabs(base_dir):
+ base_dir = os.path.normpath(os.path.join(os.getcwd(), base_dir))
+ # make sure this path is valid and that it contains user-config file
+ if not os.path.isdir(base_dir):
+ raise RuntimeError("Directory '%s' does not exist." %
base_dir)
+ if not os.path.exists(os.path.join(base_dir, "user-config.py")):
+ raise RuntimeError("No user-config.py found in directory '%s'." % base_dir)
+ return base_dir
+
+_base_dir = _get_base_dir()
_RfamilyFile = re.compile('(?P<name>.+)_family.py$')
for _filename in os.listdir(os.path.join(_base_dir, 'families')):
_m = _RfamilyFile.match(_filename)
@@ -477,12 +520,13 @@
"""Return an absolute path to a data file in a standard location.
Argument(s) are zero or more directory names, optionally followed by a
- data file name. The return path is offset to config.base_dir. Any
- directories in the path that do not already exist are created.
+ data file name. The return path is offset to the "data" subdirectory of
+ config.base_dir. Any directories in the path that do not already exist
+ are created.
"""
import os
- return makepath(os.path.join(base_dir, *filename))
+ return makepath(os.path.join(os.path.join(base_dir, "data"), *filename))
def shortpath(path):
"""Return a file path relative to config.base_dir."""
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-02-27 20:05:28 UTC (rev 5087)
+++ branches/rewrite/pywikibot/data/api.py 2008-02-27 20:08:48 UTC (rev 5088)
@@ -10,6 +10,7 @@
__version__ = '$Id: $'
from UserDict import DictMixin
+from datetime import datetime, timedelta
import http
import simplejson as json
import logging
@@ -17,10 +18,10 @@
import traceback
import time
import urllib
-# TODO - replace when Page object is written
-from pywikibot.tests.dummy import TestPage as Page
+from pywikibot import login
+
lagpattern = re.compile(r"Waiting for [\d.]+: (?P<lag>\d+) seconds? lagged")
@@ -127,7 +128,7 @@
if self.params['format'] != 'json':
raise TypeError("Query format '%s' cannot be parsed."
% self.params['format'])
- uri = self.site.script_path() + "api.php"
+ uri = self.site.scriptpath() + "/api.php"
params = urllib.urlencode(self.params)
while True:
# TODO wait on errors
@@ -143,6 +144,7 @@
rawdata = http.request(self.site, uri)
except Exception, e: #TODO: what exceptions can occur here?
logging.warning(traceback.format_exc())
+ print uri, params
self.wait()
continue
if rawdata.startswith(u"unknown_action"):
@@ -257,9 +259,44 @@
del self.data
+class LoginManager(login.LoginManager):
+ """Supplies getCookie() method to use API
interface."""
+ def getCookie(self, remember=True, captchaId=None, captchaAnswer=None):
+ """
+ Login to the site.
+
+ Parameters are all ignored.
+
+ Returns cookie data if successful, None otherwise.
+ """
+ if hasattr(self, '_waituntil'):
+ if datetime.now() < self._waituntil:
+ time.sleep((self._waituntil - datetime.now()).seconds)
+ login_request = Request(site=self.site,
+ action="login",
+ lgname=self.username,
+ lgpassword=self.password
+ )
+ login_result = login_request.submit()
+ if u"login" not in login_result:
+ raise RuntimeError("API login response does not have 'login'
key.")
+ if login_result['login']['result'] != u'Success':
+ self._waituntil = datetime.now() + timedelta(seconds=60)
+ return None
+
+ prefix = login_result['login']['cookieprefix']
+ cookies = []
+ for key in ('Token', 'UserID', 'UserName'):
+ cookies.append("%s%s=%s"
+ % (prefix, key,
+ login_result['login']['lg'+key.lower()]))
+ self.username = login_result['login']['lgusername']
+ return "\n".join(cookies)
+
+
if __name__ == "__main__":
- from pywikibot.tests.dummy import TestSite as Site, TestPage as Page
- mysite = Site("en.wikipedia.org")
+ from pywikibot import Site
+ mysite = Site("en", "wikipedia")
logging.getLogger().setLevel(logging.DEBUG)
def _test():
import doctest
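
The new LoginManager illustrates the general api.Request pattern: keyword arguments become API parameters, and submit() returns the response parsed from JSON into a dict. A sketch of a simple query built the same way (the siteinfo parameters are standard MediaWiki API values, not something defined in this commit):

    from pywikibot import Site
    from pywikibot.data.api import Request

    mysite = Site("en", "wikipedia")
    req = Request(site=mysite, action="query",
                  meta="siteinfo", siprop="general")
    data = req.submit()   # dict parsed from the JSON response
    print data["query"]["general"]["sitename"]
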
Added: branches/rewrite/pywikibot/exceptions.py
===================================================================
--- branches/rewrite/pywikibot/exceptions.py (rev 0)
+++ branches/rewrite/pywikibot/exceptions.py 2008-02-27 20:08:48 UTC (rev 5088)
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+"""
+Exception classes used throughout the framework.
+"""
+#
+# (C) Pywikipedia bot team, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id: $'
+
+
+# TODO: These are copied from wikipedia.py; not certain that all of them
+# will be needed in the rewrite.
+
+class Error(Exception):
+ """Wikipedia error"""
+
+class NoUsername(Error):
+ """Username is not in user-config.py"""
+
+class NoPage(Error):
+ """Page does not exist"""
+
+class NoSuchSite(Error):
+ """Site does not exist"""
+
+class IsRedirectPage(Error):
+ """Page is a redirect page"""
+
+class IsNotRedirectPage(Error):
+ """Page is not a redirect page"""
+
+class LockedPage(Error):
+ """Page is locked"""
+
+class SectionError(Error):
+ """The section specified by # does not exist"""
+
+class PageNotSaved(Error):
+ """Saving the page has failed"""
+
+class EditConflict(PageNotSaved):
+ """There has been an edit conflict while uploading the
page"""
+
+class SpamfilterError(PageNotSaved):
+ """Saving the page has failed because the MediaWiki spam filter
detected a blacklisted URL."""
+ def __init__(self, arg):
+ self.url = arg
+ self.args = arg,
+
+class ServerError(Error):
+ """Got unexpected server response"""
+
+class BadTitle(Error):
+ """Server responded with BadTitle."""
+
+# UserBlocked exceptions should in general not be caught. If the bot has
+# been blocked, the bot operator should address the reason for the block
+# before continuing.
+class UserBlocked(Error):
+ """Your username or IP has been blocked"""
+
+class PageNotFound(Error):
+ """Page not found in list"""
+
+class CaptchaError(Error):
+ """Captcha is asked and config.solve_captcha ==
False."""
+
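
Because every class derives from Error, callers can catch specific conditions and still keep a single fallback. A sketch of the intended usage together with Page.get() (helper name is illustrative):

    from pywikibot.exceptions import Error, NoPage, IsRedirectPage

    def safe_get(page):
        try:
            return page.get()
        except NoPage:
            return None
        except IsRedirectPage:
            # follow the redirect instead of failing
            return page.getRedirectTarget().get()
        # anything else still propagates as a subclass of Error
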
Modified: branches/rewrite/pywikibot/family.py
===================================================================
--- branches/rewrite/pywikibot/family.py 2008-02-27 20:05:28 UTC (rev 5087)
+++ branches/rewrite/pywikibot/family.py 2008-02-27 20:08:48 UTC (rev 5088)
@@ -2963,6 +2963,16 @@
wiki"""
return self.code2encoding(code),
+ # aliases
+ def encoding(self, code):
+ """Return the encoding for a specific language
wiki"""
+ return self.code2encoding(code)
+
+ def encodings(self, code):
+ """Return a list of historical encodings for a specific language
+ wiki"""
+ return self.code2encodings(code)
+
def __cmp__(self, otherfamily):
try:
return cmp(self.name, otherfamily.name)
@@ -2972,6 +2982,9 @@
def __hash__(self):
return hash(self.name)
+ def __repr__(self):
+ return 'Family("%s")' % self.name
+
def RversionTab(self, code):
"""Change this to some regular expression that shows the page we
found is an existing page, in case the normal regexp does not
work."""
Modified: branches/rewrite/pywikibot/login.py
===================================================================
--- branches/rewrite/pywikibot/login.py 2008-02-27 20:05:28 UTC (rev 5087)
+++ branches/rewrite/pywikibot/login.py 2008-02-27 20:08:48 UTC (rev 5088)
@@ -46,7 +46,10 @@
import re
import urllib2
-import wikipedia, config
+import config
+import pywikibot
+from pywikibot import Page
+from pywikibot.exceptions import *
# On some wikis you are only allowed to run a bot if there is a link to
# the bot's user page in a specific list.
@@ -65,17 +68,17 @@
class LoginManager:
def __init__(self, password = None, sysop = False, site = None):
- self.site = site or wikipedia.getSite()
+ self.site = site or pywikibot.Site()
if sysop:
try:
- self.username = config.sysopnames[self.site.family.name][self.site.lang]
+ self.username = config.sysopnames[self.site.family().name][self.site.language()]
except:
- raise wikipedia.NoUsername(u'ERROR: Sysop username for %s:%s is undefined.\nIf you have a sysop account for that site, please add such a line to user-config.py:\n\nsysopnames[\'%s\'][\'%s\'] = \'myUsername\'' % (self.site.family.name, self.site.lang, self.site.family.name, self.site.lang))
+ raise NoUsername(u'ERROR: Sysop username for %s:%s is undefined.\nIf you have a sysop account for that site, please add such a line to user-config.py:\n\nsysopnames[\'%s\'][\'%s\'] = \'myUsername\'' % (self.site.family().name, self.site.language(), self.site.family().name, self.site.language()))
else:
try:
- self.username = config.usernames[self.site.family.name][self.site.lang]
+ self.username = config.usernames[self.site.family().name][self.site.language()]
except:
- raise wikipedia.NoUsername(u'ERROR: Username for %s:%s is undefined.\nIf you have an account for that site, please add such a line to user-config.py:\n\nusernames[\'%s\'][\'%s\'] = \'myUsername\'' % (self.site.family.name, self.site.lang, self.site.family.name, self.site.lang))
+ raise NoUsername(u'ERROR: Username for %s:%s is undefined.\nIf you have an account for that site, please add such a line to user-config.py:\n\nusernames[\'%s\'][\'%s\'] = \'myUsername\'' % (self.site.family().name, self.site.language(), self.site.family().name, self.site.language()))
self.password = password
if getattr(config, 'password_file', ''):
self.readPassword()
@@ -85,9 +88,10 @@
Checks whether the bot is listed on a specific page to comply with
the policy on the respective wiki.
"""
+ return True # DEBUG
if botList.has_key(self.site.family.name) and botList[self.site.family.name].has_key(self.site.language()):
botListPageTitle = botList[self.site.family.name][self.site.language()]
- botListPage = wikipedia.Page(self.site, botListPageTitle)
+ botListPage = Page(self.site, botListPageTitle)
for linkedPage in botListPage.linkedPages():
if linkedPage.titleWithoutNamespace() == self.username:
return True
@@ -171,10 +175,11 @@
The argument data is the raw data, as returned by getCookie().
- Returns nothing."""
- filename = wikipedia.config.datafilepath('login-data',
- '%s-%s-%s-login.data'
- % (self.site.family.name, self.site.lang, self.username))
+ """
+ filename = config.datafilepath('%s-%s-%s-login.data'
+ % (self.site.family().name,
+ self.site.language(),
+ self.username))
f = open(filename, 'w')
f.write(data)
f.close()
@@ -211,21 +216,21 @@
if not self.password:
# As we don't want the password to appear on the screen, we set
# password = True
- self.password = wikipedia.input(u'Password for user %s on %s:' % (self.username, self.site), password = True)
+ self.password = pywikibot.input(u'Password for user %s on %s:' % (self.username, self.site), password = True)
- self.password = self.password.encode(self.site.encoding())
+# self.password = self.password.encode(self.site.encoding())
- wikipedia.output(u"Logging in to %s as %s" % (self.site,
self.username))
+ pywikibot.output(u"Logging in to %s as %s" % (self.site,
self.username))
cookiedata = self.getCookie()
if cookiedata:
self.storecookiedata(cookiedata)
- wikipedia.output(u"Should be logged in now")
+ pywikibot.output(u"Should be logged in now")
# Show a warning according to the local bot policy
if not self.botAllowed():
- wikipedia.output(u'*** Your username is not listed on [[%s]].\n*** Please make sure you are allowed to use the robot before actually using it!' % botList[self.site.family.name][self.site.lang])
+ pywikibot.output(u'*** Your username is not listed on [[%s]].\n*** Please make sure you are allowed to use the robot before actually using it!' % botList[self.site.family.name][self.site.lang])
return True
else:
- wikipedia.output(u"Login failed. Wrong password or CAPTCHA
answer?")
+ pywikibot.output(u"Login failed. Wrong password or CAPTCHA
answer?")
if retry:
self.password = None
return self.login(retry = True)
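
With the module-level wikipedia dependency gone, a login now goes through pywikibot and the API-aware subclass in data/api.py. A sketch, assuming the username is configured in user-config.py (login() will prompt for the password):

    import pywikibot
    from pywikibot.data import api

    site = pywikibot.Site("en", "wikipedia")
    # api.LoginManager overrides only getCookie(); login() drives the
    # prompt/retry/cookie-storage sequence defined above
    manager = api.LoginManager(site=site)
    if manager.login():
        pywikibot.output(u"Logged in as %s" % manager.username)
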
Added: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py (rev 0)
+++ branches/rewrite/pywikibot/page.py 2008-02-27 20:08:48 UTC (rev 5088)
@@ -0,0 +1,1579 @@
+# -*- coding: utf-8 -*-
+"""
+Objects representing various types of MediaWiki pages.
+"""
+#
+# (C) Pywikipedia bot team, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id: $'
+
+import pywikibot
+from pywikibot.exceptions import *
+
+import htmlentitydefs
+import logging
+import re
+import unicodedata
+import urllib
+
+reNamespace = re.compile("^(.+?) *: *(.*)$")
+
+
+class Page(object):
+ """Page: A MediaWiki page
+
+ This object only implements internally methods that do not require
+ reading from or writing to the wiki. All other methods are delegated
+ to the Site object.
+
+ Methods available:
+ - site: The wiki this page is in
+ - title: The name of the page, with various presentation options
+ - namespace: The namespace in which the page is found
+ - section: The section of the page (the part of the title after '#', if
+ any)
+ - isAutoTitle: Title can be translated using the autoFormat method
+ - autoFormat: Auto-format certain dates and other standard format page
+ titles
+ - isCategory: True if the page is a category
+ - isDisambig (*): True if the page is a disambiguation page
+ - isImage: True if the page is an image
+ - isRedirectPage (*): True if the page is a redirect, false otherwise
+ - getRedirectTarget (*): The page the page redirects to
+ - isTalkPage: True if the page is in any "talk" namespace
+ - toggleTalkPage: Return the talk page (if this is one, return the
+ non-talk page)
+ - get (*): The text of the page
+ - latestRevision (*): The page's current revision id
+ - userName: Last user to edit page
+ - isIpEdit: True if last editor was unregistered
+ - editTime: Timestamp of the last revision to the page
+ - previousRevision (*): The revision id of the previous version
+ - permalink (*): The url of the permalink of the current version
+ - getOldVersion(id) (*): The text of a previous version of the page
+ - getVersionHistory: Load the version history information from wiki
+ - getVersionHistoryTable: Create a wiki table from the history data
+ - fullVersionHistory: Return all past versions including wikitext
+ - contributingUsers: Return set of users who have edited page
+ - exists (*): True if the page actually exists, false otherwise
+ - isEmpty (*): True if the page has 4 characters or less content, not
+ counting interwiki and category links
+ - interwiki (*): The interwiki links from the page (list of Pages)
+ - categories (*): The categories the page is in (list of Pages)
+ - linkedPages (*): The normal pages linked from the page (list of
+ Pages)
+ - imagelinks (*): The pictures on the page (list of ImagePages)
+ - templates (*): All templates referenced on the page (list of Pages)
+ - templatesWithParams(*): All templates on the page, with list of
+ parameters
+ - isDisambig (*): True if the page is a disambiguation page
+ - getReferences: List of pages linking to the page
+ - canBeEdited (*): True if page is unprotected or user has edit
+ privileges
+ - botMayEdit (*): True if bot is allowed to edit page
+ - put(newtext): Saves the page
+ - put_async(newtext): Queues the page to be saved asynchronously
+ - move: Move the page to another title
+ - delete: Deletes the page (requires being logged in)
+ - protect: Protect or unprotect a page (requires sysop status)
+ - removeImage: Remove all instances of an image from this page
+ - replaceImage: Replace all instances of an image with another
+ - loadDeletedRevisions: Load all deleted versions of this page
+ - getDeletedRevision: Return a particular deleted revision
+ - markDeletedRevision: Mark a version to be undeleted, or not
+ - undelete: Undelete past version(s) of the page
+
+ Deprecated methods (preserved for backwards-compatibility):
+ - urlname: Title, in a form suitable for a URL
+ - titleWithoutNamespace: Title, with the namespace part removed
+ - sectionFreeTitle: Title, without the section part
+ - aslink: Title in the form [[Title]] or [[lang:Title]]
+ - encoding: The encoding of the page
+
+ (*) This loads the page if it has not been loaded before; permalink might
+ even reload it if it has been loaded before
+
+ """
+ def __init__(self, site, title, insite=None,
+ defaultNamespace=0):
+ """Parameters:
+
+ @param site: the wikimedia Site on which the page resides
+ @param title: title of the page
+ @type title: unicode
+ @param insite: (optional) a wikimedia Site where this link was found
+ (to help decode interwiki links)
+ @param defaultNamespace: (optional) A namespace to use if the link
+ does not contain one
+ @type defaultNamespace: int
+
+ """
+ if site is None:
+ self._site = pywikibot.Site()
+ elif isinstance(site, basestring):
+ self._site = pywikibot.Site(site)
+ else:
+ self._site = site
+
+ if not insite: insite = self._site
+
+ # parse the title
+ # this can throw various exceptions if the title is invalid
+ link = Link(title, insite, defaultNamespace)
+ self._site = link.site
+ self._section = link.section
+ self._ns = link.namespace
+ self._title = link.title
+ # reassemble the canonical title from components
+ if self._section is not None:
+ self._title = self._title + "#" + self._section
+ if self._ns:
+ self._title = self.site().namespace(self._ns) + ":" + self._title
+ self._revisions = {}
+
+ def site(self):
+ """Return the Site object for the wiki on which this Page
resides."""
+ return self._site
+
+ def namespace(self):
+ """Return the number of the namespace of the page.
+
+ Only recognizes those namespaces defined in family.py.
+ If not defined, it will return 0 (the main namespace).
+
+ @return: int
+
+ """
+ return self._ns
+
+ def title(self, underscore=False, savetitle=False, withNamespace=True,
+ withSection=True, asUrl=False, asLink=False,
+ allowInterwiki=True, forceInterwiki=False, textlink=False):
+ """Return the title of this Page, as a Unicode string.
+
+ @param underscore: if true, replace all ' ' characters with '_'
+ @param savetitle: if true, try to quote all non-ASCII characters.
+ (DEPRECATED: use asURL instead)
+ @param withNamespace: if false, omit the namespace prefix
+ @param withSection: if false, omit the section
+ @param asUrl: if true, quote title as if in an URL
+ @param asLink: if true, return the title in the form of a wikilink
+ @param allowInterwiki: (only used if asLink is true) if true, format
+ the link as an interwiki link if necessary
+ @param forceInterwiki: (only used if asLink is true) if true, always
+ format the link as an interwiki link
+ @param textlink: (only used if asLink is true) if true, place a ':'
+ before Category: and Image: links
+
+ """
+ title = self._title
+ if not withNamespace and self._ns != 0:
+ title = title.split(u':', 1)[1]
+ if not withSection and self._section:
+ title = title.split(u'#', 1)[0]
+ if underscore or asUrl:
+ title = title.replace(u' ', u'_')
+ if savetitle:
+ logging.debug(
+ u"Page.title(savetitle=...) is deprecated.")
+ if savetitle or asUrl:
+ encodedTitle = title.encode(self.site().encoding())
+ title = urllib.quote(encodedTitle)
+ if asLink:
+ if forceInterwiki or (
+ allowInterwiki and self.site() != pywikibot.Site()):
+ if self.site().family() != pywikibot.Site().family() \
+ and self.site().family().name != self.site().language():
+# FIXME: Interwiki links shouldn't be fully urlencoded
+ return u'[[%s:%s:%s]]' % (self.site().family().name,
+ self.site().language(),
+ self.title(asUrl=True))
+ else:
+ return u'[[%s:%s]]' % (self.site().language(),
+ self.title(asUrl=True))
+ elif textlink and (self.isImage() or self.isCategory()):
+ return u'[[:%s]]' % title
+ else:
+ return u'[[%s]]' % title
+ return title
+
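A few of the title() presentation options in action, for a hypothetical page Talk:Foo bar#Baz on the site's own wiki:

    page.title()                     # u'Talk:Foo bar#Baz'
    page.title(withNamespace=False)  # u'Foo bar#Baz'
    page.title(withSection=False)    # u'Talk:Foo bar'
    page.title(asUrl=True)           # u'Talk%3AFoo_bar%23Baz'
    page.title(asLink=True)          # u'[[Talk:Foo bar#Baz]]'
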
+ def section(self, underscore = False):
+ """Return the name of the section this Page refers to.
+
+ The section is the part of the title following a '#' character, if
+ any. If no section is present, return None.
+
+ @param underscore: unused, but maintained for backwards compatibility
+
+ """
+ if underscore:
+ logging.debug(
+ u"Page.section(underscore=...) is deprecated.")
+ if self._section:
+ return self._section
+ else:
+ return None
+
+ def __str__(self):
+ """Return a console representation of the
pagelink."""
+ return self.title(asLink=True, forceInterwiki=True)
+
+ def __repr__(self):
+ """Return a more complete string
representation."""
+ return u"%s(%s)" % (self.__class__.__name__, self.title())
+
+ def __cmp__(self, other):
+ """Test for equality and inequality of Page
objects"""
+ if not isinstance(other, Page):
+ # especially, return -1 if other is None
+ return -1
+ if not self.site() == other.site():
+ return cmp(self.site(), other.site())
+ owntitle = self.title()
+ othertitle = other.title()
+ return cmp(owntitle, othertitle)
+
+ def __hash__(self):
+ # Pseudo method that makes it possible to store Page objects as keys
+ # in hash-tables. This relies on the fact that the string
+ # representation of an instance can not change after the construction.
+ return hash(str(self))
+
+ def autoFormat(self):
+ """Return L{date.autoFormat} dictName and value, if any.
+
+ Value can be a year, date, etc., and dictName is 'YearBC',
+ 'Year_December', or another dictionary name. Please note that two
+ entries may have exactly the same autoFormat, but be in two
+ different namespaces, as some sites have categories with the
+ same names. Regular titles return (None, None).
+
+ """
+ if not hasattr(self, '_autoFormat'):
+ from pywikibot import date
+ self._autoFormat = date.getAutoFormat(
+ self.site().language(),
+ self.title(withNamespace=False)
+ )
+ return self._autoFormat
+
+ def isAutoTitle(self):
+ """Return True if title of this Page is in the autoFormat
dictionary."""
+ return self.autoFormat()[0] is not None
+
+ def get(self, force=False, get_redirect=False, throttle=None,
+ sysop=False, nofollow_redirects=None, change_edit_time=None):
+ """Return the wiki-text of the page.
+
+ This will retrieve the page from the server if it has not been
+ retrieved yet, or if force is True. This can raise the following
+ exceptions that should be caught by the calling code:
+
+ - NoPage: The page does not exist
+ - IsRedirectPage: The page is a redirect. The argument of the
+ exception is the title of the page it redirects to.
+ - SectionError: The section does not exist on a page with a #
+ link
+
+ @param force: reload all page attributes, including errors.
+ @param get_redirect: return the redirect text, do not follow the
+ redirect, do not raise an exception.
+ @param sysop: if the user has a sysop account, use it to retrieve
+ this page
+ @param throttle: DEPRECATED and unused
+ @param nofollow_redirects: DEPRECATED and unused
+ @param change_edit_time: DEPRECATED and unused
+
+ """
+ if throttle is not None:
+ logging.debug("Page.get(throttle) option is deprecated.")
+ if nofollow_redirects is not None:
+ logging.debug("Page.get(nofollow_redirects) option is
deprecated.")
+ if change_edit_time is not None:
+ logging.debug("Page.get(change_edit_time) option is deprecated.")
+ if force:
+ # When forcing, we retry the page no matter what. Old exceptions
+ # do not apply any more.
+ for attr in ['_redirarg', '_getexception']:
+ if hasattr(self, attr):
+ delattr(self,attr)
+ else:
+ # Make sure we re-raise an exception we got on an earlier attempt
+ if hasattr(self, '_redirarg') and not get_redirect:
+ raise IsRedirectPage, self._redirarg
+ elif hasattr(self, '_getexception'):
+ raise self._getexception
+ if force or not hasattr(self, "_revid") \
+ or not self._revid in self._revisions:
+ self.site().getrevisions(self, getText=True, ids=None, sysop=sysop)
+ # TODO: Exception handling for no-page, redirects, etc.
+
+ return self._revisions[self._revid].text
+
+ def getOldVersion(self, oldid, force=False, get_redirect=False,
+ throttle=None, sysop=False, nofollow_redirects=None,
+ change_edit_time=None):
+ """Return text of an old revision of this page; same options as
get().
+
+ @param oldid: The revid of the revision desired.
+
+ """
+ if throttle is not None:
+ logging.debug(
+ "Page.getOldVersion(throttle) option is deprecated.")
+ if nofollow_redirects is not None:
+ logging.debug(
+ "Page.getOldVersion(nofollow_redirects) option is
deprecated.")
+ if change_edit_time is not None:
+ logging.debug(
+ "Page.getOldVersion(change_edit_time) option is deprecated.")
+ if force or not oldid in self._revisions:
+ self.site().getrevisions(self, getText=True, ids=oldid,
+ redirs=get_redirect, sysop=sysop)
+ return self._revisions[oldid].text
+
+ def permalink(self):
+ """Return the permalink URL for current revision of this
page."""
+ return "%s://%s/%sindex.php?title=%s&oldid=%s" \
+ % (self.site().protocol(),
+ self.site().hostname(),
+ self.site().script_path(),
+ self.title(asUrl=True),
+ self.latestRevision())
+
+ def latestRevision(self):
+ """Return the current revision id for this
page."""
+ if not hasattr(self, '_revid'):
+ self.site().getrevisions(self)
+ return self._revid
+
+ def userName(self):
+ """Return name or IP address of last user to edit
page."""
+ return self._revisions[self.latestRevision()].user
+
+ def isIpEdit(self):
+ """Return True if last editor was unregistered."""
+ return self._revisions[self.latestRevision()].anon
+
+ def editTime(self):
+ """Return timestamp (in MediaWiki format) of last revision to
page."""
+ return self._revisions[self.latestRevision()].timestamp
+
+ def previousRevision(self):
+ """Return the revision id for the previous revision of this
Page."""
+ vh = self.getVersionHistory(revCount=2)
+ return vh[1][0]
+
+ def exists(self):
+ """Return True if page exists on the wiki, even if it's a
redirect.
+
+ If the title includes a section, return False if this section isn't
+ found.
+
+ """
+ return self.site().page_exists(self)
+
+ def isRedirectPage(self):
+ """Return True if this is a redirect, False if not or not
existing."""
+ return self.site().page_isredirect(self)
+
+ def isEmpty(self):
+ """Return True if the page text has less than 4 characters.
+
+ Character count ignores language links and category links.
+ Can raise the same exceptions as get().
+
+ """
+ txt = self.get()
+ txt = pywikibot.removeLanguageLinks(txt, site = self.site())
+ txt = pywikibot.removeCategoryLinks(txt, site = self.site())
+ if len(txt) < 4:
+ return True
+ else:
+ return False
+
+ def isTalkPage(self):
+ """Return True if this page is in any talk
namespace."""
+ ns = self.namespace()
+ return ns >= 0 and ns % 2 == 1
+
+ def toggleTalkPage(self):
+ """Return other member of the article-talk page pair for this
Page.
+
+ If self is a talk page, returns the associated content page;
+ otherwise, returns the associated talk page.
+ Returns None if self is a special page.
+
+ """
+ ns = self.namespace()
+ if ns < 0: # Special page
+ return None
+ if self.isTalkPage():
+ if self.namespace() == 1:
+ return Page(self.site(), self.title(withNamespace=False))
+ else:
+ return Page(self.site(),
+ self.site().namespace(ns - 1) + ':'
+ + self.title(withNamespace=False))
+ else:
+ return Page(self.site(),
+ self.site().namespace(ns + 1) + ':'
+ + self.title(withNamespace=False))
+
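Since each talk namespace is the odd partner of an even subject namespace, toggling is plain namespace arithmetic; for hypothetical titles:

    Page(site, u"Foo").toggleTalkPage()           # -> Talk:Foo
    Page(site, u"Talk:Foo").toggleTalkPage()      # -> Foo
    Page(site, u"Category:Bar").toggleTalkPage()  # -> Category talk:Bar
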
+ def isCategory(self):
+ """Return True if the page is a Category, False
otherwise."""
+ return self.namespace() == 14
+
+ def isImage(self):
+ """Return True if this is an image description page, False
otherwise."""
+ return self.namespace() == 6
+
+ def isDisambig(self):
+ """Return True if this is a disambiguation page, False otherwise.
+
+ Relies on the presence of specific templates, identified in the Family
+ file, to identify disambiguation pages.
+
+ """
+ if not hasattr(self, '_isDisambig'):
+ locdis = self.site().family().disambig(self.site().language())
+ for template in self.templates():
+ tn = template.title(withNamespace=False)
+ if tn in locdis:
+ self._isDisambig = True
+ break
+ else:
+ self._isDisambig = False
+ return self._isDisambig
+
+ def getReferences(self, follow_redirects=True, withTemplateInclusion=True,
+ onlyTemplateInclusion=False, redirectsOnly=False):
+ """Yield all pages that link to the page.
+
+ If you need a full list of referring pages, use
+ C{pages = list(s.getReferences())}
+
+ @param follow_redirects: if True, also return pages that link to a
+ redirect pointing to the page.
+ @param withTemplateInclusion: if True, also return pages where self
+ is used as a template.
+ @param onlyTemplateInclusion: if True, only return pages where self
+ is used as a template.
+ @param redirectsOnly: if True, only return redirects to self.
+
+ """
+ # N.B.: this method intentionally overlaps with backlinks() and
+ # embeddedin(). Depending on the interface, it may be more efficient
+ # to implement those methods in the site interface and then combine
+ # the results for this method, or to implement this method and then
+ # split up the results for the others.
+ return self.site().getreferences(self, follow_redirects,
+ withTemplateInclusion,
+ onlyTemplateInclusion,
+ redirectsOnly)
+
+ def backlinks(self, followRedirects=True, filterRedirects=None):
+ """Yield all pages that contain ordinary wikilinks to this page.
+
+ @param followRedirects: if True, also return pages that link to a
+ redirect pointing to the page.
+ @param filterRedirects: if True, only return redirects; if False,
+ omit redirects; if None, do not filter
+
+ """
+ return self.site().getbacklinks(self, followRedirects, filterRedirects)
+
+ def embeddedin(self):
+ """Yield all pages that embed this page as a
template."""
+ return self.site().getembeddedin(self)
+
+ def canBeEdited(self):
+ """Return bool indicating whether this page can be edited.
+
+ This returns True if and only if:
+ - page is unprotected, and bot has an account for this site, or
+ - page is protected, and bot has a sysop account for this site.
+
+ """
+ return self.site().page_can_be_edited(self)
+
+ def botMayEdit(self):
+ """Return True if this page allows bots to edit it.
+
+ This will be True if the page doesn't contain {{bots}} or
+ {{nobots}}, or it contains them and the active bot is allowed to
+ edit this page. (This method is only useful on those sites that
+ recognize the bot-exclusion protocol; on other sites, it will always
+ return True.)
+
+ The framework enforces this restriction by default. It is possible
+ to override this by setting ignore_bot_templates=True in
+ user_config.py, or using page.put(force=True).
+
+ """ # TODO: move this to Site object?
+ if pywikibot.config.ignore_bot_templates: #Check the "master ignore switch"
+ return True
+ try:
+ templates = self.templatesWithParams()
+ except (NoPage, IsRedirectPage, SectionError):
+ return True
+ username = self.site().user() # assumption: Site exposes the logged-in username via user()
+ for template in templates:
+ title = template[0].title(withNamespace=False)
+ if title == 'Nobots':
+ return False
+ elif title == 'Bots':
+ if len(template[1]) == 0:
+ return True
+ else:
+ (ttype, bots) = template[1][0].split('=', 1)
+ bots = bots.split(',')
+ if ttype == 'allow':
+ if 'all' in bots or username in bots:
+ return True
+ else:
+ return False
+ if ttype == 'deny':
+ if 'all' in bots or username in bots:
+ return False
+ else:
+ return True
+ # no restricting template found
+ return True
+
+
+ def put(self, newtext, comment=None, watchArticle=None, minorEdit=True,
+ force=False):
+ """Save the page with the contents of the first argument as the
text.
+
+ @param newtext: The complete text of the revised page.
+ @type newtext: unicode
+ @param comment: The edit summary for the modification (optional,
+ but most wikis strongly encourage its use)
+ @type comment: unicode
+ @param watchArticle: if True, add or if False, remove this Page
+ to/from bot user's watchlist; if None, leave watchlist status
+ unchanged
+ @type watchArticle: bool or None
+ @param minorEdit: if True, mark this edit as minor
+ @type minorEdit: bool
+ @param force: if True, ignore botMayEdit() setting
+ @type force: bool
+
+ """
+ return self.site().put(self, newtext, comment, watchArticle,
+ minorEdit, force)
+
+ def put_async(self, newtext,
+ comment=None, watchArticle=None, minorEdit=True, force=False,
+ callback=None):
+ """Put page on queue to be saved to wiki asynchronously.
+
+ Asynchronous version of put (takes the same arguments), which places
+ pages on a queue to be saved by a daemon thread. All arguments are
+ the same as for .put(), except:
+
+ @param callback: a callable object that will be called after the
+ page put operation. This object must take two arguments: (1) a
+ Page object, and (2) an exception instance, which will be None
+ if the page was saved successfully. The callback is intended for
+ use by bots that need to keep track of which saves were
+ successful.
+
+ """
+ return self.site().put(self, newtext, comment, watchArticle,
+ minorEdit, force, callback, async=True)
+
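A sketch of the callback contract described above: the callable receives the Page and either None or the exception that aborted the save (names here are illustrative):

    def report_save(page, err):
        if err is None:
            pywikibot.output(u"saved %s" % page.title(asLink=True))
        else:
            pywikibot.output(u"failed to save %s: %s" % (page.title(), err))

    page.put_async(newtext, comment=u"bot edit", callback=report_save)
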
+ def linkedPages(self):
+ """Iterate Pages that this Page links to.
+
+ Only returns pages from "normal" internal links. Image and category
+ links are omitted unless prefixed with ":"; embedded templates are
+ omitted (but links within them are returned); all interwiki and
+ external links are omitted.
+
+ @return: a generator that yields Page objects.
+
+ """
+ return self.site().getlinks(self)
+
+ def interwiki(self):
+ """Iterate interwiki links in the page text.
+
+ @return: a generator that yields Link objects.
+
+ """
+ return self.site().getinterwiki(self)
+
+ def langlinks(self):
+ """Iterate all interlanguage links on this page.
+
+ Note that the links yielded by this method will be a subset of
+ the results of self.interwiki().
+
+ @return: a generator that yields Link objects.
+
+ """
+ return self.site().getlanglinks(self)
+
+ def imagelinks(self, followRedirects=False, loose=None):
+ """Iterate ImagePage objects for images displayed on this Page.
+
+ @param followRedirects: if an image link redirects to another page,
+ yield the redirect target instead of the original link
+ @param loose: DEPRECATED and ignored
+ @return: a generator that yields ImagePage objects.
+
+ """
+ if loose is not None:
+ logging.debug(
+ u"Page.imagelinks(loose) option is deprecated.")
+ return self.site().getimages(self, followRedirects)
+
+ def templates(self):
+ """Iterate Page objects for templates used on this Page.
+
+ Template parameters are ignored. This method only returns embedded
+ templates, not template pages that happen to be referenced through
+ a normal link.
+
+ """
+ return self.site().gettemplates(self)
+
+ def templatesWithParams(self):
+ """Iterate templates used on this Page.
+
+ @return: a generator that yields a tuple for each use of a template
+ in the page, with the template Page as the first entry and a list of
+ parameters as the second entry.
+
+ """
+ return self.site().templates_with_params(self)
+
+ def categories(self, nofollow_redirects=None, withSortKey=False):
+ """Iterate categories that the article is in.
+
+ @param nofollow_redirects: DEPRECATED and ignored
+ @param withSortKey: if True, include the sort key in each Category.
+ @return: a generator that yields Category objects.
+
+ """
+ # follow_redirects makes no sense here because category membership
+ # doesn't follow redirects
+ if nofollow_redirects is not None:
+ logging.debug(
+ u"Page.categories(nofollow_redirects) option is deprecated.")
+ return self.site().categories(self, withSortKey=withSortKey)
+
+ def extlinks(self):
+ """Iterate all external URLs (not interwiki links) from this
page.
+
+ @return: a generator that yields unicode objects containing URLs.
+
+ """
+ return self.site().getextlinks(self)
+
+ def getRedirectTarget(self):
+ """Return a Page object for the target this Page redirects to.
+
+ If this page is not a redirect page, will raise an IsNotRedirectPage
+ exception. This method also can raise a NoPage exception.
+
+ """
+ return self.site().follow_redirect(self)
+
+ def getVersionHistory(self, forceReload=False, reverseOrder=False,
+ getAll=False, revCount=500):
+ """Load the version history page and return history information.
+
+ Return value is a list of tuples, where each tuple represents one
+ edit and is built of revision id, edit date/time, user name, and
+ edit summary. Starts with the most current revision, unless
+ reverseOrder is True. Defaults to getting the first revCount edits,
+ unless getAll is True.
+
+ """
+ if getAll:
+ limit = None
+ else:
+ limit = revCount
+ return self.site().getrevisions(self, withText=False,
+ older=reverseOrder, limit=limit)
+
+ def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
+ getAll=False, revCount=500):
+ """Return the version history as a wiki table."""
+ result = '{| border="1"\n'
+ result += '! oldid || date/time || username || edit summary\n'
+ for oldid, time, username, summary \
+ in self.getVersionHistory(forceReload=forceReload,
+ reverseOrder=reverseOrder,
+ getAll=getAll, revCount=revCount):
+ result += '|----\n'
+ result += '| %s || %s || %s || <nowiki>%s</nowiki>\n'\
+ % (oldid, time, username, summary)
+ result += '|}\n'
+ return result
+
+ def fullVersionHistory(self):
+ """Iterate all previous versions including wikitext.
+
+ @return: A generator that yields tuples consisting of revision ID,
+ edit date/time, user name and content
+ """
+ return self.site().getrevisions(self, withText=True,
+ older=False, limit=None)
+
+ def contributingUsers(self):
+ """Return a set of usernames (or IPs) of users who edited this
page."""
+ edits = self.getVersionHistory()
+ users = set([edit[2] for edit in edits])
+ return users
+
+ def move(self, newtitle, reason=None, movetalkpage=True, sysop=False,
+ throttle=None, deleteAndMove=False, safe=True):
+ """Move this page to a new title.
+
+ @param newtitle: The new page title.
+ @param reason: The edit summary for the move.
+ @param movetalkpage: If true, move this page's talk page (if it exists)
+ @param sysop: Try to move using sysop account, if available
+ @param throttle: DEPRECATED
+ @param deleteAndMove: if move succeeds, delete the old page
+ (requires sysop privileges)
+ @param safe: If false, attempt to delete existing page at newtitle
+ (if there is one) and then move this page to that title
+
+ """
+ if throttle is not None:
+ logging.debug(
+ u"Page.move: throttle option is deprecated.")
+ if reason is None:
+ pywikibot.output(u'Moving %s to [[%s]].'
+ % (self.title(asLink=True), newtitle))
+ reason = pywikibot.input(u'Please enter a reason for the move:')
+ return self.site().move(self, newtitle, reason,
+ movetalkpage=movetalkpage, sysop=sysop,
+ deleteAndMove=deleteAndMove, safe=safe)
+
+ def delete(self, reason=None, prompt=True, throttle=None, mark=False):
+ """Deletes the page from the wiki. Requires administrator status.
+
+ @param reason: The edit summary for the deletion.
+ @param prompt: If true, prompt user for confirmation before deleting.
+ @param mark: if true, and user does not have sysop rights, place a
+ speedy-deletion request on the page instead.
+
+ """
+ if throttle is not None:
+ logging.debug(
+ u"Page.delete: throttle option is deprecated.")
+ if reason is None:
+ pywikibot.output(u'Deleting %s.' % (self.title(asLink=True)))
+ reason = pywikibot.input(u'Please enter a reason for the deletion:')
+ answer = u'y'
+ if prompt and not hasattr(self.site(), '_noDeletePrompt'):
+ answer = pywikibot.inputChoice(u'Do you want to delete %s?'
+ % self.title(asLink = True, forceInterwiki = True),
+ ['Yes', 'No', 'All'],
+ ['Y', 'N', 'A'],
+ 'N')
+ if answer in ['a', 'A']:
+ answer = 'y'
+ self.site()._noDeletePrompt = True
+ if answer in ['y', 'Y']:
+ return self.site().delete(self, reason, mark=mark)
+
+ def loadDeletedRevisions(self):
+ """Retrieve all deleted revisions for this Page from
Special/Undelete.
+
+ Stores all revisions' timestamps, dates, editors and comments in
+ self._deletedRevs attribute.
+
+ @return: list of timestamps (which can be used to retrieve revisions
+ later on).
+
+ """
+ return self.site().loadDeletedRevisions(self)
+
+ def getDeletedRevision(self, timestamp, retrieveText=False):
+ """Return a particular deleted revision by timestamp.
+
+ @return: a list of [date, editor, comment, text, restoration
+ marker]. text will be None, unless retrieveText is True (or has
+ been retrieved earlier). If timestamp is not found, returns
+ None.
+
+ """
+ return self.site().getDeletedRevision(self, timestamp,
+ getText=retrieveText)
+
+ def markDeletedRevision(self, timestamp, undelete=True):
+ """Mark the revision identified by timestamp for undeletion.
+
+ @param undelete: if False, mark the revision to remain deleted.
+
+ """
+ if getattr(self, '_deletedRevs', None) is None:
+ self.loadDeletedRevisions()
+ if not self._deletedRevs.has_key(timestamp):
+ #TODO: Throw an exception?
+ return None
+ self._deletedRevs[timestamp][4] = undelete
+ self._deletedRevsModified = True
+
+ def undelete(self, comment=None, throttle=None):
+ """Undelete revisions based on the markers set by previous calls.
+
+ If no calls have been made since loadDeletedRevisions(), everything
+ will be restored.
+
+ Simplest case::
+ Page(...).undelete('This will restore all revisions')
+
+ More complex::
+ pg = Page(...)
+ revs = pg.loadDeletedRevisions()
+ for rev in revs:
+ if ... #decide whether to undelete a revision
+ pg.markDeletedRevision(rev) #mark for undeletion
+ pg.undelete('This will restore only selected revisions.')
+
+ @param comment: The undeletion edit summary.
+ @param throttle: DEPRECATED
+
+ """
+ if throttle is not None:
+ logging.debug(
+ u"Page.undelete: throttle option is deprecated.")
+ if comment is None:
+ pywikibot.output(u'Preparing to undelete %s.'
+ % (self.title(asLink=True)))
+ comment = pywikibot.input(
+ u'Please enter a reason for the undeletion:')
+ return self.site().undelete(self, comment)
+
+ def protect(self, edit='sysop', move='sysop', create='sysop',
+ unprotect=False, reason=None, prompt=True, throttle=None):
+ """(Un)protect a wiki page. Requires administrator status.
+
+ Valid protection levels (in MediaWiki 1.12) are '' (equivalent to
+ 'none'), 'autoconfirmed', and 'sysop'.
+
+ @param edit: Level of edit protection
+ @param move: Level of move protection
+ @param create: Level of create protection
+ @param unprotect: If true, unprotect the page (equivalent to setting
+ all protection levels to '')
+ @param reason: Edit summary.
+ @param prompt: If true, ask user for confirmation.
+ @param throttle: DEPRECATED
+
+ """
+ if throttle is not None:
+ logging.debug(
+ u"Page.protect: throttle option is deprecated.")
+ if reason is None:
+ if unprotect:
+ un = u'un'
+ else:
+ un = u''
+ pywikibot.output(u'Preparing to %sprotect %s.'
+ % (un, self.title(asLink=True)))
+ reason = pywikibot.input(u'Please enter a reason for the action:')
+ if unprotect:
+ edit = move = create = ""
+ answer = 'y'
+ if prompt and not hasattr(self.site(), '_noProtectPrompt'):
+ answer = pywikibot.inputChoice(
+ u'Do you want to change the protection level of %s?'
+ % self.title(asLink=True, forceInterwiki = True),
+ ['Yes', 'No', 'All'], ['Y', 'N', 'A'], 'N')
+ if answer in ['a', 'A']:
+ answer = 'y'
+ self.site()._noProtectPrompt = True
+ if answer in ['y', 'Y']:
+ return self.site().protect(self, edit, move, create, reason)
+
+######## DEPRECATED METHODS ########
+
+ def encoding(self):
+ """Return the character encoding used on this Page's wiki
Site.
+
+ DEPRECATED: use Site.encoding() instead
+
+ """
+ logging.debug(u"Page.encoding() is deprecated; use Site.encoding().")
+ return self.site().encoding()
+
+ def titleWithoutNamespace(self, underscore=False):
+ """Return title of Page without namespace and without section.
+
+ DEPRECATED: use self.title(withNamespace=False) instead.
+
+ """
+ logging.debug(
+ u"Page.titleWithoutNamespace() method is deprecated.")
+ return self.title(underscore=underscore, withNamespace=False,
+ withSection=False)
+
+ def sectionFreeTitle(self, underscore=False):
+ """Return the title of this Page, without the section (if any).
+
+ DEPRECATED: use self.title(withSection=False) instead.
+
+ """
+ logging.debug(
+ u"Page.sectionFreeTitle() method is deprecated.")
+ return self.title(underscore=underscore, withSection=False)
+
+ def aslink(self, forceInterwiki=False, textlink=False, noInterwiki=False):
+ """Return a string representation in the form of a wikilink.
+
+ DEPRECATED: use self.title(asLink=True) instead.
+
+ """
+ logging.debug(u"Page.aslink() method is deprecated.")
+ return self.title(asLink=True, forceInterwiki=forceInterwiki,
+ allowInterwiki=not noInterwiki, textlink=textlink)
+
+ def urlname(self):
+ """Return the Page title encoded for use in an URL.
+
+ DEPRECATED: use self.title(asUrl=True) instead.
+
+ """
+ logging.debug(u"Page.urlname() method is deprecated.")
+ return self.title(asUrl=True)
+
+####### DISABLED METHODS (warnings provided) ######
+ # these methods are easily replaced by editing the page's text using
+ # textlib methods and then using put() on the result.
+
+ def removeImage(self, image, put=False, summary=None, safe=True):
+ """Old method to remove all instances of an image from
page."""
+ logging.warning(u"Page.removeImage() is no longer supported.")
+
+ def replaceImage(self, image, replacement=None, put=False, summary=None,
+ safe=True):
+ """Old method to replace all instances of an image with
another."""
+ logging.warning(u"Page.replaceImage() is no longer supported.")
+
+
+class ImagePage(Page):
+ """A subclass of Page representing an image descriptor wiki page.
+
+ Supports the same interface as Page, with the following added methods:
+
+ getImagePageHtml : Download image page and return raw HTML text.
+ fileURL : Return the URL for the image described on this
+ page.
+ fileIsOnCommons : Return True if image stored on Wikimedia
+ Commons.
+ fileIsShared : Return True if image stored on Wikitravel
+ shared repository.
+ getFileMd5Sum : Return image file's MD5 checksum.
+ getFileVersionHistory : Return the image file's version history.
+ getFileVersionHistoryTable: Return the version history in the form of a
+ wiki table.
+ usingPages : Iterate Pages on which the image is displayed.
+
+ """
+ def __init__(self, site, title, insite = None):
+ Page.__init__(self, site, title, insite, defaultNamespace=6)
+ if self.namespace() != 6:
+ raise ValueError(u"'%s' is not in the image namespace!" %
title)
+
+ def getImagePageHtml(self):
+ """
+ Download the image page, and return the HTML, as a unicode string.
+
+ Caches the HTML code, so that if you run this method twice on the
+ same ImagePage object, the page will only be downloaded once.
+ """
+ if not hasattr(self, '_imagePageHtml'):
+ from pywikibot.data import http
+ path = "%s/index.php?title=%s" \
+ % (self.site().scriptpath(), self.title(asUrl=True))
+ self._imagePageHtml = http.request(self.site(), path)
+ return self._imagePageHtml
+
+ def fileUrl(self):
+ """Return the URL for the image described on this
page."""
+ # TODO add scaling option?
+ if not hasattr(self, '_imageinfo'):
+ self._imageinfo = self.site().getimageinfo(self)
+ return self._imageinfo['url']
+
+ def fileIsOnCommons(self):
+ """Return True if the image is stored on Wikimedia
Commons"""
+ return self.fileUrl().startswith(
+ 'http://upload.wikimedia.org/wikipedia/commons/')
+
+ def fileIsShared(self):
+ """Return True if image is stored on any known shared
repository."""
+ # as of now, the only known repositories are commons and wikitravel
+ if 'wikitravel_shared' in self.site().shared_image_repository():
+ return self.fileUrl().startswith(
+ u'http://wikitravel.org/upload/shared/')
+ return self.fileIsOnCommons()
+
+ def getFileMd5Sum(self):
+ """Return image file's MD5 checksum."""
+ logging.debug(
+ "ImagePage.getFileMd5Sum() is deprecated; use getFileSHA1Sum().")
+# FIXME: MD5 might be performed on incomplete file due to server disconnection
+# (see bug #1795683).
+ import md5, urllib
+ f = urllib.urlopen(self.fileUrl())
+ # TODO: check whether this needs a User-Agent header added
+ md5Checksum = md5.new(f.read()).hexdigest()
+ f.close()
+ return md5Checksum
+
+ def getFileSHA1Sum(self):
+ """Return image file's SHA1 checksum."""
+ if not hasattr(self, '_imageinfo'):
+ self._imageinfo = self.site().getimageinfo(self)
+ return self._imageinfo['sha1']
+
+ def getFileVersionHistory(self):
+ """Return the image file's version history.
+
+ @return: An iterator yielding tuples containing (timestamp,
+ username, resolution, filesize, comment).
+
+ """
+ #TODO; return value may need to change
+ return self.site().getimageinfo(self, history=True)
+
+ def getFileVersionHistoryTable(self):
+ """Return the version history in the form of a wiki
table."""
+ lines = []
+ #TODO: if getFileVersionHistory changes, make sure this follows it
+ for (datetime, username, resolution, size, comment) \
+ in self.getFileVersionHistory():
+ lines.append('| %s || %s || %s || %s || <nowiki>%s</nowiki>' \
+ % (datetime, username, resolution, size, comment))
+ return u'{| border="1"\n! date/time || username || resolution ||
size || edit summary\n|----\n' + u'\n|----\n'.join(lines) + '\n|}'
+
+ def usingPages(self):
+ """Yield Pages on which the image is displayed."""
+ return self.site().getimageusage(self)
+
+class Category(Page):
+ """A page in the Category: namespace"""
+
+ def __init__(self, site, title, insite=None, sortKey=None):
+ """All parameters are the same as for Page() constructor, except:
+
+ @param sortKey: DEPRECATED (use .aslink() method instead)
+
+ """
+ Page.__init__(self, site=site, title=title, insite=insite,
+ defaultNamespace=14)
+ if sortKey is not None:
+ logging.debug(
+ "The 'sortKey' option in Category constructor is
deprecated.")
+ if self.namespace() != 14:
+ raise ValueError(u"'%s' is not in the category namespace!"
+ % title)
+
+ def aslink(self, sortKey=u'', forceInterwiki=None, textlink=None,
+ noInterwiki=None):
+ """Return a link to place a page in this Category.
+
+ Use this only to generate a "true" category link, not for interwikis
+ or text links to category pages.
+
+ Parameters are deprecated and preserved for backwards-compatibility,
+ except:
+
+ @param sortKey: The sort key for the article to be placed in this
+ Category; if omitted, default sort key is used.
+ @type sortKey: (optional) unicode
+
+ """
+ if forceInterwiki is not None \
+ or textlink is not None or noInterwiki is not None:
+ logging.debug("All arguments to Category.aslink() are
deprecated.")
+ if sortKey:
+ titleWithSortKey = '%s|%s' % (self.title(withSection=False),
+ sortKey)
+ else:
+ titleWithSortKey = self.title(withSection=False)
+ return '[[%s]]' % titleWithSortKey
+
+ def subcategories(self, recurse=False):
+ """Iterate all subcategories of the current category.
+
+ @param recurse: if not False or 0, also iterate subcategories of
+ subcategories. If an int, limit recursion to this number of
+ levels. (Example: recurse=1 will iterate direct subcats and
+ first-level sub-sub-cats, but no deeper.)
+ @type recurse: int or bool
+
+ """
+ if not isinstance(recurse, bool) and recurse:
+ recurse = recurse - 1
+ if not hasattr(self, "_subcats"):
+ self._subcats = []
+ for member in self.site().categorymembers(self, namespaces=[14]):
+ subcat = Category(self.site(), member.title())
+ self._subcats.append(subcat)
+ yield subcat
+ if recurse:
+ for item in subcat.subcategories(recurse):
+ yield item
+ else:
+ for subcat in self._subcats:
+ yield subcat
+ if recurse:
+ for item in subcat.subcategories(recurse):
+ yield item
+
+ def articles(self, recurse=False, startFrom=None):
+ """
+ Yields all articles in the current category.
+
+ @param recurse: if not False or 0, also iterate articles in
+ subcategories. If an int, limit recursion to this number of
+ levels. (Example: recurse=1 will iterate articles in first-level
+ subcats, but no deeper.)
+ @type recurse: int or bool
+
+ """
+ namespaces = self.site().namespaces()
+ namespaces.remove(14)
+ for member in self.site().categorymembers(self, namespaces=namespaces):
+ yield member
+ if recurse:
+ if not isinstance(recurse, bool) and recurse:
+ recurse = recurse - 1
+ for subcat in self.subcategories():
+ for article in subcat.articles(recurse):
+ yield article
+
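How the recurse argument plays out, for a hypothetical category tree:

    cat = pywikibot.Category(site, u"Category:Example")
    list(cat.articles())            # direct member articles only
    list(cat.articles(recurse=1))   # also articles in first-level subcats
    for subcat in cat.subcategories(recurse=True):
        pass                        # walks the entire subcategory tree
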
+ def isEmptyCategory(self):
+ """Return True if category has no members (including
subcategories)."""
+ for member in self.site().categorymembers(self, limit=1):
+ return False
+ return True
+
+ def copyTo(self, catname):
+ """
+ Copy text of category page to a new page. Does not move contents.
+
+ @param catname: New category title (without namespace)
+ @return: True if copying was successful, False if target page
+ already existed.
+
+ """
+ # This seems far too specialized to be in the top-level framework
+ catname = self.site().category_namespace() + ':' + catname
+ targetCat = Category(self.site(), catname)
+ if targetCat.exists():
+ pywikibot.output('Target page %s already exists!'
+ % targetCat.title())
+ return False
+ else:
+ pywikibot.output('Moving text from %s to %s.'
+ % (self.title(), targetCat.title()))
+ authors = ', '.join(self.contributingUsers())
+ creationSummary = pywikibot.translate(
+ self.site(), msg_created_for_renaming
+ ) % (self.title(), authors)
+ targetCat.put(self.get(), creationSummary)
+ return True
+
+ def copyAndKeep(self, catname, cfdTemplates):
+ """Copy partial category page text (not contents) to a new title.
+
+ Like copyTo above, except this removes a list of templates (like
+ deletion templates) that appear in the old category text. It also
+ removes all text between the two HTML comments BEGIN CFD TEMPLATE
+ and END CFD TEMPLATE. (This is to deal with CFD templates that are
+ substituted.)
+
+ Returns true if copying was successful, false if target page already
+ existed.
+
+ @param catname: New category title (without namespace)
+ @param cfdTemplates: A list (or iterator) of templates to be removed
+ from the page text
+ @return: True if copying was successful, False if target page
+ already existed.
+
+ """
+ # I don't see why we need this as part of the framework either
+ catname = self.site().category_namespace() + ':' + catname
+ targetCat = Category(self.site(), catname)
+ if targetCat.exists():
+ pywikibot.output('Target page %s already exists!'
+ % targetCat.title())
+ return False
+ else:
+ pywikibot.output('Moving text from %s to %s.'
+ % (self.title(), targetCat.title()))
+ authors = ', '.join(self.contributingUsers())
+ creationSummary = pywikibot.translate(
+ self.site(), msg_created_for_renaming
+ ) % (self.title(), authors)
+ newtext = self.get()
+ for regexName in cfdTemplates:
+ matchcfd = re.compile(r"{{%s.*?}}" % regexName, re.IGNORECASE)
+                newtext = matchcfd.sub('', newtext)
+ matchcomment = re.compile(
+                        r"<!--BEGIN CFD TEMPLATE-->.*?<!--END CFD TEMPLATE-->",
+ re.IGNORECASE | re.MULTILINE | re.DOTALL)
+ newtext = matchcomment.sub('', newtext)
+ pos = 0
+ while (newtext[pos:pos+1] == "\n"):
+ pos = pos + 1
+ newtext = newtext[pos:]
+ targetCat.put(newtext, creationSummary)
+ return True
+
+#### DEPRECATED METHODS ####
+ def subcategoriesList(self, recurse=False):
+        """DEPRECATED: Equivalent to list(self.subcategories(...))"""
+ logging.debug("Category.subcategoriesList() method is deprecated.")
+ return sorted(list(set(self.subcategories(recurse))))
+
+ def articlesList(self, recurse=False):
+        """DEPRECATED: equivalent to list(self.articles(...))"""
+ logging.debug("Category.articlesList() method is deprecated.")
+ return sorted(list(set(self.articles(recurse))))
+
+ def supercategories(self):
+ """DEPRECATED: equivalent to self.categories()"""
+ logging.debug("Category.supercategories() method is deprecated.")
+ return self.categories()
+
+ def supercategoriesList(self):
+        """DEPRECATED: equivalent to list(self.categories(...))"""
+        logging.debug("Category.supercategoriesList() method is deprecated.")
+ return sorted(list(set(self.categories())))
+
+
+class Revision(object):
+    """A structure holding information about a single revision of a Page."""
+ def __init__(self, revid, timestamp, user, anon=False, comment=u"",
+ text=None, minor=False):
+ """All parameters correspond to object attributes (e.g., revid
+ parameter is stored as self.revid)
+
+ @param revid: Revision id number
+ @type revid: int
+ @param text: Revision wikitext.
+ @type text: unicode, or None if text not yet retrieved
+ @param timestamp: Revision time stamp (in MediaWiki text format)
+ @type timestamp: unicode
+ @param user: user who edited this revision
+ @type user: unicode
+ @param anon: user is unregistered
+ @type anon: bool
+ @param comment: edit comment text
+ @type comment: unicode
+ @param minor: edit flagged as minor
+ @type minor: bool
+
+ """
+ self.revid = revid
+ self.text = text
+ self.timestamp = timestamp
+ self.user = user
+ self.anon = anon
+ self.comment = comment
+ self.minor = minor
+
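+# Construction sketch (all values invented; in practice Revision objects
+# would be filled in from API revision data):
+#
+#     rev = Revision(revid=12345, timestamp=u"2008-02-27T20:08:48Z",
+#                    user=u"ExampleBot", comment=u"initial import",
+#                    minor=True)
+#     rev.text is None    # True until the wikitext is actually fetched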
+
+class Link(object):
+ """A Mediawiki link (local or interwiki)
+
+ Has the following attributes:
+
+ - site: The Site object for the wiki linked to
+ - namespace: The namespace of the page linked to (int)
+ - title: The title of the page linked to (unicode); does not include
+ namespace or section
+ - section: The section of the page linked to (unicode or None); this
+ contains any text following a '#' character in the title
+ - anchor: The anchor text (unicode or None); this contains any text
+ following a '|' character inside the link
+
+ """
+ illegal_titles_pattern = re.compile(
+ # Matching titles will be held as illegal.
+        u'''[^ %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+]'''
+ # URL percent encoding sequences interfere with the ability
+ # to round-trip titles -- you can't link to them consistently.
+ u'|%[0-9A-Fa-f]{2}'
+ # XML/HTML character references produce similar issues.
+ u'|&[A-Za-z0-9\x80-\xff]+;'
+ u'|&#[0-9]+;'
+ u'|&#x[0-9A-Fa-f]+;'
+ )
+ namespace_pattern = re.compile("^(.+?)_*:_*(.*)$")
+
+ def __init__(self, text, source=None, defaultNamespace=0):
+ """Parse text into a Link object.
+
+ @param text: the link text (everything appearing between [[ and ]]
+ on a wiki page)
+ @type text: unicode
+ @param source: the Site on which the link was found (not necessarily
+ the site to which the link refers)
+ @type source: Site
+ @param defaultNamespace: a namespace to use if the link does not
+ contain one (defaults to 0)
+ @type defaultNamespace: int
+
+ """
+ # First remove the anchor, which is stored unchanged, if there is one
+ if u"|" in text:
+ text, self.anchor = text.split(u"|", 1)
+ else:
+ self.anchor = None
+
+ if source is None:
+ source = pywikibot.Site()
+ self.source = self.site = source
+
+ # Clean up the name, it can come from anywhere.
+ # Convert HTML entities to unicode
+ t = html2unicode(text)
+
+ # Convert URL-encoded characters to unicode
+ t = url2unicode(t, site=self.site)
+
+ # Normalize unicode string to a NFC (composed) format to allow proper
+ # string comparisons. According to
+        # http://svn.wikimedia.org/viewvc/mediawiki/branches/REL1_6/phase3/includes/n…
+ # the mediawiki code normalizes everything to NFC, not NFKC (which
+ # might result in information loss).
+ t = unicodedata.normalize('NFC', t)
+
+ # This code was adapted from Title.php : secureAndSplit()
+ #
+ if u'\ufffd' in t:
+ raise Error("Title contains illegal char (\\uFFFD)")
+ self.namespace = defaultNamespace
+
+ # Replace underscores by spaces
+ t = t.replace(u'_', u' ')
+ # replace multiple spaces and underscores with a single space
+        while u"  " in t:
+            t = t.replace(u"  ", u" ")
+ # Strip spaces at both ends
+ t = t.strip()
+ # Remove left-to-right and right-to-left markers.
+        t = t.replace(u'\u200e', u'').replace(u'\u200f', u'')
+
+ # Initial colon indicates main namespace rather than specified default
+ if t.startswith(u':'):
+ self.namespace = 0
+ # remove the colon but continue processing
+ # remove any subsequent whitespace
+ t = t[1:].strip()
+
+ # Namespace or interwiki prefix
+ firstPass = True
+ while True:
+ fam = self.site.family
+
+ m = Link.namespace_pattern.match(t)
+ if m:
+ pre = m.group(1).lower()
+ ns = self.site.getNamespaceIndex(pre)
+ if ns:
+ # Ordinary namespace
+ t = m.group(2)
+ self.namespace = ns
+ elif pre in fam.langs.keys()\
+ or pre in fam.get_known_families(site=self.site):
+
+ if not firstPass:
+ # Can't make a local interwiki link to an interwiki link.
+ # That's just crazy!
+                        raise Error(
+                            "Improperly formatted interwiki link '%s'" % text)
+
+ # Interwiki link
+ t = m.group(2)
+ if pre in fam.langs.keys():
+ newsite = pywikibot.Site(pre, fam)
+ else:
+ otherlang = self.site.lang
+ familyName = fam.get_known_families(site=self.site)[pre]
+ if familyName in ['commons', 'meta']:
+ otherlang = familyName
+ try:
+ newsite = pywikibot.Site(otherlang, familyName)
+ except ValueError:
+ raise Error("""\
+%s is not a local page on %s, and the %s family is
+not supported by PyWikiBot!"""
+                                        % (t, self.site, familyName))
+
+ # Redundant interwiki prefix to the local wiki
+ if newsite == self.site:
+ if not t:
+ # Can't have an empty self-link
+                        raise Error("Invalid link title: '%s'" % text)
+ firstPass = False
+ continue
+ self.site = newsite
+ # If there's an initial colon after the interwiki, that also
+ # resets the default namespace
+ if t.startswith(":"):
+ self.namespace = 0
+ t = t[1:]
+ break
+
+ if u"#" in t:
+ t, sec = t.split(u'#', 1)
+ t, self.section = t.rstrip(), sec.lstrip()
+ else:
+ self.section = None
+
+ # Reject illegal characters.
+ if Link.illegal_titles_pattern.search(t):
+            raise Error("Invalid title (contains illegal char(s)): '%s'"
+                        % text)
+
+ # Pages with "/./" or "/../" appearing in the URLs will
+ # often be unreachable due to the way web browsers deal
+        # with 'relative' URLs. Forbid them explicitly.
+
+ if u'.' in t and (
+ t == u'.' or t == u'..'
+ or t.startswith(u"./")
+ or t.startswith(u"../")
+ or u"/./" in t
+ or u"/../" in t
+ or t.endswith(u"/.")
+ or t.endswith(u"/..")
+ ):
+            raise Error("Invalid title (contains . / combinations): '%s'"
+                        % text)
+
+ # Magic tilde sequences? Nu-uh!
+ if u"~~~" in t:
+ raise Error("Invalid title (contains ~~~): '%s'" % text)
+
+ if self.namespace != -1 and len(t) > 255:
+ raise Error("Invalid title (over 255 bytes): '%s'" % t)
+
+ if self.site.case() == 'first-letter':
+ t = t[:1].upper() + t[1:]
+
+ # Can't make a link to a namespace alone...
+ # "empty" local links can only be self-links
+ # with a fragment identifier.
+ if not t and self.site == self.source and self.namespace != 0:
+            raise ValueError("Invalid link (no page title): '%s'" % text)
+
+ self.title = t
+
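+    # Parsing sketches (assuming a default English-language site whose
+    # family resolves the 'Help' prefix and knows the 'de' language code):
+    #
+    #     l = Link(u"Help:Contents#Searching|the manual")
+    #     # l.namespace == 12, l.title == u"Contents",
+    #     # l.section == u"Searching", l.anchor == u"the manual"
+    #
+    #     l = Link(u"de:Hauptseite")
+    #     # interwiki prefix: l.site is the 'de' Site of the same family,
+    #     # l.namespace == 0, l.title == u"Hauptseite"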
+
+# Utility functions for parsing page titles
+
+def html2unicode(text, ignore = []):
+    """Return text, replacing HTML entities by equivalent unicode
+    characters."""
+ # This regular expression will match any decimal and hexadecimal entity and
+ # also entities that might be named entities.
+ entityR = re.compile(
+        r'&(#(?P<decimal>\d+)|#x(?P<hex>[0-9a-fA-F]+)|(?P<name>[A-Za-z]+));')
+ # These characters are Html-illegal, but sadly you *can* find some of
+ # these and converting them to unichr(decimal) is unsuitable
+ convertIllegalHtmlEntities = {
+ 128 : 8364, # €
+ 130 : 8218, # ‚
+ 131 : 402, # ƒ
+ 132 : 8222, # „
+ 133 : 8230, # …
+ 134 : 8224, # †
+ 135 : 8225, # ‡
+ 136 : 710, # ˆ
+ 137 : 8240, # ‰
+ 138 : 352, # Š
+ 139 : 8249, # ‹
+ 140 : 338, # Œ
+ 142 : 381, # Ž
+ 145 : 8216, # ‘
+ 146 : 8217, # ’
+ 147 : 8220, # “
+ 148 : 8221, # ”
+ 149 : 8226, # •
+ 150 : 8211, # –
+ 151 : 8212, # —
+ 152 : 732, # ˜
+ 153 : 8482, # ™
+ 154 : 353, # š
+ 155 : 8250, # ›
+ 156 : 339, # œ
+ 158 : 382, # ž
+ 159 : 376 # Ÿ
+ }
+    # ensuring that illegal &#129;, &#141; and &#157;, which have no known
+    # values, don't get converted to unichr(129), unichr(141) or unichr(157)
+ ignore = set(ignore) | set([129, 141, 157])
+ result = u''
+ i = 0
+ found = True
+ while found:
+ text = text[i:]
+ match = entityR.search(text)
+ if match:
+ unicodeCodepoint = None
+ if match.group('decimal'):
+ unicodeCodepoint = int(match.group('decimal'))
+ elif match.group('hex'):
+ unicodeCodepoint = int(match.group('hex'), 16)
+ elif match.group('name'):
+ name = match.group('name')
+ if htmlentitydefs.name2codepoint.has_key(name):
+ # We found a known HTML entity.
+ unicodeCodepoint = htmlentitydefs.name2codepoint[name]
+ result += text[:match.start()]
+ try:
+ unicodeCodepoint=convertIllegalHtmlEntities[unicodeCodepoint]
+ except KeyError:
+ pass
+        if unicodeCodepoint and unicodeCodepoint not in ignore \
+                and (WIDEBUILD or unicodeCodepoint < 65534):
+ result += unichr(unicodeCodepoint)
+ else:
+ # Leave the entity unchanged
+ result += text[match.start():match.end()]
+ i = match.end()
+ else:
+ result += text
+ found = False
+ return result
+
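+# Behavior sketch: decimal, hexadecimal, and named entities normalize to
+# the same character, and cp1252 codepoints hiding in numeric entities are
+# remapped through convertIllegalHtmlEntities:
+#
+#     html2unicode(u"caf&eacute;")  # u'caf\xe9'
+#     html2unicode(u"&#233;")       # u'\xe9'
+#     html2unicode(u"&#xE9;")       # u'\xe9'
+#     html2unicode(u"&#153;")       # u'\u2122'
+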
+def url2unicode(title, site, site2 = None):
+ """Convert url-encoded text to unicode using site's encoding.
+
+ If site2 is provided, try its encodings as well. Uses the first encoding
+ that doesn't cause an error.
+
+ """
+ # create a list of all possible encodings for both hint sites
+ encList = [site.encoding()] + list(site.encodings())
+    if site2 and site2 != site:
+ encList.append(site2.encoding())
+ encList += list(site2.encodings())
+ firstException = None
+ # try to handle all encodings (will probably retry utf-8)
+ for enc in encList:
+ try:
+ t = title.encode(enc)
+ t = urllib.unquote(t)
+ return unicode(t, enc)
+ except UnicodeError, ex:
+ if not firstException:
+ firstException = ex
+ pass
+ # Couldn't convert, raise the original exception
+ raise firstException
+
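+# Sketch (assumes a site whose primary encoding is utf-8, as on Wikimedia
+# wikis): percent-escaped bytes decode back to the unicode title:
+#
+#     url2unicode(u"Caf%C3%A9", site=pywikibot.Site())  # u'Caf\xe9'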
Added: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py (rev 0)
+++ branches/rewrite/pywikibot/site.py 2008-02-27 20:08:48 UTC (rev 5088)
@@ -0,0 +1,1947 @@
+# -*- coding: utf-8 -*-
+"""
+Objects representing MediaWiki sites (wikis) and families (groups of wikis
+on the same topic in different languages).
+"""
+#
+# (C) Pywikipedia bot team, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id: $'
+
+import pywikibot
+from pywikibot.exceptions import *
+from pywikibot.data import api
+
+import logging
+import os
+import sys
+
+def Family(fam=None, fatal=True):
+ """Import the named family.
+
+ @param fam: family name (if omitted, uses the configured default)
+ @type fam: str
+ @param fatal: if True, the bot will stop running if the given family is
+ unknown. If False, it will only raise a ValueError exception.
+ @param fatal: bool
+ @return: a Family instance configured for the named family.
+
+ """
+ if fam == None:
+ fam = pywikibot.default_family
+ try:
+ # first try the built-in families
+ exec "import pywikibot.families.%s_family as myfamily" % fam
+ except ImportError:
+ # next see if user has defined a local family module
+ try:
+ sys.path.append(pywikibot.config.datafilepath('families'))
+ exec "import %s_family as myfamily" % fam
+ except ImportError:
+ if fatal:
+                pywikibot.output(u"""\
+Error importing the %s family. This probably means the family
+does not exist. Also check your configuration file."""
+                                 % fam)
+ import traceback
+ traceback.print_stack()
+ sys.exit(1)
+ else:
+ raise ValueError("Family %s does not exist" % repr(fam))
+ return myfamily.Family()
+
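+# Usage sketch: a family can be named explicitly or left to the configured
+# default; an unknown name raises (or exits, when fatal is True):
+#
+#     fam = Family("wikipedia")         # built-in family module
+#     fam = Family()                    # pywikibot.default_family
+#     Family("nosuchfam", fatal=False)  # raises ValueError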
+
+class BaseSite(object):
+    """Site methods that are independent of the communication interface."""
+ # to implement a specific interface, define a Site class that inherits
+ # from this
+ def __init__(self, code, fam=None, user=None):
+ """
+ @param code: the site's language code
+ @type code: str
+ @param fam: wiki family name (optional)
+ @type fam: str or Family
+ @param user: bot user name (optional)
+ @type user: str
+
+ """
+ self._lang = code.lower()
+ if isinstance(fam, basestring) or fam is None:
+ self._family = Family(fam, fatal=False)
+ else:
+ self._family = fam
+
+## # if we got an outdated language code, use the new one instead.
+## if self._family.obsolete.has_key(self._lang):
+## if self._family.obsolete[self._lang] is not None:
+## self._lang = self._family.obsolete[self._lang]
+## else:
+## # no such language anymore
+## raise NoSuchSite("Language %s in family %s is obsolete"
+## % (self._lang, self._family.name))
+##
+## if self._lang not in self.languages():
+##            if self._lang == 'zh-classic' \
+##                    and 'zh-classical' in self.languages():
+## self._lang = 'zh-classical'
+## # database hack (database is varchar[10] -> zh-classical
+## # is cut to zh-classic.
+## else:
+## raise NoSuchSite("Language %s does not exist in family %s"
+## % (self._lang, self._family.name))
+ self._username = user
+
+ def family(self):
+ """Return the associated Family object."""
+ return self._family
+
+ def language(self):
+ """Return the site's language code."""
+ # N.B. this code does not always identify a language as such, but
+ # may identify a wiki that is part of any family grouping
+ return self._lang
+
+ def user(self):
+        """Return the currently-logged in bot user, or None."""
+ if self.logged_in():
+ return self._username
+ return None
+
+ def __getattr__(self, attr):
+        """Calls to methods not defined in this object are passed to Family."""
+ try:
+ method = getattr(self.family(), attr)
+ return lambda self=self: method(self.language())
+ except AttributeError:
+ raise AttributeError("%s instance has no attribute '%s'"
+ % (self.__class__.__name__, attr)
+ )
+
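+    # Delegation sketch: attribute lookups that fail on the Site fall
+    # through to the Family and are called with this site's language code,
+    # so (method availability depends on the family module):
+    #
+    #     site.hostname()  # == site.family().hostname(site.language())
+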
+ def sitename(self):
+        """Return string representing this Site's name and language."""
+        return self.family().name + ':' + self.language()
+
+ __str__ = sitename
+
+ def __repr__(self):
+        return 'Site("%s", "%s")' % (self.language(), self.family().name)
+
+ def linktrail(self):
+        """Return regex for trailing chars displayed as part of a link."""
+ return self.family().linktrail(self.language())
+
+ def languages(self):
+        """Return list of all valid language codes for this site's Family."""
+ return self.family().langs.keys()
+
+ def getNamespaceIndex(self, namespace):
+        """Given a namespace name, return its int index, or None if invalid."""
+ return self.family().getNamespaceIndex(self.language(), namespace)
+
+
+class APISite(BaseSite):
+ """API interface to MediaWiki site.
+
+ Do not use directly; use pywikibot.Site function.
+
+ """
+## Site methods from version 1.0 (as these are implemented in this file,
+## or declared deprecated/obsolete, they will be removed from this list)
+##########
+## validLanguageLinks: A list of language codes that can be used in interwiki
+## links.
+##
+## messages: return True if there are new messages on the site
+## cookies: return user's cookies as a string
+##
+## getUrl: retrieve an URL from the site
+## urlEncode: Encode a query to be sent using an http POST request.
+## postForm: Post form data to an address at this site.
+## postData: Post encoded form data to an http address at this site.
+##
+## namespace(num): Return local name of namespace 'num'.
+## normalizeNamespace(value): Return preferred name for namespace 'value' in
+## this Site's language.
+## namespaces: Return list of canonical namespace names for this Site.
+## getNamespaceIndex(name): Return the int index of namespace 'name', or None
+## if invalid.
+##
+## redirect: Return the localized redirect tag for the site.
+## redirectRegex: Return compiled regular expression matching on redirect
+## pages.
+## mediawiki_message: Retrieve the text of a specified MediaWiki message
+## has_mediawiki_message: True if this site defines specified MediaWiki
+## message
+##
+## shared_image_repository: Return tuple of image repositories used by this
+## site.
+## category_on_one_line: Return True if this site wants all category links
+## on one line.
+## interwiki_putfirst: Return list of language codes for ordering of
+## interwiki links.
+## linkto(title): Return string in the form of a wikilink to 'title'
+## isInterwikiLink(s): Return True if 's' is in the form of an interwiki
+## link.
+## getSite(lang): Return Site object for wiki in same family, language
+## 'lang'.
+## version: Return MediaWiki version string from Family file.
+## versionnumber: Return int identifying the MediaWiki version.
+## live_version: Return version number read from Special:Version.
+## checkCharset(charset): Warn if charset doesn't match family file.
+##
+## linktrail: Return regex for trailing chars displayed as part of a link.
+## disambcategory: Category in which disambiguation pages are listed.
+##
+## Methods that yield Page objects derived from a wiki's Special: pages
+## (note, some methods yield other information in a tuple along with the
+## Pages; see method docs for details) --
+##
+## search(query): query results from Special:Search
+## allpages(): Special:Allpages
+## prefixindex(): Special:Prefixindex
+## newpages(): Special:Newpages
+## newimages(): Special:Log&type=upload
+## longpages(): Special:Longpages
+## shortpages(): Special:Shortpages
+## categories(): Special:Categories (yields Category objects)
+## deadendpages(): Special:Deadendpages
+## ancientpages(): Special:Ancientpages
+## lonelypages(): Special:Lonelypages
+## unwatchedpages(): Special:Unwatchedpages (sysop accounts only)
+## uncategorizedcategories(): Special:Uncategorizedcategories (yields
+## Category objects)
+## uncategorizedpages(): Special:Uncategorizedpages
+## uncategorizedimages(): Special:Uncategorizedimages (yields
+## ImagePage objects)
+##   unusedcategories(): Special:Unusedcategories (yields Category)
+## unusedfiles(): Special:Unusedimages (yields ImagePage)
+## withoutinterwiki: Special:Withoutinterwiki
+## linksearch: Special:Linksearch
+##
+## Convenience methods that provide access to properties of the wiki Family
+## object; all of these are read-only and return a unicode string unless
+## noted --
+##
+## encoding: The current encoding for this site.
+## encodings: List of all historical encodings for this site.
+## category_namespace: Canonical name of the Category namespace on this
+## site.
+## category_namespaces: List of all valid names for the Category
+## namespace.
+## image_namespace: Canonical name of the Image namespace on this site.
+## template_namespace: Canonical name of the Template namespace on this
+## site.
+##   protocol: Protocol ('http' or 'https') for access to this site.
+## hostname: Host portion of site URL.
+## path: URL path for index.php on this Site.
+## dbName: MySQL database name.
+##
+## Methods that return addresses to pages on this site (usually in
+## Special: namespace); these methods only return URL paths, they do not
+## interact with the wiki --
+##
+## export_address: Special:Export.
+## query_address: URL path + '?' for query.php
+## api_address: URL path + '?' for api.php
+## apipath: URL path for api.php
+## move_address: Special:Movepage.
+## delete_address(s): Delete title 's'.
+## undelete_view_address(s): Special:Undelete for title 's'
+## undelete_address: Special:Undelete.
+## protect_address(s): Protect title 's'.
+## unprotect_address(s): Unprotect title 's'.
+## put_address(s): Submit revision to page titled 's'.
+## get_address(s): Retrieve page titled 's'.
+## nice_get_address(s): Short URL path to retrieve page titled 's'.
+## edit_address(s): Edit form for page titled 's'.
+## purge_address(s): Purge cache and retrieve page 's'.
+## block_address: Block an IP address.
+## unblock_address: Unblock an IP address.
+## blocksearch_address(s): Search for blocks on IP address 's'.
+## linksearch_address(s): Special:Linksearch for target 's'.
+## search_address(q): Special:Search for query 'q'.
+## allpages_address(s): Special:Allpages.
+## newpages_address: Special:Newpages.
+## longpages_address: Special:Longpages.
+## shortpages_address: Special:Shortpages.
+## unusedfiles_address: Special:Unusedimages.
+## categories_address: Special:Categories.
+## deadendpages_address: Special:Deadendpages.
+## ancientpages_address: Special:Ancientpages.
+## lonelypages_address: Special:Lonelypages.
+## unwatchedpages_address: Special:Unwatchedpages.
+## uncategorizedcategories_address: Special:Uncategorizedcategories.
+## uncategorizedimages_address: Special:Uncategorizedimages.
+## uncategorizedpages_address: Special:Uncategorizedpages.
+## unusedcategories_address: Special:Unusedcategories.
+## withoutinterwiki_address: Special:Withoutinterwiki.
+##   references_address(s): Special:Whatlinkshere for page 's'.
+## allmessages_address: Special:Allmessages.
+## upload_address: Special:Upload.
+## double_redirects_address: Special:Doubleredirects.
+## broken_redirects_address: Special:Brokenredirects.
+## login_address: Special:Userlogin.
+## captcha_image_address(id): Special:Captcha for image 'id'.
+## watchlist_address: Special:Watchlist editor.
+## contribs_address(target): Special:Contributions for user 'target'.
+
+ def __init__(self, code, fam=None, user=None):
+ BaseSite.__init__(self, code, fam, user)
+ self._namespaces = {
+ # these are the MediaWiki built-in names, which always work
+ # localized names are loaded later upon accessing the wiki
+ -2: [u"Media"],
+ -1: [u"Special"],
+ 0: [u""],
+ 1: [u"Talk"],
+ 2: [u"User"],
+ 3: [u"User talk"],
+ 4: [u"Project"],
+ 5: [u"Project talk"],
+ 6: [u"Image"],
+ 7: [u"Image talk"],
+ 8: [u"MediaWiki"],
+ 9: [u"MediaWiki talk"],
+ 10: [u"Template"],
+ 11: [u"Template talk"],
+ 12: [u"Help"],
+ 13: [u"Help talk"],
+ 14: [u"Category"],
+ 15: [u"Category talk"],
+ }
+ return
+# START HERE
+ self._mediawiki_messages = {}
+ self.nocapitalize = self._lang in self.family().nocapitalize
+ self._userData = [False, False]
+ self._userName = [None, None]
+ self._isLoggedIn = [None, None]
+ self._isBlocked = [None, None]
+ self._messages = [None, None]
+ self._rights = [None, None]
+ self._token = [None, None]
+ self._cookies = [None, None]
+ # Calculating valid languages took quite long, so we calculate it once
+ # in initialization instead of each time it is used.
+ self._validlanguages = []
+ for language in self.languages():
+ if not language[:1].upper() + language[1:] in self.namespaces():
+ self._validlanguages.append(language)
+
+ def logged_in(self, sysop=False):
+        """Return True if logged in with specified privileges, otherwise
+        False.
+
+ @param sysop: if True, require sysop privileges.
+
+ """
+ if not hasattr(self, '_userinfo'):
+ return False
+ if self._userinfo['name'] != self._username:
+ return False
+ return (not sysop) or 'sysop' in self._userinfo['groups']
+
+ def loggedInAs(self, sysop = False):
+        """Return the current username if logged in, otherwise return None.
+
+ DEPRECATED (use .user() method instead)
+ Checks if we're logged in by loading a page and looking for the login
+ link. We assume that we're not being logged out during a bot run, so
+ loading the test page is only required once.
+
+ """
+ logging.debug("Site.loggedInAs() method is deprecated.")
+ return self.logged_in(sysop) and self.user()
+
+ def login(self, sysop=False):
+ """Log the user in if not already logged in."""
+ if not self.logged_in(sysop):
+ loginMan = api.LoginManager(site=self, sysop=sysop)
+ if loginMan.login(retry = True):
+ self._username = loginMan.username
+ if hasattr(self, "_userinfo"):
+ del self._userinfo
+ self.getuserinfo()
+
+ forceLogin = login # alias for backward-compatibility
+
+ def getuserinfo(self):
+ """Retrieve userinfo from site and store in _userinfo attribute.
+
+ self._userinfo will be a dict with the following keys and values:
+
+ - id: user id (numeric str)
+ - name: username (if user is logged in)
+ - anon: present if user is not logged in
+ - groups: list of groups (could be empty)
+ - rights: list of rights (could be empty)
+ - message: present if user has a new message on talk page
+ - blockinfo: present if user is blocked (dict)
+
+ """
+ if not hasattr(self, "_userinfo"):
+ uirequest = api.Request(
+ site=self,
+ action="query",
+ meta="userinfo",
+ uiprop="blockinfo|hasmsg|groups|rights"
+ )
+ uidata = uirequest.submit()
+ assert 'query' in uidata, \
+ "API userinfo response lacks 'query' key"
+ uidata = uidata['query']
+ assert 'userinfo' in uidata, \
+ "API userinfo response lacks 'userinfo' key"
+ self._userinfo = uidata['userinfo']
+ return self._userinfo
+
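+    # Inspection sketch (assumes a logged-in session; the field names inside
+    # 'blockinfo' follow the MediaWiki API and are illustrative):
+    #
+    #     info = site.getuserinfo()
+    #     is_sysop = 'sysop' in info['groups']
+    #     if 'blockinfo' in info:
+    #         print u"blocked by %s" % info['blockinfo'].get('blockedby')
+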
+ def getsiteinfo(self):
+        """Retrieve siteinfo from site and store in _siteinfo attribute."""
+ if not hasattr(self, "_siteinfo"):
+ sirequest = api.Request(
+ site=self,
+ action="query",
+ meta="siteinfo",
+ siprop="general|namespaces|namespacealiases"
+ )
+ try:
+ sidata = sirequest.submit()
+ except api.APIError:
+ # hack for older sites that don't support 1.12 properties
+ sirequest = api.Request(
+ site=self,
+ action="query",
+ meta="siteinfo",
+ siprop="general|namespaces"
+ )
+ sidata = sirequest.submit()
+
+ assert 'query' in sidata, \
+ "API siteinfo response lacks 'query' key"
+ sidata = sidata['query']
+ assert 'general' in sidata, \
+ "API siteinfo response lacks 'general' key"
+ assert 'namespaces' in sidata, \
+ "API siteinfo response lacks 'namespaces' key"
+ self._siteinfo = sidata['general']
+ nsdata = sidata['namespaces']
+ for nskey in nsdata:
+ ns = int(nskey)
+ if ns in self._namespaces:
+ if nsdata[nskey]["*"] in self._namespaces[ns]:
+ continue
+ # this is the preferred form so it goes at front of list
+ self._namespaces[ns].insert(0, nsdata[nskey]["*"])
+ else:
+ self._namespaces[ns] = [nsdata[nskey]["*"]]
+ if 'namespacealiases' in sidata:
+ aliasdata = sidata['namespacealiases']
+ for item in aliasdata:
+ # this is a less preferred form so it goes at the end
+                    self._namespaces[int(item['id'])].append(item["*"])
+ return self._siteinfo
+
+    def case(self):
+        """Return this site's capitalization setting (from siteinfo)."""
+        return self.getsiteinfo()['case']
+
+    def namespace(self, num, all = False):
+        """Return string containing local name of namespace 'num'.
+
+        If optional argument 'all' is true, return a tuple of all recognized
+        values for this namespace.
+
+        """
+        if all:
+            return tuple(self._namespaces[num])
+        return self._namespaces[num][0]
+
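+    # Sketch: once getsiteinfo() has run, the localized name is first in the
+    # list and the canonical English name remains available as an alias:
+    #
+    #     site.namespace(14)            # e.g. u'Kategorie' on de.wikipedia
+    #     site.namespace(14, all=True)  # (u'Kategorie', u'Category', ...)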
+
+class NotImplementedYet:
+
+ def isBlocked(self, sysop = False):
+ """Check if the user is blocked."""
+ try:
+            text = self.getUrl(
+                u'%saction=query&meta=userinfo&uiprop=blockinfo'
+                % self.api_address(), sysop=sysop)
+ return text.find('blockedby=') > -1
+ except NotImplementedError:
+ return False
+
+ def isAllowed(self, right, sysop = False):
+ """Check if the user has a specific right.
+ Among possible rights:
+ * Actions: edit, move, delete, protect, upload
+ * User levels: autoconfirmed, sysop, bot, empty string (always true)
+ """
+ if right == '' or right == None:
+ return True
+ else:
+ self._load(sysop = sysop)
+ index = self._userIndex(sysop)
+ return right in self._rights[index]
+
+ def messages(self, sysop = False):
+        """Return True if the user has new messages, False otherwise."""
+ self._load(sysop = sysop)
+ index = self._userIndex(sysop)
+ return self._messages[index]
+
+ def cookies(self, sysop = False):
+        """Return a string containing the user's current cookies."""
+ self._loadCookies(sysop = sysop)
+ index = self._userIndex(sysop)
+ return self._cookies[index]
+
+ def _loadCookies(self, sysop = False):
+ """Retrieve session cookies for login"""
+ index = self._userIndex(sysop)
+ if self._cookies[index] is not None:
+ return
+ try:
+ if sysop:
+ try:
+ username = config.sysopnames[self.family().name][self.language()]
+ except KeyError:
+ raise NoUsername("""\
+You tried to perform an action that requires admin privileges, but you haven't
+entered your sysop name in your user-config.py. Please add
+sysopnames['%s']['%s']='name' to your user-config.py"""
+ % (self.family().name, self.language()))
+ else:
+ username = config.usernames[self.family().name][self.language()]
+ except KeyError:
+ self._cookies[index] = None
+ self._isLoggedIn[index] = False
+ else:
+ tmp = '%s-%s-%s-login.data' % (
+ self.family().name, self.language(), username)
+ fn = config.datafilepath('login-data', tmp)
+ if not os.path.exists(fn):
+ self._cookies[index] = None
+ self._isLoggedIn[index] = False
+ else:
+ f = open(fn)
+                    self._cookies[index] = '; '.join(
+                        [x.strip() for x in f.readlines()])
+ f.close()
+
+ def urlEncode(self, query):
+        """Encode a query so that it can be sent using an http POST request."""
+ if not query:
+ return None
+ if hasattr(query, 'iteritems'):
+ iterator = query.iteritems()
+ else:
+ iterator = iter(query)
+ l = []
+ wpEditToken = None
+ for key, value in iterator:
+ if isinstance(key, unicode):
+ key = key.encode('utf-8')
+ if isinstance(value, unicode):
+ value = value.encode('utf-8')
+ key = urllib.quote(key)
+ value = urllib.quote(value)
+ if key == 'wpEditToken':
+ wpEditToken = value
+ continue
+ l.append(key + '=' + value)
+
+ # wpEditToken is explicitly added as last value.
+ # If a premature connection abort occurs while putting, the server will
+ # not have received an edit token and thus refuse saving the page
+ if wpEditToken != None:
+ l.append('wpEditToken=' + wpEditToken)
+ return '&'.join(l)
+
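+    # Encoding sketch (for when this method is ported out of
+    # NotImplementedYet; values illustrative): values are utf-8 encoded and
+    # percent-quoted, and wpEditToken is deliberately moved to the end:
+    #
+    #     site.urlEncode({'wpEditToken': 'abc', 'title': u'Caf\xe9'})
+    #     # -> 'title=Caf%C3%A9&wpEditToken=abc'
+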
+ def postForm(self, address, predata, sysop=False, useCookie=True):
+ """Post http form data to the given address at this site.
+
+ address is the absolute path without hostname.
+ predata is a dict or any iterable that can be converted to a dict,
+ containing keys and values for the http form.
+
+ Return a (response, data) tuple, where response is the HTTP
+ response object and data is a Unicode string containing the
+ body of the response.
+
+ """
+ data = self.urlEncode(predata)
+ try:
+ return self.postData(address, data, sysop=sysop,
+ useCookie=useCookie)
+ except socket.error, e:
+ raise ServerError(e)
+
+ def postData(self, address, data,
+ contentType='application/x-www-form-urlencoded',
+ sysop=False, useCookie=True, compress=True):
+ """Post encoded data to the given http address at this site.
+
+ address is the absolute path without hostname.
+ data is an ASCII string that has been URL-encoded.
+
+ Returns a (response, data) tuple where response is the HTTP
+ response object and data is a Unicode string containing the
+ body of the response.
+ """
+
+ # TODO: add the authenticate stuff here
+
+ if False: #self.persistent_http:
+ conn = self.conn
+ else:
+ # Encode all of this into a HTTP request
+ if self.protocol() == 'http':
+ conn = httplib.HTTPConnection(self.hostname())
+ elif self.protocol() == 'https':
+ conn = httplib.HTTPSConnection(self.hostname())
+ # otherwise, it will crash, as other protocols are not supported
+
+ conn.putrequest('POST', address)
+ conn.putheader('Content-Length', str(len(data)))
+ conn.putheader('Content-type', contentType)
+ conn.putheader('User-agent', useragent)
+ if useCookie and self.cookies(sysop = sysop):
+ conn.putheader('Cookie', self.cookies(sysop = sysop))
+ if False: #self.persistent_http:
+ conn.putheader('Connection', 'Keep-Alive')
+ if compress:
+ conn.putheader('Accept-encoding', 'gzip')
+ conn.endheaders()
+ conn.send(data)
+
+ # Prepare the return values
+ # Note that this can raise network exceptions which are not
+ # caught here.
+ try:
+ response = conn.getresponse()
+ except httplib.BadStatusLine:
+ # Blub.
+ conn.close()
+ conn.connect()
+ return self.postData(address, data, contentType, sysop, useCookie)
+
+ data = response.read()
+
+        if compress and response.getheader('Content-Encoding') == 'gzip':
+            data = decompress_gzip(data)
+
+ data = data.decode(self.encoding())
+ response.close()
+
+ if True: #not self.persistent_http:
+ conn.close()
+
+ # If a wiki page, get user data
+ self._getUserData(data, sysop = sysop)
+
+ return response, data
+
+ def getUrl(self, path, retry = True, sysop = False, data = None, compress = True):
+ """
+ Low-level routine to get a URL from the wiki.
+
+ Parameters:
+ path - The absolute path, without the hostname.
+ retry - If True, retries loading the page when a network error
+ occurs.
+ sysop - If True, the sysop account's cookie will be used.
+ data - An optional dict providing extra post request parameters
+
+ Returns the HTML text of the page converted to unicode.
+ """
+ if False: #self.persistent_http and not data:
+ self.conn.putrequest('GET', path)
+ self.conn.putheader('User-agent', useragent)
+ self.conn.putheader('Cookie', self.cookies(sysop = sysop))
+ self.conn.putheader('Connection', 'Keep-Alive')
+ if compress:
+ self.conn.putheader('Accept-encoding', 'gzip')
+ self.conn.endheaders()
+
+ # Prepare the return values
+ # Note that this can raise network exceptions which are not
+ # caught here.
+ try:
+ response = self.conn.getresponse()
+ except httplib.BadStatusLine:
+ # Blub.
+ self.conn.close()
+ self.conn.connect()
+ return self.getUrl(path, retry, sysop, data, compress)
+
+ text = response.read()
+ headers = dict(response.getheaders())
+
+ else:
+ if self.hostname() in config.authenticate.keys():
+ uo = authenticateURLopener
+ else:
+ uo = MyURLopener()
+ if self.cookies(sysop = sysop):
+ uo.addheader('Cookie', self.cookies(sysop = sysop))
+ if compress:
+ uo.addheader('Accept-encoding', 'gzip')
+
+ url = '%s://%s%s' % (self.protocol(), self.hostname(), path)
+ data = self.urlEncode(data)
+
+ # Try to retrieve the page until it was successfully loaded (just in
+ # case the server is down or overloaded).
+ # Wait for retry_idle_time minutes (growing!) between retries.
+ retry_idle_time = 1
+ retrieved = False
+ while not retrieved:
+ try:
+ if self.hostname() in config.authenticate.keys():
+ if False: # compress:
+ request = urllib2.Request(url, data)
+                            request.add_header('Accept-encoding', 'gzip')
+ opener = urllib2.build_opener()
+ f = opener.open(request)
+ else:
+ f = urllib2.urlopen(url, data)
+ else:
+ f = uo.open(url, data)
+ retrieved = True
+ except KeyboardInterrupt:
+ raise
+ except Exception, e:
+ if retry:
+                        # We assume that the server is down.
+                        # Wait some time, then try again.
+ output(u"%s" % e)
+ output(u"""\
+WARNING: Could not open '%s://%s%s'. Maybe the server or
+your connection is down. Retrying in %i minutes..."""
+ % (self.protocol(), self.hostname(), path,
+ retry_idle_time))
+ time.sleep(retry_idle_time * 60)
+ # Next time wait longer, but not longer than half an hour
+ retry_idle_time *= 2
+ if retry_idle_time > 30:
+ retry_idle_time = 30
+ else:
+ raise
+ text = f.read()
+
+ headers = f.info()
+
+ contentType = headers.get('content-type', '')
+ contentEncoding = headers.get('content-encoding', '')
+
+ # Ensure that all sent data is received
+            if 'content-length' in headers \
+                    and int(headers.get('content-length', '0')) != len(text):
+                output(u'Warning! len(text) does not match content-length: '
+                       u'%s != %s'
+                       % (len(text), headers.get('content-length')))
+ if False: #self.persistent_http
+ self.conn.close()
+ self.conn.connect()
+ return self.getUrl(path, retry, sysop, data, compress)
+
+ if compress and contentEncoding == 'gzip':
+ text = decompress_gzip(text)
+
+ R = re.compile('charset=([^\'\";]+)')
+ m = R.search(contentType)
+ if m:
+ charset = m.group(1)
+ else:
+ output(u"WARNING: No character set found.")
+ # UTF-8 as default
+ charset = 'utf-8'
+ # Check if this is the charset we expected
+ self.checkCharset(charset)
+ # Convert HTML to Unicode
+ try:
+ text = unicode(text, charset, errors = 'strict')
+ except UnicodeDecodeError, e:
+ print e
+            output(u'ERROR: Invalid characters found on %s://%s%s, '
+                   u'replaced by \\ufffd.'
+                   % (self.protocol(), self.hostname(), path))
+ # We use error='replace' in case of bad encoding.
+ text = unicode(text, charset, errors = 'replace')
+
+ # If a wiki page, get user data
+ self._getUserData(text, sysop = sysop)
+
+ return text
+
+ def _getUserData(self, text, sysop = False):
+ """
+        Get user data from the HTML text of a wiki page.
+
+ Parameters:
+ * text - the page text
+ * sysop - is the user a sysop?
+ """
+ if '<div id="globalWrapper">' not in text:
+ # Not a wiki page
+ return
+
+ index = self._userIndex(sysop)
+
+ # Check for blocks - but only if version is 1.11 (userinfo is available)
+ # and the user data was not yet loaded
+ if self.versionnumber() >= 11 and not self._userData[index]:
+ blocked = self.isBlocked(sysop = sysop)
+ if blocked and not self._isBlocked[index]:
+ # Write a warning if not shown earlier
+ if sysop:
+ account = 'Your sysop account'
+ else:
+ account = 'Your account'
+                output(u'WARNING: %s on %s is blocked. Editing using this '
+                       u'account will stop the run.' % (account, self))
+ self._isBlocked[index] = blocked
+
+ # Check for new messages
+ if '<div class="usermessage">' in text:
+ if not self._messages[index]:
+ # User has *new* messages
+ if sysop:
+                    output(u'NOTE: You have new messages in your sysop '
+                           u'account on %s' % self)
+ else:
+ output(u'NOTE: You have new messages on %s' % self)
+ self._messages[index] = True
+ else:
+ self._messages[index] = False
+
+ # Don't perform other checks if the data was already loaded
+ if self._userData[index]:
+ return
+
+        # Search for the user page link at the top.
+ # Note that the link of anonymous users (which doesn't exist at all
+ # in Wikimedia sites) has the ID pt-anonuserpage, and thus won't be
+ # found here.
+        userpageR = re.compile(
+            '<li id="pt-userpage"><a href=".+?">(?P<username>.+?)</a></li>')
+ m = userpageR.search(text)
+ if m:
+ self._isLoggedIn[index] = True
+ self._userName[index] = m.group('username')
+ else:
+ self._isLoggedIn[index] = False
+            # No idea what the user name is, and it isn't important
+ self._userName[index] = None
+
+ # Check user groups, if possible (introduced in 1.10)
+ groupsR = re.compile(r'var wgUserGroups = \[\"(.+)\"\];')
+ m = groupsR.search(text)
+ if m:
+ rights = m.group(1)
+ rights = rights.split('", "')
+ if '*' in rights:
+ rights.remove('*')
+ self._rights[index] = rights
+ # Warnings
+ # Don't show warnings for not logged in users, they will just fail to
+ # do any action
+ if self._isLoggedIn[index]:
+ if 'bot' not in self._rights[index]:
+ if sysop:
+                        output(u'Note: Your sysop account on %s does not '
+                               u'have a bot flag. Its edits will be visible '
+                               u'in the recent changes.' % self)
+                    else:
+                        output(u'WARNING: Your account on %s does not have '
+                               u'a bot flag. Its edits will be visible in '
+                               u'the recent changes and it may get blocked.'
+                               % self)
+ if sysop and 'sysop' not in self._rights[index]:
+                    output(u'WARNING: Your sysop account on %s does not '
+                           u'seem to have sysop rights. You may not be able '
+                           u'to perform any sysop-restricted actions using '
+                           u'it.' % self)
+ else:
+ # We don't have wgUserGroups, and can't check the rights
+ self._rights[index] = []
+ if self._isLoggedIn[index]:
+ # Logged in user
+ self._rights[index].append('user')
+ # Assume bot, and thus autoconfirmed
+ self._rights[index].extend(['bot', 'autoconfirmed'])
+ if sysop:
+ # Assume user reported as a sysop indeed has the sysop rights
+ self._rights[index].append('sysop')
+ # Assume the user has the default rights
+                self._rights[index].extend(['read', 'createaccount', 'edit',
+                                            'upload', 'createpage',
+                                            'createtalk', 'move'])
+            if 'bot' in self._rights[index] \
+                    or 'sysop' in self._rights[index]:
+ self._rights[index].append('apihighlimits')
+ if 'sysop' in self._rights[index]:
+                self._rights[index].extend(['delete', 'undelete', 'block',
+                                            'protect', 'import',
+                                            'deletedhistory',
+                                            'unwatchedpages'])
+
+ # Search for a token
+        tokenR = re.compile(r"\<input type='hidden' value=\"(.*?)\""
+                            r" name=\"wpEditToken\"")
+ tokenloc = tokenR.search(text)
+ if tokenloc:
+ self._token[index] = tokenloc.group(1)
+ if self._rights[index] is not None:
+ # In this case, token and rights are loaded - user data is now loaded
+ self._userData[index] = True
+ else:
+ # Token not found
+ # Possible reason for this is the user is blocked, don't show a
+ # warning in this case, otherwise do show a warning
+ # Another possible reason is that the page cannot be edited - ensure
+ # there is a textarea and the tab "view source" is not shown
+            if u'<textarea' in text \
+                    and u'<li id="ca-viewsource"' not in text \
+                    and not self._isBlocked[index]:
+ # Token not found
+                output(u'WARNING: Token not found on %s. You will not be '
+                       u'able to edit any page.' % self)
+
+ def mediawiki_message(self, key):
+        """Return the MediaWiki message text for key 'key'."""
+ global mwpage, tree
+ if key.lower() not in self._mediawiki_messages.keys() \
+ and not hasattr(self, "_phploaded"):
+ get_throttle()
+ mwpage = self.getUrl("%s?title=%s:%s&action=edit"
+ % (self.path(), urllib.quote(
+ self.namespace(8).replace(' ', '_').encode(
+ self.encoding())),
+ key))
+ tree = BeautifulSoup(mwpage,
+ convertEntities=BeautifulSoup.HTML_ENTITIES,
+ parseOnlyThese=SoupStrainer("textarea"))
+ if tree.textarea is not None and tree.textarea.string is not None:
+ value = tree.textarea.string.strip()
+ else:
+ value = None
+ if value:
+ self._mediawiki_messages[key.lower()] = value
+ else:
+ self._mediawiki_messages[key.lower()] = None
+ # Fallback in case MediaWiki: page method doesn't work
+ if verbose:
+                output(u"Retrieving mediawiki messages "
+                       u"from Special:Allmessages")
+ retry_idle_time = 1
+ while True:
+ get_throttle()
+                phppage = self.getUrl(
+                    self.get_address("Special:Allmessages") + "&ot=php")
+                Rphpvals = re.compile(r"(?ms)'([^']*)' => '(.*?[^\\])',")
+ count = 0
+ for (phpkey, phpval) in Rphpvals.findall(phppage):
+ count += 1
+ self._mediawiki_messages[str(phpkey).lower()] = phpval
+ if count == 0:
+ # No messages could be added.
+ # We assume that the server is down.
+ # Wait some time, then try again.
+                        output(u'WARNING: No messages found in '
+                               u'Special:Allmessages. Maybe the server is '
+                               u'down. Retrying in %i minutes...'
+                               % retry_idle_time)
+ time.sleep(retry_idle_time * 60)
+ # Next time wait longer, but not longer than half an hour
+ retry_idle_time *= 2
+ if retry_idle_time > 30:
+ retry_idle_time = 30
+ continue
+ break
+ self._phploaded = True
+
+ key = key.lower()
+ if self._mediawiki_messages[key] is None:
+ raise KeyError("MediaWiki key '%s' does not exist on %s"
+ % (key, self))
+ return self._mediawiki_messages[key]
+
+ def has_mediawiki_message(self, key):
+        """Return True iff this site defines a MediaWiki message for 'key'."""
+ try:
+ v = self.mediawiki_message(key)
+ return True
+ except KeyError:
+ return False
+
+ def _load(self, sysop = False):
+ """
+        Load user data.
+
+        This is only done if we haven't retrieved any page yet and the
+        information is requested; otherwise we should already have this data.
+
+ Parameters:
+ * sysop - Get sysop user data?
+ """
+ index = self._userIndex(sysop)
+ if self._userData[index]:
+ return
+
+ if verbose:
+ output(u'Getting information for site %s' % self)
+
+ # Get data
+ url = self.edit_address('Non-existing_page')
+ text = self.getUrl(url, sysop = sysop)
+
+ # Parse data
+ self._getUserData(text, sysop = sysop)
+
+ def search(self, query, number = 10, namespaces = None):
+        """Yield search results (using Special:Search page) for query."""
+ throttle = True
+ path = self.search_address(urllib.quote_plus(query),
+ n=number, ns=namespaces)
+ get_throttle()
+ html = self.getUrl(path)
+
+        entryR = re.compile(
+            ur'<li[^>]*><a href=".+?" title="(?P<title>.+?)">.+?</a>'
+            '<br />(?P<match>.*?)<span style="color[^>]*>.+?: '
+            '(?P<relevance>[0-9.]+)% - '
+#            '(?P<size>[0-9.]*) '
+#            '(?P<sizeunit>[A-Za-z]) '
+#            '\((?P<words>.+?) \w+\) - '
+#            '(?P<date>.+?)</span></li>'
+            , re.DOTALL)
+
+ for m in entryR.finditer(html):
+ page = Page(self, m.group('title'))
+ match = m.group('match')
+ relevance = m.group('relevance')
+ #size = m.group('size')
+ ## sizeunit appears to always be "KB"
+ #words = m.group('words')
+ #date = m.group('date')
+
+            #print "%s - %s %s (%s words) - %s" \
+            #    % (relevance, size, sizeunit, words, date)
+
+ #yield page, match, relevance, size, words, date
+ yield page, match, relevance, '', '', ''
+
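+    # Usage sketch (for when this method is ported; it screen-scrapes
+    # Special:Search, so results depend on the wiki's skin and version; the
+    # query string is hypothetical):
+    #
+    #     for page, match, relevance, _, _, _ in site.search(u"pywikibot",
+    #                                                        number=5):
+    #         print page.title(), relevance
+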
+ # TODO: avoid code duplication for the following methods
+ def newpages(self, number = 10, get_redirect = False, repeat = False):
+ """Yield new articles (as Page objects) from Special:Newpages.
+
+ Starts with the newest article and fetches the number of articles
+ specified in the first argument. If repeat is True, it fetches
+ Newpages again. If there is no new page, it blocks until there is
+ one, sleeping between subsequent fetches of Newpages.
+
+ The objects yielded are tuples composed of the Page object,
+ timestamp (unicode), length (int), an empty unicode string, username
+ or IP address (str), comment (unicode).
+
+ """
+ # TODO: in recent MW versions Special:Newpages takes a namespace parameter,
+ # and defaults to 0 if not specified.
+ # TODO: Detection of unregistered users is broken
+ # TODO: Repeat mechanism doesn't make much sense as implemented;
+ # should use both offset and limit parameters, and have an
+ # option to fetch older rather than newer pages
+ seen = set()
+ while True:
+ path = self.newpages_address(n=number)
+ # The throttling is important here, so always enabled.
+ get_throttle()
+ html = self.getUrl(path)
+
+ entryR = re.compile(
+            entryR = re.compile(
+                '<li[^>]*>(?P<date>.+?) \S*?<a href=".+?"'
+                ' title="(?P<title>.+?)">.+?</a>.+?'
+                '[\(\[](?P<length>[\d,.]+)[^\)\]]*[\)\]]'
+                ' .?<a href=".+?" title=".+?:(?P<username>.+?)">'
+                )
+ for m in entryR.finditer(html):
+ date = m.group('date')
+ title = m.group('title')
+                title = title.replace('&quot;', '"')
+                length = int(re.sub("[,.]", "", m.group('length')))
+ loggedIn = u''
+ username = m.group('username')
+ comment = u''
+
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page, date, length, loggedIn, username, comment
+ if not repeat:
+ break
+
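+    # Usage sketch (for when this method is ported): each hit is a
+    # (Page, timestamp, length, '', username, comment) tuple; with
+    # repeat=False this is a single fetch:
+    #
+    #     for page, date, length, _, username, _ in site.newpages(number=20):
+    #         print u"%s (%d bytes) by %s" % (page.title(), length, username)
+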
+ def longpages(self, number = 10, repeat = False):
+ """Yield Pages from Special:Longpages.
+
+ Return values are a tuple of Page object, length(int).
+
+ """
+ #TODO: should use offset and limit parameters; 'repeat' as now
+ # implemented is fairly useless
+ # this comment applies to all the XXXXpages methods following, as well
+ seen = set()
+ while True:
+ path = self.longpages_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+            entryR = re.compile(
+                ur'<li>\(<a href=".+?" title=".+?">hist</a>\) '
+                ur'<a href=".+?" title="(?P<title>.+?)">.+?</a> '
+                ur'\[(?P<length>\d+)(.+?)\]</li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+ length = int(m.group('length'))
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page, length
+ if not repeat:
+ break
+
+ def shortpages(self, number = 10, repeat = False):
+        """Yield Pages and lengths from Special:Shortpages."""
+ throttle = True
+ seen = set()
+ while True:
+ path = self.shortpages_address(n = number)
+ get_throttle()
+ html = self.getUrl(path)
+            entryR = re.compile(
+                ur'<li>\(<a href=".+?" title=".+?">hist</a>\) '
+                ur'<a href=".+?" title="(?P<title>.+?)">.+?</a> '
+                ur'\[(?P<length>\d+)(.+?)\]</li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+ length = int(m.group('length'))
+
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page, length
+ if not repeat:
+ break
+
+ def categories(self, number=10, repeat=False):
+        """Yield Category objects from Special:Categories."""
+ import catlib
+ seen = set()
+ while True:
+ path = self.categories_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+ entryR = re.compile(
+                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a>.*?</li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+ if title not in seen:
+ seen.add(title)
+ page = catlib.Category(self, title)
+ yield page
+ if not repeat:
+ break
+
+ def deadendpages(self, number = 10, repeat = False):
+        """Yield Page objects retrieved from Special:Deadendpages."""
+ seen = set()
+ while True:
+ path = self.deadendpages_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+ entryR = re.compile(
+                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page
+ if not repeat:
+ break
+
+ def ancientpages(self, number = 10, repeat = False):
+        """Yield Pages, datestamps from Special:Ancientpages."""
+ seen = set()
+ while True:
+ path = self.ancientpages_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+ entryR = re.compile(
+                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a> '
+                '(?P<date>.+?)</li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+ date = m.group('date')
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page, date
+ if not repeat:
+ break
+
+ def lonelypages(self, number = 10, repeat = False):
+        """Yield Pages retrieved from Special:Lonelypages."""
+ throttle = True
+ seen = set()
+ while True:
+ path = self.lonelypages_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+ entryR = re.compile(
+                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page
+ if not repeat:
+ break
+
+ def unwatchedpages(self, number = 10, repeat = False):
+        """Yield Pages from Special:Unwatchedpages (requires Admin privileges)."""
+ seen = set()
+ while True:
+ path = self.unwatchedpages_address(n=number)
+ get_throttle()
+ html = self.getUrl(path, sysop = True)
+ entryR = re.compile(
+                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a>.+?</li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page
+ if not repeat:
+ break
+
+ def uncategorizedcategories(self, number = 10, repeat = False):
+        """Yield Categories from Special:Uncategorizedcategories."""
+ import catlib
+ seen = set()
+ while True:
+ path = self.uncategorizedcategories_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+ entryR = re.compile(
+                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+ if title not in seen:
+ seen.add(title)
+ page = catlib.Category(self, title)
+ yield page
+ if not repeat:
+ break
+
+ def newimages(self, number = 10, repeat = False):
+        """Yield ImagePages from Special:Log&type=upload."""
+
+ seen = set()
+        regexp = re.compile(
+            '<li[^>]*>(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a>\s+'
+            '\(.+?</a>\).*?<a href=".*?"(?P<new> class="new")? '
+            'title="(?P<image>.+?)"\s*>'
+            '(?:.*?<span class="comment">(?P<comment>.*?)</span>)?',
+            re.UNICODE)
+
+ while True:
+ path = self.log_address(number, mode = 'upload')
+ get_throttle()
+ html = self.getUrl(path)
+
+ for m in regexp.finditer(html):
+ image = m.group('image')
+
+ if image not in seen:
+ seen.add(image)
+
+ if m.group('new'):
+                        output(u"Image '%s' has been deleted." % image)
+ continue
+
+ date = m.group('date')
+ user = m.group('user')
+ comment = m.group('comment') or ''
+
+ yield ImagePage(self, image), date, user, comment
+ if not repeat:
+ break
+
+ def uncategorizedimages(self, number = 10, repeat = False):
+        """Yield ImagePages from Special:Uncategorizedimages."""
+ seen = set()
+ ns = self.image_namespace()
+ entryR = re.compile(
+            '<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns)
+ while True:
+ path = self.uncategorizedimages_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+ for m in entryR.finditer(html):
+ title = m.group('title')
+ if title not in seen:
+ seen.add(title)
+ page = ImagePage(self, title)
+ yield page
+ if not repeat:
+ break
+
+ def uncategorizedpages(self, number = 10, repeat = False):
+ """Yield Pages from Special:Uncategorizedpages."""
+ seen = set()
+ while True:
+ path = self.uncategorizedpages_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+ entryR = re.compile(
+                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page
+ if not repeat:
+ break
+
+ def unusedcategories(self, number = 10, repeat = False):
+        """Yield Category objects from Special:Unusedcategories."""
+ import catlib
+ seen = set()
+ while True:
+ path = self.unusedcategories_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+            entryR = re.compile(
+                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+
+ if title not in seen:
+ seen.add(title)
+ page = catlib.Category(self, title)
+ yield page
+ if not repeat:
+ break
+
+ def unusedfiles(self, number = 10, repeat = False, extension = None):
+        """Yield ImagePage objects from Special:Unusedimages."""
+ seen = set()
+ ns = self.image_namespace()
+ entryR = re.compile(
+            '<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns)
+ while True:
+ path = self.unusedfiles_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+ for m in entryR.finditer(html):
+ fileext = None
+ title = m.group('title')
+ if extension:
+                    fileext = title[-3:]
+ if title not in seen and fileext == extension:
+ ## Check whether the media is used in a Proofread page
+ # code disabled because it slows this method down, and
+ # because it is unclear what it's supposed to do.
+ #basename = title[6:]
+ #page = Page(self, 'Page:' + basename)
+
+ #if not page.exists():
+ seen.add(title)
+ image = ImagePage(self, title)
+ yield image
+ if not repeat:
+ break
+
+ def withoutinterwiki(self, number=10, repeat=False):
+        """Yield Pages without language links from Special:Withoutinterwiki."""
+ seen = set()
+ while True:
+ path = self.withoutinterwiki_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+            entryR = re.compile(
+                '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>')
+ for m in entryR.finditer(html):
+ title = m.group('title')
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page
+ if not repeat:
+ break
+
+ def allpages(self, start='!', namespace=0, includeredirects=True,
+ throttle=True):
+ """Yield all Pages from Special:Allpages.
+
+ Parameters:
+ start Start at this page. By default, it starts at '!', and yields
+ all pages.
+ namespace Yield all pages in this namespace; defaults to 0.
+ MediaWiki software will only return pages in one namespace
+ at a time.
+
+ If includeredirects is False, redirects will not be found.
+ If includeredirects equals the string 'only', only redirects
+ will be found. Note that this has not been tested on older
+ versions of the MediaWiki code.
+
+ It is advised not to use this directly, but to use the
+ AllpagesPageGenerator from pagegenerators.py instead.
+
+ """
+ while True:
+ # encode Non-ASCII characters in hexadecimal format (e.g. %F6)
+ start = start.encode(self.encoding())
+ start = urllib.quote(start)
+ # load a list which contains a series of article names (always 480)
+ path = self.allpages_address(start, namespace)
+ output(u'Retrieving Allpages special page for %s from %s, namespace %i' % (repr(self), start, namespace))
+ returned_html = self.getUrl(path)
+ # Try to find begin and end markers
+ try:
+ # In 1.4, another table was added above the navigational links
+ if self.versionnumber() >= 4:
+ begin_s = '</table><hr /><table'
+ end_s = '</table'
+ else:
+ begin_s = '<table'
+ end_s = '</table'
+ ibegin = returned_html.index(begin_s)
+ iend = returned_html.index(end_s,ibegin + 3)
+ except ValueError:
+ raise ServerError(
+"Couldn't extract allpages special page. Make sure you're using MonoBook
skin.")
+ # remove the irrelevant sections
+ returned_html = returned_html[ibegin:iend]
+ if self.versionnumber()==2:
+ R = re.compile('/wiki/(.*?)\" *class=[\'\"]printable')
+ elif self.versionnumber()<5:
+ # Apparently the special code for redirects was added in 1.5
+ R = re.compile('title ?=\"(.*?)\"')
+ elif not includeredirects:
+ R = re.compile('\<td(?: width="33%")?\>\<a href=\"\S*\" +title ?="(.*?)"')
+ elif includeredirects == 'only':
+ R = re.compile('\<td(?: width="33%")?>\<[^\<\>]*allpagesredirect\"\>\<a href=\"\S*\" +title ?="(.*?)"')
+ else:
+ R = re.compile('title ?=\"(.*?)\"')
+ # Count the number of useful links on this page
+ n = 0
+ for hit in R.findall(returned_html):
+ # count how many articles we found on the current page
+ n = n + 1
+ if self.versionnumber()==2:
+ yield Page(self, url2link(hit, site = self, insite = self))
+ else:
+ yield Page(self, hit)
+ # save the last hit, so that we know where to continue when we
+ # finished all articles on the current page. Append a '!' so that
+ # we don't yield a page twice.
+ start = Page(self, hit).titleWithoutNamespace() + '!'
+ # A small shortcut: if there are less than 100 pages listed on this
+ # page, there is certainly no next. Probably 480 would do as well,
+ # but better be safe than sorry.
+ if n < 100:
+ if (not includeredirects) or includeredirects == 'only':
+ # Maybe there were only so few because the rest is or is not a redirect
+ R = re.compile('title ?=\"(.*?)\"')
+ allLinks = R.findall(returned_html)
+ if len(allLinks) < 100:
+ break
+ elif n == 0:
+ # In this special case, no pages of the requested type
+ # were found, and "start" will remain and be
double-encoded.
+ # Use the last page as the start of the next page.
+ start = Page(self, allLinks[-1]).titleWithoutNamespace() +
'!'
+ else:
+ break
+
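As the docstring advises, callers normally reach this method through
pagegenerators rather than calling it directly; a hedged sketch (the
generator's keyword names are assumed from the existing pagegenerators
module, not defined in this commit):

    import pagegenerators
    # main-namespace pages from 'M' onward, skipping redirects
    gen = pagegenerators.AllpagesPageGenerator(start=u'M', namespace=0,
                                               includeredirects=False)
    for page in gen:
        print page.title()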
+ def prefixindex(self, prefix, namespace=0, includeredirects=True):
+ """Yield all pages with a given prefix.
+
+ Parameters:
+ prefix The prefix of the pages.
+ namespace Namespace number; defaults to 0.
+ MediaWiki software will only return pages in one namespace
+ at a time.
+
+ If includeredirects is False, redirects will not be found.
+ If includeredirects equals the string 'only', only redirects
+ will be found. Note that this has not been tested on older
+ versions of the MediaWiki code.
+
+ It is advised not to use this directly, but to use the
+ PrefixingPageGenerator from pagegenerators.py instead.
+ """
+ for page in self.allpages(start=prefix, namespace=namespace, includeredirects=includeredirects):
+ if page.titleWithoutNamespace().startswith(prefix):
+ yield page
+ else:
+ break
+
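Since this simply filters allpages() output and stops at the first title
that no longer matches, direct use is straightforward (the prefix below
is illustrative, and 'site' is the assumed Site object from earlier):

    # all main-namespace pages whose titles start with the given prefix
    for page in site.prefixindex(u'List of lakes', namespace=0):
        print page.title()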
+ def linksearch(self, siteurl):
+ """Yield Pages from results of Special:Linksearch for
'siteurl'."""
+ if siteurl.startswith('*.'):
+ siteurl = siteurl[2:]
+ output(u'Querying [[Special:Linksearch]]...')
+ cache = []
+ for url in [siteurl, '*.' + siteurl]:
+ path = self.linksearch_address(url)
+ get_throttle()
+ html = self.getUrl(path)
+ loc = html.find('<div class="mw-spcontent">')
+ if loc > -1:
+ html = html[loc:]
+ loc = html.find('<div class="printfooter">')
+ if loc > -1:
+ html = html[:loc]
+ R = re.compile('title ?=\"(.*?)\"')
+ for title in R.findall(html):
+ if siteurl not in title:
+ # the links themselves have similar form
+ if title in cache:
+ continue
+ else:
+ cache.append(title)
+ yield Page(self, title)
+
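A hedged sketch; the method queries both the bare domain and its '*.'
subdomain form, de-duplicating titles across the two passes:

    # pages containing external links to example.org or its subdomains
    for page in site.linksearch('example.org'):
        print page.title()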
+ def linkto(self, title, othersite = None):
+ """Return unicode string in the form of a wikilink to
'title'
+
+ Use optional Site argument 'othersite' to generate an interwiki link.
+
+ """
+ if othersite and othersite.language() != self.language():
+ return u'[[%s:%s]]' % (self.language(), title)
+ else:
+ return u'[[%s]]' % title
+
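The interwiki form is prefixed with *this* site's language code, so the
result is a link that points back here from 'othersite'. A sketch,
assuming the English-language 'site' from earlier and a second Site
object de_site for German:

    site.linkto(u'Sandbox')                     # u'[[Sandbox]]'
    site.linkto(u'Sandbox', othersite=de_site)  # u'[[en:Sandbox]]'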
+ def isInterwikiLink(self, s):
+ """Return True if s is in the form of an interwiki link.
+
+ Interwiki links have the form "foo:bar" or ":foo:bar" where foo is a
+ known language code or family. Called recursively if the first part
+ of the link refers to this site's own family and/or language.
+
+ """
+ s = s.strip().lstrip(":")
+ if not ':' in s:
+ return False
+ first, rest = s.split(':',1)
+ # interwiki codes are case-insensitive
+ first = first.lower().strip()
+ # commons: forwards interlanguage links to wikipedia:, etc.
+ if self.family().interwiki_forward:
+ interlangTargetFamily = Family(self.family().interwiki_forward)
+ else:
+ interlangTargetFamily = self.family()
+ if self.getNamespaceIndex(first):
+ return False
+ if first in interlangTargetFamily.langs:
+ if first == self.language():
+ return self.isInterwikiLink(rest)
+ else:
+ return True
+ if first in self.family().get_known_families(site = self):
+ if first == self.family().name:
+ return self.isInterwikiLink(rest)
+ else:
+ return True
+ return False
+
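Expected behavior, illustrated for an (assumed) English-language site:

    site.isInterwikiLink(u'de:Hund')        # True: 'de' is a known code
    site.isInterwikiLink(u':en:Dog')        # False: resolves to this site
    site.isInterwikiLink(u'Category:Dogs')  # False: namespace, not a code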
+ def redirect(self, default = False):
+ """Return the localized redirect tag for the site.
+
+ If default is True, falls back to 'REDIRECT' if the site has no
+ special redirect tag.
+
+ """
+ if default:
+ if self.language() == 'ar':
+ # It won't work with REDIRECT[[]], but it works with the local
+ # keyword; if there are problems, try to find a workaround. FixMe!
+ return self.family().redirect.get(self.language(), [u"تحويل"])[0]
+ else:
+ return self.family().redirect.get(self.language(), [u"REDIRECT"])[0]
+ else:
+ return self.family().redirect.get(self.language(), None)
+
+ def redirectRegex(self):
+ """Return a compiled regular expression matching on redirect
pages.
+
+ Group 1 in the regex match object will be the target title.
+
+ """
+ redDefault = 'redirect'
+ red = 'redirect'
+ if self.language() == 'ar':
+ red = u"تحويل"
+ try:
+ if redDefault == red:
+ redirKeywords = [red] + self.family().redirect[self.language()]
+ redirKeywordsR = r'(?:' + '|'.join(redirKeywords) + ')'
+ else:
+ redirKeywords = [red] + self.family().redirect[self.language()]
+ redirKeywordsR = r'(?:' + redDefault + '|' + '|'.join(redirKeywords) + ')'
+ except KeyError:
+ # no localized keyword for redirects
+ if redDefault == red:
+ redirKeywordsR = r'%s' % red
+ else:
+ redirKeywordsR = r'(?:%s|%s)' % (red, redDefault)
+ # A redirect starts with hash (#), followed by a keyword, then
+ # arbitrary stuff, then a wikilink. The wikilink may contain
+ # a label, although this is not useful.
+ return re.compile(r'#' + redirKeywordsR +
+ '.*?\[\[(.*?)(?:\|.*?)?\]\]',
+ re.IGNORECASE | re.UNICODE | re.DOTALL)
+
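A sketch of matching with the compiled expression; group 1 carries the
target title, and matching is case-insensitive:

    regex = site.redirectRegex()
    m = regex.match(u'#REDIRECT [[Main Page]]')
    if m:
        print m.group(1)    # prints: Main Page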
+ # The following methods are for convenience, so that you can access
+ # methods of the Family class easily.
+ def encoding(self):
+ """Return the current encoding for this site."""
+ return self.family().code2encoding(self.language())
+
+ def encodings(self):
+ """Return a list of all historical encodings for this
site."""
+ return self.family().code2encodings(self.language())
+
+ def category_namespace(self):
+ """Return the canonical name of the Category namespace on this
site."""
+ # equivalent to self.namespace(14)?
+ return self.family().category_namespace(self.language())
+
+ def category_namespaces(self):
+ """Return a list of all valid names for the Category
namespace."""
+ return self.family().category_namespaces(self.language())
+
+ def image_namespace(self, fallback = '_default'):
+ """Return the canonical name of the Image namespace on this
site."""
+ # equivalent to self.namespace(6)?
+ return self.family().image_namespace(self.language(), fallback)
+
+ def template_namespace(self, fallback = '_default'):
+ """Return the canonical name of the Template namespace on this
site."""
+ # equivalent to self.namespace(10)?
+ return self.family().template_namespace(self.language(), fallback)
+
+ def export_address(self):
+ """Return URL path for Special:Export."""
+ return self.family().export_address(self.language())
+
+ def query_address(self):
+ """Return URL path + '?' for query.php (if enabled on this
Site)."""
+ return self.family().query_address(self.language())
+
+ def api_address(self):
+ """Return URL path + '?' for api.php (if enabled on this
Site)."""
+ return self.family().api_address(self.language())
+
+ def apipath(self):
+ """Return URL path for api.php (if enabled on this
Site)."""
+ return self.family().apipath(self.language())
+
+ def protocol(self):
+ """Return protocol ('http' or 'https') for access
to this site."""
+ return self.family().protocol(self.language())
+
+ def hostname(self):
+ """Return host portion of site URL."""
+ return self.family().hostname(self.language())
+
+ def path(self):
+ """Return URL path for index.php on this Site."""
+ return self.family().path(self.language())
+
+ def dbName(self):
+ """Return MySQL database name."""
+ return self.family().dbName(self.language())
+
+ def move_address(self):
+ """Return URL path for Special:Movepage."""
+ return self.family().move_address(self.language())
+
+ def delete_address(self, s):
+ """Return URL path to delete title 's'."""
+ return self.family().delete_address(self.language(), s)
+
+ def undelete_view_address(self, s, ts=''):
+ """Return URL path to view Special:Undelete for title 's'
+
+ Optional argument 'ts' returns path to view specific deleted version.
+
+ """
+ return self.family().undelete_view_address(self.language(), s, ts)
+
+ def undelete_address(self):
+ """Return URL path to Special:Undelete."""
+ return self.family().undelete_address(self.language())
+
+ def protect_address(self, s):
+ """Return URL path to protect title
's'."""
+ return self.family().protect_address(self.language(), s)
+
+ def unprotect_address(self, s):
+ """Return URL path to unprotect title
's'."""
+ return self.family().unprotect_address(self.language(), s)
+
+ def put_address(self, s):
+ """Return URL path to submit revision to page titled
's'."""
+ return self.family().put_address(self.language(), s)
+
+ def get_address(self, s):
+ """Return URL path to retrieve page titled
's'."""
+ return self.family().get_address(self.language(), s)
+
+ def nice_get_address(self, s):
+ """Return shorter URL path to retrieve page titled
's'."""
+ return self.family().nice_get_address(self.language(), s)
+
+ def edit_address(self, s):
+ """Return URL path for edit form for page titled
's'."""
+ return self.family().edit_address(self.language(), s)
+
+ def purge_address(self, s):
+ """Return URL path to purge cache and retrieve page
's'."""
+ return self.family().purge_address(self.language(), s)
+
+ def block_address(self):
+ """Return path to block an IP address."""
+ return self.family().block_address(self.language())
+
+ def unblock_address(self):
+ """Return path to unblock an IP address."""
+ return self.family().unblock_address(self.language())
+
+ def blocksearch_address(self, s):
+ """Return path to search for blocks on IP address
's'."""
+ return self.family().blocksearch_address(self.language(), s)
+
+ def linksearch_address(self, s, limit=500, offset=0):
+ """Return path to Special:Linksearch for target
's'."""
+ return self.family().linksearch_address(self.language(), s, limit=limit,
offset=offset)
+
+ def search_address(self, q, n=50, ns=0):
+ """Return path to Special:Search for query
'q'."""
+ return self.family().search_address(self.language(), q, n, ns)
+
+ def allpages_address(self, s, ns = 0):
+ """Return path to Special:Allpages."""
+ return self.family().allpages_address(self.language(), start=s, namespace = ns)
+
+ def log_address(self, n=50, mode = ''):
+ """Return path to Special:Log."""
+ return self.family().log_address(self.language(), n, mode)
+
+ def newpages_address(self, n=50):
+ """Return path to Special:Newpages."""
+ return self.family().newpages_address(self.language(), n)
+
+ def longpages_address(self, n=500):
+ """Return path to Special:Longpages."""
+ return self.family().longpages_address(self.language(), n)
+
+ def shortpages_address(self, n=500):
+ """Return path to Special:Shortpages."""
+ return self.family().shortpages_address(self.language(), n)
+
+ def unusedfiles_address(self, n=500):
+ """Return path to Special:Unusedimages."""
+ return self.family().unusedfiles_address(self.language(), n)
+
+ def categories_address(self, n=500):
+ """Return path to Special:Categories."""
+ return self.family().categories_address(self.language(), n)
+
+ def deadendpages_address(self, n=500):
+ """Return path to Special:Deadendpages."""
+ return self.family().deadendpages_address(self.language(), n)
+
+ def ancientpages_address(self, n=500):
+ """Return path to Special:Ancientpages."""
+ return self.family().ancientpages_address(self.language(), n)
+
+ def lonelypages_address(self, n=500):
+ """Return path to Special:Lonelypages."""
+ return self.family().lonelypages_address(self.language(), n)
+
+ def unwatchedpages_address(self, n=500):
+ """Return path to Special:Unwatchedpages."""
+ return self.family().unwatchedpages_address(self.language(), n)
+
+ def uncategorizedcategories_address(self, n=500):
+ """Return path to
Special:Uncategorizedcategories."""
+ return self.family().uncategorizedcategories_address(self.language(), n)
+
+ def uncategorizedimages_address(self, n=500):
+ """Return path to Special:Uncategorizedimages."""
+ return self.family().uncategorizedimages_address(self.language(), n)
+
+ def uncategorizedpages_address(self, n=500):
+ """Return path to Special:Uncategorizedpages."""
+ return self.family().uncategorizedpages_address(self.language(), n)
+
+ def unusedcategories_address(self, n=500):
+ """Return path to Special:Unusedcategories."""
+ return self.family().unusedcategories_address(self.language(), n)
+
+ def withoutinterwiki_address(self, n=500):
+ """Return path to Special:Withoutinterwiki."""
+ return self.family().withoutinterwiki_address(self.language(), n)
+
+ def references_address(self, s):
+ """Return path to Special:Whatlinksere for page
's'."""
+ return self.family().references_address(self.language(), s)
+
+ def allmessages_address(self):
+ """Return path to Special:Allmessages."""
+ return self.family().allmessages_address(self.language())
+
+ def upload_address(self):
+ """Return path to Special:Upload."""
+ return self.family().upload_address(self.language())
+
+ def double_redirects_address(self, default_limit = True):
+ """Return path to Special:Doubleredirects."""
+ return self.family().double_redirects_address(self.language(), default_limit)
+
+ def broken_redirects_address(self, default_limit = True):
+ """Return path to Special:Brokenredirects."""
+ return self.family().broken_redirects_address(self.language(), default_limit)
+
+ def login_address(self):
+ """Return path to Special:Userlogin."""
+ return self.family().login_address(self.language())
+
+ def captcha_image_address(self, id):
+ """Return path to Special:Captcha for image
'id'."""
+ return self.family().captcha_image_address(self.language(), id)
+
+ def watchlist_address(self):
+ """Return path to Special:Watchlist editor."""
+ return self.family().watchlist_address(self.language())
+
+ def contribs_address(self, target, limit=500, offset=''):
+ """Return path to Special:Contributions for user
'target'."""
+ return self.family().contribs_address(self.language(),target,limit,offset)
+
+ def __hash__(self):
+ return hash(repr(self))
+
+ def version(self):
+ """Return MediaWiki version number as a string."""
+ return self.family().version(self.language())
+
+ def versionnumber(self):
+ """Return an int identifying MediaWiki version.
+
+ Currently this is implemented as returning the minor version
+ number; i.e., 'X' in version '1.X.Y'
+
+ """
+ return self.family().versionnumber(self.language())
+
+ def live_version(self):
+ """Return the 'real' version number found on
[[Special:Version]]
+
+ Return value is a tuple (int, int, str) of the major and minor
+ version numbers and any other text contained in the version.
+
+ """
+ if not hasattr(self, "_mw_version"):
+ versionpage = self.getUrl(self.get_address("Special:Version"))
+ htmldata = BeautifulSoup(versionpage, convertEntities="html")
+ versionstring = htmldata.findAll(text="MediaWiki"
+ )[1].parent.nextSibling
+ m = re.match(r"^: ([0-9]+)\.([0-9]+)(.*)$", str(versionstring))
+ if m:
+ self._mw_version = (int(m.group(1)), int(m.group(2)),
+ m.group(3))
+ else:
+ self._mw_version = self.family().version(self.language()).split(".")
+ return self._mw_version
+
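Callers can branch on the returned tuple; note, as a caveat visible in
the code above, that the fallback path returns the family file's version
string split on '.', whose components are strings rather than ints. A
hedged sketch that guards against both forms:

    major, minor, trailing = site.live_version()
    if (int(major), int(minor)) >= (1, 12):
        print "wiki runs MediaWiki 1.12 or later"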
+ def checkCharset(self, charset):
+ """Warn if charset returned by wiki doesn't match family
file."""
+ if not hasattr(self,'charset'):
+ self.charset = charset
+ assert self.charset.lower() == charset.lower(), \
+ "charset for %s changed from %s to %s" \
+ % (repr(self), self.charset, charset)
+ if self.encoding().lower() != charset.lower():
+ raise ValueError(
+"code2encodings has wrong charset for %s. It should be %s, but is %s"
+ % (repr(self), charset, self.encoding()))
+
+ def shared_image_repository(self):
+ """Return a tuple of image repositories used by this
site."""
+ return self.family().shared_image_repository(self.language())
+
+ def __cmp__(self, other):
+ """Perform equality and inequality tests on Site
objects."""
+ if not isinstance(other, Site):
+ return 1
+ if self.family() == other.family:
+ return cmp(self.language() ,other.lang)
+ return cmp(self.family().name, other.family.name)
+
+ def category_on_one_line(self):
+ """Return True if this site wants all category links on one
line."""
+ return self.language() in self.family().category_on_one_line
+
+ def interwiki_putfirst(self):
+ """Return list of language codes for ordering of interwiki
links."""
+ return self.family().interwiki_putfirst.get(self.language(), None)
+
+ def interwiki_putfirst_doubled(self, list_of_links):
+ # TODO: is this even needed? No family in the framework has this
+ # dictionary defined!
+ if self.family().interwiki_putfirst_doubled.has_key(self.language()):
+ if len(list_of_links) >= self.family().interwiki_putfirst_doubled[self.language()][0]:
+ list_of_links2 = []
+ for lang in list_of_links:
+ list_of_links2.append(lang.language())
+ links = []
+ for lang in self.family().interwiki_putfirst_doubled[self.language()][1]:
+ try:
+ links.append(list_of_links[list_of_links2.index(lang)])
+ except ValueError:
+ pass
+ return links
+ else:
+ return False
+ else:
+ return False
+
+ def getSite(self, code):
+ """Return Site object for language 'code' in this
Family."""
+ return getSite(code = code, fam = self.family(), user=self.user)
+
+ def namespace(self, num, all = False):
+ """Return string containing local name of namespace
'num'.
+
+ If optional argument 'all' is true, return a tuple of all recognized
+ values for this namespace.
+
+ """
+ return self.family().namespace(self.language(), num, all = all)
+
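Illustrative values only; the actual names come from the family files and
vary by language:

    site.namespace(14)            # e.g. u'Category'
    site.namespace(14, all=True)  # every recognized name for namespace 14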
+ def normalizeNamespace(self, value):
+ """Return canonical name for namespace 'value' in this
Site's language.
+
+ 'Value' should be a string or unicode.
+ If no match, return 'value' unmodified.
+
+ """
+ if not self.nocapitalize and value[0].islower():
+ value = value[0].upper() + value[1:]
+ return self.family().normalizeNamespace(self.language(), value)
+
+ def namespaces(self):
+ """Return list of canonical namespace names for this
Site."""
+
+ # n.b.: this does not return namespace numbers; to determine which
+ # numeric namespaces the framework recognizes for this Site (which
+ # may or may not actually exist on the wiki), use
+ # self.family().namespaces.keys()
+
+ if _namespaceCache.has_key(self):
+ return _namespaceCache[self]
+ else:
+ nslist = []
+ for n in self.family().namespaces:
+ try:
+ ns = self.family().namespace(self.language(), n)
+ except KeyError:
+ # No default namespace defined
+ continue
+ if ns is not None:
+ nslist.append(self.family().namespace(self.language(), n))
+ _namespaceCache[self] = nslist
+ return nslist
+
+ def validLanguageLinks(self):
+ """Return list of language codes that can be used in interwiki
links."""
+ return self._validlanguages
+
+ def disambcategory(self):
+ """Return Category in which disambig pages are
listed."""
+ import catlib
+ try:
+ return catlib.Category(self,
+ self.namespace(14) + ':' + self.family().disambcatname[self.language()])
+ except KeyError:
+ raise NoPage
+
+ def getToken(self, getalways = True, getagain = False, sysop = False):
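+ """Return edit token for this user; False if none is available."""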
+ index = self._userIndex(sysop)
+ if getagain or (getalways and self._token[index] is None):
+ output(u'Getting a token.')
+ self._load(sysop = sysop)
+ if self._token[index] is not None:
+ return self._token[index]
+ else:
+ return False
+
Modified: branches/rewrite/pywikibot/tests/api_tests.py
===================================================================
--- branches/rewrite/pywikibot/tests/api_tests.py 2008-02-27 20:05:28 UTC (rev 5087)
+++ branches/rewrite/pywikibot/tests/api_tests.py 2008-02-27 20:08:48 UTC (rev 5088)
@@ -25,7 +25,7 @@
self.assert_(all(len(item) == 2 for item in req.items()))
-class TestListGenerator(unittest.TestCase):
+class TestPageGenerator(unittest.TestCase):
def setUp(self):
self.gen = api.PageGenerator(site=mysite,
generator="links",