Revision: 6156
Author:   russblau
Date:     2008-12-16 19:40:20 +0000 (Tue, 16 Dec 2008)

Log Message:
-----------
update properties

Modified Paths:
--------------
    branches/rewrite/pywikibot/catlib.py
    branches/rewrite/pywikibot/exceptions.py
    branches/rewrite/pywikibot/page.py
    branches/rewrite/pywikibot/pagegenerators.py
    branches/rewrite/pywikibot/site.py
    branches/rewrite/pywikibot/textlib.py
    branches/rewrite/pywikibot/throttle.py
    branches/rewrite/pywikibot/tools.py

Property Changed:
----------------
    branches/rewrite/pywikibot/__init__.py
    branches/rewrite/pywikibot/bot.py
    branches/rewrite/pywikibot/catlib.py
    branches/rewrite/pywikibot/exceptions.py
    branches/rewrite/pywikibot/page.py
    branches/rewrite/pywikibot/pagegenerators.py
    branches/rewrite/pywikibot/site.py
    branches/rewrite/pywikibot/textlib.py
    branches/rewrite/pywikibot/throttle.py
    branches/rewrite/pywikibot/tools.py

Property changes on: branches/rewrite/pywikibot/__init__.py
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision

Property changes on: branches/rewrite/pywikibot/bot.py
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision

Modified: branches/rewrite/pywikibot/catlib.py
===================================================================
--- branches/rewrite/pywikibot/catlib.py    2008-12-16 19:34:48 UTC (rev 6155)
+++ branches/rewrite/pywikibot/catlib.py    2008-12-16 19:40:20 UTC (rev 6156)
@@ -1,22 +1,22 @@
-# -*- coding: utf-8 -*-
-"""
-WARNING: THIS MODULE EXISTS SOLELY TO PROVIDE BACKWARDS-COMPATIBILITY.
-
-Do not use in new scripts; use the source to find the appropriate
-function/method instead.
-
-"""
-#
-# (C) Pywikipedia bot team, 2008
-#
-# Distributed under the terms of the MIT license.
-#
-__version__ = '$Id: $'
-
-
-from pywikibot import Category
-
-
-def change_category(article, oldCat, newCat, comment=None, sortKey=None,
-                    inPlace=True):
-    return article.change_category(oldCat, newCat, comment, sortKey, inPlace)
+# -*- coding: utf-8 -*-
+"""
+WARNING: THIS MODULE EXISTS SOLELY TO PROVIDE BACKWARDS-COMPATIBILITY.
+
+Do not use in new scripts; use the source to find the appropriate
+function/method instead.
+
+"""
+#
+# (C) Pywikipedia bot team, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+
+
+from pywikibot import Category
+
+
+def change_category(article, oldCat, newCat, comment=None, sortKey=None,
+                    inPlace=True):
+    return article.change_category(oldCat, newCat, comment, sortKey, inPlace)

Property changes on: branches/rewrite/pywikibot/catlib.py
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision
Added: svn:eol-style
   + native

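The catlib.py module above exists only as a backwards-compatibility shim: it forwards the old function-style call to the new Page.change_category() method. A minimal sketch of both calling styles, assuming a configured user-config.py; the page and category titles are placeholders, not part of this commit::

    import pywikibot
    from pywikibot import catlib

    site = pywikibot.Site()                     # default site from user-config.py
    page = pywikibot.Page(site, u"Example page", ns=0)
    old_cat = pywikibot.Category(site, u"Category:Old name")
    new_cat = pywikibot.Category(site, u"Category:New name")

    # Old-style call, kept only so existing scripts keep working:
    catlib.change_category(page, old_cat, new_cat, comment=u"Recategorizing")
    # ...which simply delegates to the rewritten Page method:
    #   page.change_category(old_cat, new_cat, comment=u"Recategorizing")
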
Modified: branches/rewrite/pywikibot/exceptions.py
===================================================================
--- branches/rewrite/pywikibot/exceptions.py    2008-12-16 19:34:48 UTC (rev 6155)
+++ branches/rewrite/pywikibot/exceptions.py    2008-12-16 19:40:20 UTC (rev 6156)
@@ -1,87 +1,87 @@
-# -*- coding: utf-8 -*-
-"""
-Exception classes used throughout the framework.
-"""
-#
-# (C) Pywikipedia bot team, 2008
-#
-# Distributed under the terms of the MIT license.
-#
-__version__ = '$Id: $'
-
-
-import sys
-
-# TODO: These are copied from wikipedia.py; not certain that all of them
-# will be needed in the rewrite.
-
-class Error(Exception):
-    """Wikipedia error"""
-    def __init__(self, arg):
-        try:
-            self.string = arg.encode(sys.stderr.encoding, "xmlcharrefreplace")
-        except (AttributeError, TypeError):
-            self.string = arg.encode("ascii", "xmlcharrefreplace")
-    def __str__(self):
-        return self.string
-
-class NoUsername(Error):
-    """Username is not in user-config.py"""
-
-class NoPage(Error):
-    """Page does not exist"""
-
-class NoSuchSite(Error):
-    """Site does not exist"""
-
-class IsRedirectPage(Error):
-    """Page is a redirect page"""
-
-class IsNotRedirectPage(Error):
-    """Page is not a redirect page"""
-
-class CircularRedirect(Error):
-    """Page is a circular redirect
-
-    Exception argument is the redirect target; this may be the same title
-    as this page or a different title (in which case the target page directly
-    or indirectly redirects back to this one)
-
-    """
-
-class LockedPage(Error):
-    """Page is locked"""
-
-class SectionError(Error):
-    """The section specified by # does not exist"""
-
-class PageNotSaved(Error):
-    """Saving the page has failed"""
-
-class EditConflict(PageNotSaved):
-    """There has been an edit conflict while uploading the page"""
-
-class SpamfilterError(PageNotSaved):
-    """Saving the page has failed because the MediaWiki spam filter detected a blacklisted URL."""
-    def __init__(self, arg):
-        self.url = arg
-        self.args = arg,
-
-class ServerError(Error):
-    """Got unexpected server response"""
-
-class BadTitle(Error):
-    """Server responded with BadTitle."""
-
-# UserBlocked exceptions should in general not be caught. If the bot has
-# been blocked, the bot operator should address the reason for the block
-# before continuing.
-class UserBlocked(Error):
-    """Your username or IP has been blocked"""
-
-class PageNotFound(Error):
-    """Page not found in list"""
-
-class CaptchaError(Error):
-    """Captcha is asked and config.solve_captcha == False."""
-
+# -*- coding: utf-8 -*-
+"""
+Exception classes used throughout the framework.
+"""
+#
+# (C) Pywikipedia bot team, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+
+
+import sys
+
+# TODO: These are copied from wikipedia.py; not certain that all of them
+# will be needed in the rewrite.
+
+class Error(Exception):
+    """Wikipedia error"""
+    def __init__(self, arg):
+        try:
+            self.string = arg.encode(sys.stderr.encoding, "xmlcharrefreplace")
+        except (AttributeError, TypeError):
+            self.string = arg.encode("ascii", "xmlcharrefreplace")
+    def __str__(self):
+        return self.string
+
+class NoUsername(Error):
+    """Username is not in user-config.py"""
+
+class NoPage(Error):
+    """Page does not exist"""
+
+class NoSuchSite(Error):
+    """Site does not exist"""
+
+class IsRedirectPage(Error):
+    """Page is a redirect page"""
+
+class IsNotRedirectPage(Error):
+    """Page is not a redirect page"""
+
+class CircularRedirect(Error):
+    """Page is a circular redirect
+
+    Exception argument is the redirect target; this may be the same title
+    as this page or a different title (in which case the target page directly
+    or indirectly redirects back to this one)
+
+    """
+
+class LockedPage(Error):
+    """Page is locked"""
+
+class SectionError(Error):
+    """The section specified by # does not exist"""
+
+class PageNotSaved(Error):
+    """Saving the page has failed"""
+
+class EditConflict(PageNotSaved):
+    """There has been an edit conflict while uploading the page"""
+
+class SpamfilterError(PageNotSaved):
+    """Saving the page has failed because the MediaWiki spam filter detected a blacklisted URL."""
+    def __init__(self, arg):
+        self.url = arg
+        self.args = arg,
+
+class ServerError(Error):
+    """Got unexpected server response"""
+
+class BadTitle(Error):
+    """Server responded with BadTitle."""
+
+# UserBlocked exceptions should in general not be caught. If the bot has
+# been blocked, the bot operator should address the reason for the block
+# before continuing.
+class UserBlocked(Error):
+    """Your username or IP has been blocked"""
+
+class PageNotFound(Error):
+    """Page not found in list"""
+
+class CaptchaError(Error):
+    """Captcha is asked and config.solve_captcha == False."""
+

Property changes on: branches/rewrite/pywikibot/exceptions.py
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision
Added: svn:eol-style
   + native

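These exception classes are what bot code is expected to catch; for example, Page.get() in the page.py diff below documents NoPage, IsRedirectPage and SectionError. A minimal sketch (not part of this commit; the page title is a placeholder)::

    import pywikibot

    site = pywikibot.Site()
    page = pywikibot.Page(site, u"Some title", ns=0)

    try:
        text = page.get()
    except pywikibot.NoPage:
        print "Page does not exist"
    except pywikibot.IsRedirectPage, err:
        # the exception argument is the title the page redirects to
        print "Page is a redirect:", err
    except pywikibot.SectionError:
        print "The #section given in the title does not exist"
    else:
        print "Fetched %d characters" % len(text)
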
Modified: branches/rewrite/pywikibot/page.py =================================================================== --- branches/rewrite/pywikibot/page.py 2008-12-16 19:34:48 UTC (rev 6155) +++ branches/rewrite/pywikibot/page.py 2008-12-16 19:40:20 UTC (rev 6156) @@ -1,1886 +1,1886 @@ -# -*- coding: utf-8 -*- -""" -Objects representing various types of MediaWiki pages. -""" -# -# (C) Pywikipedia bot team, 2008 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id: $' - -import pywikibot -from pywikibot import deprecate_arg -from pywikibot import config -import pywikibot.site -import pywikibot.textlib - -import htmlentitydefs -import logging -import re -import sys -import threading -import unicodedata -import urllib - -logger = logging.getLogger("wiki") - -reNamespace = re.compile("^(.+?) *: *(.*)$") - - -class Page(object): - """Page: A MediaWiki page - - This object only implements internally methods that do not require - reading from or writing to the wiki. All other methods are delegated - to the Site object. - - """ - - @deprecate_arg("insite", None) - @deprecate_arg("defaultNamespace", None) - def __init__(self, source, title=u"", ns=0): - """Instantiate a Page object. - - Three calling formats are supported: - - - If the first argument is a Page, create a copy of that object. - This can be used to convert an existing Page into a subclass - object, such as Category or ImagePage. (If the title is also - given as the second argument, creates a copy with that title; - this is used when pages are moved.) - - If the first argument is a Site, create a Page on that Site - using the second argument as the title (may include a section), - and the third as the namespace number. The namespace number is - mandatory, even if the title includes the namespace prefix. This - is the preferred syntax when using an already-normalized title - obtained from api.php or a database dump. WARNING: may produce - invalid objects if page title isn't in normal form! - - If the first argument is a Link, create a Page from that link. - This is the preferred syntax when using a title scraped from - wikitext, URLs, or another non-normalized source. - - @param source: the source of the page - @type source: Link, Page (or subclass), or Site - @param title: normalized title of the page; required if source is a - Site, ignored otherwise - @type title: unicode - @param ns: namespace number; required if source is a Site, ignored - otherwise - @type ns: int - - """ - if isinstance(source, pywikibot.site.BaseSite): - self._site = source - if ns not in source.namespaces(): - raise pywikibot.Error( - "Invalid namespace '%i' for site %s." 
- % (ns, source.sitename())) - self._ns = ns - if ns and not title.startswith(source.namespace(ns)+u":"): - title = source.namespace(ns) + u":" + title - elif not ns and u":" in title: - pos = title.index(u':') - nsindex = source.ns_index(title[ :pos]) - if nsindex: - self._ns = nsindex - if u"#" in title: - title, self._section = title.split(u"#", 1) - else: - self._section = None - if not title: - raise pywikibot.Error( - "Page object cannot be created from Site without title.") - self._title = title - elif isinstance(source, Page): - # copy all of source's attributes to this object - self.__dict__ = source.__dict__ - if title: - # overwrite title - if ":" in title: - prefix = title[ :title.index(":")] - self._ns = site.ns_index(prefix) - if self._ns is None: - self._ns = 0 - else: - title = title[title.index(":")+1 : ].strip(" _") - self._title = "%s:%s" % ( - self.site().namespace(self._ns), - self._title) - else: - self._ns = 0 - if "#" in title: - self._section = title[title.index("#") + 1 : ].strip(" _") - title = title[ : title.index("#")].strip(" _") - self._title = title - elif isinstance(source, Link): - self._site = source.site - self._section = source.section - self._ns = source.namespace - self._title = source.title - # reassemble the canonical title from components - if self._ns: - self._title = "%s:%s" % (self.site().namespace(self._ns), - self._title) - else: - raise pywikibot.Error( - "Invalid argument type '%s' in Page constructor: %s" - % (type(source), source)) - if self._section is not None: - self._title = self._title + "#" + self._section - self._revisions = {} - - def site(self): - """Return the Site object for the wiki on which this Page resides.""" - return self._site - - def namespace(self): - """Return the number of the namespace of the page.""" - return self._ns - - @deprecate_arg("decode", None) - @deprecate_arg("savetitle", "asUrl") - def title(self, underscore=False, savetitle=False, withNamespace=True, - withSection=True, asUrl=False, asLink=False, - allowInterwiki=True, forceInterwiki=False, textlink=False, - as_filename=False): - """Return the title of this Page, as a Unicode string. 
- - @param underscore: if true, replace all ' ' characters with '_' - @param withNamespace: if false, omit the namespace prefix - @param withSection: if false, omit the section - @param asUrl: if true, quote title as if in an URL - @param asLink: if true, return the title in the form of a wikilink - @param allowInterwiki: (only used if asLink is true) if true, format - the link as an interwiki link if necessary - @param forceInterwiki: (only used if asLink is true) if true, always - format the link as an interwiki link - @param textlink: (only used if asLink is true) if true, place a ':' - before Category: and Image: links - @param as_filename: if true, replace any characters that are unsafe - in filenames - - """ - title = self._title - if not withNamespace and self._ns != 0: - title = title.split(u':', 1)[1] - if not withSection and self._section: - title = title.split(u'#', 1)[0] - if underscore or asUrl: - title = title.replace(u' ', u'_') - if asUrl: - encodedTitle = title.encode(self.site().encoding()) - title = urllib.quote(encodedTitle) - if asLink: - if forceInterwiki or (allowInterwiki and - (self.site().family.name != config.family - or self.site().code != config.mylang)): - if self.site().family.name != config.family \ - and self.site().family.name != self.site().code: - return u'[[%s:%s:%s]]' % (self.site().family.name, - self.site().code, - self._title) - else: - # use this form for sites like commons, where the - # code is the same as the family name - return u'[[%s:%s]]' % (self.site().code, - self._title) - elif textlink and (self.isImage() or self.isCategory()): - return u'[[:%s]]' % title - else: - return u'[[%s]]' % title - if as_filename: - # Replace characters that are not possible in file names on some - # systems. - # Spaces are possible on most systems, but are bad for URLs. - for forbidden in ':*?/\ ': - title = title.replace(forbidden, '_') - return title - - @deprecate_arg("decode", None) - @deprecate_arg("underscore", None) - def section(self): - """Return the name of the section this Page refers to. - - The section is the part of the title following a '#' character, if - any. If no section is present, return None. - - """ - if self._section: - return self._section - else: - return None - - def __str__(self): - """Return a console representation of the pagelink.""" - return self.title(asLink=True, forceInterwiki=True - ).encode(sys.stderr.encoding) - - def __unicode__(self): - return self.title(asLink=True, forceInterwiki=True) - - def __repr__(self): - """Return a more complete string representation.""" - return u"%s(%s)" % (self.__class__.__name__, - self.title().encode(sys.stderr.encoding)) - - def __cmp__(self, other): - """Test for equality and inequality of Page objects. - - Page objects are "equal" if and only if they are on the same site - and have the same normalized title, including section if any. - - Page objects are sortable by namespace first, then by title. - - """ - if not isinstance(other, Page): - # especially, return -1 if other is None - return -1 - if not self.site() == other.site(): - return cmp(self.site(), other.site()) - if self.namespace() != other.namespace(): - return cmp(self.namespace(), other.namespace()) - owntitle = self.title(withNamespace=False) - othertitle = other.title(withNamespace=False) - return cmp(owntitle, othertitle) - - def __hash__(self): - # Pseudo method that makes it possible to store Page objects as keys - # in hash-tables. 
This relies on the fact that the string - # representation of an instance can not change after the construction. - return hash(unicode(self)) - - def autoFormat(self): - """Return L{date.autoFormat} dictName and value, if any. - - Value can be a year, date, etc., and dictName is 'YearBC', - 'Year_December', or another dictionary name. Please note that two - entries may have exactly the same autoFormat, but be in two - different namespaces, as some sites have categories with the - same names. Regular titles return (None, None). - - """ - if not hasattr(self, '_autoFormat'): - from pywikibot import date - self._autoFormat = date.getAutoFormat( - self.site().code, - self.title(withNamespace=False) - ) - return self._autoFormat - - def isAutoTitle(self): - """Return True if title of this Page is in the autoFormat dictionary.""" - return self.autoFormat()[0] is not None - - @deprecate_arg("throttle", None) - @deprecate_arg("nofollow_redirects", None) - @deprecate_arg("change_edit_time", None) - def get(self, force=False, get_redirect=False, sysop=False): - """Return the wiki-text of the page. - - This will retrieve the page from the server if it has not been - retrieved yet, or if force is True. This can raise the following - exceptions that should be caught by the calling code: - - - NoPage: The page does not exist - - IsRedirectPage: The page is a redirect. The argument of the - exception is the title of the page it redirects to. - - SectionError: The section does not exist on a page with a # - link - - @param force: reload all page attributes, including errors. - @param get_redirect: return the redirect text, do not follow the - redirect, do not raise an exception. - @param sysop: if the user has a sysop account, use it to retrieve - this page - - """ - if force: - # When forcing, we retry the page no matter what. Old exceptions - # do not apply any more. - for attr in ['_redirarg', '_getexception']: - if hasattr(self, attr): - delattr(self,attr) - else: - # Make sure we re-raise an exception we got on an earlier attempt - if hasattr(self, '_redirarg') and not get_redirect: - raise pywikibot.IsRedirectPage, self._redirarg - elif hasattr(self, '_getexception'): - raise self._getexception - if force or not hasattr(self, "_revid") \ - or not self._revid in self._revisions \ - or self._revisions[self._revid].text is None: - self.site().loadrevisions(self, getText=True, sysop=sysop) - # TODO: Exception handling for no-page, redirects, etc. - - return self._revisions[self._revid].text - - @deprecate_arg("throttle", None) - @deprecate_arg("nofollow_redirects", None) - @deprecate_arg("change_edit_time", None) - def getOldVersion(self, oldid, force=False, get_redirect=False, - sysop=False): - """Return text of an old revision of this page; same options as get(). - - @param oldid: The revid of the revision desired. - - """ - if force or not oldid in self._revisions \ - or self._revisions[oldid].text is None: - self.site().loadrevisions(self, getText=True, revids=oldid, - sysop=sysop) - # TODO: what about redirects, errors? 
- return self._revisions[oldid].text - - def permalink(self): - """Return the permalink URL for current revision of this page.""" - return "%s://%s/%sindex.php?title=%s&oldid=%s" \ - % (self.site().protocol(), - self.site().hostname(), - self.site().scriptpath(), - self.title(asUrl=True), - self.latestRevision()) - - def latestRevision(self): - """Return the current revision id for this page.""" - if not hasattr(self, '_revid'): - self.site().loadrevisions(self) - return self._revid - - def _textgetter(self): - """Return the current (edited) wikitext, loading it if necessary.""" - if not hasattr(self, '_text') or self._text is None: - try: - self._text = self.get() - except pywikibot.NoPage: - # TODO: what other exceptions might be returned? - self._text = u"" - return self._text - - def _textsetter(self, value): - """Update the edited wikitext""" - self._text = unicode(value) - - def _cleartext(self): - """Delete the edited wikitext""" - if hasattr(self, "_text"): - del self._text - - text = property(_textgetter, _textsetter, _cleartext, - "The edited wikitext (unicode) of this Page") - - def expand_text(self): - """Return the page text with all templates expanded.""" - req = pywikibot.data.api.Request(action="expandtemplates", - text=self.text, - title=self.title(withSection=False), - site=self.site()) - result = req.submit() - return result["expandtemplates"]["*"] - - def userName(self): - """Return name or IP address of last user to edit page.""" - return self._revisions[self.latestRevision()].user - - def isIpEdit(self): - """Return True if last editor was unregistered.""" - return self._revisions[self.latestRevision()].anon - - def editTime(self): - """Return timestamp (in ISO 8601 format) of last revision to page.""" - return self._revisions[self.latestRevision()].timestamp - - def previousRevision(self): - """Return the revision id for the previous revision of this Page.""" - vh = self.getVersionHistory(revCount=2) - revkey = sorted(self._revisions.keys(), reverse=True)[1] - return revkey - - def exists(self): - """Return True if page exists on the wiki, even if it's a redirect. - - If the title includes a section, return False if this section isn't - found. - - """ - return self.site().page_exists(self) - - def isRedirectPage(self): - """Return True if this is a redirect, False if not or not existing.""" - return self.site().page_isredirect(self) - - def isEmpty(self): - """Return True if the page text has less than 4 characters. - - Character count ignores language links and category links. - Can raise the same exceptions as get(). - - """ - txt = self.get() - txt = pywikibot.textlib.removeLanguageLinks(txt, site = self.site()) - txt = pywikibot.textlib.removeCategoryLinks(txt, site = self.site()) - if len(txt) < 4: - return True - else: - return False - - def isTalkPage(self): - """Return True if this page is in any talk namespace.""" - ns = self.namespace() - return ns >= 0 and ns % 2 == 1 - - def toggleTalkPage(self): - """Return other member of the article-talk page pair for this Page. - - If self is a talk page, returns the associated content page; - otherwise, returns the associated talk page. The returned page need - not actually exist on the wiki. - - Returns None if self is a special page. 
- - """ - ns = self.namespace() - if ns < 0: # Special page - return None - if self.isTalkPage(): - if self.namespace() == 1: - return Page(self.site(), self.title(withNamespace=False)) - else: - return Page(self.site(), - self.site().namespace(ns - 1) + ':' - + self.title(withNamespace=False)) - else: - return Page(self.site(), - self.site().namespace(ns + 1) + ':' - + self.title(withNamespace=False)) - - def isCategory(self): - """Return True if the page is a Category, False otherwise.""" - return self.namespace() == 14 - - def isImage(self): - """Return True if this is an image description page, False otherwise.""" - return self.namespace() == 6 - - def isDisambig(self): - """Return True if this is a disambiguation page, False otherwise. - - Relies on the presence of specific templates, identified in - the Family file or on a wiki page, to identify disambiguation - pages. - - By default, loads a list of template names from the Family file; - if the value in the Family file is None, looks for the list on - [[MediaWiki:Disambiguationspage]]. - - """ - if not hasattr(self, "_isDisambig"): - if not hasattr(self.site(), "_disambigtemplates"): - self.site()._disambigtemplates = \ - self.site().family.disambig(self.site().code) - if self.site()._disambigtemplates is None: - try: - disambigpages = Page(self.site(), - "MediaWiki:Disambiguationspage") - self.site()._disambigtemplates = [ - link.title(withNamespace=False) - for link in disambigpages.linkedPages() - if link.namespace() == 10 - ] - except NoPage: - self.site()._disambigtemplates = ['Disambig'] - for t in self.templates(): - if t.title(withNamespace=False) in self.site()._disambigtemplates: - self._isDisambig = True - break - else: - self._isDisambig = False - return self._isDisambig - - def getReferences(self, follow_redirects=True, withTemplateInclusion=True, - onlyTemplateInclusion=False, redirectsOnly=False, - namespaces=None): - """Return an iterator all pages that refer to or embed the page. - - If you need a full list of referring pages, use - C{pages = list(s.getReferences())} - - @param follow_redirects: if True, also iterate pages that link to a - redirect pointing to the page. - @param withTemplateInclusion: if True, also iterate pages where self - is used as a template. - @param onlyTemplateInclusion: if True, only iterate pages where self - is used as a template. - @param redirectsOnly: if True, only iterate redirects to self. - @param namespaces: only iterate pages in these namespaces - - """ - # N.B.: this method intentionally overlaps with backlinks() and - # embeddedin(). Depending on the interface, it may be more efficient - # to implement those methods in the site interface and then combine - # the results for this method, or to implement this method and then - # split up the results for the others. - return self.site().pagereferences( - self, follow_redirects, redirectsOnly, - withTemplateInclusion, onlyTemplateInclusion, - namespaces) - - def backlinks(self, followRedirects=True, filterRedirects=None, - namespaces=None): - """Return an iterator for pages that link to this page. - - @param followRedirects: if True, also iterate pages that link to a - redirect pointing to the page. 
- @param filterRedirects: if True, only iterate redirects; if False, - omit redirects; if None, do not filter - @param namespaces: only iterate pages in these namespaces - - """ - return self.site().pagebacklinks(self, followRedirects, filterRedirects, - namespaces) - - def embeddedin(self, filter_redirects=None, namespaces=None): - """Return an iterator for pages that embed this page as a template. - - @param filterRedirects: if True, only iterate redirects; if False, - omit redirects; if None, do not filter - @param namespaces: only iterate pages in these namespaces - - """ - return self.site().page_embeddedin(self, filter_redirects, namespaces) - - def canBeEdited(self): - """Return bool indicating whether this page can be edited. - - This returns True if and only if: - - page is unprotected, and bot has an account for this site, or - - page is protected, and bot has a sysop account for this site. - - """ - return self.site().page_can_be_edited(self) - - def botMayEdit(self): - """Return True if this page allows bots to edit it. - - This will be True if the page doesn't contain {{bots}} or - {{nobots}}, or it contains them and the active bot is allowed to - edit this page. (This method is only useful on those sites that - recognize the bot-exclusion protocol; on other sites, it will always - return True.) - - The framework enforces this restriction by default. It is possible - to override this by setting ignore_bot_templates=True in - user_config.py, or using page.put(force=True). - - """ # TODO: move this to Site object? - if config.ignore_bot_templates: #Check the "master ignore switch" - return True - try: - templates = self.templatesWithParams(); - except (pywikibot.NoPage, - pywikibot.IsRedirectPage, - pywikibot.SectionError): - return True - for template in templates: - title = template[0].title(withNamespace=False) - if title == 'Nobots': - return False - elif title == 'Bots': - if len(template[1]) == 0: - return True - else: - (ttype, bots) = template[1][0].split('=', 1) - bots = bots.split(',') - if ttype == 'allow': - if 'all' in bots or username in bots: - return True - else: - return False - if ttype == 'deny': - if 'all' in bots or username in bots: - return False - else: - return True - # no restricting template found - return True - - def save(self, comment=None, watch=None, minor=True, force=False, - async=False, callback=None): - """Save the current contents of page's text to the wiki. - - @param comment: The edit summary for the modification (optional, but - most wikis strongly encourage its use) - @type comment: unicode - @param watch: if True, add or if False, remove this Page to/from bot - user's watchlist; if None, leave watchlist status unchanged - @type watch: bool or None - @param minor: if True, mark this edit as minor - @type minor: bool - @param force: if True, ignore botMayEdit() setting - @type force: bool - @param async: if True, launch a separate thread to save - asynchronously - @param callback: a callable object that will be called after the - page put operation. This object must take two arguments: (1) a - Page object, and (2) an exception instance, which will be None - if the page was saved successfully. The callback is intended for - use by bots that need to keep track of which saves were - successful. 
- - """ - if not comment: - comment = pywikibot.default_comment # needs to be defined - if watch is None: - unwatch = False - watch = False - else: - unwatch = not watch - if not force and not self.botMayEdit: - raise pywikibot.PageNotSaved( - "Page %s not saved; editing restricted by {{bots}} template" - % self.title(asLink=True)) - if async: - thd = threading.Thread( - target=self._save, - args=(comment, minor, watch, unwatch, callback) - ) - pywikibot.threadpool.append(thd) - thd.start() - else: - self._save(comment, minor, watch, unwatch, callback) - - def _save(self, comment, minor, watch, unwatch, callback): - err = None - try: - done = self.site().editpage(self, summary=comment, minor=minor, - watch=watch, unwatch=unwatch) - if not done: - logger.warn("Page %s not saved" % self.title(asLink=True)) - else: - logger.info("Page %s saved" % self.title(asLink=True)) - except pywikibot.Error, err: - logger.exception("Error saving page %s" % self.title(asLink=True)) - if callback: - callback(self, err) - - def put(self, newtext, comment=u'', watchArticle=None, minorEdit=True, - force=False, async=False, callback=None): - """Save the page with the contents of the first argument as the text. - - This method is maintained primarily for backwards-compatibility. - For new code, using Page.save() is preferred. See save() method - docs for all parameters not listed here. - - @param newtext: The complete text of the revised page. - @type newtext: unicode - - """ - self.text = newtext - return self.save(comment, watchArticle, minorEdit, force, - async, callback) - - def put_async(self, newtext, comment=u'', watchArticle=None, - minorEdit=True, force=False, callback=None): - """Put page on queue to be saved to wiki asynchronously. - - Asynchronous version of put (takes the same arguments), which places - pages on a queue to be saved by a daemon thread. All arguments are - the same as for .put(). This version is maintained solely for - backwards-compatibility. - - """ - return self.put(self, newtext, comment, watchArticle, - minorEdit, force, callback, async=True) - - def linkedPages(self): - """Iterate Pages that this Page links to. - - Only returns pages from "normal" internal links. Image and category - links are omitted unless prefixed with ":". Embedded templates are - omitted (but links within them are returned). All interwiki and - external links are omitted. - - @return: a generator that yields Page objects. - - """ - return self.site().pagelinks(self) - - def interwiki(self, expand=True): - """Iterate interwiki links in the page text, excluding language links. 
- - @param expand: if True (default), include interwiki links found in - templates transcluded onto this page; if False, only iterate - interwiki links found in this page's own wikitext - @return: a generator that yields Link objects - - """ - # This function does not exist in the API, so it has to be - # implemented by screen-scraping - if expand: - text = self.expand_text() - else: - text = self.text - for linkmatch in pywikibot.link_regex.finditer( - pywikibot.textlib.removeDisabledParts(text)): - linktitle = linkmatch.group("title") - link = Link(linktitle, self.site()) - # only yield links that are to a different site and that - # are not language links - try: - if link.site != self.site(): - if linktitle.lstrip().startswith(":"): - # initial ":" indicates not a language link - yield link - elif link.site.family != self.site().family: - # link to a different family is not a language link - yield link - except pywikibot.Error: - # ignore any links with invalid contents - continue - - def langlinks(self): - """Iterate all interlanguage links on this page. - - @return: a generator that yields Link objects. - - """ - return self.site().pagelanglinks(self) - - @deprecate_arg("followRedirects", None) - @deprecate_arg("loose", None) - def imagelinks(self, followRedirects=None, loose=None): - """Iterate ImagePage objects for images displayed on this Page. - - @return: a generator that yields ImagePage objects. - - """ - return self.site().pageimages(self) - - def templates(self): - """Iterate Page objects for templates used on this Page. - - Template parameters are ignored. This method only returns embedded - templates, not template pages that happen to be referenced through - a normal link. - - """ - return self.site().pagetemplates(self) - - def templatesWithParams(self): - """Iterate templates used on this Page. - - @return: a generator that yields a tuple for each use of a template - in the page, with the template Page as the first entry and a list of - parameters as the second entry. - - """ - templates = pywikibot.textlib.extract_templates_and_params(self.text) - # backwards-compatibility: convert the dict returned as the second - # element into a list in the format used by old scripts - result = [] - for template in templates: - args = template[1] - positional = [] - named = {} - for key in sorted(args.keys()): - try: - int(key) - except ValueError: - named[key] = args[key] - else: - positional.append(args[key]) - for name in named: - positional.append("%s=%s" % (name, named[name])) - result.append((pywikibot.Page( - pywikibot.Link(template[0], self.site())), - positional)) - return result - - @deprecate_arg("nofollow_redirects", None) - def categories(self, withSortKey=False): - """Iterate categories that the article is in. - - @param withSortKey: if True, include the sort key in each Category. - @return: a generator that yields Category objects. - - """ - return self.site().pagecategories(self, withSortKey=withSortKey) - - def extlinks(self): - """Iterate all external URLs (not interwiki links) from this page. - - @return: a generator that yields unicode objects containing URLs. - - """ - return self.site().page_extlinks(self) - - def getRedirectTarget(self): - """Return a Page object for the target this Page redirects to. - - If this page is not a redirect page, will raise an IsNotRedirectPage - exception. This method also can raise a NoPage exception. 
- - """ - if not self.isRedirectPage(): - raise pywikibot.IsNotRedirectPage - if not isinstance(self._redir, Page): - self.site().getredirtarget(self) - return self._redir - - @deprecate_arg("forceReload", None) - def getVersionHistory(self, reverseOrder=False, getAll=False, - revCount=500): - """Load the version history page and return history information. - - Return value is a list of tuples, where each tuple represents one - edit and is built of revision id, edit date/time, user name, and - edit summary. Starts with the most current revision, unless - reverseOrder is True. Defaults to getting the first revCount edits, - unless getAll is True. - - """ - if getAll: - limit = None - else: - limit = revCount - self.site().loadrevisions(self, getText=False, rvdir=reverseOrder, - limit=limit) - if getAll: - revCount = len(self._revisions) - return [ ( self._revisions[rev].revid, - self._revisions[rev].timestamp, - self._revisions[rev].user, - self._revisions[rev].comment - ) for rev in sorted(self._revisions.keys(), - reverse=not reverseOrder)[ : revCount] - ] - - def getVersionHistoryTable(self, forceReload=False, reverseOrder=False, - getAll=False, revCount=500): - """Return the version history as a wiki table.""" - result = '{| border="1"\n' - result += '! oldid || date/time || username || edit summary\n' - for oldid, time, username, summary \ - in self.getVersionHistory(forceReload=forceReload, - reverseOrder=reverseOrder, - getAll=getAll, revCount=revCount): - result += '|----\n' - result += '| %s || %s || %s || <nowiki>%s</nowiki>\n'\ - % (oldid, time, username, summary) - result += '|}\n' - return result - - def fullVersionHistory(self): - """Iterate all previous versions including wikitext. - - @return: A generator that yields tuples consisting of revision ID, - edit date/time, user name and content - """ - return self.site().loadrevisions(self, withText=True) - - def contributingUsers(self): - """Return a set of usernames (or IPs) of users who edited this page.""" - edits = self.getVersionHistory() - users = set([edit[2] for edit in edits]) - return users - - @deprecate_arg("throttle", None) - def move(self, newtitle, reason=None, movetalkpage=True, sysop=False, - deleteAndMove=False, safe=True): - """Move this page to a new title. - - @param newtitle: The new page title. - @param reason: The edit summary for the move. - @param movetalkpage: If true, move this page's talk page (if it exists) - @param sysop: Try to move using sysop account, if available - @param deleteAndMove: if move succeeds, delete the old page - (usually requires sysop privileges, depending on wiki settings) - @param safe: If false, attempt to delete existing page at newtitle - (if there is one) and then move this page to that title - - """ - if reason is None: - logger.info(u'Moving %s to [[%s]].' - % (self.title(asLink=True), newtitle)) - reason = pywikibot.input(u'Please enter a reason for the move:') - # TODO: implement "safe" parameter - # TODO: implement "sysop" parameter - return self.site().movepage(self, newtitle, reason, - movetalk=movetalkpage, - noredirect=deleteAndMove) - - @deprecate_arg("throttle", None) - def delete(self, reason=None, prompt=True, throttle=None, mark=False): - """Deletes the page from the wiki. Requires administrator status. - - @param reason: The edit summary for the deletion. - @param prompt: If true, prompt user for confirmation before deleting. - @param mark: if true, and user does not have sysop rights, place a - speedy-deletion request on the page instead. 
- - """ - if reason is None: - logger.info(u'Deleting %s.' % (self.title(asLink=True))) - reason = pywikibot.input(u'Please enter a reason for the deletion:') - answer = u'y' - if prompt and not hasattr(self.site(), '_noDeletePrompt'): - answer = pywikibot.inputChoice(u'Do you want to delete %s?' - % self.title(asLink = True, forceInterwiki = True), - ['Yes', 'No', 'All'], - ['Y', 'N', 'A'], - 'N') - if answer in ['a', 'A']: - answer = 'y' - self.site()._noDeletePrompt = True - if answer in ['y', 'Y']: - return self.site().delete(self, reason, mark=mark) - - def loadDeletedRevisions(self): - """Retrieve all deleted revisions for this Page from Special/Undelete. - - Stores all revisions' timestamps, dates, editors and comments in - self._deletedRevs attribute. - - @return: list of timestamps (which can be used to retrieve revisions - later on). - - """ - return self.site().loadDeletedRevisions(self) - - def getDeletedRevision(self, timestamp, retrieveText=False): - """Return a particular deleted revision by timestamp. - - @return: a list of [date, editor, comment, text, restoration - marker]. text will be None, unless retrieveText is True (or has - been retrieved earlier). If timestamp is not found, returns - None. - - """ - return self.site().getDeletedRevision(self, timestamp, - getText=retrieveText) - - def markDeletedRevision(self, timestamp, undelete=True): - """Mark the revision identified by timestamp for undeletion. - - @param undelete: if False, mark the revision to remain deleted. - - """ - if self._deletedRevs == None: - self.loadDeletedRevisions() - if not self._deletedRevs.has_key(timestamp): - #TODO: Throw an exception? - return None - self._deletedRevs[timestamp][4] = undelete - self._deletedRevsModified = True - - @deprecate_arg("throttle", None) - def undelete(self, comment=None): - """Undelete revisions based on the markers set by previous calls. - - If no calls have been made since loadDeletedRevisions(), everything - will be restored. - - Simplest case:: - Page(...).undelete('This will restore all revisions') - - More complex:: - pg = Page(...) - revs = pg.loadDeletedRevsions() - for rev in revs: - if ... #decide whether to undelete a revision - pg.markDeletedRevision(rev) #mark for undeletion - pg.undelete('This will restore only selected revisions.') - - @param comment: The undeletion edit summary. - - """ - if comment is None: - logger.info(u'Preparing to undelete %s.' - % (self.title(asLink=True))) - comment = pywikibot.input( - u'Please enter a reason for the undeletion:') - return self.site().undelete(self, comment) - - @deprecate_arg("throttle", None) - def protect(self, edit='sysop', move='sysop', unprotect=False, - reason=None, prompt=True): - """(Un)protect a wiki page. Requires administrator status. - - Valid protection levels (in MediaWiki 1.12) are '' (equivalent to - 'none'), 'autoconfirmed', and 'sysop'. - - @param edit: Level of edit protection - @param move: Level of move protection - @param unprotect: If true, unprotect the page (equivalent to setting - all protection levels to '') - @param reason: Edit summary. - @param prompt: If true, ask user for confirmation. - - """ - if reason is None: - if unprotect: - un = u'un' - else: - un = u'' - logger.info(u'Preparing to %sprotect %s.' 
- % (un, self.title(asLink=True))) - reason = pywikibot.input(u'Please enter a reason for the action:') - if unprotect: - edit = move = "" - answer = 'y' - if prompt and not hasattr(self.site(), '_noProtectPrompt'): - answer = pywikibot.inputChoice( - u'Do you want to change the protection level of %s?' - % self.title(asLink=True, forceInterwiki = True), - ['Yes', 'No', 'All'], ['Y', 'N', 'A'], 'N') - if answer in ['a', 'A']: - answer = 'y' - self.site()._noProtectPrompt = True - if answer in ['y', 'Y']: - return self.site().protect(self, edit, move, reason) - - def change_category(article, oldCat, newCat, comment=None, sortKey=None, - inPlace=True): - """Remove page from oldCat and add it to newCat. - - oldCat and newCat should be Category objects. - If newCat is None, the category will be removed. - - """ # TODO: document remaining arguments - cats = self.categories(get_redirect=True) - site = self.site() - changesMade = False - - if not self.canBeEdited(): - pywikibot.output(u"Can't edit %s, skipping it..." - % self.title(asLink=True)) - return False - if inPlace == True: - newtext = pywikibot.textlib.replaceCategoryInPlace( - self.text, oldCat, newCat) - if newtext == self.text: - pywikibot.output( - u'No changes in made in page %s.' - % self.title(asLink=True)) - return False - try: - self.put(newtext, comment) - return True - except pywikibot.EditConflict: - pywikibot.output( - u'Skipping %s because of edit conflict' - % self.title(asLink=True)) - except pywikibot.LockedPage: - pywikibot.output(u'Skipping locked page %s' - % self.title(asLink=True)) - except pywikibot.SpamfilterError, error: - pywikibot.output( - u'Changing page %s blocked by spam filter (URL=%s)' - % (self.title(asLink=True), error.url)) - except pywikibot.NoUsername: - pywikibot.output( - u"Page %s not saved; sysop privileges required." - % self.title(asLink=True)) - except pywikibot.PageNotSaved, error: - pywikibot.output(u"Saving page %s failed: %s" - % (self.title(asLink=True), error.message)) - return False - - # This loop will replace all occurrences of the category to be changed, - # and remove duplicates. - newCatList = [] - newCatSet = set() - for i in range(len(cats)): - cat = cats[i] - if cat == oldCat: - changesMade = True - if not sortKey: - sortKey = cat.sortKey - if newCat: - if newCat.title() not in newCatSet: - newCategory = Category(site, newCat.title(), - sortKey=sortKey) - newCatSet.add(newCat.title()) - newCatList.append(newCategory) - elif cat.title() not in newCatSet: - newCatSet.add(cat.title()) - newCatList.append(cat) - - if not changesMade: - pywikibot.output(u'ERROR: %s is not in category %s!' - % (self.title(asLink=True), oldCat.title())) - else: - try: - text = pywikibot.textlib.replaceCategoryLinks(self.text, - newCatList) - except ValueError: - # Make sure that the only way replaceCategoryLinks() can return - # a ValueError is in the case of interwiki links to self. 
- pywikibot.output( - u'Skipping %s because of interwiki link to self' % self) - try: - self.put(text, comment) - except pywikibot.EditConflict: - pywikibot.output( - u'Skipping %s because of edit conflict' % self.title()) - except pywikibot.SpamfilterError, e: - pywikibot.output( - u'Skipping %s because of blacklist entry %s' - % (self.title(), e.url)) - except pywikibot.LockedPage: - pywikibot.output( - u'Skipping %s because page is locked' % self.title()) - except pywikibot.PageNotSaved, error: - pywikibot.output(u"Saving page %s failed: %s" - % (self.title(asLink=True), error.message)) - -######## DEPRECATED METHODS ######## - - def encoding(self): - """DEPRECATED: use Site.encoding() instead""" - logger.debug(u"Page.encoding() is deprecated; use Site.encoding().") - return self.site().encoding() - - def titleWithoutNamespace(self, underscore=False): - """DEPRECATED: use self.title(withNamespace=False) instead.""" - logger.debug( - u"Page.titleWithoutNamespace() method is deprecated.") - return self.title(underscore=underscore, withNamespace=False, - withSection=False) - - def titleForFilename(self): - """DEPRECATED: use self.title(as_filename=True) instead.""" - logger.debug( - u"Page.titleForFilename() method is deprecated.") - return self.title(as_filename=True) - - def sectionFreeTitle(self, underscore=False): - """DEPRECATED: use self.title(withSection=False) instead.""" - logger.debug( - u"Page.sectionFreeTitle() method is deprecated.") - return self.title(underscore=underscore, withSection=False) - - def aslink(self, forceInterwiki=False, textlink=False, noInterwiki=False): - """DEPRECATED: use self.title(asLink=True) instead.""" - logger.debug(u"Page.aslink() method is deprecated.") - return self.title(asLink=True, forceInterwiki=forceInterwiki, - allowInterwiki=not noInterwiki, textlink=textlink) - - def urlname(self): - """Return the Page title encoded for use in an URL. - - DEPRECATED: use self.title(asUrl=True) instead. - - """ - logger.debug(u"Page.urlname() method is deprecated.") - return self.title(asUrl=True) - -####### DISABLED METHODS (warnings provided) ###### - # these methods are easily replaced by editing the page's text using - # textlib methods and then using put() on the result. - - def removeImage(self, image, put=False, summary=None, safe=True): - """Old method to remove all instances of an image from page.""" - logger.warning(u"Page.removeImage() is no longer supported.") - - def replaceImage(self, image, replacement=None, put=False, summary=None, - safe=True): - """Old method to replace all instances of an image with another.""" - logger.warning(u"Page.replaceImage() is no longer supported.") - - -class ImagePage(Page): - """A subclass of Page representing an image descriptor wiki page. - - Supports the same interface as Page, with the following added methods: - - getImagePageHtml : Download image page and return raw HTML text. - fileURL : Return the URL for the image described on this - page. - fileIsOnCommons : Return True if image stored on Wikimedia - Commons. - fileIsShared : Return True if image stored on Wikitravel - shared repository. - getFileMd5Sum : Return image file's MD5 checksum. - getFileVersionHistory : Return the image file's version history. - getFileVersionHistoryTable: Return the version history in the form of a - wiki table. - usingPages : Iterate Pages on which the image is displayed. 
- - """ - def __init__(self, source, title=u"", insite=None): - Page.__init__(self, source, title, 6) - if self.namespace() != 6: - raise ValueError(u"'%s' is not in the image namespace!" % title) - - def getImagePageHtml(self): - """ - Download the image page, and return the HTML, as a unicode string. - - Caches the HTML code, so that if you run this method twice on the - same ImagePage object, the page will only be downloaded once. - """ - if not hasattr(self, '_imagePageHtml'): - from pywikibot.data import http - path = "%s/index.php?title=%s" \ - % (self.site().scriptpath(), self.title(asUrl=True)) - self._imagePageHtml = http.request(self.site(), path) - return self._imagePageHtml - - def fileUrl(self): - """Return the URL for the image described on this page.""" - # TODO add scaling option? - if not hasattr(self, '_imageinfo'): - self._imageinfo = self.site().getimageinfo(self) #FIXME - return self._imageinfo['url'] - - def fileIsOnCommons(self): - """Return True if the image is stored on Wikimedia Commons""" - return self.fileUrl().startswith( - 'http://upload.wikimedia.org/wikipedia/commons/') - - def fileIsShared(self): - """Return True if image is stored on any known shared repository.""" - # as of now, the only known repositories are commons and wikitravel - if 'wikitravel_shared' in self.site().shared_image_repository(): - return self.fileUrl().startswith( - u'http://wikitravel.org/upload/shared/') - return self.fileIsOnCommons() - - def getFileMd5Sum(self): - """Return image file's MD5 checksum.""" - logger.debug( - "ImagePage.getFileMd5Sum() is deprecated; use getFileSHA1Sum().") -# FIXME: MD5 might be performed on incomplete file due to server disconnection -# (see bug #1795683). - import md5, urllib - f = urllib.urlopen(self.fileUrl()) - # TODO: check whether this needs a User-Agent header added - md5Checksum = md5.new(f.read()).hexdigest() - f.close() - return md5Checksum - - def getFileSHA1Sum(self): - """Return image file's SHA1 checksum.""" - if not hasattr(self, '_imageinfo'): - self._imageinfo = self.site().getimageinfo(self) #FIXME - return self._imageinfo['sha1'] - - def getFileVersionHistory(self): - """Return the image file's version history. - - @return: An iterator yielding tuples containing (timestamp, - username, resolution, filesize, comment). - - """ - #TODO; return value may need to change - return self.site().getimageinfo(self, history=True) #FIXME - - def getFileVersionHistoryTable(self): - """Return the version history in the form of a wiki table.""" - lines = [] - #TODO: if getFileVersionHistory changes, make sure this follows it - for (datetime, username, resolution, size, comment) \ - in self.getFileVersionHistory(): - lines.append('| %s || %s || %s || %s || <nowiki>%s</nowiki>' \ - % (datetime, username, resolution, size, comment)) - return u'{| border="1"\n! date/time || username || resolution || size || edit summary\n|----\n' + u'\n|----\n'.join(lines) + '\n|}' - - def usingPages(self): - """Yield Pages on which the image is displayed.""" - return self.site().getimageusage(self) - - -class Category(Page): - """A page in the Category: namespace""" - - @deprecate_arg("sortKey", None) - def __init__(self, source, title=u"", insite=None): - """All parameters are the same as for Page() constructor. - - """ - Page.__init__(self, source, title, 14) - if self.namespace() != 14: - raise ValueError(u"'%s' is not in the category namespace!" 
- % title) - - @deprecate_arg("forceInterwiki", None) - @deprecate_arg("textlink", None) - @deprecate_arg("noInterwiki", None) - def aslink(self, sortKey=u''): - """Return a link to place a page in this Category. - - Use this only to generate a "true" category link, not for interwikis - or text links to category pages. - - @param sortKey: The sort key for the article to be placed in this - Category; if omitted, default sort key is used. - @type sortKey: (optional) unicode - - """ - if sortKey: - titleWithSortKey = '%s|%s' % (self.title(withSection=False), - self.sortKey) - else: - titleWithSortKey = self.title(withSection=False) - return '[[%s]]' % titleWithSortKey - - @deprecate_arg("startFrom", None) - @deprecate_arg("cacheResults", None) - def subcategories(self, recurse=False): - """Iterate all subcategories of the current category. - - @param recurse: if not False or 0, also iterate subcategories of - subcategories. If an int, limit recursion to this number of - levels. (Example: recurse=1 will iterate direct subcats and - first-level sub-sub-cats, but no deeper.) - @type recurse: int or bool - - """ - if not isinstance(recurse, bool) and recurse: - recurse = recurse - 1 - if not hasattr(self, "_subcats"): - self._subcats = [] - for member in self.site().categorymembers(self, namespaces=[14]): - subcat = Category(self.site(), member.title()) - self._subcats.append(subcat) - yield subcat - if recurse: - for item in subcat.subcategories(recurse): - yield item - else: - for subcat in self._subcats: - yield subcat - if recurse: - for item in subcat.subcategories(recurse): - yield item - - @deprecate_arg("startFrom", None) - def articles(self, recurse=False): - """ - Yields all articles in the current category. - - @param recurse: if not False or 0, also iterate articles in - subcategories. If an int, limit recursion to this number of - levels. (Example: recurse=1 will iterate articles in first-level - subcats, but no deeper.) - @type recurse: int or bool - - """ - namespaces = [x for x in self.site().namespaces().keys() - if x>=0 and x!=14] - for member in self.site().categorymembers(self, - namespaces=namespaces): - yield member - if recurse: - if not isinstance(recurse, bool) and recurse: - recurse = recurse - 1 - for subcat in self.subcategories(): - for article in subcat.articles(recurse): - yield article - - def isEmptyCategory(self): - """Return True if category has no members (including subcategories).""" - for member in self.site().categorymembers(self, limit=1): - return False - return True - - def copyTo(self, catname): - """ - Copy text of category page to a new page. Does not move contents. - - @param catname: New category title (without namespace) - @return: True if copying was successful, False if target page - already existed. - - """ - # This seems far too specialized to be in the top-level framework - catname = self.site().category_namespace() + ':' + catname - targetCat = Category(self.site(), catname) - if targetCat.exists(): - logger.warn('Target page %s already exists!' - % targetCat.title()) - return False - else: - logger.info('Moving text from %s to %s.' - % (self.title(), targetCat.title())) - authors = ', '.join(self.contributingUsers()) - creationSummary = pywikibot.translate( - self.site(), msg_created_for_renaming - ) % (self.title(), authors) - targetCat.put(self.get(), creationSummary) - return True - - def copyAndKeep(self, catname, cfdTemplates): - """Copy partial category page text (not contents) to a new title. 
- - Like copyTo above, except this removes a list of templates (like - deletion templates) that appear in the old category text. It also - removes all text between the two HTML comments BEGIN CFD TEMPLATE - and END CFD TEMPLATE. (This is to deal with CFD templates that are - substituted.) - - Returns true if copying was successful, false if target page already - existed. - - @param catname: New category title (without namespace) - @param cfdTemplates: A list (or iterator) of templates to be removed - from the page text - @return: True if copying was successful, False if target page - already existed. - - """ - # I don't see why we need this as part of the framework either - catname = self.site().category_namespace() + ':' + catname - targetCat = Category(self.site(), catname) - if targetCat.exists(): - logger.warn('Target page %s already exists!' - % targetCat.title()) - return False - else: - logger.info('Moving text from %s to %s.' - % (self.title(), targetCat.title())) - authors = ', '.join(self.contributingUsers()) - creationSummary = pywikibot.translate( - self.site(), msg_created_for_renaming - ) % (self.title(), authors) - newtext = self.get() - for regexName in cfdTemplates: - matchcfd = re.compile(r"{{%s.*?}}" % regexName, re.IGNORECASE) - newtext = matchcfd.sub('',newtext) - matchcomment = re.compile( - r"<!--BEGIN CFD TEMPLATE-->.*?<!--END CFD TEMPLATE-->", - re.IGNORECASE | re.MULTILINE | re.DOTALL) - newtext = matchcomment.sub('', newtext) - pos = 0 - while (newtext[pos:pos+1] == "\n"): - pos = pos + 1 - newtext = newtext[pos:] - targetCat.put(newtext, creationSummary) - return True - -#### DEPRECATED METHODS #### - def subcategoriesList(self, recurse=False): - """DEPRECATED: Equivalent to list(self.subcategories(...))""" - logger.debug("Category.subcategoriesList() method is deprecated.") - return sorted(list(set(self.subcategories(recurse)))) - - def articlesList(self, recurse=False): - """DEPRECATED: equivalent to list(self.articles(...))""" - logger.debug("Category.articlesList() method is deprecated.") - return sorted(list(set(self.articles(recurse)))) - - def supercategories(self): - """DEPRECATED: equivalent to self.categories()""" - logger.debug("Category.supercategories() method is deprecated.") - return self.categories() - - def supercategoriesList(self): - """DEPRECATED: equivalent to list(self.categories(...))""" - logger.debug("Category.articlesList() method is deprecated.") - return sorted(list(set(self.categories()))) - - -class Revision(object): - """A structure holding information about a single revision of a Page.""" - def __init__(self, revid, timestamp, user, anon=False, comment=u"", - text=None, minor=False): - """All parameters correspond to object attributes (e.g., revid - parameter is stored as self.revid) - - @param revid: Revision id number - @type revid: int - @param text: Revision wikitext. 
- @type text: unicode, or None if text not yet retrieved - @param timestamp: Revision time stamp (in ISO 8601 format) - @type timestamp: unicode - @param user: user who edited this revision - @type user: unicode - @param anon: user is unregistered - @type anon: bool - @param comment: edit comment text - @type comment: unicode - @param minor: edit flagged as minor - @type minor: bool - - """ - self.revid = revid - self.text = text - self.timestamp = timestamp - self.user = user - self.anon = anon - self.comment = comment - self.minor = minor - - -class Link(object): - """A Mediawiki link (local or interwiki) - - Has the following attributes: - - - site: The Site object for the wiki linked to - - namespace: The namespace of the page linked to (int) - - title: The title of the page linked to (unicode); does not include - namespace or section - - section: The section of the page linked to (unicode or None); this - contains any text following a '#' character in the title - - anchor: The anchor text (unicode or None); this contains any text - following a '|' character inside the link - - """ - illegal_titles_pattern = re.compile( - # Matching titles will be held as illegal. - u'''[^ %!"$&'()*,\-.\/0-9:;=?@A-Z\\^_`a-z~\u0080-\uFFFF+]''' - # URL percent encoding sequences interfere with the ability - # to round-trip titles -- you can't link to them consistently. - u'|%[0-9A-Fa-f]{2}' - # XML/HTML character references produce similar issues. - u'|&[A-Za-z0-9\x80-\xff]+;' - u'|&#[0-9]+;' - u'|&#x[0-9A-Fa-f]+;' - ) - - def __init__(self, text, source=None, defaultNamespace=0): - """Constructor - - @param text: the link text (everything appearing between [[ and ]] - on a wiki page) - @type text: unicode - @param source: the Site on which the link was found (not necessarily - the site to which the link refers) - @type source: Site - @param defaultNamespace: a namespace to use if the link does not - contain one (defaults to 0) - @type defaultNamespace: int - - """ - self._text = text - self._source = source - self._defaultns = defaultNamespace - - def parse(self): - """Parse text; called internally when accessing attributes""" - - # First remove the anchor, which is stored unchanged, if there is one - if u"|" in self._text: - self._text, self._anchor = self._text.split(u"|", 1) - else: - self._anchor = None - - if self._source is None: - self._source = pywikibot.Site() - self._site = self._source - - # Clean up the name, it can come from anywhere. - # Convert HTML entities to unicode - t = html2unicode(self._text) - - # Convert URL-encoded characters to unicode - t = url2unicode(t, site=self._site) - - # Normalize unicode string to a NFC (composed) format to allow proper - # string comparisons. According to - # http://svn.wikimedia.org/viewvc/mediawiki/branches/REL1_6/phase3/includes/no... - # the mediawiki code normalizes everything to NFC, not NFKC (which - # might result in information loss). - t = unicodedata.normalize('NFC', t) - - # This code was adapted from Title.php : secureAndSplit() - # - if u'\ufffd' in t: - raise pywikibot.Error("Title contains illegal char (\uFFFD)") - self._namespace = self._defaultns - - # Replace underscores by spaces - t = t.replace(u"_", u" ") - # replace multiple spaces and underscores with a single space - while u" " in t: t = t.replace(u" ", u" ") - # Strip spaces at both ends - t = t.strip(" ") - # Remove left-to-right and right-to-left markers. 
- t = t.replace(u"\u200e", u"").replace(u"\u200f", u"") - - firstPass = True - while u":" in t: - # Initial colon indicates main namespace rather than default - if t.startswith(u":"): - self._namespace = 0 - # remove the colon but continue processing - # remove any subsequent whitespace - t = t.lstrip(u":").lstrip(u" ") - continue - - fam = self._site.family - prefix = t[ :t.index(u":")].lower() - ns = self._site.ns_index(prefix) - if ns: - # Ordinary namespace - t = t[t.index(u":"): ].lstrip(u":").lstrip(u" ") - self._namespace = ns - break - if prefix in fam.langs.keys()\ - or prefix in fam.get_known_families(site=self._site): - # looks like an interwiki link - if not firstPass: - # Can't make a local interwiki link to an interwiki link. - raise pywikibot.Error( - "Improperly formatted interwiki link '%s'" - % self._text) - t = t[t.index(u":"): ].lstrip(u":").lstrip(u" ") - if prefix in fam.langs.keys(): - newsite = pywikibot.Site(prefix, fam) - else: - otherlang = self._site.code - familyName = fam.get_known_families(site=self._site)[prefix] - if familyName in ['commons', 'meta']: - otherlang = familyName - try: - newsite = pywikibot.Site(otherlang, familyName) - except ValueError: - raise pywikibot.Error("""\ -%s is not a local page on %s, and the %s family is -not supported by PyWikiBot!""" - % (title, self._site(), familyName)) - - # Redundant interwiki prefix to the local wiki - if newsite == self._site: - if not t: - # Can't have an empty self-link - raise pywikibot.Error( - "Invalid link title: '%s'" % self._text) - firstPass = False - continue - self._site = newsite - else: - break # text before : doesn't match any known prefix - - if u"#" in t: - t, sec = t.split(u'#', 1) - t, self._section = t.rstrip(), sec.lstrip() - else: - self._section = None - - # Reject illegal characters. - m = Link.illegal_titles_pattern.search(t) - if m: - raise pywikibot.Error( - u"Invalid title: contains illegal char(s) '%s'" % m.group(0)) - - # Pages with "/./" or "/../" appearing in the URLs will - # often be unreachable due to the way web browsers deal - #* with 'relative' URLs. Forbid them explicitly. - - if u'.' in t and ( - t == u'.' or t == u'..' - or t.startswith(u"./") - or t.startswith(u"../") - or u"/./" in t - or u"/../" in t - or t.endswith(u"/.") - or t.endswith(u"/..") - ): - raise pywikibot.Error( - "Invalid title (contains . / combinations): '%s'" - % self._text) - - # Magic tilde sequences? Nu-uh! - if u"~~~" in t: - raise pywikibot.Error("Invalid title (contains ~~~): '%s'" % self._text) - - if self._namespace != -1 and len(t) > 255: - raise pywikibot.Error("Invalid title (over 255 bytes): '%s'" % t) - - if self._site.case() == 'first-letter': - t = t[:1].upper() + t[1:] - - # Can't make a link to a namespace alone... - # "empty" local links can only be self-links - # with a fragment identifier. 
- if not t and self._site == self._source and self._namespace != 0: - raise ValueError("Invalid link (no page title): '%s'" % self._text) - - self._title = t - - # define attributes, to be evaluated lazily - - @property - def site(self): - if not hasattr(self, "_site"): - self.parse() - return self._site - - @property - def namespace(self): - if not hasattr(self, "_namespace"): - self.parse() - return self._namespace - - @property - def title(self): - if not hasattr(self, "_title"): - self.parse() - return self._title - - @property - def section(self): - if not hasattr(self, "_section"): - self.parse() - return self._section - - @property - def anchor(self): - if not hasattr(self, "_anchor"): - self.parse() - return self._anchor - - def astext(self, onsite=None): - """Return a text representation of the link. - - @param onsite: if specified, present as a (possibly interwiki) link - from the given site; otherwise, present as an internal link on - the source site. - - """ - if onsite is None: - onsite = self.site - title = self.title - if self.namespace: - title = onsite.namespace(self.namespace) + ":" + title - if self.section: - title = title + "#" + self.section - if onsite == self.site: - return u'[[%s]]' % title - if onsite.family == self.site.family: - return u'[[%s:%s]]' % (self.site.code, title) - if self.site.family.name == self.site.code: - # use this form for sites like commons, where the - # code is the same as the family name - return u'[[%s:%s]]' % (self.site.code, - title) - return u'[[%s:%s:%s]]' % (self.site.family.name, - self.site.code, - title) - - def __str__(self): - return self.astext() - - def __cmp__(self, other): - """Test for equality and inequality of Link objects. - - Link objects are "equal" if and only if they are on the same site - and have the same normalized title, including section if any. - - Link objects are sortable by site, then namespace, then title. - - """ - if not isinstance(other, Link): - # especially, return -1 if other is None - return -1 - if not self.site == other.site: - return cmp(self.site, other.site) - if self.namespace != other.namespace: - return cmp(self.namespace, other.namespace) - return cmp(self.title, other.title) - - -# Utility functions for parsing page titles - -def html2unicode(text, ignore = []): - """Return text, replacing HTML entities by equivalent unicode characters.""" - # This regular expression will match any decimal and hexadecimal entity and - # also entities that might be named entities. 
- entityR = re.compile( - r'&(#(?P<decimal>\d+)|#x(?P<hex>[0-9a-fA-F]+)|(?P<name>[A-Za-z]+));') - # These characters are Html-illegal, but sadly you *can* find some of - # these and converting them to unichr(decimal) is unsuitable - convertIllegalHtmlEntities = { - 128 : 8364, # € - 130 : 8218, # ‚ - 131 : 402, # ƒ - 132 : 8222, # „ - 133 : 8230, # … - 134 : 8224, # † - 135 : 8225, # ‡ - 136 : 710, # ˆ - 137 : 8240, # ‰ - 138 : 352, # Š - 139 : 8249, # ‹ - 140 : 338, # Œ - 142 : 381, # Ž - 145 : 8216, # ‘ - 146 : 8217, # ’ - 147 : 8220, # “ - 148 : 8221, # ” - 149 : 8226, # • - 150 : 8211, # – - 151 : 8212, # — - 152 : 732, # ˜ - 153 : 8482, # ™ - 154 : 353, # š - 155 : 8250, # › - 156 : 339, # œ - 158 : 382, # ž - 159 : 376 # Ÿ - } - #ensuring that illegal   and , which have no known values, - #don't get converted to unichr(129), unichr(141) or unichr(157) - ignore = set(ignore) | set([129, 141, 157]) - result = u'' - i = 0 - found = True - while found: - text = text[i:] - match = entityR.search(text) - if match: - unicodeCodepoint = None - if match.group('decimal'): - unicodeCodepoint = int(match.group('decimal')) - elif match.group('hex'): - unicodeCodepoint = int(match.group('hex'), 16) - elif match.group('name'): - name = match.group('name') - if htmlentitydefs.name2codepoint.has_key(name): - # We found a known HTML entity. - unicodeCodepoint = htmlentitydefs.name2codepoint[name] - result += text[:match.start()] - try: - unicodeCodepoint=convertIllegalHtmlEntities[unicodeCodepoint] - except KeyError: - pass - if unicodeCodepoint and unicodeCodepoint not in ignore: - result += unichr(unicodeCodepoint) - else: - # Leave the entity unchanged - result += text[match.start():match.end()] - i = match.end() - else: - result += text - found = False - return result - -def url2unicode(title, site, site2 = None): - """Convert url-encoded text to unicode using site's encoding. - - If site2 is provided, try its encodings as well. Uses the first encoding - that doesn't cause an error. - - """ - # create a list of all possible encodings for both hint sites - encList = [site.encoding()] + list(site.encodings()) - if site2 and site2 <> site: - encList.append(site2.encoding()) - encList += list(site2.encodings()) - firstException = None - # try to handle all encodings (will probably retry utf-8) - for enc in encList: - try: - t = title.encode(enc) - t = urllib.unquote(t) - return unicode(t, enc) - except UnicodeError, ex: - if not firstException: - firstException = ex - pass - # Couldn't convert, raise the original exception - raise firstException - +# -*- coding: utf-8 -*- +""" +Objects representing various types of MediaWiki pages. +""" +# +# (C) Pywikipedia bot team, 2008 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' + +import pywikibot +from pywikibot import deprecate_arg +from pywikibot import config +import pywikibot.site +import pywikibot.textlib + +import htmlentitydefs +import logging +import re +import sys +import threading +import unicodedata +import urllib + +logger = logging.getLogger("wiki") + +reNamespace = re.compile("^(.+?) *: *(.*)$") + + +class Page(object): + """Page: A MediaWiki page + + This object only implements internally methods that do not require + reading from or writing to the wiki. All other methods are delegated + to the Site object. + + """ + + @deprecate_arg("insite", None) + @deprecate_arg("defaultNamespace", None) + def __init__(self, source, title=u"", ns=0): + """Instantiate a Page object. 
+ + Three calling formats are supported: + + - If the first argument is a Page, create a copy of that object. + This can be used to convert an existing Page into a subclass + object, such as Category or ImagePage. (If the title is also + given as the second argument, creates a copy with that title; + this is used when pages are moved.) + - If the first argument is a Site, create a Page on that Site + using the second argument as the title (may include a section), + and the third as the namespace number. The namespace number is + mandatory, even if the title includes the namespace prefix. This + is the preferred syntax when using an already-normalized title + obtained from api.php or a database dump. WARNING: may produce + invalid objects if page title isn't in normal form! + - If the first argument is a Link, create a Page from that link. + This is the preferred syntax when using a title scraped from + wikitext, URLs, or another non-normalized source. + + @param source: the source of the page + @type source: Link, Page (or subclass), or Site + @param title: normalized title of the page; required if source is a + Site, ignored otherwise + @type title: unicode + @param ns: namespace number; required if source is a Site, ignored + otherwise + @type ns: int + + """ + if isinstance(source, pywikibot.site.BaseSite): + self._site = source + if ns not in source.namespaces(): + raise pywikibot.Error( + "Invalid namespace '%i' for site %s." + % (ns, source.sitename())) + self._ns = ns + if ns and not title.startswith(source.namespace(ns)+u":"): + title = source.namespace(ns) + u":" + title + elif not ns and u":" in title: + pos = title.index(u':') + nsindex = source.ns_index(title[ :pos]) + if nsindex: + self._ns = nsindex + if u"#" in title: + title, self._section = title.split(u"#", 1) + else: + self._section = None + if not title: + raise pywikibot.Error( + "Page object cannot be created from Site without title.") + self._title = title + elif isinstance(source, Page): + # copy all of source's attributes to this object + self.__dict__ = source.__dict__ + if title: + # overwrite title + if ":" in title: + prefix = title[ :title.index(":")] + self._ns = site.ns_index(prefix) + if self._ns is None: + self._ns = 0 + else: + title = title[title.index(":")+1 : ].strip(" _") + self._title = "%s:%s" % ( + self.site().namespace(self._ns), + self._title) + else: + self._ns = 0 + if "#" in title: + self._section = title[title.index("#") + 1 : ].strip(" _") + title = title[ : title.index("#")].strip(" _") + self._title = title + elif isinstance(source, Link): + self._site = source.site + self._section = source.section + self._ns = source.namespace + self._title = source.title + # reassemble the canonical title from components + if self._ns: + self._title = "%s:%s" % (self.site().namespace(self._ns), + self._title) + else: + raise pywikibot.Error( + "Invalid argument type '%s' in Page constructor: %s" + % (type(source), source)) + if self._section is not None: + self._title = self._title + "#" + self._section + self._revisions = {} + + def site(self): + """Return the Site object for the wiki on which this Page resides.""" + return self._site + + def namespace(self): + """Return the number of the namespace of the page.""" + return self._ns + + @deprecate_arg("decode", None) + @deprecate_arg("savetitle", "asUrl") + def title(self, underscore=False, savetitle=False, withNamespace=True, + withSection=True, asUrl=False, asLink=False, + allowInterwiki=True, forceInterwiki=False, textlink=False, + as_filename=False): 
+ """Return the title of this Page, as a Unicode string. + + @param underscore: if true, replace all ' ' characters with '_' + @param withNamespace: if false, omit the namespace prefix + @param withSection: if false, omit the section + @param asUrl: if true, quote title as if in an URL + @param asLink: if true, return the title in the form of a wikilink + @param allowInterwiki: (only used if asLink is true) if true, format + the link as an interwiki link if necessary + @param forceInterwiki: (only used if asLink is true) if true, always + format the link as an interwiki link + @param textlink: (only used if asLink is true) if true, place a ':' + before Category: and Image: links + @param as_filename: if true, replace any characters that are unsafe + in filenames + + """ + title = self._title + if not withNamespace and self._ns != 0: + title = title.split(u':', 1)[1] + if not withSection and self._section: + title = title.split(u'#', 1)[0] + if underscore or asUrl: + title = title.replace(u' ', u'_') + if asUrl: + encodedTitle = title.encode(self.site().encoding()) + title = urllib.quote(encodedTitle) + if asLink: + if forceInterwiki or (allowInterwiki and + (self.site().family.name != config.family + or self.site().code != config.mylang)): + if self.site().family.name != config.family \ + and self.site().family.name != self.site().code: + return u'[[%s:%s:%s]]' % (self.site().family.name, + self.site().code, + self._title) + else: + # use this form for sites like commons, where the + # code is the same as the family name + return u'[[%s:%s]]' % (self.site().code, + self._title) + elif textlink and (self.isImage() or self.isCategory()): + return u'[[:%s]]' % title + else: + return u'[[%s]]' % title + if as_filename: + # Replace characters that are not possible in file names on some + # systems. + # Spaces are possible on most systems, but are bad for URLs. + for forbidden in ':*?/\ ': + title = title.replace(forbidden, '_') + return title + + @deprecate_arg("decode", None) + @deprecate_arg("underscore", None) + def section(self): + """Return the name of the section this Page refers to. + + The section is the part of the title following a '#' character, if + any. If no section is present, return None. + + """ + if self._section: + return self._section + else: + return None + + def __str__(self): + """Return a console representation of the pagelink.""" + return self.title(asLink=True, forceInterwiki=True + ).encode(sys.stderr.encoding) + + def __unicode__(self): + return self.title(asLink=True, forceInterwiki=True) + + def __repr__(self): + """Return a more complete string representation.""" + return u"%s(%s)" % (self.__class__.__name__, + self.title().encode(sys.stderr.encoding)) + + def __cmp__(self, other): + """Test for equality and inequality of Page objects. + + Page objects are "equal" if and only if they are on the same site + and have the same normalized title, including section if any. + + Page objects are sortable by namespace first, then by title. + + """ + if not isinstance(other, Page): + # especially, return -1 if other is None + return -1 + if not self.site() == other.site(): + return cmp(self.site(), other.site()) + if self.namespace() != other.namespace(): + return cmp(self.namespace(), other.namespace()) + owntitle = self.title(withNamespace=False) + othertitle = other.title(withNamespace=False) + return cmp(owntitle, othertitle) + + def __hash__(self): + # Pseudo method that makes it possible to store Page objects as keys + # in hash-tables. 
This relies on the fact that the string + # representation of an instance can not change after the construction. + return hash(unicode(self)) + + def autoFormat(self): + """Return L{date.autoFormat} dictName and value, if any. + + Value can be a year, date, etc., and dictName is 'YearBC', + 'Year_December', or another dictionary name. Please note that two + entries may have exactly the same autoFormat, but be in two + different namespaces, as some sites have categories with the + same names. Regular titles return (None, None). + + """ + if not hasattr(self, '_autoFormat'): + from pywikibot import date + self._autoFormat = date.getAutoFormat( + self.site().code, + self.title(withNamespace=False) + ) + return self._autoFormat + + def isAutoTitle(self): + """Return True if title of this Page is in the autoFormat dictionary.""" + return self.autoFormat()[0] is not None + + @deprecate_arg("throttle", None) + @deprecate_arg("nofollow_redirects", None) + @deprecate_arg("change_edit_time", None) + def get(self, force=False, get_redirect=False, sysop=False): + """Return the wiki-text of the page. + + This will retrieve the page from the server if it has not been + retrieved yet, or if force is True. This can raise the following + exceptions that should be caught by the calling code: + + - NoPage: The page does not exist + - IsRedirectPage: The page is a redirect. The argument of the + exception is the title of the page it redirects to. + - SectionError: The section does not exist on a page with a # + link + + @param force: reload all page attributes, including errors. + @param get_redirect: return the redirect text, do not follow the + redirect, do not raise an exception. + @param sysop: if the user has a sysop account, use it to retrieve + this page + + """ + if force: + # When forcing, we retry the page no matter what. Old exceptions + # do not apply any more. + for attr in ['_redirarg', '_getexception']: + if hasattr(self, attr): + delattr(self,attr) + else: + # Make sure we re-raise an exception we got on an earlier attempt + if hasattr(self, '_redirarg') and not get_redirect: + raise pywikibot.IsRedirectPage, self._redirarg + elif hasattr(self, '_getexception'): + raise self._getexception + if force or not hasattr(self, "_revid") \ + or not self._revid in self._revisions \ + or self._revisions[self._revid].text is None: + self.site().loadrevisions(self, getText=True, sysop=sysop) + # TODO: Exception handling for no-page, redirects, etc. + + return self._revisions[self._revid].text + + @deprecate_arg("throttle", None) + @deprecate_arg("nofollow_redirects", None) + @deprecate_arg("change_edit_time", None) + def getOldVersion(self, oldid, force=False, get_redirect=False, + sysop=False): + """Return text of an old revision of this page; same options as get(). + + @param oldid: The revid of the revision desired. + + """ + if force or not oldid in self._revisions \ + or self._revisions[oldid].text is None: + self.site().loadrevisions(self, getText=True, revids=oldid, + sysop=sysop) + # TODO: what about redirects, errors? 
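A minimal read-access sketch (editor's illustration, not part of the committed file) for get() and getOldVersion() as documented above; it assumes a configured user-config.py and that pywikibot.Site('en', 'wikipedia') returns a Site object, matching the Site(code, family) form used elsewhere in this module.

import pywikibot

site = pywikibot.Site('en', 'wikipedia')        # assumed factory; needs user-config.py
page = pywikibot.Page(site, u'Sandbox')         # title is a placeholder
text = page.get()                               # wikitext of the latest revision
older = page.getOldVersion(page.previousRevision())
pywikibot.output(u"Last edited by %s at %s"
                 % (page.userName(), page.editTime()))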
+ return self._revisions[oldid].text + + def permalink(self): + """Return the permalink URL for current revision of this page.""" + return "%s://%s/%sindex.php?title=%s&oldid=%s" \ + % (self.site().protocol(), + self.site().hostname(), + self.site().scriptpath(), + self.title(asUrl=True), + self.latestRevision()) + + def latestRevision(self): + """Return the current revision id for this page.""" + if not hasattr(self, '_revid'): + self.site().loadrevisions(self) + return self._revid + + def _textgetter(self): + """Return the current (edited) wikitext, loading it if necessary.""" + if not hasattr(self, '_text') or self._text is None: + try: + self._text = self.get() + except pywikibot.NoPage: + # TODO: what other exceptions might be returned? + self._text = u"" + return self._text + + def _textsetter(self, value): + """Update the edited wikitext""" + self._text = unicode(value) + + def _cleartext(self): + """Delete the edited wikitext""" + if hasattr(self, "_text"): + del self._text + + text = property(_textgetter, _textsetter, _cleartext, + "The edited wikitext (unicode) of this Page") + + def expand_text(self): + """Return the page text with all templates expanded.""" + req = pywikibot.data.api.Request(action="expandtemplates", + text=self.text, + title=self.title(withSection=False), + site=self.site()) + result = req.submit() + return result["expandtemplates"]["*"] + + def userName(self): + """Return name or IP address of last user to edit page.""" + return self._revisions[self.latestRevision()].user + + def isIpEdit(self): + """Return True if last editor was unregistered.""" + return self._revisions[self.latestRevision()].anon + + def editTime(self): + """Return timestamp (in ISO 8601 format) of last revision to page.""" + return self._revisions[self.latestRevision()].timestamp + + def previousRevision(self): + """Return the revision id for the previous revision of this Page.""" + vh = self.getVersionHistory(revCount=2) + revkey = sorted(self._revisions.keys(), reverse=True)[1] + return revkey + + def exists(self): + """Return True if page exists on the wiki, even if it's a redirect. + + If the title includes a section, return False if this section isn't + found. + + """ + return self.site().page_exists(self) + + def isRedirectPage(self): + """Return True if this is a redirect, False if not or not existing.""" + return self.site().page_isredirect(self) + + def isEmpty(self): + """Return True if the page text has less than 4 characters. + + Character count ignores language links and category links. + Can raise the same exceptions as get(). + + """ + txt = self.get() + txt = pywikibot.textlib.removeLanguageLinks(txt, site = self.site()) + txt = pywikibot.textlib.removeCategoryLinks(txt, site = self.site()) + if len(txt) < 4: + return True + else: + return False + + def isTalkPage(self): + """Return True if this page is in any talk namespace.""" + ns = self.namespace() + return ns >= 0 and ns % 2 == 1 + + def toggleTalkPage(self): + """Return other member of the article-talk page pair for this Page. + + If self is a talk page, returns the associated content page; + otherwise, returns the associated talk page. The returned page need + not actually exist on the wiki. + + Returns None if self is a special page. 
+ + """ + ns = self.namespace() + if ns < 0: # Special page + return None + if self.isTalkPage(): + if self.namespace() == 1: + return Page(self.site(), self.title(withNamespace=False)) + else: + return Page(self.site(), + self.site().namespace(ns - 1) + ':' + + self.title(withNamespace=False)) + else: + return Page(self.site(), + self.site().namespace(ns + 1) + ':' + + self.title(withNamespace=False)) + + def isCategory(self): + """Return True if the page is a Category, False otherwise.""" + return self.namespace() == 14 + + def isImage(self): + """Return True if this is an image description page, False otherwise.""" + return self.namespace() == 6 + + def isDisambig(self): + """Return True if this is a disambiguation page, False otherwise. + + Relies on the presence of specific templates, identified in + the Family file or on a wiki page, to identify disambiguation + pages. + + By default, loads a list of template names from the Family file; + if the value in the Family file is None, looks for the list on + [[MediaWiki:Disambiguationspage]]. + + """ + if not hasattr(self, "_isDisambig"): + if not hasattr(self.site(), "_disambigtemplates"): + self.site()._disambigtemplates = \ + self.site().family.disambig(self.site().code) + if self.site()._disambigtemplates is None: + try: + disambigpages = Page(self.site(), + "MediaWiki:Disambiguationspage") + self.site()._disambigtemplates = [ + link.title(withNamespace=False) + for link in disambigpages.linkedPages() + if link.namespace() == 10 + ] + except NoPage: + self.site()._disambigtemplates = ['Disambig'] + for t in self.templates(): + if t.title(withNamespace=False) in self.site()._disambigtemplates: + self._isDisambig = True + break + else: + self._isDisambig = False + return self._isDisambig + + def getReferences(self, follow_redirects=True, withTemplateInclusion=True, + onlyTemplateInclusion=False, redirectsOnly=False, + namespaces=None): + """Return an iterator all pages that refer to or embed the page. + + If you need a full list of referring pages, use + C{pages = list(s.getReferences())} + + @param follow_redirects: if True, also iterate pages that link to a + redirect pointing to the page. + @param withTemplateInclusion: if True, also iterate pages where self + is used as a template. + @param onlyTemplateInclusion: if True, only iterate pages where self + is used as a template. + @param redirectsOnly: if True, only iterate redirects to self. + @param namespaces: only iterate pages in these namespaces + + """ + # N.B.: this method intentionally overlaps with backlinks() and + # embeddedin(). Depending on the interface, it may be more efficient + # to implement those methods in the site interface and then combine + # the results for this method, or to implement this method and then + # split up the results for the others. + return self.site().pagereferences( + self, follow_redirects, redirectsOnly, + withTemplateInclusion, onlyTemplateInclusion, + namespaces) + + def backlinks(self, followRedirects=True, filterRedirects=None, + namespaces=None): + """Return an iterator for pages that link to this page. + + @param followRedirects: if True, also iterate pages that link to a + redirect pointing to the page. 
+ @param filterRedirects: if True, only iterate redirects; if False, + omit redirects; if None, do not filter + @param namespaces: only iterate pages in these namespaces + + """ + return self.site().pagebacklinks(self, followRedirects, filterRedirects, + namespaces) + + def embeddedin(self, filter_redirects=None, namespaces=None): + """Return an iterator for pages that embed this page as a template. + + @param filterRedirects: if True, only iterate redirects; if False, + omit redirects; if None, do not filter + @param namespaces: only iterate pages in these namespaces + + """ + return self.site().page_embeddedin(self, filter_redirects, namespaces) + + def canBeEdited(self): + """Return bool indicating whether this page can be edited. + + This returns True if and only if: + - page is unprotected, and bot has an account for this site, or + - page is protected, and bot has a sysop account for this site. + + """ + return self.site().page_can_be_edited(self) + + def botMayEdit(self): + """Return True if this page allows bots to edit it. + + This will be True if the page doesn't contain {{bots}} or + {{nobots}}, or it contains them and the active bot is allowed to + edit this page. (This method is only useful on those sites that + recognize the bot-exclusion protocol; on other sites, it will always + return True.) + + The framework enforces this restriction by default. It is possible + to override this by setting ignore_bot_templates=True in + user_config.py, or using page.put(force=True). + + """ # TODO: move this to Site object? + if config.ignore_bot_templates: #Check the "master ignore switch" + return True + try: + templates = self.templatesWithParams(); + except (pywikibot.NoPage, + pywikibot.IsRedirectPage, + pywikibot.SectionError): + return True + for template in templates: + title = template[0].title(withNamespace=False) + if title == 'Nobots': + return False + elif title == 'Bots': + if len(template[1]) == 0: + return True + else: + (ttype, bots) = template[1][0].split('=', 1) + bots = bots.split(',') + if ttype == 'allow': + if 'all' in bots or username in bots: + return True + else: + return False + if ttype == 'deny': + if 'all' in bots or username in bots: + return False + else: + return True + # no restricting template found + return True + + def save(self, comment=None, watch=None, minor=True, force=False, + async=False, callback=None): + """Save the current contents of page's text to the wiki. + + @param comment: The edit summary for the modification (optional, but + most wikis strongly encourage its use) + @type comment: unicode + @param watch: if True, add or if False, remove this Page to/from bot + user's watchlist; if None, leave watchlist status unchanged + @type watch: bool or None + @param minor: if True, mark this edit as minor + @type minor: bool + @param force: if True, ignore botMayEdit() setting + @type force: bool + @param async: if True, launch a separate thread to save + asynchronously + @param callback: a callable object that will be called after the + page put operation. This object must take two arguments: (1) a + Page object, and (2) an exception instance, which will be None + if the page was saved successfully. The callback is intended for + use by bots that need to keep track of which saves were + successful. 
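An illustrative editing sketch (not from the committed file) built on the text property, save(), and the callback contract described above; the page title and edit summary are placeholders, and a logged-in bot account is assumed.

def report(page, err):
    # save() calls back with the Page and an exception instance (None on success)
    if err is None:
        pywikibot.output(u"Saved %s" % page.title(asLink=True))
    else:
        pywikibot.output(u"Not saved %s: %s" % (page.title(asLink=True), err))

page = pywikibot.Page(pywikibot.Site('en', 'wikipedia'), u'Sandbox')
page.text += u"\n\ntest edit"                   # edit the cached wikitext
page.save(comment=u"Bot: test edit", minor=True, callback=report)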
+ + """ + if not comment: + comment = pywikibot.default_comment # needs to be defined + if watch is None: + unwatch = False + watch = False + else: + unwatch = not watch + if not force and not self.botMayEdit: + raise pywikibot.PageNotSaved( + "Page %s not saved; editing restricted by {{bots}} template" + % self.title(asLink=True)) + if async: + thd = threading.Thread( + target=self._save, + args=(comment, minor, watch, unwatch, callback) + ) + pywikibot.threadpool.append(thd) + thd.start() + else: + self._save(comment, minor, watch, unwatch, callback) + + def _save(self, comment, minor, watch, unwatch, callback): + err = None + try: + done = self.site().editpage(self, summary=comment, minor=minor, + watch=watch, unwatch=unwatch) + if not done: + logger.warn("Page %s not saved" % self.title(asLink=True)) + else: + logger.info("Page %s saved" % self.title(asLink=True)) + except pywikibot.Error, err: + logger.exception("Error saving page %s" % self.title(asLink=True)) + if callback: + callback(self, err) + + def put(self, newtext, comment=u'', watchArticle=None, minorEdit=True, + force=False, async=False, callback=None): + """Save the page with the contents of the first argument as the text. + + This method is maintained primarily for backwards-compatibility. + For new code, using Page.save() is preferred. See save() method + docs for all parameters not listed here. + + @param newtext: The complete text of the revised page. + @type newtext: unicode + + """ + self.text = newtext + return self.save(comment, watchArticle, minorEdit, force, + async, callback) + + def put_async(self, newtext, comment=u'', watchArticle=None, + minorEdit=True, force=False, callback=None): + """Put page on queue to be saved to wiki asynchronously. + + Asynchronous version of put (takes the same arguments), which places + pages on a queue to be saved by a daemon thread. All arguments are + the same as for .put(). This version is maintained solely for + backwards-compatibility. + + """ + return self.put(self, newtext, comment, watchArticle, + minorEdit, force, callback, async=True) + + def linkedPages(self): + """Iterate Pages that this Page links to. + + Only returns pages from "normal" internal links. Image and category + links are omitted unless prefixed with ":". Embedded templates are + omitted (but links within them are returned). All interwiki and + external links are omitted. + + @return: a generator that yields Page objects. + + """ + return self.site().pagelinks(self) + + def interwiki(self, expand=True): + """Iterate interwiki links in the page text, excluding language links. 
+ + @param expand: if True (default), include interwiki links found in + templates transcluded onto this page; if False, only iterate + interwiki links found in this page's own wikitext + @return: a generator that yields Link objects + + """ + # This function does not exist in the API, so it has to be + # implemented by screen-scraping + if expand: + text = self.expand_text() + else: + text = self.text + for linkmatch in pywikibot.link_regex.finditer( + pywikibot.textlib.removeDisabledParts(text)): + linktitle = linkmatch.group("title") + link = Link(linktitle, self.site()) + # only yield links that are to a different site and that + # are not language links + try: + if link.site != self.site(): + if linktitle.lstrip().startswith(":"): + # initial ":" indicates not a language link + yield link + elif link.site.family != self.site().family: + # link to a different family is not a language link + yield link + except pywikibot.Error: + # ignore any links with invalid contents + continue + + def langlinks(self): + """Iterate all interlanguage links on this page. + + @return: a generator that yields Link objects. + + """ + return self.site().pagelanglinks(self) + + @deprecate_arg("followRedirects", None) + @deprecate_arg("loose", None) + def imagelinks(self, followRedirects=None, loose=None): + """Iterate ImagePage objects for images displayed on this Page. + + @return: a generator that yields ImagePage objects. + + """ + return self.site().pageimages(self) + + def templates(self): + """Iterate Page objects for templates used on this Page. + + Template parameters are ignored. This method only returns embedded + templates, not template pages that happen to be referenced through + a normal link. + + """ + return self.site().pagetemplates(self) + + def templatesWithParams(self): + """Iterate templates used on this Page. + + @return: a generator that yields a tuple for each use of a template + in the page, with the template Page as the first entry and a list of + parameters as the second entry. + + """ + templates = pywikibot.textlib.extract_templates_and_params(self.text) + # backwards-compatibility: convert the dict returned as the second + # element into a list in the format used by old scripts + result = [] + for template in templates: + args = template[1] + positional = [] + named = {} + for key in sorted(args.keys()): + try: + int(key) + except ValueError: + named[key] = args[key] + else: + positional.append(args[key]) + for name in named: + positional.append("%s=%s" % (name, named[name])) + result.append((pywikibot.Page( + pywikibot.Link(template[0], self.site())), + positional)) + return result + + @deprecate_arg("nofollow_redirects", None) + def categories(self, withSortKey=False): + """Iterate categories that the article is in. + + @param withSortKey: if True, include the sort key in each Category. + @return: a generator that yields Category objects. + + """ + return self.site().pagecategories(self, withSortKey=withSortKey) + + def extlinks(self): + """Iterate all external URLs (not interwiki links) from this page. + + @return: a generator that yields unicode objects containing URLs. + + """ + return self.site().page_extlinks(self) + + def getRedirectTarget(self): + """Return a Page object for the target this Page redirects to. + + If this page is not a redirect page, will raise an IsNotRedirectPage + exception. This method also can raise a NoPage exception. 
+ + """ + if not self.isRedirectPage(): + raise pywikibot.IsNotRedirectPage + if not isinstance(self._redir, Page): + self.site().getredirtarget(self) + return self._redir + + @deprecate_arg("forceReload", None) + def getVersionHistory(self, reverseOrder=False, getAll=False, + revCount=500): + """Load the version history page and return history information. + + Return value is a list of tuples, where each tuple represents one + edit and is built of revision id, edit date/time, user name, and + edit summary. Starts with the most current revision, unless + reverseOrder is True. Defaults to getting the first revCount edits, + unless getAll is True. + + """ + if getAll: + limit = None + else: + limit = revCount + self.site().loadrevisions(self, getText=False, rvdir=reverseOrder, + limit=limit) + if getAll: + revCount = len(self._revisions) + return [ ( self._revisions[rev].revid, + self._revisions[rev].timestamp, + self._revisions[rev].user, + self._revisions[rev].comment + ) for rev in sorted(self._revisions.keys(), + reverse=not reverseOrder)[ : revCount] + ] + + def getVersionHistoryTable(self, forceReload=False, reverseOrder=False, + getAll=False, revCount=500): + """Return the version history as a wiki table.""" + result = '{| border="1"\n' + result += '! oldid || date/time || username || edit summary\n' + for oldid, time, username, summary \ + in self.getVersionHistory(forceReload=forceReload, + reverseOrder=reverseOrder, + getAll=getAll, revCount=revCount): + result += '|----\n' + result += '| %s || %s || %s || <nowiki>%s</nowiki>\n'\ + % (oldid, time, username, summary) + result += '|}\n' + return result + + def fullVersionHistory(self): + """Iterate all previous versions including wikitext. + + @return: A generator that yields tuples consisting of revision ID, + edit date/time, user name and content + """ + return self.site().loadrevisions(self, withText=True) + + def contributingUsers(self): + """Return a set of usernames (or IPs) of users who edited this page.""" + edits = self.getVersionHistory() + users = set([edit[2] for edit in edits]) + return users + + @deprecate_arg("throttle", None) + def move(self, newtitle, reason=None, movetalkpage=True, sysop=False, + deleteAndMove=False, safe=True): + """Move this page to a new title. + + @param newtitle: The new page title. + @param reason: The edit summary for the move. + @param movetalkpage: If true, move this page's talk page (if it exists) + @param sysop: Try to move using sysop account, if available + @param deleteAndMove: if move succeeds, delete the old page + (usually requires sysop privileges, depending on wiki settings) + @param safe: If false, attempt to delete existing page at newtitle + (if there is one) and then move this page to that title + + """ + if reason is None: + logger.info(u'Moving %s to [[%s]].' + % (self.title(asLink=True), newtitle)) + reason = pywikibot.input(u'Please enter a reason for the move:') + # TODO: implement "safe" parameter + # TODO: implement "sysop" parameter + return self.site().movepage(self, newtitle, reason, + movetalk=movetalkpage, + noredirect=deleteAndMove) + + @deprecate_arg("throttle", None) + def delete(self, reason=None, prompt=True, throttle=None, mark=False): + """Deletes the page from the wiki. Requires administrator status. + + @param reason: The edit summary for the deletion. + @param prompt: If true, prompt user for confirmation before deleting. + @param mark: if true, and user does not have sysop rights, place a + speedy-deletion request on the page instead. 
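A short sketch (editor's illustration, not from the committed file) of the history accessors documented above; getVersionHistory() returns (revid, timestamp, username, summary) tuples, newest first by default.

page = pywikibot.Page(pywikibot.Site('en', 'wikipedia'), u'Sandbox')
for revid, timestamp, username, summary in page.getVersionHistory(revCount=10):
    pywikibot.output(u"%s | %s | %s | %s" % (revid, timestamp, username, summary))
pywikibot.output(u"Contributors: %s" % u", ".join(page.contributingUsers()))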
+ + """ + if reason is None: + logger.info(u'Deleting %s.' % (self.title(asLink=True))) + reason = pywikibot.input(u'Please enter a reason for the deletion:') + answer = u'y' + if prompt and not hasattr(self.site(), '_noDeletePrompt'): + answer = pywikibot.inputChoice(u'Do you want to delete %s?' + % self.title(asLink = True, forceInterwiki = True), + ['Yes', 'No', 'All'], + ['Y', 'N', 'A'], + 'N') + if answer in ['a', 'A']: + answer = 'y' + self.site()._noDeletePrompt = True + if answer in ['y', 'Y']: + return self.site().delete(self, reason, mark=mark) + + def loadDeletedRevisions(self): + """Retrieve all deleted revisions for this Page from Special/Undelete. + + Stores all revisions' timestamps, dates, editors and comments in + self._deletedRevs attribute. + + @return: list of timestamps (which can be used to retrieve revisions + later on). + + """ + return self.site().loadDeletedRevisions(self) + + def getDeletedRevision(self, timestamp, retrieveText=False): + """Return a particular deleted revision by timestamp. + + @return: a list of [date, editor, comment, text, restoration + marker]. text will be None, unless retrieveText is True (or has + been retrieved earlier). If timestamp is not found, returns + None. + + """ + return self.site().getDeletedRevision(self, timestamp, + getText=retrieveText) + + def markDeletedRevision(self, timestamp, undelete=True): + """Mark the revision identified by timestamp for undeletion. + + @param undelete: if False, mark the revision to remain deleted. + + """ + if self._deletedRevs == None: + self.loadDeletedRevisions() + if not self._deletedRevs.has_key(timestamp): + #TODO: Throw an exception? + return None + self._deletedRevs[timestamp][4] = undelete + self._deletedRevsModified = True + + @deprecate_arg("throttle", None) + def undelete(self, comment=None): + """Undelete revisions based on the markers set by previous calls. + + If no calls have been made since loadDeletedRevisions(), everything + will be restored. + + Simplest case:: + Page(...).undelete('This will restore all revisions') + + More complex:: + pg = Page(...) + revs = pg.loadDeletedRevsions() + for rev in revs: + if ... #decide whether to undelete a revision + pg.markDeletedRevision(rev) #mark for undeletion + pg.undelete('This will restore only selected revisions.') + + @param comment: The undeletion edit summary. + + """ + if comment is None: + logger.info(u'Preparing to undelete %s.' + % (self.title(asLink=True))) + comment = pywikibot.input( + u'Please enter a reason for the undeletion:') + return self.site().undelete(self, comment) + + @deprecate_arg("throttle", None) + def protect(self, edit='sysop', move='sysop', unprotect=False, + reason=None, prompt=True): + """(Un)protect a wiki page. Requires administrator status. + + Valid protection levels (in MediaWiki 1.12) are '' (equivalent to + 'none'), 'autoconfirmed', and 'sysop'. + + @param edit: Level of edit protection + @param move: Level of move protection + @param unprotect: If true, unprotect the page (equivalent to setting + all protection levels to '') + @param reason: Edit summary. + @param prompt: If true, ask user for confirmation. + + """ + if reason is None: + if unprotect: + un = u'un' + else: + un = u'' + logger.info(u'Preparing to %sprotect %s.' 
+ % (un, self.title(asLink=True))) + reason = pywikibot.input(u'Please enter a reason for the action:') + if unprotect: + edit = move = "" + answer = 'y' + if prompt and not hasattr(self.site(), '_noProtectPrompt'): + answer = pywikibot.inputChoice( + u'Do you want to change the protection level of %s?' + % self.title(asLink=True, forceInterwiki = True), + ['Yes', 'No', 'All'], ['Y', 'N', 'A'], 'N') + if answer in ['a', 'A']: + answer = 'y' + self.site()._noProtectPrompt = True + if answer in ['y', 'Y']: + return self.site().protect(self, edit, move, reason) + + def change_category(article, oldCat, newCat, comment=None, sortKey=None, + inPlace=True): + """Remove page from oldCat and add it to newCat. + + oldCat and newCat should be Category objects. + If newCat is None, the category will be removed. + + """ # TODO: document remaining arguments + cats = self.categories(get_redirect=True) + site = self.site() + changesMade = False + + if not self.canBeEdited(): + pywikibot.output(u"Can't edit %s, skipping it..." + % self.title(asLink=True)) + return False + if inPlace == True: + newtext = pywikibot.textlib.replaceCategoryInPlace( + self.text, oldCat, newCat) + if newtext == self.text: + pywikibot.output( + u'No changes in made in page %s.' + % self.title(asLink=True)) + return False + try: + self.put(newtext, comment) + return True + except pywikibot.EditConflict: + pywikibot.output( + u'Skipping %s because of edit conflict' + % self.title(asLink=True)) + except pywikibot.LockedPage: + pywikibot.output(u'Skipping locked page %s' + % self.title(asLink=True)) + except pywikibot.SpamfilterError, error: + pywikibot.output( + u'Changing page %s blocked by spam filter (URL=%s)' + % (self.title(asLink=True), error.url)) + except pywikibot.NoUsername: + pywikibot.output( + u"Page %s not saved; sysop privileges required." + % self.title(asLink=True)) + except pywikibot.PageNotSaved, error: + pywikibot.output(u"Saving page %s failed: %s" + % (self.title(asLink=True), error.message)) + return False + + # This loop will replace all occurrences of the category to be changed, + # and remove duplicates. + newCatList = [] + newCatSet = set() + for i in range(len(cats)): + cat = cats[i] + if cat == oldCat: + changesMade = True + if not sortKey: + sortKey = cat.sortKey + if newCat: + if newCat.title() not in newCatSet: + newCategory = Category(site, newCat.title(), + sortKey=sortKey) + newCatSet.add(newCat.title()) + newCatList.append(newCategory) + elif cat.title() not in newCatSet: + newCatSet.add(cat.title()) + newCatList.append(cat) + + if not changesMade: + pywikibot.output(u'ERROR: %s is not in category %s!' + % (self.title(asLink=True), oldCat.title())) + else: + try: + text = pywikibot.textlib.replaceCategoryLinks(self.text, + newCatList) + except ValueError: + # Make sure that the only way replaceCategoryLinks() can return + # a ValueError is in the case of interwiki links to self. 
+ pywikibot.output( + u'Skipping %s because of interwiki link to self' % self) + try: + self.put(text, comment) + except pywikibot.EditConflict: + pywikibot.output( + u'Skipping %s because of edit conflict' % self.title()) + except pywikibot.SpamfilterError, e: + pywikibot.output( + u'Skipping %s because of blacklist entry %s' + % (self.title(), e.url)) + except pywikibot.LockedPage: + pywikibot.output( + u'Skipping %s because page is locked' % self.title()) + except pywikibot.PageNotSaved, error: + pywikibot.output(u"Saving page %s failed: %s" + % (self.title(asLink=True), error.message)) + +######## DEPRECATED METHODS ######## + + def encoding(self): + """DEPRECATED: use Site.encoding() instead""" + logger.debug(u"Page.encoding() is deprecated; use Site.encoding().") + return self.site().encoding() + + def titleWithoutNamespace(self, underscore=False): + """DEPRECATED: use self.title(withNamespace=False) instead.""" + logger.debug( + u"Page.titleWithoutNamespace() method is deprecated.") + return self.title(underscore=underscore, withNamespace=False, + withSection=False) + + def titleForFilename(self): + """DEPRECATED: use self.title(as_filename=True) instead.""" + logger.debug( + u"Page.titleForFilename() method is deprecated.") + return self.title(as_filename=True) + + def sectionFreeTitle(self, underscore=False): + """DEPRECATED: use self.title(withSection=False) instead.""" + logger.debug( + u"Page.sectionFreeTitle() method is deprecated.") + return self.title(underscore=underscore, withSection=False) + + def aslink(self, forceInterwiki=False, textlink=False, noInterwiki=False): + """DEPRECATED: use self.title(asLink=True) instead.""" + logger.debug(u"Page.aslink() method is deprecated.") + return self.title(asLink=True, forceInterwiki=forceInterwiki, + allowInterwiki=not noInterwiki, textlink=textlink) + + def urlname(self): + """Return the Page title encoded for use in an URL. + + DEPRECATED: use self.title(asUrl=True) instead. + + """ + logger.debug(u"Page.urlname() method is deprecated.") + return self.title(asUrl=True) + +####### DISABLED METHODS (warnings provided) ###### + # these methods are easily replaced by editing the page's text using + # textlib methods and then using put() on the result. + + def removeImage(self, image, put=False, summary=None, safe=True): + """Old method to remove all instances of an image from page.""" + logger.warning(u"Page.removeImage() is no longer supported.") + + def replaceImage(self, image, replacement=None, put=False, summary=None, + safe=True): + """Old method to replace all instances of an image with another.""" + logger.warning(u"Page.replaceImage() is no longer supported.") + + +class ImagePage(Page): + """A subclass of Page representing an image descriptor wiki page. + + Supports the same interface as Page, with the following added methods: + + getImagePageHtml : Download image page and return raw HTML text. + fileURL : Return the URL for the image described on this + page. + fileIsOnCommons : Return True if image stored on Wikimedia + Commons. + fileIsShared : Return True if image stored on Wikitravel + shared repository. + getFileMd5Sum : Return image file's MD5 checksum. + getFileVersionHistory : Return the image file's version history. + getFileVersionHistoryTable: Return the version history in the form of a + wiki table. + usingPages : Iterate Pages on which the image is displayed. 
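An illustrative sketch of the ImagePage accessors listed above (editor's example, not from the committed file); it assumes ImagePage is exported from the pywikibot package in the same way as Page, and the file title is a placeholder.

image = pywikibot.ImagePage(pywikibot.Site('en', 'wikipedia'),
                            u'Image:Example.jpg')   # package-level export assumed
pywikibot.output(image.fileUrl())
if not image.fileIsOnCommons():
    for page in image.usingPages():
        pywikibot.output(u"Used on %s" % page.title())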
+ + """ + def __init__(self, source, title=u"", insite=None): + Page.__init__(self, source, title, 6) + if self.namespace() != 6: + raise ValueError(u"'%s' is not in the image namespace!" % title) + + def getImagePageHtml(self): + """ + Download the image page, and return the HTML, as a unicode string. + + Caches the HTML code, so that if you run this method twice on the + same ImagePage object, the page will only be downloaded once. + """ + if not hasattr(self, '_imagePageHtml'): + from pywikibot.data import http + path = "%s/index.php?title=%s" \ + % (self.site().scriptpath(), self.title(asUrl=True)) + self._imagePageHtml = http.request(self.site(), path) + return self._imagePageHtml + + def fileUrl(self): + """Return the URL for the image described on this page.""" + # TODO add scaling option? + if not hasattr(self, '_imageinfo'): + self._imageinfo = self.site().getimageinfo(self) #FIXME + return self._imageinfo['url'] + + def fileIsOnCommons(self): + """Return True if the image is stored on Wikimedia Commons""" + return self.fileUrl().startswith( + 'http://upload.wikimedia.org/wikipedia/commons/') + + def fileIsShared(self): + """Return True if image is stored on any known shared repository.""" + # as of now, the only known repositories are commons and wikitravel + if 'wikitravel_shared' in self.site().shared_image_repository(): + return self.fileUrl().startswith( + u'http://wikitravel.org/upload/shared/') + return self.fileIsOnCommons() + + def getFileMd5Sum(self): + """Return image file's MD5 checksum.""" + logger.debug( + "ImagePage.getFileMd5Sum() is deprecated; use getFileSHA1Sum().") +# FIXME: MD5 might be performed on incomplete file due to server disconnection +# (see bug #1795683). + import md5, urllib + f = urllib.urlopen(self.fileUrl()) + # TODO: check whether this needs a User-Agent header added + md5Checksum = md5.new(f.read()).hexdigest() + f.close() + return md5Checksum + + def getFileSHA1Sum(self): + """Return image file's SHA1 checksum.""" + if not hasattr(self, '_imageinfo'): + self._imageinfo = self.site().getimageinfo(self) #FIXME + return self._imageinfo['sha1'] + + def getFileVersionHistory(self): + """Return the image file's version history. + + @return: An iterator yielding tuples containing (timestamp, + username, resolution, filesize, comment). + + """ + #TODO; return value may need to change + return self.site().getimageinfo(self, history=True) #FIXME + + def getFileVersionHistoryTable(self): + """Return the version history in the form of a wiki table.""" + lines = [] + #TODO: if getFileVersionHistory changes, make sure this follows it + for (datetime, username, resolution, size, comment) \ + in self.getFileVersionHistory(): + lines.append('| %s || %s || %s || %s || <nowiki>%s</nowiki>' \ + % (datetime, username, resolution, size, comment)) + return u'{| border="1"\n! date/time || username || resolution || size || edit summary\n|----\n' + u'\n|----\n'.join(lines) + '\n|}' + + def usingPages(self): + """Yield Pages on which the image is displayed.""" + return self.site().getimageusage(self) + + +class Category(Page): + """A page in the Category: namespace""" + + @deprecate_arg("sortKey", None) + def __init__(self, source, title=u"", insite=None): + """All parameters are the same as for Page() constructor. + + """ + Page.__init__(self, source, title, 14) + if self.namespace() != 14: + raise ValueError(u"'%s' is not in the category namespace!" 
+ % title) + + @deprecate_arg("forceInterwiki", None) + @deprecate_arg("textlink", None) + @deprecate_arg("noInterwiki", None) + def aslink(self, sortKey=u''): + """Return a link to place a page in this Category. + + Use this only to generate a "true" category link, not for interwikis + or text links to category pages. + + @param sortKey: The sort key for the article to be placed in this + Category; if omitted, default sort key is used. + @type sortKey: (optional) unicode + + """ + if sortKey: + titleWithSortKey = '%s|%s' % (self.title(withSection=False), + self.sortKey) + else: + titleWithSortKey = self.title(withSection=False) + return '[[%s]]' % titleWithSortKey + + @deprecate_arg("startFrom", None) + @deprecate_arg("cacheResults", None) + def subcategories(self, recurse=False): + """Iterate all subcategories of the current category. + + @param recurse: if not False or 0, also iterate subcategories of + subcategories. If an int, limit recursion to this number of + levels. (Example: recurse=1 will iterate direct subcats and + first-level sub-sub-cats, but no deeper.) + @type recurse: int or bool + + """ + if not isinstance(recurse, bool) and recurse: + recurse = recurse - 1 + if not hasattr(self, "_subcats"): + self._subcats = [] + for member in self.site().categorymembers(self, namespaces=[14]): + subcat = Category(self.site(), member.title()) + self._subcats.append(subcat) + yield subcat + if recurse: + for item in subcat.subcategories(recurse): + yield item + else: + for subcat in self._subcats: + yield subcat + if recurse: + for item in subcat.subcategories(recurse): + yield item + + @deprecate_arg("startFrom", None) + def articles(self, recurse=False): + """ + Yields all articles in the current category. + + @param recurse: if not False or 0, also iterate articles in + subcategories. If an int, limit recursion to this number of + levels. (Example: recurse=1 will iterate articles in first-level + subcats, but no deeper.) + @type recurse: int or bool + + """ + namespaces = [x for x in self.site().namespaces().keys() + if x>=0 and x!=14] + for member in self.site().categorymembers(self, + namespaces=namespaces): + yield member + if recurse: + if not isinstance(recurse, bool) and recurse: + recurse = recurse - 1 + for subcat in self.subcategories(): + for article in subcat.articles(recurse): + yield article + + def isEmptyCategory(self): + """Return True if category has no members (including subcategories).""" + for member in self.site().categorymembers(self, limit=1): + return False + return True + + def copyTo(self, catname): + """ + Copy text of category page to a new page. Does not move contents. + + @param catname: New category title (without namespace) + @return: True if copying was successful, False if target page + already existed. + + """ + # This seems far too specialized to be in the top-level framework + catname = self.site().category_namespace() + ':' + catname + targetCat = Category(self.site(), catname) + if targetCat.exists(): + logger.warn('Target page %s already exists!' + % targetCat.title()) + return False + else: + logger.info('Moving text from %s to %s.' + % (self.title(), targetCat.title())) + authors = ', '.join(self.contributingUsers()) + creationSummary = pywikibot.translate( + self.site(), msg_created_for_renaming + ) % (self.title(), authors) + targetCat.put(self.get(), creationSummary) + return True + + def copyAndKeep(self, catname, cfdTemplates): + """Copy partial category page text (not contents) to a new title. 
+ + Like copyTo above, except this removes a list of templates (like + deletion templates) that appear in the old category text. It also + removes all text between the two HTML comments BEGIN CFD TEMPLATE + and END CFD TEMPLATE. (This is to deal with CFD templates that are + substituted.) + + Returns true if copying was successful, false if target page already + existed. + + @param catname: New category title (without namespace) + @param cfdTemplates: A list (or iterator) of templates to be removed + from the page text + @return: True if copying was successful, False if target page + already existed. + + """ + # I don't see why we need this as part of the framework either + catname = self.site().category_namespace() + ':' + catname + targetCat = Category(self.site(), catname) + if targetCat.exists(): + logger.warn('Target page %s already exists!' + % targetCat.title()) + return False + else: + logger.info('Moving text from %s to %s.' + % (self.title(), targetCat.title())) + authors = ', '.join(self.contributingUsers()) + creationSummary = pywikibot.translate( + self.site(), msg_created_for_renaming + ) % (self.title(), authors) + newtext = self.get() + for regexName in cfdTemplates: + matchcfd = re.compile(r"{{%s.*?}}" % regexName, re.IGNORECASE) + newtext = matchcfd.sub('',newtext) + matchcomment = re.compile( + r"<!--BEGIN CFD TEMPLATE-->.*?<!--END CFD TEMPLATE-->", + re.IGNORECASE | re.MULTILINE | re.DOTALL) + newtext = matchcomment.sub('', newtext) + pos = 0 + while (newtext[pos:pos+1] == "\n"): + pos = pos + 1 + newtext = newtext[pos:] + targetCat.put(newtext, creationSummary) + return True + +#### DEPRECATED METHODS #### + def subcategoriesList(self, recurse=False): + """DEPRECATED: Equivalent to list(self.subcategories(...))""" + logger.debug("Category.subcategoriesList() method is deprecated.") + return sorted(list(set(self.subcategories(recurse)))) + + def articlesList(self, recurse=False): + """DEPRECATED: equivalent to list(self.articles(...))""" + logger.debug("Category.articlesList() method is deprecated.") + return sorted(list(set(self.articles(recurse)))) + + def supercategories(self): + """DEPRECATED: equivalent to self.categories()""" + logger.debug("Category.supercategories() method is deprecated.") + return self.categories() + + def supercategoriesList(self): + """DEPRECATED: equivalent to list(self.categories(...))""" + logger.debug("Category.articlesList() method is deprecated.") + return sorted(list(set(self.categories()))) + + +class Revision(object): + """A structure holding information about a single revision of a Page.""" + def __init__(self, revid, timestamp, user, anon=False, comment=u"", + text=None, minor=False): + """All parameters correspond to object attributes (e.g., revid + parameter is stored as self.revid) + + @param revid: Revision id number + @type revid: int + @param text: Revision wikitext. 
+ @type text: unicode, or None if text not yet retrieved + @param timestamp: Revision time stamp (in ISO 8601 format) + @type timestamp: unicode + @param user: user who edited this revision + @type user: unicode + @param anon: user is unregistered + @type anon: bool + @param comment: edit comment text + @type comment: unicode + @param minor: edit flagged as minor + @type minor: bool + + """ + self.revid = revid + self.text = text + self.timestamp = timestamp + self.user = user + self.anon = anon + self.comment = comment + self.minor = minor + + +class Link(object): + """A Mediawiki link (local or interwiki) + + Has the following attributes: + + - site: The Site object for the wiki linked to + - namespace: The namespace of the page linked to (int) + - title: The title of the page linked to (unicode); does not include + namespace or section + - section: The section of the page linked to (unicode or None); this + contains any text following a '#' character in the title + - anchor: The anchor text (unicode or None); this contains any text + following a '|' character inside the link + + """ + illegal_titles_pattern = re.compile( + # Matching titles will be held as illegal. + u'''[^ %!"$&'()*,\-.\/0-9:;=?@A-Z\\^_`a-z~\u0080-\uFFFF+]''' + # URL percent encoding sequences interfere with the ability + # to round-trip titles -- you can't link to them consistently. + u'|%[0-9A-Fa-f]{2}' + # XML/HTML character references produce similar issues. + u'|&[A-Za-z0-9\x80-\xff]+;' + u'|&#[0-9]+;' + u'|&#x[0-9A-Fa-f]+;' + ) + + def __init__(self, text, source=None, defaultNamespace=0): + """Constructor + + @param text: the link text (everything appearing between [[ and ]] + on a wiki page) + @type text: unicode + @param source: the Site on which the link was found (not necessarily + the site to which the link refers) + @type source: Site + @param defaultNamespace: a namespace to use if the link does not + contain one (defaults to 0) + @type defaultNamespace: int + + """ + self._text = text + self._source = source + self._defaultns = defaultNamespace + + def parse(self): + """Parse text; called internally when accessing attributes""" + + # First remove the anchor, which is stored unchanged, if there is one + if u"|" in self._text: + self._text, self._anchor = self._text.split(u"|", 1) + else: + self._anchor = None + + if self._source is None: + self._source = pywikibot.Site() + self._site = self._source + + # Clean up the name, it can come from anywhere. + # Convert HTML entities to unicode + t = html2unicode(self._text) + + # Convert URL-encoded characters to unicode + t = url2unicode(t, site=self._site) + + # Normalize unicode string to a NFC (composed) format to allow proper + # string comparisons. According to + # http://svn.wikimedia.org/viewvc/mediawiki/branches/REL1_6/phase3/includes/no... + # the mediawiki code normalizes everything to NFC, not NFKC (which + # might result in information loss). + t = unicodedata.normalize('NFC', t) + + # This code was adapted from Title.php : secureAndSplit() + # + if u'\ufffd' in t: + raise pywikibot.Error("Title contains illegal char (\uFFFD)") + self._namespace = self._defaultns + + # Replace underscores by spaces + t = t.replace(u"_", u" ") + # replace multiple spaces and underscores with a single space + while u" " in t: t = t.replace(u" ", u" ") + # Strip spaces at both ends + t = t.strip(" ") + # Remove left-to-right and right-to-left markers. 
+ t = t.replace(u"\u200e", u"").replace(u"\u200f", u"") + + firstPass = True + while u":" in t: + # Initial colon indicates main namespace rather than default + if t.startswith(u":"): + self._namespace = 0 + # remove the colon but continue processing + # remove any subsequent whitespace + t = t.lstrip(u":").lstrip(u" ") + continue + + fam = self._site.family + prefix = t[ :t.index(u":")].lower() + ns = self._site.ns_index(prefix) + if ns: + # Ordinary namespace + t = t[t.index(u":"): ].lstrip(u":").lstrip(u" ") + self._namespace = ns + break + if prefix in fam.langs.keys()\ + or prefix in fam.get_known_families(site=self._site): + # looks like an interwiki link + if not firstPass: + # Can't make a local interwiki link to an interwiki link. + raise pywikibot.Error( + "Improperly formatted interwiki link '%s'" + % self._text) + t = t[t.index(u":"): ].lstrip(u":").lstrip(u" ") + if prefix in fam.langs.keys(): + newsite = pywikibot.Site(prefix, fam) + else: + otherlang = self._site.code + familyName = fam.get_known_families(site=self._site)[prefix] + if familyName in ['commons', 'meta']: + otherlang = familyName + try: + newsite = pywikibot.Site(otherlang, familyName) + except ValueError: + raise pywikibot.Error("""\ +%s is not a local page on %s, and the %s family is +not supported by PyWikiBot!""" + % (title, self._site(), familyName)) + + # Redundant interwiki prefix to the local wiki + if newsite == self._site: + if not t: + # Can't have an empty self-link + raise pywikibot.Error( + "Invalid link title: '%s'" % self._text) + firstPass = False + continue + self._site = newsite + else: + break # text before : doesn't match any known prefix + + if u"#" in t: + t, sec = t.split(u'#', 1) + t, self._section = t.rstrip(), sec.lstrip() + else: + self._section = None + + # Reject illegal characters. + m = Link.illegal_titles_pattern.search(t) + if m: + raise pywikibot.Error( + u"Invalid title: contains illegal char(s) '%s'" % m.group(0)) + + # Pages with "/./" or "/../" appearing in the URLs will + # often be unreachable due to the way web browsers deal + #* with 'relative' URLs. Forbid them explicitly. + + if u'.' in t and ( + t == u'.' or t == u'..' + or t.startswith(u"./") + or t.startswith(u"../") + or u"/./" in t + or u"/../" in t + or t.endswith(u"/.") + or t.endswith(u"/..") + ): + raise pywikibot.Error( + "Invalid title (contains . / combinations): '%s'" + % self._text) + + # Magic tilde sequences? Nu-uh! + if u"~~~" in t: + raise pywikibot.Error("Invalid title (contains ~~~): '%s'" % self._text) + + if self._namespace != -1 and len(t) > 255: + raise pywikibot.Error("Invalid title (over 255 bytes): '%s'" % t) + + if self._site.case() == 'first-letter': + t = t[:1].upper() + t[1:] + + # Can't make a link to a namespace alone... + # "empty" local links can only be self-links + # with a fragment identifier. 
+ if not t and self._site == self._source and self._namespace != 0: + raise ValueError("Invalid link (no page title): '%s'" % self._text) + + self._title = t + + # define attributes, to be evaluated lazily + + @property + def site(self): + if not hasattr(self, "_site"): + self.parse() + return self._site + + @property + def namespace(self): + if not hasattr(self, "_namespace"): + self.parse() + return self._namespace + + @property + def title(self): + if not hasattr(self, "_title"): + self.parse() + return self._title + + @property + def section(self): + if not hasattr(self, "_section"): + self.parse() + return self._section + + @property + def anchor(self): + if not hasattr(self, "_anchor"): + self.parse() + return self._anchor + + def astext(self, onsite=None): + """Return a text representation of the link. + + @param onsite: if specified, present as a (possibly interwiki) link + from the given site; otherwise, present as an internal link on + the source site. + + """ + if onsite is None: + onsite = self.site + title = self.title + if self.namespace: + title = onsite.namespace(self.namespace) + ":" + title + if self.section: + title = title + "#" + self.section + if onsite == self.site: + return u'[[%s]]' % title + if onsite.family == self.site.family: + return u'[[%s:%s]]' % (self.site.code, title) + if self.site.family.name == self.site.code: + # use this form for sites like commons, where the + # code is the same as the family name + return u'[[%s:%s]]' % (self.site.code, + title) + return u'[[%s:%s:%s]]' % (self.site.family.name, + self.site.code, + title) + + def __str__(self): + return self.astext() + + def __cmp__(self, other): + """Test for equality and inequality of Link objects. + + Link objects are "equal" if and only if they are on the same site + and have the same normalized title, including section if any. + + Link objects are sortable by site, then namespace, then title. + + """ + if not isinstance(other, Link): + # especially, return -1 if other is None + return -1 + if not self.site == other.site: + return cmp(self.site, other.site) + if self.namespace != other.namespace: + return cmp(self.namespace, other.namespace) + return cmp(self.title, other.title) + + +# Utility functions for parsing page titles + +def html2unicode(text, ignore = []): + """Return text, replacing HTML entities by equivalent unicode characters.""" + # This regular expression will match any decimal and hexadecimal entity and + # also entities that might be named entities. 
+ entityR = re.compile( + r'&(#(?P<decimal>\d+)|#x(?P<hex>[0-9a-fA-F]+)|(?P<name>[A-Za-z]+));') + # These characters are Html-illegal, but sadly you *can* find some of + # these and converting them to unichr(decimal) is unsuitable + convertIllegalHtmlEntities = { + 128 : 8364, # € + 130 : 8218, # ‚ + 131 : 402, # ƒ + 132 : 8222, # „ + 133 : 8230, # … + 134 : 8224, # † + 135 : 8225, # ‡ + 136 : 710, # ˆ + 137 : 8240, # ‰ + 138 : 352, # Š + 139 : 8249, # ‹ + 140 : 338, # Œ + 142 : 381, # Ž + 145 : 8216, # ‘ + 146 : 8217, # ’ + 147 : 8220, # “ + 148 : 8221, # ” + 149 : 8226, # • + 150 : 8211, # – + 151 : 8212, # — + 152 : 732, # ˜ + 153 : 8482, # ™ + 154 : 353, # š + 155 : 8250, # › + 156 : 339, # œ + 158 : 382, # ž + 159 : 376 # Ÿ + } + #ensuring that illegal   and , which have no known values, + #don't get converted to unichr(129), unichr(141) or unichr(157) + ignore = set(ignore) | set([129, 141, 157]) + result = u'' + i = 0 + found = True + while found: + text = text[i:] + match = entityR.search(text) + if match: + unicodeCodepoint = None + if match.group('decimal'): + unicodeCodepoint = int(match.group('decimal')) + elif match.group('hex'): + unicodeCodepoint = int(match.group('hex'), 16) + elif match.group('name'): + name = match.group('name') + if htmlentitydefs.name2codepoint.has_key(name): + # We found a known HTML entity. + unicodeCodepoint = htmlentitydefs.name2codepoint[name] + result += text[:match.start()] + try: + unicodeCodepoint=convertIllegalHtmlEntities[unicodeCodepoint] + except KeyError: + pass + if unicodeCodepoint and unicodeCodepoint not in ignore: + result += unichr(unicodeCodepoint) + else: + # Leave the entity unchanged + result += text[match.start():match.end()] + i = match.end() + else: + result += text + found = False + return result + +def url2unicode(title, site, site2 = None): + """Convert url-encoded text to unicode using site's encoding. + + If site2 is provided, try its encodings as well. Uses the first encoding + that doesn't cause an error. + + """ + # create a list of all possible encodings for both hint sites + encList = [site.encoding()] + list(site.encodings()) + if site2 and site2 <> site: + encList.append(site2.encoding()) + encList += list(site2.encodings()) + firstException = None + # try to handle all encodings (will probably retry utf-8) + for enc in encList: + try: + t = title.encode(enc) + t = urllib.unquote(t) + return unicode(t, enc) + except UnicodeError, ex: + if not firstException: + firstException = ex + pass + # Couldn't convert, raise the original exception + raise firstException +
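For orientation, here is a minimal usage sketch of the ImagePage, Category and Link classes added in the page.py hunk above. It is illustrative only: it assumes a working user-config.py and a reachable default Site, and the file and category titles are made-up examples, not part of this commit.

# Illustrative sketch only: exercises the page.py API shown above.
# Assumes a configured user-config.py; all titles below are hypothetical.
import pywikibot

site = pywikibot.Site()                      # default site from user-config.py

# ImagePage: file URL, shared-repository check, pages embedding the file
image = pywikibot.ImagePage(site, u'Image:Example.jpg')
print image.fileUrl()
print image.fileIsShared()
for page in image.usingPages():
    print page.title()

# Category: bounded-recursion iteration and a category wikilink
cat = pywikibot.Category(site, u'Category:Physics')
for article in cat.articles(recurse=1):      # direct members plus first-level subcats
    print article.title()
print cat.aslink()                           # e.g. [[Category:Physics]]

# Link: lazy parsing into namespace, title, section and anchor text
link = pywikibot.Link(u'Category:Physics#History|see history', source=site)
print link.namespace, link.title, link.section, link.anchor

Note that fileUrl(), getFileSHA1Sum() and the version-history methods all route through Site.getimageinfo(), which the diff itself marks #FIXME, so that part of the API may still change in later revisions.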
Property changes on: branches/rewrite/pywikibot/page.py ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision Added: svn:eol-style + native
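Link.parse() above depends on the two title-normalization helpers defined at the end of page.py, html2unicode() and url2unicode(). A small hedged example of their effect follows; it assumes the default Site reports a UTF-8 encoding, and the input strings are invented for illustration.

# Illustrative sketch only: the normalization helpers from page.py above.
# Assumes a configured default Site whose encoding() is utf-8.
import pywikibot
from pywikibot.page import html2unicode, url2unicode

site = pywikibot.Site()

# Named, decimal and hexadecimal HTML entities become real unicode characters
print html2unicode(u'Caf&eacute; &#8211; se&#xF1;or')
# -> u'Caf\xe9 \u2013 se\xf1or'  (Café, an en dash, señor)

# Percent-encoded title text is decoded using the site's encoding(s)
print url2unicode(u'Caf%C3%A9', site=site)
# -> u'Caf\xe9'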
Modified: branches/rewrite/pywikibot/pagegenerators.py =================================================================== --- branches/rewrite/pywikibot/pagegenerators.py 2008-12-16 19:34:48 UTC (rev 6155) +++ branches/rewrite/pywikibot/pagegenerators.py 2008-12-16 19:40:20 UTC (rev 6156) @@ -1,965 +1,965 @@ -# -*- coding: utf-8 -*- -"""This module offers a wide variety of page generators. A page generator is an -object that is iterable (see http://www.python.org/dev/peps/pep-0255/ ) and -that yields page objects on which other scripts can then work. - -In general, there is no need to run this script directly. It can, however, -be run for testing purposes. It will then print the page titles to standard -output. - -These parameters are supported to specify which pages titles to print: - -¶ms; -""" -# -# (C) Pywikipedia bot team, 2008 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id: $' - -import pywikibot - -import itertools -import Queue -import re -import sys -import threading - - -# ported from version 1 for backwards-compatibility -# most of these functions just wrap a Site or Page method that returns -# a generator - -parameterHelp = """\ --cat Work on all pages which are in a specific category. - Argument can also be given as "-cat:categoryname" or - as "-cat:categoryname|fromtitle". - --catr Like -cat, but also recursively includes pages in - subcategories, sub-subcategories etc. of the - given category. - Argument can also be given as "-catr:categoryname" or - as "-catr:categoryname|fromtitle". - --subcats Work on all subcategories of a specific category. - Argument can also be given as "-subcats:categoryname" or - as "-subcats:categoryname|fromtitle". - --subcatsr Like -subcats, but also includes sub-subcategories etc. of - the given category. - Argument can also be given as "-subcatsr:categoryname" or - as "-subcatsr:categoryname|fromtitle". - --uncat Work on all pages which are not categorised. - --uncatcat Work on all categories which are not categorised. - --uncatfiles Work on all files which are not categorised. - --file Read a list of pages to treat from the named text file. - Page titles in the file must be enclosed with [[brackets]]. - Argument can also be given as "-file:filename". - --filelinks Work on all pages that use a certain image/media file. - Argument can also be given as "-filelinks:filename". - --yahoo Work on all pages that are found in a Yahoo search. - Depends on python module pYsearch. See yahoo_appid in - config.py for instructions. - --search Work on all pages that are found in a MediaWiki search - across all namespaces. - --google Work on all pages that are found in a Google search. - You need a Google Web API license key. Note that Google - doesn't give out license keys anymore. See google_key in - config.py for instructions. - Argument can also be given as "-google:searchstring". - --interwiki Work on the given page and all equivalent pages in other - languages. This can, for example, be used to fight - multi-site spamming. - Attention: this will cause the bot to modify - pages on several wiki sites, this is not well tested, - so check your edits! - --links Work on all pages that are linked from a certain page. - Argument can also be given as "-links:linkingpagetitle". - --new Work on the 60 newest pages. If given as -new:x, will work - on the x newest pages. - --imagelinks Work on all images that are linked from a certain page. - Argument can also be given as "-imagelinks:linkingpagetitle". 
- --newimages Work on the 100 newest images. If given as -newimages:x, - will work on the x newest images. - --ref Work on all pages that link to a certain page. - Argument can also be given as "-ref:referredpagetitle". - --start Specifies that the robot should go alphabetically through - all pages on the home wiki, starting at the named page. - Argument can also be given as "-start:pagetitle". - - You can also include a namespace. For example, - "-start:Template:!" will make the bot work on all pages - in the template namespace. - --prefixindex Work on pages commencing with a common prefix. - --regex Obsolete, use -titleregex - --titleregex Work on titles that match the given regular expression. - --transcludes Work on all pages that use a certain template. - Argument can also be given as "-transcludes:Template:Title". - --unusedfiles Work on all description pages of images/media files that are - not used anywhere. - Argument can be given as "-unusedfiles:n" where - n is the maximum number of articles to work on. - --unwatched Work on all articles that are not watched by anyone. - Argument can be given as "-unwatched:n" where - n is the maximum number of articles to work on. - --usercontribs Work on all articles that were edited by a certain user : - Example : -usercontribs:DumZiBoT - --weblink Work on all articles that contain an external link to - a given URL; may be given as "-weblink:url" - --withoutinterwiki Work on all pages that don't have interlanguage links. - Argument can be given as "-withoutinterwiki:n" where - n is some number (??). -""" - -docuReplacements = {'¶ms;': parameterHelp} - -# if a bot uses GeneratorFactory, the module should include the line -# docuReplacements = {'¶ms;': pywikibot.pagegenerators.parameterHelp} -# and include the marker ¶ms; in the module's docstring - - -class GeneratorFactory(object): - """Process command line arguments and return appropriate page generator.""" - - def setCategoryGen(self, arg, length, recurse = False): - if len(arg) == length: - categoryname = pywikibot.input(u'Please enter the category name:') - else: - categoryname = arg[length + 1:] - - ind = categoryname.find('|') - if ind > 0: - startfrom = categoryname[ind + 1:] - categoryname = categoryname[:ind] - else: - startfrom = None - - cat = pywikibot.Category(pywikibot.Link('Category:%s' % categoryname)) - return CategorizedPageGenerator(cat, start=startfrom, recurse=recurse) - - def setSubCategoriesGen(self, arg, length, recurse=False): - if len(arg) == length: - categoryname = pywikibot.input(u'Please enter the category name:') - else: - categoryname = arg[length + 1:] - - ind = categoryname.find('|') - if ind > 0: - startfrom = categoryname[ind + 1:] - categoryname = categoryname[:ind] - else: - startfrom = None - - cat = pywikibot.Category(pywikibot.Link('Category:%s' % categoryname)) - return SubCategoriesPageGenerator(cat, start=startfrom, recurse=recurse) - - def handleArg(self, arg): - gen = None - if arg.startswith('-filelinks'): - fileLinksPageTitle = arg[11:] - if not fileLinksPageTitle: - fileLinksPageTitle = pywikibot.input( - u'Links to which image page should be processed?') - if fileLinksPageTitle.startswith(pywikibot.Site().namespace(6) - + ":"): - fileLinksPage = pywikibot.ImagePage(pywikibot.Site(), - fileLinksPageTitle) - else: - fileLinksPage = pywikibot.ImagePage(pywikibot.Site(), - 'Image:' + - fileLinksPageTitle) - gen = FileLinksGenerator(fileLinksPage) - elif arg.startswith('-unusedfiles'): - if len(arg) == 12: - gen = UnusedFilesGenerator() - else: - 
gen = UnusedFilesGenerator(number = int(arg[13:])) - elif arg.startswith('-unwatched'): - if len(arg) == 10: - gen = UnwatchedPagesPageGenerator() - else: - gen = UnwatchedPagesPageGenerator(number = int(arg[11:])) - elif arg.startswith('-usercontribs'): - gen = UserContributionsGenerator(arg[14:]) - elif arg.startswith('-withoutinterwiki'): - if len(arg) == 17: - gen = WithoutInterwikiPageGenerator() - else: - gen = WithoutInterwikiPageGenerator(number = int(arg[18:])) - elif arg.startswith('-interwiki'): - title = arg[11:] - if not title: - title = pywikibot.input(u'Which page should be processed?') - page = pywikibot.Page(pywikibot.Site(), title) - gen = InterwikiPageGenerator(page) - elif arg.startswith('-file'): - textfilename = arg[6:] - if not textfilename: - textfilename = pywikibot.input( - u'Please enter the local file name:') - gen = TextfilePageGenerator(textfilename) - elif arg.startswith('-catr'): - gen = self.setCategoryGen(arg, 5, recurse = True) - elif arg.startswith('-cat'): - gen = self.setCategoryGen(arg, 4) - elif arg.startswith('-subcatsr'): - gen = self.setSubCategoriesGen(arg, 9, recurse = True) - elif arg.startswith('-subcats'): - gen = self.setSubCategoriesGen(arg, 8) - elif arg.startswith('-uncatfiles'): - gen = UnCategorizedImageGenerator() - elif arg.startswith('-uncatcat'): - gen = UnCategorizedCategoryGenerator() - elif arg.startswith('-uncat'): - gen = UnCategorizedPageGenerator() - elif arg.startswith('-ref'): - referredPageTitle = arg[5:] - if not referredPageTitle: - referredPageTitle = pywikibot.input( - u'Links to which page should be processed?') - referredPage = pywikibot.Page(pywikibot.Site(), referredPageTitle) - gen = ReferringPageGenerator(referredPage) - elif arg.startswith('-links'): - linkingPageTitle = arg[7:] - if not linkingPageTitle: - linkingPageTitle = pywikibot.input( - u'Links from which page should be processed?') - linkingPage = pywikibot.Page(pywikibot.Site(), linkingPageTitle) - gen = LinkedPageGenerator(linkingPage) - elif arg.startswith('-weblink'): - url = arg[9:] - if not url: - url = pywikibot.input( - u'Pages with which weblink should be processed?') - gen = LinksearchPageGenerator(url) - elif arg.startswith('-transcludes'): - transclusionPageTitle = arg[len('-transcludes:'):] - if not transclusionPageTitle: - transclusionPageTitle = pywikibot.input( - u'Pages that transclude which page should be processed?') - transclusionPage = pywikibot.Page(pywikibot.Site(), - 'Template:%s' % transclusionPageTitle) - gen = ReferringPageGenerator(transclusionPage, - onlyTemplateInclusion=True) - elif arg.startswith('-start'): - if arg.startswith('-startxml'): - pywikibot.output(u'-startxml : wrong parameter') - raise ValueError - firstPageTitle = arg[7:] - if not firstPageTitle: - firstPageTitle = pywikibot.input( - u'At which page do you want to start?') - namespace = pywikibot.Page(pywikibot.Site(), - firstPageTitle).namespace() - firstPageTitle = pywikibot.Page(pywikibot.link(firstPageTitle) - ).titleWithoutNamespace() - gen = AllpagesPageGenerator(firstPageTitle, namespace, - includeredirects=False) - elif arg.startswith('-prefixindex'): - prefix = arg[13:] - namespace = None - if not prefix: - prefix = pywikibot.input( - u'What page names are you looking for?') - gen = PrefixingPageGenerator(prefix=prefix) - elif arg.startswith('-newimages'): - limit = arg[11:] or pywikibot.input( - u'How many images do you want to load?') - gen = NewimagesPageGenerator(number=int(limit)) - elif arg.startswith('-new'): - if len(arg) >=5: - gen = 
NewpagesPageGenerator(number=int(arg[5:])) - else: - gen = NewpagesPageGenerator(number=60) - elif arg.startswith('-imagelinks'): - imagelinkstitle = arg[len('-imagelinks:'):] - if not imagelinkstitle: - imagelinkstitle = pywikibot.input( - u'Images on which page should be processed?') - imagelinksPage = pywikibot.Page(pywikibot.Link(imagelinkstitle)) - gen = ImagesPageGenerator(imagelinksPage) - elif arg.startswith('-search'): - mediawikiQuery = arg[8:] - if not mediawikiQuery: - mediawikiQuery = pywikibot.input( - u'What do you want to search for?') - # In order to be useful, all namespaces are required - gen = SearchPageGenerator(mediawikiQuery, namespaces = []) - elif arg.startswith('-google'): - gen = GoogleSearchPageGenerator(arg[8:]) - elif arg.startswith('-titleregex'): - if len(arg) == 6: - regex = pywikibot.input( - u'What page names are you looking for?') - else: - regex = arg[7:] - gen = RegexFilterPageGenerator(pywikibot.Site().allpages(), regex) - elif arg.startswith('-yahoo'): - gen = YahooSearchPageGenerator(arg[7:]) - else: - return None - # make sure all yielded pages are unique - gen = DuplicateFilterPageGenerator(gen) - return gen - - -class ThreadedGenerator(threading.Thread): - """Look-ahead generator class. - - Runs a generator in a separate thread and queues the results; can - be called like a regular generator. - - Subclasses should override self.generator, _not_ self.run - - Important: the generator thread will stop itself if the generator's - internal queue is exhausted; but, if the calling program does not use - all the generated values, it must call the generator's stop() method to - stop the background thread. Example usage: - - >>> gen = ThreadedGenerator(target=foo) - >>> try: - ... for data in gen: - ... do_work(data) - ... finally: - ... gen.stop() - - """ #NOT CURRENTLY USED: Intended for future development - - def __init__(self, group=None, target=None, name="GeneratorThread", - args=(), kwargs=None, qsize=65536): - """Constructor. Takes same keyword arguments as threading.Thread. - - target must be a generator function (or other callable that returns - an iterable object). - - @param qsize: The size of the lookahead queue. The larger the qsize, - the more values will be computed in advance of use (which can eat - up memory and processor time). - @type qsize: int - - """ - if kwargs is None: - kwargs = {} - if target: - self.generator = target - if not hasattr(self, "generator"): - raise RuntimeError("No generator for ThreadedGenerator to run.") - self.args, self.kwargs = args, kwargs - threading.Thread.__init__(self, group=group, name=name) - self.queue = Queue.Queue(qsize) - self.finished = threading.Event() - - def __iter__(self): - """Iterate results from the queue.""" - if not self.isAlive() and not self.finished.isSet(): - self.start() - # if there is an item in the queue, yield it, otherwise wait - while not self.finished.isSet(): - try: - yield self.queue.get(True, 0.25) - except Queue.Empty: - pass - except KeyboardInterrupt: - self.stop() - - def stop(self): - """Stop the background thread.""" -## if not self.finished.isSet(): -## pywikibot.output("DEBUG: signalling %s to stop." % self) - self.finished.set() - - def run(self): - """Run the generator and store the results on the queue.""" - self.__gen = self.generator(*self.args, **self.kwargs) - for result in self.__gen: - while True: - if self.finished.isSet(): -## pywikibot.output("DEBUG: %s received stop signal." 
% self) - return - try: - self.queue.put_nowait(result) - except Queue.Full: - time.sleep(0.25) - continue - break - # wait for queue to be emptied, then kill the thread - while not self.finished.isSet() and not self.queue.empty(): - time.sleep(0.25) - self.stop() -## pywikibot.output("DEBUG: %s stopped because generator exhausted." % self) - - -def AllpagesPageGenerator(start ='!', namespace=None, includeredirects=True, - site=None): - """ - Using the Allpages special page, retrieve all articles' titles, and yield - page objects. - If includeredirects is False, redirects are not included. If - includeredirects equals the string 'only', only redirects are added. - """ - if site is None: - site = pywikibot.getSite() - if includeredirects: - if includeredirects == 'only': - filterredir = True - else: - filterredir = None - else: - filterredir = False - return site.allpages(start=start, namespace=namespace, - filterredir=filterredir) - - -def PrefixingPageGenerator(prefix, namespace=None, includeredirects=True, - site=None): - if site is None: - site = pywikibot.Site() - page = pywikibot.Page(site, prefix) - if namespace is None: - namespace = page.namespace() - title = page.titleWithoutNamespace() - if includeredirects: - if includeredirects == 'only': - filterredir = True - else: - filterredir = None - else: - filterredir = False - return site.allpages(prefix=title, namespace=namespace, - filterredir=filterredir) - - -def NewpagesPageGenerator(number=100, get_redirect=False, repeat=False, - site=None): - # API does not (yet) have a newpages function, so this tries to duplicate - # it by filtering the recentchanges output - # defaults to namespace 0 because that's how Special:Newpages defaults - if site is None: - site = pywikibot.Site() - return site.recentchanges(limit=number, showredirects=get_redirect, - changetype="new", namespaces=0) - - -def FileLinksGenerator(referredImagePage): - return referredImagePage.usingPages() - - -def ImagesPageGenerator(pageWithImages): - return pageWithImages.imagelinks() - - -def InterwikiPageGenerator(page): - """Iterator over all interwiki (non-language) links on a page.""" - for link in page.interwiki(): - yield pywikibot.Page(link) - - -def LanguageLinksPageGenerator(page): - """Iterator over all interwiki language links on a page.""" - for link in page.langlinks(): - yield pywikibot.Page(link) - - -def ReferringPageGenerator(referredPage, followRedirects=False, - withTemplateInclusion=True, - onlyTemplateInclusion=False): - '''Yields all pages referring to a specific page.''' - return referredPage.getReferences( - follow_redirects=followRedirects, - withTemplateInclusion=withTemplateInclusion, - onlyTemplateInclusion=onlyTemplateInclusion) - - -def CategorizedPageGenerator(category, recurse=False, start=None): - '''Yield all pages in a specific category. - - If recurse is True, pages in subcategories are included as well; if - recurse is an int, only subcategories to that depth will be included - (e.g., recurse=2 will get pages in subcats and sub-subcats, but will - not go any further). - If start is a string value, only pages whose sortkey comes after start - alphabetically are included. - - ''' # TODO: page generator could be modified to use cmstartsortkey ... - for a in category.articles(recurse=recurse): - if start is None or a.title(withNamespace=False) >= start: - yield a - - -def SubCategoriesPageGenerator(category, recurse=False, start=None): - '''Yields all subcategories in a specific category. 
- - If recurse is True, pages in subcategories are included as well; if - recurse is an int, only subcategories to that depth will be included - (e.g., recurse=2 will get pages in subcats and sub-subcats, but will - not go any further). - If start is a string value, only categories whose sortkey comes after - start alphabetically are included. - - ''' # TODO: page generator could be modified to use cmstartsortkey ... - for s in category.subcategories(recurse=recurse): - if start is None or s.title(withNamespace=False) >= start: - yield s - - -def LinkedPageGenerator(linkingPage): - """Yields all pages linked from a specific page.""" - return linkingPage.linkedPages() - - -def TextfilePageGenerator(filename=None, site=None): - """Iterate pages from a list in a text file. - - The file must contain page links between double-square-brackets. The - generator will yield each corresponding Page object. - - @param filename: the name of the file that should be read. If no name is - given, the generator prompts the user. - @param site: the default Site for which Page objects should be created - - """ - if filename is None: - filename = pywikibot.input(u'Please enter the filename:') - if site is None: - site = pywikibot.Site() - f = codecs.open(filename, 'r', config.textfile_encoding) - for linkmatch in pywikibot.link_regex.finditer(f.read()): - # If the link is in interwiki format, the Page object may reside - # on a different Site than the default. - # This makes it possible to work on different wikis using a single - # text file, but also could be dangerous because you might - # inadvertently change pages on another wiki! - yield pywikibot.Page(pywikibot.Link(linkmatch.groups("title"), site)) - f.close() - - -def PagesFromTitlesGenerator(iterable, site=None): - """Generate pages from the titles (unicode strings) yielded by iterable.""" - if site is None: - site = pywikibot.Site() - for title in iterable: - if not isinstance(title, basestring): - break - yield pywikibot.Page(pywikibot.Link(title, site)) - - -def UserContributionsGenerator(username, number=250, namespaces=None, - site=None): - """Yields number unique pages edited by user:username - namespaces : list of namespace numbers to fetch contribs from - - """ - if site is None: - site = pywikibot.Site() - return site.usercontribs(user=username, limit=number, namespaces=namespaces) - - -def NamespaceFilterPageGenerator(generator, namespaces, site=None): - """ - Wraps around another generator. Yields only those pages that are in one - of the given namespaces. - - The namespace list can contain both integers (namespace numbers) and - strings/unicode strings (namespace names). 
- - """ - if site is None: - site = pywikibot.Site() - # convert namespace names to namespace numbers - for i in xrange(len(namespaces)): - ns = namespaces[i] - if isinstance(ns, basestring): - index = site.getNamespaceIndex(ns) - if index is None: - raise ValueError(u'Unknown namespace: %s' % ns) - namespaces[i] = index - for page in generator: - if page.namespace() in namespaces: - yield page - - -def RedirectFilterPageGenerator(generator): - """Yields pages from another generator that are not redirects.""" - for page in generator: - if not page.isRedirectPage(): - yield page - - -def DuplicateFilterPageGenerator(generator): - """Yield all unique pages from another generator, omitting duplicates.""" - seenPages = {} - for page in generator: - if page not in seenPages: - seenPages[page] = None - yield page - - -def RegexFilterPageGenerator(generator, regex): - """Yield pages from another generator whose titles match regex.""" - reg = re.compile(regex, re.I) - for page in generator: - if reg.match(page.titleWithoutNamespace()): - yield page - - -def CombinedPageGenerator(generators): - return itertools.chain(*generators) - - -def CategoryGenerator(generator): - """Yield pages from another generator as Category objects. - - Makes sense only if it is ascertained that only categories are being - retrieved. - - """ - for page in generator: - yield pywikibot.Category(page) - - -def PageWithTalkPageGenerator(generator): - """ - Wraps around another generator. Yields the same pages, but for non-talk - pages, it also includes associated talk pages. - This generator does not check if the talk page in fact exists. - """ - for page in generator: - yield page - if not page.isTalkPage(): - yield page.toggleTalkPage() - - -def PreloadingGenerator(generator, pageNumber=60, lookahead=10): - """Yield preloaded pages taken from another generator.""" - - # pages may be on more than one site, for example if an interwiki - # generator is used, so use a separate preloader for each site - sites = {} - # build a list of pages for each site found in the iterator - for page in generator: - sites.setdefault(page.site(), []).append(page) - return itertools.chain(*(site.preloadpages(sites[site], pageNumber) - for site in sites)) - - -#TODO below - -def UnusedFilesGenerator(number=100, repeat=False, site=None, extension=None): - if site is None: - site = pywikibot.Site() - for page in site.unusedfiles(number=number, repeat=repeat, - extension=extension): - yield pywikibot.ImagePage(page.site(), page.title()) - -def WithoutInterwikiPageGenerator(number=100, repeat=False, site=None): - if site is None: - site = pywikibot.Site() - for page in site.withoutinterwiki(number=number, repeat=repeat): - yield page - -def UnCategorizedCategoryGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.uncategorizedcategories(number=number, repeat=repeat): - yield page - -def UnCategorizedImageGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.uncategorizedimages(number=number, repeat=repeat): - yield page - -def NewimagesPageGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.newimages(number, repeat=repeat): - yield page[0] - -def UnCategorizedPageGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.uncategorizedpages(number=number, repeat=repeat): - yield page - -def 
LonelyPagesPageGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.lonelypages(number=number, repeat=repeat): - yield page - -def UnwatchedPagesPageGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.unwatchedpages(number=number, repeat=repeat): - yield page - -def AncientPagesPageGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.ancientpages(number=number, repeat=repeat): - yield page[0] - -def DeadendPagesPageGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.deadendpages(number=number, repeat=repeat): - yield page - -def LongPagesPageGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.longpages(number=number, repeat=repeat): - yield page[0] - -def ShortPagesPageGenerator(number = 100, repeat = False, site = None): - if site is None: - site = pywikibot.Site() - for page in site.shortpages(number=number, repeat=repeat): - yield page[0] - -def LinksearchPageGenerator(link, step=500, site=None): - """Yields all pages that include a specified link, according to - [[Special:Linksearch]]. - - """ - if site is None: - site = pywikibot.Site() - for page in site.linksearch(link, limit=step): - yield page - -def SearchPageGenerator(query, number = 100, namespaces = None, site = None): - """ - Provides a list of results using the internal MediaWiki search engine - """ - if site is None: - site = pywikibot.Site() - for page in site.search(query, number=number, namespaces = namespaces): - yield page[0] - -class YahooSearchPageGenerator: - ''' - To use this generator, install pYsearch - ''' - def __init__(self, query = None, count = 100, site = None): # values larger than 100 fail - self.query = query or pywikibot.input(u'Please enter the search query:') - self.count = count - if site is None: - site = pywikibot.Site() - self.site = site - - def queryYahoo(self, query): - from yahoo.search.web import WebSearch - srch = WebSearch(config.yahoo_appid, query=query, results=self.count) - - dom = srch.get_results() - results = srch.parse_results(dom) - for res in results: - url = res.Url - yield url - - def __iter__(self): - # restrict query to local site - localQuery = '%s site:%s' % (self.query, self.site.hostname()) - base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address('')) - for url in self.queryYahoo(localQuery): - if url[:len(base)] == base: - title = url[len(base):] - page = pywikibot.Page(self.site, title) - yield page - -class GoogleSearchPageGenerator: - ''' - To use this generator, you must install the pyGoogle module from - http://pygoogle.sf.net/ and get a Google Web API license key from - http://www.google.com/apis/index.html . The google_key must be set to your - license key in your configuration. 
- ''' - def __init__(self, query = None, site = None): - self.query = query or pywikibot.input(u'Please enter the search query:') - if site is None: - site = pywikibot.Site() - self.site = site - - ######### - # partially commented out because it is probably not in compliance with Google's "Terms of - # service" (see 5.3, http://www.google.com/accounts/TOS?loc=US) - def queryGoogle(self, query): - #if config.google_key: - if True: - #try: - for url in self.queryViaSoapApi(query): - yield url - return - #except ImportError: - #pass - # No google license key, or pygoogle not installed. Do it the ugly way. - #for url in self.queryViaWeb(query): - # yield url - - def queryViaSoapApi(self, query): - import google - google.LICENSE_KEY = config.google_key - offset = 0 - estimatedTotalResultsCount = None - while not estimatedTotalResultsCount or offset < estimatedTotalResultsCount: - while (True): - # Google often yields 502 errors. - try: - pywikibot.output(u'Querying Google, offset %i' % offset) - data = google.doGoogleSearch(query, start = offset, filter = False) - break - except KeyboardInterrupt: - raise - except: - # SOAPpy.Errors.HTTPError or SOAP.HTTPError (502 Bad Gateway) - # can happen here, depending on the module used. It's not easy - # to catch this properly because pygoogle decides which one of - # the soap modules to use. - pywikibot.output(u"An error occured. Retrying in 10 seconds...") - time.sleep(10) - continue - - for result in data.results: - #print 'DBG: ', result.URL - yield result.URL - # give an estimate of pages to work on, but only once. - if not estimatedTotalResultsCount: - pywikibot.output(u'Estimated total result count: %i pages.' % data.meta.estimatedTotalResultsCount) - estimatedTotalResultsCount = data.meta.estimatedTotalResultsCount - #print 'estimatedTotalResultsCount: ', estimatedTotalResultsCount - offset += 10 - - ######### - # commented out because it is probably not in compliance with Google's "Terms of - # service" (see 5.3, http://www.google.com/accounts/TOS?loc=US) - - #def queryViaWeb(self, query): - #""" - #Google has stopped giving out API license keys, and sooner or later - #they will probably shut down the service. - #This is a quick and ugly solution: we just grab the search results from - #the normal web interface. - #""" - #linkR = re.compile(r'<a href="([^>"]+?)" class=l>', re.IGNORECASE) - #offset = 0 - - #while True: - #pywikibot.output("Google: Querying page %d" % (offset / 100 + 1)) - #address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" % (urllib.quote_plus(query), offset) - ## we fake being Firefox because Google blocks unknown browsers - #request = urllib2.Request(address, None, {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8) Gecko/20051128 SUSE/1.5-0.1 Firefox/1.5'}) - #urlfile = urllib2.urlopen(request) - #page = urlfile.read() - #urlfile.close() - #for url in linkR.findall(page): - #yield url - #if "<div id=nn>" in page: # Is there a "Next" link for next page of results? - #offset += 100 # Yes, go to next page of results. 
- #else: - #return - ######### - - def __iter__(self): - # restrict query to local site - localQuery = '%s site:%s' % (self.query, self.site.hostname()) - base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address('')) - for url in self.queryGoogle(localQuery): - if url[:len(base)] == base: - title = url[len(base):] - page = pywikibot.Page(self.site, title) - # Google contains links in the format http://de.wikipedia.org/wiki/en:Foobar - if page.site() == self.site: - yield page - -def MySQLPageGenerator(query, site = None): - import MySQLdb as mysqldb - if site is None: - site = pywikibot.Site() - conn = mysqldb.connect(config.db_hostname, db = site.dbName(), - user = config.db_username, - passwd = config.db_password) - cursor = conn.cursor() - pywikibot.output(u'Executing query:\n%s' % query) - query = query.encode(site.encoding()) - cursor.execute(query) - while True: - try: - namespaceNumber, pageName = cursor.fetchone() - print namespaceNumber, pageName - except TypeError: - # Limit reached or no more results - break - #print pageName - if pageName: - namespace = site.namespace(namespaceNumber) - pageName = unicode(pageName, site.encoding()) - if namespace: - pageTitle = '%s:%s' % (namespace, pageName) - else: - pageTitle = pageName - page = pywikibot.Page(site, pageTitle) - yield page - -def YearPageGenerator(start = 1, end = 2050, site = None): - if site is None: - site = pywikibot.Site() - pywikibot.output(u"Starting with year %i" % start) - for i in xrange(start, end + 1): - if i % 100 == 0: - pywikibot.output(u'Preparing %i...' % i) - # There is no year 0 - if i != 0: - current_year = date.formatYear(site.lang, i ) - yield pywikibot.Page(site, current_year) - -def DayPageGenerator(startMonth = 1, endMonth = 12, site = None): - if site is None: - site = pywikibot.Site() - fd = date.FormatDate(site) - firstPage = pywikibot.Page(site, fd(startMonth, 1)) - pywikibot.output(u"Starting with %s" % firstPage.aslink()) - for month in xrange(startMonth, endMonth+1): - for day in xrange(1, date.getNumberOfDaysInMonth(month)+1): - yield pywikibot.Page(site, fd(month, day)) - - -if __name__ == "__main__": - try: - gen = None - genFactory = GeneratorFactory() - for arg in pywikibot.handleArgs(): - generator = genFactory.handleArg(arg) - if generator: - gen = generator - if gen: - for page in gen: - pywikibot.output(page.title(), toStdout = True) - else: - pywikibot.showHelp() - finally: - pywikibot.stopme() +# -*- coding: utf-8 -*- +"""This module offers a wide variety of page generators. A page generator is an +object that is iterable (see http://www.python.org/dev/peps/pep-0255/ ) and +that yields page objects on which other scripts can then work. + +In general, there is no need to run this script directly. It can, however, +be run for testing purposes. It will then print the page titles to standard +output. + +These parameters are supported to specify which pages titles to print: + +¶ms; +""" +# +# (C) Pywikipedia bot team, 2008 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' + +import pywikibot + +import itertools +import Queue +import re +import sys +import threading + + +# ported from version 1 for backwards-compatibility +# most of these functions just wrap a Site or Page method that returns +# a generator + +parameterHelp = """\ +-cat Work on all pages which are in a specific category. + Argument can also be given as "-cat:categoryname" or + as "-cat:categoryname|fromtitle". 
+ +-catr Like -cat, but also recursively includes pages in + subcategories, sub-subcategories etc. of the + given category. + Argument can also be given as "-catr:categoryname" or + as "-catr:categoryname|fromtitle". + +-subcats Work on all subcategories of a specific category. + Argument can also be given as "-subcats:categoryname" or + as "-subcats:categoryname|fromtitle". + +-subcatsr Like -subcats, but also includes sub-subcategories etc. of + the given category. + Argument can also be given as "-subcatsr:categoryname" or + as "-subcatsr:categoryname|fromtitle". + +-uncat Work on all pages which are not categorised. + +-uncatcat Work on all categories which are not categorised. + +-uncatfiles Work on all files which are not categorised. + +-file Read a list of pages to treat from the named text file. + Page titles in the file must be enclosed with [[brackets]]. + Argument can also be given as "-file:filename". + +-filelinks Work on all pages that use a certain image/media file. + Argument can also be given as "-filelinks:filename". + +-yahoo Work on all pages that are found in a Yahoo search. + Depends on python module pYsearch. See yahoo_appid in + config.py for instructions. + +-search Work on all pages that are found in a MediaWiki search + across all namespaces. + +-google Work on all pages that are found in a Google search. + You need a Google Web API license key. Note that Google + doesn't give out license keys anymore. See google_key in + config.py for instructions. + Argument can also be given as "-google:searchstring". + +-interwiki Work on the given page and all equivalent pages in other + languages. This can, for example, be used to fight + multi-site spamming. + Attention: this will cause the bot to modify + pages on several wiki sites, this is not well tested, + so check your edits! + +-links Work on all pages that are linked from a certain page. + Argument can also be given as "-links:linkingpagetitle". + +-new Work on the 60 newest pages. If given as -new:x, will work + on the x newest pages. + +-imagelinks Work on all images that are linked from a certain page. + Argument can also be given as "-imagelinks:linkingpagetitle". + +-newimages Work on the 100 newest images. If given as -newimages:x, + will work on the x newest images. + +-ref Work on all pages that link to a certain page. + Argument can also be given as "-ref:referredpagetitle". + +-start Specifies that the robot should go alphabetically through + all pages on the home wiki, starting at the named page. + Argument can also be given as "-start:pagetitle". + + You can also include a namespace. For example, + "-start:Template:!" will make the bot work on all pages + in the template namespace. + +-prefixindex Work on pages commencing with a common prefix. + +-regex Obsolete, use -titleregex + +-titleregex Work on titles that match the given regular expression. + +-transcludes Work on all pages that use a certain template. + Argument can also be given as "-transcludes:Template:Title". + +-unusedfiles Work on all description pages of images/media files that are + not used anywhere. + Argument can be given as "-unusedfiles:n" where + n is the maximum number of articles to work on. + +-unwatched Work on all articles that are not watched by anyone. + Argument can be given as "-unwatched:n" where + n is the maximum number of articles to work on. 
+ +-usercontribs Work on all articles that were edited by a certain user : + Example : -usercontribs:DumZiBoT + +-weblink Work on all articles that contain an external link to + a given URL; may be given as "-weblink:url" + +-withoutinterwiki Work on all pages that don't have interlanguage links. + Argument can be given as "-withoutinterwiki:n" where + n is some number (??). +""" + +docuReplacements = {'¶ms;': parameterHelp} + +# if a bot uses GeneratorFactory, the module should include the line +# docuReplacements = {'¶ms;': pywikibot.pagegenerators.parameterHelp} +# and include the marker ¶ms; in the module's docstring + + +class GeneratorFactory(object): + """Process command line arguments and return appropriate page generator.""" + + def setCategoryGen(self, arg, length, recurse = False): + if len(arg) == length: + categoryname = pywikibot.input(u'Please enter the category name:') + else: + categoryname = arg[length + 1:] + + ind = categoryname.find('|') + if ind > 0: + startfrom = categoryname[ind + 1:] + categoryname = categoryname[:ind] + else: + startfrom = None + + cat = pywikibot.Category(pywikibot.Link('Category:%s' % categoryname)) + return CategorizedPageGenerator(cat, start=startfrom, recurse=recurse) + + def setSubCategoriesGen(self, arg, length, recurse=False): + if len(arg) == length: + categoryname = pywikibot.input(u'Please enter the category name:') + else: + categoryname = arg[length + 1:] + + ind = categoryname.find('|') + if ind > 0: + startfrom = categoryname[ind + 1:] + categoryname = categoryname[:ind] + else: + startfrom = None + + cat = pywikibot.Category(pywikibot.Link('Category:%s' % categoryname)) + return SubCategoriesPageGenerator(cat, start=startfrom, recurse=recurse) + + def handleArg(self, arg): + gen = None + if arg.startswith('-filelinks'): + fileLinksPageTitle = arg[11:] + if not fileLinksPageTitle: + fileLinksPageTitle = pywikibot.input( + u'Links to which image page should be processed?') + if fileLinksPageTitle.startswith(pywikibot.Site().namespace(6) + + ":"): + fileLinksPage = pywikibot.ImagePage(pywikibot.Site(), + fileLinksPageTitle) + else: + fileLinksPage = pywikibot.ImagePage(pywikibot.Site(), + 'Image:' + + fileLinksPageTitle) + gen = FileLinksGenerator(fileLinksPage) + elif arg.startswith('-unusedfiles'): + if len(arg) == 12: + gen = UnusedFilesGenerator() + else: + gen = UnusedFilesGenerator(number = int(arg[13:])) + elif arg.startswith('-unwatched'): + if len(arg) == 10: + gen = UnwatchedPagesPageGenerator() + else: + gen = UnwatchedPagesPageGenerator(number = int(arg[11:])) + elif arg.startswith('-usercontribs'): + gen = UserContributionsGenerator(arg[14:]) + elif arg.startswith('-withoutinterwiki'): + if len(arg) == 17: + gen = WithoutInterwikiPageGenerator() + else: + gen = WithoutInterwikiPageGenerator(number = int(arg[18:])) + elif arg.startswith('-interwiki'): + title = arg[11:] + if not title: + title = pywikibot.input(u'Which page should be processed?') + page = pywikibot.Page(pywikibot.Site(), title) + gen = InterwikiPageGenerator(page) + elif arg.startswith('-file'): + textfilename = arg[6:] + if not textfilename: + textfilename = pywikibot.input( + u'Please enter the local file name:') + gen = TextfilePageGenerator(textfilename) + elif arg.startswith('-catr'): + gen = self.setCategoryGen(arg, 5, recurse = True) + elif arg.startswith('-cat'): + gen = self.setCategoryGen(arg, 4) + elif arg.startswith('-subcatsr'): + gen = self.setSubCategoriesGen(arg, 9, recurse = True) + elif arg.startswith('-subcats'): + gen = 
self.setSubCategoriesGen(arg, 8) + elif arg.startswith('-uncatfiles'): + gen = UnCategorizedImageGenerator() + elif arg.startswith('-uncatcat'): + gen = UnCategorizedCategoryGenerator() + elif arg.startswith('-uncat'): + gen = UnCategorizedPageGenerator() + elif arg.startswith('-ref'): + referredPageTitle = arg[5:] + if not referredPageTitle: + referredPageTitle = pywikibot.input( + u'Links to which page should be processed?') + referredPage = pywikibot.Page(pywikibot.Site(), referredPageTitle) + gen = ReferringPageGenerator(referredPage) + elif arg.startswith('-links'): + linkingPageTitle = arg[7:] + if not linkingPageTitle: + linkingPageTitle = pywikibot.input( + u'Links from which page should be processed?') + linkingPage = pywikibot.Page(pywikibot.Site(), linkingPageTitle) + gen = LinkedPageGenerator(linkingPage) + elif arg.startswith('-weblink'): + url = arg[9:] + if not url: + url = pywikibot.input( + u'Pages with which weblink should be processed?') + gen = LinksearchPageGenerator(url) + elif arg.startswith('-transcludes'): + transclusionPageTitle = arg[len('-transcludes:'):] + if not transclusionPageTitle: + transclusionPageTitle = pywikibot.input( + u'Pages that transclude which page should be processed?') + transclusionPage = pywikibot.Page(pywikibot.Site(), + 'Template:%s' % transclusionPageTitle) + gen = ReferringPageGenerator(transclusionPage, + onlyTemplateInclusion=True) + elif arg.startswith('-start'): + if arg.startswith('-startxml'): + pywikibot.output(u'-startxml : wrong parameter') + raise ValueError + firstPageTitle = arg[7:] + if not firstPageTitle: + firstPageTitle = pywikibot.input( + u'At which page do you want to start?') + namespace = pywikibot.Page(pywikibot.Site(), + firstPageTitle).namespace() + firstPageTitle = pywikibot.Page(pywikibot.link(firstPageTitle) + ).titleWithoutNamespace() + gen = AllpagesPageGenerator(firstPageTitle, namespace, + includeredirects=False) + elif arg.startswith('-prefixindex'): + prefix = arg[13:] + namespace = None + if not prefix: + prefix = pywikibot.input( + u'What page names are you looking for?') + gen = PrefixingPageGenerator(prefix=prefix) + elif arg.startswith('-newimages'): + limit = arg[11:] or pywikibot.input( + u'How many images do you want to load?') + gen = NewimagesPageGenerator(number=int(limit)) + elif arg.startswith('-new'): + if len(arg) >=5: + gen = NewpagesPageGenerator(number=int(arg[5:])) + else: + gen = NewpagesPageGenerator(number=60) + elif arg.startswith('-imagelinks'): + imagelinkstitle = arg[len('-imagelinks:'):] + if not imagelinkstitle: + imagelinkstitle = pywikibot.input( + u'Images on which page should be processed?') + imagelinksPage = pywikibot.Page(pywikibot.Link(imagelinkstitle)) + gen = ImagesPageGenerator(imagelinksPage) + elif arg.startswith('-search'): + mediawikiQuery = arg[8:] + if not mediawikiQuery: + mediawikiQuery = pywikibot.input( + u'What do you want to search for?') + # In order to be useful, all namespaces are required + gen = SearchPageGenerator(mediawikiQuery, namespaces = []) + elif arg.startswith('-google'): + gen = GoogleSearchPageGenerator(arg[8:]) + elif arg.startswith('-titleregex'): + if len(arg) == 6: + regex = pywikibot.input( + u'What page names are you looking for?') + else: + regex = arg[7:] + gen = RegexFilterPageGenerator(pywikibot.Site().allpages(), regex) + elif arg.startswith('-yahoo'): + gen = YahooSearchPageGenerator(arg[7:]) + else: + return None + # make sure all yielded pages are unique + gen = DuplicateFilterPageGenerator(gen) + return gen + + +class 
ThreadedGenerator(threading.Thread): + """Look-ahead generator class. + + Runs a generator in a separate thread and queues the results; can + be called like a regular generator. + + Subclasses should override self.generator, _not_ self.run + + Important: the generator thread will stop itself if the generator's + internal queue is exhausted; but, if the calling program does not use + all the generated values, it must call the generator's stop() method to + stop the background thread. Example usage: + + >>> gen = ThreadedGenerator(target=foo) + >>> try: + ... for data in gen: + ... do_work(data) + ... finally: + ... gen.stop() + + """ #NOT CURRENTLY USED: Intended for future development + + def __init__(self, group=None, target=None, name="GeneratorThread", + args=(), kwargs=None, qsize=65536): + """Constructor. Takes same keyword arguments as threading.Thread. + + target must be a generator function (or other callable that returns + an iterable object). + + @param qsize: The size of the lookahead queue. The larger the qsize, + the more values will be computed in advance of use (which can eat + up memory and processor time). + @type qsize: int + + """ + if kwargs is None: + kwargs = {} + if target: + self.generator = target + if not hasattr(self, "generator"): + raise RuntimeError("No generator for ThreadedGenerator to run.") + self.args, self.kwargs = args, kwargs + threading.Thread.__init__(self, group=group, name=name) + self.queue = Queue.Queue(qsize) + self.finished = threading.Event() + + def __iter__(self): + """Iterate results from the queue.""" + if not self.isAlive() and not self.finished.isSet(): + self.start() + # if there is an item in the queue, yield it, otherwise wait + while not self.finished.isSet(): + try: + yield self.queue.get(True, 0.25) + except Queue.Empty: + pass + except KeyboardInterrupt: + self.stop() + + def stop(self): + """Stop the background thread.""" +## if not self.finished.isSet(): +## pywikibot.output("DEBUG: signalling %s to stop." % self) + self.finished.set() + + def run(self): + """Run the generator and store the results on the queue.""" + self.__gen = self.generator(*self.args, **self.kwargs) + for result in self.__gen: + while True: + if self.finished.isSet(): +## pywikibot.output("DEBUG: %s received stop signal." % self) + return + try: + self.queue.put_nowait(result) + except Queue.Full: + time.sleep(0.25) + continue + break + # wait for queue to be emptied, then kill the thread + while not self.finished.isSet() and not self.queue.empty(): + time.sleep(0.25) + self.stop() +## pywikibot.output("DEBUG: %s stopped because generator exhausted." % self) + + +def AllpagesPageGenerator(start ='!', namespace=None, includeredirects=True, + site=None): + """ + Using the Allpages special page, retrieve all articles' titles, and yield + page objects. + If includeredirects is False, redirects are not included. If + includeredirects equals the string 'only', only redirects are added. 
+ """ + if site is None: + site = pywikibot.getSite() + if includeredirects: + if includeredirects == 'only': + filterredir = True + else: + filterredir = None + else: + filterredir = False + return site.allpages(start=start, namespace=namespace, + filterredir=filterredir) + + +def PrefixingPageGenerator(prefix, namespace=None, includeredirects=True, + site=None): + if site is None: + site = pywikibot.Site() + page = pywikibot.Page(site, prefix) + if namespace is None: + namespace = page.namespace() + title = page.titleWithoutNamespace() + if includeredirects: + if includeredirects == 'only': + filterredir = True + else: + filterredir = None + else: + filterredir = False + return site.allpages(prefix=title, namespace=namespace, + filterredir=filterredir) + + +def NewpagesPageGenerator(number=100, get_redirect=False, repeat=False, + site=None): + # API does not (yet) have a newpages function, so this tries to duplicate + # it by filtering the recentchanges output + # defaults to namespace 0 because that's how Special:Newpages defaults + if site is None: + site = pywikibot.Site() + return site.recentchanges(limit=number, showredirects=get_redirect, + changetype="new", namespaces=0) + + +def FileLinksGenerator(referredImagePage): + return referredImagePage.usingPages() + + +def ImagesPageGenerator(pageWithImages): + return pageWithImages.imagelinks() + + +def InterwikiPageGenerator(page): + """Iterator over all interwiki (non-language) links on a page.""" + for link in page.interwiki(): + yield pywikibot.Page(link) + + +def LanguageLinksPageGenerator(page): + """Iterator over all interwiki language links on a page.""" + for link in page.langlinks(): + yield pywikibot.Page(link) + + +def ReferringPageGenerator(referredPage, followRedirects=False, + withTemplateInclusion=True, + onlyTemplateInclusion=False): + '''Yields all pages referring to a specific page.''' + return referredPage.getReferences( + follow_redirects=followRedirects, + withTemplateInclusion=withTemplateInclusion, + onlyTemplateInclusion=onlyTemplateInclusion) + + +def CategorizedPageGenerator(category, recurse=False, start=None): + '''Yield all pages in a specific category. + + If recurse is True, pages in subcategories are included as well; if + recurse is an int, only subcategories to that depth will be included + (e.g., recurse=2 will get pages in subcats and sub-subcats, but will + not go any further). + If start is a string value, only pages whose sortkey comes after start + alphabetically are included. + + ''' # TODO: page generator could be modified to use cmstartsortkey ... + for a in category.articles(recurse=recurse): + if start is None or a.title(withNamespace=False) >= start: + yield a + + +def SubCategoriesPageGenerator(category, recurse=False, start=None): + '''Yields all subcategories in a specific category. + + If recurse is True, pages in subcategories are included as well; if + recurse is an int, only subcategories to that depth will be included + (e.g., recurse=2 will get pages in subcats and sub-subcats, but will + not go any further). + If start is a string value, only categories whose sortkey comes after + start alphabetically are included. + + ''' # TODO: page generator could be modified to use cmstartsortkey ... 
+ for s in category.subcategories(recurse=recurse): + if start is None or s.title(withNamespace=False) >= start: + yield s + + +def LinkedPageGenerator(linkingPage): + """Yields all pages linked from a specific page.""" + return linkingPage.linkedPages() + + +def TextfilePageGenerator(filename=None, site=None): + """Iterate pages from a list in a text file. + + The file must contain page links between double-square-brackets. The + generator will yield each corresponding Page object. + + @param filename: the name of the file that should be read. If no name is + given, the generator prompts the user. + @param site: the default Site for which Page objects should be created + + """ + if filename is None: + filename = pywikibot.input(u'Please enter the filename:') + if site is None: + site = pywikibot.Site() + f = codecs.open(filename, 'r', config.textfile_encoding) + for linkmatch in pywikibot.link_regex.finditer(f.read()): + # If the link is in interwiki format, the Page object may reside + # on a different Site than the default. + # This makes it possible to work on different wikis using a single + # text file, but also could be dangerous because you might + # inadvertently change pages on another wiki! + yield pywikibot.Page(pywikibot.Link(linkmatch.groups("title"), site)) + f.close() + + +def PagesFromTitlesGenerator(iterable, site=None): + """Generate pages from the titles (unicode strings) yielded by iterable.""" + if site is None: + site = pywikibot.Site() + for title in iterable: + if not isinstance(title, basestring): + break + yield pywikibot.Page(pywikibot.Link(title, site)) + + +def UserContributionsGenerator(username, number=250, namespaces=None, + site=None): + """Yields number unique pages edited by user:username + namespaces : list of namespace numbers to fetch contribs from + + """ + if site is None: + site = pywikibot.Site() + return site.usercontribs(user=username, limit=number, namespaces=namespaces) + + +def NamespaceFilterPageGenerator(generator, namespaces, site=None): + """ + Wraps around another generator. Yields only those pages that are in one + of the given namespaces. + + The namespace list can contain both integers (namespace numbers) and + strings/unicode strings (namespace names). + + """ + if site is None: + site = pywikibot.Site() + # convert namespace names to namespace numbers + for i in xrange(len(namespaces)): + ns = namespaces[i] + if isinstance(ns, basestring): + index = site.getNamespaceIndex(ns) + if index is None: + raise ValueError(u'Unknown namespace: %s' % ns) + namespaces[i] = index + for page in generator: + if page.namespace() in namespaces: + yield page + + +def RedirectFilterPageGenerator(generator): + """Yields pages from another generator that are not redirects.""" + for page in generator: + if not page.isRedirectPage(): + yield page + + +def DuplicateFilterPageGenerator(generator): + """Yield all unique pages from another generator, omitting duplicates.""" + seenPages = {} + for page in generator: + if page not in seenPages: + seenPages[page] = None + yield page + + +def RegexFilterPageGenerator(generator, regex): + """Yield pages from another generator whose titles match regex.""" + reg = re.compile(regex, re.I) + for page in generator: + if reg.match(page.titleWithoutNamespace()): + yield page + + +def CombinedPageGenerator(generators): + return itertools.chain(*generators) + + +def CategoryGenerator(generator): + """Yield pages from another generator as Category objects. 
+ + Makes sense only if it is ascertained that only categories are being + retrieved. + + """ + for page in generator: + yield pywikibot.Category(page) + + +def PageWithTalkPageGenerator(generator): + """ + Wraps around another generator. Yields the same pages, but for non-talk + pages, it also includes associated talk pages. + This generator does not check if the talk page in fact exists. + """ + for page in generator: + yield page + if not page.isTalkPage(): + yield page.toggleTalkPage() + + +def PreloadingGenerator(generator, pageNumber=60, lookahead=10): + """Yield preloaded pages taken from another generator.""" + + # pages may be on more than one site, for example if an interwiki + # generator is used, so use a separate preloader for each site + sites = {} + # build a list of pages for each site found in the iterator + for page in generator: + sites.setdefault(page.site(), []).append(page) + return itertools.chain(*(site.preloadpages(sites[site], pageNumber) + for site in sites)) + + +#TODO below + +def UnusedFilesGenerator(number=100, repeat=False, site=None, extension=None): + if site is None: + site = pywikibot.Site() + for page in site.unusedfiles(number=number, repeat=repeat, + extension=extension): + yield pywikibot.ImagePage(page.site(), page.title()) + +def WithoutInterwikiPageGenerator(number=100, repeat=False, site=None): + if site is None: + site = pywikibot.Site() + for page in site.withoutinterwiki(number=number, repeat=repeat): + yield page + +def UnCategorizedCategoryGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.uncategorizedcategories(number=number, repeat=repeat): + yield page + +def UnCategorizedImageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.uncategorizedimages(number=number, repeat=repeat): + yield page + +def NewimagesPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.newimages(number, repeat=repeat): + yield page[0] + +def UnCategorizedPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.uncategorizedpages(number=number, repeat=repeat): + yield page + +def LonelyPagesPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.lonelypages(number=number, repeat=repeat): + yield page + +def UnwatchedPagesPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.unwatchedpages(number=number, repeat=repeat): + yield page + +def AncientPagesPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.ancientpages(number=number, repeat=repeat): + yield page[0] + +def DeadendPagesPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.deadendpages(number=number, repeat=repeat): + yield page + +def LongPagesPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.longpages(number=number, repeat=repeat): + yield page[0] + +def ShortPagesPageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = pywikibot.Site() + for page in site.shortpages(number=number, repeat=repeat): + yield page[0] + +def LinksearchPageGenerator(link, step=500, site=None): + 
"""Yields all pages that include a specified link, according to + [[Special:Linksearch]]. + + """ + if site is None: + site = pywikibot.Site() + for page in site.linksearch(link, limit=step): + yield page + +def SearchPageGenerator(query, number = 100, namespaces = None, site = None): + """ + Provides a list of results using the internal MediaWiki search engine + """ + if site is None: + site = pywikibot.Site() + for page in site.search(query, number=number, namespaces = namespaces): + yield page[0] + +class YahooSearchPageGenerator: + ''' + To use this generator, install pYsearch + ''' + def __init__(self, query = None, count = 100, site = None): # values larger than 100 fail + self.query = query or pywikibot.input(u'Please enter the search query:') + self.count = count + if site is None: + site = pywikibot.Site() + self.site = site + + def queryYahoo(self, query): + from yahoo.search.web import WebSearch + srch = WebSearch(config.yahoo_appid, query=query, results=self.count) + + dom = srch.get_results() + results = srch.parse_results(dom) + for res in results: + url = res.Url + yield url + + def __iter__(self): + # restrict query to local site + localQuery = '%s site:%s' % (self.query, self.site.hostname()) + base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address('')) + for url in self.queryYahoo(localQuery): + if url[:len(base)] == base: + title = url[len(base):] + page = pywikibot.Page(self.site, title) + yield page + +class GoogleSearchPageGenerator: + ''' + To use this generator, you must install the pyGoogle module from + http://pygoogle.sf.net/ and get a Google Web API license key from + http://www.google.com/apis/index.html . The google_key must be set to your + license key in your configuration. + ''' + def __init__(self, query = None, site = None): + self.query = query or pywikibot.input(u'Please enter the search query:') + if site is None: + site = pywikibot.Site() + self.site = site + + ######### + # partially commented out because it is probably not in compliance with Google's "Terms of + # service" (see 5.3, http://www.google.com/accounts/TOS?loc=US) + def queryGoogle(self, query): + #if config.google_key: + if True: + #try: + for url in self.queryViaSoapApi(query): + yield url + return + #except ImportError: + #pass + # No google license key, or pygoogle not installed. Do it the ugly way. + #for url in self.queryViaWeb(query): + # yield url + + def queryViaSoapApi(self, query): + import google + google.LICENSE_KEY = config.google_key + offset = 0 + estimatedTotalResultsCount = None + while not estimatedTotalResultsCount or offset < estimatedTotalResultsCount: + while (True): + # Google often yields 502 errors. + try: + pywikibot.output(u'Querying Google, offset %i' % offset) + data = google.doGoogleSearch(query, start = offset, filter = False) + break + except KeyboardInterrupt: + raise + except: + # SOAPpy.Errors.HTTPError or SOAP.HTTPError (502 Bad Gateway) + # can happen here, depending on the module used. It's not easy + # to catch this properly because pygoogle decides which one of + # the soap modules to use. + pywikibot.output(u"An error occured. Retrying in 10 seconds...") + time.sleep(10) + continue + + for result in data.results: + #print 'DBG: ', result.URL + yield result.URL + # give an estimate of pages to work on, but only once. + if not estimatedTotalResultsCount: + pywikibot.output(u'Estimated total result count: %i pages.' 
% data.meta.estimatedTotalResultsCount) + estimatedTotalResultsCount = data.meta.estimatedTotalResultsCount + #print 'estimatedTotalResultsCount: ', estimatedTotalResultsCount + offset += 10 + + ######### + # commented out because it is probably not in compliance with Google's "Terms of + # service" (see 5.3, http://www.google.com/accounts/TOS?loc=US) + + #def queryViaWeb(self, query): + #""" + #Google has stopped giving out API license keys, and sooner or later + #they will probably shut down the service. + #This is a quick and ugly solution: we just grab the search results from + #the normal web interface. + #""" + #linkR = re.compile(r'<a href="([^>"]+?)" class=l>', re.IGNORECASE) + #offset = 0 + + #while True: + #pywikibot.output("Google: Querying page %d" % (offset / 100 + 1)) + #address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" % (urllib.quote_plus(query), offset) + ## we fake being Firefox because Google blocks unknown browsers + #request = urllib2.Request(address, None, {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8) Gecko/20051128 SUSE/1.5-0.1 Firefox/1.5'}) + #urlfile = urllib2.urlopen(request) + #page = urlfile.read() + #urlfile.close() + #for url in linkR.findall(page): + #yield url + #if "<div id=nn>" in page: # Is there a "Next" link for next page of results? + #offset += 100 # Yes, go to next page of results. + #else: + #return + ######### + + def __iter__(self): + # restrict query to local site + localQuery = '%s site:%s' % (self.query, self.site.hostname()) + base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address('')) + for url in self.queryGoogle(localQuery): + if url[:len(base)] == base: + title = url[len(base):] + page = pywikibot.Page(self.site, title) + # Google contains links in the format http://de.wikipedia.org/wiki/en:Foobar + if page.site() == self.site: + yield page + +def MySQLPageGenerator(query, site = None): + import MySQLdb as mysqldb + if site is None: + site = pywikibot.Site() + conn = mysqldb.connect(config.db_hostname, db = site.dbName(), + user = config.db_username, + passwd = config.db_password) + cursor = conn.cursor() + pywikibot.output(u'Executing query:\n%s' % query) + query = query.encode(site.encoding()) + cursor.execute(query) + while True: + try: + namespaceNumber, pageName = cursor.fetchone() + print namespaceNumber, pageName + except TypeError: + # Limit reached or no more results + break + #print pageName + if pageName: + namespace = site.namespace(namespaceNumber) + pageName = unicode(pageName, site.encoding()) + if namespace: + pageTitle = '%s:%s' % (namespace, pageName) + else: + pageTitle = pageName + page = pywikibot.Page(site, pageTitle) + yield page + +def YearPageGenerator(start = 1, end = 2050, site = None): + if site is None: + site = pywikibot.Site() + pywikibot.output(u"Starting with year %i" % start) + for i in xrange(start, end + 1): + if i % 100 == 0: + pywikibot.output(u'Preparing %i...' 
% i) + # There is no year 0 + if i != 0: + current_year = date.formatYear(site.lang, i ) + yield pywikibot.Page(site, current_year) + +def DayPageGenerator(startMonth = 1, endMonth = 12, site = None): + if site is None: + site = pywikibot.Site() + fd = date.FormatDate(site) + firstPage = pywikibot.Page(site, fd(startMonth, 1)) + pywikibot.output(u"Starting with %s" % firstPage.aslink()) + for month in xrange(startMonth, endMonth+1): + for day in xrange(1, date.getNumberOfDaysInMonth(month)+1): + yield pywikibot.Page(site, fd(month, day)) + + +if __name__ == "__main__": + try: + gen = None + genFactory = GeneratorFactory() + for arg in pywikibot.handleArgs(): + generator = genFactory.handleArg(arg) + if generator: + gen = generator + if gen: + for page in gen: + pywikibot.output(page.title(), toStdout = True) + else: + pywikibot.showHelp() + finally: + pywikibot.stopme()
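The __main__ block above doubles as a reference for how bot scripts are expected to drive this module. Below is a minimal sketch of that pattern, assuming the rewrite-branch API shown in this commit (GeneratorFactory.handleArg, PreloadingGenerator) plus the pywikibot helpers it already calls (handleArgs, output, showHelp, stopme); the batching value and overall script are illustrative, not part of the commit:

import pywikibot
from pywikibot import pagegenerators


def main():
    gen = None
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        generator = genFactory.handleArg(arg)
        if generator:
            gen = generator
    if gen is None:
        pywikibot.showHelp()
        return
    # handleArg() already wraps its result in DuplicateFilterPageGenerator,
    # so every yielded page is unique; PreloadingGenerator then fetches page
    # text in batches to reduce the number of API requests.
    for page in pagegenerators.PreloadingGenerator(gen, pageNumber=60):
        pywikibot.output(page.title())


if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()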
Property changes on: branches/rewrite/pywikibot/pagegenerators.py ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision Added: svn:eol-style + native
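Before turning to site.py, one more illustration of how the filter generators committed above compose: each one wraps another generator, so they can be stacked freely. This is a hedged sketch using the generator names as defined in this commit; the category title is a hypothetical placeholder:

import pywikibot
from pywikibot import pagegenerators

site = pywikibot.Site()
# 'Category:Example' is a placeholder, not a title taken from the commit
cat = pywikibot.Category(pywikibot.Link('Category:Example', site))
# pages in the category and in its direct subcategories only (recurse=1)
gen = pagegenerators.CategorizedPageGenerator(cat, recurse=1)
# keep main-namespace pages, then drop redirects
gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0], site=site)
gen = pagegenerators.RedirectFilterPageGenerator(gen)
for page in gen:
    pywikibot.output(page.title())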
Modified: branches/rewrite/pywikibot/site.py =================================================================== --- branches/rewrite/pywikibot/site.py 2008-12-16 19:34:48 UTC (rev 6155) +++ branches/rewrite/pywikibot/site.py 2008-12-16 19:40:20 UTC (rev 6156) @@ -1,2861 +1,2861 @@ - # -*- coding: utf-8 -*- -""" -Objects representing MediaWiki sites (wikis) and families (groups of wikis -on the same topic in different languages). -""" -# -# (C) Pywikipedia bot team, 2008 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id: $' - -import pywikibot -from pywikibot import deprecate_arg -from pywikibot import config -from pywikibot.throttle import Throttle -from pywikibot.data import api -from pywikibot.exceptions import * - -try: - from hashlib import md5 -except ImportError: - from md5 import md5 -import logging -import os -import re -import sys -import threading -import urllib - -logger = logging.getLogger("wiki") - -class PageInUse(pywikibot.Error): - """Page cannot be reserved for writing due to existing lock.""" - - -def Family(fam=None, fatal=True): - """Import the named family. - - @param fam: family name (if omitted, uses the configured default) - @type fam: str - @param fatal: if True, the bot will stop running if the given family is - unknown. If False, it will only raise a ValueError exception. - @param fatal: bool - @return: a Family instance configured for the named family. - - """ - if fam == None: - fam = config.family - try: - # first try the built-in families - exec "import pywikibot.families.%s_family as myfamily" % fam - except ImportError: - # next see if user has defined a local family module - try: - sys.path.append(config.datafilepath('families')) - exec "import %s_family as myfamily" % fam - except ImportError: - if fatal: - logger.exception(u"""\ -Error importing the %s family. This probably means the family -does not exist. Also check your configuration file.""" - % fam) - sys.exit(1) - else: - raise Error("Family %s does not exist" % fam) - return myfamily.Family() - - -class BaseSite(object): - """Site methods that are independent of the communication interface.""" - # to implement a specific interface, define a Site class that inherits - # from this - - def __init__(self, code, fam=None, user=None, sysop=None): - """ - @param code: the site's language code - @type code: str - @param fam: wiki family name (optional) - @type fam: str or Family - @param user: bot user name (optional) - @type user: str - @param sysop: sysop account user name (optional) - @type sysop: str - - """ - self.__code = code.lower() - if isinstance(fam, basestring) or fam is None: - self.__family = Family(fam, fatal=False) - else: - self.__family = fam - - # if we got an outdated language code, use the new one instead. - if self.__family.obsolete.has_key(self.__code): - if self.__family.obsolete[self.__code] is not None: - self.__code = self.__family.obsolete[self.__code] - else: - # no such language anymore - raise NoSuchSite("Language %s in family %s is obsolete" - % (self.__code, self.__family.name)) - if self.__code not in self.languages(): - if self.__code == 'zh-classic' and 'zh-classical' in self.languages(): - self.__code = 'zh-classical' - # database hack (database is varchar[10] -> zh-classical - # is cut to zh-classic. 
- else: - raise NoSuchSite("Language %s does not exist in family %s" - % (self.__code, self.__family.name)) - - self._username = [user, sysop] - - # following are for use with lock_page and unlock_page methods - self._pagemutex = threading.Lock() - self._locked_pages = [] - - @property - def throttle(self): - """Return this Site's throttle. Initialize a new one if needed.""" - - if not hasattr(self, "_throttle"): - self._throttle = Throttle(self, multiplydelay=True, - verbosedelay=True) - try: - self.login(False) - except pywikibot.NoUsername: - pass - return self._throttle - - @property - def family(self): - """The Family object for this Site's wiki family.""" - - return self.__family - - @property - def code(self): - """The identifying code for this Site.""" - - return self.__code - - @property - def lang(self): - """The ISO language code for this Site. - - Presumed to be equal to the wiki prefix, but this can be overridden. - - """ - return self.__code - - def __cmp__(self, other): - """Perform equality and inequality tests on Site objects.""" - - if not isinstance(other, BaseSite): - return 1 - if self.family == other.family: - return cmp(self.code, other.code) - return cmp(self.family.name, other.family.name) - - def user(self): - """Return the currently-logged in bot user, or None.""" - - if self.logged_in(True): - return self._username[True] - elif self.logged_in(False): - return self._username[False] - return None - - def username(self, sysop = False): - return self._username[sysop] - - def __getattr__(self, attr): - """Calls to methods not defined in this object are passed to Family.""" - - if hasattr(self.__class__, attr): - return self.__class__.attr - try: - method = getattr(self.family, attr) - f = lambda *args, **kwargs: \ - method(self.code, *args, **kwargs) - if hasattr(method, "__doc__"): - f.__doc__ = method.__doc__ - return f - except AttributeError: - raise AttributeError("%s instance has no attribute '%s'" - % (self.__class__.__name__, attr) ) - - def sitename(self): - """Return string representing this Site's name and language.""" - - return self.family.name+':'+self.code - - __str__ = sitename - - def __repr__(self): - return 'Site("%s", "%s")' % (self.code, self.family.name) - - def __hash__(self): - return hash(repr(self)) - - def linktrail(self): - """Return regex for trailing chars displayed as part of a link. - - Returns a string, not a compiled regular expression object. - - This reads from the family file, and ''not'' from - [[MediaWiki:Linktrail]], because the MW software currently uses a - built-in linktrail from its message files and ignores the wiki - value. 
- - """ - return self.family.linktrail(self.code) - - def languages(self): - """Return list of all valid language codes for this site's Family.""" - - return self.family.langs.keys() - - def validLanguageLinks(self): - """Return list of language codes that can be used in interwiki links.""" - - nsnames = sum(self.namespaces().values(), []) - return [l for l in self.languages() - if l[:1].upper() + l[1:] not in self.namespaces()] - - def ns_index(self, namespace): - """Given a namespace name, return its int index, or None if invalid.""" - - for ns in self.namespaces(): - if namespace.lower() in [name.lower() - for name in self.namespaces()[ns]]: - return ns - return None - - getNamespaceIndex = ns_index # for backwards-compatibility - - def namespaces(self): - """Return dict of valid namespaces on this wiki.""" - - return self._namespaces - - def ns_normalize(self, value): - """Return canonical local form of namespace name. - - @param value: A namespace name - @type value: unicode - - """ - index = self.ns_index(value) - return self.namespace(index) - - normalizeNamespace = ns_normalize # for backwards-compatibility - - def redirect(self, default=True): - """Return the localized redirect tag for the site. - - If default is True, falls back to 'REDIRECT' if the site has no - special redirect tag. - - """ - if default: - return self.family.redirect.get(self.code, [u"REDIRECT"])[0] - else: - return self.family.redirect.get(self.code, None) - - def lock_page(self, page, block=True): - """Lock page for writing. Must be called before writing any page. - - We don't want different threads trying to write to the same page - at the same time, even to different sections. - - @param page: the page to be locked - @type page: pywikibot.Page - @param block: if true, wait until the page is available to be locked; - otherwise, raise an exception if page can't be locked - - """ - self._pagemutex.acquire() - try: - while page in self._locked_pages: - if not block: - raise PageInUse - time.sleep(.25) - self._locked_pages.append(page.title(withSection=False)) - finally: - self._pagemutex.release() - - def unlock_page(self, page): - """Unlock page. Call as soon as a write operation has completed. - - @param page: the page to be locked - @type page: pywikibot.Page - - """ - self._pagemutex.acquire() - try: - self._locked_pages.remove(page.title(withSection=False)) - finally: - self._pagemutex.release() - - def disambcategory(self): - """Return Category in which disambig pages are listed.""" - - try: - name = self.namespace(14)+':'+self.family.disambcatname[self.code] - except KeyError: - raise Error(u"No disambiguation category name found for %(site)s" - % {'site': self}) - return pywikibot.Category(pywikibot.Link(name, self)) - - def linkto(self, title, othersite = None): - """Return unicode string in the form of a wikilink to 'title' - - Use optional Site argument 'othersite' to generate an interwiki link. - - """ - logger.debug("Site.linkto() method is deprecated; use pywikibot.Link") - return pywikibot.Link(title, self).astext(othersite) - - def isInterwikiLink(self, s): - """Return True if s is in the form of an interwiki link. - - If a link object constructed using "s" as the link text parses as - belonging to a different site, this method returns True. - - """ - return (pywikibot.Link(s, self).site != self) - - def redirectRegex(self): - """Return a compiled regular expression matching on redirect pages. - - Group 1 in the regex match object will be the target title. 
- - """ - #TODO: is this needed, since the API identifies redirects? - # (maybe, the API can give false positives) - default = 'REDIRECT' - try: - keywords = set(self.family.redirect[self.code]) - keywords.add(default) - pattern = r'(?:' + '|'.join(keywords) + ')' - except KeyError: - # no localized keyword for redirects - pattern = r'%s' % default - # A redirect starts with hash (#), followed by a keyword, then - # arbitrary stuff, then a wikilink. The wikilink may contain - # a label, although this is not useful. - return re.compile(r'\s*#%(pattern)s\s*:?\s*[[(.+?)(?:|.*?)?]]' - % locals(), - re.IGNORECASE | re.UNICODE | re.DOTALL) - - # namespace shortcuts for backwards-compatibility - - def special_namespace(self): - return self.namespace(-1) - - def image_namespace(self): - return self.namespace(6) - - def mediawiki_namespace(self): - return self.namespace(8) - - def template_namespace(self): - return self.namespace(10) - - def category_namespace(self): - return self.namespace(14) - - def category_namespaces(self): - return self.namespace(14, all=True) - - # site-specific formatting preferences - - def category_on_one_line(self): - """Return True if this site wants all category links on one line.""" - - return self.code in self.family.category_on_one_line - - def interwiki_putfirst(self): - """Return list of language codes for ordering of interwiki links.""" - - return self.family.interwiki_putfirst.get(self.code, None) - - def interwiki_putfirst_doubled(self, list_of_links): - # TODO: is this even needed? No family in the framework has this - # dictionary defined! - if self.lang in self.family.interwiki_putfirst_doubled: - if len(list_of_links) >= \ - self.family.interwiki_putfirst_doubled[self.lang][0]: - links2 = [lang.language() for lang in list_of_links] - result = [] - for lang in self.family.interwiki_putfirst_doubled[self.lang][1]: - try: - result.append(list_of_links[links2.index(lang)]) - except ValueError: - pass - return result - else: - return False - else: - return False - - def getSite(self, code): - """Return Site object for language 'code' in this Family.""" - - return pywikibot.Site(code=code, fam=self.family, user=self.user) - - # deprecated methods for backwards-compatibility - - def fam(self): - """Return Family object for this Site.""" - return self.family - - def urlEncode(self, query): - """DEPRECATED""" - return urllib.urlencode(query) - - def getUrl(self, path, retry=True, sysop=False, data=None, - compress=True, no_hostname=False, cookie_only=False): - """DEPRECATED. - - Retained for compatibility only. All arguments except path and data - are ignored. 
- - """ - if data: - if not isinstance(data, basestring): - data = urllib.urlencode(data) - return pywikibot.comms.data.request(self, path, method="PUT", - body=data) - else: - return pywikibot.comms.data.request(self, path) - - def postForm(self, address, predata, sysop=False, cookies=None): - """DEPRECATED""" - return self.getUrl(address, data=predata) - - def postData(self, address, data, contentType=None, sysop=False, - compress=True, cookies=None): - """DEPRECATED""" - return self.getUrl(address, data=data) - - # unsupported methods from version 1 - - def checkCharset(self, charset): - raise NotImplementedError - def getToken(self, getalways=True, getagain=False, sysop=False): - raise NotImplementedError - def export_address(self): - raise NotImplementedError - def move_address(self): - raise NotImplementedError - def delete_address(self, s): - raise NotImplementedError - def undelete_view_address(self, s, ts=''): - raise NotImplementedError - def undelete_address(self): - raise NotImplementedError - def protect_address(self, s): - raise NotImplementedError - def unprotect_address(self, s): - raise NotImplementedError - def put_address(self, s): - raise NotImplementedError - def get_address(self, s): - raise NotImplementedError - def nice_get_address(self, s): - raise NotImplementedError - def edit_address(self, s): - raise NotImplementedError - def purge_address(self, s): - raise NotImplementedError - def block_address(self): - raise NotImplementedError - def unblock_address(self): - raise NotImplementedError - def blocksearch_address(self, s): - raise NotImplementedError - def linksearch_address(self, s, limit=500, offset=0): - raise NotImplementedError - def search_address(self, q, n=50, ns=0): - raise NotImplementedError - def allpages_address(self, s, ns = 0): - raise NotImplementedError - def log_address(self, n=50, mode = ''): - raise NotImplementedError - def newpages_address(self, n=50): - raise NotImplementedError - def longpages_address(self, n=500): - raise NotImplementedError - def shortpages_address(self, n=500): - raise NotImplementedError - def unusedfiles_address(self, n=500): - raise NotImplementedError - def categories_address(self, n=500): - raise NotImplementedError - def deadendpages_address(self, n=500): - raise NotImplementedError - def ancientpages_address(self, n=500): - raise NotImplementedError - def lonelypages_address(self, n=500): - raise NotImplementedError - def protectedpages_address(self, n=500): - raise NotImplementedError - def unwatchedpages_address(self, n=500): - raise NotImplementedError - def uncategorizedcategories_address(self, n=500): - raise NotImplementedError - def uncategorizedimages_address(self, n=500): - raise NotImplementedError - def uncategorizedpages_address(self, n=500): - raise NotImplementedError - def unusedcategories_address(self, n=500): - raise NotImplementedError - def withoutinterwiki_address(self, n=500): - raise NotImplementedError - def references_address(self, s): - raise NotImplementedError - def allmessages_address(self): - raise NotImplementedError - def upload_address(self): - raise NotImplementedError - def double_redirects_address(self, default_limit = True): - raise NotImplementedError - def broken_redirects_address(self, default_limit = True): - raise NotImplementedError - def login_address(self): - raise NotImplementedError - def captcha_image_address(self, id): - raise NotImplementedError - def watchlist_address(self): - raise NotImplementedError - def contribs_address(self, target, limit=500, offset=''): - 
raise NotImplementedError - - -class APISite(BaseSite): - """API interface to MediaWiki site. - - Do not use directly; use pywikibot.Site function. - - """ -## Site methods from version 1.0 (as these are implemented in this file, -## or declared deprecated/obsolete, they will be removed from this list) -########## -## cookies: return user's cookies as a string -## -## urlEncode: Encode a query to be sent using an http POST request. -## postForm: Post form data to an address at this site. -## postData: Post encoded form data to an http address at this site. -## -## shared_image_repository: Return tuple of image repositories used by this -## site. -## version: Return MediaWiki version string from Family file. -## versionnumber: Return int identifying the MediaWiki version. -## live_version: Return version number read from Special:Version. -## checkCharset(charset): Warn if charset doesn't match family file. -## -## linktrail: Return regex for trailing chars displayed as part of a link. -## disambcategory: Category in which disambiguation pages are listed. -## -## Methods that yield Page objects derived from a wiki's Special: pages -## (note, some methods yield other information in a tuple along with the -## Pages; see method docs for details) -- -## -## newpages(): Special:Newpages -## newimages(): Special:Log&type=upload -## longpages(): Special:Longpages -## shortpages(): Special:Shortpages -## deadendpages(): Special:Deadendpages -## ancientpages(): Special:Ancientpages -## lonelypages(): Special:Lonelypages -## unwatchedpages(): Special:Unwatchedpages (sysop accounts only) -## uncategorizedcategories(): Special:Uncategorizedcategories (yields -## Category objects) -## uncategorizedpages(): Special:Uncategorizedpages -## uncategorizedimages(): Special:Uncategorizedimages (yields -## ImagePage objects) -## unusedcategories(): Special:Unusuedcategories (yields Category) -## unusedfiles(): Special:Unusedimages (yields ImagePage) -## withoutinterwiki: Special:Withoutinterwiki -## linksearch: Special:Linksearch - - def __init__(self, code, fam=None, user=None, sysop=None): - BaseSite.__init__(self, code, fam, user, sysop) - self._namespaces = { - # these are the MediaWiki built-in names, which always work - # localized names are loaded later upon accessing the wiki - # namespace prefixes are always case-insensitive, but the - # canonical forms are capitalized - -2: [u"Media"], - -1: [u"Special"], - 0: [u""], - 1: [u"Talk"], - 2: [u"User"], - 3: [u"User talk"], - 4: [u"Project"], - 5: [u"Project talk"], - 6: [u"Image"], - 7: [u"Image talk"], - 8: [u"MediaWiki"], - 9: [u"MediaWiki talk"], - 10: [u"Template"], - 11: [u"Template talk"], - 12: [u"Help"], - 13: [u"Help talk"], - 14: [u"Category"], - 15: [u"Category talk"], - } - self.sitelock = threading.Lock() - self._msgcache = {} - return - -# ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__() - self.nocapitalize = self.__code in self.family.nocapitalize - # Calculating valid languages took quite long, so we calculate it once - # in initialization instead of each time it is used. - self._validlanguages = [] - for language in self.languages(): - if not language[:1].upper() + language[1:] in self.namespaces(): - self._validlanguages.append(language) - - def logged_in(self, sysop=False): - """Return True if logged in with specified privileges, otherwise False. - - @param sysop: if True, require sysop privileges. 
- - """ - if self.userinfo['name'] != self._username[sysop]: - return False - return (not sysop) or 'sysop' in self.userinfo['groups'] - - def loggedInAs(self, sysop = False): - """Return the current username if logged in, otherwise return None. - - DEPRECATED (use .user() method instead) - - """ - logger.debug("Site.loggedInAs() method is deprecated.") - return self.logged_in(sysop) and self.user() - - def login(self, sysop=False): - """Log the user in if not already logged in.""" - if not hasattr(self, "_siteinfo"): - self._getsiteinfo() - # check whether a login cookie already exists for this user - if hasattr(self, "_userinfo"): - if self.userinfo['name'] == self._username[sysop]: - return - if not self.logged_in(sysop): - loginMan = api.LoginManager(site=self, sysop=sysop, - user=self._username[sysop]) - if loginMan.login(retry = True): - self._username[sysop] = loginMan.username - if hasattr(self, "_userinfo"): - del self._userinfo - self.getuserinfo() - - forceLogin = login # alias for backward-compatibility - - def getuserinfo(self): - """Retrieve userinfo from site and store in _userinfo attribute. - - self._userinfo will be a dict with the following keys and values: - - - id: user id (numeric str) - - name: username (if user is logged in) - - anon: present if user is not logged in - - groups: list of groups (could be empty) - - rights: list of rights (could be empty) - - message: present if user has a new message on talk page - - blockinfo: present if user is blocked (dict) - - """ - if (not hasattr(self, "_userinfo") - or "rights" not in self._userinfo - or self._userinfo['name'] - != self._username["sysop" in self._userinfo["groups"]]): - uirequest = api.Request( - site=self, - action="query", - meta="userinfo", - uiprop="blockinfo|hasmsg|groups|rights" - ) - uidata = uirequest.submit() - assert 'query' in uidata, \ - "API userinfo response lacks 'query' key" - assert 'userinfo' in uidata['query'], \ - "API userinfo response lacks 'userinfo' key" - self._userinfo = uidata['query']['userinfo'] - return self._userinfo - - userinfo = property(fget=getuserinfo, doc=getuserinfo.__doc__) - - def is_blocked(self, sysop=False): - """Return true if and only if user is blocked. - - @param sysop: If true, log in to sysop account (if available) - - """ - if not self.logged_in(sysop): - self.login(sysop) - return 'blockinfo' in self._userinfo - - def isBlocked(self, sysop=False): - """Deprecated synonym for is_blocked""" - logger.debug( - "Site method 'isBlocked' should be changed to 'is_blocked'") - return self.is_blocked(sysop) - - def checkBlocks(self, sysop = False): - """Check if the user is blocked, and raise an exception if so.""" - if self.is_blocked(sysop): - # User blocked - raise UserBlocked('User is blocked in site %s' % self) - - def has_right(self, right, sysop=False): - """Return true if and only if the user has a specific right. - - Possible values of 'right' may vary depending on wiki settings, - but will usually include: - - * Actions: edit, move, delete, protect, upload - * User levels: autoconfirmed, sysop, bot - - """ - if not self.logged_in(sysop): - self.login(sysop) - return right.lower() in self._userinfo['rights'] - - def isAllowed(self, right, sysop=False): - """Deprecated; retained for backwards-compatibility""" - logger.debug("Site.isAllowed() method is deprecated; use has_right()") - return self.has_right(right, sysop) - - def has_group(self, group, sysop=False): - """Return true if and only if the user is a member of specified group. 
- - Possible values of 'group' may vary depending on wiki settings, - but will usually include bot. - - """ - if not self.logged_in(sysop): - self.login(sysop) - return group.lower() in self._userinfo['groups'] - - def messages(self, sysop=False): - """Returns true if the user has new messages, and false otherwise.""" - if not self.logged_in(sysop): - self.login(sysop) - return 'hasmsg' in self._userinfo - - def mediawiki_message(self, key): - """Return the MediaWiki message text for key "key" """ - if not key in self._msgcache: - msg_query = api.QueryGenerator(site=self, meta="allmessages", - amfilter=key) - for msg in msg_query: - if msg['name'] == key and not 'missing' in msg: - self._msgcache[key] = msg['*'] - break - else: - raise KeyError("Site %(self)s has no message '%(key)s'" - % locals()) - return self._msgcache[key] - - def has_mediawiki_message(self, key): - """Return True iff this site defines a MediaWiki message for 'key'.""" - try: - v = self.mediawiki_message(key) - return True - except KeyError: - return False - - def getcurrenttimestamp(self): - """Return (Mediawiki) timestamp, {{CURRENTTIMESTAMP}}, the server time. - - Format is yyyymmddhhmmss - - """ - r = api.Request(site=self, - action="parse", - text="{{CURRENTTIMESTAMP}}") - result = r.submit() - return re.search('\d+', result['parse']['text']['*']).group() - - def _getsiteinfo(self): - """Retrieve siteinfo and namespaces from site.""" - sirequest = api.Request( - site=self, - action="query", - meta="siteinfo", - siprop="general|namespaces|namespacealiases" - ) - try: - sidata = sirequest.submit() - except api.APIError: - # hack for older sites that don't support 1.12 properties - # probably should delete if we're not going to support pre-1.12 - sirequest = api.Request( - site=self, - action="query", - meta="siteinfo", - siprop="general|namespaces" - ) - sidata = sirequest.submit() - - assert 'query' in sidata, \ - "API siteinfo response lacks 'query' key" - sidata = sidata['query'] - assert 'general' in sidata, \ - "API siteinfo response lacks 'general' key" - assert 'namespaces' in sidata, \ - "API siteinfo response lacks 'namespaces' key" - self._siteinfo = sidata['general'] - nsdata = sidata['namespaces'] - for nskey in nsdata: - ns = int(nskey) - if ns in self._namespaces: - if nsdata[nskey]["*"] in self._namespaces[ns]: - continue - # this is the preferred form so it goes at front of list - self._namespaces[ns].insert(0, nsdata[nskey]["*"]) - else: - self._namespaces[ns] = [nsdata[nskey]["*"]] - if 'namespacealiases' in sidata: - aliasdata = sidata['namespacealiases'] - for item in aliasdata: - if item["*"] in self._namespaces[int(item['id'])]: - continue - # this is a less preferred form so it goes at the end - self._namespaces[int(item['id'])].append(item["*"]) - - @property - def siteinfo(self): - """Site information dict.""" - - if not hasattr(self, "_siteinfo"): - self._getsiteinfo() - return self._siteinfo - - def case(self): - """Return this site's capitalization rule.""" - - return self.siteinfo['case'] - - def language(self): - """Return the code for the language of this Site.""" - - return self.siteinfo['lang'] - - lang = property(fget=language, doc=language.__doc__) - - def namespaces(self): - """Return dict of valid namespaces on this wiki.""" - - if not hasattr(self, "_siteinfo"): - self._getsiteinfo() - return self._namespaces - - def namespace(self, num, all=False): - """Return string containing local name of namespace 'num'. 
- - If optional argument 'all' is true, return a list of all recognized - values for this namespace. - - """ - if all: - return self.namespaces()[num] - return self.namespaces()[num][0] - - def live_version(self): - """Return the 'real' version number found on [[Special:Version]] - - Return value is a tuple (int, int, str) of the major and minor - version numbers and any other text contained in the version. - - """ - versionstring = self.siteinfo['generator'] - m = re.match(r"^MediaWiki ([0-9]+).([0-9]+)(.*)$", versionstring) - if m: - return (int(m.group(1)), int(m.group(2)), m.group(3)) - else: - return None - - def loadpageinfo(self, page): - """Load page info from api and save in page attributes""" - title = page.title(withSection=False) - query = api.PropertyGenerator("info", site=self, - titles=title.encode(self.encoding()), - inprop="protection") - for pageitem in query: - if pageitem['title'] != title: - raise Error( - u"loadpageinfo: Query on %s returned data on '%s'" - % (page, pageitem['title'])) - api.update_page(page, pageitem) - - def loadimageinfo(self, page, history=False): - """Load image info from api and save in page attributes - - @param history: if true, return the image's version history - - """ - title = page.title(withSection=False) - query = api.PropertyGenerator("imageinfo", site=self, - titles=title.encode(self.encoding()), - iiprop=["timestamp", "user", "comment", - "url", "size", "sha1", "mime", - "metadata", "archivename"]) - if history: - query.request["iilimit"] = "max" - for pageitem in query: - if pageitem['title'] != title: - raise Error( - u"loadpageinfo: Query on %s returned data on '%s'" - % (page, pageitem['title'])) - api.update_page(page, pageitem) - if history: - return pageitem['imageinfo'] - - def page_exists(self, page): - """Return True if and only if page is an existing page on site.""" - if not hasattr(page, "_pageid"): - self.loadpageinfo(page) - return page._pageid > 0 - - def page_restrictions(self, page): - """Returns a dictionary reflecting page protections""" - if not self.page_exists(page): - raise NoPage(u'No page %s.' % page) - if not hasattr(page, "_protection"): - self.loadpageinfo(page) - return page._protection - - def page_can_be_edited(self, page): - """ - Returns True if and only if: - - page is unprotected, and bot has an account for this site, or - - page is protected, and bot has a sysop account for this site. - - """ - rest = self.page_restrictions(page) - sysop_protected = rest.has_key('edit') and rest['edit'][0] == 'sysop' - try: - api.LoginManager(site=self, sysop=sysop_protected) - except NoUsername: - return False - return True - - def page_isredirect(self, page): - """Return True if and only if page is a redirect.""" - if not hasattr(page, "_redir"): - self.loadpageinfo(page) - return bool(page._redir) - - def getredirtarget(self, page): - """Return Page object for the redirect target of page.""" - if not hasattr(page, "_redir"): - self.loadpageinfo(page) - if not page._redir: - raise pywikibot.IsNotRedirectPage(page.title()) - title = page.title(withSection=False) - query = api.Request(site=self, action="query", property="info", - inprop="protection|talkid|subjectid", - titles=title.encode(self.encoding()), - redirects="") - result = query.submit() - if "query" not in result or "redirects" not in result["query"]: - raise RuntimeError( - "getredirtarget: No 'redirects' found for page %s." 
- % title) - redirmap = dict((item['from'], item['to']) - for item in result['query']['redirects']) - if title not in redirmap: - raise RuntimeError( - "getredirtarget: 'redirects' contains no key for page %s." - % title) - if "pages" not in result['query']: - # no "pages" element indicates a circular redirect - raise pywikibot.CircularRedirect(redirmap[title]) - for pagedata in result['query']['pages'].values(): - # there should be only one value in 'pages', and it is the target - if pagedata['title'] not in redirmap.values(): - raise RuntimeError( - "getredirtarget: target page '%s' not found in 'redirects'" - % pagedata['title']) - target = pywikibot.Page(self, pagedata['title'], pagedata['ns']) - api.update_page(target, pagedata) - page._redir = target - - def preloadpages(self, pagelist, groupsize=60): - """Return a generator to a list of preloaded pages. - - Note that [at least in current implementation] pages may be iterated - in a different order than in the underlying pagelist. - - @param pagelist: an iterable that returns Page objects - @param groupsize: how many Pages to query at a time - @type groupsize: int - - """ - from pywikibot.tools import itergroup - for sublist in itergroup(pagelist, groupsize): - pageids = [str(p._pageid) for p in sublist - if hasattr(p, "_pageid") - and p._pageid > 0] - cache = dict((p.title(withSection=False), p) for p in sublist) - rvgen = api.PropertyGenerator("revisions|info", site=self) - rvgen.limit = -1 - if len(pageids) == len(sublist): - # only use pageids if all pages have them - rvgen.request["pageids"] = "|".join(pageids) - else: - rvgen.request["titles"] = "|".join(cache.keys()) - rvgen.request[u"rvprop"] = \ - u"ids|flags|timestamp|user|comment|content" - logger.info(u"Retrieving %s pages from %s." - % (len(cache), self) - ) - for pagedata in rvgen: - logger.debug("Preloading %s" % pagedata) - try: - if pagedata['title'] not in cache: - raise Error( - u"preloadpages: Query returned unexpected title '%s'" - % pagedata['title'] - ) - except KeyError: - logger.debug("No 'title' in %s" % pagedata) - logger.debug("pageids=%s" % pageids) - logger.debug("titles=%s" % cache.keys()) - continue - page = cache[pagedata['title']] - api.update_page(page, pagedata) - yield page - - def token(self, page, tokentype): - """Return token retrieved from wiki to allow changing page content. - - @param page: the Page for which a token should be retrieved - @param tokentype: the type of token (e.g., "edit", "move", "delete"); - see API documentation for full list of types - - """ - query = api.PropertyGenerator("info|revisions", site=self, - titles=page.title(withSection=False), - intoken=tokentype) - for item in query: - if item['title'] != page.title(withSection=False): - raise Error( - u"token: Query on page %s returned data on page [[%s]]" - % (page.title(withSection=False, asLink=True), - item['title'])) - api.update_page(page, item) - logging.debug(str(item)) - return item[tokentype + "token"] - - # following group of methods map more-or-less directly to API queries - - def pagebacklinks(self, page, followRedirects=False, filterRedirects=None, - namespaces=None): - """Iterate all pages that link to the given page. - - @param page: The Page to get links to. - @param followRedirects: Also return links to redirects pointing to - the given page. - @param filterRedirects: If True, only return redirects to the given - page. If False, only return non-redirect links. If None, return - both (no filtering). 
- @param namespaces: If present, only return links from the namespaces - in this list. - - """ - bltitle = page.title(withSection=False).encode(self.encoding()) - blgen = api.PageGenerator("backlinks", gbltitle=bltitle, site=self) - if isinstance(namespaces, list): - blgen.request["gblnamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - blgen.request["gblnamespace"] = str(namespaces) - if filterRedirects is not None: - blgen.request["gblfilterredir"] = filterRedirects and "redirects"\ - or "nonredirects" - if followRedirects: - # bug: see http://bugzilla.wikimedia.org/show_bug.cgi?id=16218 - # links identified by MediaWiki as redirects may not really be, - # so we have to check each "redirect" page and see if it - # really redirects to this page - blgen.request["gblfilterredir"] = "nonredirects" - redirgen = api.PageGenerator("backlinks", gbltitle=bltitle, - site=self, gblfilterredir="redirects") - if "gblnamespace" in blgen.request: - redirgen.request["gblnamespace"] = blgen.request["gblnamespace"] - genlist = [blgen] - for redir in redirgen: - if redir.getRedirectTarget() == page: - genlist.append( - self.pagebacklinks( - redir, True, None, namespaces)) - import itertools - return itertools.chain(*genlist) - return blgen - - def page_embeddedin(self, page, filterRedirects=None, namespaces=None): - """Iterate all pages that embedded the given page as a template. - - @param page: The Page to get inclusions for. - @param filterRedirects: If True, only return redirects that embed - the given page. If False, only return non-redirect links. If - None, return both (no filtering). - @param namespaces: If present, only return links from the namespaces - in this list. - - """ - eititle = page.title(withSection=False).encode(self.encoding()) - eigen = api.PageGenerator("embeddedin", geititle=eititle, site=self) - if isinstance(namespaces, list): - eigen.request["geinamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - eigen.request["geinamespace"] = str(namespaces) - if filterRedirects is not None: - eigen.request["geifilterredir"] = filterRedirects and "redirects"\ - or "nonredirects" - return eigen - - def pagereferences(self, page, followRedirects=False, filterRedirects=None, - withTemplateInclusion=True, onlyTemplateInclusion=False, - namespaces=None): - """Convenience method combining pagebacklinks and page_embeddedin.""" - - if onlyTemplateInclusion: - return self.page_embeddedin(page, namespaces=namespaces) - if not withTemplateInclusion: - return self.pagebacklinks(page, followRedirects, - namespaces=namespaces) - import itertools - return itertools.chain( - self.pagebacklinks(page, followRedirects, - filterRedirects, namespaces=namespaces), - self.page_embeddedin(page, filterRedirects, - namespaces=namespaces) - ) - - def pagelinks(self, page, namespaces=None, follow_redirects=False, - limit=None): - """Iterate internal wikilinks contained (or transcluded) on page. 
- - @param namespaces: Only iterate pages in these namespaces (default: all) - @type namespaces: list of ints - @param follow_redirects: if True, yields the target of any redirects, - rather than the redirect page - - """ - plgen = api.PageGenerator("links", site=self) - if isinstance(limit, int): - plgen.limit = limit - if hasattr(page, "_pageid"): - plgen.request['pageids'] = str(page._pageid) - else: - pltitle = page.title(withSection=False).encode(self.encoding()) - plgen.request['titles'] = pltitle - if follow_redirects: - plgen.request['redirects'] = '' - if isinstance(namespaces, list): - plgen.request["gplnamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - plgen.request["gplnamespace"] = str(namespaces) - return plgen - - @deprecate_arg("withSortKey", None) # Sortkey doesn't work with generator - def pagecategories(self, page, withSortKey=None): - """Iterate categories to which page belongs.""" - - clgen = api.CategoryPageGenerator("categories", site=self) - if hasattr(page, "_pageid"): - clgen.request['pageids'] = str(page._pageid) - else: - cltitle = page.title(withSection=False).encode(self.encoding()) - clgen.request['titles'] = cltitle - return clgen - - def pageimages(self, page): - """Iterate images used (not just linked) on the page.""" - - imtitle = page.title(withSection=False).encode(self.encoding()) - imgen = api.ImagePageGenerator("images", titles=imtitle, site=self) - return imgen - - def pagetemplates(self, page, namespaces=None): - """Iterate templates transcluded (not just linked) on the page.""" - - tltitle = page.title(withSection=False).encode(self.encoding()) - tlgen = api.PageGenerator("templates", titles=tltitle, site=self) - if isinstance(namespaces, list): - tlgen.request["gtlnamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - tlgen.request["gtlnamespace"] = str(namespaces) - return tlgen - - def categorymembers(self, category, namespaces=None, limit=None): - """Iterate members of specified category. - - @param category: The Category to iterate. - @param namespaces: If present, only return category members from - these namespaces. For example, use namespaces=[14] to yield - subcategories, use namespaces=[6] to yield image files, etc. Note, - however, that the iterated values are always Page objects, even - if in the Category or Image namespace. - @type namespaces: list of ints - @param limit: maximum number of pages to iterate (default: all) - @type limit: int - - """ - if category.namespace() != 14: - raise Error( - u"categorymembers: non-Category page '%s' specified" - % category.title()) - cmtitle = category.title(withSection=False).encode(self.encoding()) - cmgen = api.PageGenerator("categorymembers", gcmtitle=cmtitle, - gcmprop="ids|title|sortkey", site=self) - if isinstance(namespaces, list): - cmgen.request["gcmnamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - cmgen.request["gcmnamespace"] = str(namespaces) - if isinstance(limit, int): - cmgen.limit = limit - return cmgen - - def loadrevisions(self, page=None, getText=False, revids=None, - limit=None, startid=None, endid=None, starttime=None, - endtime=None, rvdir=None, user=None, excludeuser=None, - section=None, sysop=False): - """Retrieve and store revision information. - - By default, retrieves the last (current) revision of the page, - I{unless} any of the optional parameters revids, startid, endid, - starttime, endtime, rvdir, user, excludeuser, or limit are - specified. 
Unless noted below, all parameters not specified - default to False. - - If rvdir is False or not specified, startid must be greater than - endid if both are specified; likewise, starttime must be greater - than endtime. If rvdir is True, these relationships are reversed. - - @param page: retrieve revisions of this Page (required unless ids - is specified) - @param getText: if True, retrieve the wiki-text of each revision; - otherwise, only retrieve the revision metadata (default) - @param section: if specified, retrieve only this section of the text - (getText must be True); section must be given by number (top of - the article is section 0), not name - @type section: int - @param revids: retrieve only the specified revision ids (required - unless page is specified) - @type revids: list of ints - @param limit: Retrieve no more than this number of revisions - @type limit: int - @param startid: retrieve revisions starting with this revid - @param endid: stop upon retrieving this revid - @param starttime: retrieve revisions starting at this timestamp - @param endtime: stop upon reaching this timestamp - @param rvdir: if false, retrieve newest revisions first (default); - if true, retrieve earliest first - @param user: retrieve only revisions authored by this user - @param excludeuser: retrieve all revisions not authored by this user - @param sysop: if True, switch to sysop account (if available) to - retrieve this page - - """ - latest = (revids is None and - startid is None and - endid is None and - starttime is None and - endtime is None and - rvdir is None and - user is None and - excludeuser is None and - limit is None) # if True, we are retrieving current revision - - # check for invalid argument combinations - if page is None and revids is None: - raise ValueError( - "loadrevisions: either page or revids argument required") - if (startid is not None or endid is not None) and \ - (starttime is not None or endtime is not None): - raise ValueError( - "loadrevisions: startid/endid combined with starttime/endtime") - if starttime is not None and endtime is not None: - if rvdir and starttime >= endtime: - raise ValueError( - "loadrevisions: starttime > endtime with rvdir=True") - if (not rvdir) and endtime >= starttime: - raise ValueError( - "loadrevisions: endtime > starttime with rvdir=False") - if startid is not None and endid is not None: - if rvdir and startid >= endid: - raise ValueError( - "loadrevisions: startid > endid with rvdir=True") - if (not rvdir) and endid >= startid: - raise ValueError( - "loadrevisions: endid > startid with rvdir=False") - - # assemble API request - if revids is None: - rvtitle = page.title(withSection=False).encode(self.encoding()) - rvgen = api.PropertyGenerator(u"info|revisions", titles=rvtitle, - site=self) - else: - if isinstance(revids, (int, basestring)): - ids = unicode(revids) - else: - ids = u"|".join(unicode(r) for r in revids) - rvgen = api.PropertyGenerator(u"info|revisions", revids=ids, - site=self) - if getText: - rvgen.request[u"rvprop"] = \ - u"ids|flags|timestamp|user|comment|content" - if section is not None: - rvgen.request[u"rvsection"] = unicode(section) - if latest or "revids" in rvgen.request: - rvgen.limit = -1 # suppress use of rvlimit parameter - elif isinstance(limit, int): - rvgen.limit = limit - if rvdir: - rvgen.request[u"rvdir"] = u"newer" - elif rvdir is not None: - rvgen.request[u"rvdir"] = u"older" - if startid: - rvgen.request[u"rvstartid"] = startid - if endid: - rvgen.request[u"rvendid"] = endid - if starttime: - 
rvgen.request[u"rvstart"] = starttime - if endtime: - rvgen.request[u"rvend"] = endtime - if user: - rvgen.request[u"rvuser"] = user - elif excludeuser: - rvgen.request[u"rvexcludeuser"] = excludeuser - # TODO if sysop: something - rvgen.continuekey = "revisions" - for pagedata in rvgen: - if page is not None: - if pagedata['title'] != page.title(withSection=False): - raise Error( - u"loadrevisions: Query on %s returned data on '%s'" - % (page, pagedata['title'])) - if pagedata.has_key('missing'): - raise NoPage(u'Page %s does not exist' - % page.title(asLink=True)) - else: - page = Page(self, pagedata['title']) - api.update_page(page, pagedata) - - def pageinterwiki(self, page): - # No such function in the API (this method isn't called anywhere) - raise NotImplementedError - - def pagelanglinks(self, page): - """Iterate all interlanguage links on page, yielding Link objects.""" - lltitle = page.title(withSection=False) - llquery = api.PropertyGenerator("langlinks", - titles=lltitle.encode(self.encoding()), - site=self) - for pageitem in llquery: - if pageitem['title'] != lltitle: - raise Error( - u"getlanglinks: Query on %s returned data on '%s'" - % (page, pageitem['title'])) - if 'langlinks' not in pageitem: - continue - for linkdata in pageitem['langlinks']: - yield pywikibot.Link(linkdata['*'], - source=pywikibot.Site(linkdata['lang'])) - - def page_extlinks(self, page): - """Iterate all external links on page, yielding URL strings.""" - eltitle = page.title(withSection=False) - elquery = api.PropertyGenerator("extlinks", - titles=eltitle.encode(self.encoding()), - site=self) - for pageitem in elquery: - if pageitem['title'] != eltitle: - raise RuntimeError( - "getlanglinks: Query on %s returned data on '%s'" - % (page, pageitem['title'])) - if 'extlinks' not in pageitem: - continue - for linkdata in pageitem['extlinks']: - yield linkdata['*'] - - @deprecate_arg("throttle", None) - @deprecate_arg("includeredirects", "filterredir") - def allpages(self, start="!", prefix="", namespace=0, filterredir=None, - filterlanglinks=None, minsize=None, maxsize=None, - protect_type=None, protect_level=None, limit=None, - reverse=False, includeredirects=None): - """Iterate pages in a single namespace. - - Note: parameters includeRedirects and throttle are deprecated and - included only for backwards compatibility. - - @param start: Start at this title (page need not exist). - @param prefix: Only yield pages starting with this string. 
- @param namespace: Iterate pages from this (single) namespace - (default: 0) - @param filterredir: if True, only yield redirects; if False (and not - None), only yield non-redirects (default: yield both) - @param filterlanglinks: if True, only yield pages with language links; - if False (and not None), only yield pages without language links - (default: yield both) - @param minsize: if present, only yield pages at least this many - bytes in size - @param maxsize: if present, only yield pages at most this many bytes - in size - @param protect_type: only yield pages that have a protection of the - specified type - @type protect_type: str - @param protect_level: only yield pages that have protection at this - level; can only be used if protect_type is specified - @param limit: maximum number of pages to iterate (default: iterate - all pages in namespace) - @param reverse: if True, iterate in reverse Unicode lexigraphic - order (default: iterate in forward order) - @param includeredirects: DEPRECATED, use filterredirs instead - - """ - if not isinstance(namespace, int): - raise Error("allpages: only one namespace permitted.") - if includeredirects is not None: - logger.debug( -"allpages: 'includeRedirects' argument is deprecated; use 'filterredirs'.") - if includeredirects: - if includeredirects == "only": - filterredirs = True - else: - filterredirs = None - else: - filterredirs = False - - apgen = api.PageGenerator("allpages", gapnamespace=str(namespace), - gapfrom=start, site=self) - if prefix: - apgen.request["gapprefix"] = prefix - if filterredir is not None: - apgen.request["gapfilterredir"] = (filterredir - and "redirects" - or "nonredirects") - if filterlanglinks is not None: - apgen.request["gapfilterlanglinks"] = (filterlanglinks - and "withlanglinks" - or "withoutlanglinks") - if isinstance(minsize, int): - apgen.request["gapminsize"] = str(minsize) - if isinstance(maxsize, int): - apgen.request["gapmaxsize"] = str(maxsize) - if isinstance(protect_type, basestring): - apgen.request["gapprtype"] = protect_type - if isinstance(protect_level, basestring): - apgen.request["gapprlevel"] = protect_level - if isinstance(limit, int): - apgen.limit = limit - if reverse: - apgen.request["gapdir"] = "descending" - return apgen - - def prefixindex(self, prefix, namespace=0, includeredirects=True): - """Yield all pages with a given prefix. Deprecated. - - Use allpages() with the prefix= parameter instead of this method. - - """ - logger.debug("Site.prefixindex() is deprecated; use allpages instead.") - return self.allpages(prefix=prefix, namespace=namespace, - includeredirects=includeredirects) - - - def alllinks(self, start="!", prefix="", namespace=0, unique=False, - limit=None, fromids=False): - """Iterate all links to pages (which need not exist) in one namespace. - - Note that, in practice, links that were found on pages that have - been deleted may not have been removed from the links table, so this - method can return false positives. - - @param start: Start at this title (page need not exist). - @param prefix: Only yield pages starting with this string. 
- @param namespace: Iterate pages from this (single) namespace - (default: 0) - @param unique: If True, only iterate each link title once (default: - iterate once for each linking page) - @param limit: maximum number of pages to iterate (default: iterate - all pages in namespace) - @param fromids: if True, include the pageid of the page containing - each link (default: False) as the '_fromid' attribute of the Page; - cannot be combined with unique - - """ - if unique and fromids: - raise Error("alllinks: unique and fromids cannot both be True.") - if not isinstance(namespace, int): - raise Error("alllinks: only one namespace permitted.") - algen = api.ListGenerator("alllinks", alnamespace=str(namespace), - alfrom=start, site=self) - if prefix: - algen.request["alprefix"] = prefix - if isinstance(limit, int): - algen.limit = limit - if unique: - algen.request["alunique"] = "" - if fromids: - algen.request["alprop"] = "title|ids" - for link in algen: - p = pywikibot.Page(self, link['title'], link['ns']) - if fromids: - p._fromid = link['fromid'] - yield p - - def allcategories(self, start="!", prefix="", limit=None, - reverse=False): - """Iterate categories used (which need not have a Category page). - - Iterator yields Category objects. Note that, in practice, links that - were found on pages that have been deleted may not have been removed - from the database table, so this method can return false positives. - - @param start: Start at this category title (category need not exist). - @param prefix: Only yield categories starting with this string. - @param limit: maximum number of categories to iterate (default: - iterate all) - @param reverse: if True, iterate in reverse Unicode lexigraphic - order (default: iterate in forward order) - - """ - acgen = api.CategoryPageGenerator("allcategories", - gacfrom=start, site=self) - if prefix: - acgen.request["gacprefix"] = prefix - if isinstance(limit, int): - acgen.limit = limit - if reverse: - acgen.request["gacdir"] = "descending" - return acgen - - def categories(self, number=10, repeat=False): - """Deprecated; retained for backwards-compatibility""" - logger.debug( - "Site.categories() method is deprecated; use .allcategories()") - if repeat: - limit = None - else: - limit = number - return self.allcategories(limit=limit) - - def allusers(self, start="!", prefix="", limit=None, group=None): - """Iterate registered users, ordered by username. - - Iterated values are dicts containing 'name', 'editcount', - 'registration', and (sometimes) 'groups' keys. 'groups' will be - present only if the user is a member of at least 1 group, and will - be a list of unicodes; all the other values are unicodes and should - always be present. - - @param start: start at this username (name need not exist) - @param prefix: only iterate usernames starting with this substring - @param limit: maximum number of users to iterate (default: all) - @param group: only iterate users that are members of this group - @type group: str - - """ - augen = api.ListGenerator("allusers", aufrom=start, - auprop="editcount|groups|registration", - site=self) - if prefix: - augen.request["auprefix"] = prefix - if group: - augen.request["augroup"] = group - if isinstance(limit, int): - augen.limit = limit - return augen - - def allimages(self, start="!", prefix="", minsize=None, maxsize=None, - limit=None, reverse=False, sha1=None, sha1base36=None): - """Iterate all images, ordered by image title. - - Yields ImagePages, but these pages need not exist on the wiki. 
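As a point of reference, the listing generators above (allpages, alllinks, allusers) are all used the same way. The sketch below is not part of the change itself; it assumes a working user-config.py and that the pywikibot.Site() factory, defined elsewhere in the framework, accepts a language code and family name.

    import pywikibot

    # Assumed factory call; the Site() signature is not shown in this diff.
    site = pywikibot.Site("en", "wikipedia")

    # First ten non-redirect pages in the main namespace, starting at "A".
    for page in site.allpages(start="A", namespace=0, filterredir=False, limit=10):
        print page.title()

    # Link targets recorded by MediaWiki, whether or not the pages exist; with
    # fromids=True each Page carries the id of the linking page in _fromid.
    for link in site.alllinks(prefix=u"Foo", limit=5, fromids=True):
        print link.title(), link._fromid

    # allusers() yields dicts rather than Page objects.
    for user in site.allusers(group="bot", limit=5):
        print user["name"], user["editcount"]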
- - @param start: start at this title (name need not exist) - @param prefix: only iterate titles starting with this substring - @param limit: maximum number of titles to iterate (default: all) - @param minsize: only iterate images of at least this many bytes - @param maxsize: only iterate images of no more than this many bytes - @param reverse: if True, iterate in reverse lexigraphic order - @param sha1: only iterate image (it is theoretically possible there - could be more than one) with this sha1 hash - @param sha1base36: same as sha1 but in base 36 - - """ - aigen = api.ImagePageGenerator("allimages", gaifrom=start, - site=self) - if prefix: - aigen.request["gaiprefix"] = prefix - if isinstance(limit, int): - aigen.limit = limit - if isinstance(minsize, int): - aigen.request["gaiminsize"] = str(minsize) - if isinstance(maxsize, int): - aigen.request["gaimaxsize"] = str(maxsize) - if reverse: - aigen.request["gaidir"] = "descending" - if sha1: - aigen.request["gaisha1"] = sha1 - if sha1base36: - aigen.request["gaisha1base36"] = sha1base36 - return aigen - - def blocks(self, starttime=None, endtime=None, reverse=False, - blockids=None, users=None, limit=None): - """Iterate all current blocks, in order of creation. - - Note that logevents only logs user blocks, while this method - iterates all blocks including IP ranges. The iterator yields dicts - containing keys corresponding to the block properties (see - http://www.mediawiki.org/wiki/API:Query_-_Lists for documentation). - - @param starttime: start iterating at this timestamp - @param endtime: stop iterating at this timestamp - @param reverse: if True, iterate oldest blocks first (default: newest) - @param blockids: only iterate blocks with these id numbers - @param users: only iterate blocks affecting these usernames or IPs - @param limit: maximum number of blocks to iterate (default: all) - - """ - if starttime and endtime: - if reverse: - if starttime > endtime: - raise pywikibot.Error( - "blocks: starttime must be before endtime with reverse=True") - else: - if endtime > starttime: - raise pywikibot.Error( - "blocks: endtime must be before starttime with reverse=False") - bkgen = api.ListGenerator("blocks", site=self) - bkgen.request["bkprop"] = \ - "id|user|by|timestamp|expiry|reason|range|flags" - if starttime: - bkgen.request["bkstart"] = starttime - if endtime: - bkgen.request["bkend"] = endtime - if reverse: - bkgen.request["bkdir"] = "newer" - if blockids: - bkgen.request["bkids"] = blockids - if users: - bkgen.request["bkusers"] = users - if isinstance(limit, int): - bkgen.limit = limit - return bkgen - - def exturlusage(self, url, protocol="http", namespaces=None, - limit=None): - """Iterate Pages that contain links to the given URL. 
- - @param url: The URL to search for (without the protocol prefix); - this many include a '*' as a wildcard, only at the start of the - hostname - @param protocol: The protocol prefix (default: "http") - @param namespaces: Only iterate pages in these namespaces (default: all) - @type namespaces: list of ints - @param limit: Only iterate this many linking pages (default: all) - - """ - eugen = api.PageGenerator("exturlusage", geuquery=url, - geuprotocol=protocol, site=self) - if isinstance(namespaces, list): - eugen.request["geunamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - eugen.request["geunamespace"] = str(namespaces) - if isinstance(limit, int): - eugen.limit = limit - return eugen - - def imageusage(self, image, namespaces=None, filterredir=None, - limit=None): - """Iterate Pages that contain links to the given ImagePage. - - @param image: the image to search for (ImagePage need not exist on the wiki) - @type image: ImagePage - @param namespaces: Only iterate pages in these namespaces (default: all) - @type namespaces: list of ints - @param filterredir: if True, only yield redirects; if False (and not - None), only yield non-redirects (default: yield both) - @param limit: Only iterate this many linking pages (default: all) - - """ - iugen = api.PageGenerator("imageusage", site=self, - giutitle=image.title(withSection=False)) - if isinstance(namespaces, list): - iugen.request["giunamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - iugen.request["giunamespace"] = str(namespaces) - if isinstance(limit, int): - iugen.limit = limit - if filterredir is not None: - iugen.request["giufilterredir"] = (filterredir and "redirects" - or "nonredirects") - return iugen - - def logevents(self, logtype=None, user=None, page=None, - start=None, end=None, reverse=False, limit=None): - """Iterate all log entries. - - @param logtype: only iterate entries of this type (see wiki - documentation for available types, which will include "block", - "protect", "rights", "delete", "upload", "move", "import", - "patrol", "merge") - @param user: only iterate entries that match this user name - @param page: only iterate entries affecting this page - @param start: only iterate entries from and after this timestamp - @param end: only iterate entries up to and through this timestamp - @param reverse: if True, iterate oldest entries first (default: newest) - @param limit: only iterate up to this many entries - - """ - if start and end: - if reverse: - if end < start: - raise Error( - "logevents: end must be later than start with reverse=True") - else: - if start < end: - raise Error( - "logevents: start must be later than end with reverse=False") - legen = api.ListGenerator("logevents", site=self) - if logtype is not None: - legen.request["letype"] = logtype - if user is not None: - legen.request["leuser"] = user - if page is not None: - legen.request["letitle"] = page.title(withSection=False) - if start is not None: - legen.request["lestart"] = start - if end is not None: - legen.request["leend"] = end - if reverse: - legen.request["ledir"] = "newer" - if isinstance(limit, int): - legen.limit = limit - return legen - - def recentchanges(self, start=None, end=None, reverse=False, limit=None, - namespaces=None, pagelist=None, changetype=None, - showMinor=None, showBot=None, showAnon=None, - showRedirects=None, showPatrolled=None): - """Iterate recent changes. 
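imageusage() and logevents() above follow the same generator pattern; a short sketch, reusing the assumed site object from the earlier example:

    # Take one ImagePage from allimages() and list the articles that use it.
    for image in site.allimages(limit=1):
        for page in site.imageusage(image, namespaces=[0], filterredir=False,
                                    limit=10):
            print page.title()

    # Recent deletions by a (hypothetical) admin account; logevents() is a
    # ListGenerator, so it yields raw log entries rather than Page objects.
    for entry in site.logevents(logtype="delete", user=u"ExampleAdmin", limit=20):
        print entry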
- - @param start: timestamp to start listing from - @param end: timestamp to end listing at - @param reverse: if True, start with oldest changes (default: newest) - @param limit: iterate no more than this number of entries - @param namespaces: iterate changes to pages in these namespaces only - @type namespaces: list of ints - @param pagelist: iterate changes to pages in this list only - @param pagelist: list of Pages - @param changetype: only iterate changes of this type ("edit" for - edits to existing pages, "new" for new pages, "log" for log - entries) - @param showMinor: if True, only list minor edits; if False (and not - None), only list non-minor edits - @param showBot: if True, only list bot edits; if False (and not - None), only list non-bot edits - @param showAnon: if True, only list anon edits; if False (and not - None), only list non-anon edits - @param showRedirects: if True, only list edits to redirect pages; if - False (and not None), only list edits to non-redirect pages - @param showPatrolled: if True, only list patrolled edits; if False - (and not None), only list non-patrolled edits - - """ - if start and end: - if reverse: - if end < start: - raise Error( - "recentchanges: end must be later than start with reverse=True") - else: - if start < end: - raise Error( - "recentchanges: start must be later than end with reverse=False") - rcgen = api.ListGenerator("recentchanges", site=self, - rcprop="user|comment|timestamp|title|ids" - "|redirect|patrolled|loginfo|flags") - if start is not None: - rcgen.request["rcstart"] = start - if end is not None: - rcgen.request["rcend"] = end - if reverse: - rcgen.request["rcdir"] = "newer" - if isinstance(limit, int): - rcgen.limit = limit - if isinstance(namespaces, list): - rcgen.request["rcnamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - rcgen.request["rcnamespace"] = str(namespaces) - if pagelist: - rcgen.request["rctitles"] = u"|".join(p.title(withSection=False) - for p in pagelist) - if changetype: - rcgen.request["rctype"] = changetype - filters = {'minor': showMinor, - 'bot': showBot, - 'anon': showAnon, - 'redirect': showRedirects, - 'patrolled': showPatrolled} - rcshow = [] - for item in filters: - if filters[item] is not None: - rcshow.append(filters[item] and item or ("!"+item)) - if rcshow: - rcgen.request["rcshow"] = "|".join(rcshow) - return rcgen - - @deprecate_arg("number", "limit") - def search(self, searchstring, namespaces=None, where="text", - getredirects=False, limit=None): - """Iterate Pages that contain the searchstring. - - Note that this may include non-existing Pages if the wiki's database - table contains outdated entries. 
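The showMinor, showBot, showAnon, showRedirects and showPatrolled arguments above are tri-state: True selects only entries with the property, False selects only entries without it, and None (the default) applies no filter. For example, with the same assumed site object:

    # Anonymous, non-bot edits to existing articles; each iterated value is a
    # dict of the requested rc properties (title, user, comment, timestamp, ...).
    for change in site.recentchanges(namespaces=[0], changetype="edit",
                                     showBot=False, showAnon=True, limit=50):
        print change["title"], change["user"]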
- - @param searchstring: the text to search for - @type searchstring: unicode - @param where: Where to search; value must be "text" or "titles" (many - wikis do not support title search) - @param namespaces: search only in these namespaces (defaults to 0) - @type namespaces: list of ints - @param getredirects: if True, include redirects in results - @param limit: maximum number of results to iterate - - """ - if not searchstring: - raise Error("search: searchstring cannot be empty") - if where not in ("text", "titles"): - raise Error("search: unrecognized 'where' value: %s" % where) - srgen = api.PageGenerator("search", gsrsearch=searchstring, - gsrwhat=where, site=self) - if not namespaces: - logger.warning("search: namespaces cannot be empty; using [0].") - namespaces = [0] - if isinstance(namespaces, list): - srgen.request["gsrnamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - else: - srgen.request["gsrnamespace"] = str(namespaces) - if getredirects: - srgen.request["gsrredirects"] = "" - if isinstance(limit, int): - srgen.limit = limit - return srgen - - def usercontribs(self, user=None, userprefix=None, start=None, end=None, - reverse=False, limit=None, namespaces=None, - showMinor=None): - """Iterate contributions by a particular user. - - Iterated values are in the same format as recentchanges. - - @param user: Iterate contributions by this user (name or IP) - @param userprefix: Iterate contributions by all users whose names - or IPs start with this substring - @param start: Iterate contributions starting at this timestamp - @param end: Iterate contributions ending at this timestamp - @param reverse: Iterate oldest contributions first (default: newest) - @param limit: Maximum number of contributions to iterate - @param namespaces: Only iterate contributions in these namespaces - @type namespaces: list of ints - @param showMinor: if True, iterate only minor edits; if False and - not None, iterate only non-minor edits (default: iterate both) - - """ - if not (user or userprefix): - raise Error( - "usercontribs: either user or userprefix must be non-empty") - if start and end: - if reverse: - if end < start: - raise Error( - "usercontribs: end must be later than start with reverse=True") - else: - if start < end: - raise Error( - "usercontribs: start must be later than end with reverse=False") - ucgen = api.ListGenerator("usercontribs", site=self, - ucprop="ids|title|timestamp|comment|flags") - if user: - ucgen.request["ucuser"] = user - if userprefix: - ucgen.request["ucuserprefix"] = userprefix - if start is not None: - ucgen.request["ucstart"] = start - if end is not None: - ucgen.request["ucend"] = end - if reverse: - ucgen.request["ucdir"] = "newer" - if isinstance(limit, int): - ucgen.limit = limit - if isinstance(namespaces, list): - ucgen.request["ucnamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - ucgen.request["ucnamespace"] = str(namespaces) - if showMinor is not None: - ucgen.request["ucshow"] = showMinor and "minor" or "!minor" - return ucgen - - def watchlist_revs(self, start=None, end=None, reverse=False, - namespaces=None, showMinor=None, showBot=None, - showAnon=None, limit=None): - """Iterate revisions to pages on the bot user's watchlist. - - Iterated values will be in same format as recentchanges. 
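Note the ordering convention enforced by usercontribs() above (and by the other timestamp-bounded iterators): with reverse=False, the default newest-first order, start must be the later timestamp and end the earlier one; with reverse=True the relationship is inverted. A sketch, assuming ISO 8601 timestamp strings that are passed through to the API unchanged:

    # December 2008 main-namespace contributions of a hypothetical account,
    # oldest first, so start is the earlier timestamp and end the later one.
    for contrib in site.usercontribs(user=u"ExampleBot",
                                     start=u"2008-12-01T00:00:00Z",
                                     end=u"2008-12-16T00:00:00Z",
                                     reverse=True, namespaces=[0], limit=100):
        print contrib["title"], contrib["timestamp"]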
- - @param start: Iterate revisions starting at this timestamp - @param end: Iterate revisions ending at this timestamp - @param reverse: Iterate oldest revisions first (default: newest) - @param namespaces: only iterate revisions to pages in these - namespaces (default: all) - @type namespaces: list of ints - @param showMinor: if True, only list minor edits; if False (and not - None), only list non-minor edits - @param showBot: if True, only list bot edits; if False (and not - None), only list non-bot edits - @param showAnon: if True, only list anon edits; if False (and not - None), only list non-anon edits - @param limit: Maximum number of revisions to iterate - - """ - if start and end: - if reverse: - if end < start: - raise Error( - "watchlist_revs: end must be later than start with reverse=True") - else: - if start < end: - raise Error( - "watchlist_revs: start must be later than end with reverse=False") - wlgen = api.ListGenerator("watchlist", wlallrev="", site=self, - wlprop="user|comment|timestamp|title|ids|flags") - #TODO: allow users to ask for "patrol" as well? - if start is not None: - wlgen.request["wlstart"] = start - if end is not None: - wlgen.request["wlend"] = end - if reverse: - wlgen.request["wldir"] = "newer" - if isinstance(limit, int): - wlgen.limit = limit - if isinstance(namespaces, list): - wlgen.request["wlnamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - wlgen.request["wlnamespace"] = str(namespaces) - filters = {'minor': showMinor, - 'bot': showBot, - 'anon': showAnon} - wlshow = [] - for item in filters: - if filters[item] is not None: - wlshow.append(filters[item] and item or ("!"+item)) - if wlshow: - wlgen.request["wlshow"] = "|".join(wlshow) - return wlgen - - def deletedrevs(self, page, start=None, end=None, reverse=None, limit=None, - get_text=False): - """Iterate deleted revisions. - - Each value returned by the iterator will be a dict containing the - 'title' and 'ns' keys for a particular Page and a 'revisions' key - whose value is a list of revisions in the same format as - recentchanges (plus a 'content' element if requested). If get_text - is true, the toplevel dict will contain a 'token' key as well. - - @param page: The page to check for deleted revisions - @param start: Iterate revisions starting at this timestamp - @param end: Iterate revisions ending at this timestamp - @param reverse: Iterate oldest revisions first (default: newest) - @param limit: Iterate no more than this number of revisions. - @param get_text: If True, retrieve the content of each revision and - an undelete token - - """ - if start and end: - if reverse: - if end < start: - raise Error( -"deletedrevs: end must be later than start with reverse=True") - else: - if start < end: - raise Error( -"deletedrevs: start must be later than end with reverse=False") - if not self.logged_in(): - self.login() - if "deletedhistory" not in self.userinfo['rights']: - try: - self.login(True) - except NoUsername: - pass - if "deletedhistory" not in self.userinfo['rights']: - raise Error( -"deletedrevs: User:%s not authorized to access deleted revisions." - % self.user()) - if get_text: - if "undelete" not in self.userinfo['rights']: - try: - self.login(True) - except NoUsername: - pass - if "undelete" not in self.userinfo['rights']: - raise Error( -"deletedrevs: User:%s not authorized to view deleted content." 
- % self.user()) - - drgen = api.ListGenerator("deletedrevs", site=self, - titles=page.title(withSection=False), - drprop="revid|user|comment|minor") - if get_text: - drgen.request['drprop'] = drgen.request['drprop'] + "|content|token" - if start is not None: - drgen.request["drstart"] = start - if end is not None: - drgen.request["drend"] = end - if reverse: - drgen.request["drdir"] = "newer" - if isinstance(limit, int): - drgen.limit = limit - return drgen - - def users(self, usernames): - """Iterate info about a list of users by name or IP. - - @param usernames: a list of user names - @type usernames: list, or other iterable, of unicodes - - """ - if not isinstance(usernames, basestring): - usernames = u"|".join(usernames) - usgen = api.ListGenerator("users", ususers=usernames, site=self, - usprop="blockinfo|groups|editcount|registration") - return usgen - - def randompages(self, limit=1, namespaces=None, redirects=False): - """Iterate a number of random pages. - - Pages are listed in a fixed sequence, only the starting point is - random. - - @param limit: the maximum number of pages to iterate (default: 1) - @param namespaces: only iterate pages in these namespaces. - @param redirects: if True, include only redirect pages in results - (default: include only non-redirects) - - """ - rngen = api.PageGenerator("random", site=self) - rngen.limit = limit - if isinstance(namespaces, list): - rngen.request["grnnamespace"] = u"|".join(unicode(ns) - for ns in namespaces) - elif namespaces is not None: - rngen.request["grnnamespace"] = str(namespaces) - if redirects: - rngen.request["grnredirect"] = "" - return rngen - - # catalog of editpage error codes, for use in generating messages - _ep_errors = { - "noapiwrite": "API editing not enabled on %(site)s wiki", - "writeapidenied": -"User %(user)s is not authorized to edit on %(site)s wiki", - "protectedtitle": -"Title %(title)s is protected against creation on %(site)s", - "cantcreate": -"User %(user)s not authorized to create new pages on %(site)s wiki", - "cantcreate-anon": -"""Bot is not logged in, and anon users are not authorized to create new pages -on %(site)s wiki""", - "articleexists": "Page %(title)s already exists on %(site)s wiki", - "noimageredirect-anon": -"""Bot is not logged in, and anon users are not authorized to create image -redirects on %(site)s wiki""", - "noimageredirect": -"User %(user)s not authorized to create image redirects on %(site)s wiki", - "spamdetected": -"Edit to page %(title)s rejected by spam filter due to content:\n", - "filtered": "%(info)s", - "contenttoobig": "%(info)s", - "noedit-anon": -"""Bot is not logged in, and anon users are not authorized to edit on -%(site)s wiki""", - "noedit": "User %(user)s not authorized to edit pages on %(site)s wiki", - "pagedeleted": -"Page %(title)s has been deleted since last retrieved from %(site)s wiki", - "editconflict": "Page %(title)s not saved due to edit conflict.", - } - - def editpage(self, page, summary, minor=True, notminor=False, - recreate=True, createonly=False, watch=False, unwatch=False): - """Submit an edited Page object to be saved to the wiki. - - @param page: The Page to be saved; its .text property will be used - as the new text to be saved to the wiki - @param token: the edit token retrieved using Site.token() - @param summary: the edit summary (required!) 
- @param minor: if True (default), mark edit as minor - @param notminor: if True, override account preferences to mark edit - as non-minor - @param recreate: if True (default), create new page even if this - title has previously been deleted - @param createonly: if True, raise an error if this title already - exists on the wiki - @param watch: if True, add this Page to bot's watchlist - @param unwatch: if True, remove this Page from bot's watchlist if - possible - @return: True if edit succeeded, False if it failed - - """ - text = page.text - if not text: - raise Error("editpage: no text to be saved") - try: - lastrev = page.latestRevision() - except NoPage: - lastrev = None - if not recreate: - raise Error("Page %s does not exist on %s wiki." - % (page.title(withSection=False), self)) - token = self.token(page, "edit") - self.lock_page(page) - if lastrev is not None and page.latestRevision() != lastrev: - raise Error("editpage: Edit conflict detected; saving aborted.") - req = api.Request(site=self, action="edit", - title=page.title(withSection=False), - text=text, token=token, summary=summary) -## if lastrev is not None: -## req["basetimestamp"] = page._revisions[lastrev].timestamp - if minor: - req['minor'] = "" - elif notminor: - req['notminor'] = "" - if 'bot' in self.userinfo['groups']: - req['bot'] = "" - if recreate: - req['recreate'] = "" - if createonly: - req['createonly'] = "" - if watch: - req['watch'] = "" - elif unwatch: - req['unwatch'] = "" -## FIXME: API gives 'badmd5' error -## md5hash = md5() -## md5hash.update(urllib.quote_plus(text.encode(self.encoding()))) -## req['md5'] = md5hash.digest() - while True: - try: - result = req.submit() - logger.debug("editpage response: %s" % result) - except api.APIError, err: - self.unlock_page(page) - if err.code.endswith("anon") and self.logged_in(): - logger.debug( -"editpage: received '%s' even though bot is logged in" % err.code) - errdata = { - 'site': self, - 'title': page.title(withSection=False), - 'user': self.user(), - 'info': err.info - } - if err.code == "spamdetected": - raise SpamfilterError(self._ep_errors[err.code] % errdata - + err.info[ err.info.index("fragment: ") + 9: ]) - - if err.code == "editconflict": - raise EditConflict(self._ep_errors[err.code] % errdata) - if err.code in self._ep_errors: - raise Error(self._ep_errors[err.code] % errdata) - logger.debug("editpage: Unexpected error code '%s' received." - % err.code) - raise - assert ("edit" in result and "result" in result["edit"]), result - if result["edit"]["result"] == "Success": - self.unlock_page(page) - if "nochange" in result["edit"]: - # null edit, page not changed - # TODO: do we want to notify the user of this? 
- return True - page._revid = result["edit"]["newrevid"] - # see http://www.mediawiki.org/wiki/API:Wikimania_2006_API_discussion#Notes - # not safe to assume that saved text is the same as sent - self.loadrevisions(page, getText=True) - return True - elif result["edit"]["result"] == "Failure": - if "captcha" in result["edit"]: - captcha = result["edit"]["captcha"] - req['captchaid'] = captcha['id'] - if captcha["type"] == "math": - req['captchaword'] = input(captcha["question"]) - continue - elif "url" in captcha: - webbrowser.open(url) - req['captchaword'] = cap_answerwikipedia.input( -"Please view CAPTCHA in your browser, then type answer here:") - continue - else: - self.unlock_page(page) - logger.error( -"editpage: unknown CAPTCHA response %s, page not saved" - % captcha) - return False - else: - self.unlock_page(page) - logger.error("editpage: unknown failure reason %s" - % str(result)) - return False - else: - self.unlock_page(page) - logger.error( -"editpage: Unknown result code '%s' received; page not saved" - % result["edit"]["result"]) - logger.error(str(result)) - return False - - # catalog of move errors for use in error messages - _mv_errors = { - "noapiwrite": "API editing not enabled on %(site)s wiki", - "writeapidenied": -"User %(user)s is not authorized to edit on %(site)s wiki", - "nosuppress": -"User %(user)s is not authorized to move pages without creating redirects", - "cantmove-anon": -"""Bot is not logged in, and anon users are not authorized to move pages on -%(site)s wiki""", - "cantmove": -"User %(user)s is not authorized to move pages on %(site)s wiki", - "immobilenamespace": -"Pages in %(oldnamespace)s namespace cannot be moved on %(site)s wiki", - "articleexists": -"Cannot move because page [[%(newtitle)s]] already exists on %(site)s wiki", - "protectedpage": -"Page [[%(oldtitle)s]] is protected against moving on %(site)s wiki", - "protectedtitle": -"Page [[%(newtitle)s]] is protected against creation on %(site)s wiki", - "nonfilenamespace": -"Cannot move a file to %(newnamespace)s namespace on %(site)s wiki", - "filetypemismatch": -"[[%(newtitle)s]] file extension does not match content of [[%(oldtitle)s]]" - } - - def movepage(self, page, newtitle, summary, movetalk=True, - noredirect=False): - """Move a Page to a new title. - - @param page: the Page to be moved (must exist) - @param newtitle: the new title for the Page - @type newtitle: unicode - @param summary: edit summary (required!) - @param movetalk: if True (default), also move the talk page if possible - @param noredirect: if True, suppress creation of a redirect from the - old title to the new one - @return: Page object with the new title - - """ - oldtitle = page.title(withSection=False) - newlink = pywikibot.Link(newtitle, self) - if newlink.namespace: - newtitle = self.namespace(newlink.namespace) + ":" + newlink.title - else: - newtitle = newlink.title - if oldtitle == newtitle: - raise Error("Cannot move page %s to its own title." - % oldtitle) - if not page.exists(): - raise Error("Cannot move page %s because it does not exist on %s." 
- % (oldtitle, self)) - token = self.token(page, "move") - self.lock_page(page) - req = api.Request(site=self, action="move", to=newtitle, - token=token, reason=summary) - req['from'] = oldtitle # "from" is a python keyword - if movetalk: - req['movetalk'] = "" - if noredirect: - req['noredirect'] = "" - try: - result = req.submit() - logger.debug("movepage response: %s" % result) - except api.APIError, err: - if err.code.endswith("anon") and self.logged_in(): - logger.debug( -"movepage: received '%s' even though bot is logged in" % err.code) - errdata = { - 'site': self, - 'oldtitle': oldtitle, - 'oldnamespace': self.namespace(page.namespace()), - 'newtitle': newtitle, - 'newnamespace': self.namespace(newlink.namespace), - 'user': self.user(), - } - if err.code in self._mv_errors: - raise Error(self._mv_errors[err.code] % errdata) - logger.debug("movepage: Unexpected error code '%s' received." - % err.code) - raise - finally: - self.unlock_page(page) - if "move" not in result: - logger.error("movepage: %s" % result) - raise Error("movepage: unexpected response") - # TODO: Check for talkmove-error messages - if "talkmove-error-code" in result["move"]: - logger.warning(u"movepage: Talk page %s not moved" - % (page.toggleTalkPage().title(asLink=True))) - return pywikibot.Page(page, newtitle) - - # catalog of rollback errors for use in error messages - _rb_errors = { - "noapiwrite": - "API editing not enabled on %(site)s wiki", - "writeapidenied": - "User %(user)s not allowed to edit through the API", - "alreadyrolled": - "Page [[%(title)s]] already rolled back; action aborted.", - } # other errors shouldn't arise because we check for those errors - - def rollbackpage(self, page, summary=u''): - """Roll back page to version before last user's edits. - - As a precaution against errors, this method will fail unless - the page history contains at least two revisions, and at least - one that is not by the same user who made the last edit. - - @param page: the Page to be rolled back (must exist) - @param summary: edit summary (defaults to a standardized message) - - """ - if len(page._revisions) < 2: - raise pywikibot.Error( - u"Rollback of %s aborted; load revision history first." - % page.title(asLink=True)) - last_rev = page._revisions[page.latestRevision()] - last_user = last_rev.user - for rev in sorted(page._revisions.keys(), reverse=True): - # start with most recent revision first - if rev.user != last_user: - prev_user = rev.user - break - else: - raise pywikibot.Error( - u"Rollback of %s aborted; only one user in revision history." - % page.title(asLink=True)) - summary = summary or ( -u"Reverted edits by [[Special:Contributions/%(last_user)s|%(last_user)s]] " -u"([[User talk:%(last_user)s|Talk]]) to last version by %(prev_user)s" - % locals()) - token = self.token(page, "rollback") - self.lock_page(page) - req = api.Request(site=self, action="rollback", - title=page.title(withSection=False), - user=last_user, - token=token) - try: - result = req.submit() - except api.APIError, err: - errdata = { - 'site': self, - 'title': page.title(withSection=False), - 'user': self.user(), - } - if err.code in self._rb_errors: - raise Error(self._rb_errors[err.code] % errdata) - logger.debug("rollback: Unexpected error code '%s' received." 
- % err.code) - raise - finally: - self.unlock_page(page) - - # catalog of delete errors for use in error messages - _dl_errors = { - "noapiwrite": - "API editing not enabled on %(site)s wiki", - "writeapidenied": - "User %(user)s not allowed to edit through the API", - "permissiondenied": - "User %(user)s not authorized to delete pages on %(site)s wiki.", - "cantdelete": - "Could not delete [[%(title)s]]. Maybe it was deleted already.", - } # other errors shouldn't occur because of pre-submission checks - - def deletepage(self, page, summary): - """Delete page from the wiki. Requires appropriate privilege level. - - @param page: Page to be deleted. - @param summary: Edit summary (required!). - - """ - try: - self.login(sysop=True) - except pywikibot.Error, e: - raise Error("delete: Unable to login as sysop (%s)" - % e.__class__.__name__) - if not self.logged_in(sysop=True): - raise Error("delete: Unable to login as sysop") - token = self.token("delete") - req = api.Request(site=self, action="delete", token=token, - title=page.title(withSection=False), - reason=summary) - try: - result = req.submit() - except api.APIError, err: - errdata = { - 'site': self, - 'title': page.title(withSection=False), - 'user': self.user(), - } - if err.code in self._dl_errors: - raise Error(self._dl_errors[err.code] % errdata) - logger.debug("delete: Unexpected error code '%s' received." - % err.code) - raise - finally: - self.unlock_page(page) - - # TODO: implement undelete - - # TODO: implement patrol - - def linksearch(self, siteurl, limit=500): - """Backwards-compatible interface to exturlusage()""" - return self.exturlusage(siteurl, limit=limit) - - @deprecate_arg("repeat", None) - def newimages(self, number=100, lestart=None, leend=None, leuser=None, - letitle=None): - """Yield ImagePages from most recent uploads""" - return self.logevents(logtype="upload", limit=number, start=lestart, - end=leend, user=leuser, title=letitle) - - def getImagesFromAnHash(self, hash_found=None): - """Return all images that have the same hash. - - Useful to find duplicates or nowcommons. - - NOTE: it returns also the image itself, if you don't want it, just - filter the list returned. - - NOTE 2: it returns the image title WITHOUT the image namespace. - - """ - if hash_found == None: # If the hash is none return None and not continue - return None - return [image.title(withNamespace=False) - for image in self.allimages(sha1=hash_found)] - - -#### METHODS NOT IMPLEMENTED YET #### -class NotImplementedYet: - - # TODO: is this needed any more? can it be obtained from the http module? - def cookies(self, sysop = False): - """Return a string containing the user's current cookies.""" - self._loadCookies(sysop = sysop) - index = self._userIndex(sysop) - return self._cookies[index] - - def _loadCookies(self, sysop = False): - """Retrieve session cookies for login""" - index = self._userIndex(sysop) - if self._cookies[index] is not None: - return - try: - if sysop: - try: - username = config.sysopnames[self.family.name - ][self.code] - except KeyError: - raise NoUsername("""\ -You tried to perform an action that requires admin privileges, but you haven't -entered your sysop name in your user-config.py. 
Please add -sysopnames['%s']['%s']='name' to your user-config.py""" - % (self.family.name, self.code)) - else: - username = pywikiobt.config2.usernames[self.family.name - ][self.code] - except KeyError: - self._cookies[index] = None - self._isLoggedIn[index] = False - else: - tmp = '%s-%s-%s-login.data' % ( - self.family.name, self.code, username) - fn = config.datafilepath('login-data', tmp) - if not os.path.exists(fn): - self._cookies[index] = None - self._isLoggedIn[index] = False - else: - f = open(fn) - self._cookies[index] = '; '.join([x.strip() for x in f.readlines()]) - f.close() - - # THESE ARE FUNCTIONS NOT YET IMPLEMENTED IN THE API - # TODO: avoid code duplication for the following methods - def newpages(self, number = 10, get_redirect = False, repeat = False): - """Yield new articles (as Page objects) from Special:Newpages. - - Starts with the newest article and fetches the number of articles - specified in the first argument. If repeat is True, it fetches - Newpages again. If there is no new page, it blocks until there is - one, sleeping between subsequent fetches of Newpages. - - The objects yielded are tuples composed of the Page object, - timestamp (unicode), length (int), an empty unicode string, username - or IP address (str), comment (unicode). - - """ - # TODO: in recent MW versions Special:Newpages takes a namespace parameter, - # and defaults to 0 if not specified. - # TODO: Detection of unregistered users is broken - # TODO: Repeat mechanism doesn't make much sense as implemented; - # should use both offset and limit parameters, and have an - # option to fetch older rather than newer pages - seen = set() - while True: - path = self.newpages_address(n=number) - # The throttling is important here, so always enabled. - get_throttle() - html = self.getUrl(path) - - entryR = re.compile( -'<li[^>]*>(?P<date>.+?) \S*?<a href=".+?"' -' title="(?P<title>.+?)">.+?</a>.+?[([](?P<length>[\d,.]+)[^)]]*[)]]' -' .?<a href=".+?" title=".+?:(?P<username>.+?)">' - ) - for m in entryR.finditer(html): - date = m.group('date') - title = m.group('title') - title = title.replace('"', '"') - length = int(re.sub("[,.]", "", m.group('length'))) - loggedIn = u'' - username = m.group('username') - comment = u'' - - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page, date, length, loggedIn, username, comment - if not repeat: - break - - def longpages(self, number = 10, repeat = False): - """Yield Pages from Special:Longpages. - - Return values are a tuple of Page object, length(int). - - """ - #TODO: should use offset and limit parameters; 'repeat' as now - # implemented is fairly useless - # this comment applies to all the XXXXpages methods following, as well - seen = set() - while True: - path = self.longpages_address(n=number) - get_throttle() - html = self.getUrl(path) - entryR = re.compile(ur'<li>(<a href=".+?" title=".+?">hist</a>) <a href=".+?" title="(?P<title>.+?)">.+?</a> [(?P<length>\d+)(.+?)]</li>') - for m in entryR.finditer(html): - title = m.group('title') - length = int(m.group('length')) - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page, length - if not repeat: - break - - def shortpages(self, number = 10, repeat = False): - """Yield Pages and lengths from Special:Shortpages.""" - throttle = True - seen = set() - while True: - path = self.shortpages_address(n = number) - get_throttle() - html = self.getUrl(path) - entryR = re.compile(ur'<li>(<a href=".+?" title=".+?">hist</a>) <a href=".+?" 
title="(?P<title>.+?)">.+?</a> [(?P<length>\d+)(.+?)]</li>') - for m in entryR.finditer(html): - title = m.group('title') - length = int(m.group('length')) - - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page, length - if not repeat: - break - - def deadendpages(self, number = 10, repeat = False): - """Yield Page objects retrieved from Special:Deadendpages.""" - seen = set() - while True: - path = self.deadendpages_address(n=number) - get_throttle() - html = self.getUrl(path) - entryR = re.compile( - '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') - for m in entryR.finditer(html): - title = m.group('title') - - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page - if not repeat: - break - - def ancientpages(self, number = 10, repeat = False): - """Yield Pages, datestamps from Special:Ancientpages.""" - seen = set() - while True: - path = self.ancientpages_address(n=number) - get_throttle() - html = self.getUrl(path) - entryR = re.compile( -'<li><a href=".+?" title="(?P<title>.+?)">.+?</a> (?P<date>.+?)</li>') - for m in entryR.finditer(html): - title = m.group('title') - date = m.group('date') - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page, date - if not repeat: - break - - def lonelypages(self, number = 10, repeat = False): - """Yield Pages retrieved from Special:Lonelypages.""" - throttle = True - seen = set() - while True: - path = self.lonelypages_address(n=number) - get_throttle() - html = self.getUrl(path) - entryR = re.compile( - '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') - for m in entryR.finditer(html): - title = m.group('title') - - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page - if not repeat: - break - - def unwatchedpages(self, number = 10, repeat = False): - """Yield Pages from Special:Unwatchedpages (requires Admin privileges).""" - seen = set() - while True: - path = self.unwatchedpages_address(n=number) - get_throttle() - html = self.getUrl(path, sysop = True) - entryR = re.compile( - '<li><a href=".+?" title="(?P<title>.+?)">.+?</a>.+?</li>') - for m in entryR.finditer(html): - title = m.group('title') - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page - if not repeat: - break - - def uncategorizedcategories(self, number = 10, repeat = False): - """Yield Categories from Special:Uncategorizedcategories.""" - import catlib - seen = set() - while True: - path = self.uncategorizedcategories_address(n=number) - get_throttle() - html = self.getUrl(path) - entryR = re.compile( - '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') - for m in entryR.finditer(html): - title = m.group('title') - if title not in seen: - seen.add(title) - page = catlib.Category(self, title) - yield page - if not repeat: - break - - def newimages(self, number = 10, repeat = False): - """Yield ImagePages from Special:Log&type=upload""" - - seen = set() - regexp = re.compile('<li[^>]*>(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a>\s+(.+?</a>).*?<a href=".*?"(?P<new> class="new")? title="(?P<image>.+?)"\s*>(?:.*?<span class="comment">(?P<comment>.*?)</span>)?', re.UNICODE) - - while True: - path = self.log_address(number, mode = 'upload') - get_throttle() - html = self.getUrl(path) - - for m in regexp.finditer(html): - image = m.group('image') - - if image not in seen: - seen.add(image) - - if m.group('new'): - output(u"Image '%s' has been deleted." 
% image) - continue - - date = m.group('date') - user = m.group('user') - comment = m.group('comment') or '' - - yield ImagePage(self, image), date, user, comment - if not repeat: - break - - def uncategorizedimages(self, number = 10, repeat = False): - """Yield ImagePages from Special:Uncategorizedimages.""" - seen = set() - ns = self.image_namespace() - entryR = re.compile( - '<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns) - while True: - path = self.uncategorizedimages_address(n=number) - get_throttle() - html = self.getUrl(path) - for m in entryR.finditer(html): - title = m.group('title') - if title not in seen: - seen.add(title) - page = ImagePage(self, title) - yield page - if not repeat: - break - - def uncategorizedpages(self, number = 10, repeat = False): - """Yield Pages from Special:Uncategorizedpages.""" - seen = set() - while True: - path = self.uncategorizedpages_address(n=number) - get_throttle() - html = self.getUrl(path) - entryR = re.compile( - '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') - for m in entryR.finditer(html): - title = m.group('title') - - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page - if not repeat: - break - - def unusedcategories(self, number = 10, repeat = False): - """Yield Category objects from Special:Unusedcategories.""" - import catlib - seen = set() - while True: - path = self.unusedcategories_address(n=number) - get_throttle() - html = self.getUrl(path) - entryR = re.compile('<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') - for m in entryR.finditer(html): - title = m.group('title') - - if title not in seen: - seen.add(title) - page = catlib.Category(self, title) - yield page - if not repeat: - break - - def unusedfiles(self, number = 10, repeat = False, extension = None): - """Yield ImagePage objects from Special:Unusedimages.""" - seen = set() - ns = self.image_namespace() - entryR = re.compile( - '<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns) - while True: - path = self.unusedfiles_address(n=number) - get_throttle() - html = self.getUrl(path) - for m in entryR.finditer(html): - fileext = None - title = m.group('title') - if extension: - fileext = title[len(title)-3:] - if title not in seen and fileext == extension: - ## Check whether the media is used in a Proofread page - # code disabled because it slows this method down, and - # because it is unclear what it's supposed to do. - #basename = title[6:] - #page = Page(self, 'Page:' + basename) - - #if not page.exists(): - seen.add(title) - image = ImagePage(self, title) - yield image - if not repeat: - break - - def withoutinterwiki(self, number=10, repeat=False): - """Yield Pages without language links from Special:Withoutinterwiki.""" - seen = set() - while True: - path = self.withoutinterwiki_address(n=number) - get_throttle() - html = self.getUrl(path) - entryR = re.compile('<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') - for m in entryR.finditer(html): - title = m.group('title') - if title not in seen: - seen.add(title) - page = Page(self, title) - yield page - if not repeat: - break - - def linksearch(self, siteurl): - """Yield Pages from results of Special:Linksearch for 'siteurl'.""" - if siteurl.startswith('*.'): - siteurl = siteurl[2:] - output(u'Querying [[Special:Linksearch]]...') - cache = [] - for url in [siteurl, '*.' 
+ siteurl]: - path = self.linksearch_address(url) - get_throttle() - html = self.getUrl(path) - loc = html.find('<div class="mw-spcontent">') - if loc > -1: - html = html[loc:] - loc = html.find('<div class="printfooter">') - if loc > -1: - html = html[:loc] - R = re.compile('title ?="(.*?)"') - for title in R.findall(html): - if not siteurl in title: - # the links themselves have similar form - if title in cache: - continue - else: - cache.append(title) - yield Page(self, title) - + # -*- coding: utf-8 -*- +""" +Objects representing MediaWiki sites (wikis) and families (groups of wikis +on the same topic in different languages). +""" +# +# (C) Pywikipedia bot team, 2008 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' + +import pywikibot +from pywikibot import deprecate_arg +from pywikibot import config +from pywikibot.throttle import Throttle +from pywikibot.data import api +from pywikibot.exceptions import * + +try: + from hashlib import md5 +except ImportError: + from md5 import md5 +import logging +import os +import re +import sys +import threading +import urllib + +logger = logging.getLogger("wiki") + +class PageInUse(pywikibot.Error): + """Page cannot be reserved for writing due to existing lock.""" + + +def Family(fam=None, fatal=True): + """Import the named family. + + @param fam: family name (if omitted, uses the configured default) + @type fam: str + @param fatal: if True, the bot will stop running if the given family is + unknown. If False, it will only raise a ValueError exception. + @param fatal: bool + @return: a Family instance configured for the named family. + + """ + if fam == None: + fam = config.family + try: + # first try the built-in families + exec "import pywikibot.families.%s_family as myfamily" % fam + except ImportError: + # next see if user has defined a local family module + try: + sys.path.append(config.datafilepath('families')) + exec "import %s_family as myfamily" % fam + except ImportError: + if fatal: + logger.exception(u"""\ +Error importing the %s family. This probably means the family +does not exist. Also check your configuration file.""" + % fam) + sys.exit(1) + else: + raise Error("Family %s does not exist" % fam) + return myfamily.Family() + + +class BaseSite(object): + """Site methods that are independent of the communication interface.""" + # to implement a specific interface, define a Site class that inherits + # from this + + def __init__(self, code, fam=None, user=None, sysop=None): + """ + @param code: the site's language code + @type code: str + @param fam: wiki family name (optional) + @type fam: str or Family + @param user: bot user name (optional) + @type user: str + @param sysop: sysop account user name (optional) + @type sysop: str + + """ + self.__code = code.lower() + if isinstance(fam, basestring) or fam is None: + self.__family = Family(fam, fatal=False) + else: + self.__family = fam + + # if we got an outdated language code, use the new one instead. + if self.__family.obsolete.has_key(self.__code): + if self.__family.obsolete[self.__code] is not None: + self.__code = self.__family.obsolete[self.__code] + else: + # no such language anymore + raise NoSuchSite("Language %s in family %s is obsolete" + % (self.__code, self.__family.name)) + if self.__code not in self.languages(): + if self.__code == 'zh-classic' and 'zh-classical' in self.languages(): + self.__code = 'zh-classical' + # database hack (database is varchar[10] -> zh-classical + # is cut to zh-classic. 
+ else: + raise NoSuchSite("Language %s does not exist in family %s" + % (self.__code, self.__family.name)) + + self._username = [user, sysop] + + # following are for use with lock_page and unlock_page methods + self._pagemutex = threading.Lock() + self._locked_pages = [] + + @property + def throttle(self): + """Return this Site's throttle. Initialize a new one if needed.""" + + if not hasattr(self, "_throttle"): + self._throttle = Throttle(self, multiplydelay=True, + verbosedelay=True) + try: + self.login(False) + except pywikibot.NoUsername: + pass + return self._throttle + + @property + def family(self): + """The Family object for this Site's wiki family.""" + + return self.__family + + @property + def code(self): + """The identifying code for this Site.""" + + return self.__code + + @property + def lang(self): + """The ISO language code for this Site. + + Presumed to be equal to the wiki prefix, but this can be overridden. + + """ + return self.__code + + def __cmp__(self, other): + """Perform equality and inequality tests on Site objects.""" + + if not isinstance(other, BaseSite): + return 1 + if self.family == other.family: + return cmp(self.code, other.code) + return cmp(self.family.name, other.family.name) + + def user(self): + """Return the currently-logged in bot user, or None.""" + + if self.logged_in(True): + return self._username[True] + elif self.logged_in(False): + return self._username[False] + return None + + def username(self, sysop = False): + return self._username[sysop] + + def __getattr__(self, attr): + """Calls to methods not defined in this object are passed to Family.""" + + if hasattr(self.__class__, attr): + return self.__class__.attr + try: + method = getattr(self.family, attr) + f = lambda *args, **kwargs: \ + method(self.code, *args, **kwargs) + if hasattr(method, "__doc__"): + f.__doc__ = method.__doc__ + return f + except AttributeError: + raise AttributeError("%s instance has no attribute '%s'" + % (self.__class__.__name__, attr) ) + + def sitename(self): + """Return string representing this Site's name and language.""" + + return self.family.name+':'+self.code + + __str__ = sitename + + def __repr__(self): + return 'Site("%s", "%s")' % (self.code, self.family.name) + + def __hash__(self): + return hash(repr(self)) + + def linktrail(self): + """Return regex for trailing chars displayed as part of a link. + + Returns a string, not a compiled regular expression object. + + This reads from the family file, and ''not'' from + [[MediaWiki:Linktrail]], because the MW software currently uses a + built-in linktrail from its message files and ignores the wiki + value. 
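+
+        Example of typical use (a sketch; assumes the pywikibot.Site()
+        factory is available and that the family file supplies a
+        linktrail pattern such as the default u'[a-z]*'):
+
+            import re
+            import pywikibot
+            site = pywikibot.Site(code='en', fam='wikipedia')
+            trail = site.linktrail()
+            # match a wikilink together with any trailing characters
+            # that are displayed as part of the link
+            linkR = re.compile(r'\[\[(.+?)(?:\|.*?)?\]\](%s)' % trail)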
+ + """ + return self.family.linktrail(self.code) + + def languages(self): + """Return list of all valid language codes for this site's Family.""" + + return self.family.langs.keys() + + def validLanguageLinks(self): + """Return list of language codes that can be used in interwiki links.""" + + nsnames = sum(self.namespaces().values(), []) + return [l for l in self.languages() + if l[:1].upper() + l[1:] not in self.namespaces()] + + def ns_index(self, namespace): + """Given a namespace name, return its int index, or None if invalid.""" + + for ns in self.namespaces(): + if namespace.lower() in [name.lower() + for name in self.namespaces()[ns]]: + return ns + return None + + getNamespaceIndex = ns_index # for backwards-compatibility + + def namespaces(self): + """Return dict of valid namespaces on this wiki.""" + + return self._namespaces + + def ns_normalize(self, value): + """Return canonical local form of namespace name. + + @param value: A namespace name + @type value: unicode + + """ + index = self.ns_index(value) + return self.namespace(index) + + normalizeNamespace = ns_normalize # for backwards-compatibility + + def redirect(self, default=True): + """Return the localized redirect tag for the site. + + If default is True, falls back to 'REDIRECT' if the site has no + special redirect tag. + + """ + if default: + return self.family.redirect.get(self.code, [u"REDIRECT"])[0] + else: + return self.family.redirect.get(self.code, None) + + def lock_page(self, page, block=True): + """Lock page for writing. Must be called before writing any page. + + We don't want different threads trying to write to the same page + at the same time, even to different sections. + + @param page: the page to be locked + @type page: pywikibot.Page + @param block: if true, wait until the page is available to be locked; + otherwise, raise an exception if page can't be locked + + """ + self._pagemutex.acquire() + try: + while page in self._locked_pages: + if not block: + raise PageInUse + time.sleep(.25) + self._locked_pages.append(page.title(withSection=False)) + finally: + self._pagemutex.release() + + def unlock_page(self, page): + """Unlock page. Call as soon as a write operation has completed. + + @param page: the page to be locked + @type page: pywikibot.Page + + """ + self._pagemutex.acquire() + try: + self._locked_pages.remove(page.title(withSection=False)) + finally: + self._pagemutex.release() + + def disambcategory(self): + """Return Category in which disambig pages are listed.""" + + try: + name = self.namespace(14)+':'+self.family.disambcatname[self.code] + except KeyError: + raise Error(u"No disambiguation category name found for %(site)s" + % {'site': self}) + return pywikibot.Category(pywikibot.Link(name, self)) + + def linkto(self, title, othersite = None): + """Return unicode string in the form of a wikilink to 'title' + + Use optional Site argument 'othersite' to generate an interwiki link. + + """ + logger.debug("Site.linkto() method is deprecated; use pywikibot.Link") + return pywikibot.Link(title, self).astext(othersite) + + def isInterwikiLink(self, s): + """Return True if s is in the form of an interwiki link. + + If a link object constructed using "s" as the link text parses as + belonging to a different site, this method returns True. + + """ + return (pywikibot.Link(s, self).site != self) + + def redirectRegex(self): + """Return a compiled regular expression matching on redirect pages. + + Group 1 in the regex match object will be the target title. 
+ + """ + #TODO: is this needed, since the API identifies redirects? + # (maybe, the API can give false positives) + default = 'REDIRECT' + try: + keywords = set(self.family.redirect[self.code]) + keywords.add(default) + pattern = r'(?:' + '|'.join(keywords) + ')' + except KeyError: + # no localized keyword for redirects + pattern = r'%s' % default + # A redirect starts with hash (#), followed by a keyword, then + # arbitrary stuff, then a wikilink. The wikilink may contain + # a label, although this is not useful. + return re.compile(r'\s*#%(pattern)s\s*:?\s*[[(.+?)(?:|.*?)?]]' + % locals(), + re.IGNORECASE | re.UNICODE | re.DOTALL) + + # namespace shortcuts for backwards-compatibility + + def special_namespace(self): + return self.namespace(-1) + + def image_namespace(self): + return self.namespace(6) + + def mediawiki_namespace(self): + return self.namespace(8) + + def template_namespace(self): + return self.namespace(10) + + def category_namespace(self): + return self.namespace(14) + + def category_namespaces(self): + return self.namespace(14, all=True) + + # site-specific formatting preferences + + def category_on_one_line(self): + """Return True if this site wants all category links on one line.""" + + return self.code in self.family.category_on_one_line + + def interwiki_putfirst(self): + """Return list of language codes for ordering of interwiki links.""" + + return self.family.interwiki_putfirst.get(self.code, None) + + def interwiki_putfirst_doubled(self, list_of_links): + # TODO: is this even needed? No family in the framework has this + # dictionary defined! + if self.lang in self.family.interwiki_putfirst_doubled: + if len(list_of_links) >= \ + self.family.interwiki_putfirst_doubled[self.lang][0]: + links2 = [lang.language() for lang in list_of_links] + result = [] + for lang in self.family.interwiki_putfirst_doubled[self.lang][1]: + try: + result.append(list_of_links[links2.index(lang)]) + except ValueError: + pass + return result + else: + return False + else: + return False + + def getSite(self, code): + """Return Site object for language 'code' in this Family.""" + + return pywikibot.Site(code=code, fam=self.family, user=self.user) + + # deprecated methods for backwards-compatibility + + def fam(self): + """Return Family object for this Site.""" + return self.family + + def urlEncode(self, query): + """DEPRECATED""" + return urllib.urlencode(query) + + def getUrl(self, path, retry=True, sysop=False, data=None, + compress=True, no_hostname=False, cookie_only=False): + """DEPRECATED. + + Retained for compatibility only. All arguments except path and data + are ignored. 
+ + """ + if data: + if not isinstance(data, basestring): + data = urllib.urlencode(data) + return pywikibot.comms.data.request(self, path, method="PUT", + body=data) + else: + return pywikibot.comms.data.request(self, path) + + def postForm(self, address, predata, sysop=False, cookies=None): + """DEPRECATED""" + return self.getUrl(address, data=predata) + + def postData(self, address, data, contentType=None, sysop=False, + compress=True, cookies=None): + """DEPRECATED""" + return self.getUrl(address, data=data) + + # unsupported methods from version 1 + + def checkCharset(self, charset): + raise NotImplementedError + def getToken(self, getalways=True, getagain=False, sysop=False): + raise NotImplementedError + def export_address(self): + raise NotImplementedError + def move_address(self): + raise NotImplementedError + def delete_address(self, s): + raise NotImplementedError + def undelete_view_address(self, s, ts=''): + raise NotImplementedError + def undelete_address(self): + raise NotImplementedError + def protect_address(self, s): + raise NotImplementedError + def unprotect_address(self, s): + raise NotImplementedError + def put_address(self, s): + raise NotImplementedError + def get_address(self, s): + raise NotImplementedError + def nice_get_address(self, s): + raise NotImplementedError + def edit_address(self, s): + raise NotImplementedError + def purge_address(self, s): + raise NotImplementedError + def block_address(self): + raise NotImplementedError + def unblock_address(self): + raise NotImplementedError + def blocksearch_address(self, s): + raise NotImplementedError + def linksearch_address(self, s, limit=500, offset=0): + raise NotImplementedError + def search_address(self, q, n=50, ns=0): + raise NotImplementedError + def allpages_address(self, s, ns = 0): + raise NotImplementedError + def log_address(self, n=50, mode = ''): + raise NotImplementedError + def newpages_address(self, n=50): + raise NotImplementedError + def longpages_address(self, n=500): + raise NotImplementedError + def shortpages_address(self, n=500): + raise NotImplementedError + def unusedfiles_address(self, n=500): + raise NotImplementedError + def categories_address(self, n=500): + raise NotImplementedError + def deadendpages_address(self, n=500): + raise NotImplementedError + def ancientpages_address(self, n=500): + raise NotImplementedError + def lonelypages_address(self, n=500): + raise NotImplementedError + def protectedpages_address(self, n=500): + raise NotImplementedError + def unwatchedpages_address(self, n=500): + raise NotImplementedError + def uncategorizedcategories_address(self, n=500): + raise NotImplementedError + def uncategorizedimages_address(self, n=500): + raise NotImplementedError + def uncategorizedpages_address(self, n=500): + raise NotImplementedError + def unusedcategories_address(self, n=500): + raise NotImplementedError + def withoutinterwiki_address(self, n=500): + raise NotImplementedError + def references_address(self, s): + raise NotImplementedError + def allmessages_address(self): + raise NotImplementedError + def upload_address(self): + raise NotImplementedError + def double_redirects_address(self, default_limit = True): + raise NotImplementedError + def broken_redirects_address(self, default_limit = True): + raise NotImplementedError + def login_address(self): + raise NotImplementedError + def captcha_image_address(self, id): + raise NotImplementedError + def watchlist_address(self): + raise NotImplementedError + def contribs_address(self, target, limit=500, offset=''): + 
raise NotImplementedError + + +class APISite(BaseSite): + """API interface to MediaWiki site. + + Do not use directly; use pywikibot.Site function. + + """ +## Site methods from version 1.0 (as these are implemented in this file, +## or declared deprecated/obsolete, they will be removed from this list) +########## +## cookies: return user's cookies as a string +## +## urlEncode: Encode a query to be sent using an http POST request. +## postForm: Post form data to an address at this site. +## postData: Post encoded form data to an http address at this site. +## +## shared_image_repository: Return tuple of image repositories used by this +## site. +## version: Return MediaWiki version string from Family file. +## versionnumber: Return int identifying the MediaWiki version. +## live_version: Return version number read from Special:Version. +## checkCharset(charset): Warn if charset doesn't match family file. +## +## linktrail: Return regex for trailing chars displayed as part of a link. +## disambcategory: Category in which disambiguation pages are listed. +## +## Methods that yield Page objects derived from a wiki's Special: pages +## (note, some methods yield other information in a tuple along with the +## Pages; see method docs for details) -- +## +## newpages(): Special:Newpages +## newimages(): Special:Log&type=upload +## longpages(): Special:Longpages +## shortpages(): Special:Shortpages +## deadendpages(): Special:Deadendpages +## ancientpages(): Special:Ancientpages +## lonelypages(): Special:Lonelypages +## unwatchedpages(): Special:Unwatchedpages (sysop accounts only) +## uncategorizedcategories(): Special:Uncategorizedcategories (yields +## Category objects) +## uncategorizedpages(): Special:Uncategorizedpages +## uncategorizedimages(): Special:Uncategorizedimages (yields +## ImagePage objects) +## unusedcategories(): Special:Unusuedcategories (yields Category) +## unusedfiles(): Special:Unusedimages (yields ImagePage) +## withoutinterwiki: Special:Withoutinterwiki +## linksearch: Special:Linksearch + + def __init__(self, code, fam=None, user=None, sysop=None): + BaseSite.__init__(self, code, fam, user, sysop) + self._namespaces = { + # these are the MediaWiki built-in names, which always work + # localized names are loaded later upon accessing the wiki + # namespace prefixes are always case-insensitive, but the + # canonical forms are capitalized + -2: [u"Media"], + -1: [u"Special"], + 0: [u""], + 1: [u"Talk"], + 2: [u"User"], + 3: [u"User talk"], + 4: [u"Project"], + 5: [u"Project talk"], + 6: [u"Image"], + 7: [u"Image talk"], + 8: [u"MediaWiki"], + 9: [u"MediaWiki talk"], + 10: [u"Template"], + 11: [u"Template talk"], + 12: [u"Help"], + 13: [u"Help talk"], + 14: [u"Category"], + 15: [u"Category talk"], + } + self.sitelock = threading.Lock() + self._msgcache = {} + return + +# ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__() + self.nocapitalize = self.__code in self.family.nocapitalize + # Calculating valid languages took quite long, so we calculate it once + # in initialization instead of each time it is used. + self._validlanguages = [] + for language in self.languages(): + if not language[:1].upper() + language[1:] in self.namespaces(): + self._validlanguages.append(language) + + def logged_in(self, sysop=False): + """Return True if logged in with specified privileges, otherwise False. + + @param sysop: if True, require sysop privileges. 
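+
+        Example (a sketch; 'site' stands for this APISite and assumes the
+        relevant account names are configured):
+
+            if not site.logged_in():
+                site.login()
+            if site.logged_in(sysop=True):
+                pass  # sysop credentials are active here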
+ + """ + if self.userinfo['name'] != self._username[sysop]: + return False + return (not sysop) or 'sysop' in self.userinfo['groups'] + + def loggedInAs(self, sysop = False): + """Return the current username if logged in, otherwise return None. + + DEPRECATED (use .user() method instead) + + """ + logger.debug("Site.loggedInAs() method is deprecated.") + return self.logged_in(sysop) and self.user() + + def login(self, sysop=False): + """Log the user in if not already logged in.""" + if not hasattr(self, "_siteinfo"): + self._getsiteinfo() + # check whether a login cookie already exists for this user + if hasattr(self, "_userinfo"): + if self.userinfo['name'] == self._username[sysop]: + return + if not self.logged_in(sysop): + loginMan = api.LoginManager(site=self, sysop=sysop, + user=self._username[sysop]) + if loginMan.login(retry = True): + self._username[sysop] = loginMan.username + if hasattr(self, "_userinfo"): + del self._userinfo + self.getuserinfo() + + forceLogin = login # alias for backward-compatibility + + def getuserinfo(self): + """Retrieve userinfo from site and store in _userinfo attribute. + + self._userinfo will be a dict with the following keys and values: + + - id: user id (numeric str) + - name: username (if user is logged in) + - anon: present if user is not logged in + - groups: list of groups (could be empty) + - rights: list of rights (could be empty) + - message: present if user has a new message on talk page + - blockinfo: present if user is blocked (dict) + + """ + if (not hasattr(self, "_userinfo") + or "rights" not in self._userinfo + or self._userinfo['name'] + != self._username["sysop" in self._userinfo["groups"]]): + uirequest = api.Request( + site=self, + action="query", + meta="userinfo", + uiprop="blockinfo|hasmsg|groups|rights" + ) + uidata = uirequest.submit() + assert 'query' in uidata, \ + "API userinfo response lacks 'query' key" + assert 'userinfo' in uidata['query'], \ + "API userinfo response lacks 'userinfo' key" + self._userinfo = uidata['query']['userinfo'] + return self._userinfo + + userinfo = property(fget=getuserinfo, doc=getuserinfo.__doc__) + + def is_blocked(self, sysop=False): + """Return true if and only if user is blocked. + + @param sysop: If true, log in to sysop account (if available) + + """ + if not self.logged_in(sysop): + self.login(sysop) + return 'blockinfo' in self._userinfo + + def isBlocked(self, sysop=False): + """Deprecated synonym for is_blocked""" + logger.debug( + "Site method 'isBlocked' should be changed to 'is_blocked'") + return self.is_blocked(sysop) + + def checkBlocks(self, sysop = False): + """Check if the user is blocked, and raise an exception if so.""" + if self.is_blocked(sysop): + # User blocked + raise UserBlocked('User is blocked in site %s' % self) + + def has_right(self, right, sysop=False): + """Return true if and only if the user has a specific right. + + Possible values of 'right' may vary depending on wiki settings, + but will usually include: + + * Actions: edit, move, delete, protect, upload + * User levels: autoconfirmed, sysop, bot + + """ + if not self.logged_in(sysop): + self.login(sysop) + return right.lower() in self._userinfo['rights'] + + def isAllowed(self, right, sysop=False): + """Deprecated; retained for backwards-compatibility""" + logger.debug("Site.isAllowed() method is deprecated; use has_right()") + return self.has_right(right, sysop) + + def has_group(self, group, sysop=False): + """Return true if and only if the user is a member of specified group. 
+ + Possible values of 'group' may vary depending on wiki settings, + but will usually include bot. + + """ + if not self.logged_in(sysop): + self.login(sysop) + return group.lower() in self._userinfo['groups'] + + def messages(self, sysop=False): + """Returns true if the user has new messages, and false otherwise.""" + if not self.logged_in(sysop): + self.login(sysop) + return 'hasmsg' in self._userinfo + + def mediawiki_message(self, key): + """Return the MediaWiki message text for key "key" """ + if not key in self._msgcache: + msg_query = api.QueryGenerator(site=self, meta="allmessages", + amfilter=key) + for msg in msg_query: + if msg['name'] == key and not 'missing' in msg: + self._msgcache[key] = msg['*'] + break + else: + raise KeyError("Site %(self)s has no message '%(key)s'" + % locals()) + return self._msgcache[key] + + def has_mediawiki_message(self, key): + """Return True iff this site defines a MediaWiki message for 'key'.""" + try: + v = self.mediawiki_message(key) + return True + except KeyError: + return False + + def getcurrenttimestamp(self): + """Return (Mediawiki) timestamp, {{CURRENTTIMESTAMP}}, the server time. + + Format is yyyymmddhhmmss + + """ + r = api.Request(site=self, + action="parse", + text="{{CURRENTTIMESTAMP}}") + result = r.submit() + return re.search('\d+', result['parse']['text']['*']).group() + + def _getsiteinfo(self): + """Retrieve siteinfo and namespaces from site.""" + sirequest = api.Request( + site=self, + action="query", + meta="siteinfo", + siprop="general|namespaces|namespacealiases" + ) + try: + sidata = sirequest.submit() + except api.APIError: + # hack for older sites that don't support 1.12 properties + # probably should delete if we're not going to support pre-1.12 + sirequest = api.Request( + site=self, + action="query", + meta="siteinfo", + siprop="general|namespaces" + ) + sidata = sirequest.submit() + + assert 'query' in sidata, \ + "API siteinfo response lacks 'query' key" + sidata = sidata['query'] + assert 'general' in sidata, \ + "API siteinfo response lacks 'general' key" + assert 'namespaces' in sidata, \ + "API siteinfo response lacks 'namespaces' key" + self._siteinfo = sidata['general'] + nsdata = sidata['namespaces'] + for nskey in nsdata: + ns = int(nskey) + if ns in self._namespaces: + if nsdata[nskey]["*"] in self._namespaces[ns]: + continue + # this is the preferred form so it goes at front of list + self._namespaces[ns].insert(0, nsdata[nskey]["*"]) + else: + self._namespaces[ns] = [nsdata[nskey]["*"]] + if 'namespacealiases' in sidata: + aliasdata = sidata['namespacealiases'] + for item in aliasdata: + if item["*"] in self._namespaces[int(item['id'])]: + continue + # this is a less preferred form so it goes at the end + self._namespaces[int(item['id'])].append(item["*"]) + + @property + def siteinfo(self): + """Site information dict.""" + + if not hasattr(self, "_siteinfo"): + self._getsiteinfo() + return self._siteinfo + + def case(self): + """Return this site's capitalization rule.""" + + return self.siteinfo['case'] + + def language(self): + """Return the code for the language of this Site.""" + + return self.siteinfo['lang'] + + lang = property(fget=language, doc=language.__doc__) + + def namespaces(self): + """Return dict of valid namespaces on this wiki.""" + + if not hasattr(self, "_siteinfo"): + self._getsiteinfo() + return self._namespaces + + def namespace(self, num, all=False): + """Return string containing local name of namespace 'num'. 
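+
+        For example (a sketch; the exact strings depend on the wiki's
+        content language):
+
+            site.namespace(10)   # u'Template' on an English-language wiki
+            site.namespace(14)   # u'Category'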
+ + If optional argument 'all' is true, return a list of all recognized + values for this namespace. + + """ + if all: + return self.namespaces()[num] + return self.namespaces()[num][0] + + def live_version(self): + """Return the 'real' version number found on [[Special:Version]] + + Return value is a tuple (int, int, str) of the major and minor + version numbers and any other text contained in the version. + + """ + versionstring = self.siteinfo['generator'] + m = re.match(r"^MediaWiki ([0-9]+).([0-9]+)(.*)$", versionstring) + if m: + return (int(m.group(1)), int(m.group(2)), m.group(3)) + else: + return None + + def loadpageinfo(self, page): + """Load page info from api and save in page attributes""" + title = page.title(withSection=False) + query = api.PropertyGenerator("info", site=self, + titles=title.encode(self.encoding()), + inprop="protection") + for pageitem in query: + if pageitem['title'] != title: + raise Error( + u"loadpageinfo: Query on %s returned data on '%s'" + % (page, pageitem['title'])) + api.update_page(page, pageitem) + + def loadimageinfo(self, page, history=False): + """Load image info from api and save in page attributes + + @param history: if true, return the image's version history + + """ + title = page.title(withSection=False) + query = api.PropertyGenerator("imageinfo", site=self, + titles=title.encode(self.encoding()), + iiprop=["timestamp", "user", "comment", + "url", "size", "sha1", "mime", + "metadata", "archivename"]) + if history: + query.request["iilimit"] = "max" + for pageitem in query: + if pageitem['title'] != title: + raise Error( + u"loadpageinfo: Query on %s returned data on '%s'" + % (page, pageitem['title'])) + api.update_page(page, pageitem) + if history: + return pageitem['imageinfo'] + + def page_exists(self, page): + """Return True if and only if page is an existing page on site.""" + if not hasattr(page, "_pageid"): + self.loadpageinfo(page) + return page._pageid > 0 + + def page_restrictions(self, page): + """Returns a dictionary reflecting page protections""" + if not self.page_exists(page): + raise NoPage(u'No page %s.' % page) + if not hasattr(page, "_protection"): + self.loadpageinfo(page) + return page._protection + + def page_can_be_edited(self, page): + """ + Returns True if and only if: + - page is unprotected, and bot has an account for this site, or + - page is protected, and bot has a sysop account for this site. + + """ + rest = self.page_restrictions(page) + sysop_protected = rest.has_key('edit') and rest['edit'][0] == 'sysop' + try: + api.LoginManager(site=self, sysop=sysop_protected) + except NoUsername: + return False + return True + + def page_isredirect(self, page): + """Return True if and only if page is a redirect.""" + if not hasattr(page, "_redir"): + self.loadpageinfo(page) + return bool(page._redir) + + def getredirtarget(self, page): + """Return Page object for the redirect target of page.""" + if not hasattr(page, "_redir"): + self.loadpageinfo(page) + if not page._redir: + raise pywikibot.IsNotRedirectPage(page.title()) + title = page.title(withSection=False) + query = api.Request(site=self, action="query", property="info", + inprop="protection|talkid|subjectid", + titles=title.encode(self.encoding()), + redirects="") + result = query.submit() + if "query" not in result or "redirects" not in result["query"]: + raise RuntimeError( + "getredirtarget: No 'redirects' found for page %s." 
+ % title) + redirmap = dict((item['from'], item['to']) + for item in result['query']['redirects']) + if title not in redirmap: + raise RuntimeError( + "getredirtarget: 'redirects' contains no key for page %s." + % title) + if "pages" not in result['query']: + # no "pages" element indicates a circular redirect + raise pywikibot.CircularRedirect(redirmap[title]) + for pagedata in result['query']['pages'].values(): + # there should be only one value in 'pages', and it is the target + if pagedata['title'] not in redirmap.values(): + raise RuntimeError( + "getredirtarget: target page '%s' not found in 'redirects'" + % pagedata['title']) + target = pywikibot.Page(self, pagedata['title'], pagedata['ns']) + api.update_page(target, pagedata) + page._redir = target + + def preloadpages(self, pagelist, groupsize=60): + """Return a generator to a list of preloaded pages. + + Note that [at least in current implementation] pages may be iterated + in a different order than in the underlying pagelist. + + @param pagelist: an iterable that returns Page objects + @param groupsize: how many Pages to query at a time + @type groupsize: int + + """ + from pywikibot.tools import itergroup + for sublist in itergroup(pagelist, groupsize): + pageids = [str(p._pageid) for p in sublist + if hasattr(p, "_pageid") + and p._pageid > 0] + cache = dict((p.title(withSection=False), p) for p in sublist) + rvgen = api.PropertyGenerator("revisions|info", site=self) + rvgen.limit = -1 + if len(pageids) == len(sublist): + # only use pageids if all pages have them + rvgen.request["pageids"] = "|".join(pageids) + else: + rvgen.request["titles"] = "|".join(cache.keys()) + rvgen.request[u"rvprop"] = \ + u"ids|flags|timestamp|user|comment|content" + logger.info(u"Retrieving %s pages from %s." + % (len(cache), self) + ) + for pagedata in rvgen: + logger.debug("Preloading %s" % pagedata) + try: + if pagedata['title'] not in cache: + raise Error( + u"preloadpages: Query returned unexpected title '%s'" + % pagedata['title'] + ) + except KeyError: + logger.debug("No 'title' in %s" % pagedata) + logger.debug("pageids=%s" % pageids) + logger.debug("titles=%s" % cache.keys()) + continue + page = cache[pagedata['title']] + api.update_page(page, pagedata) + yield page + + def token(self, page, tokentype): + """Return token retrieved from wiki to allow changing page content. + + @param page: the Page for which a token should be retrieved + @param tokentype: the type of token (e.g., "edit", "move", "delete"); + see API documentation for full list of types + + """ + query = api.PropertyGenerator("info|revisions", site=self, + titles=page.title(withSection=False), + intoken=tokentype) + for item in query: + if item['title'] != page.title(withSection=False): + raise Error( + u"token: Query on page %s returned data on page [[%s]]" + % (page.title(withSection=False, asLink=True), + item['title'])) + api.update_page(page, item) + logging.debug(str(item)) + return item[tokentype + "token"] + + # following group of methods map more-or-less directly to API queries + + def pagebacklinks(self, page, followRedirects=False, filterRedirects=None, + namespaces=None): + """Iterate all pages that link to the given page. + + @param page: The Page to get links to. + @param followRedirects: Also return links to redirects pointing to + the given page. + @param filterRedirects: If True, only return redirects to the given + page. If False, only return non-redirect links. If None, return + both (no filtering). 
+ @param namespaces: If present, only return links from the namespaces + in this list. + + """ + bltitle = page.title(withSection=False).encode(self.encoding()) + blgen = api.PageGenerator("backlinks", gbltitle=bltitle, site=self) + if isinstance(namespaces, list): + blgen.request["gblnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + blgen.request["gblnamespace"] = str(namespaces) + if filterRedirects is not None: + blgen.request["gblfilterredir"] = filterRedirects and "redirects"\ + or "nonredirects" + if followRedirects: + # bug: see http://bugzilla.wikimedia.org/show_bug.cgi?id=16218 + # links identified by MediaWiki as redirects may not really be, + # so we have to check each "redirect" page and see if it + # really redirects to this page + blgen.request["gblfilterredir"] = "nonredirects" + redirgen = api.PageGenerator("backlinks", gbltitle=bltitle, + site=self, gblfilterredir="redirects") + if "gblnamespace" in blgen.request: + redirgen.request["gblnamespace"] = blgen.request["gblnamespace"] + genlist = [blgen] + for redir in redirgen: + if redir.getRedirectTarget() == page: + genlist.append( + self.pagebacklinks( + redir, True, None, namespaces)) + import itertools + return itertools.chain(*genlist) + return blgen + + def page_embeddedin(self, page, filterRedirects=None, namespaces=None): + """Iterate all pages that embedded the given page as a template. + + @param page: The Page to get inclusions for. + @param filterRedirects: If True, only return redirects that embed + the given page. If False, only return non-redirect links. If + None, return both (no filtering). + @param namespaces: If present, only return links from the namespaces + in this list. + + """ + eititle = page.title(withSection=False).encode(self.encoding()) + eigen = api.PageGenerator("embeddedin", geititle=eititle, site=self) + if isinstance(namespaces, list): + eigen.request["geinamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + eigen.request["geinamespace"] = str(namespaces) + if filterRedirects is not None: + eigen.request["geifilterredir"] = filterRedirects and "redirects"\ + or "nonredirects" + return eigen + + def pagereferences(self, page, followRedirects=False, filterRedirects=None, + withTemplateInclusion=True, onlyTemplateInclusion=False, + namespaces=None): + """Convenience method combining pagebacklinks and page_embeddedin.""" + + if onlyTemplateInclusion: + return self.page_embeddedin(page, namespaces=namespaces) + if not withTemplateInclusion: + return self.pagebacklinks(page, followRedirects, + namespaces=namespaces) + import itertools + return itertools.chain( + self.pagebacklinks(page, followRedirects, + filterRedirects, namespaces=namespaces), + self.page_embeddedin(page, filterRedirects, + namespaces=namespaces) + ) + + def pagelinks(self, page, namespaces=None, follow_redirects=False, + limit=None): + """Iterate internal wikilinks contained (or transcluded) on page. 
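+
+        Example (a sketch; 'site' stands for this APISite and 'page' for
+        an existing pywikibot.Page on it):
+
+            for linked in site.pagelinks(page, namespaces=[0], limit=50):
+                print linked.title()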
+ + @param namespaces: Only iterate pages in these namespaces (default: all) + @type namespaces: list of ints + @param follow_redirects: if True, yields the target of any redirects, + rather than the redirect page + + """ + plgen = api.PageGenerator("links", site=self) + if isinstance(limit, int): + plgen.limit = limit + if hasattr(page, "_pageid"): + plgen.request['pageids'] = str(page._pageid) + else: + pltitle = page.title(withSection=False).encode(self.encoding()) + plgen.request['titles'] = pltitle + if follow_redirects: + plgen.request['redirects'] = '' + if isinstance(namespaces, list): + plgen.request["gplnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + plgen.request["gplnamespace"] = str(namespaces) + return plgen + + @deprecate_arg("withSortKey", None) # Sortkey doesn't work with generator + def pagecategories(self, page, withSortKey=None): + """Iterate categories to which page belongs.""" + + clgen = api.CategoryPageGenerator("categories", site=self) + if hasattr(page, "_pageid"): + clgen.request['pageids'] = str(page._pageid) + else: + cltitle = page.title(withSection=False).encode(self.encoding()) + clgen.request['titles'] = cltitle + return clgen + + def pageimages(self, page): + """Iterate images used (not just linked) on the page.""" + + imtitle = page.title(withSection=False).encode(self.encoding()) + imgen = api.ImagePageGenerator("images", titles=imtitle, site=self) + return imgen + + def pagetemplates(self, page, namespaces=None): + """Iterate templates transcluded (not just linked) on the page.""" + + tltitle = page.title(withSection=False).encode(self.encoding()) + tlgen = api.PageGenerator("templates", titles=tltitle, site=self) + if isinstance(namespaces, list): + tlgen.request["gtlnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + tlgen.request["gtlnamespace"] = str(namespaces) + return tlgen + + def categorymembers(self, category, namespaces=None, limit=None): + """Iterate members of specified category. + + @param category: The Category to iterate. + @param namespaces: If present, only return category members from + these namespaces. For example, use namespaces=[14] to yield + subcategories, use namespaces=[6] to yield image files, etc. Note, + however, that the iterated values are always Page objects, even + if in the Category or Image namespace. + @type namespaces: list of ints + @param limit: maximum number of pages to iterate (default: all) + @type limit: int + + """ + if category.namespace() != 14: + raise Error( + u"categorymembers: non-Category page '%s' specified" + % category.title()) + cmtitle = category.title(withSection=False).encode(self.encoding()) + cmgen = api.PageGenerator("categorymembers", gcmtitle=cmtitle, + gcmprop="ids|title|sortkey", site=self) + if isinstance(namespaces, list): + cmgen.request["gcmnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + cmgen.request["gcmnamespace"] = str(namespaces) + if isinstance(limit, int): + cmgen.limit = limit + return cmgen + + def loadrevisions(self, page=None, getText=False, revids=None, + limit=None, startid=None, endid=None, starttime=None, + endtime=None, rvdir=None, user=None, excludeuser=None, + section=None, sysop=False): + """Retrieve and store revision information. + + By default, retrieves the last (current) revision of the page, + I{unless} any of the optional parameters revids, startid, endid, + starttime, endtime, rvdir, user, excludeuser, or limit are + specified. 
Unless noted below, all parameters not specified + default to False. + + If rvdir is False or not specified, startid must be greater than + endid if both are specified; likewise, starttime must be greater + than endtime. If rvdir is True, these relationships are reversed. + + @param page: retrieve revisions of this Page (required unless ids + is specified) + @param getText: if True, retrieve the wiki-text of each revision; + otherwise, only retrieve the revision metadata (default) + @param section: if specified, retrieve only this section of the text + (getText must be True); section must be given by number (top of + the article is section 0), not name + @type section: int + @param revids: retrieve only the specified revision ids (required + unless page is specified) + @type revids: list of ints + @param limit: Retrieve no more than this number of revisions + @type limit: int + @param startid: retrieve revisions starting with this revid + @param endid: stop upon retrieving this revid + @param starttime: retrieve revisions starting at this timestamp + @param endtime: stop upon reaching this timestamp + @param rvdir: if false, retrieve newest revisions first (default); + if true, retrieve earliest first + @param user: retrieve only revisions authored by this user + @param excludeuser: retrieve all revisions not authored by this user + @param sysop: if True, switch to sysop account (if available) to + retrieve this page + + """ + latest = (revids is None and + startid is None and + endid is None and + starttime is None and + endtime is None and + rvdir is None and + user is None and + excludeuser is None and + limit is None) # if True, we are retrieving current revision + + # check for invalid argument combinations + if page is None and revids is None: + raise ValueError( + "loadrevisions: either page or revids argument required") + if (startid is not None or endid is not None) and \ + (starttime is not None or endtime is not None): + raise ValueError( + "loadrevisions: startid/endid combined with starttime/endtime") + if starttime is not None and endtime is not None: + if rvdir and starttime >= endtime: + raise ValueError( + "loadrevisions: starttime > endtime with rvdir=True") + if (not rvdir) and endtime >= starttime: + raise ValueError( + "loadrevisions: endtime > starttime with rvdir=False") + if startid is not None and endid is not None: + if rvdir and startid >= endid: + raise ValueError( + "loadrevisions: startid > endid with rvdir=True") + if (not rvdir) and endid >= startid: + raise ValueError( + "loadrevisions: endid > startid with rvdir=False") + + # assemble API request + if revids is None: + rvtitle = page.title(withSection=False).encode(self.encoding()) + rvgen = api.PropertyGenerator(u"info|revisions", titles=rvtitle, + site=self) + else: + if isinstance(revids, (int, basestring)): + ids = unicode(revids) + else: + ids = u"|".join(unicode(r) for r in revids) + rvgen = api.PropertyGenerator(u"info|revisions", revids=ids, + site=self) + if getText: + rvgen.request[u"rvprop"] = \ + u"ids|flags|timestamp|user|comment|content" + if section is not None: + rvgen.request[u"rvsection"] = unicode(section) + if latest or "revids" in rvgen.request: + rvgen.limit = -1 # suppress use of rvlimit parameter + elif isinstance(limit, int): + rvgen.limit = limit + if rvdir: + rvgen.request[u"rvdir"] = u"newer" + elif rvdir is not None: + rvgen.request[u"rvdir"] = u"older" + if startid: + rvgen.request[u"rvstartid"] = startid + if endid: + rvgen.request[u"rvendid"] = endid + if starttime: + 
rvgen.request[u"rvstart"] = starttime + if endtime: + rvgen.request[u"rvend"] = endtime + if user: + rvgen.request[u"rvuser"] = user + elif excludeuser: + rvgen.request[u"rvexcludeuser"] = excludeuser + # TODO if sysop: something + rvgen.continuekey = "revisions" + for pagedata in rvgen: + if page is not None: + if pagedata['title'] != page.title(withSection=False): + raise Error( + u"loadrevisions: Query on %s returned data on '%s'" + % (page, pagedata['title'])) + if pagedata.has_key('missing'): + raise NoPage(u'Page %s does not exist' + % page.title(asLink=True)) + else: + page = Page(self, pagedata['title']) + api.update_page(page, pagedata) + + def pageinterwiki(self, page): + # No such function in the API (this method isn't called anywhere) + raise NotImplementedError + + def pagelanglinks(self, page): + """Iterate all interlanguage links on page, yielding Link objects.""" + lltitle = page.title(withSection=False) + llquery = api.PropertyGenerator("langlinks", + titles=lltitle.encode(self.encoding()), + site=self) + for pageitem in llquery: + if pageitem['title'] != lltitle: + raise Error( + u"getlanglinks: Query on %s returned data on '%s'" + % (page, pageitem['title'])) + if 'langlinks' not in pageitem: + continue + for linkdata in pageitem['langlinks']: + yield pywikibot.Link(linkdata['*'], + source=pywikibot.Site(linkdata['lang'])) + + def page_extlinks(self, page): + """Iterate all external links on page, yielding URL strings.""" + eltitle = page.title(withSection=False) + elquery = api.PropertyGenerator("extlinks", + titles=eltitle.encode(self.encoding()), + site=self) + for pageitem in elquery: + if pageitem['title'] != eltitle: + raise RuntimeError( + "getlanglinks: Query on %s returned data on '%s'" + % (page, pageitem['title'])) + if 'extlinks' not in pageitem: + continue + for linkdata in pageitem['extlinks']: + yield linkdata['*'] + + @deprecate_arg("throttle", None) + @deprecate_arg("includeredirects", "filterredir") + def allpages(self, start="!", prefix="", namespace=0, filterredir=None, + filterlanglinks=None, minsize=None, maxsize=None, + protect_type=None, protect_level=None, limit=None, + reverse=False, includeredirects=None): + """Iterate pages in a single namespace. + + Note: parameters includeRedirects and throttle are deprecated and + included only for backwards compatibility. + + @param start: Start at this title (page need not exist). + @param prefix: Only yield pages starting with this string. 
+ @param namespace: Iterate pages from this (single) namespace + (default: 0) + @param filterredir: if True, only yield redirects; if False (and not + None), only yield non-redirects (default: yield both) + @param filterlanglinks: if True, only yield pages with language links; + if False (and not None), only yield pages without language links + (default: yield both) + @param minsize: if present, only yield pages at least this many + bytes in size + @param maxsize: if present, only yield pages at most this many bytes + in size + @param protect_type: only yield pages that have a protection of the + specified type + @type protect_type: str + @param protect_level: only yield pages that have protection at this + level; can only be used if protect_type is specified + @param limit: maximum number of pages to iterate (default: iterate + all pages in namespace) + @param reverse: if True, iterate in reverse Unicode lexigraphic + order (default: iterate in forward order) + @param includeredirects: DEPRECATED, use filterredirs instead + + """ + if not isinstance(namespace, int): + raise Error("allpages: only one namespace permitted.") + if includeredirects is not None: + logger.debug( +"allpages: 'includeRedirects' argument is deprecated; use 'filterredirs'.") + if includeredirects: + if includeredirects == "only": + filterredirs = True + else: + filterredirs = None + else: + filterredirs = False + + apgen = api.PageGenerator("allpages", gapnamespace=str(namespace), + gapfrom=start, site=self) + if prefix: + apgen.request["gapprefix"] = prefix + if filterredir is not None: + apgen.request["gapfilterredir"] = (filterredir + and "redirects" + or "nonredirects") + if filterlanglinks is not None: + apgen.request["gapfilterlanglinks"] = (filterlanglinks + and "withlanglinks" + or "withoutlanglinks") + if isinstance(minsize, int): + apgen.request["gapminsize"] = str(minsize) + if isinstance(maxsize, int): + apgen.request["gapmaxsize"] = str(maxsize) + if isinstance(protect_type, basestring): + apgen.request["gapprtype"] = protect_type + if isinstance(protect_level, basestring): + apgen.request["gapprlevel"] = protect_level + if isinstance(limit, int): + apgen.limit = limit + if reverse: + apgen.request["gapdir"] = "descending" + return apgen + + def prefixindex(self, prefix, namespace=0, includeredirects=True): + """Yield all pages with a given prefix. Deprecated. + + Use allpages() with the prefix= parameter instead of this method. + + """ + logger.debug("Site.prefixindex() is deprecated; use allpages instead.") + return self.allpages(prefix=prefix, namespace=namespace, + includeredirects=includeredirects) + + + def alllinks(self, start="!", prefix="", namespace=0, unique=False, + limit=None, fromids=False): + """Iterate all links to pages (which need not exist) in one namespace. + + Note that, in practice, links that were found on pages that have + been deleted may not have been removed from the links table, so this + method can return false positives. + + @param start: Start at this title (page need not exist). + @param prefix: Only yield pages starting with this string. 
+ @param namespace: Iterate pages from this (single) namespace + (default: 0) + @param unique: If True, only iterate each link title once (default: + iterate once for each linking page) + @param limit: maximum number of pages to iterate (default: iterate + all pages in namespace) + @param fromids: if True, include the pageid of the page containing + each link (default: False) as the '_fromid' attribute of the Page; + cannot be combined with unique + + """ + if unique and fromids: + raise Error("alllinks: unique and fromids cannot both be True.") + if not isinstance(namespace, int): + raise Error("alllinks: only one namespace permitted.") + algen = api.ListGenerator("alllinks", alnamespace=str(namespace), + alfrom=start, site=self) + if prefix: + algen.request["alprefix"] = prefix + if isinstance(limit, int): + algen.limit = limit + if unique: + algen.request["alunique"] = "" + if fromids: + algen.request["alprop"] = "title|ids" + for link in algen: + p = pywikibot.Page(self, link['title'], link['ns']) + if fromids: + p._fromid = link['fromid'] + yield p + + def allcategories(self, start="!", prefix="", limit=None, + reverse=False): + """Iterate categories used (which need not have a Category page). + + Iterator yields Category objects. Note that, in practice, links that + were found on pages that have been deleted may not have been removed + from the database table, so this method can return false positives. + + @param start: Start at this category title (category need not exist). + @param prefix: Only yield categories starting with this string. + @param limit: maximum number of categories to iterate (default: + iterate all) + @param reverse: if True, iterate in reverse Unicode lexigraphic + order (default: iterate in forward order) + + """ + acgen = api.CategoryPageGenerator("allcategories", + gacfrom=start, site=self) + if prefix: + acgen.request["gacprefix"] = prefix + if isinstance(limit, int): + acgen.limit = limit + if reverse: + acgen.request["gacdir"] = "descending" + return acgen + + def categories(self, number=10, repeat=False): + """Deprecated; retained for backwards-compatibility""" + logger.debug( + "Site.categories() method is deprecated; use .allcategories()") + if repeat: + limit = None + else: + limit = number + return self.allcategories(limit=limit) + + def allusers(self, start="!", prefix="", limit=None, group=None): + """Iterate registered users, ordered by username. + + Iterated values are dicts containing 'name', 'editcount', + 'registration', and (sometimes) 'groups' keys. 'groups' will be + present only if the user is a member of at least 1 group, and will + be a list of unicodes; all the other values are unicodes and should + always be present. + + @param start: start at this username (name need not exist) + @param prefix: only iterate usernames starting with this substring + @param limit: maximum number of users to iterate (default: all) + @param group: only iterate users that are members of this group + @type group: str + + """ + augen = api.ListGenerator("allusers", aufrom=start, + auprop="editcount|groups|registration", + site=self) + if prefix: + augen.request["auprefix"] = prefix + if group: + augen.request["augroup"] = group + if isinstance(limit, int): + augen.limit = limit + return augen + + def allimages(self, start="!", prefix="", minsize=None, maxsize=None, + limit=None, reverse=False, sha1=None, sha1base36=None): + """Iterate all images, ordered by image title. + + Yields ImagePages, but these pages need not exist on the wiki. 
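+
+        Example (a sketch; the prefix is only an illustration):
+
+            for image in site.allimages(prefix=u"Map", limit=10):
+                print image.title()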
+ + @param start: start at this title (name need not exist) + @param prefix: only iterate titles starting with this substring + @param limit: maximum number of titles to iterate (default: all) + @param minsize: only iterate images of at least this many bytes + @param maxsize: only iterate images of no more than this many bytes + @param reverse: if True, iterate in reverse lexigraphic order + @param sha1: only iterate image (it is theoretically possible there + could be more than one) with this sha1 hash + @param sha1base36: same as sha1 but in base 36 + + """ + aigen = api.ImagePageGenerator("allimages", gaifrom=start, + site=self) + if prefix: + aigen.request["gaiprefix"] = prefix + if isinstance(limit, int): + aigen.limit = limit + if isinstance(minsize, int): + aigen.request["gaiminsize"] = str(minsize) + if isinstance(maxsize, int): + aigen.request["gaimaxsize"] = str(maxsize) + if reverse: + aigen.request["gaidir"] = "descending" + if sha1: + aigen.request["gaisha1"] = sha1 + if sha1base36: + aigen.request["gaisha1base36"] = sha1base36 + return aigen + + def blocks(self, starttime=None, endtime=None, reverse=False, + blockids=None, users=None, limit=None): + """Iterate all current blocks, in order of creation. + + Note that logevents only logs user blocks, while this method + iterates all blocks including IP ranges. The iterator yields dicts + containing keys corresponding to the block properties (see + http://www.mediawiki.org/wiki/API:Query_-_Lists for documentation). + + @param starttime: start iterating at this timestamp + @param endtime: stop iterating at this timestamp + @param reverse: if True, iterate oldest blocks first (default: newest) + @param blockids: only iterate blocks with these id numbers + @param users: only iterate blocks affecting these usernames or IPs + @param limit: maximum number of blocks to iterate (default: all) + + """ + if starttime and endtime: + if reverse: + if starttime > endtime: + raise pywikibot.Error( + "blocks: starttime must be before endtime with reverse=True") + else: + if endtime > starttime: + raise pywikibot.Error( + "blocks: endtime must be before starttime with reverse=False") + bkgen = api.ListGenerator("blocks", site=self) + bkgen.request["bkprop"] = \ + "id|user|by|timestamp|expiry|reason|range|flags" + if starttime: + bkgen.request["bkstart"] = starttime + if endtime: + bkgen.request["bkend"] = endtime + if reverse: + bkgen.request["bkdir"] = "newer" + if blockids: + bkgen.request["bkids"] = blockids + if users: + bkgen.request["bkusers"] = users + if isinstance(limit, int): + bkgen.limit = limit + return bkgen + + def exturlusage(self, url, protocol="http", namespaces=None, + limit=None): + """Iterate Pages that contain links to the given URL. 
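+
+        Example (a sketch; the hostname is only an illustration):
+
+            for linking_page in site.exturlusage(u"*.example.org", limit=20):
+                print linking_page.title()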
+ + @param url: The URL to search for (without the protocol prefix); + this many include a '*' as a wildcard, only at the start of the + hostname + @param protocol: The protocol prefix (default: "http") + @param namespaces: Only iterate pages in these namespaces (default: all) + @type namespaces: list of ints + @param limit: Only iterate this many linking pages (default: all) + + """ + eugen = api.PageGenerator("exturlusage", geuquery=url, + geuprotocol=protocol, site=self) + if isinstance(namespaces, list): + eugen.request["geunamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + eugen.request["geunamespace"] = str(namespaces) + if isinstance(limit, int): + eugen.limit = limit + return eugen + + def imageusage(self, image, namespaces=None, filterredir=None, + limit=None): + """Iterate Pages that contain links to the given ImagePage. + + @param image: the image to search for (ImagePage need not exist on the wiki) + @type image: ImagePage + @param namespaces: Only iterate pages in these namespaces (default: all) + @type namespaces: list of ints + @param filterredir: if True, only yield redirects; if False (and not + None), only yield non-redirects (default: yield both) + @param limit: Only iterate this many linking pages (default: all) + + """ + iugen = api.PageGenerator("imageusage", site=self, + giutitle=image.title(withSection=False)) + if isinstance(namespaces, list): + iugen.request["giunamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + iugen.request["giunamespace"] = str(namespaces) + if isinstance(limit, int): + iugen.limit = limit + if filterredir is not None: + iugen.request["giufilterredir"] = (filterredir and "redirects" + or "nonredirects") + return iugen + + def logevents(self, logtype=None, user=None, page=None, + start=None, end=None, reverse=False, limit=None): + """Iterate all log entries. + + @param logtype: only iterate entries of this type (see wiki + documentation for available types, which will include "block", + "protect", "rights", "delete", "upload", "move", "import", + "patrol", "merge") + @param user: only iterate entries that match this user name + @param page: only iterate entries affecting this page + @param start: only iterate entries from and after this timestamp + @param end: only iterate entries up to and through this timestamp + @param reverse: if True, iterate oldest entries first (default: newest) + @param limit: only iterate up to this many entries + + """ + if start and end: + if reverse: + if end < start: + raise Error( + "logevents: end must be later than start with reverse=True") + else: + if start < end: + raise Error( + "logevents: start must be later than end with reverse=False") + legen = api.ListGenerator("logevents", site=self) + if logtype is not None: + legen.request["letype"] = logtype + if user is not None: + legen.request["leuser"] = user + if page is not None: + legen.request["letitle"] = page.title(withSection=False) + if start is not None: + legen.request["lestart"] = start + if end is not None: + legen.request["leend"] = end + if reverse: + legen.request["ledir"] = "newer" + if isinstance(limit, int): + legen.limit = limit + return legen + + def recentchanges(self, start=None, end=None, reverse=False, limit=None, + namespaces=None, pagelist=None, changetype=None, + showMinor=None, showBot=None, showAnon=None, + showRedirects=None, showPatrolled=None): + """Iterate recent changes. 
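+
+        Example (a sketch; iterated values are dicts of change properties):
+
+            for change in site.recentchanges(limit=10, showBot=False):
+                print change['title'], change['timestamp']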
+ + @param start: timestamp to start listing from + @param end: timestamp to end listing at + @param reverse: if True, start with oldest changes (default: newest) + @param limit: iterate no more than this number of entries + @param namespaces: iterate changes to pages in these namespaces only + @type namespaces: list of ints + @param pagelist: iterate changes to pages in this list only + @param pagelist: list of Pages + @param changetype: only iterate changes of this type ("edit" for + edits to existing pages, "new" for new pages, "log" for log + entries) + @param showMinor: if True, only list minor edits; if False (and not + None), only list non-minor edits + @param showBot: if True, only list bot edits; if False (and not + None), only list non-bot edits + @param showAnon: if True, only list anon edits; if False (and not + None), only list non-anon edits + @param showRedirects: if True, only list edits to redirect pages; if + False (and not None), only list edits to non-redirect pages + @param showPatrolled: if True, only list patrolled edits; if False + (and not None), only list non-patrolled edits + + """ + if start and end: + if reverse: + if end < start: + raise Error( + "recentchanges: end must be later than start with reverse=True") + else: + if start < end: + raise Error( + "recentchanges: start must be later than end with reverse=False") + rcgen = api.ListGenerator("recentchanges", site=self, + rcprop="user|comment|timestamp|title|ids" + "|redirect|patrolled|loginfo|flags") + if start is not None: + rcgen.request["rcstart"] = start + if end is not None: + rcgen.request["rcend"] = end + if reverse: + rcgen.request["rcdir"] = "newer" + if isinstance(limit, int): + rcgen.limit = limit + if isinstance(namespaces, list): + rcgen.request["rcnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + rcgen.request["rcnamespace"] = str(namespaces) + if pagelist: + rcgen.request["rctitles"] = u"|".join(p.title(withSection=False) + for p in pagelist) + if changetype: + rcgen.request["rctype"] = changetype + filters = {'minor': showMinor, + 'bot': showBot, + 'anon': showAnon, + 'redirect': showRedirects, + 'patrolled': showPatrolled} + rcshow = [] + for item in filters: + if filters[item] is not None: + rcshow.append(filters[item] and item or ("!"+item)) + if rcshow: + rcgen.request["rcshow"] = "|".join(rcshow) + return rcgen + + @deprecate_arg("number", "limit") + def search(self, searchstring, namespaces=None, where="text", + getredirects=False, limit=None): + """Iterate Pages that contain the searchstring. + + Note that this may include non-existing Pages if the wiki's database + table contains outdated entries. 
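+
+        Example (a sketch; the search term is only an illustration):
+
+            for found in site.search(u"linktrail", namespaces=[0], limit=10):
+                print found.title()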
+ + @param searchstring: the text to search for + @type searchstring: unicode + @param where: Where to search; value must be "text" or "titles" (many + wikis do not support title search) + @param namespaces: search only in these namespaces (defaults to 0) + @type namespaces: list of ints + @param getredirects: if True, include redirects in results + @param limit: maximum number of results to iterate + + """ + if not searchstring: + raise Error("search: searchstring cannot be empty") + if where not in ("text", "titles"): + raise Error("search: unrecognized 'where' value: %s" % where) + srgen = api.PageGenerator("search", gsrsearch=searchstring, + gsrwhat=where, site=self) + if not namespaces: + logger.warning("search: namespaces cannot be empty; using [0].") + namespaces = [0] + if isinstance(namespaces, list): + srgen.request["gsrnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + else: + srgen.request["gsrnamespace"] = str(namespaces) + if getredirects: + srgen.request["gsrredirects"] = "" + if isinstance(limit, int): + srgen.limit = limit + return srgen + + def usercontribs(self, user=None, userprefix=None, start=None, end=None, + reverse=False, limit=None, namespaces=None, + showMinor=None): + """Iterate contributions by a particular user. + + Iterated values are in the same format as recentchanges. + + @param user: Iterate contributions by this user (name or IP) + @param userprefix: Iterate contributions by all users whose names + or IPs start with this substring + @param start: Iterate contributions starting at this timestamp + @param end: Iterate contributions ending at this timestamp + @param reverse: Iterate oldest contributions first (default: newest) + @param limit: Maximum number of contributions to iterate + @param namespaces: Only iterate contributions in these namespaces + @type namespaces: list of ints + @param showMinor: if True, iterate only minor edits; if False and + not None, iterate only non-minor edits (default: iterate both) + + """ + if not (user or userprefix): + raise Error( + "usercontribs: either user or userprefix must be non-empty") + if start and end: + if reverse: + if end < start: + raise Error( + "usercontribs: end must be later than start with reverse=True") + else: + if start < end: + raise Error( + "usercontribs: start must be later than end with reverse=False") + ucgen = api.ListGenerator("usercontribs", site=self, + ucprop="ids|title|timestamp|comment|flags") + if user: + ucgen.request["ucuser"] = user + if userprefix: + ucgen.request["ucuserprefix"] = userprefix + if start is not None: + ucgen.request["ucstart"] = start + if end is not None: + ucgen.request["ucend"] = end + if reverse: + ucgen.request["ucdir"] = "newer" + if isinstance(limit, int): + ucgen.limit = limit + if isinstance(namespaces, list): + ucgen.request["ucnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + ucgen.request["ucnamespace"] = str(namespaces) + if showMinor is not None: + ucgen.request["ucshow"] = showMinor and "minor" or "!minor" + return ucgen + + def watchlist_revs(self, start=None, end=None, reverse=False, + namespaces=None, showMinor=None, showBot=None, + showAnon=None, limit=None): + """Iterate revisions to pages on the bot user's watchlist. + + Iterated values will be in same format as recentchanges. 
+ + @param start: Iterate revisions starting at this timestamp + @param end: Iterate revisions ending at this timestamp + @param reverse: Iterate oldest revisions first (default: newest) + @param namespaces: only iterate revisions to pages in these + namespaces (default: all) + @type namespaces: list of ints + @param showMinor: if True, only list minor edits; if False (and not + None), only list non-minor edits + @param showBot: if True, only list bot edits; if False (and not + None), only list non-bot edits + @param showAnon: if True, only list anon edits; if False (and not + None), only list non-anon edits + @param limit: Maximum number of revisions to iterate + + """ + if start and end: + if reverse: + if end < start: + raise Error( + "watchlist_revs: end must be later than start with reverse=True") + else: + if start < end: + raise Error( + "watchlist_revs: start must be later than end with reverse=False") + wlgen = api.ListGenerator("watchlist", wlallrev="", site=self, + wlprop="user|comment|timestamp|title|ids|flags") + #TODO: allow users to ask for "patrol" as well? + if start is not None: + wlgen.request["wlstart"] = start + if end is not None: + wlgen.request["wlend"] = end + if reverse: + wlgen.request["wldir"] = "newer" + if isinstance(limit, int): + wlgen.limit = limit + if isinstance(namespaces, list): + wlgen.request["wlnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + wlgen.request["wlnamespace"] = str(namespaces) + filters = {'minor': showMinor, + 'bot': showBot, + 'anon': showAnon} + wlshow = [] + for item in filters: + if filters[item] is not None: + wlshow.append(filters[item] and item or ("!"+item)) + if wlshow: + wlgen.request["wlshow"] = "|".join(wlshow) + return wlgen + + def deletedrevs(self, page, start=None, end=None, reverse=None, limit=None, + get_text=False): + """Iterate deleted revisions. + + Each value returned by the iterator will be a dict containing the + 'title' and 'ns' keys for a particular Page and a 'revisions' key + whose value is a list of revisions in the same format as + recentchanges (plus a 'content' element if requested). If get_text + is true, the toplevel dict will contain a 'token' key as well. + + @param page: The page to check for deleted revisions + @param start: Iterate revisions starting at this timestamp + @param end: Iterate revisions ending at this timestamp + @param reverse: Iterate oldest revisions first (default: newest) + @param limit: Iterate no more than this number of revisions. + @param get_text: If True, retrieve the content of each revision and + an undelete token + + """ + if start and end: + if reverse: + if end < start: + raise Error( +"deletedrevs: end must be later than start with reverse=True") + else: + if start < end: + raise Error( +"deletedrevs: start must be later than end with reverse=False") + if not self.logged_in(): + self.login() + if "deletedhistory" not in self.userinfo['rights']: + try: + self.login(True) + except NoUsername: + pass + if "deletedhistory" not in self.userinfo['rights']: + raise Error( +"deletedrevs: User:%s not authorized to access deleted revisions." + % self.user()) + if get_text: + if "undelete" not in self.userinfo['rights']: + try: + self.login(True) + except NoUsername: + pass + if "undelete" not in self.userinfo['rights']: + raise Error( +"deletedrevs: User:%s not authorized to view deleted content." 
+ % self.user()) + + drgen = api.ListGenerator("deletedrevs", site=self, + titles=page.title(withSection=False), + drprop="revid|user|comment|minor") + if get_text: + drgen.request['drprop'] = drgen.request['drprop'] + "|content|token" + if start is not None: + drgen.request["drstart"] = start + if end is not None: + drgen.request["drend"] = end + if reverse: + drgen.request["drdir"] = "newer" + if isinstance(limit, int): + drgen.limit = limit + return drgen + + def users(self, usernames): + """Iterate info about a list of users by name or IP. + + @param usernames: a list of user names + @type usernames: list, or other iterable, of unicodes + + """ + if not isinstance(usernames, basestring): + usernames = u"|".join(usernames) + usgen = api.ListGenerator("users", ususers=usernames, site=self, + usprop="blockinfo|groups|editcount|registration") + return usgen + + def randompages(self, limit=1, namespaces=None, redirects=False): + """Iterate a number of random pages. + + Pages are listed in a fixed sequence, only the starting point is + random. + + @param limit: the maximum number of pages to iterate (default: 1) + @param namespaces: only iterate pages in these namespaces. + @param redirects: if True, include only redirect pages in results + (default: include only non-redirects) + + """ + rngen = api.PageGenerator("random", site=self) + rngen.limit = limit + if isinstance(namespaces, list): + rngen.request["grnnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + elif namespaces is not None: + rngen.request["grnnamespace"] = str(namespaces) + if redirects: + rngen.request["grnredirect"] = "" + return rngen + + # catalog of editpage error codes, for use in generating messages + _ep_errors = { + "noapiwrite": "API editing not enabled on %(site)s wiki", + "writeapidenied": +"User %(user)s is not authorized to edit on %(site)s wiki", + "protectedtitle": +"Title %(title)s is protected against creation on %(site)s", + "cantcreate": +"User %(user)s not authorized to create new pages on %(site)s wiki", + "cantcreate-anon": +"""Bot is not logged in, and anon users are not authorized to create new pages +on %(site)s wiki""", + "articleexists": "Page %(title)s already exists on %(site)s wiki", + "noimageredirect-anon": +"""Bot is not logged in, and anon users are not authorized to create image +redirects on %(site)s wiki""", + "noimageredirect": +"User %(user)s not authorized to create image redirects on %(site)s wiki", + "spamdetected": +"Edit to page %(title)s rejected by spam filter due to content:\n", + "filtered": "%(info)s", + "contenttoobig": "%(info)s", + "noedit-anon": +"""Bot is not logged in, and anon users are not authorized to edit on +%(site)s wiki""", + "noedit": "User %(user)s not authorized to edit pages on %(site)s wiki", + "pagedeleted": +"Page %(title)s has been deleted since last retrieved from %(site)s wiki", + "editconflict": "Page %(title)s not saved due to edit conflict.", + } + + def editpage(self, page, summary, minor=True, notminor=False, + recreate=True, createonly=False, watch=False, unwatch=False): + """Submit an edited Page object to be saved to the wiki. + + @param page: The Page to be saved; its .text property will be used + as the new text to be saved to the wiki + @param token: the edit token retrieved using Site.token() + @param summary: the edit summary (required!) 
+ @param minor: if True (default), mark edit as minor + @param notminor: if True, override account preferences to mark edit + as non-minor + @param recreate: if True (default), create new page even if this + title has previously been deleted + @param createonly: if True, raise an error if this title already + exists on the wiki + @param watch: if True, add this Page to bot's watchlist + @param unwatch: if True, remove this Page from bot's watchlist if + possible + @return: True if edit succeeded, False if it failed + + """ + text = page.text + if not text: + raise Error("editpage: no text to be saved") + try: + lastrev = page.latestRevision() + except NoPage: + lastrev = None + if not recreate: + raise Error("Page %s does not exist on %s wiki." + % (page.title(withSection=False), self)) + token = self.token(page, "edit") + self.lock_page(page) + if lastrev is not None and page.latestRevision() != lastrev: + raise Error("editpage: Edit conflict detected; saving aborted.") + req = api.Request(site=self, action="edit", + title=page.title(withSection=False), + text=text, token=token, summary=summary) +## if lastrev is not None: +## req["basetimestamp"] = page._revisions[lastrev].timestamp + if minor: + req['minor'] = "" + elif notminor: + req['notminor'] = "" + if 'bot' in self.userinfo['groups']: + req['bot'] = "" + if recreate: + req['recreate'] = "" + if createonly: + req['createonly'] = "" + if watch: + req['watch'] = "" + elif unwatch: + req['unwatch'] = "" +## FIXME: API gives 'badmd5' error +## md5hash = md5() +## md5hash.update(urllib.quote_plus(text.encode(self.encoding()))) +## req['md5'] = md5hash.digest() + while True: + try: + result = req.submit() + logger.debug("editpage response: %s" % result) + except api.APIError, err: + self.unlock_page(page) + if err.code.endswith("anon") and self.logged_in(): + logger.debug( +"editpage: received '%s' even though bot is logged in" % err.code) + errdata = { + 'site': self, + 'title': page.title(withSection=False), + 'user': self.user(), + 'info': err.info + } + if err.code == "spamdetected": + raise SpamfilterError(self._ep_errors[err.code] % errdata + + err.info[ err.info.index("fragment: ") + 9: ]) + + if err.code == "editconflict": + raise EditConflict(self._ep_errors[err.code] % errdata) + if err.code in self._ep_errors: + raise Error(self._ep_errors[err.code] % errdata) + logger.debug("editpage: Unexpected error code '%s' received." + % err.code) + raise + assert ("edit" in result and "result" in result["edit"]), result + if result["edit"]["result"] == "Success": + self.unlock_page(page) + if "nochange" in result["edit"]: + # null edit, page not changed + # TODO: do we want to notify the user of this? 
+ return True + page._revid = result["edit"]["newrevid"] + # see http://www.mediawiki.org/wiki/API:Wikimania_2006_API_discussion#Notes + # not safe to assume that saved text is the same as sent + self.loadrevisions(page, getText=True) + return True + elif result["edit"]["result"] == "Failure": + if "captcha" in result["edit"]: + captcha = result["edit"]["captcha"] + req['captchaid'] = captcha['id'] + if captcha["type"] == "math": + req['captchaword'] = input(captcha["question"]) + continue + elif "url" in captcha: + webbrowser.open(url) + req['captchaword'] = cap_answerwikipedia.input( +"Please view CAPTCHA in your browser, then type answer here:") + continue + else: + self.unlock_page(page) + logger.error( +"editpage: unknown CAPTCHA response %s, page not saved" + % captcha) + return False + else: + self.unlock_page(page) + logger.error("editpage: unknown failure reason %s" + % str(result)) + return False + else: + self.unlock_page(page) + logger.error( +"editpage: Unknown result code '%s' received; page not saved" + % result["edit"]["result"]) + logger.error(str(result)) + return False + + # catalog of move errors for use in error messages + _mv_errors = { + "noapiwrite": "API editing not enabled on %(site)s wiki", + "writeapidenied": +"User %(user)s is not authorized to edit on %(site)s wiki", + "nosuppress": +"User %(user)s is not authorized to move pages without creating redirects", + "cantmove-anon": +"""Bot is not logged in, and anon users are not authorized to move pages on +%(site)s wiki""", + "cantmove": +"User %(user)s is not authorized to move pages on %(site)s wiki", + "immobilenamespace": +"Pages in %(oldnamespace)s namespace cannot be moved on %(site)s wiki", + "articleexists": +"Cannot move because page [[%(newtitle)s]] already exists on %(site)s wiki", + "protectedpage": +"Page [[%(oldtitle)s]] is protected against moving on %(site)s wiki", + "protectedtitle": +"Page [[%(newtitle)s]] is protected against creation on %(site)s wiki", + "nonfilenamespace": +"Cannot move a file to %(newnamespace)s namespace on %(site)s wiki", + "filetypemismatch": +"[[%(newtitle)s]] file extension does not match content of [[%(oldtitle)s]]" + } + + def movepage(self, page, newtitle, summary, movetalk=True, + noredirect=False): + """Move a Page to a new title. + + @param page: the Page to be moved (must exist) + @param newtitle: the new title for the Page + @type newtitle: unicode + @param summary: edit summary (required!) + @param movetalk: if True (default), also move the talk page if possible + @param noredirect: if True, suppress creation of a redirect from the + old title to the new one + @return: Page object with the new title + + """ + oldtitle = page.title(withSection=False) + newlink = pywikibot.Link(newtitle, self) + if newlink.namespace: + newtitle = self.namespace(newlink.namespace) + ":" + newlink.title + else: + newtitle = newlink.title + if oldtitle == newtitle: + raise Error("Cannot move page %s to its own title." + % oldtitle) + if not page.exists(): + raise Error("Cannot move page %s because it does not exist on %s." 
+ % (oldtitle, self)) + token = self.token(page, "move") + self.lock_page(page) + req = api.Request(site=self, action="move", to=newtitle, + token=token, reason=summary) + req['from'] = oldtitle # "from" is a python keyword + if movetalk: + req['movetalk'] = "" + if noredirect: + req['noredirect'] = "" + try: + result = req.submit() + logger.debug("movepage response: %s" % result) + except api.APIError, err: + if err.code.endswith("anon") and self.logged_in(): + logger.debug( +"movepage: received '%s' even though bot is logged in" % err.code) + errdata = { + 'site': self, + 'oldtitle': oldtitle, + 'oldnamespace': self.namespace(page.namespace()), + 'newtitle': newtitle, + 'newnamespace': self.namespace(newlink.namespace), + 'user': self.user(), + } + if err.code in self._mv_errors: + raise Error(self._mv_errors[err.code] % errdata) + logger.debug("movepage: Unexpected error code '%s' received." + % err.code) + raise + finally: + self.unlock_page(page) + if "move" not in result: + logger.error("movepage: %s" % result) + raise Error("movepage: unexpected response") + # TODO: Check for talkmove-error messages + if "talkmove-error-code" in result["move"]: + logger.warning(u"movepage: Talk page %s not moved" + % (page.toggleTalkPage().title(asLink=True))) + return pywikibot.Page(page, newtitle) + + # catalog of rollback errors for use in error messages + _rb_errors = { + "noapiwrite": + "API editing not enabled on %(site)s wiki", + "writeapidenied": + "User %(user)s not allowed to edit through the API", + "alreadyrolled": + "Page [[%(title)s]] already rolled back; action aborted.", + } # other errors shouldn't arise because we check for those errors + + def rollbackpage(self, page, summary=u''): + """Roll back page to version before last user's edits. + + As a precaution against errors, this method will fail unless + the page history contains at least two revisions, and at least + one that is not by the same user who made the last edit. + + @param page: the Page to be rolled back (must exist) + @param summary: edit summary (defaults to a standardized message) + + """ + if len(page._revisions) < 2: + raise pywikibot.Error( + u"Rollback of %s aborted; load revision history first." + % page.title(asLink=True)) + last_rev = page._revisions[page.latestRevision()] + last_user = last_rev.user + for rev in sorted(page._revisions.keys(), reverse=True): + # start with most recent revision first + if rev.user != last_user: + prev_user = rev.user + break + else: + raise pywikibot.Error( + u"Rollback of %s aborted; only one user in revision history." + % page.title(asLink=True)) + summary = summary or ( +u"Reverted edits by [[Special:Contributions/%(last_user)s|%(last_user)s]] " +u"([[User talk:%(last_user)s|Talk]]) to last version by %(prev_user)s" + % locals()) + token = self.token(page, "rollback") + self.lock_page(page) + req = api.Request(site=self, action="rollback", + title=page.title(withSection=False), + user=last_user, + token=token) + try: + result = req.submit() + except api.APIError, err: + errdata = { + 'site': self, + 'title': page.title(withSection=False), + 'user': self.user(), + } + if err.code in self._rb_errors: + raise Error(self._rb_errors[err.code] % errdata) + logger.debug("rollback: Unexpected error code '%s' received." 
+ % err.code) + raise + finally: + self.unlock_page(page) + + # catalog of delete errors for use in error messages + _dl_errors = { + "noapiwrite": + "API editing not enabled on %(site)s wiki", + "writeapidenied": + "User %(user)s not allowed to edit through the API", + "permissiondenied": + "User %(user)s not authorized to delete pages on %(site)s wiki.", + "cantdelete": + "Could not delete [[%(title)s]]. Maybe it was deleted already.", + } # other errors shouldn't occur because of pre-submission checks + + def deletepage(self, page, summary): + """Delete page from the wiki. Requires appropriate privilege level. + + @param page: Page to be deleted. + @param summary: Edit summary (required!). + + """ + try: + self.login(sysop=True) + except pywikibot.Error, e: + raise Error("delete: Unable to login as sysop (%s)" + % e.__class__.__name__) + if not self.logged_in(sysop=True): + raise Error("delete: Unable to login as sysop") + token = self.token("delete") + req = api.Request(site=self, action="delete", token=token, + title=page.title(withSection=False), + reason=summary) + try: + result = req.submit() + except api.APIError, err: + errdata = { + 'site': self, + 'title': page.title(withSection=False), + 'user': self.user(), + } + if err.code in self._dl_errors: + raise Error(self._dl_errors[err.code] % errdata) + logger.debug("delete: Unexpected error code '%s' received." + % err.code) + raise + finally: + self.unlock_page(page) + + # TODO: implement undelete + + # TODO: implement patrol + + def linksearch(self, siteurl, limit=500): + """Backwards-compatible interface to exturlusage()""" + return self.exturlusage(siteurl, limit=limit) + + @deprecate_arg("repeat", None) + def newimages(self, number=100, lestart=None, leend=None, leuser=None, + letitle=None): + """Yield ImagePages from most recent uploads""" + return self.logevents(logtype="upload", limit=number, start=lestart, + end=leend, user=leuser, title=letitle) + + def getImagesFromAnHash(self, hash_found=None): + """Return all images that have the same hash. + + Useful to find duplicates or nowcommons. + + NOTE: it returns also the image itself, if you don't want it, just + filter the list returned. + + NOTE 2: it returns the image title WITHOUT the image namespace. + + """ + if hash_found == None: # If the hash is none return None and not continue + return None + return [image.title(withNamespace=False) + for image in self.allimages(sha1=hash_found)] + + +#### METHODS NOT IMPLEMENTED YET #### +class NotImplementedYet: + + # TODO: is this needed any more? can it be obtained from the http module? + def cookies(self, sysop = False): + """Return a string containing the user's current cookies.""" + self._loadCookies(sysop = sysop) + index = self._userIndex(sysop) + return self._cookies[index] + + def _loadCookies(self, sysop = False): + """Retrieve session cookies for login""" + index = self._userIndex(sysop) + if self._cookies[index] is not None: + return + try: + if sysop: + try: + username = config.sysopnames[self.family.name + ][self.code] + except KeyError: + raise NoUsername("""\ +You tried to perform an action that requires admin privileges, but you haven't +entered your sysop name in your user-config.py. 
Please add +sysopnames['%s']['%s']='name' to your user-config.py""" + % (self.family.name, self.code)) + else: + username = pywikiobt.config2.usernames[self.family.name + ][self.code] + except KeyError: + self._cookies[index] = None + self._isLoggedIn[index] = False + else: + tmp = '%s-%s-%s-login.data' % ( + self.family.name, self.code, username) + fn = config.datafilepath('login-data', tmp) + if not os.path.exists(fn): + self._cookies[index] = None + self._isLoggedIn[index] = False + else: + f = open(fn) + self._cookies[index] = '; '.join([x.strip() for x in f.readlines()]) + f.close() + + # THESE ARE FUNCTIONS NOT YET IMPLEMENTED IN THE API + # TODO: avoid code duplication for the following methods + def newpages(self, number = 10, get_redirect = False, repeat = False): + """Yield new articles (as Page objects) from Special:Newpages. + + Starts with the newest article and fetches the number of articles + specified in the first argument. If repeat is True, it fetches + Newpages again. If there is no new page, it blocks until there is + one, sleeping between subsequent fetches of Newpages. + + The objects yielded are tuples composed of the Page object, + timestamp (unicode), length (int), an empty unicode string, username + or IP address (str), comment (unicode). + + """ + # TODO: in recent MW versions Special:Newpages takes a namespace parameter, + # and defaults to 0 if not specified. + # TODO: Detection of unregistered users is broken + # TODO: Repeat mechanism doesn't make much sense as implemented; + # should use both offset and limit parameters, and have an + # option to fetch older rather than newer pages + seen = set() + while True: + path = self.newpages_address(n=number) + # The throttling is important here, so always enabled. + get_throttle() + html = self.getUrl(path) + + entryR = re.compile( +'<li[^>]*>(?P<date>.+?) \S*?<a href=".+?"' +' title="(?P<title>.+?)">.+?</a>.+?[([](?P<length>[\d,.]+)[^)]]*[)]]' +' .?<a href=".+?" title=".+?:(?P<username>.+?)">' + ) + for m in entryR.finditer(html): + date = m.group('date') + title = m.group('title') + title = title.replace('"', '"') + length = int(re.sub("[,.]", "", m.group('length'))) + loggedIn = u'' + username = m.group('username') + comment = u'' + + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page, date, length, loggedIn, username, comment + if not repeat: + break + + def longpages(self, number = 10, repeat = False): + """Yield Pages from Special:Longpages. + + Return values are a tuple of Page object, length(int). + + """ + #TODO: should use offset and limit parameters; 'repeat' as now + # implemented is fairly useless + # this comment applies to all the XXXXpages methods following, as well + seen = set() + while True: + path = self.longpages_address(n=number) + get_throttle() + html = self.getUrl(path) + entryR = re.compile(ur'<li>(<a href=".+?" title=".+?">hist</a>) <a href=".+?" title="(?P<title>.+?)">.+?</a> [(?P<length>\d+)(.+?)]</li>') + for m in entryR.finditer(html): + title = m.group('title') + length = int(m.group('length')) + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page, length + if not repeat: + break + + def shortpages(self, number = 10, repeat = False): + """Yield Pages and lengths from Special:Shortpages.""" + throttle = True + seen = set() + while True: + path = self.shortpages_address(n = number) + get_throttle() + html = self.getUrl(path) + entryR = re.compile(ur'<li>(<a href=".+?" title=".+?">hist</a>) <a href=".+?" 
title="(?P<title>.+?)">.+?</a> [(?P<length>\d+)(.+?)]</li>') + for m in entryR.finditer(html): + title = m.group('title') + length = int(m.group('length')) + + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page, length + if not repeat: + break + + def deadendpages(self, number = 10, repeat = False): + """Yield Page objects retrieved from Special:Deadendpages.""" + seen = set() + while True: + path = self.deadendpages_address(n=number) + get_throttle() + html = self.getUrl(path) + entryR = re.compile( + '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') + for m in entryR.finditer(html): + title = m.group('title') + + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page + if not repeat: + break + + def ancientpages(self, number = 10, repeat = False): + """Yield Pages, datestamps from Special:Ancientpages.""" + seen = set() + while True: + path = self.ancientpages_address(n=number) + get_throttle() + html = self.getUrl(path) + entryR = re.compile( +'<li><a href=".+?" title="(?P<title>.+?)">.+?</a> (?P<date>.+?)</li>') + for m in entryR.finditer(html): + title = m.group('title') + date = m.group('date') + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page, date + if not repeat: + break + + def lonelypages(self, number = 10, repeat = False): + """Yield Pages retrieved from Special:Lonelypages.""" + throttle = True + seen = set() + while True: + path = self.lonelypages_address(n=number) + get_throttle() + html = self.getUrl(path) + entryR = re.compile( + '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') + for m in entryR.finditer(html): + title = m.group('title') + + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page + if not repeat: + break + + def unwatchedpages(self, number = 10, repeat = False): + """Yield Pages from Special:Unwatchedpages (requires Admin privileges).""" + seen = set() + while True: + path = self.unwatchedpages_address(n=number) + get_throttle() + html = self.getUrl(path, sysop = True) + entryR = re.compile( + '<li><a href=".+?" title="(?P<title>.+?)">.+?</a>.+?</li>') + for m in entryR.finditer(html): + title = m.group('title') + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page + if not repeat: + break + + def uncategorizedcategories(self, number = 10, repeat = False): + """Yield Categories from Special:Uncategorizedcategories.""" + import catlib + seen = set() + while True: + path = self.uncategorizedcategories_address(n=number) + get_throttle() + html = self.getUrl(path) + entryR = re.compile( + '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') + for m in entryR.finditer(html): + title = m.group('title') + if title not in seen: + seen.add(title) + page = catlib.Category(self, title) + yield page + if not repeat: + break + + def newimages(self, number = 10, repeat = False): + """Yield ImagePages from Special:Log&type=upload""" + + seen = set() + regexp = re.compile('<li[^>]*>(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a>\s+(.+?</a>).*?<a href=".*?"(?P<new> class="new")? title="(?P<image>.+?)"\s*>(?:.*?<span class="comment">(?P<comment>.*?)</span>)?', re.UNICODE) + + while True: + path = self.log_address(number, mode = 'upload') + get_throttle() + html = self.getUrl(path) + + for m in regexp.finditer(html): + image = m.group('image') + + if image not in seen: + seen.add(image) + + if m.group('new'): + output(u"Image '%s' has been deleted." 
% image) + continue + + date = m.group('date') + user = m.group('user') + comment = m.group('comment') or '' + + yield ImagePage(self, image), date, user, comment + if not repeat: + break + + def uncategorizedimages(self, number = 10, repeat = False): + """Yield ImagePages from Special:Uncategorizedimages.""" + seen = set() + ns = self.image_namespace() + entryR = re.compile( + '<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns) + while True: + path = self.uncategorizedimages_address(n=number) + get_throttle() + html = self.getUrl(path) + for m in entryR.finditer(html): + title = m.group('title') + if title not in seen: + seen.add(title) + page = ImagePage(self, title) + yield page + if not repeat: + break + + def uncategorizedpages(self, number = 10, repeat = False): + """Yield Pages from Special:Uncategorizedpages.""" + seen = set() + while True: + path = self.uncategorizedpages_address(n=number) + get_throttle() + html = self.getUrl(path) + entryR = re.compile( + '<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') + for m in entryR.finditer(html): + title = m.group('title') + + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page + if not repeat: + break + + def unusedcategories(self, number = 10, repeat = False): + """Yield Category objects from Special:Unusedcategories.""" + import catlib + seen = set() + while True: + path = self.unusedcategories_address(n=number) + get_throttle() + html = self.getUrl(path) + entryR = re.compile('<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') + for m in entryR.finditer(html): + title = m.group('title') + + if title not in seen: + seen.add(title) + page = catlib.Category(self, title) + yield page + if not repeat: + break + + def unusedfiles(self, number = 10, repeat = False, extension = None): + """Yield ImagePage objects from Special:Unusedimages.""" + seen = set() + ns = self.image_namespace() + entryR = re.compile( + '<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns) + while True: + path = self.unusedfiles_address(n=number) + get_throttle() + html = self.getUrl(path) + for m in entryR.finditer(html): + fileext = None + title = m.group('title') + if extension: + fileext = title[len(title)-3:] + if title not in seen and fileext == extension: + ## Check whether the media is used in a Proofread page + # code disabled because it slows this method down, and + # because it is unclear what it's supposed to do. + #basename = title[6:] + #page = Page(self, 'Page:' + basename) + + #if not page.exists(): + seen.add(title) + image = ImagePage(self, title) + yield image + if not repeat: + break + + def withoutinterwiki(self, number=10, repeat=False): + """Yield Pages without language links from Special:Withoutinterwiki.""" + seen = set() + while True: + path = self.withoutinterwiki_address(n=number) + get_throttle() + html = self.getUrl(path) + entryR = re.compile('<li><a href=".+?" title="(?P<title>.+?)">.+?</a></li>') + for m in entryR.finditer(html): + title = m.group('title') + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page + if not repeat: + break + + def linksearch(self, siteurl): + """Yield Pages from results of Special:Linksearch for 'siteurl'.""" + if siteurl.startswith('*.'): + siteurl = siteurl[2:] + output(u'Querying [[Special:Linksearch]]...') + cache = [] + for url in [siteurl, '*.' 
+ siteurl]: + path = self.linksearch_address(url) + get_throttle() + html = self.getUrl(path) + loc = html.find('<div class="mw-spcontent">') + if loc > -1: + html = html[loc:] + loc = html.find('<div class="printfooter">') + if loc > -1: + html = html[:loc] + R = re.compile('title ?="(.*?)"') + for title in R.findall(html): + if not siteurl in title: + # the links themselves have similar form + if title in cache: + continue + else: + cache.append(title) + yield Page(self, title) +
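The query methods above (exturlusage, imageusage, logevents, recentchanges, search, usercontribs, watchlist_revs, deletedrevs, users and randompages) all follow the same pattern: construct an api.PageGenerator or api.ListGenerator, copy the caller's filters into the generator's request parameters, and return the generator so results are fetched lazily as the caller iterates. A minimal usage sketch, assuming a configured user-config.py and that pywikibot.getSite() returns a Site instance exposing these methods (getSite() and Page are used elsewhere in this revision; only the host name and search phrase below are hypothetical):

    # -*- coding: utf-8 -*-
    import pywikibot

    site = pywikibot.getSite()

    # Pages linking to an external host; PageGenerators yield Page objects.
    for page in site.exturlusage(u"www.example.org", namespaces=[0], limit=10):
        print page.title()

    # Recent non-bot edits in the main namespace; ListGenerators yield the
    # raw API records (user, comment, timestamp, title, ids, ...).
    for change in site.recentchanges(limit=20, namespaces=[0], showBot=False):
        print change

    # Full-text search restricted to namespace 0.
    for hit in site.search(u"example phrase", namespaces=[0], limit=5):
        print hit.title()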
Property changes on: branches/rewrite/pywikibot/site.py ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision Added: svn:eol-style + native
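The write methods in site.py (editpage, movepage, rollbackpage, deletepage) share a second pattern: obtain a token via Site.token(), lock the page, submit an api.Request, and map API error codes onto framework exceptions through the _ep_errors/_mv_errors/_rb_errors/_dl_errors catalogs. A hedged sketch of the calling convention, assuming the same getSite()/Page setup as above and that Page.text is writable as the editpage docstring implies; the page titles used here are placeholders only:

    import pywikibot

    site = pywikibot.getSite()
    page = pywikibot.Page(site, u"Project:Sandbox")

    # editpage() saves page.text and returns True on success, False on failure.
    page.text = u"{{test}} edit made with the rewrite branch"
    if site.editpage(page, summary=u"testing the rewrite edit API", minor=True):
        # movepage() returns a Page object for the new title.
        newpage = site.movepage(page, u"Project:Sandbox/Archive",
                                summary=u"archiving test edits")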
Modified: branches/rewrite/pywikibot/textlib.py =================================================================== --- branches/rewrite/pywikibot/textlib.py 2008-12-16 19:34:48 UTC (rev 6155) +++ branches/rewrite/pywikibot/textlib.py 2008-12-16 19:40:20 UTC (rev 6156) @@ -1,675 +1,675 @@ -# -*- coding: utf-8 -*- -""" -Functions for manipulating wiki-text. - -Unless otherwise noted, all functions take a unicode string as the argument -and return a unicode string. - -""" -# -# (C) Pywikipedia bot team, 2008 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id: $' - - -import pywikibot -import re - - -def unescape(s): - """Replace escaped HTML-special characters by their originals""" - if '&' not in s: - return s - s = s.replace("<", "<") - s = s.replace(">", ">") - s = s.replace("'", "'") - s = s.replace(""", '"') - s = s.replace("&", "&") # Must be last - return s - - -def replaceExcept(text, old, new, exceptions, caseInsensitive=False, - allowoverlap=False, marker = '', site = None): - """ - Return text with 'old' replaced by 'new', ignoring specified types of text. - - Skips occurences of 'old' within exceptions; e.g., within nowiki tags or - HTML comments. If caseInsensitive is true, then use case insensitive - regex matching. If allowoverlap is true, overlapping occurences are all - replaced (watch out when using this, it might lead to infinite loops!). - - Parameters: - text - a unicode string - old - a compiled regular expression - new - a unicode string (which can contain regular - expression references), or a function which takes - a match object as parameter. See parameter repl of - re.sub(). - exceptions - a list of strings which signal what to leave out, - e.g. ['math', 'table', 'template'] - caseInsensitive - a boolean - marker - a string that will be added to the last replacement; - if nothing is changed, it is added at the end - - """ - if site is None: - site = pywikibot.getSite() - - exceptionRegexes = { - 'comment': re.compile(r'(?s)<!--.*?-->'), - # section headers - 'header': re.compile(r'\r\n=+.+=+ *\r\n'), - 'includeonly': re.compile(r'(?is)<includeonly>.*?</includeonly>'), - 'math': re.compile(r'(?is)<math>.*?</math>'), - 'noinclude': re.compile(r'(?is)<noinclude>.*?</noinclude>'), - # wiki tags are ignored inside nowiki tags. - 'nowiki': re.compile(r'(?is)<nowiki>.*?</nowiki>'), - # preformatted text - 'pre': re.compile(r'(?ism)<pre>.*?</pre>'), - 'source': re.compile(r'(?is)<source .*?</source>'), - # inline references - 'ref': re.compile(r'(?ism)<ref[ >].*?</ref>'), - 'timeline': re.compile(r'(?is)<timeline>.*?</timeline>'), - # lines that start with a space are shown in a monospace font and - # have whitespace preserved. - 'startspace': re.compile(r'(?m)^ (.*?)$'), - # tables often have whitespace that is used to improve wiki - # source code readability. - # TODO: handle nested tables. - 'table': re.compile(r'(?ims)^{|.*?^|}|<table>.*?</table>'), - # templates with parameters often have whitespace that is used to - # improve wiki source code readability. - # 'template': re.compile(r'(?s){{.*?}}'), - # The regex above fails on nested templates. This regex can handle - # templates cascaded up to level 3, but no deeper. For arbitrary - # depth, we'd need recursion which can't be done in Python's re. - # After all, the language of correct parenthesis words is not regular. 
- 'template': re.compile(r'(?s){{(({{(({{.*?}})|.)*}})|.)*}}'), - 'hyperlink': compileLinkR(), - 'gallery': re.compile(r'(?is)<gallery.*?>.*?</gallery>'), - # this matches internal wikilinks, but also interwiki, categories, and - # images. - 'link': re.compile(r'[[[^]|]*(|[^]]*)?]]'), - 'interwiki': re.compile(r'(?i)[[(%s)\s?:[^]]*]][\s]*' - % '|'.join(site.validLanguageLinks() + site.family.obsolete.keys())), - - } - - # if we got a string, compile it as a regular expression - if type(old) is str or type(old) is unicode: - if caseInsensitive: - old = re.compile(old, re.IGNORECASE | re.UNICODE) - else: - old = re.compile(old) - - dontTouchRegexes = [] - for exc in exceptions: - if isinstance(exc, str) or isinstance(exc, unicode): - # assume it's a reference to the exceptionRegexes dictionary - # defined above. - if not exceptionRegexes.has_key(exc): - raise ValueError("Unknown tag type: " + exc) - dontTouchRegexes.append(exceptionRegexes[exc]) - else: - # assume it's a regular expression - dontTouchRegexes.append(exc) - index = 0 - markerpos = len(text) - while True: - match = old.search(text, index) - if not match: - # nothing left to replace - break - - # check which exception will occur next. - nextExceptionMatch = None - for dontTouchR in dontTouchRegexes: - excMatch = dontTouchR.search(text, index) - if excMatch and ( - nextExceptionMatch is None or - excMatch.start() < nextExceptionMatch.start()): - nextExceptionMatch = excMatch - - if nextExceptionMatch is not None and nextExceptionMatch.start() <= match.start(): - # an HTML comment or text in nowiki tags stands before the next valid match. Skip. - index = nextExceptionMatch.end() - else: - # We found a valid match. Replace it. - if callable(new): - # the parameter new can be a function which takes the match as a parameter. - replacement = new(match) - else: - # it is not a function, but a string. - - # it is a little hack to make \n work. It would be better to fix it - # previously, but better than nothing. - new = new.replace('\n', '\n') - - # We cannot just insert the new string, as it may contain regex - # group references such as \2 or \g<name>. - # On the other hand, this approach does not work because it can't - # handle lookahead or lookbehind (see bug #1731008): - #replacement = old.sub(new, text[match.start():match.end()]) - #text = text[:match.start()] + replacement + text[match.end():] - - # So we have to process the group references manually. - replacement = new - - groupR = re.compile(r'\(?P<number>\d+)|\g<(?P<name>.+?)>') - while True: - groupMatch = groupR.search(replacement) - if not groupMatch: - break - groupID = groupMatch.group('name') or int(groupMatch.group('number')) - replacement = replacement[:groupMatch.start()] + match.group(groupID) + replacement[groupMatch.end():] - text = text[:match.start()] + replacement + text[match.end():] - - # continue the search on the remaining text - if allowoverlap: - index = match.start() + 1 - else: - index = match.start() + len(replacement) - markerpos = match.start() + len(replacement) - text = text[:markerpos] + marker + text[markerpos:] - return text - - -def removeDisabledParts(text, tags = ['*']): - """ - Return text without portions where wiki markup is disabled - - Parts that can/will be removed are -- - * HTML comments - * nowiki tags - * pre tags - * includeonly tags - - The exact set of parts which should be removed can be passed as the - 'parts' parameter, which defaults to all. 
- """ - regexes = { - 'comments' : r'<!--.*?-->', - 'includeonly': r'<includeonly>.*?</includeonly>', - 'nowiki': r'<nowiki>.*?</nowiki>', - 'pre': r'<pre>.*?</pre>', - 'source': r'<source .*?</source>', - } - if '*' in tags: - tags = regexes.keys() - toRemoveR = re.compile('|'.join([regexes[tag] for tag in tags]), - re.IGNORECASE | re.DOTALL) - return toRemoveR.sub('', text) - - -def isDisabled(text, index, tags = ['*']): - """ - Return True if text[index] is disabled, e.g. by a comment or by nowiki tags. - - For the tags parameter, see removeDisabledParts() above. - """ - # Find a marker that is not already in the text. - marker = '@@' - while marker in text: - marker += '@' - text = text[:index] + marker + text[index:] - text = removeDisabledParts(text, tags) - return (marker not in text) - - -# Functions dealing with interwiki language links - -# Note - MediaWiki supports two kinds of interwiki links; interlanguage and -# interproject. These functions only deal with links to a -# corresponding page in another language on the same project (e.g., -# Wikipedia, Wiktionary, etc.) in another language. They do not find -# or change links to a different project, or any that are formatted -# as in-line interwiki links (e.g., "[[:es:Articulo]]". (CONFIRM) - -def getLanguageLinks(text, insite = None, pageLink = "[[]]"): - """ - Return a dict of interlanguage links found in text. - - Dict uses language codes as keys and Page objects as values. - Do not call this routine directly, use Page.interwiki() method - instead. - - """ - if insite == None: - insite = pywikibot.getSite() - result = {} - # Ignore interwiki links within nowiki tags, includeonly tags, pre tags, - # and HTML comments - text = removeDisabledParts(text) - - # This regular expression will find every link that is possibly an - # interwiki link. - # NOTE: language codes are case-insensitive and only consist of basic latin - # letters and hyphens. - interwikiR = re.compile(r'[[([a-zA-Z-]+)\s?:([^[]\n]*)]]') - for lang, pagetitle in interwikiR.findall(text): - lang = lang.lower() - # Check if it really is in fact an interwiki link to a known - # language, or if it's e.g. a category tag or an internal link - if lang in insite.family.obsolete: - lang = insite.family.obsolete[lang] - if lang in insite.validLanguageLinks(): - if '|' in pagetitle: - # ignore text after the pipe - pagetitle = pagetitle[:pagetitle.index('|')] - # we want the actual page objects rather than the titles - site = insite.getSite(code = lang) - try: - result[site] = pywikibot.Page(site, pagetitle, insite = insite) - except InvalidTitle: - output( - u"[getLanguageLinks] Text contains invalid interwiki link [[%s:%s]]." - % (lang, pagetitle)) - continue - return result - - -def removeLanguageLinks(text, site = None, marker = ''): - """Return text with all interlanguage links removed. - - If a link to an unknown language is encountered, a warning is printed. - If a marker is defined, that string is placed at the location of the - last occurence of an interwiki link (at the end if there are no - interwiki links). - - """ - if site == None: - site = pywikibot.getSite() - if not site.validLanguageLinks(): - return text - # This regular expression will find every interwiki link, plus trailing - # whitespace. 
- languages = '|'.join(site.validLanguageLinks() + site.family.obsolete.keys()) - interwikiR = re.compile(r'[[(%s)\s?:[^]]*]][\s]*' - % languages, re.IGNORECASE) - text = replaceExcept(text, interwikiR, '', - ['nowiki', 'comment', 'math', 'pre', 'source'], marker=marker) - return text.strip() - - -def replaceLanguageLinks(oldtext, new, site = None): - """Replace interlanguage links in the text with a new set of links. - - 'new' should be a dict with the Site objects as keys, and Page objects - as values (i.e., just like the dict returned by getLanguageLinks - function). - - """ - # Find a marker that is not already in the text. - marker = '@@' - while marker in oldtext: - marker += '@' - if site == None: - site = pywikibot.getSite() - s = interwikiFormat(new, insite = site) - s2 = removeLanguageLinks(oldtext, site = site, marker = marker) - if s: - if site.language() in site.family.interwiki_attop: - newtext = s + site.family.interwiki_text_separator + s2.replace(marker,'').strip() - else: - # calculate what was after the language links on the page - firstafter = s2.find(marker) + len(marker) - # Is there any text in the 'after' part that means we should keep it after? - if "</noinclude>" in s2[firstafter:]: - newtext = s2[:firstafter] + s + s2[firstafter:] - elif site.language() in site.family.categories_last: - cats = getCategoryLinks(s2, site = site) - s2 = removeCategoryLinks(s2.replace(marker,'').strip(), site) + site.family.interwiki_text_separator + s - newtext = replaceCategoryLinks(s2, cats, site=site) - else: - newtext = s2.replace(marker,'').strip() + site.family.interwiki_text_separator + s - newtext = newtext.replace(marker,'') - else: - newtext = s2.replace(marker,'') - return newtext - - -def interwikiFormat(links, insite = None): - """Convert interwiki link dict into a wikitext string. - - 'links' should be a dict with the Site objects as keys, and Page - objects as values. - - Return a unicode string that is formatted for inclusion in insite - (defaulting to the current site). - """ - if insite is None: - insite = pywikibot.getSite() - if not links: - return '' - - ar = interwikiSort(links.keys(), insite) - s = [] - for site in ar: - try: - link = links[site].aslink(forceInterwiki=True) - s.append(link) - except AttributeError: - s.append(pywikibot.getSite(site).linkto(links[site], - othersite=insite)) - if insite.lang in insite.family.interwiki_on_one_line: - sep = u' ' - else: - sep = u'\r\n' - s=sep.join(s) + u'\r\n' - return s - - -# Sort sites according to local interwiki sort logic -def interwikiSort(sites, insite = None): - if insite is None: - insite = pywikibot.getSite() - if not sites: - return [] - - sites.sort() - putfirst = insite.interwiki_putfirst() - if putfirst: - #In this case I might have to change the order - firstsites = [] - for code in putfirst: - # The code may not exist in this family? - if code in insite.family.obsolete: - code = insite.family.obsolete[code] - if code in insite.validLanguageLinks(): - site = insite.getSite(code = code) - if site in sites: - del sites[sites.index(site)] - firstsites = firstsites + [site] - sites = firstsites + sites - if insite.interwiki_putfirst_doubled(sites): #some implementations return False - sites = insite.interwiki_putfirst_doubled(sites) + sites - return sites - - -# Functions dealing with category links - -def getCategoryLinks(text, site): - """Return a list of category links found in text. - - List contains Category objects. - Do not call this routine directly, use Page.categories() instead. 
- - """ - result = [] - # Ignore category links within nowiki tags, pre tags, includeonly tags, - # and HTML comments - text = removeDisabledParts(text) - catNamespace = '|'.join(site.category_namespaces()) - R = re.compile(r'[[\s*(?P<namespace>%s)\s*:\s*(?P<catName>.+?)' - r'(?:|(?P<sortKey>.+?))?\s*]]' - % catNamespace, re.I) - for match in R.finditer(text): - cat = pywikibot.Category(site, - '%s:%s' % (match.group('namespace'), - match.group('catName')), - sortKey = match.group('sortKey')) - result.append(cat) - return result - - -def removeCategoryLinks(text, site, marker = ''): - """Return text with all category links removed. - - Put the string marker after the last replacement (at the end of the text - if there is no replacement). - - """ - # This regular expression will find every link that is possibly an - # interwiki link, plus trailing whitespace. The language code is grouped. - # NOTE: This assumes that language codes only consist of non-capital - # ASCII letters and hyphens. - catNamespace = '|'.join(site.category_namespaces()) - categoryR = re.compile(r'[[\s*(%s)\s*:.*?]]\s*' % catNamespace, re.I) - text = replaceExcept(text, categoryR, '', ['nowiki', 'comment', 'math', 'pre', 'source'], marker = marker) - if marker: - #avoid having multiple linefeeds at the end of the text - text = re.sub('\s*%s' % re.escape(marker), '\r\n' + marker, text.strip()) - return text.strip() - - -def replaceCategoryInPlace(oldtext, oldcat, newcat, site=None): - """Replace the category oldcat with the category newcat and return - the modified text. - - """ - if site is None: - site = pywikibot.getSite() - - catNamespace = '|'.join(site.category_namespaces()) - title = oldcat.titleWithoutNamespace() - if not title: - return - # title might contain regex special characters - title = re.escape(title) - # title might not be capitalized correctly on the wiki - if title[0].isalpha() and not site.nocapitalize: - title = "[%s%s]" % (title[0].upper(), title[0].lower()) + title[1:] - # spaces and underscores in page titles are interchangeable, and collapsible - title = title.replace(r"\ ", "[ _]+").replace(r"_", "[ _]+") - categoryR = re.compile(r'[[\s*(%s)\s*:\s*%s\s*((?:|[^]]+)?]])' - % (catNamespace, title), re.I) - if newcat is None: - text = replaceExcept(oldtext, categoryR, '', - ['nowiki', 'comment', 'math', 'pre', 'source']) - else: - text = replaceExcept(oldtext, categoryR, - '[[%s:%s\2' % (site.namespace(14), - newcat.titleWithoutNamespace()), - ['nowiki', 'comment', 'math', 'pre', 'source']) - return text - - -def replaceCategoryLinks(oldtext, new, site = None, addOnly = False): - """Replace the category links given in the wikitext given - in oldtext by the new links given in new. - - 'new' should be a list of Category objects. - - If addOnly is True, the old category won't be deleted and - the category(s) given will be added - (and so they won't replace anything). - """ - - # Find a marker that is not already in the text. - marker = '@@' - while marker in oldtext: - marker += '@' - - if site is None: - site = pywikibot.getSite() - if site.sitename() == 'wikipedia:de' and "{{Personendaten" in oldtext: - raise Error('The PyWikipediaBot is no longer allowed to touch categories on the German Wikipedia on pages that contain the person data template because of the non-standard placement of that template. 
See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006#...') - - s = categoryFormat(new, insite = site) - if addOnly: - s2 = oldtext - else: - s2 = removeCategoryLinks(oldtext, site = site, marker = marker) - - if s: - if site.language() in site.family.category_attop: - newtext = s + site.family.category_text_separator + s2 - else: - # calculate what was after the categories links on the page - firstafter = s2.find(marker) - # Is there any text in the 'after' part that means we should keep it after? - if "</noinclude>" in s2[firstafter:]: - newtext = s2[:firstafter] + s + s2[firstafter:] - elif site.language() in site.family.categories_last: - newtext = s2.replace(marker,'').strip() + site.family.category_text_separator + s - else: - interwiki = getLanguageLinks(s2) - s2 = removeLanguageLinks(s2.replace(marker,''), site) + site.family.category_text_separator + s - newtext = replaceLanguageLinks(s2, interwiki, site) - newtext = newtext.replace(marker,'') - else: - s2 = s2.replace(marker,'') - return s2 - return newtext.strip() - - -def categoryFormat(categories, insite = None): - """Return a string containing links to all categories in a list. - - 'categories' should be a list of Category objects. - - The string is formatted for inclusion in insite. - - """ - if not categories: - return '' - if insite is None: - insite = pywikibot.getSite() - catLinks = [category.aslink(noInterwiki = True) for category in categories] - if insite.category_on_one_line(): - sep = ' ' - else: - sep = '\r\n' - # Some people don't like the categories sorted - #catLinks.sort() - return sep.join(catLinks) + '\r\n' - - -def compileLinkR(withoutBracketed=False, onlyBracketed=False): - """Return a regex that matches external links.""" - # RFC 2396 says that URLs may only contain certain characters. - # For this regex we also accept non-allowed characters, so that the bot - # will later show these links as broken ('Non-ASCII Characters in URL'). - # Note: While allowing parenthesis inside URLs, MediaWiki will regard - # right parenthesis at the end of the URL as not part of that URL. - # The same applies to dot, comma, colon and some other characters. - notAtEnd = ']\s).:;,<>"' - # So characters inside the URL can be anything except whitespace, - # closing squared brackets, quotation marks, greater than and less - # than, and the last character also can't be parenthesis or another - # character disallowed by MediaWiki. - notInside = ']\s<>"' - # The first half of this regular expression is required because '' is - # not allowed inside links. For example, in this wiki text: - # ''Please see http://www.example.org.'' - # .'' shouldn't be considered as part of the link. - regex = r'(?P<url>http[s]?://[^' + notInside + ']*?[^' + notAtEnd + '](?=[' + notAtEnd+ ']*'')|http[s]?://[^' + notInside + ']*[^' + notAtEnd + '])' - - if withoutBracketed: - regex = r'(?<![)' + regex - elif onlyBracketed: - regex = r'[' + regex - linkR = re.compile(regex) - return linkR - -def extract_templates_and_params(text, get_redirect=False): - """Return list of template calls found in text. - - Return value is a list of tuples. There is one tuple for each use of a - template in the page, with the template title as the first entry and a - dict of parameters as the second entry. 
Parameters are indexed by - strings; as in MediaWiki, an unnamed parameter is given a parameter name - with an integer value corresponding to its position among the unnnamed - parameters, and if this results multiple parameters with the same name - only the last value provided will be returned. - - """ - # remove commented-out stuff etc. - thistxt = removeDisabledParts(text) - - # marker for inside templates or parameters - marker = u'@@' - while marker in thistxt: - marker += u'@' - - # marker for links - marker2 = u'##' - while marker2 in thistxt: - marker2 += u'#' - - # marker for math - marker3 = u'%%' - while marker2 in thistxt: - marker3 += u'%' - - result = [] - inside = {} - count = 0 - Rtemplate = re.compile( - ur'{{(msg:)?(?P<name>[^{|]+?)(|(?P<params>[^{]+?))?}}') - Rmath = re.compile(ur'<math>[^<]+</math>') - Rmarker = re.compile(ur'%s(\d+)%s' % (marker, marker)) - Rmarker2 = re.compile(ur'%s(\d+)%s' % (marker2, marker2)) - Rmarker3 = re.compile(ur'%s(\d+)%s' % (marker3, marker3)) - - # Replace math with markers - maths = {} - count = 0 - for m in Rmath.finditer(thistxt): - count += 1 - text = m.group() - thistxt = thistxt.replace(text, '%s%d%s' % (marker3, count, marker3)) - maths[count] = text - - while Rtemplate.search(thistxt) is not None: - for m in Rtemplate.finditer(thistxt): - # Make sure it is not detected again - count += 1 - text = m.group() - thistxt = thistxt.replace(text, - '%s%d%s' % (marker, count, marker)) - # Make sure stored templates don't contain markers - for m2 in Rmarker.finditer(text): - text = text.replace(m2.group(), inside[int(m2.group(1))]) - for m2 in Rmarker3.finditer(text): - text = text.replace(m2.group(), maths[int(m2.group(1))]) - inside[count] = text - - # Name - name = m.group('name').strip() - m2 = Rmarker.search(name) or Rmath.search(name) - if m2 is not None: - # Doesn't detect templates whose name changes, - # or templates whose name contains math tags - continue - # Parameters - paramString = m.group('params') - params = {} - numbered_param = 1 - if paramString: - # Replace wikilinks with markers - links = {} - count2 = 0 - for m2 in pywikibot.link_regex.finditer(paramString): - count2 += 1 - text = m2.group(0) - paramString = paramString.replace(text, - '%s%d%s' % (marker2, count2, marker2)) - links[count2] = text - # Parse string - markedParams = paramString.split('|') - # Replace markers - for param in markedParams: - if "=" in param: - param_name, param_val = param.split("=", 1) - else: - param_name = unicode(numbered_param) - param_val = param - numbered_param += 1 - for m2 in Rmarker.finditer(param_val): - param_val = param_val.replace(m2.group(), - inside[int(m2.group(1))]) - for m2 in Rmarker2.finditer(param_val): - param_val = param_val.replace(m2.group(), - links[int(m2.group(1))]) - for m2 in Rmarker3.finditer(param_val): - param_val = param_val.replace(m2.group(), - maths[int(m2.group(1))]) - params[param_name] = param_val - - # Add it to the result - result.append((name, params)) - return result - +# -*- coding: utf-8 -*- +""" +Functions for manipulating wiki-text. + +Unless otherwise noted, all functions take a unicode string as the argument +and return a unicode string. + +""" +# +# (C) Pywikipedia bot team, 2008 +# +# Distributed under the terms of the MIT license. 
+# +__version__ = '$Id$' + + +import pywikibot +import re + + +def unescape(s): + """Replace escaped HTML-special characters by their originals""" + if '&' not in s: + return s + s = s.replace("<", "<") + s = s.replace(">", ">") + s = s.replace("'", "'") + s = s.replace(""", '"') + s = s.replace("&", "&") # Must be last + return s + + +def replaceExcept(text, old, new, exceptions, caseInsensitive=False, + allowoverlap=False, marker = '', site = None): + """ + Return text with 'old' replaced by 'new', ignoring specified types of text. + + Skips occurences of 'old' within exceptions; e.g., within nowiki tags or + HTML comments. If caseInsensitive is true, then use case insensitive + regex matching. If allowoverlap is true, overlapping occurences are all + replaced (watch out when using this, it might lead to infinite loops!). + + Parameters: + text - a unicode string + old - a compiled regular expression + new - a unicode string (which can contain regular + expression references), or a function which takes + a match object as parameter. See parameter repl of + re.sub(). + exceptions - a list of strings which signal what to leave out, + e.g. ['math', 'table', 'template'] + caseInsensitive - a boolean + marker - a string that will be added to the last replacement; + if nothing is changed, it is added at the end + + """ + if site is None: + site = pywikibot.getSite() + + exceptionRegexes = { + 'comment': re.compile(r'(?s)<!--.*?-->'), + # section headers + 'header': re.compile(r'\r\n=+.+=+ *\r\n'), + 'includeonly': re.compile(r'(?is)<includeonly>.*?</includeonly>'), + 'math': re.compile(r'(?is)<math>.*?</math>'), + 'noinclude': re.compile(r'(?is)<noinclude>.*?</noinclude>'), + # wiki tags are ignored inside nowiki tags. + 'nowiki': re.compile(r'(?is)<nowiki>.*?</nowiki>'), + # preformatted text + 'pre': re.compile(r'(?ism)<pre>.*?</pre>'), + 'source': re.compile(r'(?is)<source .*?</source>'), + # inline references + 'ref': re.compile(r'(?ism)<ref[ >].*?</ref>'), + 'timeline': re.compile(r'(?is)<timeline>.*?</timeline>'), + # lines that start with a space are shown in a monospace font and + # have whitespace preserved. + 'startspace': re.compile(r'(?m)^ (.*?)$'), + # tables often have whitespace that is used to improve wiki + # source code readability. + # TODO: handle nested tables. + 'table': re.compile(r'(?ims)^{|.*?^|}|<table>.*?</table>'), + # templates with parameters often have whitespace that is used to + # improve wiki source code readability. + # 'template': re.compile(r'(?s){{.*?}}'), + # The regex above fails on nested templates. This regex can handle + # templates cascaded up to level 3, but no deeper. For arbitrary + # depth, we'd need recursion which can't be done in Python's re. + # After all, the language of correct parenthesis words is not regular. + 'template': re.compile(r'(?s){{(({{(({{.*?}})|.)*}})|.)*}}'), + 'hyperlink': compileLinkR(), + 'gallery': re.compile(r'(?is)<gallery.*?>.*?</gallery>'), + # this matches internal wikilinks, but also interwiki, categories, and + # images. 
+ 'link': re.compile(r'[[[^]|]*(|[^]]*)?]]'), + 'interwiki': re.compile(r'(?i)[[(%s)\s?:[^]]*]][\s]*' + % '|'.join(site.validLanguageLinks() + site.family.obsolete.keys())), + + } + + # if we got a string, compile it as a regular expression + if type(old) is str or type(old) is unicode: + if caseInsensitive: + old = re.compile(old, re.IGNORECASE | re.UNICODE) + else: + old = re.compile(old) + + dontTouchRegexes = [] + for exc in exceptions: + if isinstance(exc, str) or isinstance(exc, unicode): + # assume it's a reference to the exceptionRegexes dictionary + # defined above. + if not exceptionRegexes.has_key(exc): + raise ValueError("Unknown tag type: " + exc) + dontTouchRegexes.append(exceptionRegexes[exc]) + else: + # assume it's a regular expression + dontTouchRegexes.append(exc) + index = 0 + markerpos = len(text) + while True: + match = old.search(text, index) + if not match: + # nothing left to replace + break + + # check which exception will occur next. + nextExceptionMatch = None + for dontTouchR in dontTouchRegexes: + excMatch = dontTouchR.search(text, index) + if excMatch and ( + nextExceptionMatch is None or + excMatch.start() < nextExceptionMatch.start()): + nextExceptionMatch = excMatch + + if nextExceptionMatch is not None and nextExceptionMatch.start() <= match.start(): + # an HTML comment or text in nowiki tags stands before the next valid match. Skip. + index = nextExceptionMatch.end() + else: + # We found a valid match. Replace it. + if callable(new): + # the parameter new can be a function which takes the match as a parameter. + replacement = new(match) + else: + # it is not a function, but a string. + + # it is a little hack to make \n work. It would be better to fix it + # previously, but better than nothing. + new = new.replace('\n', '\n') + + # We cannot just insert the new string, as it may contain regex + # group references such as \2 or \g<name>. + # On the other hand, this approach does not work because it can't + # handle lookahead or lookbehind (see bug #1731008): + #replacement = old.sub(new, text[match.start():match.end()]) + #text = text[:match.start()] + replacement + text[match.end():] + + # So we have to process the group references manually. + replacement = new + + groupR = re.compile(r'\(?P<number>\d+)|\g<(?P<name>.+?)>') + while True: + groupMatch = groupR.search(replacement) + if not groupMatch: + break + groupID = groupMatch.group('name') or int(groupMatch.group('number')) + replacement = replacement[:groupMatch.start()] + match.group(groupID) + replacement[groupMatch.end():] + text = text[:match.start()] + replacement + text[match.end():] + + # continue the search on the remaining text + if allowoverlap: + index = match.start() + 1 + else: + index = match.start() + len(replacement) + markerpos = match.start() + len(replacement) + text = text[:markerpos] + marker + text[markerpos:] + return text + + +def removeDisabledParts(text, tags = ['*']): + """ + Return text without portions where wiki markup is disabled + + Parts that can/will be removed are -- + * HTML comments + * nowiki tags + * pre tags + * includeonly tags + + The exact set of parts which should be removed can be passed as the + 'parts' parameter, which defaults to all. 
+ """ + regexes = { + 'comments' : r'<!--.*?-->', + 'includeonly': r'<includeonly>.*?</includeonly>', + 'nowiki': r'<nowiki>.*?</nowiki>', + 'pre': r'<pre>.*?</pre>', + 'source': r'<source .*?</source>', + } + if '*' in tags: + tags = regexes.keys() + toRemoveR = re.compile('|'.join([regexes[tag] for tag in tags]), + re.IGNORECASE | re.DOTALL) + return toRemoveR.sub('', text) + + +def isDisabled(text, index, tags = ['*']): + """ + Return True if text[index] is disabled, e.g. by a comment or by nowiki tags. + + For the tags parameter, see removeDisabledParts() above. + """ + # Find a marker that is not already in the text. + marker = '@@' + while marker in text: + marker += '@' + text = text[:index] + marker + text[index:] + text = removeDisabledParts(text, tags) + return (marker not in text) + + +# Functions dealing with interwiki language links + +# Note - MediaWiki supports two kinds of interwiki links; interlanguage and +# interproject. These functions only deal with links to a +# corresponding page in another language on the same project (e.g., +# Wikipedia, Wiktionary, etc.) in another language. They do not find +# or change links to a different project, or any that are formatted +# as in-line interwiki links (e.g., "[[:es:Articulo]]". (CONFIRM) + +def getLanguageLinks(text, insite = None, pageLink = "[[]]"): + """ + Return a dict of interlanguage links found in text. + + Dict uses language codes as keys and Page objects as values. + Do not call this routine directly, use Page.interwiki() method + instead. + + """ + if insite == None: + insite = pywikibot.getSite() + result = {} + # Ignore interwiki links within nowiki tags, includeonly tags, pre tags, + # and HTML comments + text = removeDisabledParts(text) + + # This regular expression will find every link that is possibly an + # interwiki link. + # NOTE: language codes are case-insensitive and only consist of basic latin + # letters and hyphens. + interwikiR = re.compile(r'[[([a-zA-Z-]+)\s?:([^[]\n]*)]]') + for lang, pagetitle in interwikiR.findall(text): + lang = lang.lower() + # Check if it really is in fact an interwiki link to a known + # language, or if it's e.g. a category tag or an internal link + if lang in insite.family.obsolete: + lang = insite.family.obsolete[lang] + if lang in insite.validLanguageLinks(): + if '|' in pagetitle: + # ignore text after the pipe + pagetitle = pagetitle[:pagetitle.index('|')] + # we want the actual page objects rather than the titles + site = insite.getSite(code = lang) + try: + result[site] = pywikibot.Page(site, pagetitle, insite = insite) + except InvalidTitle: + output( + u"[getLanguageLinks] Text contains invalid interwiki link [[%s:%s]]." + % (lang, pagetitle)) + continue + return result + + +def removeLanguageLinks(text, site = None, marker = ''): + """Return text with all interlanguage links removed. + + If a link to an unknown language is encountered, a warning is printed. + If a marker is defined, that string is placed at the location of the + last occurence of an interwiki link (at the end if there are no + interwiki links). + + """ + if site == None: + site = pywikibot.getSite() + if not site.validLanguageLinks(): + return text + # This regular expression will find every interwiki link, plus trailing + # whitespace. 
+ languages = '|'.join(site.validLanguageLinks() + site.family.obsolete.keys()) + interwikiR = re.compile(r'[[(%s)\s?:[^]]*]][\s]*' + % languages, re.IGNORECASE) + text = replaceExcept(text, interwikiR, '', + ['nowiki', 'comment', 'math', 'pre', 'source'], marker=marker) + return text.strip() + + +def replaceLanguageLinks(oldtext, new, site = None): + """Replace interlanguage links in the text with a new set of links. + + 'new' should be a dict with the Site objects as keys, and Page objects + as values (i.e., just like the dict returned by getLanguageLinks + function). + + """ + # Find a marker that is not already in the text. + marker = '@@' + while marker in oldtext: + marker += '@' + if site == None: + site = pywikibot.getSite() + s = interwikiFormat(new, insite = site) + s2 = removeLanguageLinks(oldtext, site = site, marker = marker) + if s: + if site.language() in site.family.interwiki_attop: + newtext = s + site.family.interwiki_text_separator + s2.replace(marker,'').strip() + else: + # calculate what was after the language links on the page + firstafter = s2.find(marker) + len(marker) + # Is there any text in the 'after' part that means we should keep it after? + if "</noinclude>" in s2[firstafter:]: + newtext = s2[:firstafter] + s + s2[firstafter:] + elif site.language() in site.family.categories_last: + cats = getCategoryLinks(s2, site = site) + s2 = removeCategoryLinks(s2.replace(marker,'').strip(), site) + site.family.interwiki_text_separator + s + newtext = replaceCategoryLinks(s2, cats, site=site) + else: + newtext = s2.replace(marker,'').strip() + site.family.interwiki_text_separator + s + newtext = newtext.replace(marker,'') + else: + newtext = s2.replace(marker,'') + return newtext + + +def interwikiFormat(links, insite = None): + """Convert interwiki link dict into a wikitext string. + + 'links' should be a dict with the Site objects as keys, and Page + objects as values. + + Return a unicode string that is formatted for inclusion in insite + (defaulting to the current site). + """ + if insite is None: + insite = pywikibot.getSite() + if not links: + return '' + + ar = interwikiSort(links.keys(), insite) + s = [] + for site in ar: + try: + link = links[site].aslink(forceInterwiki=True) + s.append(link) + except AttributeError: + s.append(pywikibot.getSite(site).linkto(links[site], + othersite=insite)) + if insite.lang in insite.family.interwiki_on_one_line: + sep = u' ' + else: + sep = u'\r\n' + s=sep.join(s) + u'\r\n' + return s + + +# Sort sites according to local interwiki sort logic +def interwikiSort(sites, insite = None): + if insite is None: + insite = pywikibot.getSite() + if not sites: + return [] + + sites.sort() + putfirst = insite.interwiki_putfirst() + if putfirst: + #In this case I might have to change the order + firstsites = [] + for code in putfirst: + # The code may not exist in this family? + if code in insite.family.obsolete: + code = insite.family.obsolete[code] + if code in insite.validLanguageLinks(): + site = insite.getSite(code = code) + if site in sites: + del sites[sites.index(site)] + firstsites = firstsites + [site] + sites = firstsites + sites + if insite.interwiki_putfirst_doubled(sites): #some implementations return False + sites = insite.interwiki_putfirst_doubled(sites) + sites + return sites + + +# Functions dealing with category links + +def getCategoryLinks(text, site): + """Return a list of category links found in text. + + List contains Category objects. + Do not call this routine directly, use Page.categories() instead. 
+ + """ + result = [] + # Ignore category links within nowiki tags, pre tags, includeonly tags, + # and HTML comments + text = removeDisabledParts(text) + catNamespace = '|'.join(site.category_namespaces()) + R = re.compile(r'[[\s*(?P<namespace>%s)\s*:\s*(?P<catName>.+?)' + r'(?:|(?P<sortKey>.+?))?\s*]]' + % catNamespace, re.I) + for match in R.finditer(text): + cat = pywikibot.Category(site, + '%s:%s' % (match.group('namespace'), + match.group('catName')), + sortKey = match.group('sortKey')) + result.append(cat) + return result + + +def removeCategoryLinks(text, site, marker = ''): + """Return text with all category links removed. + + Put the string marker after the last replacement (at the end of the text + if there is no replacement). + + """ + # This regular expression will find every link that is possibly an + # interwiki link, plus trailing whitespace. The language code is grouped. + # NOTE: This assumes that language codes only consist of non-capital + # ASCII letters and hyphens. + catNamespace = '|'.join(site.category_namespaces()) + categoryR = re.compile(r'[[\s*(%s)\s*:.*?]]\s*' % catNamespace, re.I) + text = replaceExcept(text, categoryR, '', ['nowiki', 'comment', 'math', 'pre', 'source'], marker = marker) + if marker: + #avoid having multiple linefeeds at the end of the text + text = re.sub('\s*%s' % re.escape(marker), '\r\n' + marker, text.strip()) + return text.strip() + + +def replaceCategoryInPlace(oldtext, oldcat, newcat, site=None): + """Replace the category oldcat with the category newcat and return + the modified text. + + """ + if site is None: + site = pywikibot.getSite() + + catNamespace = '|'.join(site.category_namespaces()) + title = oldcat.titleWithoutNamespace() + if not title: + return + # title might contain regex special characters + title = re.escape(title) + # title might not be capitalized correctly on the wiki + if title[0].isalpha() and not site.nocapitalize: + title = "[%s%s]" % (title[0].upper(), title[0].lower()) + title[1:] + # spaces and underscores in page titles are interchangeable, and collapsible + title = title.replace(r"\ ", "[ _]+").replace(r"_", "[ _]+") + categoryR = re.compile(r'[[\s*(%s)\s*:\s*%s\s*((?:|[^]]+)?]])' + % (catNamespace, title), re.I) + if newcat is None: + text = replaceExcept(oldtext, categoryR, '', + ['nowiki', 'comment', 'math', 'pre', 'source']) + else: + text = replaceExcept(oldtext, categoryR, + '[[%s:%s\2' % (site.namespace(14), + newcat.titleWithoutNamespace()), + ['nowiki', 'comment', 'math', 'pre', 'source']) + return text + + +def replaceCategoryLinks(oldtext, new, site = None, addOnly = False): + """Replace the category links given in the wikitext given + in oldtext by the new links given in new. + + 'new' should be a list of Category objects. + + If addOnly is True, the old category won't be deleted and + the category(s) given will be added + (and so they won't replace anything). + """ + + # Find a marker that is not already in the text. + marker = '@@' + while marker in oldtext: + marker += '@' + + if site is None: + site = pywikibot.getSite() + if site.sitename() == 'wikipedia:de' and "{{Personendaten" in oldtext: + raise Error('The PyWikipediaBot is no longer allowed to touch categories on the German Wikipedia on pages that contain the person data template because of the non-standard placement of that template. 
See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006#...') + + s = categoryFormat(new, insite = site) + if addOnly: + s2 = oldtext + else: + s2 = removeCategoryLinks(oldtext, site = site, marker = marker) + + if s: + if site.language() in site.family.category_attop: + newtext = s + site.family.category_text_separator + s2 + else: + # calculate what was after the categories links on the page + firstafter = s2.find(marker) + # Is there any text in the 'after' part that means we should keep it after? + if "</noinclude>" in s2[firstafter:]: + newtext = s2[:firstafter] + s + s2[firstafter:] + elif site.language() in site.family.categories_last: + newtext = s2.replace(marker,'').strip() + site.family.category_text_separator + s + else: + interwiki = getLanguageLinks(s2) + s2 = removeLanguageLinks(s2.replace(marker,''), site) + site.family.category_text_separator + s + newtext = replaceLanguageLinks(s2, interwiki, site) + newtext = newtext.replace(marker,'') + else: + s2 = s2.replace(marker,'') + return s2 + return newtext.strip() + + +def categoryFormat(categories, insite = None): + """Return a string containing links to all categories in a list. + + 'categories' should be a list of Category objects. + + The string is formatted for inclusion in insite. + + """ + if not categories: + return '' + if insite is None: + insite = pywikibot.getSite() + catLinks = [category.aslink(noInterwiki = True) for category in categories] + if insite.category_on_one_line(): + sep = ' ' + else: + sep = '\r\n' + # Some people don't like the categories sorted + #catLinks.sort() + return sep.join(catLinks) + '\r\n' + + +def compileLinkR(withoutBracketed=False, onlyBracketed=False): + """Return a regex that matches external links.""" + # RFC 2396 says that URLs may only contain certain characters. + # For this regex we also accept non-allowed characters, so that the bot + # will later show these links as broken ('Non-ASCII Characters in URL'). + # Note: While allowing parenthesis inside URLs, MediaWiki will regard + # right parenthesis at the end of the URL as not part of that URL. + # The same applies to dot, comma, colon and some other characters. + notAtEnd = ']\s).:;,<>"' + # So characters inside the URL can be anything except whitespace, + # closing squared brackets, quotation marks, greater than and less + # than, and the last character also can't be parenthesis or another + # character disallowed by MediaWiki. + notInside = ']\s<>"' + # The first half of this regular expression is required because '' is + # not allowed inside links. For example, in this wiki text: + # ''Please see http://www.example.org.'' + # .'' shouldn't be considered as part of the link. + regex = r'(?P<url>http[s]?://[^' + notInside + ']*?[^' + notAtEnd + '](?=[' + notAtEnd+ ']*'')|http[s]?://[^' + notInside + ']*[^' + notAtEnd + '])' + + if withoutBracketed: + regex = r'(?<![)' + regex + elif onlyBracketed: + regex = r'[' + regex + linkR = re.compile(regex) + return linkR + +def extract_templates_and_params(text, get_redirect=False): + """Return list of template calls found in text. + + Return value is a list of tuples. There is one tuple for each use of a + template in the page, with the template title as the first entry and a + dict of parameters as the second entry. 
Parameters are indexed by + strings; as in MediaWiki, an unnamed parameter is given a parameter name + with an integer value corresponding to its position among the unnnamed + parameters, and if this results multiple parameters with the same name + only the last value provided will be returned. + + """ + # remove commented-out stuff etc. + thistxt = removeDisabledParts(text) + + # marker for inside templates or parameters + marker = u'@@' + while marker in thistxt: + marker += u'@' + + # marker for links + marker2 = u'##' + while marker2 in thistxt: + marker2 += u'#' + + # marker for math + marker3 = u'%%' + while marker2 in thistxt: + marker3 += u'%' + + result = [] + inside = {} + count = 0 + Rtemplate = re.compile( + ur'{{(msg:)?(?P<name>[^{|]+?)(|(?P<params>[^{]+?))?}}') + Rmath = re.compile(ur'<math>[^<]+</math>') + Rmarker = re.compile(ur'%s(\d+)%s' % (marker, marker)) + Rmarker2 = re.compile(ur'%s(\d+)%s' % (marker2, marker2)) + Rmarker3 = re.compile(ur'%s(\d+)%s' % (marker3, marker3)) + + # Replace math with markers + maths = {} + count = 0 + for m in Rmath.finditer(thistxt): + count += 1 + text = m.group() + thistxt = thistxt.replace(text, '%s%d%s' % (marker3, count, marker3)) + maths[count] = text + + while Rtemplate.search(thistxt) is not None: + for m in Rtemplate.finditer(thistxt): + # Make sure it is not detected again + count += 1 + text = m.group() + thistxt = thistxt.replace(text, + '%s%d%s' % (marker, count, marker)) + # Make sure stored templates don't contain markers + for m2 in Rmarker.finditer(text): + text = text.replace(m2.group(), inside[int(m2.group(1))]) + for m2 in Rmarker3.finditer(text): + text = text.replace(m2.group(), maths[int(m2.group(1))]) + inside[count] = text + + # Name + name = m.group('name').strip() + m2 = Rmarker.search(name) or Rmath.search(name) + if m2 is not None: + # Doesn't detect templates whose name changes, + # or templates whose name contains math tags + continue + # Parameters + paramString = m.group('params') + params = {} + numbered_param = 1 + if paramString: + # Replace wikilinks with markers + links = {} + count2 = 0 + for m2 in pywikibot.link_regex.finditer(paramString): + count2 += 1 + text = m2.group(0) + paramString = paramString.replace(text, + '%s%d%s' % (marker2, count2, marker2)) + links[count2] = text + # Parse string + markedParams = paramString.split('|') + # Replace markers + for param in markedParams: + if "=" in param: + param_name, param_val = param.split("=", 1) + else: + param_name = unicode(numbered_param) + param_val = param + numbered_param += 1 + for m2 in Rmarker.finditer(param_val): + param_val = param_val.replace(m2.group(), + inside[int(m2.group(1))]) + for m2 in Rmarker2.finditer(param_val): + param_val = param_val.replace(m2.group(), + links[int(m2.group(1))]) + for m2 in Rmarker3.finditer(param_val): + param_val = param_val.replace(m2.group(), + maths[int(m2.group(1))]) + params[param_name] = param_val + + # Add it to the result + result.append((name, params)) + return result +
Property changes on: branches/rewrite/pywikibot/textlib.py ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision Added: svn:eol-style + native
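As a rough illustration of how the textlib helpers above might be called from a bot script (the page title, the page.get() call, and the choice of exception tags are placeholder assumptions, not part of this change set):

    import pywikibot
    from pywikibot import textlib

    site = pywikibot.getSite()              # default site from user-config
    page = pywikibot.Page(site, 'Sandbox')  # placeholder title
    text = page.get()

    # Replace 'foo' with 'bar' everywhere except inside nowiki tags,
    # HTML comments, and template calls.
    newtext = textlib.replaceExcept(text, r'foo', 'bar',
                                    ['nowiki', 'comment', 'template'],
                                    site=site)

    # List every template call on the page with its parameter dict.
    for name, params in textlib.extract_templates_and_params(text):
        print name, params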
Modified: branches/rewrite/pywikibot/throttle.py =================================================================== --- branches/rewrite/pywikibot/throttle.py 2008-12-16 19:34:48 UTC (rev 6155) +++ branches/rewrite/pywikibot/throttle.py 2008-12-16 19:40:20 UTC (rev 6156) @@ -1,275 +1,275 @@ -# -*- coding: utf-8 -*- -""" -Mechanics to slow down wiki read and/or write rate. -""" -# -# (C) Pywikipedia bot team, 2008 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id: $' - -import pywikibot -from pywikibot import config2 as config - -import logging -import math -import threading -import time - -logger = logging.getLogger("wiki.throttle") - -pid = False # global process identifier - # when the first Throttle is instantiated, it will set this - # variable to a positive integer, which will apply to all - # throttle objects created by this process. - - -class Throttle(object): - """Control rate of access to wiki server - - Calling this object blocks the calling thread until at least 'delay' - seconds have passed since the previous call. - - Each Site initiates one Throttle object (site.throttle) to control the - rate of access. - - """ - def __init__(self, site, mindelay=None, maxdelay=None, writedelay=None, - multiplydelay=True, verbosedelay=False): - self.lock = threading.RLock() - self.mysite = str(site) - self.logfn = config.datafilepath('throttle.log') - self.mindelay = mindelay - if self.mindelay is None: - self.mindelay = config.minthrottle - self.maxdelay = maxdelay - if self.maxdelay is None: - self.maxdelay = config.maxthrottle - self.writedelay = writedelay - self.last_read = 0 - self.last_write = 0 - self.next_multiplicity = 1.0 - self.checkdelay = 300 # Check logfile again after this many seconds - self.dropdelay = 600 # Ignore processes that have not made - # a check in this many seconds - self.releasepid = 1200 # Free the process id after this many seconds - self.lastwait = 0.0 - self.delay = 0 - self.verbosedelay = verbosedelay - if multiplydelay: - self.checkMultiplicity() - self.setDelays() - - def checkMultiplicity(self): - """Count running processes for site and set process_multiplicity.""" - global pid - self.lock.acquire() - mysite = self.mysite - logger.debug("Checking multiplicity: pid = %(pid)s" % globals()) - try: - processes = [] - my_pid = pid or 1 # start at 1 if global pid not yet set - count = 1 - # open throttle.log - try: - f = open(self.logfn, 'r') - except IOError: - if not pid: - pass - else: - raise - else: - now = time.time() - for line in f.readlines(): - # parse line; format is "pid timestamp site" - try: - line = line.split(' ') - this_pid = int(line[0]) - ptime = int(line[1].split('.')[0]) - this_site = line[2].rstrip() - except (IndexError, ValueError): - continue # Sometimes the file gets corrupted - # ignore that line - if now - ptime > self.releasepid: - continue # process has expired, drop from file - if now - ptime <= self.dropdelay \ - and this_site == mysite \ - and this_pid != pid: - count += 1 - if this_site != self.mysite or this_pid != pid: - processes.append({'pid': this_pid, - 'time': ptime, - 'site': this_site}) - if not pid and this_pid >= my_pid: - my_pid = this_pid+1 # next unused process id - - if not pid: - pid = my_pid - self.checktime = time.time() - processes.append({'pid': pid, - 'time': self.checktime, - 'site': mysite}) - f = open(self.logfn, 'w') - processes.sort(key=lambda p:(p['pid'], p['site'])) - for p in processes: - f.write("%(pid)s %(time)s %(site)s\n" % p) - f.close() - self.process_multiplicity 
= count - if self.verbosedelay: - logger.info( -u"Found %(count)s %(mysite)s processes running, including this one." - % locals()) - finally: - self.lock.release() - - def setDelays(self, delay=None, writedelay=None, absolute=False): - """Set the nominal delays in seconds. Defaults to config values.""" - self.lock.acquire() - try: - maxdelay = self.maxdelay - if delay is None: - delay = self.mindelay - if writedelay is None: - writedelay = config.put_throttle - if absolute: - self.maxdelay = delay - self.mindelay = delay - self.delay = delay - self.writedelay = min(max(self.mindelay, writedelay), - self.maxdelay) - # Start the delay count now, not at the next check - self.last_read = self.last_write = time.time() - finally: - self.lock.release() - - def getDelay(self, write=False): - """Return the actual delay, accounting for multiple processes. - - This value is the maximum wait between reads/writes, not taking - account of how much time has elapsed since the last access. - - """ - global pid - if write: - thisdelay = self.writedelay - else: - thisdelay = self.delay - if pid: # If set, we're checking for multiple processes - if time.time() > self.checktime + self.checkdelay: - self.checkMultiplicity() - if thisdelay < (self.mindelay * self.next_multiplicity): - thisdelay = self.mindelay * self.next_multiplicity - elif thisdelay > self.maxdelay: - thisdelay = self.maxdelay - thisdelay *= self.process_multiplicity - return thisdelay - - def waittime(self, write=False): - """Return waiting time in seconds if a query would be made right now""" - # Take the previous requestsize in account calculating the desired - # delay this time - thisdelay = self.getDelay(write=write) - now = time.time() - if write: - ago = now - self.last_write - else: - ago = now - self.last_read - if ago < thisdelay: - delta = thisdelay - ago - return delta - else: - return 0.0 - - def drop(self): - """Remove me from the list of running bot processes.""" - # drop all throttles with this process's pid, regardless of site - self.checktime = 0 - processes = [] - try: - f = open(self.logfn, 'r') - except IOError: - return - else: - now = time.time() - for line in f.readlines(): - try: - line = line.split(' ') - this_pid = int(line[0]) - ptime = int(line[1].split('.')[0]) - this_site = line[2].rstrip() - except (IndexError,ValueError): - continue # Sometimes the file gets corrupted - # ignore that line - if now - ptime <= self.releasepid \ - and this_pid != pid: - processes.append({'pid': this_pid, - 'time': ptime, - 'site': this_site}) - f = open(self.logfn, 'w') - processes.sort(key=lambda p:p['pid']) - for p in processes: - f.write("%(pid)s %(time)s %(site)s\n" % p) - f.close() - - def __call__(self, requestsize=1, write=False): - """ - Block the calling program if the throttle time has not expired. - - Parameter requestsize is the number of Pages to be read/written; - multiply delay time by an appropriate factor. - - Because this seizes the throttle lock, it will prevent any other - thread from writing to the same site until the wait expires. - - """ - self.lock.acquire() - try: - wait = self.waittime(write=write) - # Calculate the multiplicity of the next delay based on how - # big the request is that is being posted now. - # We want to add "one delay" for each factor of two in the - # size of the request. Getting 64 pages at once allows 6 times - # the delay time for the server. 
- self.next_multiplicity = math.log(1+requestsize)/math.log(2.0) - # Announce the delay if it exceeds a preset limit - if wait > config.noisysleep: - logger.info(u"Sleeping for %(wait).1f seconds, %(now)s" - % {'wait': wait, - 'now': time.strftime("%Y-%m-%d %H:%M:%S", - time.localtime()) - } ) - time.sleep(wait) - if write: - self.last_write = time.time() - else: - self.last_read = time.time() - finally: - self.lock.release() - - def lag(self, lagtime): - """ - Seize the throttle lock due to server lag. - - This will prevent any thread from accessing this site. - - """ - started = time.time() - self.lock.acquire() - try: - # start at 1/2 the current server lag time - # wait at least 5 seconds but not more than 120 seconds - delay = min(max(5, lagtime//2), 120) - # account for any time we waited while acquiring the lock - wait = delay - (time.time() - started) - if wait > 0: - if wait > config.noisysleep: - logger.info(u"Sleeping for %(wait).1f seconds, %(now)s" - % {'wait': wait, - 'now': time.strftime("%Y-%m-%d %H:%M:%S", - time.localtime()) - } ) - time.sleep(wait) - finally: - self.lock.release() - +# -*- coding: utf-8 -*- +""" +Mechanics to slow down wiki read and/or write rate. +""" +# +# (C) Pywikipedia bot team, 2008 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' + +import pywikibot +from pywikibot import config2 as config + +import logging +import math +import threading +import time + +logger = logging.getLogger("wiki.throttle") + +pid = False # global process identifier + # when the first Throttle is instantiated, it will set this + # variable to a positive integer, which will apply to all + # throttle objects created by this process. + + +class Throttle(object): + """Control rate of access to wiki server + + Calling this object blocks the calling thread until at least 'delay' + seconds have passed since the previous call. + + Each Site initiates one Throttle object (site.throttle) to control the + rate of access. 
+ + """ + def __init__(self, site, mindelay=None, maxdelay=None, writedelay=None, + multiplydelay=True, verbosedelay=False): + self.lock = threading.RLock() + self.mysite = str(site) + self.logfn = config.datafilepath('throttle.log') + self.mindelay = mindelay + if self.mindelay is None: + self.mindelay = config.minthrottle + self.maxdelay = maxdelay + if self.maxdelay is None: + self.maxdelay = config.maxthrottle + self.writedelay = writedelay + self.last_read = 0 + self.last_write = 0 + self.next_multiplicity = 1.0 + self.checkdelay = 300 # Check logfile again after this many seconds + self.dropdelay = 600 # Ignore processes that have not made + # a check in this many seconds + self.releasepid = 1200 # Free the process id after this many seconds + self.lastwait = 0.0 + self.delay = 0 + self.verbosedelay = verbosedelay + if multiplydelay: + self.checkMultiplicity() + self.setDelays() + + def checkMultiplicity(self): + """Count running processes for site and set process_multiplicity.""" + global pid + self.lock.acquire() + mysite = self.mysite + logger.debug("Checking multiplicity: pid = %(pid)s" % globals()) + try: + processes = [] + my_pid = pid or 1 # start at 1 if global pid not yet set + count = 1 + # open throttle.log + try: + f = open(self.logfn, 'r') + except IOError: + if not pid: + pass + else: + raise + else: + now = time.time() + for line in f.readlines(): + # parse line; format is "pid timestamp site" + try: + line = line.split(' ') + this_pid = int(line[0]) + ptime = int(line[1].split('.')[0]) + this_site = line[2].rstrip() + except (IndexError, ValueError): + continue # Sometimes the file gets corrupted + # ignore that line + if now - ptime > self.releasepid: + continue # process has expired, drop from file + if now - ptime <= self.dropdelay \ + and this_site == mysite \ + and this_pid != pid: + count += 1 + if this_site != self.mysite or this_pid != pid: + processes.append({'pid': this_pid, + 'time': ptime, + 'site': this_site}) + if not pid and this_pid >= my_pid: + my_pid = this_pid+1 # next unused process id + + if not pid: + pid = my_pid + self.checktime = time.time() + processes.append({'pid': pid, + 'time': self.checktime, + 'site': mysite}) + f = open(self.logfn, 'w') + processes.sort(key=lambda p:(p['pid'], p['site'])) + for p in processes: + f.write("%(pid)s %(time)s %(site)s\n" % p) + f.close() + self.process_multiplicity = count + if self.verbosedelay: + logger.info( +u"Found %(count)s %(mysite)s processes running, including this one." + % locals()) + finally: + self.lock.release() + + def setDelays(self, delay=None, writedelay=None, absolute=False): + """Set the nominal delays in seconds. Defaults to config values.""" + self.lock.acquire() + try: + maxdelay = self.maxdelay + if delay is None: + delay = self.mindelay + if writedelay is None: + writedelay = config.put_throttle + if absolute: + self.maxdelay = delay + self.mindelay = delay + self.delay = delay + self.writedelay = min(max(self.mindelay, writedelay), + self.maxdelay) + # Start the delay count now, not at the next check + self.last_read = self.last_write = time.time() + finally: + self.lock.release() + + def getDelay(self, write=False): + """Return the actual delay, accounting for multiple processes. + + This value is the maximum wait between reads/writes, not taking + account of how much time has elapsed since the last access. 
+ + """ + global pid + if write: + thisdelay = self.writedelay + else: + thisdelay = self.delay + if pid: # If set, we're checking for multiple processes + if time.time() > self.checktime + self.checkdelay: + self.checkMultiplicity() + if thisdelay < (self.mindelay * self.next_multiplicity): + thisdelay = self.mindelay * self.next_multiplicity + elif thisdelay > self.maxdelay: + thisdelay = self.maxdelay + thisdelay *= self.process_multiplicity + return thisdelay + + def waittime(self, write=False): + """Return waiting time in seconds if a query would be made right now""" + # Take the previous requestsize in account calculating the desired + # delay this time + thisdelay = self.getDelay(write=write) + now = time.time() + if write: + ago = now - self.last_write + else: + ago = now - self.last_read + if ago < thisdelay: + delta = thisdelay - ago + return delta + else: + return 0.0 + + def drop(self): + """Remove me from the list of running bot processes.""" + # drop all throttles with this process's pid, regardless of site + self.checktime = 0 + processes = [] + try: + f = open(self.logfn, 'r') + except IOError: + return + else: + now = time.time() + for line in f.readlines(): + try: + line = line.split(' ') + this_pid = int(line[0]) + ptime = int(line[1].split('.')[0]) + this_site = line[2].rstrip() + except (IndexError,ValueError): + continue # Sometimes the file gets corrupted + # ignore that line + if now - ptime <= self.releasepid \ + and this_pid != pid: + processes.append({'pid': this_pid, + 'time': ptime, + 'site': this_site}) + f = open(self.logfn, 'w') + processes.sort(key=lambda p:p['pid']) + for p in processes: + f.write("%(pid)s %(time)s %(site)s\n" % p) + f.close() + + def __call__(self, requestsize=1, write=False): + """ + Block the calling program if the throttle time has not expired. + + Parameter requestsize is the number of Pages to be read/written; + multiply delay time by an appropriate factor. + + Because this seizes the throttle lock, it will prevent any other + thread from writing to the same site until the wait expires. + + """ + self.lock.acquire() + try: + wait = self.waittime(write=write) + # Calculate the multiplicity of the next delay based on how + # big the request is that is being posted now. + # We want to add "one delay" for each factor of two in the + # size of the request. Getting 64 pages at once allows 6 times + # the delay time for the server. + self.next_multiplicity = math.log(1+requestsize)/math.log(2.0) + # Announce the delay if it exceeds a preset limit + if wait > config.noisysleep: + logger.info(u"Sleeping for %(wait).1f seconds, %(now)s" + % {'wait': wait, + 'now': time.strftime("%Y-%m-%d %H:%M:%S", + time.localtime()) + } ) + time.sleep(wait) + if write: + self.last_write = time.time() + else: + self.last_read = time.time() + finally: + self.lock.release() + + def lag(self, lagtime): + """ + Seize the throttle lock due to server lag. + + This will prevent any thread from accessing this site. + + """ + started = time.time() + self.lock.acquire() + try: + # start at 1/2 the current server lag time + # wait at least 5 seconds but not more than 120 seconds + delay = min(max(5, lagtime//2), 120) + # account for any time we waited while acquiring the lock + wait = delay - (time.time() - started) + if wait > 0: + if wait > config.noisysleep: + logger.info(u"Sleeping for %(wait).1f seconds, %(now)s" + % {'wait': wait, + 'now': time.strftime("%Y-%m-%d %H:%M:%S", + time.localtime()) + } ) + time.sleep(wait) + finally: + self.lock.release() +
Property changes on: branches/rewrite/pywikibot/throttle.py ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision Added: svn:eol-style + native
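A small sketch of how the Throttle above might be exercised on its own; in normal operation each Site object constructs its instance internally, so the explicit construction and the delay values chosen here are illustrative assumptions only:

    import pywikibot
    from pywikibot.throttle import Throttle

    site = pywikibot.getSite()
    throttle = Throttle(site, mindelay=1, maxdelay=10, writedelay=5)

    # Block until the read delay has elapsed; the next delay is scaled to
    # the size of this request (10 pages).
    throttle(requestsize=10, write=False)

    # Back off after the server reports 20 seconds of replication lag
    # (sleeps between 5 and 120 seconds).
    throttle.lag(20)

    # Remove this process from throttle.log when the bot is done.
    throttle.drop()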
Modified: branches/rewrite/pywikibot/tools.py =================================================================== --- branches/rewrite/pywikibot/tools.py 2008-12-16 19:34:48 UTC (rev 6155) +++ branches/rewrite/pywikibot/tools.py 2008-12-16 19:40:20 UTC (rev 6156) @@ -1,174 +1,174 @@ -# -*- coding: utf-8 -*- -"""Miscellaneous helper functions (not wiki-dependent)""" -# -# (C) Pywikipedia bot team, 2008 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id: $' - -import sys -import threading -import time -import Queue - - -class ThreadedGenerator(threading.Thread): - """Look-ahead generator class. - - Runs a generator in a separate thread and queues the results; can - be called like a regular generator. - - Subclasses should override self.generator, I{not} self.run - - Important: the generator thread will stop itself if the generator's - internal queue is exhausted; but, if the calling program does not use - all the generated values, it must call the generator's stop() method to - stop the background thread. Example usage: - - >>> gen = ThreadedGenerator(target=xrange, args=(20,)) - >>> try: - ... for data in gen: - ... print data, - ... finally: - ... gen.stop() - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 - - """ - - def __init__(self, group=None, target=None, name="GeneratorThread", - args=(), kwargs=None, qsize=65536): - """Constructor. Takes same keyword arguments as threading.Thread. - - target must be a generator function (or other callable that returns - an iterable object). - - @param qsize: The size of the lookahead queue. The larger the qsize, - the more values will be computed in advance of use (which can eat - up memory and processor time). - @type qsize: int - - """ - if kwargs is None: - kwargs = {} - if target: - self.generator = target - if not hasattr(self, "generator"): - raise RuntimeError("No generator for ThreadedGenerator to run.") - self.args, self.kwargs = args, kwargs - threading.Thread.__init__(self, group=group, name=name) - self.queue = Queue.Queue(qsize) - self.finished = threading.Event() - - def __iter__(self): - """Iterate results from the queue.""" - if not self.isAlive() and not self.finished.isSet(): - self.start() - # if there is an item in the queue, yield it, otherwise wait - while not self.finished.isSet(): - try: - yield self.queue.get(True, 0.25) - except Queue.Empty: - pass - except KeyboardInterrupt: - self.stop() - - def stop(self): - """Stop the background thread.""" - self.finished.set() - - def run(self): - """Run the generator and store the results on the queue.""" - self.__gen = self.generator(*self.args, **self.kwargs) - for result in self.__gen: - while True: - if self.finished.isSet(): - return - try: - self.queue.put_nowait(result) - except Queue.Full: - time.sleep(0.25) - continue - break - # wait for queue to be emptied, then kill the thread - while not self.finished.isSet() and not self.queue.empty(): - time.sleep(0.25) - self.stop() - - -def itergroup(iterable, size): - """Make an iterator that returns lists of (up to) size items from iterable. - - Example: - - >>> i = itergroup(xrange(25), 10) - >>> print i.next() - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - >>> print i.next() - [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - >>> print i.next() - [20, 21, 22, 23, 24] - >>> print i.next() - Traceback (most recent call last): - ... 
- StopIteration - - """ - group = [] - for item in iterable: - group.append(item) - if len(group) == size: - yield group - group = [] - if group: - yield group - - -class ThreadList(list): - """A simple threadpool class to limit the number of simultaneous threads. - - Any threading.Thread object can be added to the pool using the append() - method. If the maximum number of simultaneous threads has not been reached, - the Thread object will be started immediately; if not, the append() call - will block until the thread is able to start. - - >>> pool = ThreadList(limit=10) - >>> def work(): - ... time.sleep(1) - ... - >>> for x in xrange(20): - ... pool.append(threading.Thread(target=work)) - ... - - """ - def __init__(self, limit=sys.maxint, *args): - self.limit = limit - list.__init__(self, *args) - for item in list(self): - if not isinstance(threading.Thread, item): - raise TypeError("Cannot add '%s' to ThreadList" % type(item)) - - def active_count(self): - """Return the number of alive threads, and delete all non-alive ones.""" - count = 0 - for item in list(self): - if item.isAlive(): - count += 1 - else: - self.remove(item) - return count - - def append(self, thd): - if not isinstance(thd, threading.Thread): - raise TypeError("Cannot append '%s' to ThreadList" % type(thd)) - while self.active_count() >= self.limit: - time.sleep(2) - list.append(self, thd) - thd.start() - - -if __name__ == "__main__": - def _test(): - import doctest - doctest.testmod() - _test() +# -*- coding: utf-8 -*- +"""Miscellaneous helper functions (not wiki-dependent)""" +# +# (C) Pywikipedia bot team, 2008 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' + +import sys +import threading +import time +import Queue + + +class ThreadedGenerator(threading.Thread): + """Look-ahead generator class. + + Runs a generator in a separate thread and queues the results; can + be called like a regular generator. + + Subclasses should override self.generator, I{not} self.run + + Important: the generator thread will stop itself if the generator's + internal queue is exhausted; but, if the calling program does not use + all the generated values, it must call the generator's stop() method to + stop the background thread. Example usage: + + >>> gen = ThreadedGenerator(target=xrange, args=(20,)) + >>> try: + ... for data in gen: + ... print data, + ... finally: + ... gen.stop() + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 + + """ + + def __init__(self, group=None, target=None, name="GeneratorThread", + args=(), kwargs=None, qsize=65536): + """Constructor. Takes same keyword arguments as threading.Thread. + + target must be a generator function (or other callable that returns + an iterable object). + + @param qsize: The size of the lookahead queue. The larger the qsize, + the more values will be computed in advance of use (which can eat + up memory and processor time). 
+ @type qsize: int + + """ + if kwargs is None: + kwargs = {} + if target: + self.generator = target + if not hasattr(self, "generator"): + raise RuntimeError("No generator for ThreadedGenerator to run.") + self.args, self.kwargs = args, kwargs + threading.Thread.__init__(self, group=group, name=name) + self.queue = Queue.Queue(qsize) + self.finished = threading.Event() + + def __iter__(self): + """Iterate results from the queue.""" + if not self.isAlive() and not self.finished.isSet(): + self.start() + # if there is an item in the queue, yield it, otherwise wait + while not self.finished.isSet(): + try: + yield self.queue.get(True, 0.25) + except Queue.Empty: + pass + except KeyboardInterrupt: + self.stop() + + def stop(self): + """Stop the background thread.""" + self.finished.set() + + def run(self): + """Run the generator and store the results on the queue.""" + self.__gen = self.generator(*self.args, **self.kwargs) + for result in self.__gen: + while True: + if self.finished.isSet(): + return + try: + self.queue.put_nowait(result) + except Queue.Full: + time.sleep(0.25) + continue + break + # wait for queue to be emptied, then kill the thread + while not self.finished.isSet() and not self.queue.empty(): + time.sleep(0.25) + self.stop() + + +def itergroup(iterable, size): + """Make an iterator that returns lists of (up to) size items from iterable. + + Example: + + >>> i = itergroup(xrange(25), 10) + >>> print i.next() + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + >>> print i.next() + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + >>> print i.next() + [20, 21, 22, 23, 24] + >>> print i.next() + Traceback (most recent call last): + ... + StopIteration + + """ + group = [] + for item in iterable: + group.append(item) + if len(group) == size: + yield group + group = [] + if group: + yield group + + +class ThreadList(list): + """A simple threadpool class to limit the number of simultaneous threads. + + Any threading.Thread object can be added to the pool using the append() + method. If the maximum number of simultaneous threads has not been reached, + the Thread object will be started immediately; if not, the append() call + will block until the thread is able to start. + + >>> pool = ThreadList(limit=10) + >>> def work(): + ... time.sleep(1) + ... + >>> for x in xrange(20): + ... pool.append(threading.Thread(target=work)) + ... + + """ + def __init__(self, limit=sys.maxint, *args): + self.limit = limit + list.__init__(self, *args) + for item in list(self): + if not isinstance(threading.Thread, item): + raise TypeError("Cannot add '%s' to ThreadList" % type(item)) + + def active_count(self): + """Return the number of alive threads, and delete all non-alive ones.""" + count = 0 + for item in list(self): + if item.isAlive(): + count += 1 + else: + self.remove(item) + return count + + def append(self, thd): + if not isinstance(thd, threading.Thread): + raise TypeError("Cannot append '%s' to ThreadList" % type(thd)) + while self.active_count() >= self.limit: + time.sleep(2) + list.append(self, thd) + thd.start() + + +if __name__ == "__main__": + def _test(): + import doctest + doctest.testmod() + _test()
Property changes on: branches/rewrite/pywikibot/tools.py ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision Added: svn:eol-style + native
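Finally, a short sketch of the tools helpers in use, mirroring the doctest examples embedded above; nothing here needs a wiki connection, only that the rewrite package is importable as pywikibot:

    import threading
    import time
    from pywikibot.tools import ThreadedGenerator, itergroup, ThreadList

    # Consume a look-ahead generator, always stopping its background thread.
    gen = ThreadedGenerator(target=xrange, args=(20,))
    try:
        for value in gen:
            print value,
    finally:
        gen.stop()

    # Group an iterable into batches of at most 10 items.
    for batch in itergroup(xrange(25), 10):
        print batch

    # Run at most 5 worker threads at the same time.
    pool = ThreadList(limit=5)
    for _ in xrange(20):
        pool.append(threading.Thread(target=time.sleep, args=(1,)))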