Revision: 5102
Author: russblau
Date: 2008-03-04 18:13:32 +0000 (Tue, 04 Mar 2008)
Log Message:
-----------
Improve initialization of Page objects; related changes to other modules
Modified Paths:
--------------
    branches/rewrite/pywikibot/__init__.py
    branches/rewrite/pywikibot/data/api.py
    branches/rewrite/pywikibot/families/wikipedia_family.py
    branches/rewrite/pywikibot/login.py
    branches/rewrite/pywikibot/page.py
    branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/__init__.py =================================================================== --- branches/rewrite/pywikibot/__init__.py 2008-03-03 14:58:39 UTC (rev 5101) +++ branches/rewrite/pywikibot/__init__.py 2008-03-04 18:13:32 UTC (rev 5102) @@ -65,3 +65,6 @@ import getpass return getpass.getpass(prompt) return raw_input(prompt) + +import logging +logging.getLogger().setLevel(logging.DEBUG)
Modified: branches/rewrite/pywikibot/data/api.py =================================================================== --- branches/rewrite/pywikibot/data/api.py 2008-03-03 14:58:39 UTC (rev 5101) +++ branches/rewrite/pywikibot/data/api.py 2008-03-04 18:13:32 UTC (rev 5102) @@ -26,7 +26,7 @@ lagpattern = re.compile(r"Waiting for [\d.]+: (?P<lag>\d+) seconds? lagged")
-class APIError(Exception): +class APIError(pywikibot.Error): """The wiki site returned an error message.""" def __init__(self, code, info, **kwargs): """Save error dict returned by MW API.""" @@ -39,7 +39,7 @@ return "%(code)s: %(info)s" % self.__dict__
-class TimeoutError(Exception): +class TimeoutError(pywikibot.Error): pass
Modified: branches/rewrite/pywikibot/families/wikipedia_family.py =================================================================== --- branches/rewrite/pywikibot/families/wikipedia_family.py 2008-03-03 14:58:39 UTC (rev 5101) +++ branches/rewrite/pywikibot/families/wikipedia_family.py 2008-03-04 18:13:32 UTC (rev 5102) @@ -773,7 +773,7 @@ def get_known_families(self, site): # In Swedish Wikipedia 's:' is part of page title not a family # prefix for 'wikisource'. - if site.lang == 'sv': + if site.language() == 'sv': d = self.known_families.copy() d.pop('s') ; d['src'] = 'wikisource' return d
Modified: branches/rewrite/pywikibot/login.py =================================================================== --- branches/rewrite/pywikibot/login.py 2008-03-03 14:58:39 UTC (rev 5101) +++ branches/rewrite/pywikibot/login.py 2008-03-04 18:13:32 UTC (rev 5102) @@ -48,7 +48,6 @@ import urllib2 import config import pywikibot -from pywikibot import Page from pywikibot.exceptions import *
# On some wikis you are only allowed to run a bot if there is a link to @@ -91,7 +90,7 @@ return True # DEBUG if botList.has_key(self.site.family.name) and botList[self.site.family.name].has_key(self.site.language()): botListPageTitle = botList[self.site.family.name][self.site.language()] - botListPage = Page(self.site, botListPageTitle) + botListPage = pywikibot.Page(self.site, botListPageTitle) for linkedPage in botListPage.linkedPages(): if linkedPage.titleWithoutNamespace() == self.username: return True
Modified: branches/rewrite/pywikibot/page.py =================================================================== --- branches/rewrite/pywikibot/page.py 2008-03-03 14:58:39 UTC (rev 5101) +++ branches/rewrite/pywikibot/page.py 2008-03-04 18:13:32 UTC (rev 5102) @@ -10,7 +10,7 @@ __version__ = '$Id: $'
import pywikibot -from pywikibot.exceptions import * +import pywikibot.site
import htmlentitydefs import logging @@ -28,109 +28,84 @@ reading from or writing to the wiki. All other methods are delegated to the Site object.
- Methods available: - - site: The wiki this page is in - - title: The name of the page, with various presentation options - - namespace: The namespace in which the page is found - - section: The section of the page (the part of the title after '#', if - any) - - isAutoTitle: Title can be translated using the autoFormat method - - autoFormat: Auto-format certain dates and other standard format page - titles - - isCategory: True if the page is a category - - isDisambig (*): True if the page is a disambiguation page - - isImage: True if the page is an image - - isRedirectPage (*): True if the page is a redirect, false otherwise - - getRedirectTarget (*): The page the page redirects to - - isTalkPage: True if the page is in any "talk" namespace - - toggleTalkPage: Return the talk page (if this is one, return the - non-talk page) - - get (*): The text of the page - - latestRevision (*): The page's current revision id - - userName: Last user to edit page - - isIpEdit: True if last editor was unregistered - - editTime: Timestamp of the last revision to the page - - previousRevision (*): The revision id of the previous version - - permalink (*): The url of the permalink of the current version - - getOldVersion(id) (*): The text of a previous version of the page - - getVersionHistory: Load the version history information from wiki - - getVersionHistoryTable: Create a wiki table from the history data - - fullVersionHistory: Return all past versions including wikitext - - contributingUsers: Return set of users who have edited page - - exists (*): True if the page actually exists, false otherwise - - isEmpty (*): True if the page has 4 characters or less content, not - counting interwiki and category links - - interwiki (*): The interwiki links from the page (list of Pages) - - categories (*): The categories the page is in (list of Pages) - - linkedPages (*): The normal pages linked from the page (list of - Pages) - - imagelinks (*): The pictures on the page (list of 
ImagePages) - - templates (*): All templates referenced on the page (list of Pages) - - templatesWithParams(*): All templates on the page, with list of - parameters - - isDisambig (*): True if the page is a disambiguation page - - getReferences: List of pages linking to the page - - canBeEdited (*): True if page is unprotected or user has edit - privileges - - botMayEdit (*): True if bot is allowed to edit page - - put(newtext): Saves the page - - put_async(newtext): Queues the page to be saved asynchronously - - move: Move the page to another title - - delete: Deletes the page (requires being logged in) - - protect: Protect or unprotect a page (requires sysop status) - - removeImage: Remove all instances of an image from this page - - replaceImage: Replace all instances of an image with another - - loadDeletedRevisions: Load all deleted versions of this page - - getDeletedRevision: Return a particular deleted revision - - markDeletedRevision: Mark a version to be undeleted, or not - - undelete: Undelete past version(s) of the page + """ + def __init__(self, source, title=u"", ns=0, insite=None, + defaultNamespace=None): + """Instantiate a Page object.
- Deprecated methods (preserved for backwards-compatibility): - - urlname: Title, in a form suitable for a URL - - titleWithoutNamespace: Title, with the namespace part removed - - sectionFreeTitle: Title, without the section part - - aslink: Title in the form [[Title]] or [[lang:Title]] - - encoding: The encoding of the page + Three calling formats are supported:
- (*) This loads the page if it has not been loaded before; permalink might - even reload it if it has been loaded before + - If the first argument is a Page, create a copy of that object. + This can be used to convert an existing Page into a subclass + object, such as Category or ImagePage. + - If the first argument is a Site, create a Page on that Site + using the second argument as the title (may include a section), + and the third as the namespace number. The namespace number is + mandatory, even if the title includes the namespace prefix. This + is the preferred syntax when using an already-normalized title + obtained from api.php or a database dump. WARNING: may produce + invalid objects if page title isn't in normal form! + - If the first argument is a Link, create a Page from that link. + This is the preferred syntax when using a title scraped from + wikitext, URLs, or another non-normalized source.
- """ - def __init__(self, site, title, insite=None, - defaultNamespace=0): - """Parameters: - - @param site: the wikimedia Site on which the page resides - @param title: title of the page + @param source: the source of the page + @type source: Link, Page (or subclass), or Site + @param title: normalized title of the page; required if source is a + Site, ignored otherwise @type title: unicode - @param insite: (optional) a wikimedia Site where this link was found - (to help decode interwiki links) - @param defaultNamespace: (optional) A namespace to use if the link - does not contain one - @type defaultNamespace: int + @param ns: namespace number; required if source is a Site, ignored + otherwise + @type ns: int + @param insite: DEPRECATED (use Link instead) + @param defaultNamespace: DEPRECATED (use Link instead)
""" - if site == None: - self._site = pywikibot.Site() - elif isinstance(site, basestring): - self._site = pywikibot.Site(site) + if insite is not None: + logging.debug( + "The 'insite' option in Page constructor is deprecated.") + if defaultNamespace is not None: + logging.debug( + "The 'defaultNamespace' option in Page constructor is deprecated.") + if isinstance(source, pywikibot.site.BaseSite): + self._site = source + if ns not in source.namespaces(): + raise pywikibot.Error( + "Invalid namespace '%i' for site %s." + % (ns, source.sitename())) + self._ns = ns + if ns and not title.startswith(source.namespace(ns)+u":"): + title = source.namespace(ns) + u":" + title + elif not ns and u":" in title: + nsindex = source.getNamespaceIndex(title[ :title.index(u":")]) + if nsindex: + self._ns = nsindex + if u"#" in title: + title, self._section = title.split(u"#", 1) + else: + self._section = None + if not title: + raise pywikibot.Error( + "Page object cannot be created from Site without title.") + self._title = title + elif isinstance(source, Page): + # copy all of source's attributes to this object + self.__dict__ = source.__dict__ + elif isinstance(source, Link): + self._site = link.site + self._section = link.section + self._ns = link.namespace + self._title = link.title + # reassemble the canonical title from components + if self._ns: + self._title = "%s:%s" % (self.site().namespace(self._ns), + self._title) else: - self._site = site - - if not insite: insite = self._site - - # parse the title - # this can throw various exceptions if the title is invalid - link = Link(title, insite, defaultNamespace) - self._site = link.site - self._section = link.section - self._ns = link.namespace - self._title = link.title - # reassemble the canonical title from components + raise pywikibot.Error( + "Invalid argument type '%s' in Page constructor: %s" + % (type(source), source)) if self._section is not None: self._title = self._title + "#" + self._section - if self._ns: - 
self._title = self.site().namespace(self._ns) + ":" + self._title self._revisions = {}
def site(self): @@ -138,14 +113,7 @@ return self._site
def namespace(self): - """Return the number of the namespace of the page. - - Only recognizes those namespaces defined in family.py. - If not defined, it will return 0 (the main namespace). - - @return: int - - """ + """Return the number of the namespace of the page.""" return self._ns
def title(self, underscore=False, savetitle=False, withNamespace=True, @@ -186,13 +154,12 @@ allowInterwiki and self.site() != pywikibot.Site()): if self.site().family() != pywikibot.Site().family() \ and self.site().family().name != self.site().language(): -# FIXME: Interwiki links shouldn't be fully urlencoded return u'[[%s:%s:%s]]' % (self.site().family().name, self.site().language(), - self.title(asUrl=True)) + self._title) else: return u'[[%s:%s]]' % (self.site().language(), - self.title(asUrl=True)) + self._title) elif textlink and (self.isImage() or self.isCategory()): return u'[[:%s]]' % title else: @@ -225,7 +192,12 @@ return u"%s(%s)" % (self.__class__.__name__, self.title())
def __cmp__(self, other): - """Test for equality and inequality of Page objects""" + """Test for equality and inequality of Page objects. + + Page objects are "equal" if and only if they are on the same site + and have the same normalized title, including section if any. + + """ if not isinstance(other, Page): # especially, return -1 if other is None return -1 @@ -302,7 +274,7 @@ else: # Make sure we re-raise an exception we got on an earlier attempt if hasattr(self, '_redirarg') and not get_redirect: - raise IsRedirectPage, self._redirarg + raise pywikibot.IsRedirectPage, self._redirarg elif hasattr(self, '_getexception'): raise self._getexception if force or not hasattr(self, "_revid") \ @@ -517,7 +489,9 @@ return True try: templates = self.templatesWithParams(); - except (NoPage, IsRedirectPage, SectionError): + except (pywikibot.NoPage, + pywikibot.IsRedirectPage, + pywikibot.SectionError): return True for template in templates: title = template[0].title(withNamespace=False) @@ -980,8 +954,8 @@ usingPages : Iterate Pages on which the image is displayed.
""" - def __init__(self, site, title, insite = None): - Page.__init__(self, site, title, insite, defaultNamespace=6) + def __init__(self, source, title=u"", insite=None): + Page.__init__(self, source, title, 6) if self.namespace() != 6: raise ValueError(u"'%s' is not in the image namespace!" % title)
@@ -1065,17 +1039,16 @@ class Category(Page): """A page in the Category: namespace"""
- def __init__(self, site, title, insite=None, sortKey=None): + def __init__(self, source, title, insite=None, sortKey=None): """All parameters are the same as for Page() constructor, except:
@param sortKey: DEPRECATED (use .aslink() method instead)
""" - Page.__init__(self, site=site, title=title, insite=insite, - defaultNamespace=14) if sortKey is not None: logging.debug( "The 'sortKey' option in Category constructor is deprecated.") + Page.__init__(self, source, title, 14) if self.namespace() != 14: raise ValueError(u"'%s' is not in the category namespace!" % title) @@ -1358,78 +1331,70 @@ # This code was adapted from Title.php : secureAndSplit() # if u'\ufffd' in t: - raise Error("Title contains illegal char (\uFFFD)") + raise pywikibot.Error("Title contains illegal char (\uFFFD)") self.namespace = defaultNamespace
# Replace underscores by spaces - t = t.replace(u'_', u' ') + t = t.replace(u"_", u" ") # replace multiple spaces and underscores with a single space while u" " in t: t = t.replace(u" ", u" ") # Strip spaces at both ends - t = t.strip() + t = t.strip(" ") # Remove left-to-right and right-to-left markers. - t = t.replace(u'\u200e', u'').replace(u'\u200f', u'') + t = t.replace(u"\u200e", u"").replace(u"\u200f", u"")
- # Initial colon indicates main namespace rather than specified default - if t.startswith(u':'): - self.namespace = 0 - # remove the colon but continue processing - # remove any subsequent whitespace - t = t[1:].strip() - - # Namespace or interwiki prefix firstPass = True - while True: - fam = self.site.family + while u":" in t: + # Initial colon indicates main namespace rather than default + if t.startswith(u":"): + self.namespace = 0 + # remove the colon but continue processing + # remove any subsequent whitespace + t = t.lstrip(u":").lstrip(u" ") + continue
- m = Link.namespace_pattern.match(t) - if m: - pre = m.group(1).lower() - ns = self.site.getNamespaceIndex(pre) - if ns: - # Ordinary namespace - t = m.group(2) - self.namespace = ns - elif pre in fam.langs.keys()\ - or pre in fam.get_known_families(site=self.site): - - if not firstPass: - # Can't make a local interwiki link to an interwiki link. - # That's just crazy! - raise Error("Improperly formatted interwiki link '%s'" - % text) - - # Interwiki link - t = m.group(2) - if pre in fam.langs.keys(): - newsite = pywikibot.Site(pre, fam) - else: - otherlang = self.site.lang - familyName = fam.get_known_families(site=self.site)[pre] - if familyName in ['commons', 'meta']: - otherlang = familyName - try: - newsite = pywikibot.Site(otherlang, familyName) - except ValueError: - raise Error("""\ + fam = self.site.family() + prefix = t[ :t.index(u":")].lower() + ns = self.site.getNamespaceIndex(prefix) + if ns: + # Ordinary namespace + t = t[t.index(u":"): ].lstrip(u":").lstrip(u" ") + self.namespace = ns + break + if prefix in fam.langs.keys()\ + or prefix in fam.get_known_families(site=self.site): + # looks like an interwiki link + if not firstPass: + # Can't make a local interwiki link to an interwiki link. + # That's just crazy! + raise pywikibot.Error( + "Improperly formatted interwiki link '%s'" + % text) + t = t[t.index(u":"): ].lstrip(u":").lstrip(u" ") + if prefix in fam.langs.keys(): + newsite = pywikibot.Site(pre, fam) + else: + otherlang = self.site.lang + familyName = fam.get_known_families(site=self.site)[pre] + if familyName in ['commons', 'meta']: + otherlang = familyName + try: + newsite = pywikibot.Site(otherlang, familyName) + except ValueError: + raise pywikibot.Error("""\ %s is not a local page on %s, and the %s family is not supported by PyWikiBot!""" - % (title, self.site(), familyName)) + % (title, self.site(), familyName))
- # Redundant interwiki prefix to the local wiki - if newsite == self.site: - if not t: - # Can't have an empty self-link - raise Error("Invalid link title: '%s'" % text) - firstPass = False - continue - self.site = newsite - # If there's an initial colon after the interwiki, that also - # resets the default namespace - if t.startswith(":"): - self.namespace = 0 - t = t[1:] - break + # Redundant interwiki prefix to the local wiki + if newsite == self.site: + if not t: + # Can't have an empty self-link + raise pywikibot.Error( + "Invalid link title: '%s'" % text) + firstPass = False + continue + self.site = newsite
if u"#" in t: t, sec = t.split(u'#', 1) @@ -1438,8 +1403,10 @@ self.section = None
# Reject illegal characters. - if Link.illegal_titles_pattern.search(t): - raise Error("Invalid title (contains illegal char(s)): '%s'" % text) + m = Link.illegal_titles_pattern.search(t) + if m: + raise pywikibot.Error( + "Invalid title: contains illegal char(s) '%s'" % m.group(0))
# Pages with "/./" or "/../" appearing in the URLs will # often be unreachable due to the way web browsers deal @@ -1454,15 +1421,16 @@ or t.endswith(u"/.") or t.endswith(u"/..") ): - raise Error("Invalid title (contains . / combinations): '%s'" + raise pywikibot.Error( + "Invalid title (contains . / combinations): '%s'" % text)
# Magic tilde sequences? Nu-uh! if u"~~~" in t: - raise Error("Invalid title (contains ~~~): '%s'" % text) + raise pywikibot.Error("Invalid title (contains ~~~): '%s'" % text)
if self.namespace != -1 and len(t) > 255: - raise Error("Invalid title (over 255 bytes): '%s'" % t) + raise pywikibot.Error("Invalid title (over 255 bytes): '%s'" % t)
if self.site.case() == 'first-letter': t = t[:1].upper() + t[1:]
Modified: branches/rewrite/pywikibot/site.py =================================================================== --- branches/rewrite/pywikibot/site.py 2008-03-03 14:58:39 UTC (rev 5101) +++ branches/rewrite/pywikibot/site.py 2008-03-04 18:13:32 UTC (rev 5102) @@ -106,15 +106,14 @@ return self._username return None
- def __getattr__(self, attr): + def __getattr__(self, attr, *args, **kwargs): """Calls to methods not defined in this object are passed to Family.""" try: method = getattr(self.family(), attr) - return lambda self=self: method(self.language()) + return lambda self=self: method(self.language(), *args, **kwargs) except AttributeError: raise AttributeError("%s instance has no attribute '%s'" - % (self.__class__.__name__, attr) - ) + % (self.__class__.__name__, attr) )
def sitename(self): """Return string representing this Site's name and language.""" @@ -135,9 +134,18 @@
def getNamespaceIndex(self, namespace): """Given a namespace name, return its int index, or None if invalid.""" - return self.family().getNamespaceIndex(self.language(), namespace) + if self.case() == "first-letter": + namespace = namespace[:1].upper() + namespace[1:] + for ns in self._namespaces: + if namespace in self._namespaces[ns]: + return ns + return None
+ def namespaces(self): + """Return dict of valid namespaces on this wiki.""" + return self._namespaces
+ class APISite(BaseSite): """API interface to MediaWiki site.
@@ -158,7 +166,6 @@ ## postForm: Post form data to an address at this site. ## postData: Post encoded form data to an http address at this site. ## -## namespace(num): Return local name of namespace 'num'. ## normalizeNamespace(value): Return preferred name for namespace 'value' in ## this Site's language. ## namespaces: Return list of canonical namespace names for this Site. @@ -308,8 +315,9 @@ 14: [u"Category"], 15: [u"Category talk"], } + self.getsiteinfo() return -# START HERE +# ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__() self._mediawiki_messages = {} self.nocapitalize = self._lang in self.family().nocapitalize self._userData = [False, False] @@ -451,7 +459,7 @@ """ return self._namespaces[num][0]
- +#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) #### class NotImplementedYet:
def isBlocked(self, sysop = False):