Revision: 5885
Author: russblau
Date: 2008-09-10 21:27:40 +0000 (Wed, 10 Sep 2008)
Log Message:
-----------
implement several more site methods
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-09-10 20:40:55 UTC (rev 5884)
+++ branches/rewrite/pywikibot/site.py 2008-09-10 21:27:40 UTC (rev 5885)
@@ -71,6 +71,7 @@
"""Site methods that are independent of the communication interface."""
# to implement a specific interface, define a Site class that inherits
# from this
+
def __init__(self, code, fam=None, user=None):
"""
@param code: the site's language code
@@ -131,6 +132,14 @@
"""The identifying code for this Site."""
return self.__code
+ def __cmp__(self, other):
+ """Perform equality and inequality tests on Site objects."""
+ if not isinstance(other, Site):
+ return 1
+ if self.family == other.family:
+ return cmp(self.code, other.code)
+ return cmp(self.family.name, other.family.name)
+
def user(self):
"""Return the currently-logged in bot user, or None."""
if self.logged_in():
@@ -206,12 +215,7 @@
"""
if default:
- if self.language() == 'ar':
- # It won't work with REDIRECT[[]] but it work with the local,
- # if problems, try to find a work around. FixMe!
- return self.family.redirect.get(self.code, [u"تحويل"])[0]
- else:
- return self.family.redirect.get(self.code, [u"REDIRECT"])[0]
+ return self.family.redirect.get(self.code, [u"REDIRECT"])[0]
else:
return self.family.redirect.get(self.code, None)
@@ -250,7 +254,102 @@
finally:
self._pagemutex.release()
+ def disambcategory(self):
+ """Return Category in which disambig pages are listed."""
+
+ try:
+ name = self.namespace(14)+':'+self.family.disambcatname[self.code]
+ except KeyError:
+ raise Error(u"No disambiguation category name found for %(site)s"
+ % {'site': self})
+ return pywikibot.Category(pywikibot.Link(name, self))
+ def linkto(self, title, othersite = None):
+ """Return unicode string in the form of a wikilink to 'title'
+
+ Use optional Site argument 'othersite' to generate an interwiki link.
+
+ """
+ # TODO convert to Link method, deprecate
+ if othersite and othersite.code != self.code:
+ return u'[[%s:%s]]' % (self.code, title)
+ else:
+ return u'[[%s]]' % title
+
+ def isInterwikiLink(self, s):
+ """Return True if s is in the form of an interwiki link.
+
+ Interwiki links have the form "foo:bar" or ":foo:bar" where foo is a
+ known language code or family. Called recursively if the first part
+ of the link refers to this site's own family and/or language. Do
+ not include brackets around the link!
+
+ """
+ # TODO: convert to Link method
+ s = s.strip().lstrip(":")
+ if not ':' in s:
+ return False
+ first, rest = s.split(':',1)
+ # interwiki codes are case-insensitive
+ first = first.lower().strip()
+ # commons: forwards interlanguage links to wikipedia:, etc.
+ if self.family.interwiki_forward:
+ interlangTargetFamily = pywikibot.Family(self.family.interwiki_forward)
+ else:
+ interlangTargetFamily = self.family
+ if self.ns_index(first):
+ return False
+ if first in interlangTargetFamily.langs:
+ if first == self.code:
+ return self.isInterwikiLink(rest)
+ else:
+ return True
+ if first in self.family.get_known_families(site = self):
+ if first == self.family.name:
+ return self.isInterwikiLink(rest)
+ else:
+ return True
+ return False
+
+ def redirectRegex(self):
+ """Return a compiled regular expression matching on redirect pages.
+
+ Group 1 in the regex match object will be the target title.
+
+ """
+ #TODO: is this needed, since the API identifies redirects?
+ # (maybe, the API can give false positives)
+ default = 'REDIRECT'
+ try:
+ keywords = set(self.family.redirect[self.code])
+ keywords.add(default)
+ pattern = r'(?:' + '|'.join(keywords) + ')'
+ except KeyError:
+ # no localized keyword for redirects
+ pattern = r'%s' % default
+ # A redirect starts with hash (#), followed by a keyword, then
+ # optional whitespace and an optional colon, then a wikilink. The
+ # wikilink may contain a label, although this is not useful.
+ return re.compile(r'\s*#%(pattern)s\s*:?\s*\[\[(.+?)(?:\|.*?)?\]\]'
+ % locals(),
+ re.IGNORECASE | re.UNICODE | re.DOTALL)
+
+ # site-specific formatting preferences
+
+ def category_on_one_line(self):
+ """Return True if this site wants all category links on one line."""
+ return self.code in self.family.category_on_one_line
+
+ def interwiki_putfirst(self):
+ """Return list of language codes for ordering of interwiki links."""
+ return self.family.interwiki_putfirst.get(self.code, None)
+
+ def getSite(self, code):
+ """Return Site object for language 'code' in this Family."""
+
+ return pywikibot.Site(code=code, fam=self.family, user=self.user)
+
+
class APISite(BaseSite):
"""API interface to MediaWiki site.
@@ -301,9 +400,6 @@
## (note, some methods yield other information in a tuple along with the
## Pages; see method docs for details) --
##
-## search(query): query results from Special:Search
-## allpages(): Special:Allpages
-## prefixindex(): Special:Prefixindex
## newpages(): Special:Newpages
## newimages(): Special:Log&type=upload
## longpages(): Special:Longpages
@@ -397,12 +493,16 @@
self._getsiteinfo()
# check whether a login cookie already exists for this user
if hasattr(self, "_userinfo"):
- if sysop:
- name = config.sysopnames[self.family.name][self.code]
- else:
- name = config.usernames[self.family.name][self.code]
- if self._userinfo['name'] == name:
- self._username = name
+ try:
+ if sysop:
+ name = config.sysopnames[self.family.name][self.code]
+ else:
+ name = config.usernames[self.family.name][self.code]
+ if self._userinfo['name'] == name:
+ self._username = name
+ except KeyError:
+ # no username for this site
+ pass
if not self.logged_in(sysop):
loginMan = api.LoginManager(site=self, sysop=sysop)
if loginMan.login(retry = True):
@@ -607,6 +707,20 @@
return self.namespaces()[num]
return self.namespaces()[num][0]
+ def live_version(self):
+ """Return the 'real' version number found on [[Special:Version]]
+
+ Return value is a tuple (int, int, str) of the major and minor version
+ numbers and any other text in the version; None if parsing fails.
+
+ """
+ versionstring = self.siteinfo['generator']
+ m = re.match(r"^MediaWiki ([0-9]+)\.([0-9]+)(.*)$", versionstring)
+ if m:
+ return (int(m.group(1)), int(m.group(2)), m.group(3))
+ else:
+ return None
+
def loadpageinfo(self, page):
"""Load page info from api and save in page attributes"""
title = page.title(withSection=False)
@@ -821,16 +935,16 @@
def pagereferences(self, page, followRedirects=False, filterRedirects=None,
withTemplateInclusion=True, onlyTemplateInclusion=False):
"""Convenience method combining pagebacklinks and page_embeddedin."""
- #TODO Warn about deprecated arguments
+
if onlyTemplateInclusion:
return self.page_embeddedin(page)
if not withTemplateInclusion:
return self.pagebacklinks(page, followRedirects)
import itertools
- return itertools.chain(self.pagebacklinks(
- page, followRedirects, filterRedirects),
- self.page_embeddedin(page, filterRedirects)
- )
+ return itertools.chain(
+ self.pagebacklinks(page, followRedirects, filterRedirects),
+ self.page_embeddedin(page, filterRedirects)
+ )
def pagelinks(self, page, namespaces=None, follow_redirects=False):
"""Iterate internal wikilinks contained (or transcluded) on page.
@@ -854,10 +968,13 @@
for ns in namespaces)
return plgen
- def pagecategories(self, page, withSortKey=False):
+ def pagecategories(self, page, withSortKey=None):
"""Iterate categories to which page belongs."""
- # Sortkey doesn't work with generator; FIXME or deprecate
+ # Sortkey doesn't work with generator; deprecate
+ if withSortKey is not None:
+ logger.debug(
+ "site.pagecategories(): withSortKey option is deprecated")
clgen = api.CategoryPageGenerator("categories", site=self)
if hasattr(page, "_pageid"):
clgen.request['pageids'] = str(page._pageid)
@@ -868,12 +985,14 @@
def pageimages(self, page):
"""Iterate images used (not just linked) on the page."""
+
imtitle = page.title(withSection=False).encode(self.encoding())
imgen = api.ImagePageGenerator("images", titles=imtitle, site=self)
return imgen
def pagetemplates(self, page, namespaces=None):
"""Iterate templates transcluded (not just linked) on the page."""
+
tltitle = page.title(withSection=False).encode(self.encoding())
tlgen = api.PageGenerator("templates", titles=tltitle, site=self)
if namespaces is not None:
@@ -2114,8 +2233,7 @@
# TODO: implement undelete
-
-#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
+#### METHODS NOT IMPLEMENTED YET ####
class NotImplementedYet:
# TODO: is this needed any more? can it be obtained from the http module?
@@ -2503,101 +2621,8 @@
cache.append(title)
yield Page(self, title)
- def linkto(self, title, othersite = None):
- """Return unicode string in the form of a wikilink to 'title'
-
- Use optional Site argument 'othersite' to generate an interwiki link.
-
- """
- if othersite and othersite.code != self.code:
- return u'[[%s:%s]]' % (self.code, title)
- else:
- return u'[[%s]]' % title
-
- def isInterwikiLink(self, s):
- """Return True if s is in the form of an interwiki link.
-
- Interwiki links have the form "foo:bar" or ":foo:bar" where foo is a
- known language code or family. Called recursively if the first part
- of the link refers to this site's own family and/or language.
-
- """
- s = s.strip().lstrip(":")
- if not ':' in s:
- return False
- first, rest = s.split(':',1)
- # interwiki codes are case-insensitive
- first = first.lower().strip()
- # commons: forwards interlanguage links to wikipedia:, etc.
- if self.family.interwiki_forward:
- interlangTargetFamily = Family(self.family.interwiki_forward)
- else:
- interlangTargetFamily = self.family
- if self.ns_index(first):
- return False
- if first in interlangTargetFamily.langs:
- if first == self.code:
- return self.isInterwikiLink(rest)
- else:
- return True
- if first in self.family.get_known_families(site = self):
- if first == self.family.name:
- return self.isInterwikiLink(rest)
- else:
- return True
- return False
-
- def redirectRegex(self):
- """Return a compiled regular expression matching on redirect pages.
-
- Group 1 in the regex match object will be the target title.
-
- """
- redDefault = 'redirect'
- red = 'redirect'
- if self.language() == 'ar':
- red = u"تحويل"
- try:
- if redDefault == red:
- redirKeywords = [red] + self.family.redirect[self.code]
- redirKeywordsR = r'(?:' + '|'.join(redirKeywords) + ')'
- else:
- redirKeywords = [red] + self.family.redirect[self.code]
- redirKeywordsR = r'(?:' + redDefault + '|'.join(redirKeywords) + ')'
- except KeyError:
- # no localized keyword for redirects
- if redDefault == red:
- redirKeywordsR = r'%s' % red
- else:
- redirKeywordsR = r'(?:%s|%s)' % (red, redDefault)
- # A redirect starts with hash (#), followed by a keyword, then
- # arbitrary stuff, then a wikilink. The wikilink may contain
- # a label, although this is not useful.
- return re.compile(r'#' + redirKeywordsR +
- '.*?\[\[(.*?)(?:\|.*?)?\]\]',
- re.IGNORECASE | re.UNICODE | re.DOTALL)
-
- def live_version(self):
- """Return the 'real' version number found on [[Special:Version]]
-
- Return value is a tuple (int, int, str) of the major and minor
- version numbers and any other text contained in the version.
-
- """
- global htmldata
- if not hasattr(self, "_mw_version"):
- versionpage = self.getUrl(self.get_address("Special:Version"))
- htmldata = BeautifulSoup(versionpage, convertEntities="html")
- versionstring = htmldata.findAll(text="MediaWiki"
- )[1].parent.nextSibling
- m = re.match(r"^: ([0-9]+)\.([0-9]+)(.*)$", str(versionstring))
- if m:
- self._mw_version = (int(m.group(1)), int(m.group(2)),
- m.group(3))
- else:
- self._mw_version = self.family.version(self.code).split(".")
- return self._mw_version
-
+ # TODO: why should we rely on the family file to contain the correct
+ # encoding?
def checkCharset(self, charset):
"""Warn if charset returned by wiki doesn't match family file."""
if not hasattr(self,'charset'):
@@ -2610,26 +2635,6 @@
"code2encodings has wrong charset for %s. It should be %s, but is %s"
% (repr(self), charset, self.encoding()))
- def shared_image_repository(self):
- """Return a tuple of image repositories used by this site."""
- return self.family.shared_image_repository(self.code)
-
- def __cmp__(self, other):
- """Perform equality and inequality tests on Site objects."""
- if not isinstance(other, Site):
- return 1
- if self.family == other.family:
- return cmp(self.code, other.code)
- return cmp(self.family.name, other.family.name)
-
- def category_on_one_line(self):
- """Return True if this site wants all category links on one line."""
- return self.code in self.family.category_on_one_line
-
- def interwiki_putfirst(self):
- """Return list of language codes for ordering of interwiki links."""
- return self.family.interwiki_putfirst.get(self.code, None)
-
def interwiki_putfirst_doubled(self, list_of_links):
# TODO: is this even needed? No family in the framework has this
# dictionary defined!
@@ -2650,19 +2655,6 @@
else:
return False
- def getSite(self, code):
- """Return Site object for language 'code' in this Family."""
- return getSite(code = code, fam = self.family, user=self.user)
-
def validLanguageLinks(self):
"""Return list of language codes that can be used in interwiki links."""
return self._validlanguages
-
- def disambcategory(self):
- """Return Category in which disambig pages are listed."""
- import catlib
- try:
- return catlib.Category(self,
- self.namespace(14)+':'+self.family.disambcatname[self.code])
- except KeyError:
- raise NoPage(u'No page %s.' % page)