Revision: 5274
Author: russblau
Date: 2008-04-25 19:13:20 +0000 (Fri, 25 Apr 2008)
Log Message:
-----------
more site methods; fix bugs; lazier site login (although I'm still not happy with
this; getting langlinks requires connecting to each wiki server)
Modified Paths:
--------------
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/exceptions.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/__init__.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -51,11 +51,6 @@
key = '%s:%s:%s' % (fam, code, user)
if not _sites.has_key(key):
_sites[key] = __Site(code=code, fam=fam, user=user)
- _sites[key].getsiteinfo()
- try:
- _sites[key].login(False)
- except NoUsername:
- pass
return _sites[key]
getSite = Site # alias for backwards-compability
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -223,19 +223,7 @@
# double the next wait, but do not exceed 120 seconds
self.retry_wait = min(120, self.retry_wait * 2)
- def lag_wait(self, lag):
- """Wait due to server lag."""
- # unlike regular wait, this shuts down all access to site
- self.site.sitelock.acquire()
- try:
- # wait at least 5 seconds, no more than 120
- wait = max(5, min(120, lag//2))
- logging.warn("Pausing %s seconds due to server lag." % wait)
- time.sleep(wait)
- finally:
- self.site.sitelock.release()
-
class PageGenerator(object):
"""Iterator for response to a request of type
action=query&generator=foo."""
def __init__(self, generator, **kwargs):
@@ -293,7 +281,6 @@
# FIXME: this won't handle generators with <redirlinks> subelements
# correctly yet
while True:
- self.site.throttle()
self.data = self.request.submit()
if not self.data or not isinstance(self.data, dict):
raise StopIteration
@@ -392,7 +379,6 @@
"""Iterate objects for elements found in
response."""
# this looks for the resultkey ''inside'' a <page> entry
while True:
- self.site.throttle()
self.data = self.request.submit()
if not self.data or not isinstance(self.data, dict):
raise StopIteration
Modified: branches/rewrite/pywikibot/exceptions.py
===================================================================
--- branches/rewrite/pywikibot/exceptions.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/exceptions.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -31,6 +31,15 @@
class IsNotRedirectPage(Error):
"""Page is not a redirect page"""
+class CircularRedirect(Error):
+ """Page is a circular redirect
+
+ Exception argument is the redirect target; this may be the same title
+ as this page or a different title (in which case the target page directly
+ or indirectly redirects back to this one)
+
+ """
+
class LockedPage(Error):
"""Page is locked"""
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/page.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -660,7 +660,11 @@
exception. This method also can raise a NoPage exception.
"""
- return self.site().follow_redirect(self)
+ if not self.isRedirectPage():
+ raise pywikibot.IsNotRedirectPage
+ if not isinstance(self._redir, Page):
+ self.site().getredirtarget(self)
+ return self._redir
def getVersionHistory(self, forceReload=False, reverseOrder=False,
getAll=False, revCount=500):
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/site.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -103,7 +103,17 @@
self._pagemutex = threading.Lock()
self._locked_pages = []
- self.throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
+ @property
+ def throttle(self):
+ """Return this Site's throttle. Initialize a new one if
needed."""
+ if not hasattr(self, "_throttle"):
+ self._throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
+ self.getsiteinfo()
+ try:
+ self.login(False)
+ except pywikibot.NoUsername:
+ pass
+ return self._throttle
def family(self):
"""Return the associated Family object."""
@@ -124,6 +134,8 @@
def __getattr__(self, attr):
"""Calls to methods not defined in this object are passed to
Family."""
+ if hasattr(self.__class__, attr):
+ return self.__class__.attr
try:
method = getattr(self.family(), attr)
f = lambda *args, **kwargs: \
@@ -509,7 +521,42 @@
if not hasattr(page, "_redir"):
self.getpageinfo(page)
return bool(page._redir)
-
+
+ def getredirtarget(self, page):
+ """Return Page object for the redirect target of
page."""
+ if not hasattr(page, "_redir"):
+ self.getpageinfo(page)
+ if not page._redir:
+ raise pywikibot.IsNotRedirectPage
+ title = page.title(withSection=False)
+ query = api.Request(site=self, action="query",
property="info",
+ inprop="protection|talkid|subjectid",
+ titles=title.encode(self.encoding()),
+ redirects="")
+ result = query.submit()
+ if "query" not in result or "redirects" not in
result["query"]:
+ raise RuntimeError(
+ "getredirtarget: No 'redirects' found for page %s."
+ % title)
+ redirmap = dict((item['from'], item['to'])
+ for item in result['query']['redirects'])
+ if title not in redirmap:
+ raise RuntimeError(
+ "getredirtarget: 'redirects' contains no key for page
%s."
+ % title)
+ if "pages" not in result['query']:
+ # no "pages" element indicates a circular redirect
+ raise pywikibot.CircularRedirect(redirmap[title])
+ for pagedata in result['query']['pages'].values():
+ # there should be only one value in 'pages', and it is the target
+ if pagedata['title'] not in redirmap.values():
+ raise RuntimeError(
+ "getredirtarget: target page '%s' not found in
'redirects'"
+ % pagedata['title'])
+ target = pywikibot.Page(self, pagedata['title'], pagedata['ns'])
+ api.update_page(target, pagedata)
+ page._redir = target
+
# following group of methods map more-or-less directly to API queries
def getbacklinks(self, page, followRedirects=False, filterRedirects=None,
@@ -783,7 +830,22 @@
yield pywikibot.Link(linkdata['*'],
source=pywikibot.Site(linkdata['lang']))
+ def getextlinks(self, page):
+ """Iterate all external links on page, yielding URL
strings."""
+ eltitle = page.title(withSection=False)
+ elquery = api.PropertyGenerator("extlinks",
+ titles=eltitle.encode(self.encoding())
+ )
+ for pageitem in elquery:
+ if pageitem['title'] != eltitle:
+ raise RuntimeError(
+ "getlanglinks: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ for linkdata in pageitem['extlinks']:
+ yield linkdata['*']
+
+
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
class NotImplementedYet: