Revision: 5274
Author:   russblau
Date:     2008-04-25 19:13:20 +0000 (Fri, 25 Apr 2008)

Log Message:
-----------
more site methods; fix bugs; lazier site login (although I'm still not happy with this; getting langlinks requires connecting to each wiki server)

Modified Paths:
--------------
    branches/rewrite/pywikibot/__init__.py
    branches/rewrite/pywikibot/data/api.py
    branches/rewrite/pywikibot/exceptions.py
    branches/rewrite/pywikibot/page.py
    branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py    2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/__init__.py    2008-04-25 19:13:20 UTC (rev 5274)
@@ -51,11 +51,6 @@
     key = '%s:%s:%s' % (fam, code, user)
     if not _sites.has_key(key):
         _sites[key] = __Site(code=code, fam=fam, user=user)
-        _sites[key].getsiteinfo()
-        try:
-            _sites[key].login(False)
-        except NoUsername:
-            pass
     return _sites[key]
 
 getSite = Site # alias for backwards-compability
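With the eager getsiteinfo()/login() calls dropped from the factory, constructing a Site no longer touches the network; that setup now runs the first time the throttle property is used (see the site.py change below). A rough usage sketch, assuming the rewrite branch is importable and the usual code/fam keyword arguments:

    import pywikibot

    # Creating a Site is now cheap: no siteinfo fetch, no login attempt.
    site = pywikibot.Site(code="en", fam="wikipedia")

    # Repeat calls with the same family/code/user return the cached object.
    assert site is pywikibot.Site(code="en", fam="wikipedia")

    # getsiteinfo() and login() run only when the first API request
    # touches site.throttle.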
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py    2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/data/api.py    2008-04-25 19:13:20 UTC (rev 5274)
@@ -223,19 +223,7 @@
             # double the next wait, but do not exceed 120 seconds
             self.retry_wait = min(120, self.retry_wait * 2)
 
-    def lag_wait(self, lag):
-        """Wait due to server lag."""
-        # unlike regular wait, this shuts down all access to site
-        self.site.sitelock.acquire()
-        try:
-            # wait at least 5 seconds, no more than 120
-            wait = max(5, min(120, lag//2))
-            logging.warn("Pausing %s seconds due to server lag." % wait)
-            time.sleep(wait)
-        finally:
-            self.site.sitelock.release()
-
 
 class PageGenerator(object):
     """Iterator for response to a request of type action=query&generator=foo."""
     def __init__(self, generator, **kwargs):
@@ -293,7 +281,6 @@
         # FIXME: this won't handle generators with <redirlinks> subelements
         # correctly yet
         while True:
-            self.site.throttle()
             self.data = self.request.submit()
             if not self.data or not isinstance(self.data, dict):
                 raise StopIteration
@@ -392,7 +379,6 @@
         """Iterate objects for elements found in response."""
         # this looks for the resultkey ''inside'' a <page> entry
         while True:
-            self.site.throttle()
             self.data = self.request.submit()
             if not self.data or not isinstance(self.data, dict):
                 raise StopIteration
Modified: branches/rewrite/pywikibot/exceptions.py
===================================================================
--- branches/rewrite/pywikibot/exceptions.py    2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/exceptions.py    2008-04-25 19:13:20 UTC (rev 5274)
@@ -31,6 +31,15 @@
 class IsNotRedirectPage(Error):
     """Page is not a redirect page"""
 
+class CircularRedirect(Error):
+    """Page is a circular redirect
+
+    Exception argument is the redirect target; this may be the same title
+    as this page or a different title (in which case the target page directly
+    or indirectly redirects back to this one)
+
+    """
+
 class LockedPage(Error):
     """Page is locked"""
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py    2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/page.py    2008-04-25 19:13:20 UTC (rev 5274)
@@ -660,7 +660,11 @@
         exception. This method also can raise a NoPage exception.
 
         """
-        return self.site().follow_redirect(self)
+        if not self.isRedirectPage():
+            raise pywikibot.IsNotRedirectPage
+        if not isinstance(self._redir, Page):
+            self.site().getredirtarget(self)
+        return self._redir
 
     def getVersionHistory(self, forceReload=False, reverseOrder=False,
                           getAll=False, revCount=500):
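From a caller's point of view the rewritten method behaves roughly as sketched below; the page title here is made up and the Site arguments are assumed:

    import pywikibot

    site = pywikibot.Site(code="en", fam="wikipedia")
    page = pywikibot.Page(site, "Possibly a redirect")

    if page.isRedirectPage():
        try:
            # resolved lazily through Site.getredirtarget() and cached in _redir
            target = page.getRedirectTarget()
            print "redirects to", target.title()
        except pywikibot.CircularRedirect, err:
            # the exception argument carries the redirect target's title
            print "circular redirect via", err.args[0]
    else:
        # calling getRedirectTarget() here would raise IsNotRedirectPage
        print page.title(), "is not a redirect"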
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py    2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/site.py    2008-04-25 19:13:20 UTC (rev 5274)
@@ -103,7 +103,17 @@
         self._pagemutex = threading.Lock()
         self._locked_pages = []
 
-        self.throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
+    @property
+    def throttle(self):
+        """Return this Site's throttle. Initialize a new one if needed."""
+        if not hasattr(self, "_throttle"):
+            self._throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
+            self.getsiteinfo()
+            try:
+                self.login(False)
+            except pywikibot.NoUsername:
+                pass
+        return self._throttle
 
     def family(self):
         """Return the associated Family object."""
@@ -124,6 +134,8 @@
 
     def __getattr__(self, attr):
         """Calls to methods not defined in this object are passed to Family."""
+        if hasattr(self.__class__, attr):
+            return self.__class__.attr
         try:
             method = getattr(self.family(), attr)
             f = lambda *args, **kwargs: \
@@ -509,7 +521,42 @@
         if not hasattr(page, "_redir"):
             self.getpageinfo(page)
         return bool(page._redir)
-    
+
+    def getredirtarget(self, page):
+        """Return Page object for the redirect target of page."""
+        if not hasattr(page, "_redir"):
+            self.getpageinfo(page)
+        if not page._redir:
+            raise pywikibot.IsNotRedirectPage
+        title = page.title(withSection=False)
+        query = api.Request(site=self, action="query", property="info",
+                            inprop="protection|talkid|subjectid",
+                            titles=title.encode(self.encoding()),
+                            redirects="")
+        result = query.submit()
+        if "query" not in result or "redirects" not in result["query"]:
+            raise RuntimeError(
+                "getredirtarget: No 'redirects' found for page %s."
+                % title)
+        redirmap = dict((item['from'], item['to'])
+                        for item in result['query']['redirects'])
+        if title not in redirmap:
+            raise RuntimeError(
+                "getredirtarget: 'redirects' contains no key for page %s."
+                % title)
+        if "pages" not in result['query']:
+            # no "pages" element indicates a circular redirect
+            raise pywikibot.CircularRedirect(redirmap[title])
+        for pagedata in result['query']['pages'].values():
+            # there should be only one value in 'pages', and it is the target
+            if pagedata['title'] not in redirmap.values():
+                raise RuntimeError(
+                    "getredirtarget: target page '%s' not found in 'redirects'"
+                    % pagedata['title'])
+            target = pywikibot.Page(self, pagedata['title'], pagedata['ns'])
+            api.update_page(target, pagedata)
+            page._redir = target
+
     # following group of methods map more-or-less directly to API queries
 
     def getbacklinks(self, page, followRedirects=False, filterRedirects=None,
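For orientation, the query result that getredirtarget() walks has roughly this shape (title and pageid invented here); 'redirects' maps source titles to targets, and a missing 'pages' element is what signals a circular redirect:

    # illustrative only -- the values below are made up
    result = {
        "query": {
            "redirects": [
                {"from": "Old title", "to": "New title"},
            ],
            "pages": {
                "12345": {
                    "pageid": 12345,
                    "ns": 0,
                    "title": "New title",
                    # plus the requested info/inprop fields
                    # (protection, talkid, subjectid, ...)
                },
            },
        },
    }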
@@ -783,7 +830,22 @@
             yield pywikibot.Link(linkdata['*'],
                                  source=pywikibot.Site(linkdata['lang']))
 
+    def getextlinks(self, page):
+        """Iterate all external links on page, yielding URL strings."""
+        eltitle = page.title(withSection=False)
+        elquery = api.PropertyGenerator("extlinks",
+                                        titles=eltitle.encode(self.encoding())
+                                        )
+        for pageitem in elquery:
+            if pageitem['title'] != eltitle:
+                raise RuntimeError(
+                    "getlanglinks: Query on %s returned data on '%s'"
+                    % (page, pageitem['title']))
+            for linkdata in pageitem['extlinks']:
+                yield linkdata['*']
+
+
 #### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
 
 class NotImplementedYet:
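A quick, hypothetical use of the new external-links iterator (the page title is made up); each yielded item is a plain URL string taken from the 'extlinks' property:

    import pywikibot

    site = pywikibot.Site(code="en", fam="wikipedia")
    page = pywikibot.Page(site, "Example article")

    for url in site.getextlinks(page):
        print url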