Revision: 5166 Author: russblau Date: 2008-03-25 15:14:48 +0000 (Tue, 25 Mar 2008)
Log Message: ----------- additional methods and unit tests
Modified Paths: -------------- branches/rewrite/pywikibot/data/api.py branches/rewrite/pywikibot/page.py branches/rewrite/pywikibot/site.py branches/rewrite/pywikibot/tests/page_tests.py branches/rewrite/pywikibot/throttle.py
Modified: branches/rewrite/pywikibot/data/api.py =================================================================== --- branches/rewrite/pywikibot/data/api.py 2008-03-24 22:03:25 UTC (rev 5165) +++ branches/rewrite/pywikibot/data/api.py 2008-03-25 15:14:48 UTC (rev 5166) @@ -91,6 +91,18 @@ self.max_retries = kwargs.pop("max_retries", 25) self.retry_wait = kwargs.pop("retry_wait", 5) self.params = {} + if "action" not in kwargs: + raise ValueError("'action' specification missing from Request.") + if kwargs["action"] == 'query': + if "meta" in kwargs: + if "userinfo" not in kwargs["meta"]: + kwargs["meta"] += "|userinfo" + else: + kwargs["meta"] = "userinfo" + if "uiprop" in kwargs: + kwargs["uiprop"] += "|blockinfo|hasmsg" + else: + kwargs["uiprop"] = "blockinfo|hasmsg" if "format" not in kwargs: self.params["format"] = "json" if "maxlag" not in kwargs: @@ -131,7 +143,6 @@ uri = self.site.scriptpath() + "/api.php" params = urllib.urlencode(self.params) while True: - # TODO wait on errors # TODO catch http errors try: if self.params.get("action", "") in ("login",): @@ -154,7 +165,6 @@ except ValueError: # if the result isn't valid JSON, there must be a server # problem. Wait a few seconds and try again - # TODO: implement a throttle logging.warning( "Non-JSON response received from server %s; the server may be down." % self.site) @@ -168,6 +178,13 @@ "Unable to process query response of type %s." 
% type(result), {'data': result}) + if self['action'] == 'query': + if 'userinfo' in result.get('query', ()): + if hasattr(self.site, '_userinfo'): + self.site._userinfo.update(result['query']['userinfo']) + else: + self.site._userinfo = result['query']['userinfo'] + if "error" not in result: return result if "*" in result["error"]: @@ -196,14 +213,15 @@ self.max_retries -= 1 if self.max_retries < 0: raise TimeoutError("Maximum retries attempted without success.") - wait = self.retry_wait if lag is not None: - if lag > 2 * self.retry_wait: - wait = min(120, lag // 2) - logging.warn("Waiting %s seconds before retrying." % self.retry_wait) + # in case of database lag, wait half the lag time, + # but not less than 5 or more than 120 seconds + wait = max(5, min(lag // 2, 120)) + logging.warn("Waiting %s seconds before retrying." % wait) time.sleep(wait) - self.retry_wait = min(120, self.retry_wait * 2) + if lag is None: + self.retry_wait = min(120, self.retry_wait * 2)
class PageGenerator(object): @@ -219,11 +237,48 @@ """ if not generator: raise ValueError("generator argument is required.") + if generator not in self.limits: + raise ValueError("Unrecognized generator '%s'" % generator) self.request = Request(action="query", generator=generator, **kwargs) + # set limit to max, if applicable + if self.limits[generator]: + self.request['g'+self.limits[generator]] = "max" + if 'prop' in self.request: + self.request['prop'] += "|info|imageinfo" + else: + self.request['prop'] = 'info|imageinfo' + if "inprop" in self.request: + if "protection" not in self.request["inprop"]: + self.request["inprop"] += "|protection" + else: + self.request['inprop'] = 'protection' + if "iiprop" in self.request: + self.request["iiprop"] += 'timestamp|user|comment|url|size|sha1|metadata' + else: + self.request['iiprop'] = 'timestamp|user|comment|url|size|sha1|metadata' self.generator = generator self.site = self.request.site self.resultkey = "pages" # element to look for in result
+ # dict mapping generator types to their limit parameter names + + limits = {'links': None, + 'images': None, + 'templates': None, + 'categories': None, + 'allpages': 'aplimit', + 'alllinks': 'allimit', + 'allcategories': 'aclimit', + 'backlinks': 'bllimit', + 'categorymembers': 'cmlimit', + 'embeddedin': 'eilimit', + 'imageusage': 'iulimit', + 'search': 'srlimit', + 'watchlist': 'wllimit', + 'exturlusage': 'eulimit', + 'random': 'rnlimit', + } + def __iter__(self): """Iterate objects for elements found in response.""" # FIXME: this won't handle generators with <redirlinks> subelements @@ -270,11 +325,29 @@ if 'touched' in pagedata: p._timestamp = pagedata['touched'] if 'protection' in pagedata: + p._protection = {} for item in pagedata['protection']: - p._protection[item['key']] = item['level'] + p._protection[item['type']] = item['level'] return p
+class CategoryPageGenerator(PageGenerator): + """Generator that yields Category objects instead of Pages.""" + def result(self, pagedata): + p = PageGenerator.result(self, pagedata) + return pywikibot.Category(p) + + +class ImagePageGenerator(PageGenerator): + """Generator that yields ImagePage objects instead of Pages.""" + def result(self, pagedata): + p = PageGenerator.result(self, pagedata) + image = pywikibot.ImagePage(p) + if 'imageinfo' in pagedata: + image._imageinfo = pagedata['imageinfo'] + return image + + class LoginManager(login.LoginManager): """Supplies getCookie() method to use API interface.""" def getCookie(self, remember=True, captchaId=None, captchaAnswer=None):
Modified: branches/rewrite/pywikibot/page.py =================================================================== --- branches/rewrite/pywikibot/page.py 2008-03-24 22:03:25 UTC (rev 5165) +++ branches/rewrite/pywikibot/page.py 2008-03-25 15:14:48 UTC (rev 5166) @@ -594,19 +594,21 @@ """ return self.site().getlanglinks(self)
- def imagelinks(self, followRedirects=False, loose=None): + def imagelinks(self, followRedirects=None, loose=None): """Iterate ImagePage objects for images displayed on this Page.
- @param followRedirects: if an image link redirects to another page, - yield the redirect target instead of the original link + @param followRedirects: DEPRECATED and ignored @param loose: DEPRECATED and ignored @return: a generator that yields ImagePage objects.
""" + if followRedirects is not None: + logging.debug( + u"Page.imagelinks(followRedirects) option is deprecated.") if loose is not None: logging.debug( u"Page.imagelinks(loose) option is deprecated.") - return self.site().getimages(followRedirects) + return self.site().getimages(self)
def templates(self): """Iterate Page objects for templates used on this Page. @@ -641,7 +643,7 @@ if nofollow_redirects is not None: logging.debug( u"Page.categories(nofollow_redirects) option is deprecated.") - return self.site().categories(withSortKey=withSortKey) + return self.site().getcategories(self, withSortKey=withSortKey)
def extlinks(self): """Iterate all external URLs (not interwiki links) from this page. @@ -1041,7 +1043,7 @@ class Category(Page): """A page in the Category: namespace"""
- def __init__(self, source, title, insite=None, sortKey=None): + def __init__(self, source, title=u"", insite=None, sortKey=None): """All parameters are the same as for Page() constructor, except:
@param sortKey: DEPRECATED (use .aslink() method instead) @@ -1094,9 +1096,9 @@ recurse = recurse - 1 if not hasattr(self, "_subcats"): self._subcats = [] - for member in self.site().categorymembers(self, namespaces=[14]): + for member in self.site().getcategorymembers(self, namespaces=[14]): subcat = Category(self.site(), member.title()) - self.subcats.append(subcat) + self._subcats.append(subcat) yield subcat if recurse: for item in subcat.subcategories(recurse): @@ -1119,9 +1121,10 @@ @type recurse: int or bool
""" - namespaces = self.site().namespaces() - namespaces.remove(14) - for member in self.site().categorymembers(self, namespaces=namespaces): + namespaces = [x for x in self.site().namespaces().keys() + if x>=0 and x!=14] + for member in self.site().getcategorymembers(self, + namespaces=namespaces): yield member if recurse: if not isinstance(recurse, bool) and recurse:
Modified: branches/rewrite/pywikibot/site.py =================================================================== --- branches/rewrite/pywikibot/site.py 2008-03-24 22:03:25 UTC (rev 5165) +++ branches/rewrite/pywikibot/site.py 2008-03-25 15:14:48 UTC (rev 5166) @@ -72,7 +72,7 @@ @type fam: str or Family @param user: bot user name (optional) @type user: str - + """ self._lang = code.lower() if isinstance(fam, basestring) or fam is None: @@ -106,7 +106,7 @@ pt_min = min(config.minthrottle, config.put_throttle) self.put_throttle = Throttle(self, pt_min, config.maxthrottle) self.put_throttle.setDelay(config.put_throttle) - + gt_min = min(config.minthrottle, config.get_throttle) self.get_throttle = Throttle(self, gt_min, config.maxthrottle) self.get_throttle.setDelay(config.get_throttle) @@ -119,6 +119,7 @@ """Return the site's language code.""" # N.B. this code does not always identify a language as such, but # may identify a wiki that is part of any family grouping + # FIXME: need to separate language (for I18N purposes) from code return self._lang
def user(self): @@ -145,7 +146,7 @@ return self.family().name+':'+self.language()
__str__ = sitename - + def __repr__(self): return 'Site("%s", "%s")' % (self.language(), self.family().name)
@@ -238,7 +239,7 @@ finally: self._mutex.release()
- + class APISite(BaseSite): """API interface to MediaWiki site.
@@ -337,8 +338,8 @@ 14: [u"Category"], 15: [u"Category talk"], } -# self.getsiteinfo() return + # ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__() self._mediawiki_messages = {} self.nocapitalize = self._lang in self.family().nocapitalize @@ -368,7 +369,7 @@ if self._userinfo['name'] != self._username: return False return (not sysop) or 'sysop' in self._userinfo['groups'] - + def loggedInAs(self, sysop = False): """Return the current username if logged in, otherwise return None.
@@ -417,10 +418,9 @@ uidata = uirequest.submit() assert 'query' in uidata, \ "API userinfo response lacks 'query' key" - uidata = uidata['query'] - assert 'userinfo' in uidata, \ + assert 'userinfo' in uidata['query'], \ "API userinfo response lacks 'userinfo' key" - self._userinfo = uidata['userinfo'] + self._userinfo = uidata['query']['userinfo'] return self._userinfo
def getsiteinfo(self): @@ -436,6 +436,7 @@ sidata = sirequest.submit() except api.APIError: # hack for older sites that don't support 1.12 properties + # probably should delete if we're not going to support pre-1.12 sirequest = api.Request( site=self, action="query", @@ -443,7 +444,7 @@ siprop="general|namespaces" ) sidata = sirequest.submit() - + assert 'query' in sidata, \ "API siteinfo response lacks 'query' key" sidata = sidata['query'] @@ -497,23 +498,16 @@
@param page: The Page to get links to. @param followRedirects: Also return links to redirects pointing to - the given page. [Not yet implemented on API] + the given page. @param filterRedirects: If True, only return redirects to the given page. If False, only return non-redirect links. If None, return both (no filtering). @param namespaces: If present, only return links from the namespaces in this list. - + """ - if 'bot' in self.getuserinfo()['groups']: - limit = 5000 - else: - limit = 500 - if followRedirects: - limit = limit / 2 bltitle = page.title(withSection=False) - blgen = api.PageGenerator("backlinks", gbltitle=bltitle, - gbllimit=str(limit)) + blgen = api.PageGenerator("backlinks", gbltitle=bltitle) if namespaces is not None: blgen.request["gblnamespace"] = u"|".join(unicode(ns) for ns in namespaces) @@ -524,13 +518,10 @@ blgen.request["gblredirect"] = "" return blgen
- def getembeddedin(self, page, followRedirects=False, filterRedirects=None, - namespaces=None): + def getembeddedin(self, page, filterRedirects=None, namespaces=None): """Iterate all pages that embedded the given page as a template.
@param page: The Page to get inclusions for. - @param followRedirects: Also return pages transcluding redirects to - the given page. [Not yet implemented on API] @param filterRedirects: If True, only return redirects that embed the given page. If False, only return non-redirect links. If None, return both (no filtering). @@ -539,20 +530,13 @@
""" eititle = page.title(withSection=False) - if 'bot' in self.getuserinfo()['groups']: - limit = 5000 - else: - limit = 500 - eigen = api.PageGenerator("embeddedin", geititle=eititle, - geilimit=str(limit)) + eigen = api.PageGenerator("embeddedin", geititle=eititle) if namespaces is not None: eigen.request["geinamespace"] = u"|".join(unicode(ns) for ns in namespaces) if filterRedirects is not None: eigen.request["geifilterredir"] = filterRedirects and "redirects"\ or "nonredirects" - if followRedirects: - eigen.request["geiredirect"] = "" return eigen
def getreferences(self, page, followRedirects, filterRedirects, @@ -565,10 +549,65 @@ import itertools return itertools.chain(self.getbacklinks( page, followRedirects, filterRedirects), - self.getembeddedin( - page, followRedirects, filterRedirects) + self.getembeddedin(page, filterRedirects) )
+ def getlinks(self, page, namespaces=None): + """Iterate internal wikilinks contained (or transcluded) on page.""" + pltitle = page.title(withSection=False) + plgen = api.PageGenerator("links", titles=pltitle) + if namespaces is not None: + plgen.request["gplnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + return plgen + + def getcategories(self, page, withSortKey=False): + """Iterate categories to which page belongs.""" + # Sortkey doesn't seem to work with generator; FIXME + cltitle = page.title(withSection=False) + clgen = api.CategoryPageGenerator("categories", titles=cltitle) + return clgen + + def getimages(self, page): + """Iterate images used (not just linked) on the page.""" + imtitle = page.title(withSection=False) + imgen = api.ImagePageGenerator("images", titles=imtitle) + return imgen + + def gettemplates(self, page, namespaces=None): + """Iterate templates transcluded (not just linked) on the page.""" + tltitle = page.title(withSection=False) + tlgen = api.PageGenerator("templates", titles=tltitle) + if namespaces is not None: + tlgen.request["gtlnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + return tlgen + + def getcategorymembers(self, category, namespaces=None): + """Iterate members of specified category. + + @param category: The Category to iterate. + @param namespaces: If present, only return category members from + these namespaces. For example, use namespaces=[14] to yield + subcategories, use namespaces=[6] to yield image files, etc. Note, + however, that the iterated values are always Page objects, even + if in the Category or Image namespace. 
+ @type namespaces: list of ints + + """ + if category.namespace() != 14: + raise ValueError( + "Cannot get category members of non-Category page '%s'" + % category.title()) + cmtitle = category.title(withSection=False) + cmgen = api.PageGenerator("categorymembers", gcmtitle=cmtitle, + gcmprop="ids|title|sortkey") + if namespaces is not None: + cmgen.request["gcmnamespace"] = u"|".join(unicode(ns) + for ns in namespaces) + return cmgen + + #### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) #### class NotImplementedYet:
@@ -660,7 +699,7 @@ continue l.append(key + '=' + value)
- # wpEditToken is explicitly added as last value. # If a premature connection abort occurs while putting, the server will # not have received an edit token and thus refuse saving the page if wpEditToken != None:
Modified: branches/rewrite/pywikibot/tests/page_tests.py =================================================================== --- branches/rewrite/pywikibot/tests/page_tests.py 2008-03-24 22:03:25 UTC (rev 5165) +++ branches/rewrite/pywikibot/tests/page_tests.py 2008-03-25 15:14:48 UTC (rev 5166) @@ -160,6 +160,46 @@ self.assertEqual(p2.isCategory(), True) self.assertEqual(p3.isCategory(), False)
+ # testIsImage -- todo + + def testApiMethods(self): + """Test various methods that rely on API.""" + # since there is no way to predict what data the wiki will return, + # we only check that the returned objects are of correct type. + main = pywikibot.Page(site, u"Main Page") + self.assertEqual(type(main.get()), unicode) + self.assertEqual(type(main.latestRevision()), int) + self.assertEqual(type(main.userName()), unicode) + self.assertEqual(type(main.isIpEdit()), bool) + self.assertEqual(type(main.exists()), bool) + self.assertEqual(type(main.isRedirectPage()), bool) + self.assertEqual(type(main.isEmpty()), bool) + self.assertEqual(type(main.toggleTalkPage()), type(main)) + self.assertEqual(type(main.isDisambig()), bool) + self.assertEqual(type(main.canBeEdited()), bool) + self.assertEqual(type(main.botMayEdit()), bool) + for p in main.getReferences(): + self.assertEqual(type(p), type(main)) + for p in main.backlinks(): + self.assertEqual(type(p), type(main)) + for p in main.embeddedin(): + self.assertEqual(type(p), type(main)) + for p in main.linkedPages(): + self.assertEqual(type(p), type(main)) + for p in main.interwiki(): + self.assertEqual(type(p), pywikibot.page.Link) + for p in main.langlinks(): + self.assertEqual(type(p), pywikibot.page.Link) + for p in main.imagelinks(): + self.assertEqual(type(p), pywikibot.page.ImagePage) + for p in main.templates(): + self.assertEqual(type(p), type(main)) + # todo - templatesWithParameters + for p in main.categories(): + self.assertEqual(type(p), pywikibot.page.Category) + for p in main.extlinks(): + self.assertEqual(type(p), unicode) + # more to come
if __name__ == '__main__': try:
Modified: branches/rewrite/pywikibot/throttle.py =================================================================== --- branches/rewrite/pywikibot/throttle.py 2008-03-24 22:03:25 UTC (rev 5165) +++ branches/rewrite/pywikibot/throttle.py 2008-03-25 15:14:48 UTC (rev 5166) @@ -87,7 +87,6 @@ and this_site == self.mysite \ and this_pid != pid: count += 1 - print line, if this_site != self.mysite or this_pid != pid: processes.append({'pid': this_pid, 'time': ptime,
pywikipedia-l@lists.wikimedia.org