Revision: 5166
Author: russblau
Date: 2008-03-25 15:14:48 +0000 (Tue, 25 Mar 2008)
Log Message:
-----------
additional methods and unit tests
Modified Paths:
--------------
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
branches/rewrite/pywikibot/tests/page_tests.py
branches/rewrite/pywikibot/throttle.py
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-03-24 22:03:25 UTC (rev 5165)
+++ branches/rewrite/pywikibot/data/api.py 2008-03-25 15:14:48 UTC (rev 5166)
@@ -91,6 +91,18 @@
self.max_retries = kwargs.pop("max_retries", 25)
self.retry_wait = kwargs.pop("retry_wait", 5)
self.params = {}
+ if "action" not in kwargs:
+ raise ValueError("'action' specification missing from Request.")
+ if kwargs["action"] == 'query':
+ if "meta" in kwargs:
+ if "userinfo" not in kwargs["meta"]:
+ kwargs["meta"] += "|userinfo"
+ else:
+ kwargs["meta"] = "userinfo"
+ if "uiprop" in kwargs:
+ kwargs["uiprop"] += "|blockinfo|hasmsg"
+ else:
+ kwargs["uiprop"] = "blockinfo|hasmsg"
if "format" not in kwargs:
self.params["format"] = "json"
if "maxlag" not in kwargs:
@@ -131,7 +143,6 @@
uri = self.site.scriptpath() + "/api.php"
params = urllib.urlencode(self.params)
while True:
- # TODO wait on errors
# TODO catch http errors
try:
if self.params.get("action", "") in ("login",):
@@ -154,7 +165,6 @@
except ValueError:
# if the result isn't valid JSON, there must be a server
# problem. Wait a few seconds and try again
- # TODO: implement a throttle
logging.warning(
"Non-JSON response received from server %s; the server may be down."
% self.site)
@@ -168,6 +178,13 @@
"Unable to process query response of type %s."
% type(result),
{'data': result})
+ if self['action'] == 'query':
+ if 'userinfo' in result.get('query', ()):
+ if hasattr(self.site, '_userinfo'):
+ self.site._userinfo.update(result['query']['userinfo'])
+ else:
+ self.site._userinfo = result['query']['userinfo']
+
if "error" not in result:
return result
if "*" in result["error"]:
@@ -196,14 +213,15 @@
self.max_retries -= 1
if self.max_retries < 0:
raise TimeoutError("Maximum retries attempted without success.")
-
wait = self.retry_wait
if lag is not None:
- if lag > 2 * self.retry_wait:
- wait = min(120, lag // 2)
- logging.warn("Waiting %s seconds before retrying." % self.retry_wait)
+ # in case of database lag, wait half the lag time,
+ # but not less than 5 or more than 120 seconds
+ wait = max(5, min(lag // 2, 120))
+ logging.warn("Waiting %s seconds before retrying." % wait)
time.sleep(wait)
- self.retry_wait = min(120, self.retry_wait * 2)
+ if lag is None:
+ self.retry_wait = min(120, self.retry_wait * 2)
class PageGenerator(object):
@@ -219,11 +237,48 @@
"""
if not generator:
raise ValueError("generator argument is required.")
+ if generator not in self.limits:
+ raise ValueError("Unrecognized generator '%s'" % generator)
self.request = Request(action="query", generator=generator, **kwargs)
+ # set limit to max, if applicable
+ if self.limits[generator]:
+ self.request['g'+self.limits[generator]] = "max"
+ if 'prop' in self.request:
+ self.request['prop'] += "|info|imageinfo"
+ else:
+ self.request['prop'] = 'info|imageinfo'
+ if "inprop" in self.request:
+ if "protection" not in self.request["inprop"]:
+ self.request["inprop"] += "|protection"
+ else:
+ self.request['inprop'] = 'protection'
+ if "iiprop" in self.request:
+ self.request["iiprop"] += '|timestamp|user|comment|url|size|sha1|metadata'
+ else:
+ self.request['iiprop'] = 'timestamp|user|comment|url|size|sha1|metadata'
self.generator = generator
self.site = self.request.site
self.resultkey = "pages" # element to look for in result
+ # dict mapping generator types to their limit parameter names
+
+ limits = {'links': None,
+ 'images': None,
+ 'templates': None,
+ 'categories': None,
+ 'allpages': 'aplimit',
+ 'alllinks': 'allimit',
+ 'allcategories': 'aclimit',
+ 'backlinks': 'bllimit',
+ 'categorymembers': 'cmlimit',
+ 'embeddedin': 'eilimit',
+ 'imageusage': 'iulimit',
+ 'search': 'srlimit',
+ 'watchlist': 'wllimit',
+ 'exturlusage': 'eulimit',
+ 'random': 'rnlimit',
+ }
+
def __iter__(self):
"""Iterate objects for elements found in response."""
# FIXME: this won't handle generators with <redirlinks> subelements
@@ -270,11 +325,29 @@
if 'touched' in pagedata:
p._timestamp = pagedata['touched']
if 'protection' in pagedata:
+ p._protection = {}
for item in pagedata['protection']:
- p._protection[item['key']] = item['level']
+ p._protection[item['type']] = item['level']
return p
+class CategoryPageGenerator(PageGenerator):
+ """Generator that yields Category objects instead of Pages."""
+ def result(self, pagedata):
+ p = PageGenerator.result(self, pagedata)
+ return pywikibot.Category(p)
+
+
+class ImagePageGenerator(PageGenerator):
+ """Generator that yields ImagePage objects instead of Pages."""
+ def result(self, pagedata):
+ p = PageGenerator.result(self, pagedata)
+ image = pywikibot.ImagePage(p)
+ if 'imageinfo' in pagedata:
+ image._imageinfo = pagedata['imageinfo']
+ return image
+
+
class LoginManager(login.LoginManager):
"""Supplies getCookie() method to use API interface."""
def getCookie(self, remember=True, captchaId=None, captchaAnswer=None):
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-03-24 22:03:25 UTC (rev 5165)
+++ branches/rewrite/pywikibot/page.py 2008-03-25 15:14:48 UTC (rev 5166)
@@ -594,19 +594,21 @@
"""
return self.site().getlanglinks(self)
- def imagelinks(self, followRedirects=False, loose=None):
+ def imagelinks(self, followRedirects=None, loose=None):
"""Iterate ImagePage objects for images displayed on this Page.
- @param followRedirects: if an image link redirects to another page,
- yield the redirect target instead of the original link
+ @param followRedirects: DEPRECATED and ignored
@param loose: DEPRECATED and ignored
@return: a generator that yields ImagePage objects.
"""
+ if followRedirects is not None:
+ logging.debug(
+ u"Page.imagelinks(followRedirects) option is deprecated.")
if loose is not None:
logging.debug(
u"Page.imagelinks(loose) option is deprecated.")
- return self.site().getimages(followRedirects)
+ return self.site().getimages(self)
def templates(self):
"""Iterate Page objects for templates used on this Page.
@@ -641,7 +643,7 @@
if nofollow_redirects is not None:
logging.debug(
u"Page.categories(nofollow_redirects) option is deprecated.")
- return self.site().categories(withSortKey=withSortKey)
+ return self.site().getcategories(self, withSortKey=withSortKey)
def extlinks(self):
"""Iterate all external URLs (not interwiki links) from this page.
@@ -1041,7 +1043,7 @@
class Category(Page):
"""A page in the Category: namespace"""
- def __init__(self, source, title, insite=None, sortKey=None):
+ def __init__(self, source, title=u"", insite=None, sortKey=None):
"""All parameters are the same as for Page() constructor, except:
@param sortKey: DEPRECATED (use .aslink() method instead)
@@ -1094,9 +1096,9 @@
recurse = recurse - 1
if not hasattr(self, "_subcats"):
self._subcats = []
- for member in self.site().categorymembers(self, namespaces=[14]):
+ for member in self.site().getcategorymembers(self, namespaces=[14]):
subcat = Category(self.site(), member.title())
- self.subcats.append(subcat)
+ self._subcats.append(subcat)
yield subcat
if recurse:
for item in subcat.subcategories(recurse):
@@ -1119,9 +1121,10 @@
@type recurse: int or bool
"""
- namespaces = self.site().namespaces()
- namespaces.remove(14)
- for member in self.site().categorymembers(self, namespaces=namespaces):
+ namespaces = [x for x in self.site().namespaces().keys()
+ if x>=0 and x!=14]
+ for member in self.site().getcategorymembers(self,
+ namespaces=namespaces):
yield member
if recurse:
if not isinstance(recurse, bool) and recurse:
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-03-24 22:03:25 UTC (rev 5165)
+++ branches/rewrite/pywikibot/site.py 2008-03-25 15:14:48 UTC (rev 5166)
@@ -72,7 +72,7 @@
@type fam: str or Family
@param user: bot user name (optional)
@type user: str
-
+
"""
self._lang = code.lower()
if isinstance(fam, basestring) or fam is None:
@@ -106,7 +106,7 @@
pt_min = min(config.minthrottle, config.put_throttle)
self.put_throttle = Throttle(self, pt_min, config.maxthrottle)
self.put_throttle.setDelay(config.put_throttle)
-
+
gt_min = min(config.minthrottle, config.get_throttle)
self.get_throttle = Throttle(self, gt_min, config.maxthrottle)
self.get_throttle.setDelay(config.get_throttle)
@@ -119,6 +119,7 @@
"""Return the site's language code."""
# N.B. this code does not always identify a language as such, but
# may identify a wiki that is part of any family grouping
+ # FIXME: need to separate language (for i18n purposes) from code
return self._lang
def user(self):
@@ -145,7 +146,7 @@
return self.family().name+':'+self.language()
__str__ = sitename
-
+
def __repr__(self):
return 'Site("%s", "%s")' % (self.language(), self.family().name)
@@ -238,7 +239,7 @@
finally:
self._mutex.release()
-
+
class APISite(BaseSite):
"""API interface to MediaWiki site.
@@ -337,8 +338,8 @@
14: [u"Category"],
15: [u"Category talk"],
}
-# self.getsiteinfo()
return
+
# ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__()
self._mediawiki_messages = {}
self.nocapitalize = self._lang in self.family().nocapitalize
@@ -368,7 +369,7 @@
if self._userinfo['name'] != self._username:
return False
return (not sysop) or 'sysop' in self._userinfo['groups']
-
+
def loggedInAs(self, sysop = False):
"""Return the current username if logged in, otherwise return None.
@@ -417,10 +418,9 @@
uidata = uirequest.submit()
assert 'query' in uidata, \
"API userinfo response lacks 'query' key"
- uidata = uidata['query']
- assert 'userinfo' in uidata, \
+ assert 'userinfo' in uidata['query'], \
"API userinfo response lacks 'userinfo' key"
- self._userinfo = uidata['userinfo']
+ self._userinfo = uidata['query']['userinfo']
return self._userinfo
def getsiteinfo(self):
@@ -436,6 +436,7 @@
sidata = sirequest.submit()
except api.APIError:
# hack for older sites that don't support 1.12 properties
+ # probably should delete if we're not going to support pre-1.12
sirequest = api.Request(
site=self,
action="query",
@@ -443,7 +444,7 @@
siprop="general|namespaces"
)
sidata = sirequest.submit()
-
+
assert 'query' in sidata, \
"API siteinfo response lacks 'query' key"
sidata = sidata['query']
@@ -497,23 +498,16 @@
@param page: The Page to get links to.
@param followRedirects: Also return links to redirects pointing to
- the given page. [Not yet implemented on API]
+ the given page.
@param filterRedirects: If True, only return redirects to the given
page. If False, only return non-redirect links. If None, return
both (no filtering).
@param namespaces: If present, only return links from the namespaces
in this list.
-
+
"""
- if 'bot' in self.getuserinfo()['groups']:
- limit = 5000
- else:
- limit = 500
- if followRedirects:
- limit = limit / 2
bltitle = page.title(withSection=False)
- blgen = api.PageGenerator("backlinks", gbltitle=bltitle,
- gbllimit=str(limit))
+ blgen = api.PageGenerator("backlinks", gbltitle=bltitle)
if namespaces is not None:
blgen.request["gblnamespace"] = u"|".join(unicode(ns)
for ns in namespaces)
@@ -524,13 +518,10 @@
blgen.request["gblredirect"] = ""
return blgen
- def getembeddedin(self, page, followRedirects=False, filterRedirects=None,
- namespaces=None):
+ def getembeddedin(self, page, filterRedirects=None, namespaces=None):
"""Iterate all pages that embedded the given page as a template.
@param page: The Page to get inclusions for.
- @param followRedirects: Also return pages transcluding redirects to
- the given page. [Not yet implemented on API]
@param filterRedirects: If True, only return redirects that embed
the given page. If False, only return non-redirect links. If
None, return both (no filtering).
@@ -539,20 +530,13 @@
"""
eititle = page.title(withSection=False)
- if 'bot' in self.getuserinfo()['groups']:
- limit = 5000
- else:
- limit = 500
- eigen = api.PageGenerator("embeddedin", geititle=eititle,
- geilimit=str(limit))
+ eigen = api.PageGenerator("embeddedin", geititle=eititle)
if namespaces is not None:
eigen.request["geinamespace"] = u"|".join(unicode(ns)
for ns in namespaces)
if filterRedirects is not None:
eigen.request["geifilterredir"] = filterRedirects and "redirects"\
or "nonredirects"
- if followRedirects:
- eigen.request["geiredirect"] = ""
return eigen
def getreferences(self, page, followRedirects, filterRedirects,
@@ -565,10 +549,65 @@
import itertools
return itertools.chain(self.getbacklinks(
page, followRedirects, filterRedirects),
- self.getembeddedin(
- page, followRedirects, filterRedirects)
+ self.getembeddedin(page, filterRedirects)
)
+ def getlinks(self, page, namespaces=None):
+ """Iterate internal wikilinks contained (or transcluded) on page."""
+ pltitle = page.title(withSection=False)
+ plgen = api.PageGenerator("links", titles=pltitle)
+ if namespaces is not None:
+ plgen.request["gplnamespace"] = u"|".join(unicode(ns)
+ for ns in namespaces)
+ return plgen
+
+ def getcategories(self, page, withSortKey=False):
+ """Iterate categories to which page belongs."""
+ # Sortkey doesn't seem to work with generator; FIXME
+ cltitle = page.title(withSection=False)
+ clgen = api.CategoryPageGenerator("categories", titles=cltitle)
+ return clgen
+
+ def getimages(self, page):
+ """Iterate images used (not just linked) on the page."""
+ imtitle = page.title(withSection=False)
+ imgen = api.ImagePageGenerator("images", titles=imtitle)
+ return imgen
+
+ def gettemplates(self, page, namespaces=None):
+ """Iterate templates transcluded (not just linked) on the page."""
+ tltitle = page.title(withSection=False)
+ tlgen = api.PageGenerator("templates", titles=tltitle)
+ if namespaces is not None:
+ tlgen.request["gtlnamespace"] = u"|".join(unicode(ns)
+ for ns in namespaces)
+ return tlgen
+
+ def getcategorymembers(self, category, namespaces=None):
+ """Iterate members of specified category.
+
+ @param category: The Category to iterate.
+ @param namespaces: If present, only return category members from
+ these namespaces. For example, use namespaces=[14] to yield
+ subcategories, use namespaces=[6] to yield image files, etc. Note,
+ however, that the iterated values are always Page objects, even
+ if in the Category or Image namespace.
+ @type namespaces: list of ints
+
+ """
+ if category.namespace() != 14:
+ raise ValueError(
+ "Cannot get category members of non-Category page '%s'"
+ % category.title())
+ cmtitle = category.title(withSection=False)
+ cmgen = api.PageGenerator("categorymembers", gcmtitle=cmtitle,
+ gcmprop="ids|title|sortkey")
+ if namespaces is not None:
+ cmgen.request["gcmnamespace"] = u"|".join(unicode(ns)
+ for ns in namespaces)
+ return cmgen
+
+
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
class NotImplementedYet:
@@ -660,7 +699,7 @@
continue
l.append(key + '=' + value)
- # wpEditToken is explicitly added as last value.
+ # wpEditToken is explicitly added as last value.
# If a premature connection abort occurs while putting, the server will
# not have received an edit token and thus refuse saving the page
if wpEditToken != None:
Modified: branches/rewrite/pywikibot/tests/page_tests.py
===================================================================
--- branches/rewrite/pywikibot/tests/page_tests.py 2008-03-24 22:03:25 UTC (rev 5165)
+++ branches/rewrite/pywikibot/tests/page_tests.py 2008-03-25 15:14:48 UTC (rev 5166)
@@ -160,6 +160,46 @@
self.assertEqual(p2.isCategory(), True)
self.assertEqual(p3.isCategory(), False)
+ # testIsImage -- todo
+
+ def testApiMethods(self):
+ """Test various methods that rely on API."""
+ # since there is no way to predict what data the wiki will return,
+ # we only check that the returned objects are of correct type.
+ main = pywikibot.Page(site, u"Main Page")
+ self.assertEqual(type(main.get()), unicode)
+ self.assertEqual(type(main.latestRevision()), int)
+ self.assertEqual(type(main.userName()), unicode)
+ self.assertEqual(type(main.isIpEdit()), bool)
+ self.assertEqual(type(main.exists()), bool)
+ self.assertEqual(type(main.isRedirectPage()), bool)
+ self.assertEqual(type(main.isEmpty()), bool)
+ self.assertEqual(type(main.toggleTalkPage()), type(main))
+ self.assertEqual(type(main.isDisambig()), bool)
+ self.assertEqual(type(main.canBeEdited()), bool)
+ self.assertEqual(type(main.botMayEdit()), bool)
+ for p in main.getReferences():
+ self.assertEqual(type(p), type(main))
+ for p in main.backlinks():
+ self.assertEqual(type(p), type(main))
+ for p in main.embeddedin():
+ self.assertEqual(type(p), type(main))
+ for p in main.linkedPages():
+ self.assertEqual(type(p), type(main))
+ for p in main.interwiki():
+ self.assertEqual(type(p), pywikibot.page.Link)
+ for p in main.langlinks():
+ self.assertEqual(type(p), pywikibot.page.Link)
+ for p in main.imagelinks():
+ self.assertEqual(type(p), pywikibot.page.ImagePage)
+ for p in main.templates():
+ self.assertEqual(type(p), type(main))
+ # todo - templatesWithParameters
+ for p in main.categories():
+ self.assertEqual(type(p), pywikibot.page.Category)
+ for p in main.extlinks():
+ self.assertEqual(type(p), unicode)
+ # more to come
if __name__ == '__main__':
try:
Modified: branches/rewrite/pywikibot/throttle.py
===================================================================
--- branches/rewrite/pywikibot/throttle.py 2008-03-24 22:03:25 UTC (rev 5165)
+++ branches/rewrite/pywikibot/throttle.py 2008-03-25 15:14:48 UTC (rev 5166)
@@ -87,7 +87,6 @@
and this_site == self.mysite \
and this_pid != pid:
count += 1
- print line,
if this_site != self.mysite or this_pid != pid:
processes.append({'pid': this_pid,
'time': ptime,