Revision: 4984 Author: russblau Date: 2008-02-06 16:21:18 +0000 (Wed, 06 Feb 2008)
Log Message: ----------- Update documentation and tests; add PageGenerator object in api.py
Modified Paths: -------------- branches/rewrite/pywikibot/data/api.py branches/rewrite/pywikibot/data/http.py branches/rewrite/pywikibot/data/threadedhttp.py branches/rewrite/pywikibot/tests/api_tests.py branches/rewrite/pywikibot/tests/dummy.py
Removed Paths: ------------- branches/rewrite/pywikibot/data/test.py
Modified: branches/rewrite/pywikibot/data/api.py =================================================================== --- branches/rewrite/pywikibot/data/api.py 2008-02-06 14:09:28 UTC (rev 4983) +++ branches/rewrite/pywikibot/data/api.py 2008-02-06 16:21:18 UTC (rev 4984) @@ -3,7 +3,7 @@ Interface functions to Mediawiki's api.php """ # -# (C) Pywikipedia bot team, 2007 +# (C) Pywikipedia bot team, 2007-08 # # Distributed under the terms of the MIT license. # @@ -17,10 +17,13 @@ import traceback import time import urllib +# TODO - replace when Page object is written +from pywikibot.tests.dummy import TestPage as Page
lagpattern = re.compile(r"Waiting for [\d.]+: (?P<lag>\d+) seconds? lagged")
+ class APIError(Exception): """The wiki site returned an error message.""" def __init__(self, code, info, **kwargs): @@ -43,36 +46,45 @@
Attributes of this object (except for the special parameters listed below) get passed as commands to api.php, and can be get or set using - the dict interface. All attributes must be strings (unicode). - Attributes supplied without values are passed to the API as keys. + the dict interface. All attributes must be strings (or unicode). Use + an empty string for parameters that don't require a value (e.g., + "action=query&...&redirects").
@param site: The Site to which the request will be submitted. If not supplied, uses the user's configured default Site. - @param format: (optional) Defaults to "json" @param max_retries: (optional) Maximum number of times to retry after errors, defaults to 25 @param retry_wait: (optional) Minimum time to wait after an error, defaults to 5 seconds (doubles each retry until max of 120 is reached) + @param format: (optional) Defaults to "json"
Example:
>>> r = Request(site=mysite, action="query", meta="userinfo") >>> # This is equivalent to >>> # http://%5Bpath%5D/api.php?action=query&meta=userinfo&format=json + >>> # r.data is undefined until request is submitted + >>> print r.data + Traceback (most recent call last): + ... + AttributeError: Request instance has no attribute 'data' >>> # change a parameter >>> r['meta'] = "userinfo|siteinfo" >>> # add a new parameter >>> r['siprop'] = "namespaces" >>> r.params - {'action': 'query', 'meta': 'userinfo|siteinfo', 'siprop': 'namespaces', - 'format': 'json'} + {'action': 'query', 'meta': 'userinfo|siteinfo', 'maxlag': '5', 'siprop': 'namespaces', 'format': 'json'} >>> data = r.submit() >>> type(data) - <type 'dict'> + <type 'dict'> + >>> data.keys() + [u'query'] + >>> data[u'query'].keys() + [u'userinfo', u'namespaces']
""" - def __init__(self, *args, **kwargs): + def __init__(self, **kwargs): self.site = kwargs.pop("site", None) # else use defaultSite() ... when written self.max_retries = kwargs.pop("max_retries", 25) @@ -81,8 +93,8 @@ if "format" not in kwargs: self.params["format"] = "json" if "maxlag" not in kwargs: - self.params["maxlag"] = "5" - self.update(*args, **kwargs) + self.params["maxlag"] = "5" # replace with configurable constant? + self.update(**kwargs)
# implement dict interface def __getitem__(self, key): @@ -106,13 +118,6 @@ def iteritems(self): return self.params.iteritems()
- def update(self, *args, **kwargs): - """Update the request parameters""" - self.params.update(kwargs) - for arg in args: - if arg not in self.params: - self.params[arg] = "" - def submit(self): """Submit a query and parse the response.
@@ -163,6 +168,9 @@ {'data': result}) if "error" not in result: return result + if "*" in result["error"]: + # help text returned + result['error']['help'] = result['error'].pop("*") code = result["error"].pop("code", "Unknown") info = result["error"].pop("info", None) if code == "maxlag": @@ -172,25 +180,90 @@ "Pausing due to database lag: " + info) self.wait(int(lag.group("lag"))) continue + if code in (u'internal_api_error_DBConnectionError', ): + self.wait() + continue # raise error - raise APIError(code, info, **result["error"]) + try: + raise APIError(code, info, **result["error"]) + except TypeError: + raise RuntimeError(result)
- def wait(self, lag=None): """Determine how long to wait after a failed request.""" self.max_retries -= 1 if self.max_retries < 0: raise TimeoutError("Maximum retries attempted without success.") - + + wait = self.retry_wait if lag is not None: if lag > 2 * self.retry_wait: - self.retry_wait = min(120, lag // 2) + wait = min(120, lag // 2) logging.warn("Waiting %s seconds before retrying." % self.retry_wait) - time.sleep(self.retry_wait) + time.sleep(wait) self.retry_wait = min(120, self.retry_wait * 2) + + +class PageGenerator(object): + """Iterator for response to a request of type action=query&generator=foo.""" + def __init__(self, generator="", **kwargs): + """ + Required and optional parameters are as for C{Request}, except that + action=query is assumed and generator is required.
+ @param generator: the "generator=" type from api.php + @type generator: str
+ """ + if not generator: + raise ValueError("generator argument is required.") + self.request = Request(action="query", generator=generator, **kwargs) + self.generator = generator + self.site = self.request.site + + def __iter__(self): + """Iterate Page objects for pages found in response.""" + while True: + # following "if" is used for testing with plugged-in data; it wouldn't + # be needed for actual usage + if not hasattr(self, "data"): + self.data = self.request.submit() + if not self.data or not isinstance(self.data, dict): + raise StopIteration + if not "query" in self.data: + raise StopIteration + query = self.data["query"] + if not "pages" in query: + raise StopIteration + # TODO: instead of "yield Page", yield a Page returned by a + # method that converts the dict info to a Page object + if isinstance(query["pages"], dict): + for v in query["pages"].itervalues(): + yield Page(self.site, v['title']) + elif isinstance(query["pages"], list): + for v in query["pages"]: + yield Page(self.site, v['title']) + else: + raise APIError("Unknown", + "Unknown format in ['query']['pages'] value.", + data=query["pages"]) + if not "query-continue" in self.data: + return + if not self.generator in self.data["query-continue"]: + raise APIError("Unknown", + "Missing '%s' key in ['query-continue'] value.", + data=self.data["query-continue"]) + self.request.update(self.data["query-continue"][self.generator]) + del self.data + + if __name__ == "__main__": - from pywikibot.tests.dummy import TestSite as Site + from pywikibot.tests.dummy import TestSite as Site, TestPage as Page mysite = Site("en.wikipedia.org") logging.getLogger().setLevel(logging.DEBUG) + def _test(): + import doctest + doctest.testmod() + _test() + +
Modified: branches/rewrite/pywikibot/data/http.py =================================================================== --- branches/rewrite/pywikibot/data/http.py 2008-02-06 14:09:28 UTC (rev 4983) +++ branches/rewrite/pywikibot/data/http.py 2008-02-06 16:21:18 UTC (rev 4984) @@ -5,12 +5,11 @@ This module handles communication between the bot and the HTTP threads.
This module is responsible for - -- Setting up a connection pool -- Providing a (blocking) interface for HTTP requests -- Translate site objects with query strings into urls -- Urlencoding all data -- Basic HTTP error handling + - Setting up a connection pool + - Providing a (blocking) interface for HTTP requests + - Translate site objects with query strings into urls + - Urlencoding all data + - Basic HTTP error handling """
# @@ -58,10 +57,13 @@ atexit.register(_flush)
def request(site, uri, *args, **kwargs): - """ @param site The Site to connect to - All other parameters are the same as L{httplib2.Http.request}, but - the uri is relative - @return The received data (a unicode string). + """Queue a request to be submitted to Site. + + All parameters not listed below are the same as + L{httplib2.Http.request}, but the uri is relative + + @param site: The Site to connect to + @return: The received data (a unicode string). """ baseuri = "%s://%s/" % (site.protocol(), site.hostname()) uri = urlparse.urljoin(baseuri, uri)
Deleted: branches/rewrite/pywikibot/data/test.py =================================================================== --- branches/rewrite/pywikibot/data/test.py 2008-02-06 14:09:28 UTC (rev 4983) +++ branches/rewrite/pywikibot/data/test.py 2008-02-06 16:21:18 UTC (rev 4984) @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Set of test suites for the data module. -""" -# -# (C) Pywikipedia bot team, 2007 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id: $' - - -import unittest -import http, api - - -class HTTPTest(unittest.TestCase): - - def setUp(self): - self.HTTP = http.HTTP(None) #TODO: Replace None with an actual Site object once implemented - - def testGETMainPage(self): - """GETting the Main Page should give a HTTP 200 response.""" - status, data = self.HTTP.GET('/w/index.php', {'title' : 'Main_Page'}) - self.assertEqual(status, 200) - - -class APITest(unittest.TestCase): - - def setUp(self): - self.API = api.API(None) #TODO: Replace None with an actual Site object once implemented - - def testEmptyQuery(self): - """Querying for nothing should return an empty list.""" - status, data = self.API.query() - self.assertEqual(status, 200) - self.assertEqual(data, '[]') - - -if __name__ == '__main__': - unittest.main()
Modified: branches/rewrite/pywikibot/data/threadedhttp.py =================================================================== --- branches/rewrite/pywikibot/data/threadedhttp.py 2008-02-06 14:09:28 UTC (rev 4983) +++ branches/rewrite/pywikibot/data/threadedhttp.py 2008-02-06 16:21:18 UTC (rev 4984) @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- """ Httplib2 threaded cookie layer
-This class extends Httplib2, adding support for: -- Cookies, guarded for cross-site redirects -- Thread safe ConnectionPool and LockableCookieJar classes -- HttpProcessor thread class -- HttpRequest object +This class extends httplib2, adding support for: + - Cookies, guarded for cross-site redirects + - Thread safe ConnectionPool and LockableCookieJar classes + - HttpProcessor thread class + - HttpRequest object
"""
@@ -36,13 +36,14 @@
class ConnectionPool(object): - """A thread-safe connection pool. + """A thread-safe connection pool.""" + + def __init__(self, maxnum=5): + """ + @param maxnum: Maximum number of connections per identifier. + The pool drops excessive connections added.
- @param maxnum: Maximum number of connections per identifier. - The pool drops excessive connections added. - - """ - def __init__(self, maxnum=5): + """ self.connections = {} self.lock = threading.Lock() self.maxnum = maxnum @@ -98,7 +99,7 @@ self.lock.release()
class LockableCookieJar(cookielib.CookieJar): - """ CookieJar with integrated Lock object """ + """CookieJar with integrated Lock object.""" def __init__(self, *args, **kwargs): cookielib.CookieJar.__init__(self, *args, **kwargs) self.lock = threading.Lock() @@ -109,15 +110,17 @@ Overrides httplib2's internal redirect support to prevent cookies being eaten by the wrong sites.
- @param cookiejar: (optional) CookieJar to use. A new one will be used - when not supplied. - @param connection_pool: (optional) Connection pool to use. A new one - will be used when not supplied. - @param max_redirects: (optional) The maximum number of redirects to - follow. 5 is default. - """ def __init__(self, *args, **kwargs): + """ + @param cookiejar: (optional) CookieJar to use. A new one will be + used when not supplied. + @param connection_pool: (optional) Connection pool to use. A new one + will be used when not supplied. + @param max_redirects: (optional) The maximum number of redirects to + follow. 5 is default. + + """ self.cookiejar = kwargs.pop('cookiejar', LockableCookieJar()) self.connection_pool = kwargs.pop('connection_pool', ConnectionPool()) self.max_redirects = kwargs.pop('max_redirects', 5) @@ -268,6 +271,7 @@
""" def __init__(self, *args, **kwargs): + """See C{Http.request} for parameters.""" self.args = args self.kwargs = kwargs self.data = None @@ -275,17 +279,17 @@
class HttpProcessor(threading.Thread): - """ Thread object to spawn multiple HTTP connection threads. + """Thread object to spawn multiple HTTP connection threads.""" + def __init__(self, queue, cookiejar, connection_pool): + """ + @param queue: The C{Queue.Queue} object that contains L{HttpRequest} + objects. + @param cookiejar: The C{LockableCookieJar} cookie object to share among + requests. + @param connection_pool: The C{ConnectionPool} object which contains + connections to share among requests.
- @param queue: The C{Queue.Queue} object that contains L{HttpRequest} - objects. - @param cookiejar: The C{LockableCookieJar} cookie object to share among - requests. - @param connection_pool: The C{ConnectionPool} object which contains - connections to share among requests. - - """ - def __init__(self, queue, cookiejar, connection_pool): + """ threading.Thread.__init__(self) self.queue = queue self.http = Http(cookiejar=cookiejar, connection_pool=connection_pool) @@ -305,7 +309,7 @@ if item.lock: item.lock.release()
- + # Metaweb Technologies, Inc. License: # ======================================================================== # The following dummy classes are:
Modified: branches/rewrite/pywikibot/tests/api_tests.py =================================================================== --- branches/rewrite/pywikibot/tests/api_tests.py 2008-02-06 14:09:28 UTC (rev 4983) +++ branches/rewrite/pywikibot/tests/api_tests.py 2008-02-06 16:21:18 UTC (rev 4984) @@ -1,7 +1,7 @@ import unittest import pywikibot.data.api as api
-from pywikibot.tests.dummy import TestSite as Site +from pywikibot.tests.dummy import TestSite as Site, TestPage as Page mysite = Site('en.wikipedia.org')
@@ -9,7 +9,7 @@
def testObjectCreation(self): """Test that api.Request() creates an object with desired attributes""" - req = api.Request(mysite, "foo", bar="test") + req = api.Request(site=mysite, foo="", bar="test") self.assert_(req) self.assertEqual(req.site, mysite) self.assert_("foo" in req.params) @@ -18,7 +18,52 @@ # test item assignment req["one"] = "1" self.assertEqual(req.params['one'], "1") + # test compliance with dict interface + # req.keys() should contain "foo", "bar", "format", "maxlag", "one" + self.assertEqual(len(req.keys()), 5) + self.assert_("test" in req.values()) + self.assert_(all(len(item) == 2 for item in req.items()))
+ +class TestListGenerator(unittest.TestCase): + def setUp(self): + self.gen = api.PageGenerator(site=mysite, + generator="links", + titles="User:R'n'B") + # following test data is copied from an actual api.php response + self.gen.data = { + "query": {"pages": {"296589": {"pageid": 296589, + "ns": 0, + "title": "Broadcaster.com" + }, + "13918157": {"pageid": 13918157, + "ns": 0, + "title": "Broadcaster (definition)" + }, + "156658": {"pageid": 156658, + "ns": 0, + "title": "Wiktionary" + }, + "47757": {"pageid": 47757, + "ns": 4, + "title": "Wikipedia:Disambiguation" + } + } + } + } + + def testGeneratorResults(self): + """Test that PageGenerator yields pages with expected attributes.""" + titles = ["Broadcaster.com", "Broadcaster (definition)", + "Wiktionary", "Wikipedia:Disambiguation"] + results = [p for p in self.gen] + self.assertEqual(len(results), 4) + for page in results: + self.assertEqual(type(page), Page) + self.assertEqual(page.site(), mysite) + self.assert_(page.title() in titles) + + if __name__ == '__main__': try: unittest.main()
Modified: branches/rewrite/pywikibot/tests/dummy.py =================================================================== --- branches/rewrite/pywikibot/tests/dummy.py 2008-02-06 14:09:28 UTC (rev 4983) +++ branches/rewrite/pywikibot/tests/dummy.py 2008-02-06 16:21:18 UTC (rev 4984) @@ -32,7 +32,6 @@ def __init__(self, site, title): self._site = site self._title = title - def site(self): return self._site def title(self):
pywikipedia-l@lists.wikimedia.org