Revision: 4984
Author: russblau
Date: 2008-02-06 16:21:18 +0000 (Wed, 06 Feb 2008)
Log Message:
-----------
Update docu and tests; add PageGenerator object in api.py
Modified Paths:
--------------
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/data/http.py
branches/rewrite/pywikibot/data/threadedhttp.py
branches/rewrite/pywikibot/tests/api_tests.py
branches/rewrite/pywikibot/tests/dummy.py
Removed Paths:
-------------
branches/rewrite/pywikibot/data/test.py
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-02-06 14:09:28 UTC (rev 4983)
+++ branches/rewrite/pywikibot/data/api.py 2008-02-06 16:21:18 UTC (rev 4984)
@@ -3,7 +3,7 @@
Interface functions to Mediawiki's api.php
"""
#
-# (C) Pywikipedia bot team, 2007
+# (C) Pywikipedia bot team, 2007-08
#
# Distributed under the terms of the MIT license.
#
@@ -17,10 +17,13 @@
import traceback
import time
import urllib
+# TODO - replace when Page object is written
+from pywikibot.tests.dummy import TestPage as Page
lagpattern = re.compile(r"Waiting for [\d.]+: (?P<lag>\d+) seconds? lagged")
+
class APIError(Exception):
"""The wiki site returned an error message."""
def __init__(self, code, info, **kwargs):
@@ -43,36 +46,45 @@
Attributes of this object (except for the special parameters listed
below) get passed as commands to api.php, and can be get or set using
- the dict interface. All attributes must be strings (unicode).
- Attributes supplied without values are passed to the API as keys.
+ the dict interface. All attributes must be strings (or unicode). Use
+ an empty string for parameters that don't require a value (e.g.,
+ "action=query&...&redirects").
@param site: The Site to which the request will be submitted. If not
supplied, uses the user's configured default Site.
- @param format: (optional) Defaults to "json"
@param max_retries: (optional) Maximum number of times to retry after
errors, defaults to 25
@param retry_wait: (optional) Minimum time to wait after an error,
defaults to 5 seconds (doubles each retry until max of 120 is
reached)
+ @param format: (optional) Defaults to "json"
Example:
>> r = Request(site=mysite,
action="query", meta="userinfo")
>> # This is equivalent to
>> #
http://[path]/api.php?action=query&meta=userinfo&format=json +
>>> # r.data is undefined until request is submitted
+ >>> print r.data
+ Traceback (most recent call last):
+ ...
+ AttributeError: Request instance has no attribute 'data'
>> # change a parameter
>> r['meta'] = "userinfo|siteinfo"
>> # add a new parameter
>> r['siprop'] = "namespaces"
>> r.params
- {'action': 'query', 'meta':
'userinfo|siteinfo', 'siprop': 'namespaces',
- 'format': 'json'}
+ {'action': 'query', 'meta': 'userinfo|siteinfo',
'maxlag': '5', 'siprop': 'namespaces', 'format':
'json'}
>> data = r.submit()
>> type(data)
- <type 'dict'>
+ <type 'dict'>
+ >>> data.keys()
+ [u'query']
+ >>> data[u'query'].keys()
+ [u'userinfo', u'namespaces']
"""
- def __init__(self, *args, **kwargs):
+ def __init__(self, **kwargs):
self.site = kwargs.pop("site", None)
# else use defaultSite() ... when written
self.max_retries = kwargs.pop("max_retries", 25)
@@ -81,8 +93,8 @@
if "format" not in kwargs:
self.params["format"] = "json"
if "maxlag" not in kwargs:
- self.params["maxlag"] = "5"
- self.update(*args, **kwargs)
+            self.params["maxlag"] = "5" # replace with configurable constant?
+ self.update(**kwargs)
# implement dict interface
def __getitem__(self, key):
@@ -106,13 +118,6 @@
def iteritems(self):
return self.params.iteritems()
- def update(self, *args, **kwargs):
- """Update the request parameters"""
- self.params.update(kwargs)
- for arg in args:
- if arg not in self.params:
- self.params[arg] = ""
-
def submit(self):
"""Submit a query and parse the response.
@@ -163,6 +168,9 @@
{'data': result})
if "error" not in result:
return result
+ if "*" in result["error"]:
+ # help text returned
+                result['error']['help'] = result['error'].pop("*")
code = result["error"].pop("code", "Unknown")
info = result["error"].pop("info", None)
if code == "maxlag":
@@ -172,25 +180,90 @@
"Pausing due to database lag: " + info)
self.wait(int(lag.group("lag")))
continue
+ if code in (u'internal_api_error_DBConnectionError', ):
+ self.wait()
+ continue
# raise error
- raise APIError(code, info, **result["error"])
+ try:
+ raise APIError(code, info, **result["error"])
+ except TypeError:
+ raise RuntimeError(result)
-
def wait(self, lag=None):
"""Determine how long to wait after a failed
request."""
self.max_retries -= 1
if self.max_retries < 0:
raise TimeoutError("Maximum retries attempted without success.")
-
+
+ wait = self.retry_wait
if lag is not None:
if lag > 2 * self.retry_wait:
- self.retry_wait = min(120, lag // 2)
+ wait = min(120, lag // 2)
logging.warn("Waiting %s seconds before retrying." % self.retry_wait)
- time.sleep(self.retry_wait)
+ time.sleep(wait)
self.retry_wait = min(120, self.retry_wait * 2)
+
+
+class PageGenerator(object):
+ """Iterator for response to a request of type
action=query&generator=foo."""
+ def __init__(self, generator="", **kwargs):
+ """
+ Required and optional parameters are as for C{Request}, except that
+ action=query is assumed and generator is required.
+ @param generator: the "generator=" type from api.php
+ @type generator: str
+ """
+ if not generator:
+ raise ValueError("generator argument is required.")
+ self.request = Request(action="query", generator=generator, **kwargs)
+ self.generator = generator
+ self.site = self.request.site
+
+ def __iter__(self):
+ """Iterate Page objects for pages found in
response."""
+ while True:
+            # following "if" is used for testing with plugged-in data; it wouldn't
+ # be needed for actual usage
+ if not hasattr(self, "data"):
+ self.data = self.request.submit()
+ if not self.data or not isinstance(self.data, dict):
+ raise StopIteration
+ if not "query" in self.data:
+ raise StopIteration
+ query = self.data["query"]
+ if not "pages" in query:
+ raise StopIteration
+ # TODO: instead of "yield Page", yield a Page returned by a
+ # method that converts the dict info to a Page object
+ if isinstance(query["pages"], dict):
+ for v in query["pages"].itervalues():
+ yield Page(self.site, v['title'])
+ elif isinstance(query["pages"], list):
+ for v in query["pages"]:
+ yield Page(self.site, v['title'])
+ else:
+ raise APIError("Unknown",
+ "Unknown format in ['query']['pages']
value.",
+ data=query["pages"])
+ if not "query-continue" in self.data:
+ return
+ if not self.generator in self.data["query-continue"]:
+ raise APIError("Unknown",
+ "Missing '%s' key in
['query-continue'] value.",
+ data=self.data["query-continue"])
+ self.request.update(self.data["query-continue"][self.generator])
+ del self.data
+
+
if __name__ == "__main__":
- from pywikibot.tests.dummy import TestSite as Site
+ from pywikibot.tests.dummy import TestSite as Site, TestPage as Page
mysite = Site("en.wikipedia.org")
logging.getLogger().setLevel(logging.DEBUG)
+ def _test():
+ import doctest
+ doctest.testmod()
+ _test()
+
+
Modified: branches/rewrite/pywikibot/data/http.py
===================================================================
--- branches/rewrite/pywikibot/data/http.py 2008-02-06 14:09:28 UTC (rev 4983)
+++ branches/rewrite/pywikibot/data/http.py 2008-02-06 16:21:18 UTC (rev 4984)
@@ -5,12 +5,11 @@
This module handles communication between the bot and the HTTP threads.
This module is responsible for
-
-- Setting up a connection pool
-- Providing a (blocking) interface for HTTP requests
-- Translate site objects with query strings into urls
-- Urlencoding all data
-- Basic HTTP error handling
+ - Setting up a connection pool
+ - Providing a (blocking) interface for HTTP requests
+ - Translate site objects with query strings into urls
+ - Urlencoding all data
+ - Basic HTTP error handling
"""
#
@@ -58,10 +57,13 @@
atexit.register(_flush)
def request(site, uri, *args, **kwargs):
- """ @param site The Site to connect to
- All other parameters are the same as L{httplib2.Http.request}, but
- the uri is relative
- @return The received data (a unicode string).
+ """Queue a request to be submitted to Site.
+
+ All parameters not listed below are the same as
+ L{httplib2.Http.request}, but the uri is relative
+
+ @param site: The Site to connect to
+ @return: The received data (a unicode string).
"""
baseuri = "%s://%s/" % (site.protocol(), site.hostname())
uri = urlparse.urljoin(baseuri, uri)
Deleted: branches/rewrite/pywikibot/data/test.py
===================================================================
--- branches/rewrite/pywikibot/data/test.py 2008-02-06 14:09:28 UTC (rev 4983)
+++ branches/rewrite/pywikibot/data/test.py 2008-02-06 16:21:18 UTC (rev 4984)
@@ -1,41 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Set of test suites for the data module.
-"""
-#
-# (C) Pywikipedia bot team, 2007
-#
-# Distributed under the terms of the MIT license.
-#
-__version__ = '$Id: $'
-
-
-import unittest
-import http, api
-
-
-class HTTPTest(unittest.TestCase):
-
- def setUp(self):
-        self.HTTP = http.HTTP(None) #TODO: Replace None with an actual Site object once implemented
-
- def testGETMainPage(self):
- """GETting the Main Page should give a HTTP 200
response."""
-        status, data = self.HTTP.GET('/w/index.php', {'title' : 'Main_Page'})
- self.assertEqual(status, 200)
-
-
-class APITest(unittest.TestCase):
-
- def setUp(self):
-        self.API = api.API(None) #TODO: Replace None with an actual Site object once implemented
-
- def testEmptyQuery(self):
- """Querying for nothing should return an empty
list."""
- status, data = self.API.query()
- self.assertEqual(status, 200)
- self.assertEqual(data, '[]')
-
-
-if __name__ == '__main__':
- unittest.main()
Modified: branches/rewrite/pywikibot/data/threadedhttp.py
===================================================================
--- branches/rewrite/pywikibot/data/threadedhttp.py 2008-02-06 14:09:28 UTC (rev 4983)
+++ branches/rewrite/pywikibot/data/threadedhttp.py 2008-02-06 16:21:18 UTC (rev 4984)
@@ -1,11 +1,11 @@
# -*- coding: utf-8 -*-
""" Httplib2 threaded cookie layer
-This class extends Httplib2, adding support for:
-- Cookies, guarded for cross-site redirects
-- Thread safe ConnectionPool and LockableCookieJar classes
-- HttpProcessor thread class
-- HttpRequest object
+This class extends httplib2, adding support for:
+ - Cookies, guarded for cross-site redirects
+ - Thread safe ConnectionPool and LockableCookieJar classes
+ - HttpProcessor thread class
+ - HttpRequest object
"""
@@ -36,13 +36,14 @@
class ConnectionPool(object):
- """A thread-safe connection pool.
+ """A thread-safe connection pool."""
+
+ def __init__(self, maxnum=5):
+ """
+ @param maxnum: Maximum number of connections per identifier.
+ The pool drops excessive connections added.
- @param maxnum: Maximum number of connections per identifier.
- The pool drops excessive connections added.
-
- """
- def __init__(self, maxnum=5):
+ """
self.connections = {}
self.lock = threading.Lock()
self.maxnum = maxnum
@@ -98,7 +99,7 @@
self.lock.release()
class LockableCookieJar(cookielib.CookieJar):
- """ CookieJar with integrated Lock object """
+ """CookieJar with integrated Lock object."""
def __init__(self, *args, **kwargs):
cookielib.CookieJar.__init__(self, *args, **kwargs)
self.lock = threading.Lock()
@@ -109,15 +110,17 @@
Overrides httplib2's internal redirect support to prevent cookies being
eaten by the wrong sites.
- @param cookiejar: (optional) CookieJar to use. A new one will be used
- when not supplied.
- @param connection_pool: (optional) Connection pool to use. A new one
- will be used when not supplied.
- @param max_redirects: (optional) The maximum number of redirects to
- follow. 5 is default.
-
"""
def __init__(self, *args, **kwargs):
+ """
+ @param cookiejar: (optional) CookieJar to use. A new one will be
+ used when not supplied.
+ @param connection_pool: (optional) Connection pool to use. A new one
+ will be used when not supplied.
+ @param max_redirects: (optional) The maximum number of redirects to
+ follow. 5 is default.
+
+ """
self.cookiejar = kwargs.pop('cookiejar', LockableCookieJar())
self.connection_pool = kwargs.pop('connection_pool', ConnectionPool())
self.max_redirects = kwargs.pop('max_redirects', 5)
@@ -268,6 +271,7 @@
"""
def __init__(self, *args, **kwargs):
+ """See C{Http.request} for parameters."""
self.args = args
self.kwargs = kwargs
self.data = None
@@ -275,17 +279,17 @@
class HttpProcessor(threading.Thread):
- """ Thread object to spawn multiple HTTP connection threads.
+ """Thread object to spawn multiple HTTP connection
threads."""
+ def __init__(self, queue, cookiejar, connection_pool):
+ """
+ @param queue: The C{Queue.Queue} object that contains L{HttpRequest}
+ objects.
+ @param cookiejar: The C{LockableCookieJar} cookie object to share among
+ requests.
+ @param connection_pool: The C{ConnectionPool} object which contains
+ connections to share among requests.
- @param queue: The C{Queue.Queue} object that contains L{HttpRequest}
- objects.
- @param cookiejar: The C{LockableCookieJar} cookie object to share among
- requests.
- @param connection_pool: The C{ConnectionPool} object which contains
- connections to share among requests.
-
- """
- def __init__(self, queue, cookiejar, connection_pool):
+ """
threading.Thread.__init__(self)
self.queue = queue
self.http = Http(cookiejar=cookiejar, connection_pool=connection_pool)
@@ -305,7 +309,7 @@
if item.lock:
item.lock.release()
-
+
# Metaweb Technologies, Inc. License:
# ========================================================================
# The following dummy classes are:
Modified: branches/rewrite/pywikibot/tests/api_tests.py
===================================================================
--- branches/rewrite/pywikibot/tests/api_tests.py 2008-02-06 14:09:28 UTC (rev 4983)
+++ branches/rewrite/pywikibot/tests/api_tests.py 2008-02-06 16:21:18 UTC (rev 4984)
@@ -1,7 +1,7 @@
import unittest
import pywikibot.data.api as api
-from pywikibot.tests.dummy import TestSite as Site
+from pywikibot.tests.dummy import TestSite as Site, TestPage as Page
mysite = Site('en.wikipedia.org')
@@ -9,7 +9,7 @@
def testObjectCreation(self):
"""Test that api.Request() creates an object with desired
attributes"""
- req = api.Request(mysite, "foo", bar="test")
+ req = api.Request(site=mysite, foo="", bar="test")
self.assert_(req)
self.assertEqual(req.site, mysite)
self.assert_("foo" in req.params)
@@ -18,7 +18,52 @@
# test item assignment
req["one"] = "1"
self.assertEqual(req.params['one'], "1")
+ # test compliance with dict interface
+        # req.keys() should contain "foo", "bar", "format", "maxlag", "one"
+ self.assertEqual(len(req.keys()), 5)
+ self.assert_("test" in req.values())
+ self.assert_(all(len(item) == 2 for item in req.items()))
+
+class TestListGenerator(unittest.TestCase):
+ def setUp(self):
+ self.gen = api.PageGenerator(site=mysite,
+ generator="links",
+ titles="User:R'n'B")
+ # following test data is copied from an actual api.php response
+ self.gen.data = {
+ "query": {"pages": {"296589":
{"pageid": 296589,
+ "ns": 0,
+ "title":
"Broadcaster.com"
+ },
+ "13918157": {"pageid": 13918157,
+ "ns": 0,
+ "title": "Broadcaster
(definition)"
+ },
+ "156658": {"pageid": 156658,
+ "ns": 0,
+ "title": "Wiktionary"
+ },
+ "47757": {"pageid": 47757,
+ "ns": 4,
+ "title":
"Wikipedia:Disambiguation"
+ }
+ }
+ }
+ }
+
+ def testGeneratorResults(self):
+ """Test that PageGenerator yields pages with expected
attributes."""
+ titles = ["Broadcaster.com", "Broadcaster (definition)",
+ "Wiktionary", "Wikipedia:Disambiguation"]
+ results = [p for p in self.gen]
+ self.assertEqual(len(results), 4)
+ for page in results:
+ self.assertEqual(type(page), Page)
+ self.assertEqual(page.site(), mysite)
+ self.assert_(page.title() in titles)
+
+
if __name__ == '__main__':
try:
unittest.main()
Modified: branches/rewrite/pywikibot/tests/dummy.py
===================================================================
--- branches/rewrite/pywikibot/tests/dummy.py 2008-02-06 14:09:28 UTC (rev 4983)
+++ branches/rewrite/pywikibot/tests/dummy.py 2008-02-06 16:21:18 UTC (rev 4984)
@@ -32,7 +32,6 @@
def __init__(self, site, title):
self._site = site
self._title = title
-
def site(self):
return self._site
def title(self):