Revision: 4775 Author: valhallasw Date: 2007-12-29 00:56:31 +0000 (Sat, 29 Dec 2007)
Log Message: ----------- Minor fixes to threadedhttp.py Major rewrite of http.py: http.py now starts http threads and handles them; adds exit function to stop them; implements locking mechanisms for asynchronous http requests.
Modified Paths: -------------- branches/rewrite/pywikibot/data/http.py branches/rewrite/pywikibot/data/threadedhttp.py
Modified: branches/rewrite/pywikibot/data/http.py =================================================================== --- branches/rewrite/pywikibot/data/http.py 2007-12-29 00:08:32 UTC (rev 4774) +++ branches/rewrite/pywikibot/data/http.py 2007-12-29 00:56:31 UTC (rev 4775) @@ -1,45 +1,73 @@ -# -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- """ -Basic HTTP access interface (GET/POST/HEAD wrappers). +Basic HTTP access interface. + +This module handles communication between the bot and the HTTP threads. + +This module is responsible for + * Setting up a connection pool + * Providing a (blocking) interface for HTTP requests + * Translate site objects with query strings into urls + * Urlencoding all data + * Basic HTTP error handling """ + # # (C) Pywikipedia bot team, 2007 # # Distributed under the terms of the MIT license. # + __version__ = '$Id: $' +__docformat__ = 'epytext'
+import Queue +import urllib +import urlparse +import logging +import atexit
-import urllib, httplib +import threadedhttp
-class HTTP: +# global variables
- def __init__(self, site): - self.site = site - self.useragent = 'PythonWikipediaBot/2.0' - #TODO: Initiate persistent connection here? +useragent = 'Pywikipediabot/2.0' # This should include some global version string +numthreads = 1 +threads = []
- def GET(self, address, query={}): - return self._request('GET',address + '?' + urllib.urlencode(query)) +connection_pool = threadedhttp.ConnectionPool() +cookie_jar = threadedhttp.LockableCookieJar() +http_queue = Queue.Queue()
- def POST(self, address, query={}): - return self._request('POST',address,urllib.urlencode(query)) +# Build up HttpProcessors +logging.info('Starting %i threads...' % numthreads) +for i in range(numthreads): + proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool) + threads.append(proc) + proc.start()
- def HEAD(self, address, query={}): - return self._request('HEAD',address + '?' + urllib.urlencode(query)) - - def _request(self, method, address, data=''): - #TODO: Reuse said connection. - conn = httplib.HTTPConnection('en.wikipedia.org',80) #TODO: Obviously, get these from the site object (unimplemented yet) - conn.putrequest(method,address) - conn.putheader('User-agent',self.useragent) - conn.putheader('Content-type','application/x-www-form-urlencoded') - conn.putheader('Content-Length',len(data)) - conn.endheaders() - conn.send(data) - - response = conn.getresponse() - rdata = response.read() - - return response.status, rdata +# Prepare flush on quit +def _flush(): + for i in threads: + http_queue.put(None) + logging.info('Waiting for threads to finish... ') + for i in threads: + i.join() +atexit.register(_flush) + +def request(site, uri, *args, **kwargs): + """ @param site The Site to connect to + All other parameters are the same as `Http.request`, but the uri is relative + Returns: The received data. + """ + baseuri = site #.baseuri(), etc + uri = urlparse.urljoin(baseuri, uri) + + request = threadedhttp.HttpRequest(uri, *args, **kwargs) + http_queue.put(request) + request.lock.acquire() + + #do some error correcting stuff + + return request.data[1] \ No newline at end of file
Modified: branches/rewrite/pywikibot/data/threadedhttp.py =================================================================== --- branches/rewrite/pywikibot/data/threadedhttp.py 2007-12-29 00:08:32 UTC (rev 4774) +++ branches/rewrite/pywikibot/data/threadedhttp.py 2007-12-29 00:56:31 UTC (rev 4775) @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - """ Httplib2 threaded cookie layer This class extends Httplib2, adding support for: * Cookies, guarded for cross-site redirects @@ -239,9 +238,11 @@ def run(self): # The Queue item is expected to be either an HttpRequest object # or None (to shut down the thread) + logging.debug('Thread started, waiting for requests.') while (True): item = self.queue.get() if item is None: + logging.debug('Shutting down thread.') return try: item.data = self.http.request(*item.args, **item.kwargs)
pywikipedia-l@lists.wikimedia.org