[Pywikipedia-l] SVN: [4775] branches/rewrite/pywikibot/data
valhallasw at svn.wikimedia.org
valhallasw at svn.wikimedia.org
Sat Dec 29 00:56:31 UTC 2007
Revision: 4775
Author: valhallasw
Date: 2007-12-29 00:56:31 +0000 (Sat, 29 Dec 2007)
Log Message:
-----------
Minor fixes to threadedhttp.py
Major rewrite of http.py:
http.py now starts http threads and handles them; adds exit function to stop them; implements locking mechanisms for asynchronous http requests.
Modified Paths:
--------------
branches/rewrite/pywikibot/data/http.py
branches/rewrite/pywikibot/data/threadedhttp.py
Modified: branches/rewrite/pywikibot/data/http.py
===================================================================
--- branches/rewrite/pywikibot/data/http.py 2007-12-29 00:08:32 UTC (rev 4774)
+++ branches/rewrite/pywikibot/data/http.py 2007-12-29 00:56:31 UTC (rev 4775)
@@ -1,45 +1,73 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
"""
-Basic HTTP access interface (GET/POST/HEAD wrappers).
+Basic HTTP access interface.
+
+This module handles communication between the bot and the HTTP threads.
+
+This module is responsible for
+ * Setting up a connection pool
+ * Providing a (blocking) interface for HTTP requests
+ * Translate site objects with query strings into urls
+ * Urlencoding all data
+ * Basic HTTP error handling
"""
+
#
# (C) Pywikipedia bot team, 2007
#
# Distributed under the terms of the MIT license.
#
+
__version__ = '$Id: $'
+__docformat__ = 'epytext'
+import Queue
+import urllib
+import urlparse
+import logging
+import atexit
-import urllib, httplib
+import threadedhttp
-class HTTP:
+# global variables
- def __init__(self, site):
- self.site = site
- self.useragent = 'PythonWikipediaBot/2.0'
- #TODO: Initiate persistent connection here?
+useragent = 'Pywikipediabot/2.0' # This should include some global version string
+numthreads = 1
+threads = []
- def GET(self, address, query={}):
- return self._request('GET',address + '?' + urllib.urlencode(query))
+connection_pool = threadedhttp.ConnectionPool()
+cookie_jar = threadedhttp.LockableCookieJar()
+http_queue = Queue.Queue()
- def POST(self, address, query={}):
- return self._request('POST',address,urllib.urlencode(query))
+# Build up HttpProcessors
+logging.info('Starting %i threads...' % numthreads)
+for i in range(numthreads):
+ proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool)
+ threads.append(proc)
+ proc.start()
- def HEAD(self, address, query={}):
- return self._request('HEAD',address + '?' + urllib.urlencode(query))
-
- def _request(self, method, address, data=''):
- #TODO: Resuse said connection.
- conn = httplib.HTTPConnection('en.wikipedia.org',80) #TODO: Obviously, get these from the site object (unimplemented yet)
- conn.putrequest(method,address)
- conn.putheader('User-agent',self.useragent)
- conn.putheader('Content-type','application/x-www-form-urlencoded')
- conn.putheader('Content-Length',len(data))
- conn.endheaders()
- conn.send(data)
-
- response = conn.getresponse()
- rdata = response.read()
-
- return response.status, rdata
+# Prepare flush on quit
+def _flush():
+ for i in threads:
+ http_queue.put(None)
+ logging.info('Waiting for threads to finish... ')
+ for i in threads:
+ i.join()
+atexit.register(_flush)
+
+def request(site, uri, *args, **kwargs):
+ """ @param site The Site to connect to
+ All other parameters are the same as `Http.request`, but the uri is relative
+    Returns: The received data.
+ """
+ baseuri = site #.baseuri(), etc
+ uri = urlparse.urljoin(baseuri, uri)
+
+ request = threadedhttp.HttpRequest(uri, *args, **kwargs)
+ http_queue.put(request)
+ request.lock.acquire()
+
+ #do some error correcting stuff
+
+ return request.data[1]
\ No newline at end of file
Modified: branches/rewrite/pywikibot/data/threadedhttp.py
===================================================================
--- branches/rewrite/pywikibot/data/threadedhttp.py 2007-12-29 00:08:32 UTC (rev 4774)
+++ branches/rewrite/pywikibot/data/threadedhttp.py 2007-12-29 00:56:31 UTC (rev 4775)
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-
""" Httplib2 threaded cookie layer
This class extends Httplib2, adding support for:
* Cookies, guarded for cross-site redirects
@@ -239,9 +238,11 @@
def run(self):
# The Queue item is expected to be either an HttpRequest object
# or None (to shut down the thread)
+ logging.debug('Thread started, waiting for requests.')
while (True):
item = self.queue.get()
if item is None:
+ logging.debug('Shutting down thread.')
return
try:
item.data = self.http.request(*item.args, **item.kwargs)
More information about the Pywikipedia-l
mailing list