Revision: 5230 Author: russblau Date: 2008-04-17 17:54:39 +0000 (Thu, 17 Apr 2008)
Log Message: ----------- Moved comm protocol classes to a separate directory, simplified the throttle, and made other behind-the-scenes changes
Modified Paths: -------------- branches/rewrite/pywikibot/__init__.py branches/rewrite/pywikibot/config.py branches/rewrite/pywikibot/data/api.py branches/rewrite/pywikibot/login.py branches/rewrite/pywikibot/page.py branches/rewrite/pywikibot/site.py branches/rewrite/pywikibot/throttle.py
Added Paths: ----------- branches/rewrite/pywikibot/comms/ branches/rewrite/pywikibot/comms/__init__.py branches/rewrite/pywikibot/comms/http.py branches/rewrite/pywikibot/comms/threadedhttp.py
Removed Paths: ------------- branches/rewrite/pywikibot/data/http.py branches/rewrite/pywikibot/data/threadedhttp.py
Modified: branches/rewrite/pywikibot/__init__.py =================================================================== --- branches/rewrite/pywikibot/__init__.py 2008-04-17 16:43:25 UTC (rev 5229) +++ branches/rewrite/pywikibot/__init__.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -9,11 +9,11 @@ # __version__ = '$Id: $'
+import logging +logging.getLogger().setLevel(logging.DEBUG)
from exceptions import *
-from page import Page, ImagePage, Category - import config
_sites = {} @@ -60,6 +60,20 @@
getSite = Site # alias for backwards-compability
+from page import Page, ImagePage, Category + +##def Page(*args, **kwargs): +## from page import Page as _Page +## return _Page(*args, **kwargs) +## +##def ImagePage(*args, **kwargs): +## from page import ImagePage as _ImagePage +## return _ImagePage(*args, **kwargs) +## +##def Category(*args, **kwargs): +## from page import Category as _Category +## return _Category(*args, **kwargs) + # DEBUG
def output(text): @@ -71,9 +85,6 @@ return getpass.getpass(prompt) return raw_input(prompt)
-import logging -logging.getLogger().setLevel(logging.DEBUG) - def stopme(): """Drop this process from the throttle log.
@@ -82,7 +93,11 @@
""" # only need one drop() call because all throttles use the same global pid - Site().get_throttle.drop() + try: + _sites[_sites.keys()[0]].throttle.drop() + logging.info("Dropped throttle(s).") + except IndexError: + pass
import atexit atexit.register(stopme)
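For context, the revised stopme() no longer constructs a Site just to reach a throttle; it looks in the module-level _sites cache and, if any Site exists, drops that site's throttle entry from the shared process log. The wiring, reduced to its essentials (Python 2, as in the codebase; _sites here is a local stand-in and no real Site is created):

    import atexit
    import logging

    _sites = {}   # stand-in for pywikibot's module-level Site cache

    def stopme():
        """Drop this process from the throttle log (all throttles share one pid)."""
        try:
            # any cached Site will do, because every Throttle writes to the same file
            _sites[_sites.keys()[0]].throttle.drop()
            logging.info("Dropped throttle(s).")
        except IndexError:
            pass   # no Site was ever instantiated, so there is nothing to drop

    atexit.register(stopme)   # runs once, automatically, at interpreter exit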
Added: branches/rewrite/pywikibot/comms/__init__.py ===================================================================
Added: branches/rewrite/pywikibot/comms/http.py =================================================================== --- branches/rewrite/pywikibot/comms/http.py (rev 0) +++ branches/rewrite/pywikibot/comms/http.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +""" +Basic HTTP access interface. + +This module handles communication between the bot and the HTTP threads. + +This module is responsible for + - Setting up a connection pool + - Providing a (blocking) interface for HTTP requests + - Translate site objects with query strings into urls + - Urlencoding all data + - Basic HTTP error handling +""" + +# +# (C) Pywikipedia bot team, 2007 +# +# Distributed under the terms of the MIT license. +# + +__version__ = '$Id: $' +__docformat__ = 'epytext' + +import Queue +import urllib +import urlparse +import logging +import atexit + +import config +import cookielib +import threadedhttp + + +# global variables + +useragent = 'Pywikipediabot/2.0' # This should include some global version string +numthreads = 1 +threads = [] + +connection_pool = threadedhttp.ConnectionPool() +http_queue = Queue.Queue() + +cookie_jar = threadedhttp.LockableCookieJar( + config.datafilepath("%s-%s-%s.lwp" + % (config.family, + config.mylang, + config.usernames[config.family][config.mylang]))) +try: + cookie_jar.load() +except (IOError, cookielib.LoadError): + logging.debug("Loading cookies failed.") +else: + logging.debug("Loaded cookies from file.") + + +# Build up HttpProcessors +logging.info('Starting %i threads...' % numthreads) +for i in range(numthreads): + proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool) + proc.setDaemon(True) + threads.append(proc) + proc.start() + +# Prepare flush on quit +def _flush(): + for i in threads: + http_queue.put(None) + logging.info('Waiting for threads to finish... ') + for i in threads: + i.join() + logging.debug('All threads finished.') +atexit.register(_flush) + +# export cookie_jar to global namespace +import pywikibot +pywikibot.cookie_jar = cookie_jar + +def request(site, uri, *args, **kwargs): + """Queue a request to be submitted to Site. + + All parameters not listed below are the same as + L{httplib2.Http.request}, but the uri is relative + + @param site: The Site to connect to + @return: The received data (a unicode string). + """ + baseuri = "%s://%s/" % (site.protocol(), site.hostname()) + uri = urlparse.urljoin(baseuri, uri) + + request = threadedhttp.HttpRequest(uri, *args, **kwargs) + http_queue.put(request) + request.lock.acquire() + + #do some error correcting stuff + + #if all else fails + if isinstance(request.data, Exception): + raise request.data + + if request.data[0].status != 200: + logging.warning("Http response status %s" % request.data[0].status) + + return request.data[1]
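request() above implements a blocking hand-off: the caller wraps its arguments in an HttpRequest whose lock is a Semaphore(0), puts it on the shared queue, and then acquire() blocks until an HttpProcessor thread fills in request.data and releases the semaphore. A self-contained sketch of that rendezvous, with a fake worker in place of the real HTTP machinery (Python 2):

    import threading
    import Queue

    class Job(object):
        """Minimal analogue of threadedhttp.HttpRequest."""
        def __init__(self, uri):
            self.uri = uri
            self.data = None
            self.lock = threading.Semaphore(0)   # starts at zero: acquire() will block

    work_queue = Queue.Queue()

    def worker():
        """Minimal analogue of HttpProcessor.run(); None is the shutdown sentinel."""
        while True:
            job = work_queue.get()
            if job is None:
                return
            try:
                job.data = ("200 OK", u"<pretend page content>")   # fake fetch
            finally:
                job.lock.release()                # wake up the waiting caller

    proc = threading.Thread(target=worker)
    proc.setDaemon(True)
    proc.start()

    job = Job("http://en.wikipedia.org/w/index.php")
    work_queue.put(job)
    job.lock.acquire()        # blocks here until the worker has stored job.data
    print job.data[1]

    work_queue.put(None)      # tell the worker to exit, as _flush() does at interpreter exit
    proc.join()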
Added: branches/rewrite/pywikibot/comms/threadedhttp.py =================================================================== --- branches/rewrite/pywikibot/comms/threadedhttp.py (rev 0) +++ branches/rewrite/pywikibot/comms/threadedhttp.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -0,0 +1,428 @@ +# -*- coding: utf-8 -*- +""" Httplib2 threaded cookie layer + +This class extends httplib2, adding support for: + - Cookies, guarded for cross-site redirects + - Thread safe ConnectionPool and LockableCookieJar classes + - HttpProcessor thread class + - HttpRequest object + +""" + +# (C) 2007 Pywikipedia bot team, 2007 +# (C) 2006 Httplib 2 team, 2006 +# (C) 2007 Metaweb Technologies, Inc. +# +# Partially distributed under the MIT license +# Partially distributed under Metaweb Technologies, Incs license +# which is compatible with the MIT license + +__version__ = '$Id: threadedhttp.py 4984 2008-02-06 16:21:18Z russblau $' +__docformat__ = 'epytext' + +# standard python libraries +import re +import threading +import time +import logging + +import urllib +import cookielib + +# easy_install safeguarded dependencies +import pkg_resources +pkg_resources.require("httplib2") +import httplib2 + + +class ConnectionPool(object): + """A thread-safe connection pool.""" + + def __init__(self, maxnum=5): + """ + @param maxnum: Maximum number of connections per identifier. + The pool drops excessive connections added. + + """ + logging.debug("Creating connection pool.") + self.connections = {} + self.lock = threading.Lock() + self.maxnum = maxnum + + def __del__(self): + """Destructor to close all connections in the pool.""" + self.lock.acquire() + try: + logging.debug("Closing connection pool (%s connections)" + % len(self.connections)) + for key in self.connections: + for connection in self.connections[key]: + connection.close() + finally: + self.lock.release() + + def __repr__(self): + return self.connections.__repr__() + + def pop_connection(self, identifier): + """Get a connection from identifier's connection pool. + + @param identifier: The pool identifier + @return: A connection object if found, None otherwise + + """ + self.lock.acquire() + try: + if identifier in self.connections: + if len(self.connections[identifier]) > 0: + logging.debug("Retrieved connection from '%s' pool." + % identifier) + return self.connections[identifier].pop() + return None + finally: + self.lock.release() + + def push_connection(self, identifier, connection): + """Add a connection to identifier's connection pool. + + @param identifier: The pool identifier + @param connection: The connection to add to the pool + + """ + self.lock.acquire() + try: + if identifier not in self.connections: + self.connections[identifier] = [] + + if len(self.connections[identifier]) == self.maxnum: + logging.debug('closing %s connection %r' + % (identifier, connection)) + connection.close() + del connection + else: + self.connections[identifier].append(connection) + finally: + self.lock.release() + + +class LockableCookieJar(cookielib.LWPCookieJar): + """CookieJar with integrated Lock object.""" + def __init__(self, *args, **kwargs): + cookielib.LWPCookieJar.__init__(self, *args, **kwargs) + self.lock = threading.Lock() + + +class Http(httplib2.Http): + """Subclass of httplib2.Http that stores cookies. + + Overrides httplib2's internal redirect support to prevent cookies being + eaten by the wrong sites. + + """ + def __init__(self, *args, **kwargs): + """ + @param cookiejar: (optional) CookieJar to use. A new one will be + used when not supplied. 
+ @param connection_pool: (optional) Connection pool to use. A new one + will be used when not supplied. + @param max_redirects: (optional) The maximum number of redirects to + follow. 5 is default. + + """ + try: + self.cookiejar = kwargs.pop('cookiejar') + except KeyError: + self.cookiejar = LockableCookieJar() + try: + self.connection_pool = kwargs.pop('connection_pool') + except KeyError: + self.connection_pool = ConnectionPool() + self.max_redirects = kwargs.pop('max_redirects', 5) + httplib2.Http.__init__(self, *args, **kwargs) + + def request(self, uri, method="GET", body=None, headers=None, + max_redirects=None, connection_type=None): + """Start an HTTP request. + + @param uri: The uri to retrieve + @param method: (optional) The HTTP method to use. Default is 'GET' + @param body: (optional) The request body. Default is no body. + @param headers: (optional) Additional headers to send. Defaults + include C{connection: keep-alive}, C{user-agent} and + C{content-type}. + @param max_redirects: (optional) The maximum number of redirects to + use for this request. The class instance's max_redirects is + default + @param connection_type: (optional) see L{httplib2.Http.request} + + @return: (response, content) tuple + + """ + if max_redirects is None: + max_redirects = self.max_redirects + if headers is None: + headers = {} + # Prepare headers + headers.pop('cookie', None) + req = DummyRequest(uri, headers) + self.cookiejar.lock.acquire() + try: + self.cookiejar.add_cookie_header(req) + finally: + self.cookiejar.lock.release() + headers = req.headers + + # Wikimedia squids: add connection: keep-alive to request headers + # unless overridden + headers['connection'] = headers.pop('connection', 'keep-alive') + + # determine connection pool key and fetch connection + (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm( + httplib2.iri2uri(uri)) + conn_key = scheme+":"+authority + + connection = self.connection_pool.pop_connection(conn_key) + if connection is not None: + self.connections[conn_key] = connection + + # Redirect hack: we want to regulate redirects + follow_redirects = self.follow_redirects + self.follow_redirects = False + logging.debug('%r' % ( + (uri, method, body, headers, max_redirects, connection_type),)) + try: + (response, content) = httplib2.Http.request( + self, uri, method, body, headers, + max_redirects, connection_type) + except Exception, e: # what types? 
+ # return exception instance to be retrieved by the calling thread + return e + self.follow_redirects = follow_redirects + + # return connection to pool + self.connection_pool.push_connection(conn_key, + self.connections[conn_key]) + del self.connections[conn_key] + + # First write cookies + self.cookiejar.lock.acquire() + try: + self.cookiejar.extract_cookies(DummyResponse(response), req) + finally: + self.cookiejar.lock.release() + + # Check for possible redirects + redirectable_response = ((response.status == 303) or + (response.status in [300, 301, 302, 307] and + method in ["GET", "HEAD"])) + if self.follow_redirects and (max_redirects > 0) \ + and redirectable_response: + (response, content) = self._follow_redirect( + uri, method, body, headers, response, content, max_redirects) + + return (response, content) + + def _follow_redirect(self, uri, method, body, headers, response, + content, max_redirects): + """Internal function to follow a redirect recieved by L{request}""" + (scheme, authority, absolute_uri, defrag_uri) = httplib2.urlnorm( + httplib2.iri2uri(uri)) + if self.cache: + cachekey = defrag_uri + else: + cachekey = None + + # Pick out the location header and basically start from the beginning + # remembering first to strip the ETag header and decrement our 'depth' + if not response.has_key('location') and response.status != 300: + raise httplib2.RedirectMissingLocation( + "Redirected but the response is missing a Location: header.", + response, content) + # Fix-up relative redirects (which violate an RFC 2616 MUST) + if response.has_key('location'): + location = response['location'] + (scheme, authority, path, query, fragment) = httplib2.parse_uri( + location) + if authority == None: + response['location'] = httplib2.urlparse.urljoin(uri, location) + logging.debug('Relative redirect: changed [%s] to [%s]' + % (location, response['location'])) + if response.status == 301 and method in ["GET", "HEAD"]: + response['-x-permanent-redirect-url'] = response['location'] + if not response.has_key('content-location'): + response['content-location'] = absolute_uri + httplib2._updateCache(headers, response, content, self.cache, + cachekey) + + headers.pop('if-none-match', None) + headers.pop('if-modified-since', None) + + if response.has_key('location'): + location = response['location'] + redirect_method = ((response.status == 303) and + (method not in ["GET", "HEAD"]) + ) and "GET" or method + return self.request(location, redirect_method, body=body, + headers=headers, + max_redirects=max_redirects - 1) + else: + raise RedirectLimit( + "Redirected more times than redirection_limit allows.", + response, content) + + +class HttpRequest(object): + """Object wrapper for HTTP requests that need to block origin thread. + + Usage: + + >>> request = HttpRequest('http://www.google.com') + >>> queue.put(request) + >>> request.lock.acquire() + >>> print request.data + + C{request.lock.acquire()} will block until the data is available. + + """ + def __init__(self, *args, **kwargs): + """See C{Http.request} for parameters.""" + self.args = args + self.kwargs = kwargs + self.data = None + self.lock = threading.Semaphore(0) + + +class HttpProcessor(threading.Thread): + """Thread object to spawn multiple HTTP connection threads.""" + def __init__(self, queue, cookiejar, connection_pool): + """ + @param queue: The C{Queue.Queue} object that contains L{HttpRequest} + objects. + @param cookiejar: The C{LockableCookieJar} cookie object to share among + requests. 
+ @param connection_pool: The C{ConnectionPool} object which contains + connections to share among requests. + + """ + threading.Thread.__init__(self) + self.queue = queue + self.http = Http(cookiejar=cookiejar, connection_pool=connection_pool) + + def run(self): + # The Queue item is expected to either an HttpRequest object + # or None (to shut down the thread) + logging.debug('Thread started, waiting for requests.') + while (True): + item = self.queue.get() + if item is None: + logging.debug('Shutting down thread.') + return + try: + item.data = self.http.request(*item.args, **item.kwargs) + finally: + if item.lock: + item.lock.release() + + +# Metaweb Technologies, Inc. License: + # ======================================================================== + # The following dummy classes are: + # ======================================================================== + # Copyright (c) 2007, Metaweb Technologies, Inc. + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # * Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # * Redistributions in binary form must reproduce the above + # copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided + # with the distribution. + # + # THIS SOFTWARE IS PROVIDED BY METAWEB TECHNOLOGIES AND CONTRIBUTORS + # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL METAWEB + # TECHNOLOGIES OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + # POSSIBILITY OF SUCH DAMAGE. 
+ # ======================================================================== + +class DummyRequest(object): + """Simulated urllib2.Request object for httplib2 + implements only what's necessary for cookielib.CookieJar to work + """ + def __init__(self, url, headers=None): + self.url = url + self.headers = headers + self.origin_req_host = cookielib.request_host(self) + self.type, r = urllib.splittype(url) + self.host, r = urllib.splithost(r) + if self.host: + self.host = urllib.unquote(self.host) + + def get_full_url(self): + return self.url + + def get_origin_req_host(self): + # TODO to match urllib2 this should be different for redirects + return self.origin_req_host + + def get_type(self): + return self.type + + def get_host(self): + return self.host + + def get_header(self, key, default=None): + return self.headers.get(key.lower(), default) + + def has_header(self, key): + return key in self.headers + + def add_unredirected_header(self, key, val): + # TODO this header should not be sent on redirect + self.headers[key.lower()] = val + + def is_unverifiable(self): + # TODO to match urllib2, this should be set to True when the + # request is the result of a redirect + return False + +class DummyResponse(object): + """Simulated urllib2.Request object for httplib2 + implements only what's necessary for cookielib.CookieJar to work + """ + def __init__(self, response): + self.response = response + + def info(self): + return DummyMessage(self.response) + +class DummyMessage(object): + """Simulated mimetools.Message object for httplib2 + implements only what's necessary for cookielib.CookieJar to work + """ + def __init__(self, response): + self.response = response + + def getheaders(self, k): + k = k.lower() + v = self.response.get(k.lower(), None) + if k not in self.response: + return [] + #return self.response[k].split(re.compile(',\s*')) + + # httplib2 joins multiple values for the same header + # using ','. but the netscape cookie format uses ',' + # as part of the expires= date format. so we have + # to split carefully here - header.split(',') won't do it. + HEADERVAL= re.compile(r'\s*(([^,]|(,\s*\d))+)') + return [h[0] for h in HEADERVAL.findall(self.response[k])]
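ConnectionPool keys idle connections by "scheme:authority" and silently closes any connection pushed once a pool already holds maxnum entries. How Http.request() is meant to drive it, in outline; this assumes the module is importable as pywikibot.comms.threadedhttp and that httplib2 is installed (the module requires it at import time), and the connection object here is a placeholder rather than a live socket:

    from pywikibot.comms import threadedhttp

    class FakeConnection(object):
        """Placeholder providing the only method the pool needs."""
        def close(self):
            pass

    pool = threadedhttp.ConnectionPool(maxnum=2)
    key = "http:en.wikipedia.org"          # scheme + ":" + authority, as in Http.request()

    conn = pool.pop_connection(key)        # nothing pooled yet, so this is None
    if conn is None:
        conn = FakeConnection()            # the caller would open a real connection here

    # ... perform the request on conn ...

    pool.push_connection(key, conn)           # hand it back for reuse
    assert pool.pop_connection(key) is conn   # and it comes back on the next request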
Modified: branches/rewrite/pywikibot/config.py =================================================================== --- branches/rewrite/pywikibot/config.py 2008-04-17 16:43:25 UTC (rev 5229) +++ branches/rewrite/pywikibot/config.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -282,16 +282,16 @@ # 'minthrottle' seconds. This can be lengthened if the server is slow, # but never more than 'maxthrottle' seconds. However - if you are running # more than one bot in parallel the times are lengthened. -minthrottle = 1 +# By default, the get_throttle is turned off, and 'maxlag' is used to +# control the rate of server access. Set minthrottle to non-zero to use a +# throttle on read access. +minthrottle = 0 maxthrottle = 10
-# Slow down the robot such that it never makes a second change within +# Slow down the robot such that it never makes a second page edit within # 'put_throttle' seconds. put_throttle = 10 -# By default, the get_throttle is turned off, and 'maxlag' is used to -# control the rate of server access. Set this to non-zero to use a throttle -# on read access. -get_throttle = 0 + # Sometimes you want to know when a delay is inserted. If a delay is larger # than 'noisysleep' seconds, it is logged on the screen. noisysleep = 3.0 @@ -299,9 +299,11 @@ # Defer bot edits during periods of database server lag. For details, see # http://www.mediawiki.org/wiki/Maxlag_parameter # You can set this variable to a number of seconds, or to None (or 0) to -# disable this behavior. -# It is recommended that you do not change this parameter unless you know -# what you are doing and have a good reason for it! +# disable this behavior. Higher values are more aggressive in seeking +# access to the wiki. +# Non-Wikimedia wikis may or may not support this feature; for families +# that do not use it, it is recommended to set minthrottle (above) to +# at least 1 second. maxlag = 5
# Maximum of pages which can be retrieved by special pages. Increase this if
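In short, the new defaults rely on the server-side maxlag mechanism for read pacing (minthrottle = 0) and keep a local delay only for edits (put_throttle). A plausible user-config.py override for a slow or non-Wikimedia wiki without maxlag support, using the variables defined above (the values themselves are only illustrative):

    # user-config.py (illustrative values)
    minthrottle = 1      # reinstate a small local delay between read requests
    maxthrottle = 10     # upper bound on any computed delay
    put_throttle = 15    # wait at least 15 seconds between page edits
    maxlag = 0           # 0 (or None) disables maxlag for wikis that do not support it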
Modified: branches/rewrite/pywikibot/data/api.py =================================================================== --- branches/rewrite/pywikibot/data/api.py 2008-04-17 16:43:25 UTC (rev 5229) +++ branches/rewrite/pywikibot/data/api.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -11,7 +11,6 @@
from UserDict import DictMixin from datetime import datetime, timedelta -import http import simplejson as json import logging import re @@ -65,7 +64,7 @@
>>> r = Request(site=mysite, action="query", meta="userinfo") >>> # This is equivalent to - >>> # http://%5Bpath%5D/api.php?action=query&meta=userinfo&format=json + >>> # http://%7Bpath%7D/api.php?action=query&meta=userinfo&format=json >>> # r.data is undefined until request is submitted >>> print r.data Traceback (most recent call last): @@ -75,8 +74,9 @@ >>> r['meta'] = "userinfo|siteinfo" >>> # add a new parameter >>> r['siprop'] = "namespaces" + >>> # note that "uiprop" param gets added automatically >>> r.params - {'action': 'query', 'meta': 'userinfo|siteinfo', 'maxlag': '5', 'siprop': 'namespaces', 'format': 'json'} + {'maxlag': '5', 'format': 'json', 'meta': 'userinfo|siteinfo', 'action': 'query', 'siprop': 'namespaces', 'uiprop': 'blockinfo|hasmsg'} >>> data = r.submit() >>> type(data) <type 'dict'> @@ -137,6 +137,7 @@ @return: The data retrieved from api.php (a dict)
""" + from pywikibot.comms import http if self.params['format'] != 'json': raise TypeError("Query format '%s' cannot be parsed." % self.params['format']) @@ -292,7 +293,7 @@ # FIXME: this won't handle generators with <redirlinks> subelements # correctly yet while True: - self.site.get_throttle() + self.site.throttle() self.data = self.request.submit() if not self.data or not isinstance(self.data, dict): raise StopIteration @@ -394,7 +395,7 @@ """Iterate objects for elements found in response.""" # this looks for the resultkey ''inside'' a <page> entry while True: - self.site.get_throttle() + self.site.throttle() self.data = self.request.submit() if not self.data or not isinstance(self.data, dict): raise StopIteration @@ -459,7 +460,10 @@ self.username = login_result['login']['lgusername'] return "\n".join(cookies)
+ def storecookiedata(self, data): + pywikibot.cookie_jar.save()
+ if __name__ == "__main__": from pywikibot import Site mysite = Site("en", "wikipedia") @@ -468,5 +472,5 @@ import doctest doctest.testmod() _test() + pywikibot.stopme()
-
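One small but deliberate change in api.py: the module-level `import http` becomes `from pywikibot.comms import http` inside Request.submit(). Deferring the import most likely avoids a circular dependency (comms/http.py imports pywikibot at load time to publish cookie_jar) and postpones the thread and cookie-jar setup that comms/http.py performs on import until a request is actually submitted. The general pattern, with two hypothetical modules standing in for the real ones:

    # first.py (hypothetical; plays the role of the pywikibot package)
    import second          # safe, because second defers its import of first

    COOKIE_JAR = object()  # attribute that second needs, defined after the import above

    # second.py (hypothetical; plays the role of data/api.py)
    def submit():
        import first       # resolved at call time, when first is fully initialised
        return first.COOKIE_JAR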
Deleted: branches/rewrite/pywikibot/data/http.py =================================================================== --- branches/rewrite/pywikibot/data/http.py 2008-04-17 16:43:25 UTC (rev 5229) +++ branches/rewrite/pywikibot/data/http.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -1,84 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Basic HTTP access interface. - -This module handles communication between the bot and the HTTP threads. - -This module is responsible for - - Setting up a connection pool - - Providing a (blocking) interface for HTTP requests - - Translate site objects with query strings into urls - - Urlencoding all data - - Basic HTTP error handling -""" - -# -# (C) Pywikipedia bot team, 2007 -# -# Distributed under the terms of the MIT license. -# - -__version__ = '$Id: $' -__docformat__ = 'epytext' - -import Queue -import urllib -import urlparse -import logging -import atexit - -import threadedhttp - - -# global variables - -useragent = 'Pywikipediabot/2.0' # This should include some global version string -numthreads = 1 -threads = [] - -connection_pool = threadedhttp.ConnectionPool() -cookie_jar = threadedhttp.LockableCookieJar() -http_queue = Queue.Queue() - -# Build up HttpProcessors -logging.info('Starting %i threads...' % numthreads) -for i in range(numthreads): - proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool) - threads.append(proc) - proc.start() - -# Prepare flush on quit -def _flush(): - for i in threads: - http_queue.put(None) - logging.info('Waiting for threads to finish... ') - for i in threads: - i.join() -atexit.register(_flush) - -def request(site, uri, *args, **kwargs): - """Queue a request to be submitted to Site. - - All parameters not listed below are the same as - L{httplib2.Http.request}, but the uri is relative - - @param site: The Site to connect to - @return: The received data (a unicode string). - """ - baseuri = "%s://%s/" % (site.protocol(), site.hostname()) - uri = urlparse.urljoin(baseuri, uri) - - request = threadedhttp.HttpRequest(uri, *args, **kwargs) - http_queue.put(request) - request.lock.acquire() - - #do some error correcting stuff - - #if all else fails - if isinstance(request.data, Exception): - raise request.data - - if request.data[0].status != 200: - logging.warning("Http response status %s" % request.data[0].status) - - return request.data[1]
Deleted: branches/rewrite/pywikibot/data/threadedhttp.py =================================================================== --- branches/rewrite/pywikibot/data/threadedhttp.py 2008-04-17 16:43:25 UTC (rev 5229) +++ branches/rewrite/pywikibot/data/threadedhttp.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -1,414 +0,0 @@ -# -*- coding: utf-8 -*- -""" Httplib2 threaded cookie layer - -This class extends httplib2, adding support for: - - Cookies, guarded for cross-site redirects - - Thread safe ConnectionPool and LockableCookieJar classes - - HttpProcessor thread class - - HttpRequest object - -""" - -# (C) 2007 Pywikipedia bot team, 2007 -# (C) 2006 Httplib 2 team, 2006 -# (C) 2007 Metaweb Technologies, Inc. -# -# Partially distributed under the MIT license -# Partially distributed under Metaweb Technologies, Incs license -# which is compatible with the MIT license - -__version__ = '$Id$' -__docformat__ = 'epytext' - -# standard python libraries -import re -import threading -import time -import logging - -import urllib -import cookielib - -# easy_install safeguarded dependencies -import pkg_resources -pkg_resources.require("httplib2") -import httplib2 - - -class ConnectionPool(object): - """A thread-safe connection pool.""" - - def __init__(self, maxnum=5): - """ - @param maxnum: Maximum number of connections per identifier. - The pool drops excessive connections added. - - """ - self.connections = {} - self.lock = threading.Lock() - self.maxnum = maxnum - - def __del__(self): - """Destructor to close all connections in the pool.""" - self.lock.acquire() - try: - for key in self.connections: - for connection in self.connections[key]: - connection.close() - finally: - self.lock.release() - - def __repr__(self): - return self.connections.__repr__() - - def pop_connection(self, identifier): - """Get a connection from identifier's connection pool. - - @param identifier: The pool identifier - @return: A connection object if found, None otherwise - - """ - self.lock.acquire() - try: - if identifier in self.connections: - if len(self.connections[identifier]) > 0: - return self.connections[identifier].pop() - return None - finally: - self.lock.release() - - def push_connection(self, identifier, connection): - """Add a connection to identifier's connection pool. - - @param identifier: The pool identifier - @param connection: The connection to add to the pool - - """ - self.lock.acquire() - try: - if identifier not in self.connections: - self.connections[identifier] = [] - - if len(self.connections[identifier]) == self.maxnum: - logging.debug('closing %s connection %r' % (identifier, connection)) - connection.close() - del connection - else: - self.connections[identifier].append(connection) - finally: - self.lock.release() - -class LockableCookieJar(cookielib.CookieJar): - """CookieJar with integrated Lock object.""" - def __init__(self, *args, **kwargs): - cookielib.CookieJar.__init__(self, *args, **kwargs) - self.lock = threading.Lock() - -class Http(httplib2.Http): - """Subclass of httplib2.Http that stores cookies. - - Overrides httplib2's internal redirect support to prevent cookies being - eaten by the wrong sites. - - """ - def __init__(self, *args, **kwargs): - """ - @param cookiejar: (optional) CookieJar to use. A new one will be - used when not supplied. - @param connection_pool: (optional) Connection pool to use. A new one - will be used when not supplied. - @param max_redirects: (optional) The maximum number of redirects to - follow. 5 is default. 
- - """ - self.cookiejar = kwargs.pop('cookiejar', LockableCookieJar()) - self.connection_pool = kwargs.pop('connection_pool', ConnectionPool()) - self.max_redirects = kwargs.pop('max_redirects', 5) - httplib2.Http.__init__(self, *args, **kwargs) - - def request(self, uri, method="GET", body=None, headers=None, - max_redirects=None, connection_type=None): - """Start an HTTP request. - - @param uri: The uri to retrieve - @param method: (optional) The HTTP method to use. Default is 'GET' - @param body: (optional) The request body. Default is no body. - @param headers: (optional) Additional headers to send. Defaults - include C{connection: keep-alive}, C{user-agent} and - C{content-type}. - @param max_redirects: (optional) The maximum number of redirects to - use for this request. The class instance's max_redirects is - default - @param connection_type: (optional) see L{httplib2.Http.request} - - @return: (response, content) tuple - - """ - if max_redirects is None: - max_redirects = self.max_redirects - if headers is None: - headers = {} - # Prepare headers - headers.pop('cookie', None) - req = DummyRequest(uri, headers) - self.cookiejar.lock.acquire() - try: - self.cookiejar.add_cookie_header(req) - finally: - self.cookiejar.lock.release() - headers = req.headers - - # Wikimedia squids: add connection: keep-alive to request headers - # unless overridden - headers['connection'] = headers.pop('connection', 'keep-alive') - - # determine connection pool key and fetch connection - (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm( - httplib2.iri2uri(uri)) - conn_key = scheme+":"+authority - - connection = self.connection_pool.pop_connection(conn_key) - if connection is not None: - self.connections[conn_key] = connection - - # Redirect hack: we want to regulate redirects - follow_redirects = self.follow_redirects - self.follow_redirects = False - logging.debug('%r' % ( - (uri, method, body, headers, max_redirects, connection_type),)) - try: - (response, content) = httplib2.Http.request( - self, uri, method, body, headers, - max_redirects, connection_type) - except Exception, e: # what types? 
- # return exception instance to be retrieved by the calling thread - return e - self.follow_redirects = follow_redirects - - # return connection to pool - self.connection_pool.push_connection(conn_key, - self.connections[conn_key]) - del self.connections[conn_key] - - # First write cookies - self.cookiejar.lock.acquire() - try: - self.cookiejar.extract_cookies(DummyResponse(response), req) - finally: - self.cookiejar.lock.release() - - # Check for possible redirects - redirectable_response = ((response.status == 303) or - (response.status in [300, 301, 302, 307] and - method in ["GET", "HEAD"])) - if self.follow_redirects and (max_redirects > 0) \ - and redirectable_response: - (response, content) = self._follow_redirect( - uri, method, body, headers, response, content, max_redirects) - - return (response, content) - - def _follow_redirect(self, uri, method, body, headers, response, - content, max_redirects): - """Internal function to follow a redirect recieved by L{request}""" - (scheme, authority, absolute_uri, defrag_uri) = httplib2.urlnorm( - httplib2.iri2uri(uri)) - if self.cache: - cachekey = defrag_uri - else: - cachekey = None - - # Pick out the location header and basically start from the beginning - # remembering first to strip the ETag header and decrement our 'depth' - if not response.has_key('location') and response.status != 300: - raise httplib2.RedirectMissingLocation( - "Redirected but the response is missing a Location: header.", - response, content) - # Fix-up relative redirects (which violate an RFC 2616 MUST) - if response.has_key('location'): - location = response['location'] - (scheme, authority, path, query, fragment) = httplib2.parse_uri( - location) - if authority == None: - response['location'] = httplib2.urlparse.urljoin(uri, location) - logging.debug('Relative redirect: changed [%s] to [%s]' - % (location, response['location'])) - if response.status == 301 and method in ["GET", "HEAD"]: - response['-x-permanent-redirect-url'] = response['location'] - if not response.has_key('content-location'): - response['content-location'] = absolute_uri - httplib2._updateCache(headers, response, content, self.cache, - cachekey) - - headers.pop('if-none-match', None) - headers.pop('if-modified-since', None) - - if response.has_key('location'): - location = response['location'] - redirect_method = ((response.status == 303) and - (method not in ["GET", "HEAD"]) - ) and "GET" or method - return self.request(location, redirect_method, body=body, - headers=headers, - max_redirects=max_redirects - 1) - else: - raise RedirectLimit( - "Redirected more times than redirection_limit allows.", - response, content) - - -class HttpRequest(object): - """Object wrapper for HTTP requests that need to block origin thread. - - Usage: - - >>> request = HttpRequest('http://www.google.com') - >>> queue.put(request) - >>> request.lock.acquire() - >>> print request.data - - C{request.lock.acquire()} will block until the data is available. - - """ - def __init__(self, *args, **kwargs): - """See C{Http.request} for parameters.""" - self.args = args - self.kwargs = kwargs - self.data = None - self.lock = threading.Semaphore(0) - - -class HttpProcessor(threading.Thread): - """Thread object to spawn multiple HTTP connection threads.""" - def __init__(self, queue, cookiejar, connection_pool): - """ - @param queue: The C{Queue.Queue} object that contains L{HttpRequest} - objects. - @param cookiejar: The C{LockableCookieJar} cookie object to share among - requests. 
- @param connection_pool: The C{ConnectionPool} object which contains - connections to share among requests. - - """ - threading.Thread.__init__(self) - self.queue = queue - self.http = Http(cookiejar=cookiejar, connection_pool=connection_pool) - - def run(self): - # The Queue item is expected to either an HttpRequest object - # or None (to shut down the thread) - logging.debug('Thread started, waiting for requests.') - while (True): - item = self.queue.get() - if item is None: - logging.debug('Shutting down thread.') - return - try: - item.data = self.http.request(*item.args, **item.kwargs) - finally: - if item.lock: - item.lock.release() - - -# Metaweb Technologies, Inc. License: - # ======================================================================== - # The following dummy classes are: - # ======================================================================== - # Copyright (c) 2007, Metaweb Technologies, Inc. - # All rights reserved. - # - # Redistribution and use in source and binary forms, with or without - # modification, are permitted provided that the following conditions - # are met: - # * Redistributions of source code must retain the above copyright - # notice, this list of conditions and the following disclaimer. - # * Redistributions in binary form must reproduce the above - # copyright notice, this list of conditions and the following - # disclaimer in the documentation and/or other materials provided - # with the distribution. - # - # THIS SOFTWARE IS PROVIDED BY METAWEB TECHNOLOGIES AND CONTRIBUTORS - # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL METAWEB - # TECHNOLOGIES OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - # POSSIBILITY OF SUCH DAMAGE. 
- # ======================================================================== - -class DummyRequest(object): - """Simulated urllib2.Request object for httplib2 - implements only what's necessary for cookielib.CookieJar to work - """ - def __init__(self, url, headers=None): - self.url = url - self.headers = headers - self.origin_req_host = cookielib.request_host(self) - self.type, r = urllib.splittype(url) - self.host, r = urllib.splithost(r) - if self.host: - self.host = urllib.unquote(self.host) - - def get_full_url(self): - return self.url - - def get_origin_req_host(self): - # TODO to match urllib2 this should be different for redirects - return self.origin_req_host - - def get_type(self): - return self.type - - def get_host(self): - return self.host - - def get_header(self, key, default=None): - return self.headers.get(key.lower(), default) - - def has_header(self, key): - return key in self.headers - - def add_unredirected_header(self, key, val): - # TODO this header should not be sent on redirect - self.headers[key.lower()] = val - - def is_unverifiable(self): - # TODO to match urllib2, this should be set to True when the - # request is the result of a redirect - return False - -class DummyResponse(object): - """Simulated urllib2.Request object for httplib2 - implements only what's necessary for cookielib.CookieJar to work - """ - def __init__(self, response): - self.response = response - - def info(self): - return DummyMessage(self.response) - -class DummyMessage(object): - """Simulated mimetools.Message object for httplib2 - implements only what's necessary for cookielib.CookieJar to work - """ - def __init__(self, response): - self.response = response - - def getheaders(self, k): - k = k.lower() - v = self.response.get(k.lower(), None) - if k not in self.response: - return [] - #return self.response[k].split(re.compile(',\s*')) - - # httplib2 joins multiple values for the same header - # using ','. but the netscape cookie format uses ',' - # as part of the expires= date format. so we have - # to split carefully here - header.split(',') won't do it. - HEADERVAL= re.compile(r'\s*(([^,]|(,\s*\d))+)') - return [h[0] for h in HEADERVAL.findall(self.response[k])]
Modified: branches/rewrite/pywikibot/login.py =================================================================== --- branches/rewrite/pywikibot/login.py 2008-04-17 16:43:25 UTC (rev 5229) +++ branches/rewrite/pywikibot/login.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -26,15 +26,15 @@
-force Ignores if the user is already logged in, and tries to log in.
-If not given as parameter, the script will ask for your username and password -(password entry will be hidden), log in to your home wiki using this -combination, and store the resulting cookies (containing your password hash, -so keep it secured!) in a file in the login-data subdirectory. +If not given as parameter, the script will ask for your username and +password (password entry will be hidden), log in to your home wiki using +this combination, and store the resulting cookies (containing your password +hash, so keep it secured!) in a file in the data subdirectory.
-All scripts in this library will be looking for this cookie file and will use the -login information if it is present. +All scripts in this library will be looking for this cookie file and will +use the login information if it is present.
-To log out, throw away the XX-login.data file that is created in the login-data +To log out, throw away the *.lwp file that is created in the data subdirectory. """ # @@ -169,7 +169,7 @@ if match: id = match.group('id') if not config.solve_captcha: - raise wikipedia.CaptchaError(id) + raise pywikibot.CaptchaError(id) url = self.site.protocol() + '://' + self.site.hostname() + self.site.captcha_image_address(id) answer = wikipedia.ui.askForCaptcha(url) return self.getCookie(remember = remember, captchaId = id, captchaAnswer = answer) @@ -255,7 +255,7 @@ for arg in wikipedia.handleArgs(): if arg.startswith("-pass"): if len(arg) == 5: - password = wikipedia.input(u'Password for all accounts:', password = True) + password = pywikibot.input(u'Password for all accounts:', password = True) else: password = arg[6:] elif arg == "-sysop": @@ -274,9 +274,9 @@ namedict = config.usernames for familyName in namedict.iterkeys(): for lang in namedict[familyName].iterkeys(): - site = wikipedia.getSite(code=lang, fam=familyName) + site = pywikibot.getSite(code=lang, fam=familyName) if not forceLogin and site.loggedInAs(sysop = sysop) != None: - wikipedia.output(u'Already logged in on %s' % site) + pywikibot.output(u'Already logged in on %s' % site) else: loginMan = LoginManager(password, sysop = sysop, site = site) loginMan.login() @@ -288,4 +288,4 @@ try: main() finally: - wikipedia.stopme() + pywikibot.stopme()
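The docstring now points users at *.lwp files because the cookie store is an LWPCookieJar created in comms/http.py (named "<family>-<lang>-<username>.lwp" under the data directory) and saved by LoginManager.storecookiedata(), which simply calls pywikibot.cookie_jar.save(). The underlying mechanics, with an illustrative filename (Python 2):

    import cookielib

    # filename follows the pattern used in comms/http.py; this particular name is made up
    jar = cookielib.LWPCookieJar("wikipedia-en-ExampleBot.lwp")

    try:
        jar.load()                        # pick up cookies left by a previous session
    except (IOError, cookielib.LoadError):
        pass                              # first run: no cookie file exists yet

    # ... a successful login adds session cookies to the jar ...

    jar.save()                            # what storecookiedata() ultimately triggers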
Modified: branches/rewrite/pywikibot/page.py =================================================================== --- branches/rewrite/pywikibot/page.py 2008-04-17 16:43:25 UTC (rev 5229) +++ branches/rewrite/pywikibot/page.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -93,10 +93,10 @@ # copy all of source's attributes to this object self.__dict__ = source.__dict__ elif isinstance(source, Link): - self._site = link.site - self._section = link.section - self._ns = link.namespace - self._title = link.title + self._site = source.site + self._section = source.section + self._ns = source.namespace + self._title = source.title # reassemble the canonical title from components if self._ns: self._title = "%s:%s" % (self.site().namespace(self._ns),
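The page.py hunk is a straight bug fix: the Link branch of the Page constructor referenced an undefined name `link` instead of the `source` parameter, so building a Page from a Link raised NameError. What the corrected branch does, shown with skeletal stand-ins rather than the real classes:

    class Link(object):
        """Skeletal stand-in for pywikibot's Link."""
        def __init__(self, site, title, namespace=0, section=None):
            self.site, self.title = site, title
            self.namespace, self.section = namespace, section

    class Page(object):
        """Skeletal stand-in showing only the copy-from-Link branch."""
        def __init__(self, source, title=u""):
            if isinstance(source, Link):
                self._site = source.site          # previously: link.site -> NameError
                self._section = source.section
                self._ns = source.namespace
                self._title = source.title
            else:
                self._site, self._title, self._ns = source, title, 0

    p = Page(Link("en.wikipedia.org", u"Sandbox", namespace=4))
    assert p._ns == 4 and p._title == u"Sandbox"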
Modified: branches/rewrite/pywikibot/site.py =================================================================== --- branches/rewrite/pywikibot/site.py 2008-04-17 16:43:25 UTC (rev 5229) +++ branches/rewrite/pywikibot/site.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -103,15 +103,8 @@ self._pagemutex = threading.Lock() self._locked_pages = []
- pt_min = min(config.minthrottle, config.put_throttle) - self.put_throttle = Throttle(self, pt_min, config.maxthrottle, - verbosedelay=True) - self.put_throttle.setDelay(config.put_throttle) + self.throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
- gt_min = min(config.minthrottle, config.get_throttle) - self.get_throttle = Throttle(self, gt_min, config.maxthrottle) - self.get_throttle.setDelay(config.get_throttle) - def family(self): """Return the associated Family object.""" return self._family @@ -409,7 +402,7 @@ - blockinfo: present if user is blocked (dict)
""" - if not hasattr(self, "_userinfo"): + if not hasattr(self, "_userinfo") or "rights" not in self._userinfo: uirequest = api.Request( site=self, action="query",
Modified: branches/rewrite/pywikibot/throttle.py =================================================================== --- branches/rewrite/pywikibot/throttle.py 2008-04-17 16:43:25 UTC (rev 5229) +++ branches/rewrite/pywikibot/throttle.py 2008-04-17 17:54:39 UTC (rev 5230) @@ -27,22 +27,24 @@ Calling this object blocks the calling thread until at least 'delay' seconds have passed since the previous call.
- Each Site initiates two Throttle objects: get_throttle to control - the rate of read access, and put_throttle to control the rate of write - access. These are available as the Site.get_throttle and Site.put_throttle - objects. + Each Site initiates one Throttle object (site.throttle) to control the + rate of access.
""" def __init__(self, site, mindelay=config.minthrottle, maxdelay=config.maxthrottle, + writedelay=config.put_throttle, multiplydelay=True, verbosedelay=False): self.lock = threading.RLock() self.mysite = str(site) + self.logfn = config.datafilepath('throttle.log') self.mindelay = mindelay self.maxdelay = maxdelay - self.now = 0 + self.writedelay = writedelay + self.last_read = 0 + self.last_write = 0 self.next_multiplicity = 1.0 - self.checkdelay = 240 # Check logfile again after this many seconds + self.checkdelay = 120 # Check logfile again after this many seconds self.dropdelay = 360 # Ignore processes that have not made # a check in this many seconds self.releasepid = 1800 # Free the process id after this many seconds @@ -51,11 +53,8 @@ self.verbosedelay = verbosedelay if multiplydelay: self.checkMultiplicity() - self.setDelay(mindelay) + self.setDelays()
- def logfn(self): - return config.datafilepath('throttle.log') - def checkMultiplicity(self): global pid self.lock.acquire() @@ -65,7 +64,7 @@ my_pid = 1 count = 1 try: - f = open(self.logfn(), 'r') + f = open(self.logfn, 'r') except IOError: if not pid: pass @@ -101,7 +100,7 @@ processes.append({'pid': my_pid, 'time': self.checktime, 'site': self.mysite}) - f = open(self.logfn(), 'w') + f = open(self.logfn, 'w') processes.sort(key=lambda p:(p['pid'], p['site'])) for p in processes: f.write("%(pid)s %(time)s %(site)s\n" % p) @@ -114,20 +113,24 @@ finally: self.lock.release()
- def setDelay(self, delay=config.minthrottle, absolute=False): - """Set the nominal delay in seconds.""" + def setDelays(self, delay=None, absolute=False): + """Set the nominal delays in seconds. Defaults to config values.""" self.lock.acquire() try: + if delay is None: + delay = self.mindelay if absolute: self.maxdelay = delay self.mindelay = delay self.delay = delay + self.writedelay = min(max(self.mindelay, self.writedelay), + self.maxdelay) # Start the delay count now, not at the next check - self.now = time.time() + self.last_read = self.last_write = time.time() finally: self.lock.release()
- def getDelay(self): + def getDelay(self, write=False): """Return the actual delay, accounting for multiple processes.
This value is the maximum wait between reads/writes, not taking @@ -135,7 +138,10 @@
""" global pid - thisdelay = self.delay + if write: + thisdelay = self.writedelay + else: + thisdelay = self.delay if pid: # If set, we're checking for multiple processes if time.time() > self.checktime + self.checkdelay: self.checkMultiplicity() @@ -146,13 +152,16 @@ thisdelay *= self.process_multiplicity return thisdelay
- def waittime(self): + def waittime(self, write=False): """Return waiting time in seconds if a query would be made right now""" # Take the previous requestsize in account calculating the desired # delay this time - thisdelay = self.getDelay() + thisdelay = self.getDelay(write=write) now = time.time() - ago = now - self.now + if write: + ago = now - self.last_write + else: + ago = now - self.last_read if ago < thisdelay: delta = thisdelay - ago return delta @@ -164,7 +173,7 @@ self.checktime = 0 processes = [] try: - f = open(self.logfn(), 'r') + f = open(self.logfn, 'r') except IOError: return else: @@ -183,13 +192,13 @@ processes.append({'pid': this_pid, 'time': ptime, 'site': this_site}) - f = open(self.logfn(), 'w') + f = open(self.logfn, 'w') processes.sort(key=lambda p:p['pid']) for p in processes: f.write("%(pid)s %(time)s %(site)s\n" % p) f.close()
- def __call__(self, requestsize=1): + def __call__(self, requestsize=1, write=False): """ Block the calling program if the throttle time has not expired.
@@ -198,7 +207,7 @@ """ self.lock.acquire() try: - waittime = self.waittime() + waittime = self.waittime(write=write) # Calculate the multiplicity of the next delay based on how # big the request is that is being posted now. # We want to add "one delay" for each factor of two in the @@ -213,7 +222,10 @@ time.localtime())) ) time.sleep(waittime) - self.now = time.time() + if write: + self.last_write = time.time() + else: + self.last_read = time.time() finally: self.lock.release()
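Taken together, the throttle now keeps separate last_read and last_write timestamps, getDelay() and waittime() accept a write flag, and __call__() scales the next delay by the size of the request just made. A usage sketch, assuming a working pywikibot configuration so that importing the module (and resolving the throttle.log path) succeeds; multiplydelay=False keeps the example from registering itself in the shared process log (Python 2):

    from pywikibot.throttle import Throttle

    t = Throttle("wikipedia:en", multiplydelay=False)   # the site argument only needs str()
    t.setDelays(delay=2)            # nominal read delay; writedelay is clamped to [min, max]

    print t.waittime()              # seconds a read issued right now would wait
    print t.waittime(write=True)    # seconds an edit issued right now would wait

    t()                             # block if needed, then record the read timestamp
    t(requestsize=10)               # large request: the *next* delay gets multiplied
    t(write=True)                   # block on the write delay, record the write timestamp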