Revision: 8095 Author: russblau Date: 2010-04-15 18:16:10 +0000 (Thu, 15 Apr 2010)
Log Message: ----------- Revert to a single background thread for asynchronous saves, instead of a thread per request; this should mean less overhead and better performance.
Modified Paths: -------------- branches/rewrite/pywikibot/__init__.py branches/rewrite/pywikibot/config2.py branches/rewrite/pywikibot/page.py
Modified: branches/rewrite/pywikibot/__init__.py =================================================================== --- branches/rewrite/pywikibot/__init__.py 2010-04-15 17:04:13 UTC (rev 8094) +++ branches/rewrite/pywikibot/__init__.py 2010-04-15 18:16:10 UTC (rev 8095) @@ -14,6 +14,8 @@ import logging import re import sys +import threading +from Queue import Queue
import config2 as config from bot import * @@ -248,7 +250,6 @@
# Throttle and thread handling
-threadpool = [] # add page-putting threads to this list as they are created stopped = False
def stopme(): @@ -263,20 +264,70 @@
if not stopped: pywikibot.debug(u"stopme() called", _logger) - count = sum(1 for thd in threadpool if thd.isAlive()) - if count: - pywikibot.output(u"Waiting for about %(count)s pages to be saved." - % locals()) - for thd in threadpool: - if thd.isAlive(): - thd.join() + def remaining(): + import datetime + remainingPages = page_put_queue.qsize() - 1 + # -1 because we added a None element to stop the queue + remainingSeconds = datetime.timedelta( + seconds=(remainingPages * config.put_throttle)) + return (remainingPages, remainingSeconds) + + page_put_queue.put((None, [], {})) stopped = True + + if page_put_queue.qsize() > 1: + output(u'Waiting for %i pages to be put. Estimated time remaining: %s' + % remaining()) + + while(_putthread.isAlive()): + try: + _putthread.join(1) + except KeyboardInterrupt: + answer = inputChoice(u"""\ +There are %i pages remaining in the queue. Estimated time remaining: %s +Really exit?""" + % remaining(), + ['yes', 'no'], ['y', 'N'], 'N') + if answer == 'y': + return + # only need one drop() call because all throttles use the same global pid try: - _sites[_sites.keys()[0]].throttle.drop() + _sites.values()[0].throttle.drop() pywikibot.log(u"Dropped throttle(s).") except IndexError: pass
import atexit atexit.register(stopme) + +# Create a separate thread for asynchronous page saves (and other requests) + +def async_manager(): + """Daemon; take requests from the queue and execute them in background.""" + while True: + (request, args, kwargs) = page_put_queue.get() + if request is None: + break + request(*args, **kwargs) + +def async_request(request, *args, **kwargs): + """Put a request on the queue, and start the daemon if necessary.""" + if not _putthread.isAlive(): + try: + page_put_queue.mutex.acquire() + try: + _putthread.start() + except (AssertionError, RuntimeError): + pass + finally: + page_put_queue.mutex.release() + page_put_queue.put((request, args, kwargs)) + +# queue to hold pending requests +page_put_queue = Queue(config.max_queue_size) +# set up the background thread +_putthread = threading.Thread(target=async_manager) +# identification for debugging purposes +_putthread.setName('Put-Thread') +_putthread.setDaemon(True)
Modified: branches/rewrite/pywikibot/config2.py =================================================================== --- branches/rewrite/pywikibot/config2.py 2010-04-15 17:04:13 UTC (rev 8094) +++ branches/rewrite/pywikibot/config2.py 2010-04-15 18:16:10 UTC (rev 8095) @@ -490,6 +490,12 @@ # Configuration variable 'socks' is defined but unknown. Misspelled?proxy = None proxy = None
+# How many pages should be put to a queue in asynchronous mode. +# If max_queue_size is <= 0, the queue size is infinite. +# Increasing this value will increase memory usage but could speed up +# processing. The higher this value, the smaller the additional gain. +max_queue_size = 64 + # End of configuration section # ============================ # System-level and User-level changes.
Modified: branches/rewrite/pywikibot/page.py =================================================================== --- branches/rewrite/pywikibot/page.py 2010-04-15 17:04:13 UTC (rev 8094) +++ branches/rewrite/pywikibot/page.py 2010-04-15 18:16:10 UTC (rev 8095) @@ -718,16 +718,12 @@ "Page %s not saved; editing restricted by {{bots}} template" % self.title(asLink=True)) if async: - thd = threading.Thread( - target=self._save, - args=(comment, minor, watch, unwatch, callback) - ) - pywikibot.threadpool.append(thd) - thd.start() + pywikibot.async_request(self._save, comment, minor, watch, unwatch, + async, callback) else: - self._save(comment, minor, watch, unwatch, callback) + self._save(comment, minor, watch, unwatch, async, callback)
- def _save(self, comment, minor, watch, unwatch, callback): + def _save(self, comment, minor, watch, unwatch, async, callback): err = None link = self.title(asLink=True) try: @@ -741,13 +737,14 @@ except pywikibot.LockedPage, err: # re-raise the LockedPage exception so that calling program # can re-try if appropriate - if not callback: + if not callback and not async: raise # TODO: other "expected" error types to catch? except pywikibot.Error, err: - pywikibot.log(u"Error saving page %s\n" % link, exc_info=True) - if not callback: - raise pywikibot.PageNotSaved(link) + pywikibot.log(u"Error saving page %s (%s)\n" % (link, err), + exc_info=True) + if not callback and not async: + raise pywikibot.PageNotSaved("%s: %s" %(link, err)) if callback: callback(self, err)
pywikipedia-svn@lists.wikimedia.org