[Pywikipedia-l] SVN: [5367] trunk/pywikipedia/rciw.py
nicdumz at svn.wikimedia.org
nicdumz at svn.wikimedia.org
Wed May 14 11:28:34 UTC 2008
Revision: 5367
Author: nicdumz
Date: 2008-05-14 11:28:34 +0000 (Wed, 14 May 2008)
Log Message:
-----------
Better threading: use a consumer/producer model with a single Queue, and limit the number of threads (hardcoded value for now).
Modified Paths:
--------------
trunk/pywikipedia/rciw.py
Modified: trunk/pywikipedia/rciw.py
===================================================================
--- trunk/pywikipedia/rciw.py 2008-05-14 11:02:13 UTC (rev 5366)
+++ trunk/pywikipedia/rciw.py 2008-05-14 11:28:34 UTC (rev 5367)
@@ -20,16 +20,8 @@
import re
import wikipedia
import time
+from Queue import Queue
-class iwThread(threading.Thread):
- def __init__(self, toiw):
- threading.Thread.__init__(self)
- self.toiw = toiw
- def run(self):
- bot = interwiki.InterwikiBot()
- bot.add(self.toiw)
- bot.run()
-
class SignerBot(SingleServerIRCBot):
def __init__(self, site, channel, nickname, server, port=6667):
SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
@@ -37,7 +29,23 @@
self.other_ns = re.compile(u'14\[\[07(' + u'|'.join(site.namespaces()) + u')')
interwiki.globalvar.autonomous = True
self.site = site
+ self.queue = Queue()
+ # Start 20 threads
+ for i in range(20):
+ t = threading.Thread(target=self.worker)
+ t.start()
+ def worker(self):
+ bot = interwiki.InterwikiBot()
+ while True:
+ # Will wait until one page is available
+ bot.add(self.queue.get())
+ bot.queryStep()
+ self.queue.task_done()
+
+ def join(self):
+ self.queue.join()
+
def on_nicknameinuse(self, c, e):
c.nick(c.get_nickname() + "_")
@@ -52,9 +60,9 @@
if not self.other_ns.match(msg):
name = msg[8:msg.find(u'14',9)]
page = wikipedia.Page(self.site, name)
- thread = iwThread(page)
- thread.start()
-
+ # the Queue has for now an (theoric) unlimited size,
+ # it is a simple atomic append(), no need to acquire a semaphore
+ self.queue.put_nowait(page)
def on_dccmsg(self, c, e):
pass
@@ -69,13 +77,18 @@
pass
def main():
- global times
- times = []
site = wikipedia.getSite()
site.forceLogin()
chan = '#' + site.language() + '.' + site.family.name
bot = SignerBot(site, chan, site.loggedInAs(), "irc.wikimedia.org", 6667)
- bot.start()
+ try:
+ bot.start()
+ except:
+ # Quit IRC
+ bot.stop()
+ # Join the IW threads
+ bot.join()
+ raise
if __name__ == "__main__":
main()
More information about the Pywikipedia-l mailing list