[Pywikipedia-l] SVN: [5367] trunk/pywikipedia/rciw.py

nicdumz at svn.wikimedia.org nicdumz at svn.wikimedia.org
Wed May 14 11:28:34 UTC 2008


Revision: 5367
Author:   nicdumz
Date:     2008-05-14 11:28:34 +0000 (Wed, 14 May 2008)

Log Message:
-----------
Better threading: use a producer/consumer model with a single Queue, and limit the number of worker threads (hard-coded to 20 for now).

Modified Paths:
--------------
    trunk/pywikipedia/rciw.py

Modified: trunk/pywikipedia/rciw.py
===================================================================
--- trunk/pywikipedia/rciw.py	2008-05-14 11:02:13 UTC (rev 5366)
+++ trunk/pywikipedia/rciw.py	2008-05-14 11:28:34 UTC (rev 5367)
@@ -20,16 +20,8 @@
 import re
 import wikipedia
 import time
+from Queue import Queue
 
-class iwThread(threading.Thread):
-    def __init__(self, toiw):
-        threading.Thread.__init__(self)
-        self.toiw = toiw
-    def run(self):
-        bot = interwiki.InterwikiBot()
-        bot.add(self.toiw)
-        bot.run()
-
 class SignerBot(SingleServerIRCBot):
     def __init__(self, site, channel, nickname, server, port=6667):
         SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
@@ -37,7 +29,23 @@
         self.other_ns = re.compile(u'14\[\[07(' + u'|'.join(site.namespaces()) + u')')
         interwiki.globalvar.autonomous = True
         self.site = site
+        self.queue = Queue()
+        # Start 20 threads
+        for i in range(20):
+            t = threading.Thread(target=self.worker)
+            t.start()
 
+    def worker(self):
+        bot = interwiki.InterwikiBot()
+        while True:
+            # Will wait until one page is available
+            bot.add(self.queue.get())
+            bot.queryStep()
+            self.queue.task_done()
+
+    def join(self):
+        self.queue.join()
+
     def on_nicknameinuse(self, c, e):
         c.nick(c.get_nickname() + "_")
 
@@ -52,9 +60,9 @@
         if not self.other_ns.match(msg):
             name = msg[8:msg.find(u'14',9)]
             page = wikipedia.Page(self.site, name)
-            thread = iwThread(page)
-            thread.start()
-            
+            # The Queue has, for now, a (theoretically) unlimited size;
+            # adding to it is a simple atomic append(), so there is no need to acquire a semaphore
+            self.queue.put_nowait(page)
 
     def on_dccmsg(self, c, e):
 	pass
@@ -69,13 +77,18 @@
 	pass
 
 def main():
-    global times
-    times = []
     site = wikipedia.getSite()
     site.forceLogin()
     chan = '#' + site.language() + '.' + site.family.name
     bot = SignerBot(site, chan, site.loggedInAs(), "irc.wikimedia.org", 6667)
-    bot.start()
+    try:
+        bot.start()
+    except:
+        # Quit IRC
+        bot.stop()
+        # Join the IW threads
+        bot.join()
+        raise
 
 if __name__ == "__main__":
     main()





More information about the Pywikipedia-l mailing list