[Pywikipedia-l] SVN: [5425] trunk/pywikipedia/rciw.py

nicdumz at svn.wikimedia.org nicdumz at svn.wikimedia.org
Thu May 22 18:31:54 UTC 2008


Revision: 5425
Author:   nicdumz
Date:     2008-05-22 18:31:54 +0000 (Thu, 22 May 2008)

Log Message:
-----------
* Adding a -safe parameter to rciw (each page is processed at most once per session)
* Handling standard command-line params (via wikipedia.handleArgs())

Modified Paths:
--------------
    trunk/pywikipedia/rciw.py

Modified: trunk/pywikipedia/rciw.py
===================================================================
--- trunk/pywikipedia/rciw.py	2008-05-22 18:09:09 UTC (rev 5424)
+++ trunk/pywikipedia/rciw.py	2008-05-22 18:31:54 UTC (rev 5425)
@@ -7,9 +7,14 @@
 In use on hu:, not sure if this scales well on a large wiki such
 as en: (Depending on the edit rate, the number of IW threads
 could grow continuously without ever decreasing)
+
+Params:
+
+-safe  Does not handle the same page more than once in a session
+
 """
 
-# Author: Kisbes
+# Authors: Kisbes
 # http://hu.wikipedia.org/wiki/User:Kisbes
 # License : GFDL
 
@@ -22,8 +27,8 @@
 import time
 from Queue import Queue
 
-class SignerBot(SingleServerIRCBot):
-    def __init__(self, site, channel, nickname, server, port=6667):
+class IWRCBot(SingleServerIRCBot):
+    def __init__(self, site, channel, nickname, server, port, safe):
         SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
         self.channel = channel
         self.other_ns = re.compile(u'14\[\[07(' + u'|'.join(site.namespaces()) + u')')
@@ -31,6 +36,7 @@
         self.site = site
         self.queue = Queue()
         self.processed = []
+        self.safe = safe
         # Start 20 threads
         for i in range(20):
             t = threading.Thread(target=self.worker)
@@ -61,15 +67,19 @@
             msg = unicode(e.arguments()[0],'utf-8')
         except UnicodeDecodeError:
             return
-        if not self.other_ns.match(msg):
-            name = msg[8:msg.find(u'14',9)]
-            if not name in self.processed:
-                self.processed.append(name)
-                page = wikipedia.Page(self.site, name)
-                # the Queue has for now an unlimited size,
-                # it is a simple atomic append(), no need to acquire a semaphore
-                self.queue.put_nowait(page)
+        if self.other_ns.match(msg):
+            return
 
+        name = msg[8:msg.find(u'14',9)]
+        if self.safe:
+            if name in self.processed:
+                return
+            self.processed.append(name)
+        page = wikipedia.Page(self.site, name)
+        # the Queue has for now an unlimited size,
+        # it is a simple atomic append(), no need to acquire a semaphore
+        self.queue.put_nowait(page)
+
     def on_dccmsg(self, c, e):
 	pass
 
@@ -83,10 +93,14 @@
 	pass
 
 def main():
+    safe = False
+    for arg in wikipedia.handleArgs():
+        if arg == 'safe':
+            safe = True
     site = wikipedia.getSite()
     site.forceLogin()
     chan = '#' + site.language() + '.' + site.family.name
-    bot = SignerBot(site, chan, site.loggedInAs(), "irc.wikimedia.org", 6667)
+    bot = IWRCBot(site, chan, site.loggedInAs(), "irc.wikimedia.org", 6667, safe)
     try:
         bot.start()
     except:





More information about the Pywikipedia-l mailing list