[Pywikipedia-l] SVN: [5425] trunk/pywikipedia/rciw.py
nicdumz at svn.wikimedia.org
nicdumz at svn.wikimedia.org
Thu May 22 18:31:54 UTC 2008
Revision: 5425
Author: nicdumz
Date: 2008-05-22 18:31:54 +0000 (Thu, 22 May 2008)
Log Message:
-----------
* Add a -safe parameter to rciw (each page is processed only once per session)
* Handle the standard command-line parameters
Modified Paths:
--------------
trunk/pywikipedia/rciw.py
Modified: trunk/pywikipedia/rciw.py
===================================================================
--- trunk/pywikipedia/rciw.py 2008-05-22 18:09:09 UTC (rev 5424)
+++ trunk/pywikipedia/rciw.py 2008-05-22 18:31:54 UTC (rev 5425)
@@ -7,9 +7,14 @@
In use on hu:, not sure if this scales well on a large wiki such
as en: (Depending on the edit rate, the number of IW threads
could grow continuously without ever decreasing)
+
+Params:
+
+-safe Does not handle the same page more than once in a session
+
"""
-# Author: Kisbes
+# Authors: Kisbes
# http://hu.wikipedia.org/wiki/User:Kisbes
# License : GFDL
@@ -22,8 +27,8 @@
import time
from Queue import Queue
-class SignerBot(SingleServerIRCBot):
- def __init__(self, site, channel, nickname, server, port=6667):
+class IWRCBot(SingleServerIRCBot):
+ def __init__(self, site, channel, nickname, server, port, safe):
SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
self.channel = channel
self.other_ns = re.compile(u'14\[\[07(' + u'|'.join(site.namespaces()) + u')')
@@ -31,6 +36,7 @@
self.site = site
self.queue = Queue()
self.processed = []
+ self.safe = safe
# Start 20 threads
for i in range(20):
t = threading.Thread(target=self.worker)
@@ -61,15 +67,19 @@
msg = unicode(e.arguments()[0],'utf-8')
except UnicodeDecodeError:
return
- if not self.other_ns.match(msg):
- name = msg[8:msg.find(u'14',9)]
- if not name in self.processed:
- self.processed.append(name)
- page = wikipedia.Page(self.site, name)
- # the Queue has for now an unlimited size,
- # it is a simple atomic append(), no need to acquire a semaphore
- self.queue.put_nowait(page)
+ if self.other_ns.match(msg):
+ return
+ name = msg[8:msg.find(u'14',9)]
+ if self.safe:
+ if name in self.processed:
+ return
+ self.processed.append(name)
+ page = wikipedia.Page(self.site, name)
+ # the Queue has for now an unlimited size,
+ # it is a simple atomic append(), no need to acquire a semaphore
+ self.queue.put_nowait(page)
+
def on_dccmsg(self, c, e):
pass
@@ -83,10 +93,14 @@
pass
def main():
+ safe = False
+ for arg in wikipedia.handleArgs():
+ if arg == 'safe':
+ safe = True
site = wikipedia.getSite()
site.forceLogin()
chan = '#' + site.language() + '.' + site.family.name
- bot = SignerBot(site, chan, site.loggedInAs(), "irc.wikimedia.org", 6667)
+ bot = IWRCBot(site, chan, site.loggedInAs(), "irc.wikimedia.org", 6667, safe)
try:
bot.start()
except:
More information about the Pywikipedia-l mailing list