Revision: 5425
Author: nicdumz
Date: 2008-05-22 18:31:54 +0000 (Thu, 22 May 2008)
Log Message:
-----------
* Adding a -safe parameter to rciw (process each page only once in a session)
* handling standard params
Modified Paths:
--------------
trunk/pywikipedia/rciw.py
Modified: trunk/pywikipedia/rciw.py =================================================================== --- trunk/pywikipedia/rciw.py 2008-05-22 18:09:09 UTC (rev 5424) +++ trunk/pywikipedia/rciw.py 2008-05-22 18:31:54 UTC (rev 5425) @@ -7,9 +7,14 @@ In use on hu:, not sure if this scales well on a large wiki such as en: (Depending on the edit rate, the number of IW threads could grow continuously without ever decreasing) + +Params: + +-safe Does not handle the same page more than once in a session + """
-# Author: Kisbes +# Authors: Kisbes # http://hu.wikipedia.org/wiki/User:Kisbes # License : GFDL
@@ -22,8 +27,8 @@ import time from Queue import Queue
-class SignerBot(SingleServerIRCBot): - def __init__(self, site, channel, nickname, server, port=6667): +class IWRCBot(SingleServerIRCBot): + def __init__(self, site, channel, nickname, server, port, safe): SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname) self.channel = channel self.other_ns = re.compile(u'14[[07(' + u'|'.join(site.namespaces()) + u')') @@ -31,6 +36,7 @@ self.site = site self.queue = Queue() self.processed = [] + self.safe = safe # Start 20 threads for i in range(20): t = threading.Thread(target=self.worker) @@ -61,15 +67,19 @@ msg = unicode(e.arguments()[0],'utf-8') except UnicodeDecodeError: return - if not self.other_ns.match(msg): - name = msg[8:msg.find(u'14',9)] - if not name in self.processed: - self.processed.append(name) - page = wikipedia.Page(self.site, name) - # the Queue has for now an unlimited size, - # it is a simple atomic append(), no need to acquire a semaphore - self.queue.put_nowait(page) + if self.other_ns.match(msg): + return
+ name = msg[8:msg.find(u'14',9)] + if self.safe: + if name in self.processed: + return + self.processed.append(name) + page = wikipedia.Page(self.site, name) + # the Queue has for now an unlimited size, + # it is a simple atomic append(), no need to acquire a semaphore + self.queue.put_nowait(page) + def on_dccmsg(self, c, e): pass
@@ -83,10 +93,14 @@ pass
def main(): + safe = False + for arg in wikipedia.handleArgs(): + if arg == 'safe': + safe = True site = wikipedia.getSite() site.forceLogin() chan = '#' + site.language() + '.' + site.family.name - bot = SignerBot(site, chan, site.loggedInAs(), "irc.wikimedia.org", 6667) + bot = IWRCBot(site, chan, site.loggedInAs(), "irc.wikimedia.org", 6667, safe) try: bot.start() except:
pywikipedia-l@lists.wikimedia.org