Revision: 5418 Author: nicdumz Date: 2008-05-21 17:09:46 +0000 (Wed, 21 May 2008)
Log Message: ----------- * Booh. Some messages are not valid UTF-8 and can crash the main IRC process (why?!); catch the resulting UnicodeDecodeError and skip those messages. * Add a list to remember the pages we have already treated, to avoid processing the same page twice.
Modified Paths: -------------- trunk/pywikipedia/rciw.py
Modified: trunk/pywikipedia/rciw.py =================================================================== --- trunk/pywikipedia/rciw.py 2008-05-21 17:07:12 UTC (rev 5417) +++ trunk/pywikipedia/rciw.py 2008-05-21 17:09:46 UTC (rev 5418) @@ -30,6 +30,7 @@ interwiki.globalvar.autonomous = True self.site = site self.queue = Queue() + self.processed = [] # Start 20 threads for i in range(20): t = threading.Thread(target=self.worker) @@ -56,13 +57,18 @@ pass
def on_pubmsg(self, c, e): - msg = unicode(e.arguments()[0],'utf-8') + try: + msg = unicode(e.arguments()[0],'utf-8') + except UnicodeDecodeError: + return if not self.other_ns.match(msg): name = msg[8:msg.find(u'14',9)] - page = wikipedia.Page(self.site, name) - # the Queue has for now an (theoric) unlimited size, - # it is a simple atomic append(), no need to acquire a semaphore - self.queue.put_nowait(page) + if not name in self.processed: + self.processed.append(name) + page = wikipedia.Page(self.site, name) + # the Queue has for now an unlimited size, + # it is a simple atomic append(), no need to acquire a semaphore + self.queue.put_nowait(page)
def on_dccmsg(self, c, e): pass
pywikipedia-l@lists.wikimedia.org