[Pywikipedia-l] SVN: [5418] trunk/pywikipedia/rciw.py
nicdumz at svn.wikimedia.org
nicdumz at svn.wikimedia.org
Wed May 21 17:09:47 UTC 2008
Revision: 5418
Author: nicdumz
Date: 2008-05-21 17:09:46 +0000 (Wed, 21 May 2008)
Log Message:
-----------
* Booh. Some messages are not valid UTF-8 and can crash the main IRC process (why?!); catch the resulting UnicodeDecodeError and skip those messages.
* Add a list to remember the pages we have already treated, to avoid processing the same page twice.
Modified Paths:
--------------
trunk/pywikipedia/rciw.py
Modified: trunk/pywikipedia/rciw.py
===================================================================
--- trunk/pywikipedia/rciw.py 2008-05-21 17:07:12 UTC (rev 5417)
+++ trunk/pywikipedia/rciw.py 2008-05-21 17:09:46 UTC (rev 5418)
@@ -30,6 +30,7 @@
interwiki.globalvar.autonomous = True
self.site = site
self.queue = Queue()
+ self.processed = []
# Start 20 threads
for i in range(20):
t = threading.Thread(target=self.worker)
@@ -56,13 +57,18 @@
pass
def on_pubmsg(self, c, e):
- msg = unicode(e.arguments()[0],'utf-8')
+ try:
+ msg = unicode(e.arguments()[0],'utf-8')
+ except UnicodeDecodeError:
+ return
if not self.other_ns.match(msg):
name = msg[8:msg.find(u'14',9)]
- page = wikipedia.Page(self.site, name)
- # the Queue has for now an (theoric) unlimited size,
- # it is a simple atomic append(), no need to acquire a semaphore
- self.queue.put_nowait(page)
+ if not name in self.processed:
+ self.processed.append(name)
+ page = wikipedia.Page(self.site, name)
+ # the Queue has for now an unlimited size,
+ # it is a simple atomic append(), no need to acquire a semaphore
+ self.queue.put_nowait(page)
def on_dccmsg(self, c, e):
pass
More information about the Pywikipedia-l
mailing list