[Pywikipedia-l] SVN: [5418] trunk/pywikipedia/rciw.py - pywikibot

22 May 2008

Revision: 5418
Author:   nicdumz
Date:     2008-05-21 17:09:46 +0000 (Wed, 21 May 2008)
Log Message:
-----------
* Booh. Some messages are not valid UTF-8 and can crash the main IRC process (why?!); catching and ignoring those UnicodeDecodeErrors
* adding a list to remember the pages we have already processed, to avoid processing the same page twice
Modified Paths:
--------------
    trunk/pywikipedia/rciw.py
Modified: trunk/pywikipedia/rciw.py
===================================================================

--- trunk/pywikipedia/rciw.py	2008-05-21 17:07:12 UTC (rev 5417)
+++ trunk/pywikipedia/rciw.py	2008-05-21 17:09:46 UTC (rev 5418)
@@ -30,6 +30,7 @@
         interwiki.globalvar.autonomous = True
         self.site = site
         self.queue = Queue()
+        self.processed = []
         # Start 20 threads
         for i in range(20):
             t = threading.Thread(target=self.worker)
@@ -56,13 +57,18 @@
         pass

     def on_pubmsg(self, c, e):
-        msg = unicode(e.arguments()[0],'utf-8')
+        try:
+            msg = unicode(e.arguments()[0],'utf-8')
+        except UnicodeDecodeError:
+            return
         if not self.other_ns.match(msg):
             name = msg[8:msg.find(u'14',9)]
-            page = wikipedia.Page(self.site, name)
-            # the Queue has for now an (theoric) unlimited size,
-            # it is a simple atomic append(), no need to acquire a semaphore
-            self.queue.put_nowait(page)
+            if not name in self.processed:
+                self.processed.append(name)
+                page = wikipedia.Page(self.site, name)
+                # the Queue has for now an unlimited size,
+                # it is a simple atomic append(), no need to acquire a semaphore
+                self.queue.put_nowait(page)

     def on_dccmsg(self, c, e):
         pass