[Pywikipedia-l] SVN: [5461] trunk/pywikipedia/wikipedia.py

nicdumz at svn.wikimedia.org nicdumz at svn.wikimedia.org
Thu May 29 11:19:36 UTC 2008


Revision: 5461
Author:   nicdumz
Date:     2008-05-29 11:19:36 +0000 (Thu, 29 May 2008)

Log Message:
-----------
Making BeautifulSoup only a fallback solution. Message parsing with BS is really slow, per bug #1973804

Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2008-05-29 06:20:42 UTC (rev 5460)
+++ trunk/pywikipedia/wikipedia.py	2008-05-29 11:19:36 UTC (rev 5461)
@@ -4458,23 +4458,43 @@
 
     def mediawiki_message(self, key):
         """Return the MediaWiki message text for key "key" """
-        # Allmessages is retrieved once for all in a session
+        # Allmessages is retrieved only once per created Site object
         if not self._mediawiki_messages:
             if verbose:
                 output(
                   u"Retrieving mediawiki messages from Special:Allmessages")
+            elementtree = True
+            try:
+                try:    
+                    from xml.etree.cElementTree import XML # 2.5    
+                except ImportError:     
+                    try:    
+                        from cElementTree import XML    
+                    except ImportError:     
+                        from elementtree.ElementTree import XML
+            except ImportError:
+                if verbose:
+                    output(u'Elementtree was not found, using BeautifulSoup instead')
+                elementtree = False
+
             retry_idle_time = 1
             while True:
                 get_throttle()
                 xml = self.getUrl(self.get_address("Special:Allmessages") 
                                     + "&ot=xml")
-                tree = BeautifulStoneSoup(xml)
                 # xml structure is :
                 # <messages lang="fr">
                 #    <message name="about">À propos</message>
                 #    ...
                 # </messages>
-                self._mediawiki_messages = dict([(tag.get('name').lower(), tag.string) 
+                if elementtree:
+                    decode = xml.encode(self.encoding())
+                    tree = XML(decode) 
+                    self._mediawiki_messages = dict([(tag.get('name').lower(), tag.text) 
+                    for tag in tree.getiterator('message')])
+                else:
+                    tree = BeautifulStoneSoup(xml)
+                    self._mediawiki_messages = dict([(tag.get('name').lower(), tag.string) 
                     for tag in tree.findAll('message')])
                 
                 if not self._mediawiki_messages:





More information about the Pywikipedia-l mailing list