[Pywikipedia-l] SVN: [5461] trunk/pywikipedia/wikipedia.py
nicdumz at svn.wikimedia.org
nicdumz at svn.wikimedia.org
Thu May 29 11:19:36 UTC 2008
Revision: 5461
Author: nicdumz
Date: 2008-05-29 11:19:36 +0000 (Thu, 29 May 2008)
Log Message:
-----------
Making BeautifulSoup only a fallback solution: ElementTree is now tried first, because message parsing with BeautifulSoup (BS) is really slow; see bug #1973804.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-05-29 06:20:42 UTC (rev 5460)
+++ trunk/pywikipedia/wikipedia.py 2008-05-29 11:19:36 UTC (rev 5461)
@@ -4458,23 +4458,43 @@
def mediawiki_message(self, key):
"""Return the MediaWiki message text for key "key" """
- # Allmessages is retrieved once for all in a session
+ # Allmessages is retrieved once for all per created Site object
if not self._mediawiki_messages:
if verbose:
output(
u"Retrieving mediawiki messages from Special:Allmessages")
+ elementtree = True
+ try:
+ try:
+ from xml.etree.cElementTree import XML # 2.5
+ except ImportError:
+ try:
+ from cElementTree import XML
+ except ImportError:
+ from elementtree.ElementTree import XML
+ except ImportError:
+ if verbose:
+ output(u'Elementtree was not found, using BeautifulSoup instead')
+ elementtree = False
+
retry_idle_time = 1
while True:
get_throttle()
xml = self.getUrl(self.get_address("Special:Allmessages")
+ "&ot=xml")
- tree = BeautifulStoneSoup(xml)
# xml structure is :
# <messages lang="fr">
# <message name="about">À propos</message>
# ...
# </messages>
- self._mediawiki_messages = dict([(tag.get('name').lower(), tag.string)
+ if elementtree:
+ decode = xml.encode(self.encoding())
+ tree = XML(decode)
+ self._mediawiki_messages = dict([(tag.get('name').lower(), tag.text)
+ for tag in tree.getiterator('message')])
+ else:
+ tree = BeautifulStoneSoup(xml)
+ self._mediawiki_messages = dict([(tag.get('name').lower(), tag.string)
for tag in tree.findAll('message')])
if not self._mediawiki_messages:
More information about the Pywikipedia-l mailing list