[Pywikipedia-l] SVN: [6568] trunk/pywikipedia/wikipedia.py
nicdumz at svn.wikimedia.org
nicdumz at svn.wikimedia.org
Wed Apr 1 11:18:59 UTC 2009
Revision: 6568
Author: nicdumz
Date: 2009-04-01 11:18:59 +0000 (Wed, 01 Apr 2009)
Log Message:
-----------
Cleaning up the previous commit:
* Cleaning scrubxml() implementation
* Applying scrubxml AFTER the string has been converted with xml.encode(self.encoding()), i.e. immediately before it is handed to the XML parser
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-04-01 10:45:17 UTC (rev 6567)
+++ trunk/pywikipedia/wikipedia.py 2009-04-01 11:18:59 UTC (rev 6568)
@@ -4893,23 +4893,15 @@
# Token not found
output(u'WARNING: Token not found on %s. You will not be able to edit any page.' % self)
- def scrubxml(self,xml):
+ def scrubxml(self, xml):
"""scrub the start of xml input, to make things work, even
- when crap is inserted ahead of the actual xml data. (such as when php reports strict
- warnings)"""
- xml2=""
- start=False
- warn=False
- for line in xml.split("\n"):
- if line.startswith("<?xml"):
- start=True
- else:
- warn=True
- if start:
- xml2+=line+"\n"
- if warn==True:
- pass #TODO: we could issue a warning for broken xml
- return xml2
+ when crap is inserted ahead of the actual xml data.
+ (such as when php reports strict warnings)"""
+ start = xml.find('<?xml')
+ if start < 0:
+ # '<?xml' not found ? Should not happen.
+ return ""
+ return xml[start:]
def mediawiki_message(self, key):
"""Return the MediaWiki message text for key "key" """
@@ -4957,7 +4949,6 @@
else:
xml = self.getUrl(self.get_address("Special:Allmessages")
+ "&ot=xml")
- xml=self.scrubxml(xml)
# xml structure is :
# <messages lang="fr">
# <message name="about">À propos</message>
@@ -4965,7 +4956,8 @@
# </messages>
if elementtree:
decode = xml.encode(self.encoding())
- tree = XML(decode)
+ clean = self.scrubxml(decode)
+ tree = XML(clean)
self._mediawiki_messages = _dict([(tag.get('name').lower(), tag.text)
for tag in tree.getiterator('message')])
else:
More information about the Pywikipedia-l mailing list