Revision: 6567 Author: kim Date: 2009-04-01 10:45:17 +0000 (Wed, 01 Apr 2009)
Log Message: ----------- Add code to scrub xml headers
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-03-31 14:03:54 UTC (rev 6566) +++ trunk/pywikipedia/wikipedia.py 2009-04-01 10:45:17 UTC (rev 6567) @@ -4893,6 +4893,24 @@ # Token not found output(u'WARNING: Token not found on %s. You will not be able to edit any page.' % self)
def scrubxml(self, xml):
    """Strip anything that precedes the XML declaration in xml.

    Servers sometimes insert junk ahead of the actual XML data (such as
    when PHP reports strict warnings). Everything before the first
    "<?xml" is discarded so that the remainder can be handed to an XML
    parser. Applied to the Special:Allmessages XML right after it is
    fetched.

    xml -- the raw text received from the server.

    Returns the text starting at the first "<?xml". The rest of the
    text is passed through untouched (no newlines are added or removed).
    If the text contains no "<?xml" at all it is returned unchanged
    rather than being thrown away, because the XML declaration is
    optional and the payload may still be usable.

    """
    start = xml.find("<?xml")
    if start == -1:
        # No declaration found: keep the data instead of returning "".
        return xml
    # TODO: we could issue a warning for broken xml when start > 0,
    # i.e. when leading junk had to be discarded.
    return xml[start:]