[Pywikipedia-l] SVN: [6567] trunk/pywikipedia/wikipedia.py

kim at svn.wikimedia.org kim at svn.wikimedia.org
Wed Apr 1 10:45:17 UTC 2009


Revision: 6567
Author:   kim
Date:     2009-04-01 10:45:17 +0000 (Wed, 01 Apr 2009)

Log Message:
-----------
Add code to scrub junk output (e.g. PHP strict warnings) that precedes the XML header in fetched XML

Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2009-03-31 14:03:54 UTC (rev 6566)
+++ trunk/pywikipedia/wikipedia.py	2009-04-01 10:45:17 UTC (rev 6567)
@@ -4893,6 +4893,24 @@
                 # Token not found
                 output(u'WARNING: Token not found on %s. You will not be able to edit any page.' % self)
 
+    def scrubxml(self,xml):
+        """Return xml without the lines that precede the first line starting with '<?xml'
+        (junk such as PHP strict warnings printed ahead of the real document). Every kept
+        line gets a newline appended; the result is "" if no line starts with '<?xml'."""
+        xml2=""  # output buffer: the kept lines, each followed by a newline
+        start=False  # flips to True at the first line that starts with "<?xml"
+        warn=False  # set by any line not starting with "<?xml"; only read by the no-op check below
+        for line in xml.split("\n"):
+            if line.startswith("<?xml"):
+                start=True
+            else:
+		 warn=True  # NOTE(review): tab-indented unlike the lines around it; also set by lines after the header
+            if start:
+                xml2+=line+"\n"  # a newline is appended even after the final piece of the split
+        if warn==True:
+            pass    # TODO: we could issue a warning for broken xml (currently nothing is reported)
+        return xml2
+
     def mediawiki_message(self, key):
         """Return the MediaWiki message text for key "key" """
         # Allmessages is retrieved once for all per created Site object
@@ -4939,6 +4957,7 @@
                 else:
                     xml = self.getUrl(self.get_address("Special:Allmessages")
                                         + "&ot=xml")
+                    xml=self.scrubxml(xml)
                     # xml structure is :
                     # <messages lang="fr">
                     #    <message name="about">À propos</message>





More information about the Pywikipedia-l mailing list