Revision: 7366 Author: nicdumz Date: 2009-10-04 15:28:18 +0000 (Sun, 04 Oct 2009)
Log Message: ----------- xmlreader: parse redirect information from xml. Original patch from Santiago Mola.
Modified Paths: -------------- trunk/pywikipedia/tests/test_xmlreader.py trunk/pywikipedia/xmlreader.py
Added Paths: ----------- trunk/pywikipedia/tests/data/article-pyrus.xml
Added: trunk/pywikipedia/tests/data/article-pyrus.xml =================================================================== --- trunk/pywikipedia/tests/data/article-pyrus.xml (rev 0) +++ trunk/pywikipedia/tests/data/article-pyrus.xml 2009-10-04 15:28:18 UTC (rev 7366) @@ -0,0 +1,101 @@ +<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en"> + <siteinfo> + <sitename>Wikipedia</sitename> + <base>http://en.wikipedia.org/wiki/Main_Page</base> + <generator>MediaWiki 1.16alpha-wmf</generator> + <case>first-letter</case> + <namespaces> + <namespace key="-2">Media</namespace> + <namespace key="-1">Special</namespace> + <namespace key="0" /> + <namespace key="1">Talk</namespace> + <namespace key="2">User</namespace> + <namespace key="3">User talk</namespace> + <namespace key="4">Wikipedia</namespace> + <namespace key="5">Wikipedia talk</namespace> + <namespace key="6">File</namespace> + <namespace key="7">File talk</namespace> + <namespace key="8">MediaWiki</namespace> + <namespace key="9">MediaWiki talk</namespace> + <namespace key="10">Template</namespace> + <namespace key="11">Template talk</namespace> + <namespace key="12">Help</namespace> + <namespace key="13">Help talk</namespace> + <namespace key="14">Category</namespace> + <namespace key="15">Category talk</namespace> + <namespace key="100">Portal</namespace> + <namespace key="101">Portal talk</namespace> + </namespaces> + </siteinfo> + <page> + <title>Pyrus</title> + <id>9261472</id> + <redirect /> + <revision> + <id>104997415</id> + <timestamp>2007-02-02T02:39:52Z</timestamp> + <contributor> + <username>Melburnian</username> + <id>555187</id> + </contributor> + <comment>moved [[Pyrus]] to [[Pyrus (brand)]]: all links to ''Pyrus'' are related to the pear tree or fruit</comment> + <text xml:space="preserve">#REDIRECT [[Pyrus 
(brand)]]</text> + </revision> + <revision> + <id>104997738</id> + <timestamp>2007-02-02T02:41:24Z</timestamp> + <contributor> + <username>Melburnian</username> + <id>555187</id> + </contributor> + <comment>all links to ''Pyrus'' are related to the pear tree or fruit</comment> + <text xml:space="preserve">#REDIRECT [[Pear]]</text> + </revision> + <revision> + <id>189729426</id> + <timestamp>2008-02-07T14:06:10Z</timestamp> + <contributor> + <username>Jkokemueller</username> + <id>6303952</id> + </contributor> + <comment>Added disambiguation</comment> + <text xml:space="preserve">'''Pyrus''' may refer to: + +* [[Pear]], trees of the genus ''Pyrus'' and the fruit of that tree, edible in some species +* [[Main//Pyrus DMS]], a [[Document Management System]]</text> + </revision> + <revision> + <id>190346463</id> + <timestamp>2008-02-10T07:21:12Z</timestamp> + <contributor> + <username>IceCreamAntisocial</username> + <id>346507</id> + </contributor> + <minor/> + <comment>rv</comment> + <text xml:space="preserve">#REDIRECT [[Pear]]</text> + </revision> + <revision> + <id>238138507</id> + <timestamp>2008-09-13T12:57:33Z</timestamp> + <contributor> + <username>Cottonapple4</username> + <id>7707615</id> + </contributor> + <comment>[[WP:AES|←]] Redirected page to [[Pear]]</comment> + <text xml:space="preserve">#REDIRECT [[Pear]] +[[Category:Maloideae]]</text> + </revision> + <revision> + <id>238392911</id> + <timestamp>2008-09-14T17:08:56Z</timestamp> + <contributor> + <username>Rkitko</username> + <id>536375</id> + </contributor> + <minor/> + <comment>Reverted edits by [[Special:Contributions/Cottonapple4|Cottonapple4]] ([[User talk:Cottonapple4|talk]]) to last version by IceCreamAntisocial</comment> + <text xml:space="preserve">#REDIRECT [[Pear]]</text> + </revision> + </page> +</mediawiki>
Modified: trunk/pywikipedia/tests/test_xmlreader.py =================================================================== --- trunk/pywikipedia/tests/test_xmlreader.py 2009-10-04 15:11:01 UTC (rev 7365) +++ trunk/pywikipedia/tests/test_xmlreader.py 2009-10-04 15:28:18 UTC (rev 7366) @@ -14,6 +14,7 @@ self.assertEquals(u"24278", pages[0].id) self.assertTrue(pages[0].text.startswith('Pears are [[tree]]s of')) self.assertEquals(u"Quercusrobur", pages[1].username) + self.assertEquals(u"Pear", pages[0].title)
def test_XmlDumpFirstRev(self): pages = [r for r in xmlreader.XmlDump("data/article-pear.xml").parse()] @@ -22,7 +23,12 @@ self.assertEquals(u"Pear", pages[0].title) self.assertEquals(u"24278", pages[0].id) self.assertTrue(pages[0].text.startswith('Pears are [[tree]]s of')) + self.assertTrue(not pages[0].isredirect)
+ def test_XmlDumpRedirect(self): + pages = [r for r in xmlreader.XmlDump("data/article-pyrus.xml").parse()] + self.assertTrue(pages[0].isredirect) + def test_MediaWikiXmlHandler(self): handler = xmlreader.MediaWikiXmlHandler() pages = [] @@ -30,9 +36,9 @@ pages.append(page) handler.setCallback(pageDone) xml.sax.parse("data/article-pear.xml", handler) + self.assertEquals(u"Pear", pages[0].title) self.assertEquals(4, len(pages)) self.assertNotEquals("", pages[0].comment)
- if __name__ == '__main__': unittest.main()
Modified: trunk/pywikipedia/xmlreader.py =================================================================== --- trunk/pywikipedia/xmlreader.py 2009-10-04 15:11:01 UTC (rev 7365) +++ trunk/pywikipedia/xmlreader.py 2009-10-04 15:28:18 UTC (rev 7366) @@ -56,7 +56,7 @@ """ Represents a page. """ - def __init__(self, title, id, text, username, ipedit, timestamp, editRestriction, moveRestriction, revisionid, comment): + def __init__(self, title, id, text, username, ipedit, timestamp, editRestriction, moveRestriction, revisionid, comment, redirect): # TODO: there are more tags we can read. self.title = title self.id = id @@ -68,6 +68,7 @@ self.moveRestriction = moveRestriction self.revisionid = revisionid self.comment = comment + self.isredirect = redirect
class XmlHeaderEntry: @@ -94,6 +95,7 @@ self.id = u'' self.revisionid = u'' self.comment = u'' + self.isredirect = False
def setCallback(self, callback): self.callback = callback @@ -159,6 +161,8 @@ self.inContributorTag = False elif name == 'restrictions': self.editRestriction, self.moveRestriction = parseRestrictions(self.restrictions) + elif name == 'redirect': + self.isredirect = True elif name == 'revision': # All done for this. # Remove trailing newlines and spaces @@ -178,7 +182,7 @@ text, self.username, self.ipedit, timestamp, self.editRestriction, self.moveRestriction, - self.revisionid, self.comment) + self.revisionid, self.comment, self.isredirect) self.inRevisionTag = False self.callback(entry) elif self.headercallback: @@ -313,6 +317,7 @@ self.title = elem.findtext("{%s}title" % self.uri) self.pageid = elem.findtext("{%s}id" % self.uri) self.restrictions = elem.findtext("{%s}restrictions" % self.uri) + self.isredirect = elem.findtext("{%s}redirect" % self.uri) is not None
def _create_revision(self, revision): """Creates a Single revision""" @@ -332,7 +337,8 @@ editRestriction=editRestriction, moveRestriction=moveRestriction, revisionid=revisionid, - comment=comment + comment=comment, + redirect=self.isredirect )
def regex_parse(self):
pywikipedia-svn@lists.wikimedia.org