Revision: 3917 Author: valhallasw Date: 2007-07-28 13:09:37 +0000 (Sat, 28 Jul 2007)
Log Message: ----------- ObjectTree modules (simple XML tree)
Added Paths: ----------- trunk/pywikiparser/ObjectTree/ trunk/pywikiparser/ObjectTree/Element.py trunk/pywikiparser/ObjectTree/XMLParse.py trunk/pywikiparser/ObjectTree/__init__.py
# Added: trunk/pywikiparser/ObjectTree/Element.py (rev 3917, 2007-07-28 13:09:37 UTC)
# -*- coding: utf-8 -*-
"""
Simple object tree system for python.
This module contains the Element class
"""
#
# (C) 2007 Merlijn 'valhallasw' van Deen
#
# Distributed under the terms of the MIT license.
#
__version__ = u'$Id$'

import warnings

# The text types are looked up once so the module works both where
# ``unicode``/``basestring`` exist and where ``str`` is already unicode.
try:
    _text_type = unicode
    _string_types = basestring
except NameError:
    _text_type = str
    _string_types = str


class Element(list):
    """
    Element in the element tree.

    An Element is a list of its children; every child is either a text
    string or another Element.  ``name`` holds the tag name, ``attributes``
    a dict mapping attribute names to values and ``parent`` the Element this
    one has been appended to (None as long as it has not been appended).

    Usage examples:

    >>> book = Element(u'book', {u'title': u'Wikitext Parsing', u'authors': u'valhallasw'})
    >>> chapter = Element(u'chapter', {u'title': u'Wikitext'})
    >>> section = chapter.appendElement(u'section', {u'title': u'Basic principles of wikitext'})
    >>> book.append(chapter)
    >>> section.append(u'Wikitext was created as a way to implement formatting in plain text files, in a user friendly way. ....')
    >>> print(book.toxml())
    <book authors="valhallasw" title="Wikitext Parsing"><chapter title="Wikitext"><section title="Basic principles of wikitext">Wikitext was created as a way to implement formatting in plain text files, in a user friendly way. ....</section></chapter></book>
    >>> print(book.toxml(True, symbol=u'  '))
    <book authors="valhallasw" title="Wikitext Parsing">
      <chapter title="Wikitext">
        <section title="Basic principles of wikitext">
          Wikitext was created as a way to implement formatting in plain text files, in a user friendly way. ....
        </section>
      </chapter>
    </book>
    """

    def __init__(self, element_name, element_attributes=None, contents=None):
        """
        Create an element.

        element_name       -- tag name of the element
        element_attributes -- dict of attributes; it is stored as-is (not
                              copied).  A fresh dict is created when omitted.
        contents           -- optional iterable of children (strings and/or
                              Elements); each one is added through append().
        """
        list.__init__(self)
        self.name = element_name
        # A new dict per instance: a shared ``{}`` default would leak
        # attributes from one element to every other default-built element.
        if element_attributes is None:
            element_attributes = {}
        self.attributes = element_attributes
        self.parent = None
        if contents is not None:
            for item in contents:
                self.append(item)

    def toxml(self, pretty=False, level=1, symbol=u'\t'):
        """
        Serialize this element and everything below it to an XML string.

        pretty -- when true, put every child on its own indented line
        level  -- indentation depth used for the children of this element
        symbol -- string repeated ``level`` times to build the indentation

        Attributes are written in sorted name order so that the output is
        deterministic.  An element without children is written as ``<name/>``.
        """
        parts = [u'<%s' % (self.name,)]
        for (attribute, value) in sorted(self.attributes.items()):
            parts.append(u' %s="%s"' % (attribute, xmlify(value)))
        if len(self) == 0:
            parts.append(u'/>')
            return u''.join(parts)
        parts.append(u'>')

        if pretty:
            child_prefix = u'\n' + symbol * level
            closing_prefix = u'\n' + symbol * (level - 1)
        else:
            child_prefix = u''
            closing_prefix = u''

        for subelement in self:
            parts.append(child_prefix)
            parts.append(self._child_toxml(subelement, pretty, level, symbol))
        parts.append(closing_prefix)
        parts.append(u'</%s>' % (self.name,))
        return u''.join(parts)

    def _child_toxml(self, subelement, pretty, level, symbol):
        """Serialize one child: escape text, recurse into anything else."""
        if isinstance(subelement, _text_type):
            return xmlify(subelement)
        if isinstance(subelement, _string_types):
            # append() converts all strings to the text type, so a byte
            # string can only have been inserted through a raw list method.
            warnings.warn("THIS SHOULD NOT HAPPEN: String '%s' found!" % subelement)
            return xmlify(subelement)
        return subelement.toxml(pretty, level + 1, symbol)

    def __repr__(self):
        return u"<'%s' element: %r %s>" % (self.name, self.attributes, list.__repr__(self))

    def append(self, arg):
        """
        Add a child to the end of this element.

        Strings are converted to the text type; an empty string is ignored
        and a string following another string is merged into it, so that an
        element never holds two adjacent text children.  An Element is
        appended as-is and gets this element as its ``parent``.

        Raises TypeError for any other kind of argument.
        """
        if isinstance(arg, _string_types):
            if len(arg) == 0:  # don't attach empty strings
                return
            text = _text_type(arg)
            if len(self) > 0 and isinstance(self[-1], _text_type):
                self[-1] += text
            else:
                list.append(self, text)
        elif isinstance(arg, Element):
            list.append(self, arg)
            arg.parent = self
        else:
            raise TypeError(u"Argument is of %r; expected a string or an Element."
                            % (type(arg),))

    def appendElement(self, *args, **kwargs):
        """
        Create a new Element from the given constructor arguments, append it
        to this element and return the new Element.
        """
        element = Element(*args, **kwargs)
        self.append(element)
        return element


def xmlify(data):
    r"""
    Return ``data`` with the five XML special characters replaced by their
    predefined entities.

    >>> print(xmlify(u'mooh&<>\'"'))
    mooh&amp;&lt;&gt;&apos;&quot;
    """
    # '&' has to go first, otherwise the ampersands of the entities
    # introduced below would be escaped a second time.
    data = data.replace(u'&', u'&amp;')
    data = data.replace(u'<', u'&lt;')
    data = data.replace(u'>', u'&gt;')
    data = data.replace(u"'", u'&apos;')
    data = data.replace(u'"', u'&quot;')
    return data


def _test(*args, **kwargs):
    """Run the doctests of this module; arguments go to doctest.testmod."""
    import doctest
    doctest.testmod(*args, **kwargs)


if __name__ == "__main__":
    _test()
Property changes on: trunk/pywikiparser/ObjectTree/Element.py ___________________________________________________________________ Name: svn:keywords + Id *.c = svn:eol-style=native *.cpp = svn:eol-style=native *.h = svn:eol-style=native *.dsp = svn:eol-style=CRLF *.dsw = svn:eol-style=CRLF *.sh = svn:eol-style=native Name: svn:executable *.txt + svn:eol-style=native *.png = svn:mime-type=image/png *.jpg = svn:mime-type=image/jpeg Makefile = svn:eol-style=native Name: svn:eol-style + native
# Added: trunk/pywikiparser/ObjectTree/XMLParse.py (rev 3917, 2007-07-28 13:09:37 UTC)
# -*- coding: utf-8 -*-
"""
Simple object tree system for python.
This module contains the XML parser
"""
#
# (C) 2007 Merlijn 'valhallasw' van Deen
#
# Distributed under the terms of the MIT license.
#

import warnings
import xml.sax
import xml.sax.handler

# Import the class itself: a plain ``import Element`` binds the *module*,
# and calling a module raises TypeError.
from Element import Element


class XMLParser(xml.sax.handler.ContentHandler):
    """
    SAX content handler that builds an Element tree from parser events.

    All parsed content is attached below ``root``, an artificial element
    named 'root'; ``currentNode`` is the element that is currently open.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.root = Element('root')
        self.currentNode = self.root

    def startElement(self, name, attrs):
        """Open a new child below the current node and descend into it."""
        self.currentNode = self.currentNode.appendElement(name, dict(attrs.items()))

    def endElement(self, name):
        """Close the current node and step back up to its parent.

        A closing tag that does not match the open element only triggers a
        warning; the current node is left unchanged in that case.
        """
        if self.currentNode.name == name:
            self.currentNode = self.currentNode.parent
        else:
            warnings.warn("Parse warning: recieved </%s>, expected </%s>." % (name, self.currentNode.name))

    def characters(self, data):
        """Attach character data to the current node."""
        self.currentNode.append(data)


def parseText(data):
    """ Parses XML Text data to an object tree.

    Returns the Element for the document's top-level tag, i.e. the first
    child of the handler's artificial root.  Errors reported by the SAX
    parser are not caught.

    Examples:
    >>> text = '<root>Hello, <bold>this</bold> is a test! <link rel="blah" /></root>'
    >>> tree = parseText(text)
    >>> tree
    <'root' element: {} [u'Hello, ', <'bold' element: {} [u'this']>, u' is a test! ', <'link' element: {u'rel': u'blah'} []>]>
    >>> tree.toxml()
    u'<root>Hello, <bold>this</bold> is a test! <link rel="blah"/></root>'

    >>> parseText("<root><nonclosed></root>")
    Traceback (most recent call last):
    ...
    SAXParseException: <unknown>:1:19: mismatched tag
    """
    handler = XMLParser()
    xml.sax.parseString(data, handler)
    return handler.root[0]
Property changes on: trunk/pywikiparser/ObjectTree/XMLParse.py ___________________________________________________________________ Name: svn:keywords + Id *.c = svn:eol-style=native *.cpp = svn:eol-style=native *.h = svn:eol-style=native *.dsp = svn:eol-style=CRLF *.dsw = svn:eol-style=CRLF *.sh = svn:eol-style=native Name: svn:executable *.txt + svn:eol-style=native *.png = svn:mime-type=image/png *.jpg = svn:mime-type=image/jpeg Makefile = svn:eol-style=native Name: svn:eol-style + native
# Added: trunk/pywikiparser/ObjectTree/__init__.py (rev 3917, 2007-07-28 13:09:37 UTC)
# -*- coding: utf-8 -*-
""" Simple object tree system for python """
#
# (C) 2007 Merlijn 'valhallasw' van Deen
#
# Distributed under the terms of the MIT license.
#
__version__ = u'$Id$'

# Names exported by ``from ObjectTree import *``.  parseText is listed
# because it is imported below precisely to be available on the package.
__all__ = ['Element', 'XMLParse', 'parseText']

# After this import the package attribute ``Element`` is the class, not the
# submodule of the same name.
from Element import Element
from XMLParse import parseText
Property changes on: trunk/pywikiparser/ObjectTree/__init__.py ___________________________________________________________________ Name: svn:keywords + Id *.c = svn:eol-style=native *.cpp = svn:eol-style=native *.h = svn:eol-style=native *.dsp = svn:eol-style=CRLF *.dsw = svn:eol-style=CRLF *.sh = svn:eol-style=native Name: svn:executable *.txt + svn:eol-style=native *.png = svn:mime-type=image/png *.jpg = svn:mime-type=image/jpeg Makefile = svn:eol-style=native Name: svn:eol-style + native