Revision: 3917
Author: valhallasw
Date: 2007-07-28 13:09:37 +0000 (Sat, 28 Jul 2007)
Log Message:
-----------
ObjectTree modules (simple XML tree)
Added Paths:
-----------
trunk/pywikiparser/ObjectTree/
trunk/pywikiparser/ObjectTree/Element.py
trunk/pywikiparser/ObjectTree/XMLParse.py
trunk/pywikiparser/ObjectTree/__init__.py
Added: trunk/pywikiparser/ObjectTree/Element.py
===================================================================
--- trunk/pywikiparser/ObjectTree/Element.py (rev 0)
+++ trunk/pywikiparser/ObjectTree/Element.py 2007-07-28 13:09:37 UTC (rev 3917)
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+"""
+Simple object tree system for python.
+This module contains the Element class
+"""
+#
+# (C) 2007 Merlijn 'valhallasw' van Deen
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = u'$Id$'
+
+class Element(list):
+ """
+ Element in the element tree. Usage examples:
+
+ >>> book = Element(u'book', {u'title': u'Wikitext
Parsing', u'authors': u'valhallasw'})
+ >>> chapter = Element(u'chapter', {u'title':
u'Wikitext'})
+ >>> section = chapter.appendElement(u'section', {u'title':
u'Basic principles of wikitext'})
+ >>> book.append(chapter)
+ >>> section.append(u'Wikitext was created as a way to implement
formatting in plain text files, in a user friendly way. ....')
+ >>> book.toxml()
+ u'<book authors="valhallasw" title="Wikitext
Parsing"><chapter title="Wikitext"><section title="Basic
principles of wikitext">Wikitext was created as a way to implement formatting in
plain text files, in a user friendly way.
....</section></chapter></book>'
+ >>> print book.toxml(True, symbol=u' ')
+ <book authors="valhallasw" title="Wikitext Parsing">
+ <chapter title="Wikitext">
+ <section title="Basic principles of wikitext">
+ Wikitext was created as a way to implement formatting in plain text
files, in a user friendly way. ....
+ </section>
+ </chapter>
+ </book>
+ """
+
+ def __init__(self, element_name, element_attributes={}, contents=[]):
+ self.name = element_name
+ self.attributes = element_attributes
+ self.parent = None
+ for item in contents:
+ self.append(item)
+
+ def toxml(self, pretty=False, level=1, symbol=u'\t'):
+ retval = u'<%s' % (self.name,)
+ for (attribute, value) in self.attributes.iteritems():
+ retval += u' %s="%s"' % (attribute, xmlify(value))
+ if len(self) == 0:
+ retval += u'/>'
+ return retval
+ retval += u'>'
+ if pretty:
+ for subelement in self:
+ if isinstance(subelement, unicode):
+ retval += u'\n' + symbol*level + xmlify(subelement)
+ else:
+ retval += u'\n' + symbol*level + subelement.toxml(pretty,
level+1, symbol)
+ retval += u'\n' + symbol*(level-1) + '</%s>' %
(self.name,)
+ else:
+ for subelement in self:
+ if isinstance(subelement, unicode):
+ retval += xmlify(subelement)
+ elif isinstance(subelement, str):
+ print "THIS SHOULD NOT HAPPEN: String '%s' found!"
% subelement
+ retval += xmlify(subelement)
+ else:
+ retval += subelement.toxml(pretty, level+1, symbol)
+ retval += u'</%s>' % (self.name,)
+ return retval
+
+ def __repr__(self):
+ return u"<'%s' element: %r %s>" % (self.name,
self.attributes, list.__repr__(self))
+
+ def append(self, arg):
+ if isinstance(arg, basestring):
+ if len(arg) == 0: #don't attach empty strings
+ return
+ try:
+ if isinstance(self[-1], unicode): #we convert to unicode!
+ self[-1] += unicode(arg)
+ else:
+ list.append(self, unicode(arg))
+ except IndexError:
+ list.append(self, unicode(arg))
+ elif isinstance(arg, Element):
+ list.append(self, arg)
+ arg.parent = self
+ else:
+ raise TypeError(u'Argument is of %r; expected <type
\'BaseElement\'>.' % (type(arg),))
+
+
+ def appendElement(self, *args, **kwargs):
+ element = Element(*args, **kwargs)
+ self.append(element)
+ return element
+
+def xmlify(data):
+ """
+ >>> xmlify(u'mooh&<>\\'"')
+ u'mooh&<>'"'
+ """
+ data = data.replace(u'&', u'&')
+ data = data.replace(u'<', u'<')
+ data = data.replace(u'>', u'>')
+ data = data.replace(u"'", u''')
+ data = data.replace(u'"', u'"')
+ return data
+
+def _test(*args, **kwargs):
+ import doctest
+ doctest.testmod(*args, **kwargs)
+
+# Run this module's doctests when the file is executed as a script.
+if __name__ == "__main__":
+ _test()
\ No newline at end of file
Property changes on: trunk/pywikiparser/ObjectTree/Element.py
___________________________________________________________________
Name: svn:keywords
+ Id *.c = svn:eol-style=native *.cpp = svn:eol-style=native *.h = svn:eol-style=native
*.dsp = svn:eol-style=CRLF *.dsw = svn:eol-style=CRLF *.sh = svn:eol-style=native
Name: svn:executable *.txt
+ svn:eol-style=native *.png = svn:mime-type=image/png *.jpg = svn:mime-type=image/jpeg
Makefile = svn:eol-style=native
Name: svn:eol-style
+ native
Added: trunk/pywikiparser/ObjectTree/XMLParse.py
===================================================================
--- trunk/pywikiparser/ObjectTree/XMLParse.py (rev 0)
+++ trunk/pywikiparser/ObjectTree/XMLParse.py 2007-07-28 13:09:37 UTC (rev 3917)
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+"""
+Simple object tree system for python.
+This module contains the XML parser
+"""
+#
+# (C) 2007 Merlijn 'valhallasw' van Deen
+#
+# Distributed under the terms of the MIT license.
+#
+
+import warnings
+import xml.sax
+
+import Element
+
+class XMLParser(xml.sax.handler.ContentHandler):
+ def __init__(self):
+ self.root = Element('root')
+ self.currentNode = self.root
+
+ def startElement(self, name, attrs):
+ self.currentNode = self.currentNode.appendElement(name, dict(attrs.items()))
+
+ def endElement(self, name):
+ if self.currentNode.name == name:
+ self.currentNode = self.currentNode.parent
+ else:
+ warnings.warn("Parse warning: recieved </%s>, expected
</%s>." % (name, self.currentNode.name))
+
+ def characters(self, data):
+ self.currentNode.append(data)
+
+def parseText(data):
+ """ Parses XML Text data to an object tree.
+ Examples:
+ >>> text = '<root>Hello, <bold>this</bold> is a test!
<link rel="blah" /></root>'
+ >>> tree = parseText(text)
+ >>> tree
+ <'root' element: {} [u'Hello, ', <'bold' element: {}
[u'this']>, u' is a test! ', <'link' element:
{u'rel': u'blah'} []>]>
+ >>> tree.toxml()
+ u'<root>Hello, <bold>this</bold> is a test! <link
rel="blah"/></root>'
+
+ >>> parseText("<root><nonclosed></root>")
+ Traceback (most recent call last):
+ ...
+ SAXParseException: <unknown>:1:19: mismatched tag
+ """
+ handler = XMLParser()
+ xml.sax.parseString(data, handler)
+ return handler.root[0]
\ No newline at end of file
Property changes on: trunk/pywikiparser/ObjectTree/XMLParse.py
___________________________________________________________________
Name: svn:keywords
+ Id *.c = svn:eol-style=native *.cpp = svn:eol-style=native *.h = svn:eol-style=native
*.dsp = svn:eol-style=CRLF *.dsw = svn:eol-style=CRLF *.sh = svn:eol-style=native
Name: svn:executable *.txt
+ svn:eol-style=native *.png = svn:mime-type=image/png *.jpg = svn:mime-type=image/jpeg
Makefile = svn:eol-style=native
Name: svn:eol-style
+ native
Added: trunk/pywikiparser/ObjectTree/__init__.py
===================================================================
--- trunk/pywikiparser/ObjectTree/__init__.py (rev 0)
+++ trunk/pywikiparser/ObjectTree/__init__.py 2007-07-28 13:09:37 UTC (rev 3917)
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+""" Simple object tree system for python """
+#
+# (C) 2007 Merlijn 'valhallasw' van Deen
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = u'$Id$'
+
+__all__ = ['Element', 'XMLParse']
+
+from Element import Element
+from XMLParse import parseText
\ No newline at end of file
Property changes on: trunk/pywikiparser/ObjectTree/__init__.py
___________________________________________________________________
Name: svn:keywords
+ Id *.c = svn:eol-style=native *.cpp = svn:eol-style=native *.h = svn:eol-style=native
*.dsp = svn:eol-style=CRLF *.dsw = svn:eol-style=CRLF *.sh = svn:eol-style=native
Name: svn:executable *.txt
+ svn:eol-style=native *.png = svn:mime-type=image/png *.jpg = svn:mime-type=image/jpeg
Makefile = svn:eol-style=native
Name: svn:eol-style
+ native