Revision: 3920
Author: valhallasw
Date: 2007-07-29 20:14:00 +0000 (Sun, 29 Jul 2007)
Log Message:
-----------
Added new Parser.py
Added Paths:
-----------
trunk/pywikiparser/Parser.py
Added: trunk/pywikiparser/Parser.py
===================================================================
--- trunk/pywikiparser/Parser.py (rev 0)
+++ trunk/pywikiparser/Parser.py 2007-07-29 20:14:00 UTC (rev 3920)
@@ -0,0 +1,170 @@
+# -*- coding: utf-8 -*-
+""" Mediawiki wikitext parser """
+#
+# (C) 2007 Merlijn 'valhallasw' van Deen
+#
+# Distributed under the terms of the MIT license.
+#
+# '$Id$' is a Subversion keyword placeholder; this file is committed with
+# the svn:keywords property including "Id".
+__version__ = '$Id$'
+
+import warnings
+
+import ObjectTree as dom
+from BufferedReader import BufferedReader
+
+from Lexer import Lexer, Tokens
+
+class ParseError(Exception):
+    """Parsing error.
+
+    Raised by Parser.expect() when the next token from the lexer is not one
+    of the token types the caller asked for.
+    """
+
+class Parser:
+    """Build an ObjectTree document from a stream of wikitext lexer tokens.
+
+    Tokens are read through a BufferedReader wrapped around Lexer.lexer().
+    Each token is indexed as token[0] (the token type, which has a ``name``
+    attribute) and token[1] (the payload: a repeat count or the text).
+    For every token type there is a ``parse<NAME>`` method that consumes
+    the token(s) and returns a list of nodes to add to the current element.
+    """
+
+    def __init__(self, data):
+        """Create a parser reading the tokens that Lexer yields for data."""
+        self.lex = BufferedReader(Lexer(data).lexer())
+
+    def expect(self, tokens):
+        """Consume and return the next token if its type is acceptable.
+
+        tokens is either a single token type or a list of token types.
+        Raises ParseError (without consuming anything) when the type of the
+        next token is not among them.
+        """
+        if not isinstance(tokens, list):
+            tokens = [tokens]
+
+        data = self.lex.peek()
+        if data[0] in tokens:
+            return self.lex.next()
+        raise ParseError('%r is not one of %r' % (data[0], tokens))
+
+    def parse(self, breaktoken=()):
+        """Parse tokens into a fresh 'wikipage' tree and return its root.
+
+        Parsing stops when the next token's type is in breaktoken (that
+        token is left unconsumed) or when a handler raises StopIteration,
+        which parseEOF does on the EOF token.
+
+        The default for breaktoken is an immutable empty tuple instead of a
+        shared mutable list; only membership tests are performed on it.
+        """
+        self.root = dom.Element('wikipage')
+        self.par = self.root.appendElement('p')
+        self.italic = False
+        self.bold = False
+        try:
+            while True:
+                token = self.lex.peek()
+                if token[0] in breaktoken:
+                    break
+
+                node = self.parsetoken(token)
+                # Debug trace; single-argument print() form behaves the
+                # same as the print statement on Python 2.
+                print("Adding %r (was %r)" % (node, token))
+                self.par.extend(node)
+                self.lex.commit()
+        except StopIteration:
+            pass
+        return self.root
+
+    def parsetoken(self, token):
+        """Dispatch to the handler for token and return its node list.
+
+        The handler is the method named parse<token type name>.  It is
+        looked up with getattr() rather than by exec-ing a generated code
+        string; a missing handler still raises AttributeError.
+        """
+        handler = getattr(self, 'parse%s' % token[0].name)
+        return handler()
+
+    def parseEOF(self):
+        """Consume the EOF token and raise StopIteration to end parse()."""
+        self.expect(Tokens.EOF)
+        raise StopIteration
+
+    def parseNEWPAR(self):
+        """Start a new 'p' element under the root and reset bold/italic."""
+        self.expect(Tokens.NEWPAR)
+        self.par = self.root.appendElement('p')
+        self.bold = False
+        self.italic = False
+        return []
+
+    def parseAPOSTROPHE(self):
+        """Handle a run of apostrophes (bold / italic markup).
+
+        With n = token[1]:
+          n == 1  literal apostrophe, no formatting change
+          n == 2  toggle italic
+          n == 3  toggle bold
+          n == 4  one literal apostrophe, then toggle bold
+          n == 5  toggle both
+          n >  5  n - 5 literal apostrophes, then toggle both
+
+        Returns the list of literal text nodes (possibly empty) and moves
+        self.par to the element that subsequent nodes should be added to.
+        """
+        token = self.expect(Tokens.APOSTROPHE)
+        # Work on a local copy of the count: the token object came from the
+        # lexer buffer and must not be modified (and cannot be if tokens
+        # are tuples).
+        count = token[1]
+        retval = []
+
+        # Split off the apostrophes that are literal text.
+        if count == 1:
+            retval.append('\'')
+        elif count == 4:
+            retval.append('\'')
+            count = 3
+        elif count > 5:
+            retval.append('\'' * (count - 5))
+            count = 5
+
+        # Determine the requested state changes.
+        newitalic = self.italic
+        newbold = self.bold
+
+        if count == 2:    # toggle italic
+            newitalic = not self.italic
+        elif count == 3:  # toggle bold
+            newbold = not self.bold
+        elif count == 5:  # toggle both
+            newitalic = not self.italic
+            newbold = not self.bold
+
+        print('bold: %r>%r italic: %r>%r' % (self.bold, newbold,
+                                             self.italic, newitalic))
+
+        # Close elements first, then open new ones.
+        if self.italic and not newitalic:
+            if self.par.name == 'i' or not newbold:
+                self.par = self.par.parent
+            else:
+                # The current element is not the <i> itself and bold stays
+                # on: step out two levels and reopen <b> there.
+                self.par = self.par.parent.parent.appendElement('b')
+            self.italic = False
+        if self.bold and not newbold:
+            if self.par.name == 'b' or not newitalic:
+                self.par = self.par.parent
+            else:
+                # Mirror image of the italic case above.
+                self.par = self.par.parent.parent.appendElement('i')
+            self.bold = False
+        if not self.italic and newitalic:
+            self.par = self.par.appendElement('i')
+            self.italic = True
+        if not self.bold and newbold:
+            self.par = self.par.appendElement('b')
+            self.bold = True
+        return retval
+
+    def parseSQRE_CLOSE(self):
+        """Return a run of unmatched ']' characters as literal text."""
+        token = self.expect(Tokens.SQRE_CLOSE)
+        return [']' * token[1]]
+
+    def parsePIPE(self):
+        """Return a run of '|' characters as literal text."""
+        token = self.expect(Tokens.PIPE)
+        return ['|' * token[1]]
+
+    def parseEQUAL_SIGN(self):
+        """Return a run of '=' characters as literal text."""
+        token = self.expect(Tokens.EQUAL_SIGN)
+        return ['=' * token[1]]
+
+    def parseCURL_CLOSE(self):
+        """Return a run of unmatched '}' characters as literal text."""
+        token = self.expect(Tokens.CURL_CLOSE)
+        return ['}' * token[1]]
+
+    def parseANGL_CLOSE(self):
+        """Return a run of unmatched '>' characters as literal text."""
+        token = self.expect(Tokens.ANGL_CLOSE)
+        return ['>' * token[1]]
+
+    def parseTAB_NEWLINE(self):
+        """Return the table row marker '|-' as literal text."""
+        self.expect(Tokens.TAB_NEWLINE)
+        return ['|-']
+
+    def parseTAB_CLOSE(self):
+        """Return the table end marker '|}' as literal text."""
+        self.expect(Tokens.TAB_CLOSE)
+        return ['|}']
+
+    def parseWHITESPACE(self):
+        """Whitespace is handled together with text; see parseTEXT."""
+        return self.parseTEXT()
+
+    def parseTEXT(self):
+        """Consume consecutive TEXT / WHITESPACE tokens as one text node.
+
+        Returns a one-element list holding the concatenated payloads, or an
+        empty list if nothing was collected.
+        """
+        parts = []
+        try:
+            while True:
+                token = self.expect([Tokens.TEXT, Tokens.WHITESPACE])
+                parts.append(token[1])
+        except ParseError:
+            # The next token is of another type: the text run has ended.
+            pass
+        text = ''.join(parts)
+        if text:
+            return [text]
+        return []
+
+    # The four *_OPEN handlers below are not implemented yet.  They used to
+    # consume the token and return None, which parse() then handed to
+    # self.par.extend().  Until real link / template / tag / table parsing
+    # exists they return the markup as literal text, like their *_CLOSE
+    # counterparts.
+    # NOTE(review): assumes token[1] is the repeat count for the OPEN
+    # tokens, as it is for the CLOSE tokens -- confirm against Lexer.
+
+    def parseSQRE_OPEN(self):
+        """Return a run of '[' characters as literal text (placeholder)."""
+        token = self.expect(Tokens.SQRE_OPEN)
+        return ['[' * token[1]]
+
+    def parseCURL_OPEN(self):
+        """Return a run of '{' characters as literal text (placeholder)."""
+        token = self.expect(Tokens.CURL_OPEN)
+        return ['{' * token[1]]
+
+    def parseANGL_OPEN(self):
+        """Return a run of '<' characters as literal text (placeholder)."""
+        token = self.expect(Tokens.ANGL_OPEN)
+        return ['<' * token[1]]
+
+    def parseTAB_OPEN(self):
+        """Return the table start marker '{|' as literal text (placeholder)."""
+        self.expect(Tokens.TAB_OPEN)
+        return ['{|']
+
+
+
+
+
\ No newline at end of file
Property changes on: trunk/pywikiparser/Parser.py
___________________________________________________________________
Name: svn:keywords
+ Id *.c = svn:eol-style=native *.cpp = svn:eol-style=native *.h = svn:eol-style=native
*.dsp = svn:eol-style=CRLF *.dsw = svn:eol-style=CRLF *.sh = svn:eol-style=native
Name: svn:executable *.txt
+ svn:eol-style=native *.png = svn:mime-type=image/png *.jpg = svn:mime-type=image/jpeg
Makefile = svn:eol-style=native
Name: svn:eol-style
+ native