Revision: 3930
Author: valhallasw
Date: 2007-08-01 16:05:20 +0000 (Wed, 01 Aug 2007)
Log Message:
-----------
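BufferedReader: replaced the separate in/out buffers with a single buffer and
explicit restore points (getrestore(), commit(counter), undo(counter)); peek()
now takes a lookahead count, push() was removed, and a doctest was added.
Parser: updated to understand both simple wikilinks [[blah]] and
simple templates {{blah}}, via a shared parseTitle(). A template nested in a
link title, e.g. [[boo{{bah}}]], is also understood \o/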
Modified Paths:
--------------
trunk/pywikiparser/BufferedReader.py
trunk/pywikiparser/Parser.py
Modified: trunk/pywikiparser/BufferedReader.py
===================================================================
--- trunk/pywikiparser/BufferedReader.py 2007-08-01 12:31:16 UTC (rev 3929)
+++ trunk/pywikiparser/BufferedReader.py 2007-08-01 16:05:20 UTC (rev 3930)
@@ -1,7 +1,41 @@
class BufferedReader(object):
+ """ Buffered reader. Usage:
+
+ >>> reader = BufferedReader((i for i in range(10)))
+ >>> restore = reader.getrestore()
+ >>> restore
+ -1
+ >>> reader.next()
+ 0
+ >>> reader.next()
+ 1
+ >>> reader.undo(-1)
+ >>> reader.next()
+ 0
+ >>> restore = reader.commit(-1)
+ >>> restore
+ -1
+ >>> reader.next()
+ 1
+ >>> reader.getrestore()
+ 0
+ >>> reader.next()
+ 2
+ >>> reader.undo(0)
+ >>> reader.next()
+ 2
+ >>> reader.commit(0)
+ 1
+ >>> reader.undo(1)
+ >>> reader.next()
+ 3
+ >>> reader.undo(-1)
+ >>> reader.next()
+ 1
+ """
+
def __init__(self, generator):
- self.inbuffer = []
- self.outbuffer = []
+ self.buffer = []
self.counter = -1
self.generator = generator
self.gen = self._generator()
@@ -31,33 +65,36 @@
def next(self, *args, **kwargs):
return self.gen.next(*args, **kwargs)
- def peek(self):
- if len(self.outbuffer) <= self.counter+1:
+ def peek(self, num=1):
+ if len(self.buffer) <= self.counter+num:
data = self.generator.next()
- self.inbuffer.append(data)
- self.outbuffer.append(data)
- return self.outbuffer[self.counter+1]
+ self.buffer.append(data)
+ return self.buffer[self.counter+num]
def _generator(self):
while(True):
self.counter += 1
- if len(self.outbuffer) <= self.counter:
+ if len(self.buffer) <= self.counter:
data = self.generator.next()
- self.inbuffer.append(data)
- self.outbuffer.append(data)
- yield self.outbuffer[self.counter]
+ self.buffer.append(data)
+ yield self.buffer[self.counter]
- def commit(self):
- self.inbuffer = self.inbuffer[self.counter+1:]
- self.outbuffer = self.outbuffer[self.counter+1:]
- self.counter = -1
+ def getrestore(self):
+ return self.counter
+
+ def commit(self, counter):
+ if counter == -1:
+ # clear memory
+ self.buffer = self.buffer[self.counter+1:]
+ self.counter = -1
+
self.gen = self._generator()
+ return self.counter
- def undo(self):
- self.outbuffer = self.inbuffer[:]
- self.counter = -1
+ def undo(self, counter):
+ self.counter = counter
self.gen = self._generator()
-
- def push(self, data):
- self.outbuffer.append(data)
- self.gen = self._generator()
\ No newline at end of file
+
+if __name__ == "__main__":
+ import doctest
+ doctest.testmod()
\ No newline at end of file
Modified: trunk/pywikiparser/Parser.py
===================================================================
--- trunk/pywikiparser/Parser.py 2007-08-01 12:31:16 UTC (rev 3929)
+++ trunk/pywikiparser/Parser.py 2007-08-01 16:05:20 UTC (rev 3930)
@@ -46,39 +46,41 @@
self.italic = False
self.bold = False
+ restore = self.lex.getrestore()
+
try:
while(True):
token = self.lex.peek()
if token[0] in breaktoken:
break
- node = self.parsetoken(token)
+ node = self.parsetoken(token, restore)
print "Adding %r (was %r)" % (node,token)
self.par.extend(node)
- self.lex.commit()
+ restore = self.lex.commit(restore)
except StopIteration: pass
return self.root
- def parsetoken(self, token):
+ def parsetoken(self, token, restore):
# The function to call is parser<token>
- exec("data = self.parse%s()" % token[0].name, globals(), locals())
+ exec("data = self.parse%s(restore)" % token[0].name, globals(),
locals())
return data
- def parseEOF(self):
+ def parseEOF(self, restore):
token = self.expect(Tokens.EOF)
raise StopIteration
# Special functions that directly access the storage tree
- def parseNEWPAR(self):
+ def parseNEWPAR(self, restore):
token = self.expect(Tokens.NEWPAR)
self.par = self.root.appendElement('p')
self.bold = False
self.italic = False
return []
- def parseAPOSTROPHE(self):
+ def parseAPOSTROPHE(self, restore):
num = len(self.eat(Tokens.APOSTROPHE))
#prepare length
@@ -126,46 +128,46 @@
# Functions that return the input directly
- def parseSQRE_CLOSE(self):
+ def parseSQRE_CLOSE(self, restore):
return self.expect(Tokens.SQRE_CLOSE)
- def parsePIPE(self):
+ def parsePIPE(self, restore):
return self.expect(Tokens.PIPE)
- def parseEQUAL_SIGN(self):
+ def parseEQUAL_SIGN(self, restore):
return self.expect(Tokens.EQUAL_SIGN)
- def parseCURL_CLOSE(self):
+ def parseCURL_CLOSE(self, restore):
return self.expect(Tokens.CURL_CLOSE)
- def parseANGL_CLOSE(self):
+ def parseANGL_CLOSE(self, restore):
return self.expect(Tokens.ANGL_CLOSE)
- def parseASTERISK(self):
+ def parseASTERISK(self, restore):
return self.expect(Tokens.ASTERISK)
- def parseCOLON(self):
+ def parseCOLON(self, restore):
return self.expect(Tokens.COLON)
- def parseSEMICOLON(self):
+ def parseSEMICOLON(self, restore):
return self.expect(Tokens.SEMICOLON)
- def parseHASH(self):
+ def parseHASH(self, restore):
return self.expect(Tokens.HASH)
- def parseTAB_NEWLINE(self):
+ def parseTAB_NEWLINE(self, restore):
return self.expect(Tokens.TAB_NEWLINE)
- def parseTAB_CLOSE(self):
+ def parseTAB_CLOSE(self, restore):
return self.expect(Tokens.TAB_CLOSE)
# True parser callers
- def parseWHITESPACE(self):
+ def parseWHITESPACE(self, restore):
# Todo:
- return self.parseTEXT()
+ return self.parseTEXT(restore)
- def parseTEXT(self):
+ def parseTEXT(self, restore):
text = self.eat([Tokens.TEXT, Tokens.WHITESPACE])
if text:
@@ -173,49 +175,48 @@
else:
return []
- def parseSQRE_OPEN(self):
+ def parseSQRE_OPEN(self, restore):
try:
return self.parseWikilink()
except ParseError: pass
- self.lex.undo()
+ self.lex.undo(restore)
try:
return self.parseExternallink()
except ParseError: pass
- self.lex.undo()
+ self.lex.undo(restore)
return self.expect(Tokens.SQRE_OPEN)
- def parseCURL_OPEN(self):
+ def parseCURL_OPEN(self, restore):
try:
return self.parseTemplateparam()
except ParseError: pass
- self.lex.undo()
+ self.lex.undo(restore)
try:
return self.parseTemplate()
except ParseError: pass
- self.lex.undo()
+ self.lex.undo(restore)
return self.expect(Tokens.CURL_OPEN)
- def parseANGL_OPEN(self):
+ def parseANGL_OPEN(self, restore):
try:
return self.parseHTML()
except ParseError: pass
- self.lex.undo()
+ self.lex.undo(restore)
return self.expect(Tokens.ANGL_OPEN)
- def parseTAB_OPEN(self):
+ def parseTAB_OPEN(self, restore):
try:
return self.parseWikitable()
except ParseError: pass
- self.lex.undo()
+ self.lex.undo(restore)
return self.expect(Tokens.TAB_OPEN)
- titlere = re.compile(r"[^\^\]#<>\[\|\{\}\n]*$")
def parseWikilink(self):
retval = dom.Element('')
self.expect(Tokens.SQRE_OPEN)
@@ -224,17 +225,53 @@
pre = self.eat(Tokens.SQRE_OPEN)
if pre:
retval.append(pre)
-
- title = self.eat(Tokens.TEXT) # temp. needs to allow templates etc.
-
- link = retval.appendElement('wikilink')
- link.appendElement('url').append(title)
- self.expect(Tokens.SQRE_CLOSE)
+ wikilink = retval.appendElement('wikilink')
+ # get page title
+ title = wikilink.appendElement('title')
+
+ #parse title
+ title.extend(self.parseTitle(Tokens.SQRE_CLOSE))
+
self.expect(Tokens.SQRE_CLOSE)
- return retval
-
+ self.expect(Tokens.SQRE_CLOSE)
+ return retval
+
+
+
+# while( titlere.match(next) ):
+# title += next
+# next = self.lex.peek()
+#
+#
+# else:
+# break
+# while(True):
+# param = .Element('parameter')
+# parampiece = self.parse([Tokens.SQRE_CLOSE, Tokens.PIPE])
+# param.extend(parampiece)
+# if (self.lex.peek( )[0] == Tokens.SQRE_CLOSE) and
+# (self.lex.peek(2)[0] != Tokens.SQRE_CLOSE): # \][^\]]: a single ]
+# param.append('[')
+# continue
+# else:
+# break
+#
+#
+#
+# breaktoken = self.lex.peek()
+# if breaktoken[0] == Tokens.PIPE:
+# break
+# elif breaktoken[0] == Tokens.SQRE_CLOSE:
+# next = self.lex.peek(2)
+# if next[0] == Tokens.SQRE_CLOSE:
+#
+# self.expect(Tokens.SQRE_CLOSE)
+# self.expect(Tokens.SQRE_CLOSE)
+# return retval
+#
+
def parseExternallink(self):
raise ParseError("Needs implementation")
@@ -242,11 +279,52 @@
raise ParseError("Needs implementation")
def parseTemplate(self):
- raise ParseError("Needs implementation")
+ retval = dom.Element('')
+ self.expect(Tokens.CURL_OPEN)
+ self.expect(Tokens.CURL_OPEN)
+ pre = self.eat(Tokens.CURL_OPEN)
+ print 'pre: ' + pre
+ if pre:
+ retval.append(pre)
+
+ wikilink = retval.appendElement('template')
+ # get page title
+ title = wikilink.appendElement('title')
+ title.extend(self.parseTitle(Tokens.CURL_CLOSE))
+
+ self.expect(Tokens.CURL_CLOSE)
+ self.expect(Tokens.CURL_CLOSE)
+
+ return retval
+
def parseHTML(self):
raise ParseError("Needs implementation")
def parseWikitable(self):
raise ParseError("Needs implementation")
+
+ titlere = re.compile(r"[^\^\]<>\[\|\{\}\n]*$")
+ def parseTitle(self, closetoken):
+ title = dom.Element('title')
+ while(True):
+ next = self.lex.peek()
+ if next[0] == closetoken or next[0] == Tokens.PIPE:
+ break
+ elif next[0] == Tokens.CURL_OPEN: # allow templates to expand
+ restore = self.lex.getrestore()
+ data = self.parseCURL_OPEN(restore)
+ print 'Parsed template: %r' % (data,)
+ for item in data:
+ if isinstance(item, basestring):
+ if not self.titlere.match(item):
+ raise ParseError('illegal wiki link')
+ title.extend(data)
+ else:
+ next = self.lex.next()
+ if not self.titlere.match(next[1]):
+ raise ParseError('illegal wiki link')
+ title.append(next[1])
+ return title
+
\ No newline at end of file