Revision: 4992
Author: russblau
Date: 2008-02-09 20:04:01 +0000 (Sat, 09 Feb 2008)
Log Message:
-----------
Roll back to r4988; r4989 crashes on pages with nested templates. (Example: en.wikipedia.org/Shane_White)
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-02-09 17:49:17 UTC (rev 4991)
+++ trunk/pywikipedia/wikipedia.py 2008-02-09 20:04:01 UTC (rev 4992)
@@ -1558,17 +1558,19 @@
def templates(self): """Return a list of Page objects for templates used on this Page. - + Template parameters are ignored. + """ return [template for (template, param) in self.templatesWithParams()]
def templatesWithParams(self): """Return a list of templates used on this Page. - + Return value is a list of tuples. There is one tuple for each use of a template in the page, with the template Page as the first entry and a list of parameters as the second entry. + """ try: thistxt = self.get() @@ -1578,23 +1580,16 @@ # remove commented-out stuff etc. thistxt = removeDisabledParts(thistxt)
- # marker for inside templates or parameters + # marker for inside template marker = '@@' while marker in thistxt: marker += '@'
- # marker for links - marker2 = '##' - while marker2 in thistxt: - marker2 += '#' - result = [] - inside = {} + markers = {} count = 0 Rtemplate = re.compile(r'{{(msg:)?(?P<name>[^{|]+?)(|(?P<params>[^{]+?))?}}') - Rlink = re.compile(r'[[[^]]+]]') Rmarker = re.compile('%s(\d+)%s' % (marker, marker)) - Rmarker2 = re.compile('%s(\d+)%s' % (marker2, marker2)) while Rtemplate.search(thistxt) is not None: for m in Rtemplate.finditer(thistxt): # Make sure it is not detected again @@ -1602,9 +1597,9 @@ text = m.group() thistxt = thistxt.replace(text, '%s%d%s' % (marker, count, marker)) for m2 in Rmarker.finditer(text): - # Make sure stored templates don't contain markers - text = text.replace(m2.group(), inside[int(m2.group(1))]) - inside[count] = text + # Make sure markers don't contain other markers + text = text.replace(m2.group(), markers[int(m2.group(1))]) + markers[count] = text
# Name name = m.group('name') @@ -1625,22 +1620,12 @@ paramString = m.group('params') params = [] if paramString: - # Replace links to markers - links = {} - count2 = 0 - for m2 in Rlink.finditer(paramString): - count2 += 1 - text = m2.group() - paramString = paramString.replace(text, '%s%d%s' % (marker2, count, marker2)) - links[count2] = text # Parse string markedParams = paramString.split('|') # Replace markers for param in markedParams: for m2 in Rmarker.finditer(param): - param = param.replace(m2.group(), inside[int(m2.group(1))]) - for m2 in Rmarker2.finditer(param): - param = param.replace(m2.group(), links[int(m2.group(1))]) + param = param.replace(m2.group(), markers[int(m2.group(1))]) params.append(param)
# Add it to the result
pywikipedia-l@lists.wikimedia.org