http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11324
Revision: 11324 Author: xqt Date: 2013-04-03 16:54:38 +0000 (Wed, 03 Apr 2013) Log Message: ----------- Remove code duplication in Page.templatesWithParams() by calling the textlib.extract_templates_and_params() method instead. Add a deprecation warning for the thistxt parameter: the textlib method should be used. Bugfix for templatesWithParams parser bug #3609794 and bug #3609685.
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2013-04-03 16:18:30 UTC (rev 11323) +++ trunk/pywikipedia/wikipedia.py 2013-04-03 16:54:38 UTC (rev 11324) @@ -2838,118 +2838,12 @@ thistxt = self.get(get_redirect=get_redirect) except (IsRedirectPage, NoPage): return [] + else: + output(u"""\ +thistxt argument of templatesWithParams is deprecated. Please use textlib method +extract_templates_and_params() instead.""") + return extract_templates_and_params(thistxt)
- # remove commented-out stuff etc. - thistxt = removeDisabledParts(thistxt) - - # marker for inside templates or parameters - marker = findmarker(thistxt, u'@@', u'@') - - # marker for links - marker2 = findmarker(thistxt, u'##', u'#') - - # marker for math - marker3 = findmarker(thistxt, u'%%', u'%') - - result = [] - inside = {} - count = 0 - Rtemplate = re.compile( - ur'{{(msg:)?(?P<name>[^{|]+?)(|(?P<params>[^{]*?))?}}') - Rlink = re.compile(ur'[[[^]]+]]') - Rmath = re.compile(ur'<math>[^<]+</math>') - Rmarker = re.compile(ur'%s(\d+)%s' % (marker, marker)) - Rmarker2 = re.compile(ur'%s(\d+)%s' % (marker2, marker2)) - Rmarker3 = re.compile(ur'%s(\d+)%s' % (marker3, marker3)) - - # Replace math with markers - maths = {} - count = 0 - for m in Rmath.finditer(thistxt): - count += 1 - text = m.group() - thistxt = thistxt.replace(text, '%s%d%s' % (marker3, count, marker3)) - maths[count] = text - - while Rtemplate.search(thistxt) is not None: - for m in Rtemplate.finditer(thistxt): - # Make sure it is not detected again - count += 1 - text = m.group() - thistxt = thistxt.replace(text, - '%s%d%s' % (marker, count, marker)) - # Make sure stored templates don't contain markers - for m2 in Rmarker.finditer(text): - text = text.replace(m2.group(), inside[int(m2.group(1))]) - for m2 in Rmarker3.finditer(text): - text = text.replace(m2.group(), maths[int(m2.group(1))]) - inside[count] = text - - # Name - name = m.group('name').strip() - m2 = Rmarker.search(name) or Rmath.search(name) - if m2 is not None: - # Doesn't detect templates whose name changes, - # or templates whose name contains math tags - continue - if self.site().isInterwikiLink(name): - continue - - # {{#if: }} - if name.startswith('#'): - continue - # {{DEFAULTSORT:...}} - defaultKeys = self.site().versionnumber() > 13 and \ - self.site().getmagicwords('defaultsort') - # It seems some wikis does not have this magic key - if defaultKeys: - found = False - for key in defaultKeys: - if name.startswith(key): - 
found = True - break - if found: continue - - try: - name = Page(self.site(), name).title() - except InvalidTitle: - if name: - output( - u"Page %s contains invalid template name {{%s}}." - % (self.title(), name.strip())) - continue - # Parameters - paramString = m.group('params') - params = [] - if paramString: - # Replace links to markers - links = {} - count2 = 0 - for m2 in Rlink.finditer(paramString): - count2 += 1 - text = m2.group() - paramString = paramString.replace(text, - '%s%d%s' % (marker2, count2, marker2)) - links[count2] = text - # Parse string - markedParams = paramString.split('|') - # Replace markers - for param in markedParams: - for m2 in Rmarker.finditer(param): - param = param.replace(m2.group(), - inside[int(m2.group(1))]) - for m2 in Rmarker2.finditer(param): - param = param.replace(m2.group(), - links[int(m2.group(1))]) - for m2 in Rmarker3.finditer(param): - param = param.replace(m2.group(), - maths[int(m2.group(1))]) - params.append(param) - - # Add it to the result - result.append((name, params)) - return result - def getRedirectTarget(self): """Return a Page object for the target this Page redirects to.