Revision: 5723
Author: nicdumz
Date: 2008-07-17 12:00:29 +0000 (Thu, 17 Jul 2008)

Log Message:
-----------
Take care of reference groups

Modified Paths:
--------------
    trunk/pywikipedia/reflinks.py

Modified: trunk/pywikipedia/reflinks.py
===================================================================
--- trunk/pywikipedia/reflinks.py 2008-07-15 05:05:08 UTC (rev 5722)
+++ trunk/pywikipedia/reflinks.py 2008-07-17 12:00:29 UTC (rev 5723)
@@ -230,19 +230,27 @@
     def __init__(self):
         # Match references
         self.REFS = re.compile(u'(?i)<ref(?P<name>[^>]*)>(?P<content>.*?)</ref>')
-        self.NAMES = re.compile(u'(?i)\s*name\s*=\s*(?P<quote>"?)\s*(?P<name>.*?)\s*(?P=quote)\s*')
+        self.NAMES = re.compile(u'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.*?)\s*(?P=quote).*')
+        self.GROUPS = re.compile(u'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.*?)\s*(?P=quote).*')
 
     def process(self, text):
-        # key are ref contents
-        # values are [name, [list of full ref matches]]
+        # keys are ref groups
+        # values are a dict where :
+        # keys are ref content
+        # values are [name, [list of full ref matches]]
         foundRefs = {}
         foundRefNames = []
 
         for match in self.REFS.finditer(text):
             content = match.group('content')
             name = match.group('name')
-            if foundRefs.has_key(content):
-                v = foundRefs[content]
+            group = self.GROUPS.match(name)
+            if not foundRefs.has_key(group):
+                foundRefs[group] = {}
+
+            groupdict = foundRefs[group]
+            if groupdict.has_key(content):
+                v = groupdict[content]
                 v[1].append(match.group())
             else:
                 v = [None, [match.group()]]
@@ -252,31 +260,37 @@
                 n = n.group('name')
                 v[0] = n
                 foundRefNames.append(n)
-            foundRefs[content] = v
+            groupdict[content] = v
 
         id = 1
         while 'autogenerated%s' % id in foundRefNames:
             id += 1
-        for (k, v) in foundRefs.iteritems():
-            if len(v[1]) == 1:
-                continue
-            name = v[0]
-            if not name:
-                name = 'autogenerated%s' % id
-                id += 1
-            named = u'<ref name="%s">%s</ref>' % (name, k)
-            text = text.replace(v[1][0], named, 1)
+        for (g, d) in foundRefs.iteritems():
+            if g:
+                group = "group="%s" " % group
+            else:
+                group = ""
 
-            # make sure that the first (named ref) is not
-            # removed later :
-            pos = text.index(named) + len(named)
-            header = text[:pos]
-            end = text[pos:]
-
-            unnamed = u'<ref name="%s" />' % name
-            for ref in v[1][1:]:
-                end = end.replace(ref, unnamed)
-            text = header + end
+            for (k, v) in d.iteritems():
+                if len(v[1]) == 1:
+                    continue
+                name = v[0]
+                if not name:
+                    name = 'autogenerated%s' % id
+                    id += 1
+                named = u'<ref %sname="%s">%s</ref>' % (group, name, k)
+                text = text.replace(v[1][0], named, 1)
+
+                # make sure that the first (named ref) is not
+                # removed later :
+                pos = text.index(named) + len(named)
+                header = text[:pos]
+                end = text[pos:]
+
+                unnamed = u'<ref %sname="%s" />' % (group, name)
+                for ref in v[1][1:]:
+                    end = end.replace(ref, unnamed)
+                text = header + end
         return text
 
class ReferencesRobot: