[Pywikipedia-l] SVN: [5723] trunk/pywikipedia/reflinks.py
nicdumz at svn.wikimedia.org
nicdumz at svn.wikimedia.org
Thu Jul 17 12:00:29 UTC 2008
Revision: 5723
Author: nicdumz
Date: 2008-07-17 12:00:29 +0000 (Thu, 17 Jul 2008)
Log Message:
-----------
Take care of reference groups
Modified Paths:
--------------
trunk/pywikipedia/reflinks.py
Modified: trunk/pywikipedia/reflinks.py
===================================================================
--- trunk/pywikipedia/reflinks.py 2008-07-15 05:05:08 UTC (rev 5722)
+++ trunk/pywikipedia/reflinks.py 2008-07-17 12:00:29 UTC (rev 5723)
@@ -230,19 +230,27 @@
def __init__(self):
# Match references
self.REFS = re.compile(u'(?i)<ref(?P<name>[^>]*)>(?P<content>.*?)</ref>')
- self.NAMES = re.compile(u'(?i)\s*name\s*=\s*(?P<quote>"?)\s*(?P<name>.*?)\s*(?P=quote)\s*')
+ self.NAMES = re.compile(u'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.*?)\s*(?P=quote).*')
+ self.GROUPS = re.compile(u'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.*?)\s*(?P=quote).*')
def process(self, text):
- # key are ref contents
- # values are [name, [list of full ref matches]]
+ # keys are ref groups
+ # values are a dict where :
+ # keys are ref content
+ # values are [name, [list of full ref matches]]
foundRefs = {}
foundRefNames = []
for match in self.REFS.finditer(text):
content = match.group('content')
name = match.group('name')
- if foundRefs.has_key(content):
- v = foundRefs[content]
+ group = self.GROUPS.match(name)
+ if not foundRefs.has_key(group):
+ foundRefs[group] = {}
+
+ groupdict = foundRefs[group]
+ if groupdict.has_key(content):
+ v = groupdict[content]
v[1].append(match.group())
else:
v = [None, [match.group()]]
@@ -252,31 +260,37 @@
n = n.group('name')
v[0] = n
foundRefNames.append(n)
- foundRefs[content] = v
+ groupdict[content] = v
id = 1
while 'autogenerated%s' % id in foundRefNames:
id += 1
- for (k, v) in foundRefs.iteritems():
- if len(v[1]) == 1:
- continue
- name = v[0]
- if not name:
- name = 'autogenerated%s' % id
- id += 1
- named = u'<ref name="%s">%s</ref>' % (name, k)
- text = text.replace(v[1][0], named, 1)
+ for (g, d) in foundRefs.iteritems():
+ if g:
+ group = "group=\"%s\" " % group
+ else:
+ group = ""
- # make sure that the first (named ref) is not
- # removed later :
- pos = text.index(named) + len(named)
- header = text[:pos]
- end = text[pos:]
-
- unnamed = u'<ref name="%s" />' % name
- for ref in v[1][1:]:
- end = end.replace(ref, unnamed)
- text = header + end
+ for (k, v) in d.iteritems():
+ if len(v[1]) == 1:
+ continue
+ name = v[0]
+ if not name:
+ name = 'autogenerated%s' % id
+ id += 1
+ named = u'<ref %sname="%s">%s</ref>' % (group, name, k)
+ text = text.replace(v[1][0], named, 1)
+
+ # make sure that the first (named ref) is not
+ # removed later :
+ pos = text.index(named) + len(named)
+ header = text[:pos]
+ end = text[pos:]
+
+ unnamed = u'<ref %sname="%s" />' % (group, name)
+ for ref in v[1][1:]:
+ end = end.replace(ref, unnamed)
+ text = header + end
return text
class ReferencesRobot:
More information about the Pywikipedia-l mailing list