[Pywikipedia-l] SVN: [5723] trunk/pywikipedia/reflinks.py

nicdumz at svn.wikimedia.org nicdumz at svn.wikimedia.org
Thu Jul 17 12:00:29 UTC 2008


Revision: 5723
Author:   nicdumz
Date:     2008-07-17 12:00:29 +0000 (Thu, 17 Jul 2008)

Log Message:
-----------
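Take care of reference groups: duplicate references are now collected separately for each <ref group="..."> group, and the group attribute is kept on the rewritten <ref> tags.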

Modified Paths:
--------------
    trunk/pywikipedia/reflinks.py

Modified: trunk/pywikipedia/reflinks.py
===================================================================
--- trunk/pywikipedia/reflinks.py	2008-07-15 05:05:08 UTC (rev 5722)
+++ trunk/pywikipedia/reflinks.py	2008-07-17 12:00:29 UTC (rev 5723)
@@ -230,19 +230,27 @@
     def __init__(self):
         # Match references
         self.REFS = re.compile(u'(?i)<ref(?P<name>[^>]*)>(?P<content>.*?)</ref>')
-        self.NAMES = re.compile(u'(?i)\s*name\s*=\s*(?P<quote>"?)\s*(?P<name>.*?)\s*(?P=quote)\s*')
+        self.NAMES = re.compile(u'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.*?)\s*(?P=quote).*')
+        self.GROUPS = re.compile(u'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.*?)\s*(?P=quote).*')
 
     def process(self, text):
-        # key are ref contents
-        # values are [name, [list of full ref matches]]
+        # keys are ref groups
+        # values are a dict where :
+        #   keys are ref content
+        #   values are [name, [list of full ref matches]]
         foundRefs = {}
         foundRefNames = []
 
         for match in self.REFS.finditer(text):
             content = match.group('content')
             name = match.group('name')
-            if foundRefs.has_key(content):
-                v = foundRefs[content]
+            group = self.GROUPS.match(name)
+            if not foundRefs.has_key(group):
+                foundRefs[group] = {}
+
+            groupdict = foundRefs[group]
+            if groupdict.has_key(content):
+                v = groupdict[content]
                 v[1].append(match.group())
             else:
                 v = [None, [match.group()]]
@@ -252,31 +260,37 @@
                     n = n.group('name')
                     v[0] = n
                     foundRefNames.append(n)
-            foundRefs[content] = v
+            groupdict[content] = v
     
         id = 1
         while 'autogenerated%s' % id in foundRefNames:
             id += 1
-        for (k, v) in foundRefs.iteritems():
-            if len(v[1]) == 1:
-                continue
-            name = v[0]
-            if not name:
-                name = 'autogenerated%s' % id
-                id += 1
-            named = u'<ref name="%s">%s</ref>' % (name, k)
-            text = text.replace(v[1][0], named, 1)
+        for (g, d) in foundRefs.iteritems():
+            if g:
+                group = "group=\"%s\" " % group
+            else:
+                group = ""
 
-            # make sure that the first (named ref) is not
-            # removed later :
-            pos = text.index(named) + len(named)
-            header = text[:pos]
-            end = text[pos:]
-
-            unnamed = u'<ref name="%s" />' % name
-            for ref in v[1][1:]:
-                end = end.replace(ref, unnamed)
-            text = header + end 
+            for (k, v) in d.iteritems():
+                if len(v[1]) == 1:
+                    continue
+                name = v[0]
+                if not name:
+                    name = 'autogenerated%s' % id
+                    id += 1
+                named = u'<ref %sname="%s">%s</ref>' % (group, name, k)
+                text = text.replace(v[1][0], named, 1)
+    
+                # make sure that the first (named ref) is not
+                # removed later :
+                pos = text.index(named) + len(named)
+                header = text[:pos]
+                end = text[pos:]
+    
+                unnamed = u'<ref %sname="%s" />' % (group, name)
+                for ref in v[1][1:]:
+                    end = end.replace(ref, unnamed)
+                text = header + end 
         return text        
 
 class ReferencesRobot:





More information about the Pywikipedia-l mailing list