[Pywikipedia-l] SVN: [5718] trunk/pywikipedia/wikipedia.py

rotem at svn.wikimedia.org rotem at svn.wikimedia.org
Mon Jul 14 15:46:17 UTC 2008


Revision: 5718
Author:   rotem
Date:     2008-07-14 15:46:17 +0000 (Mon, 14 Jul 2008)

Log Message:
-----------
Don't miss templates whose parameters include <math> tags containing curly brackets.

Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2008-07-14 13:49:02 UTC (rev 5717)
+++ trunk/pywikipedia/wikipedia.py	2008-07-14 15:46:17 UTC (rev 5718)
@@ -1737,13 +1737,30 @@
         while marker2 in thistxt:
             marker2 += u'#'
 
+        # marker for math
+        marker3 = u'%%'
+        while marker2 in thistxt:
+            marker3 += u'%'
+
         result = []
         inside = {}
         count = 0
         Rtemplate = re.compile(ur'{{(msg:)?(?P<name>[^{\|]+?)(\|(?P<params>[^{]+?))?}}')
         Rlink = re.compile(ur'\[\[[^\]]+\]\]')
-        Rmarker = re.compile(u'%s(\\d+)%s' % (marker, marker))
-        Rmarker2 = re.compile(u'%s(\\d+)%s' % (marker2, marker2))
+        Rmath = re.compile(ur'<math>[^<]+</math>')
+        Rmarker = re.compile(ur'%s(\d+)%s' % (marker, marker))
+        Rmarker2 = re.compile(ur'%s(\d+)%s' % (marker2, marker2))
+        Rmarker3 = re.compile(ur'%s(\d+)%s' % (marker3, marker3))
+
+        # Replace math with markers
+        maths = {}
+        count = 0
+        for m in Rmath.finditer(thistxt):
+            count += 1
+            text = m.group()
+            thistxt = thistxt.replace(text, '%s%d%s' % (marker3, count, marker3))
+            maths[count] = text
+
         while Rtemplate.search(thistxt) is not None:
             for m in Rtemplate.finditer(thistxt):
                 # Make sure it is not detected again
@@ -1751,16 +1768,19 @@
                 text = m.group()
                 thistxt = thistxt.replace(text,
                                           '%s%d%s' % (marker, count, marker))
+                # Make sure stored templates don't contain markers
                 for m2 in Rmarker.finditer(text):
-                    # Make sure stored templates don't contain markers
                     text = text.replace(m2.group(), inside[int(m2.group(1))])
+                for m2 in Rmarker3.finditer(text):
+                    text = text.replace(m2.group(), maths[int(m2.group(1))])
                 inside[count] = text
 
                 # Name
                 name = m.group('name')
-                m2 = Rmarker.search(name)
+                m2 = Rmarker.search(name) or Rmath.search(name)
                 if m2 is not None:
-                    # Doesn't detect templates whose name is changing
+                    # Doesn't detect templates whose name changes,
+                    # or templates whose name contains math tags
                     continue
                 if self.site().isInterwikiLink(name):
                     continue
@@ -1795,6 +1815,9 @@
                         for m2 in Rmarker2.finditer(param):
                             param = param.replace(m2.group(),
                                                   links[int(m2.group(1))])
+                        for m2 in Rmarker3.finditer(param):
+                            param = param.replace(m2.group(),
+                                                  maths[int(m2.group(1))])
                         params.append(param)
 
                 # Add it to the result





More information about the Pywikipedia-l mailing list