Revision: 4455 Author: wikipedian Date: 2007-10-17 08:44:54 +0000 (Wed, 17 Oct 2007)
Log Message: ----------- fixed bug [ 1811843 ] cosmetic_changes.py should not edit <nowiki>
Modified Paths: -------------- trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py =================================================================== --- trunk/pywikipedia/cosmetic_changes.py 2007-10-17 08:42:41 UTC (rev 4454) +++ trunk/pywikipedia/cosmetic_changes.py 2007-10-17 08:44:54 UTC (rev 4455) @@ -158,25 +158,12 @@ return text
def cleanUpLinks(self, text): - trailR = re.compile(self.site.linktrail()) - # The regular expression which finds links. Results consist of four groups: - # group title is the target page title, that is, everything before | or ]. - # group section is the page section. It'll include the # to make life easier for us. - # group label is the alternative link title, that's everything between | and ]. - # group linktrail is the link trail, that's letters after ]] which are part of the word. - # note that the definition of 'letter' varies from language to language. - self.linkR = re.compile(r'\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' + self.site.linktrail() + ')') - curpos = 0 - # This loop will run until we have finished the current page - while True: - m = self.linkR.search(text, pos = curpos) - if not m: - break - # Make sure that next time around we will not find this same hit. - curpos = m.start() + 1 - titleWithSection = m.group('titleWithSection') - label = m.group('label') - trailingChars = m.group('linktrail') + # helper function which works on one link and either returns it + # unmodified, or returns a replacement. + def handleOneLink(match): + titleWithSection = match.group('titleWithSection') + label = match.group('label') + trailingChars = match.group('linktrail')
if not self.site.isInterwikiLink(titleWithSection): # The link looks like this: @@ -210,7 +197,7 @@
if titleWithSection == '': # just skip empty links. - continue + return match.group()
# Remove unnecessary initial and final spaces from label. # Please note that some editors prefer spaces around pipes. (See [[en:Wikipedia:Semi-bots]]). We remove them anyway. @@ -256,7 +243,20 @@ newLink = ' ' + newLink if hadTrailingSpaces: newLink = newLink + ' ' - text = text[:m.start()] + newLink + text[m.end():] + return newLink + # don't change anything + return match.group() + + trailR = re.compile(self.site.linktrail()) + # The regular expression which finds links. Results consist of four groups: + # group title is the target page title, that is, everything before | or ]. + # group section is the page section. It'll include the # to make life easier for us. + # group label is the alternative link title, that's everything between | and ]. + # group linktrail is the link trail, that's letters after ]] which are part of the word. + # note that the definition of 'letter' varies from language to language. + linkR = re.compile(r'\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' + self.site.linktrail() + ')') + + text = wikipedia.replaceExcept(text, linkR, handleOneLink, ['comment', 'math', 'nowiki', 'pre', 'startspace']) return text
def resolveHtmlEntities(self, text): @@ -273,7 +273,7 @@ return text
def validXhtml(self, text): - text = wikipedia.replaceExcept(text, r'<br>', r'<br />', ['comment', 'nowiki', 'pre']) + text = wikipedia.replaceExcept(text, r'<br>', r'<br />', ['comment', 'math', 'nowiki', 'pre']) return text
def removeUselessSpaces(self, text):