Revision: 4455 Author: wikipedian Date: 2007-10-17 08:44:54 +0000 (Wed, 17 Oct 2007)
Log Message: ----------- fixed bug [ 1811843 ] cosmetic_changes.py should not edit <nowiki>
Modified Paths: -------------- trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py =================================================================== --- trunk/pywikipedia/cosmetic_changes.py 2007-10-17 08:42:41 UTC (rev 4454) +++ trunk/pywikipedia/cosmetic_changes.py 2007-10-17 08:44:54 UTC (rev 4455) @@ -158,25 +158,12 @@ return text
def cleanUpLinks(self, text): - trailR = re.compile(self.site.linktrail()) - # The regular expression which finds links. Results consist of four groups: - # group title is the target page title, that is, everything before | or ]. - # group section is the page section. It'll include the # to make life easier for us. - # group label is the alternative link title, that's everything between | and ]. - # group linktrail is the link trail, that's letters after ]] which are part of the word. - # note that the definition of 'letter' varies from language to language. - self.linkR = re.compile(r'\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' + self.site.linktrail() + ')') - curpos = 0 - # This loop will run until we have finished the current page - while True: - m = self.linkR.search(text, pos = curpos) - if not m: - break - # Make sure that next time around we will not find this same hit. - curpos = m.start() + 1 - titleWithSection = m.group('titleWithSection') - label = m.group('label') - trailingChars = m.group('linktrail') + # helper function which works on one link and either returns it + # unmodified, or returns a replacement. + def handleOneLink(match): + titleWithSection = match.group('titleWithSection') + label = match.group('label') + trailingChars = match.group('linktrail')
if not self.site.isInterwikiLink(titleWithSection): # The link looks like this: @@ -210,7 +197,7 @@
if titleWithSection == '': # just skip empty links. - continue + return match.group()
# Remove unnecessary initial and final spaces from label. # Please note that some editors prefer spaces around pipes. (See [[en:Wikipedia:Semi-bots]]). We remove them anyway. @@ -256,7 +243,20 @@ newLink = ' ' + newLink if hadTrailingSpaces: newLink = newLink + ' ' - text = text[:m.start()] + newLink + text[m.end():] + return newLink + # don't change anything + return match.group() + + trailR = re.compile(self.site.linktrail()) + # The regular expression which finds links. Results consist of four groups: + # group title is the target page title, that is, everything before | or ]. + # group section is the page section. It'll include the # to make life easier for us. + # group label is the alternative link title, that's everything between | and ]. + # group linktrail is the link trail, that's letters after ]] which are part of the word. + # note that the definition of 'letter' varies from language to language. + linkR = re.compile(r'\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' + self.site.linktrail() + ')') + + text = wikipedia.replaceExcept(text, linkR, handleOneLink, ['comment', 'math', 'nowiki', 'pre', 'startspace']) return text
def resolveHtmlEntities(self, text): @@ -273,7 +273,7 @@ return text
def validXhtml(self, text): - text = wikipedia.replaceExcept(text, r'<br>', r'<br />', ['comment', 'nowiki', 'pre']) + text = wikipedia.replaceExcept(text, r'<br>', r'<br />', ['comment', 'math', 'nowiki', 'pre']) return text
def removeUselessSpaces(self, text):