Revision: 4103 Author: misza13 Date: 2007-08-24 16:34:02 +0000 (Fri, 24 Aug 2007)
Log Message: ----------- Modified removeDisabledParts - the exact set of parts which are removed can now be specified and defaults to all.
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-08-24 15:47:51 UTC (rev 4102) +++ trunk/pywikipedia/wikipedia.py 2007-08-24 16:34:02 UTC (rev 4103) @@ -2618,14 +2618,24 @@ text = text[:markerpos] + marker + text[markerpos:] return text
def removeDisabledParts(text, parts=('*',)):
    """
    Return text with the parts where wiki markup is disabled removed, i.e.
    * HTML comments   (part name 'comments')
    * nowiki tags     (part name 'nowiki')
    * includeonly tags (part name 'includeonly')

    The exact set of parts which are removed is passed as the 'parts'
    parameter, an iterable of the part names listed above. The special name
    '*' selects every part and is the default. An empty 'parts' leaves the
    text unchanged.

    Matching is case-insensitive and spans line breaks. A KeyError is raised
    if 'parts' contains a name that is neither '*' nor a known part name.
    """
    regexes = {
        'nowiki': r'<nowiki>.*?</nowiki>',
        'comments': r'<!--.*?-->',
        'includeonly': r'<includeonly>.*?</includeonly>',
    }
    # Materialise once: 'parts' is both searched for '*' and iterated, which
    # would silently exhaust a one-shot iterable such as a generator.
    parts = tuple(parts)
    if '*' in parts:
        # Sorted so the compiled pattern does not depend on dict ordering.
        parts = sorted(regexes)
    patterns = [regexes[p] for p in parts]
    if not patterns:
        # Nothing selected: avoid compiling and applying an empty pattern.
        return text
    toRemoveR = re.compile('|'.join(patterns), re.IGNORECASE | re.DOTALL)
    return toRemoveR.sub('', text)
# Part of library dealing with interwiki links
pywikipedia-l@lists.wikimedia.org