Revision: 7963 Author: xqt Date: 2010-02-25 15:15:00 +0000 (Thu, 25 Feb 2010)
Log Message: ----------- replaceExcept(): the interwiki exception now also excludes links (with a leading ":") to other sites; enable additional exception tags like <onlyinclude />, <inputbox /> and other <foo /> tags from MediaWiki extensions.
Modified Paths: -------------- trunk/pywikipedia/pywikibot/textlib.py
Modified: trunk/pywikipedia/pywikibot/textlib.py =================================================================== --- trunk/pywikipedia/pywikibot/textlib.py 2010-02-25 14:00:14 UTC (rev 7962) +++ trunk/pywikipedia/pywikibot/textlib.py 2010-02-25 15:15:00 UTC (rev 7963) @@ -61,17 +61,11 @@ 'comment': re.compile(r'(?s)<!--.*?-->'), # section headers 'header': re.compile(r'\r\n=+.+=+ *\r\n'), - 'includeonly': re.compile(r'(?is)<includeonly>.*?</includeonly>'), - 'math': re.compile(r'(?is)<math>.*?</math>'), - 'noinclude': re.compile(r'(?is)<noinclude>.*?</noinclude>'), - # wiki tags are ignored inside nowiki tags. - 'nowiki': re.compile(r'(?is)<nowiki>.*?</nowiki>'), # preformatted text 'pre': re.compile(r'(?ism)<pre>.*?</pre>'), 'source': re.compile(r'(?is)<source .*?</source>'), # inline references 'ref': re.compile(r'(?ism)<ref[ >].*?</ref>'), - 'timeline': re.compile(r'(?is)<timeline>.*?</timeline>'), # lines that start with a space are shown in a monospace font and # have whitespace preserved. 'startspace': re.compile(r'(?m)^ (.*?)$'), @@ -92,7 +86,8 @@ # this matches internal wikilinks, but also interwiki, categories, and # images. 'link': re.compile(r'[[[^]|]*(|[^]]*)?]]'), - 'interwiki': re.compile(r'(?i)[[(%s)\s?:[^]]*]][\s]*' + # also finds links to foreign sites with preleading ":" + 'interwiki': re.compile(r'(?i)[[:?(%s)\s?:[^]]*]][\s]*' % '|'.join(site.validLanguageLinks() + site.family.obsolete.keys()) ), @@ -111,9 +106,11 @@ if isinstance(exc, str) or isinstance(exc, unicode): # assume it's a reference to the exceptionRegexes dictionary # defined above. 
- if exc not in exceptionRegexes: - raise ValueError("Unknown tag type: " + exc) - dontTouchRegexes.append(exceptionRegexes[exc]) + if exc in exceptionRegexes: + dontTouchRegexes.append(exceptionRegexes[exc]) + else: + # nowiki, noinclude, includeonly, timeline, math ond other extensions + dontTouchRegexes.append(re.compile(r'(?is)<%s>.*?</%s>' % (exc, exc))) # handle alias if exc == 'source': dontTouchRegexes.append(re.compile(r'(?is)<syntaxhighlight .*?</syntaxhighlight>'))
pywikipedia-svn@lists.wikimedia.org