Revision: 4218 Author: wikipedian Date: 2007-09-09 00:15:33 +0000 (Sun, 09 Sep 2007)
Log Message: ----------- syntax fixing: extended to https links, etc.
Modified Paths: -------------- trunk/pywikipedia/fixes.py
Modified: trunk/pywikipedia/fixes.py =================================================================== --- trunk/pywikipedia/fixes.py 2007-09-08 19:51:11 UTC (rev 4217) +++ trunk/pywikipedia/fixes.py 2007-09-09 00:15:33 UTC (rev 4218) @@ -93,7 +93,7 @@ ] }, # Do NOT run this automatically! - # Recommendation: First run syntax2 automatically, afterwards + # Recommendation: First run syntax-safe automatically, afterwards # run syntax manually, carefully checking that you're not breaking # anything. 'syntax': { @@ -112,25 +112,27 @@ }, 'replacements': [ # external link in double brackets - (r'[[(?P<url>http://[^\]]+?)\]\]', r'[\g<url>]'), + (r'[[(?P<url>https?://[^]]+?)]]', r'[\g<url>]'), # external link starting with double bracket - (r'[[(?P<url>http://.+?)\]', r'[\g<url>]'), + (r'[[(?P<url>https?://.+?)]', r'[\g<url>]'), + # external link with forgotten closing bracket + (r'[(?P<url>https?://[^]\s]+)\r\n', '[\g<url>]\r\n'), # external link ending with double bracket. # do not change weblinks that contain wiki links inside # inside the description - (r'[(?P<url>http://[^\[\]]+?)\]\](?!\])', r'[\g<url>]'), + (r'[(?P<url>https?://[^[]]+?)]](?!])', r'[\g<url>]'), # external link and description separated by a dash. # ATTENTION: while this is a mistake in most cases, there are some # valid URLs that contain dashes! - (r'[(?P<url>http://[^\|\]\s]+?) *| *(?P<label>[^|]]+?)]', r'[\g<url> \g<label>]'), + (r'[(?P<url>https?://[^|]\s]+?) *| *(?P<label>[^|]]+?)]', r'[\g<url> \g<label>]'), # wiki link closed by single bracket. # ATTENTION: There are some false positives, for example # Brainfuck code examples or MS-DOS parameter instructions. # There are also sometimes better ways to fix it than # just putting an additional ] after the link. + (r'[[([^[]]+?)](?!])', r'[[\1]]'), # wiki link opened by single bracket. # ATTENTION: same as above.
- (r'[[([^[]]+?)](?!])', r'[[\1]]'), (r'(?<![)[([^[]]+?)]](?!])', r'[[\1]]'), # template closed by single bracket # ATTENTION: There are some false positives, especially in @@ -159,25 +161,26 @@ 'he':u'בוט: מתקן תחביר ויקי', 'ia':u'Robot: Reparation de syntaxe wiki', 'lt':u'robotas: Taisoma wiki sintaksė', + 'nl':u'Bot: reparatie wikisyntaxis', 'pl':u'Robot poprawia wiki-składnię', 'pt':u'Bot: Corrigindo sintaxe wiki', 'sr':u'Бот: Поправка вики синтаксе', }, 'replacements': [ # external link in double brackets - (r'[[(?P<url>http://[^\]]+?)\]\]', r'[\g<url>]'), + (r'[[(?P<url>https?://[^]]+?)]]', r'[\g<url>]'), # external link starting with double bracket - (r'[[(?P<url>http://.+?)\]', r'[\g<url>]'), + (r'[[(?P<url>https?://.+?)]', r'[\g<url>]'), # external link with forgotten closing bracket - (r'[(?P<url>http://[^\]\s]+)\r\n', r'[\g<url>]\r\n'), + (r'[(?P<url>https?://[^]\s]+)\r\n', '[\g<url>]\r\n'), # external link and description separated by a dash, with # whitespace in front of the dash, so that it is clear that # the dash is not a legitimate part of the URL. - (r'[(?P<url>http://[^\|\]\r\n]+?) +| *(?P<label>[^|]]+?)]', r'[\g<url> \g<label>]'), + (r'[(?P<url>https?://[^|] \r\n]+?) +| *(?P<label>[^|]]+?)]', r'[\g<url> \g<label>]'), # dash in external link, where the correct end of the URL can # be detected from the file extension. It is very unlikely that # this will cause mistakes. - (r'[(?P<url>http://[^\|\] ]+?(.pdf|.html|.htm|.php|.asp|.aspx)) *| *(?P<label>[^|]]+?)]', r'[\g<url> \g<label>]'), + (r'[(?P<url>https?://[^|] ]+?(.pdf|.html|.htm|.php|.asp|.aspx|.jsp)) *| *(?P<label>[^|]]+?)]', r'[\g<url> \g<label>]'), ], }, 'case-de': { # German upper / lower case issues