jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/636004 )
Change subject: [IMPR] replace 'source' exception regex with 'syntaxhighlight'
......................................................................
[IMPR] replace 'source' exception regex with 'syntaxhighlight'
Also add 'source' as 'syntaxhighlight' alias
Bug: T257899
Change-Id: I1db4173c51bad8052fe60049ce17279cc5d65322
---
M pywikibot/cosmetic_changes.py
M pywikibot/fixes.py
M pywikibot/textlib.py
M scripts/category.py
M scripts/table2wiki.py
M tests/textlib_tests.py
6 files changed, 51 insertions(+), 44 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py index f84d6c8..048b726 100755 --- a/pywikibot/cosmetic_changes.py +++ b/pywikibot/cosmetic_changes.py @@ -473,7 +473,7 @@ split[0], '|'.join(cache.get(x.strip(), x) for x in split[1:]))
cache = {} - exceptions = ['nowiki', 'comment', 'pre', 'source'] + exceptions = ['comment', 'nowiki', 'pre', 'syntaxhighlight'] regex = re.compile( FILE_LINK_REGEX % '|'.join(self.site.namespaces[6]), flags=re.X) @@ -659,7 +659,7 @@ # TODO: T254350 - what other extension tags should be avoided? # (graph, math, score, timeline, etc.) text = pywikibot.html2unicode( - text, ignore=ignore, exceptions=['comment', 'source']) + text, ignore=ignore, exceptions=['comment', 'syntaxhighlight']) return text
def removeEmptySections(self, text): @@ -708,8 +708,8 @@
def removeUselessSpaces(self, text): """Cleanup multiple or trailing spaces.""" - exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', - 'source', 'table'] + exceptions = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight', + 'startspace', 'table'] if self.site.sitename != 'wikipedia:cs': exceptions.append('template') text = textlib.replaceExcept(text, r'(?m)[\t ]+( |$)', r'\1', @@ -756,8 +756,9 @@ other wikis. If there are any complaints, please file a bug report. """ if not self.template: - exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', - 'template', 'timeline', self.site.redirectRegex()] + exceptions = ['comment', 'math', 'nowiki', 'pre', + 'syntaxhighlight', 'template', 'timeline', + self.site.redirectRegex()] text = textlib.replaceExcept( text, r'(?m)' @@ -803,8 +804,8 @@ replacement += '|' + match.group('title') return replacement + ']]'
- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', - 'startspace'] + exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', + 'syntaxhighlight'] # link to the wiki working on # Only use suffixes for article paths for suffix in self.site._interwiki_urls(True): @@ -866,8 +867,8 @@
# Everything case-insensitive (?i) # Keep in mind that MediaWiki automatically converts <br> to <br /> - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', - 'startspace'] + exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', + 'syntaxhighlight'] text = textlib.replaceExcept(text, r'(?i)<(b|strong)>(.*?)</\1>', r"'''\2'''", exceptions, site=self.site) text = textlib.replaceExcept(text, r'(?i)<(i|em)>(.*?)</\1>', @@ -893,7 +894,7 @@ """Fix references tags.""" # See also # https://en.wikipedia.org/wiki/User:AnomieBOT/source/tasks/OrphanReferenceFix... - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', + exceptions = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight', 'startspace']
# it should be name = " or name=" NOT name =" @@ -909,8 +910,8 @@
def fixStyle(self, text): """Convert prettytable to wikitable class.""" - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', - 'startspace'] + exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', + 'syntaxhighlight'] if self.site.code in ('de', 'en'): text = textlib.replaceExcept(text, r'(class="[^"]*)prettytable([^"]*")', @@ -919,9 +920,8 @@
def fixTypo(self, text): """Fix units.""" - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', - 'startspace', 'gallery', 'hyperlink', 'interwiki', - 'link'] + exceptions = ['comment', 'gallery', 'hyperlink', 'interwiki', 'link', + 'nowiki', 'math', 'pre', 'startspace', 'syntaxhighlight'] # change <number> ccm -> <number> cm³ text = textlib.replaceExcept(text, r'(\d)\s*(?: )?ccm', r'\1 cm³', exceptions, @@ -942,11 +942,13 @@ """Fix arabic and persian letters.""" if self.site.code not in ['ckb', 'fa']: return text + exceptions = [ - 'gallery', 'file', + 'gallery', 'hyperlink', 'interwiki', + 'inputbox', # FIXME: but changes letters inside wikilinks # 'link', 'math', @@ -954,9 +956,8 @@ 'template', 'timeline', 'ref', - 'source', 'startspace', - 'inputbox', + 'syntaxhighlight', ]
digits = textlib.NON_LATIN_DIGITS @@ -998,7 +999,7 @@
# section headers to {{int:}} versions exceptions = ['comment', 'includeonly', 'math', 'noinclude', 'nowiki', - 'pre', 'source', 'ref', 'timeline'] + 'pre', 'syntaxhighlight', 'ref', 'timeline'] text = textlib.replaceExcept(text, r'([\r\n]|^)== *Summary *==', r'\1== {{int:filedesc}} ==', diff --git a/pywikibot/fixes.py b/pywikibot/fixes.py index 826fe0b..3313043 100644 --- a/pywikibot/fixes.py +++ b/pywikibot/fixes.py @@ -128,12 +128,12 @@ 'inside-tags': [ 'nowiki', 'comment', + 'gallery', # because of filenames + 'hyperlink', # e.g. commas in URLs 'math', - 'pre', # because of code examples - 'source', # because of code examples - 'startspace', # because of code examples - 'hyperlink', # e.g. commas in URLs - 'gallery', # because of filenames + 'pre', # because of code examples + 'startspace', # because of code examples + 'syntaxhighlight', # because of code examples 'timeline', ], 'text-contains': [ @@ -220,8 +220,8 @@ 'comment', 'math', 'pre', - 'source', # because of code examples - 'startspace', # because of code examples + 'startspace', # because of code examples + 'syntaxhighlight', # because of code examples ], 'text-contains': [ r'http://.*?object=tx%5C%7C', # regular dash in URL @@ -266,8 +266,8 @@ 'comment', 'math', 'pre', - 'source', # because of code examples - 'startspace', # because of code examples + 'startspace', # because of code examples + 'syntaxhighlight', # because of code examples ], } }, diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index e495ca2..437de05 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -316,6 +316,8 @@ # handle alias if exc == 'source': result.append(_tag_regex('syntaxhighlight')) + elif exc == 'syntaxhighlight': + result.append(_tag_regex('source'))
return result
@@ -459,7 +461,7 @@ @return: text stripped from disabled parts. """ if not tags: - tags = ('comment', 'includeonly', 'nowiki', 'pre', 'source') + tags = ('comment', 'includeonly', 'nowiki', 'pre', 'syntaxhighlight') tags = set(tags) - set(include) regexes = _get_regexes(tags, site) toRemoveR = re.compile('|'.join(x.pattern for x in regexes), @@ -1022,7 +1024,8 @@ interwikiR = re.compile(r'[[(%s)\s?:[^[]\n]*]][\s]*' % languages, re.IGNORECASE) text = replaceExcept(text, interwikiR, '', - ['nowiki', 'comment', 'math', 'pre', 'source'], + ['comment', 'math', 'nowiki', 'pre', + 'syntaxhighlight'], marker=marker, site=site) return text.strip() @@ -1300,8 +1303,8 @@ catNamespace = '|'.join(site.namespaces.CATEGORY) categoryR = re.compile(r'[[\s*(%s)\s*:.*?]]\s*' % catNamespace, re.I) text = replaceExcept(text, categoryR, '', - ['nowiki', 'comment', 'math', 'pre', 'source', - 'includeonly'], + ['comment', 'includeonly', 'math', 'nowiki', 'pre', + 'syntaxhighlight'], marker=marker, site=site) if marker: @@ -1369,7 +1372,7 @@ r'^[^\S\n]*[[\s*(%s)\s*:\s*%s[\s\u200e\u200f]*' r'((?:|[^]]+)?]])[^\S\n]*\n' % (catNamespace, title), re.I | re.M) - exceptions = ['nowiki', 'comment', 'math', 'pre', 'source'] + exceptions = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight'] if newcat is None: # First go through and try the more restrictive regex that removes # an entire line, if the category is the only thing on that line (this diff --git a/scripts/category.py b/scripts/category.py index ae695be..c1d8820 100755 --- a/scripts/category.py +++ b/scripts/category.py @@ -471,7 +471,8 @@ text = textlib.replaceExcept( text, tagnameregexp, r'\1{0}\n\2'.format(categorytitle), - ['nowiki', 'comment', 'math', 'pre', 'source'], + ['comment', 'math', 'nowiki', 'pre', + 'syntaxhighlight'], site=self.current_page.site) else: if self.includeonly == ['includeonly']: diff --git a/scripts/table2wiki.py b/scripts/table2wiki.py index 925daa4..8c81c7c 100644 --- a/scripts/table2wiki.py +++ 
b/scripts/table2wiki.py @@ -387,7 +387,7 @@ Mark all table start and end tags that are not disabled by nowiki tags, comments etc. We will then later only work on these marked tags. """ - exceptions = ['comment', 'math', 'nowiki', 'pre', 'source'] + exceptions = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight'] text = replaceExcept(text, _table_start_regex, '<##table##', exceptions=exceptions) text = replaceExcept(text, _table_end_regex, '</##table##>', diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 0df9bc9..77b3bd2 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -1235,18 +1235,20 @@ 'x', 'y', ['source'], site=self.site), '<source lang="xml">x</source>') - self.assertEqual(textlib.replaceExcept('<source>x</source>', - 'x', 'y', ['source'], - site=self.site), - '<source>x</source>') - self.assertEqual(textlib.replaceExcept( - '<syntaxhighlight lang="xml">x</syntaxhighlight>', - 'x', 'y', ['source'], site=self.site), - '<syntaxhighlight lang="xml">x</syntaxhighlight>') self.assertEqual( textlib.replaceExcept('<syntaxhighlight>x</syntaxhighlight>', 'x', 'y', ['source'], site=self.site), '<syntaxhighlight>x</syntaxhighlight>') + self.assertEqual( + textlib.replaceExcept( + '<syntaxhighlight lang="xml">x</syntaxhighlight>', + 'x', 'y', ['source'], site=self.site), + '<syntaxhighlight lang="xml">x</syntaxhighlight>') + self.assertEqual( + textlib.replaceExcept('<source>x</source>', + 'x', 'y', ['syntaxhighlight'], + site=self.site), + '<source>x</source>') self.assertEqual(textlib.replaceExcept('<includeonly>x</includeonly>', 'x', 'y', ['includeonly'], site=self.site),