jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[IMPR] replace 'source' exception regex with 'syntaxhighlight'

Also add 'source' as 'syntaxhighlight' alias

Bug: T257899
Change-Id: I1db4173c51bad8052fe60049ce17279cc5d65322
---
M pywikibot/cosmetic_changes.py
M pywikibot/fixes.py
M pywikibot/textlib.py
M scripts/category.py
M scripts/table2wiki.py
M tests/textlib_tests.py
6 files changed, 51 insertions(+), 44 deletions(-)

diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index f84d6c8..048b726 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -473,7 +473,7 @@
split[0], '|'.join(cache.get(x.strip(), x) for x in split[1:]))

cache = {}
- exceptions = ['nowiki', 'comment', 'pre', 'source']
+ exceptions = ['comment', 'nowiki', 'pre', 'syntaxhighlight']
regex = re.compile(
FILE_LINK_REGEX % '|'.join(self.site.namespaces[6]),
flags=re.X)
@@ -659,7 +659,7 @@
# TODO: T254350 - what other extension tags should be avoided?
# (graph, math, score, timeline, etc.)
text = pywikibot.html2unicode(
- text, ignore=ignore, exceptions=['comment', 'source'])
+ text, ignore=ignore, exceptions=['comment', 'syntaxhighlight'])
return text

def removeEmptySections(self, text):
@@ -708,8 +708,8 @@

def removeUselessSpaces(self, text):
"""Cleanup multiple or trailing spaces."""
- exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace',
- 'source', 'table']
+ exceptions = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight',
+ 'startspace', 'table']
if self.site.sitename != 'wikipedia:cs':
exceptions.append('template')
text = textlib.replaceExcept(text, r'(?m)[\t ]+( |$)', r'\1',
@@ -756,8 +756,9 @@
other wikis. If there are any complaints, please file a bug report.
"""
if not self.template:
- exceptions = ['comment', 'math', 'nowiki', 'pre', 'source',
- 'template', 'timeline', self.site.redirectRegex()]
+ exceptions = ['comment', 'math', 'nowiki', 'pre',
+ 'syntaxhighlight', 'template', 'timeline',
+ self.site.redirectRegex()]
text = textlib.replaceExcept(
text,
r'(?m)'
@@ -803,8 +804,8 @@
replacement += '|' + match.group('title')
return replacement + ']]'

- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
- 'startspace']
+ exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace',
+ 'syntaxhighlight']
# link to the wiki working on
# Only use suffixes for article paths
for suffix in self.site._interwiki_urls(True):
@@ -866,8 +867,8 @@

# Everything case-insensitive (?i)
# Keep in mind that MediaWiki automatically converts <br> to <br />
- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
- 'startspace']
+ exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace',
+ 'syntaxhighlight']
text = textlib.replaceExcept(text, r'(?i)<(b|strong)>(.*?)</\1>',
r"'''\2'''", exceptions, site=self.site)
text = textlib.replaceExcept(text, r'(?i)<(i|em)>(.*?)</\1>',
@@ -893,7 +894,7 @@
"""Fix references tags."""
# See also
# https://en.wikipedia.org/wiki/User:AnomieBOT/source/tasks/OrphanReferenceFixer.pm
- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
+ exceptions = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight',
'startspace']

# it should be name = " or name=" NOT name ="
@@ -909,8 +910,8 @@

def fixStyle(self, text):
"""Convert prettytable to wikitable class."""
- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
- 'startspace']
+ exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace',
+ 'syntaxhighlight']
if self.site.code in ('de', 'en'):
text = textlib.replaceExcept(text,
r'(class="[^"]*)prettytable([^"]*")',
@@ -919,9 +920,8 @@

def fixTypo(self, text):
"""Fix units."""
- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
- 'startspace', 'gallery', 'hyperlink', 'interwiki',
- 'link']
+ exceptions = ['comment', 'gallery', 'hyperlink', 'interwiki', 'link',
+ 'nowiki', 'math', 'pre', 'startspace', 'syntaxhighlight']
# change <number> ccm -> <number> cm³
text = textlib.replaceExcept(text, r'(\d)\s*(?:&nbsp;)?ccm',
r'\1&nbsp;cm³', exceptions,
@@ -942,11 +942,13 @@
"""Fix arabic and persian letters."""
if self.site.code not in ['ckb', 'fa']:
return text
+
exceptions = [
- 'gallery',
'file',
+ 'gallery',
'hyperlink',
'interwiki',
+ 'inputbox',
# FIXME: but changes letters inside wikilinks
# 'link',
'math',
@@ -954,9 +956,8 @@
'template',
'timeline',
'ref',
- 'source',
'startspace',
- 'inputbox',
+ 'syntaxhighlight',
]

digits = textlib.NON_LATIN_DIGITS
@@ -998,7 +999,7 @@

# section headers to {{int:}} versions
exceptions = ['comment', 'includeonly', 'math', 'noinclude', 'nowiki',
- 'pre', 'source', 'ref', 'timeline']
+ 'pre', 'syntaxhighlight', 'ref', 'timeline']
text = textlib.replaceExcept(text,
r'([\r\n]|^)\=\= *Summary *\=\=',
r'\1== {{int:filedesc}} ==',
diff --git a/pywikibot/fixes.py b/pywikibot/fixes.py
index 826fe0b..3313043 100644
--- a/pywikibot/fixes.py
+++ b/pywikibot/fixes.py
@@ -128,12 +128,12 @@
'inside-tags': [
'nowiki',
'comment',
+ 'gallery', # because of filenames
+ 'hyperlink', # e.g. commas in URLs
'math',
- 'pre', # because of code examples
- 'source', # because of code examples
- 'startspace', # because of code examples
- 'hyperlink', # e.g. commas in URLs
- 'gallery', # because of filenames
+ 'pre', # because of code examples
+ 'startspace', # because of code examples
+ 'syntaxhighlight', # because of code examples
'timeline',
],
'text-contains': [
@@ -220,8 +220,8 @@
'comment',
'math',
'pre',
- 'source', # because of code examples
- 'startspace', # because of code examples
+ 'startspace', # because of code examples
+ 'syntaxhighlight', # because of code examples
],
'text-contains': [
r'http://.*?object=tx\|', # regular dash in URL
@@ -266,8 +266,8 @@
'comment',
'math',
'pre',
- 'source', # because of code examples
- 'startspace', # because of code examples
+ 'startspace', # because of code examples
+ 'syntaxhighlight', # because of code examples
],
}
},
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index e495ca2..437de05 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -316,6 +316,8 @@
# handle alias
if exc == 'source':
result.append(_tag_regex('syntaxhighlight'))
+ elif exc == 'syntaxhighlight':
+ result.append(_tag_regex('source'))

return result

@@ -459,7 +461,7 @@
@return: text stripped from disabled parts.
"""
if not tags:
- tags = ('comment', 'includeonly', 'nowiki', 'pre', 'source')
+ tags = ('comment', 'includeonly', 'nowiki', 'pre', 'syntaxhighlight')
tags = set(tags) - set(include)
regexes = _get_regexes(tags, site)
toRemoveR = re.compile('|'.join(x.pattern for x in regexes),
@@ -1022,7 +1024,8 @@
interwikiR = re.compile(r'\[\[(%s)\s?:[^\[\]\n]*\]\][\s]*'
% languages, re.IGNORECASE)
text = replaceExcept(text, interwikiR, '',
- ['nowiki', 'comment', 'math', 'pre', 'source'],
+ ['comment', 'math', 'nowiki', 'pre',
+ 'syntaxhighlight'],
marker=marker,
site=site)
return text.strip()
@@ -1300,8 +1303,8 @@
catNamespace = '|'.join(site.namespaces.CATEGORY)
categoryR = re.compile(r'\[\[\s*(%s)\s*:.*?\]\]\s*' % catNamespace, re.I)
text = replaceExcept(text, categoryR, '',
- ['nowiki', 'comment', 'math', 'pre', 'source',
- 'includeonly'],
+ ['comment', 'includeonly', 'math', 'nowiki', 'pre',
+ 'syntaxhighlight'],
marker=marker,
site=site)
if marker:
@@ -1369,7 +1372,7 @@
r'^[^\S\n]*\[\[\s*(%s)\s*:\s*%s[\s\u200e\u200f]*'
r'((?:\|[^]]+)?\]\])[^\S\n]*\n'
% (catNamespace, title), re.I | re.M)
- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source']
+ exceptions = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight']
if newcat is None:
# First go through and try the more restrictive regex that removes
# an entire line, if the category is the only thing on that line (this
diff --git a/scripts/category.py b/scripts/category.py
index ae695be..c1d8820 100755
--- a/scripts/category.py
+++ b/scripts/category.py
@@ -471,7 +471,8 @@
text = textlib.replaceExcept(
text, tagnameregexp,
r'\1{0}\n\2'.format(categorytitle),
- ['nowiki', 'comment', 'math', 'pre', 'source'],
+ ['comment', 'math', 'nowiki', 'pre',
+ 'syntaxhighlight'],
site=self.current_page.site)
else:
if self.includeonly == ['includeonly']:
diff --git a/scripts/table2wiki.py b/scripts/table2wiki.py
index 925daa4..8c81c7c 100644
--- a/scripts/table2wiki.py
+++ b/scripts/table2wiki.py
@@ -387,7 +387,7 @@
Mark all table start and end tags that are not disabled by nowiki tags,
comments etc. We will then later only work on these marked tags.
"""
- exceptions = ['comment', 'math', 'nowiki', 'pre', 'source']
+ exceptions = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight']
text = replaceExcept(text, _table_start_regex, '<##table##',
exceptions=exceptions)
text = replaceExcept(text, _table_end_regex, '</##table##>',
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index 0df9bc9..77b3bd2 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -1235,18 +1235,20 @@
'x', 'y', ['source'],
site=self.site),
'<source lang="xml">x</source>')
- self.assertEqual(textlib.replaceExcept('<source>x</source>',
- 'x', 'y', ['source'],
- site=self.site),
- '<source>x</source>')
- self.assertEqual(textlib.replaceExcept(
- '<syntaxhighlight lang="xml">x</syntaxhighlight>',
- 'x', 'y', ['source'], site=self.site),
- '<syntaxhighlight lang="xml">x</syntaxhighlight>')
self.assertEqual(
textlib.replaceExcept('<syntaxhighlight>x</syntaxhighlight>',
'x', 'y', ['source'], site=self.site),
'<syntaxhighlight>x</syntaxhighlight>')
+ self.assertEqual(
+ textlib.replaceExcept(
+ '<syntaxhighlight lang="xml">x</syntaxhighlight>',
+ 'x', 'y', ['source'], site=self.site),
+ '<syntaxhighlight lang="xml">x</syntaxhighlight>')
+ self.assertEqual(
+ textlib.replaceExcept('<source>x</source>',
+ 'x', 'y', ['syntaxhighlight'],
+ site=self.site),
+ '<source>x</source>')
self.assertEqual(textlib.replaceExcept('<includeonly>x</includeonly>',
'x', 'y', ['includeonly'],
site=self.site),

To view, visit change 636004. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I1db4173c51bad8052fe60049ce17279cc5d65322
Gerrit-Change-Number: 636004
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: JAn Dudík <jan.dudik@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged