jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/433916 )
Change subject: [IMPR] Merge duplicate regexes dicts
......................................................................
[IMPR] Merge duplicate regexes dicts
Bug: T195026
Change-Id: I0c75e66598c502b4f6f5df737ebc8a770db6e966
---
M pywikibot/textlib.py
1 file changed, 27 insertions(+), 26 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 9ba2b5f..5f0f411 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -305,7 +305,8 @@
# which may not yet have a site specific re compiled.
if exc in _regex_cache:
if type(_regex_cache[exc]) is tuple:
- if not site:
+ if not site and exc in ('interwiki', 'property', 'invoke',
+ 'category', 'file'):
issue_deprecation_warning(
'site=None', 'a valid site', 3)
site = pywikibot.Site()
@@ -452,37 +453,37 @@
return text
-def removeDisabledParts(text, tags=['*'], include=[]):
+def removeDisabledParts(text, tags=None, include=[], site=None):
"""
Return text without portions where wiki markup is disabled.
- Parts that can/will be removed are --
+ Parts that will be removed by default are
* HTML comments
* nowiki tags
* pre tags
* includeonly tags
+ * source and syntaxhighlight tags
- The exact set of parts which should be removed can be passed as the
- 'tags' parameter, which defaults to all.
- Or, in alternative, default parts that shall not be removed can be
- specified in the 'include' param.
+ @param tags: The exact set of parts which should be removed using
+ keywords from textlib._get_regexes().
+ @type tags: list, set, tuple or None
+ @param include: Or, in alternative, default parts that shall not
+ be removed.
+ @type include: list, set or tuple
+
+ @param site: Site to be used for site-dependent regexes. Default
+ disabled parts listed above do not need it.
+ @type site: pywikibot.Site
+
+ @return: text stripped from disabled parts.
+ @rtype: str
"""
- regexes = {
- 'comments': r'<!--.*?-->',
- 'includeonly': r'<includeonly\s*>.*?</includeonly\s*>',
- 'nowiki': r'<nowiki\s*>.*?</nowiki\s*>',
- 'pre': r'<pre\s*>.*?</pre\s*>',
- 'source': r'<source[ >].*?</source\s*>',
- 'syntaxhighlight': r'<syntaxhighlight[ >].*?</syntaxhighlight\s*>',
- }
- if '*' in tags:
- tags = list(regexes.keys())
- # add alias
+ if not tags:
+ tags = ('comment', 'includeonly', 'nowiki', 'pre', 'source')
tags = set(tags) - set(include)
- if 'source' in tags:
- tags.add('syntaxhighlight')
- toRemoveR = re.compile('|'.join([regexes[tag] for tag in tags]),
+ regexes = _get_regexes(tags, site)
+ toRemoveR = re.compile('|'.join(x.pattern for x in regexes),
re.IGNORECASE | re.DOTALL)
return toRemoveR.sub('', text)
@@ -525,7 +526,7 @@
self.textdata += u"</%s>" % tag
-def isDisabled(text, index, tags=['*']):
+def isDisabled(text, index, tags=None):
"""
Return True if text[index] is disabled, e.g. by a comment or nowiki tags.
@@ -861,10 +862,10 @@
result = {}
# Ignore interwiki links within nowiki tags, includeonly tags, pre tags,
# and HTML comments
- tags = ['comments', 'nowiki', 'pre', 'source']
- if not template_subpage:
- tags += ['includeonly']
- text = removeDisabledParts(text, tags)
+ include = []
+ if template_subpage:
+ include = ['includeonly']
+ text = removeDisabledParts(text, include=include)
# This regular expression will find every link that is possibly an
# interwiki link.
--
To view, visit
https://gerrit.wikimedia.org/r/433916
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I0c75e66598c502b4f6f5df737ebc8a770db6e966
Gerrit-Change-Number: 433916
Gerrit-PatchSet: 8
Gerrit-Owner: Dvorapa <dvorapa(a)seznam.cz>
Gerrit-Reviewer: Dalba <dalba.wiki(a)gmail.com>
Gerrit-Reviewer: Dvorapa <dvorapa(a)seznam.cz>
Gerrit-Reviewer: Framawiki <framawiki(a)tools.wmflabs.org>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: Zoranzoki21 <zorandori4444(a)gmail.com>
Gerrit-Reviewer: jenkins-bot <>