jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942668 )
Change subject: [IMPR] use inline re.IGNORECASE flag in textlib.case_escape function
......................................................................
[IMPR] use inline re.IGNORECASE flag in textlib.case_escape function
- use inline re.IGNORECASE flag for the first letter of string argument
- add underscore parameter to detect interchangeable and collapsible
  spaces/underscores in string
- use underscore parameter within scripts
Bug: T308265
Change-Id: I58df8260db97c45cde6e959ada7e5a8acc959d79
---
M pywikibot/textlib.py
M scripts/image.py
M scripts/delinker.py
3 files changed, 31 insertions(+), 15 deletions(-)
Approvals:
  Matěj Suchánek: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 39f8d50..b8ac31f 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -163,19 +163,24 @@
     return phrase


-def case_escape(case: str, string: str) -> str:
+def case_escape(case: str, string: str, *, underscore: bool = False) -> str:
     """Return an escaped regex pattern which depends on 'first-letter' case.

     .. versionadded:: 7.0
+    .. versionchanged:: 8.4
+       Added the optional *underscore* parameter.

-    :param case: if `case` is 'first-letter' the regex contains an
-        upper/lower case set for the first letter
+    :param case: if `case` is 'first-letter', the regex contains an
+        inline re.IGNORECASE flag for the first letter
+    :param underscore: if True, expand the regex to detect spaces and
+        underscores which are interchangeable and collapsible
     """
-    first = string[0]
-    if first.isalpha() and case == 'first-letter':
-        pattern = f'[{first.upper()}{first.lower()}]{re.escape(string[1:])}'
+    if case == 'first-letter':
+        pattern = f'(?i:{string[:1]}){re.escape(string[1:])}'
     else:
         pattern = re.escape(string)
+    if underscore:
+        pattern = re.sub(r'_|\\ ', '[_ ]+', pattern)
     return pattern
@@ -1557,9 +1562,7 @@
         return oldtext

     # title might contain regex special characters
-    title = case_escape(site.namespaces[14].case, title)
-    # spaces and underscores in page titles are interchangeable and collapsible
-    title = title.replace(r'\ ', '[ _]+').replace(r'_', '[ _]+')
+    title = case_escape(site.namespaces[14].case, title, underscore=True)
     categoryR = re.compile(r'\[\[\s*({})\s*:\s*{}[\s\u200e\u200f]*'
                            r'((?:\|[^]]+)?\]\])'
                            .format(catNamespace, title), re.I)
diff --git a/scripts/delinker.py b/scripts/delinker.py
index 6282cd6..4d3d0b4 100755
--- a/scripts/delinker.py
+++ b/scripts/delinker.py
@@ -100,9 +100,9 @@
         """Set page to current page and delink that page."""
         # use image_regex from image.py
         namespace = file_page.site.namespaces[6]
-        escaped = case_escape(namespace.case, file_page.title(with_ns=False))
-        # Be careful, spaces and _ have been converted to '\ ' and '_'
-        escaped = re.sub('\\\\[_ ]', '[_ ]', escaped)
+        escaped = case_escape(namespace.case,
+                              file_page.title(with_ns=False),
+                              underscore=True)
         self.image_regex = re.compile(
             r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|'
             r'(?:[^\[\]]|\[\[[^\]]+\]\]|\[[^\]]+\])*|) *\]\]'
diff --git a/scripts/image.py b/scripts/image.py
index 1b5d05d..167fa78 100755
--- a/scripts/image.py
+++ b/scripts/image.py
@@ -85,10 +85,8 @@
                 param)
         namespace = self.site.namespaces[6]
-        escaped = case_escape(namespace.case, self.old_image)
+        escaped = case_escape(namespace.case, self.old_image, underscore=True)

-        # Be careful, spaces and _ have been converted to '\ ' and '_'
-        escaped = re.sub('\\\\[_ ]', '[_ ]', escaped)
         if not self.opt.loose or not self.new_image:
             image_regex = re.compile(
                 r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|'
pywikibot-commits@lists.wikimedia.org