jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/755931 )
Change subject: [IMPR] add a new function case_escape to textlib
......................................................................
[IMPR] add a new function case_escape to textlib
case_escape returns an escaped regex pattern that takes namespace.case into account. Use the new function in:
- MultiTemplateMatchBuilder
- replaceCategoryInPlace
- archivebot.py
- image.py
Also improve the image.py script.
Change-Id: I73735616310187290dd3ceac34e80689ce51ee30 --- M pywikibot/textlib.py M scripts/archivebot.py M scripts/image.py 3 files changed, 46 insertions(+), 50 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index bd26f10..7875f4e 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -148,6 +148,24 @@ return phrase
+def case_escape(case: str, string: str) -> str: + """Return an escaped regex pattern which depends on 'first-letter' case. + + .. versionadded:: 7.0 + + :param case: if `case` is 'first-letter' the regex contains an + upper/lower case set for the first letter + """ + first = string[0] + if first.isalpha() and case == 'first-letter': + pattern = '[{}{}]{}'.format(first.upper(), + first.lower(), + re.escape(string[1:])) + else: + pattern = re.escape(string) + return pattern + + class MultiTemplateMatchBuilder:
"""Build template matcher.""" @@ -174,12 +192,7 @@ raise ValueError( '{!r} is not a valid template'.format(template))
- if namespace.case == 'first-letter': - pattern = '[{}{}]{}'.format(re.escape(old[0].upper()), - re.escape(old[0].lower()), - re.escape(old[1:])) - else: - pattern = re.escape(old) + pattern = case_escape(namespace.case, old) # namespaces may be any mixed case namespaces = [_ignore_case(ns) for ns in namespace] namespaces.append(_ignore_case('msg')) @@ -1395,11 +1408,9 @@ title = oldcat.title(with_ns=False) if not title: return oldtext + # title might contain regex special characters - title = re.escape(title) - # title might not be capitalized correctly on the wiki - if title[0].isalpha() and site.namespaces[14].case == 'first-letter': - title = '[{}{}]'.format(title[0].upper(), title[0].lower()) + title[1:] + title = case_escape(site.namespaces[14].case, title) # spaces and underscores in page titles are interchangeable and collapsible title = title.replace(r'\ ', '[ _]+').replace(r'_', '[ _]+') categoryR = re.compile(r'[[\s*({})\s*:\s*{}[\s\u200e\u200f]*' diff --git a/scripts/archivebot.py b/scripts/archivebot.py index 5c12659..adf23c1 100755 --- a/scripts/archivebot.py +++ b/scripts/archivebot.py @@ -88,7 +88,7 @@ -salt:SALT specify salt """ # -# (C) Pywikibot team, 2006-2021 +# (C) Pywikibot team, 2006-2022 # # Distributed under the terms of the MIT license. # @@ -111,9 +111,10 @@ from pywikibot.date import apply_month_delta from pywikibot.exceptions import Error, NoPageError from pywikibot.textlib import ( - TimeStripper, + case_escape, extract_sections, findmarker, + TimeStripper, to_local_digits, )
@@ -297,12 +298,7 @@ ns = tpl_page.site.namespaces[tpl_page.namespace()] marker = '?' if ns.id == 10 else '' title = tpl_page.title(with_ns=False) - if ns.case != 'case-sensitive': - title = '[{}{}]{}'.format(re.escape(title[0].upper()), - re.escape(title[0].lower()), - re.escape(title[1:])) - else: - title = re.escape(title) + title = case_escape(ns.case, title)
return re.compile(r'(?:(?:%s):)%s%s' % ('|'.join(ns), marker, title))
diff --git a/scripts/image.py b/scripts/image.py index 73f417a..4eff8f7 100755 --- a/scripts/image.py +++ b/scripts/image.py @@ -37,16 +37,17 @@
""" # -# (C) Pywikibot team, 2013-2021 +# (C) Pywikibot team, 2013-2022 # # Distributed under the terms of the MIT license. # import re -from typing import Optional
import pywikibot from pywikibot import i18n, pagegenerators from pywikibot.bot import SingleSiteBot +from pywikibot.textlib import case_escape + from scripts.replace import ReplaceRobot as ReplaceBot
@@ -55,7 +56,7 @@ """This bot will replace or remove all occurrences of an old image."""
def __init__(self, generator, old_image: str, - new_image: Optional[str] = None, **kwargs): + new_image: str = '', **kwargs) -> None: """ Initializer.
@@ -85,12 +86,7 @@ param)
namespace = self.site.namespaces[6] - if namespace.case == 'first-letter': - case = re.escape(self.old_image[0].upper() - + self.old_image[0].lower()) - escaped = '[' + case + ']' + re.escape(self.old_image[1:]) - else: - escaped = re.escape(self.old_image) + escaped = case_escape(namespace.case, self.old_image)
# Be careful, spaces and _ have been converted to '\ ' and '_' escaped = re.sub('\\[_ ]', '[_ ]', escaped) @@ -102,17 +98,14 @@ image_regex = re.compile(r'' + escaped)
replacements = [] - if self.new_image: - if not self.opt.loose: - replacements.append((image_regex, - '[[{}:{}\g<parameters>]]' - .format( - self.site.namespaces.FILE.custom_name, - self.new_image))) - else: - replacements.append((image_regex, self.new_image)) + if not self.opt.loose and self.new_image: + replacements.append((image_regex, + '[[{}:{}\g<parameters>]]' + .format( + self.site.namespaces.FILE.custom_name, + self.new_image))) else: - replacements.append((image_regex, '')) + replacements.append((image_regex, self.new_image))
super().__init__(generator, replacements, always=self.opt.always, @@ -128,21 +121,17 @@
:param args: command line arguments """ - old_image = None - new_image = None + old_image = '' + new_image = '' options = {}
- for arg in pywikibot.handle_args(args): - if arg == '-always': - options['always'] = True - elif arg == '-loose': - options['loose'] = True - elif arg.startswith('-summary'): - if len(arg) == len('-summary'): - options['summary'] = pywikibot.input( - 'Choose an edit summary: ') - else: - options['summary'] = arg[len('-summary:'):] + for argument in pywikibot.handle_args(args): + arg, _, value = argument.partition(':') + if arg in ('-always', '-loose'): + options[arg[1:]] = True + elif arg == '-summary': + options[arg[1:]] = value or pywikibot.input( + 'Choose an edit summary: ') elif old_image: new_image = arg else:
pywikibot-commits@lists.wikimedia.org