jenkins-bot has submitted this change. (
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/755931 )
Change subject: [IMPR] add a new function case_escape to textlib
......................................................................
[IMPR] add a new function case_escape to textlib
case_escape returns an escaped regex pattern which takes namespace.case
into account. Use the new function in
- MultiTemplateMatchBuilder
- replaceCategoryInPlace
- archivebot.py
- image.py
Also improve the image.py script.
Change-Id: I73735616310187290dd3ceac34e80689ce51ee30
---
M pywikibot/textlib.py
M scripts/archivebot.py
M scripts/image.py
3 files changed, 46 insertions(+), 50 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index bd26f10..7875f4e 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -148,6 +148,24 @@
return phrase
+def case_escape(case: str, string: str) -> str:
+ """Return an escaped regex pattern which depends on 'first-letter' case.
+
+ .. versionadded:: 7.0
+
+ :param case: if `case` is 'first-letter' the regex contains an
+ upper/lower case set for the first letter
+ """
+ first = string[0]
+ if first.isalpha() and case == 'first-letter':
+ pattern = '[{}{}]{}'.format(first.upper(),
+ first.lower(),
+ re.escape(string[1:]))
+ else:
+ pattern = re.escape(string)
+ return pattern
+
+
class MultiTemplateMatchBuilder:
"""Build template matcher."""
@@ -174,12 +192,7 @@
raise ValueError(
'{!r} is not a valid template'.format(template))
- if namespace.case == 'first-letter':
- pattern = '[{}{}]{}'.format(re.escape(old[0].upper()),
- re.escape(old[0].lower()),
- re.escape(old[1:]))
- else:
- pattern = re.escape(old)
+ pattern = case_escape(namespace.case, old)
# namespaces may be any mixed case
namespaces = [_ignore_case(ns) for ns in namespace]
namespaces.append(_ignore_case('msg'))
@@ -1395,11 +1408,9 @@
title = oldcat.title(with_ns=False)
if not title:
return oldtext
+
# title might contain regex special characters
- title = re.escape(title)
- # title might not be capitalized correctly on the wiki
- if title[0].isalpha() and site.namespaces[14].case == 'first-letter':
- title = '[{}{}]'.format(title[0].upper(), title[0].lower()) + title[1:]
+ title = case_escape(site.namespaces[14].case, title)
# spaces and underscores in page titles are interchangeable and collapsible
title = title.replace(r'\ ', '[ _]+').replace(r'\_', '[ _]+')
categoryR = re.compile(r'\[\[\s*({})\s*:\s*{}[\s\u200e\u200f]*'
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index 5c12659..adf23c1 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -88,7 +88,7 @@
-salt:SALT specify salt
"""
#
-# (C) Pywikibot team, 2006-2021
+# (C) Pywikibot team, 2006-2022
#
# Distributed under the terms of the MIT license.
#
@@ -111,9 +111,10 @@
from pywikibot.date import apply_month_delta
from pywikibot.exceptions import Error, NoPageError
from pywikibot.textlib import (
- TimeStripper,
+ case_escape,
extract_sections,
findmarker,
+ TimeStripper,
to_local_digits,
)
@@ -297,12 +298,7 @@
ns = tpl_page.site.namespaces[tpl_page.namespace()]
marker = '?' if ns.id == 10 else ''
title = tpl_page.title(with_ns=False)
- if ns.case != 'case-sensitive':
- title = '[{}{}]{}'.format(re.escape(title[0].upper()),
- re.escape(title[0].lower()),
- re.escape(title[1:]))
- else:
- title = re.escape(title)
+ title = case_escape(ns.case, title)
return re.compile(r'(?:(?:%s):)%s%s' % ('|'.join(ns), marker, title))
diff --git a/scripts/image.py b/scripts/image.py
index 73f417a..4eff8f7 100755
--- a/scripts/image.py
+++ b/scripts/image.py
@@ -37,16 +37,17 @@
"""
#
-# (C) Pywikibot team, 2013-2021
+# (C) Pywikibot team, 2013-2022
#
# Distributed under the terms of the MIT license.
#
import re
-from typing import Optional
import pywikibot
from pywikibot import i18n, pagegenerators
from pywikibot.bot import SingleSiteBot
+from pywikibot.textlib import case_escape
+
from scripts.replace import ReplaceRobot as ReplaceBot
@@ -55,7 +56,7 @@
"""This bot will replace or remove all occurrences of an old image."""
def __init__(self, generator, old_image: str,
- new_image: Optional[str] = None, **kwargs):
+ new_image: str = '', **kwargs) -> None:
"""
Initializer.
@@ -85,12 +86,7 @@
param)
namespace = self.site.namespaces[6]
- if namespace.case == 'first-letter':
- case = re.escape(self.old_image[0].upper()
- + self.old_image[0].lower())
- escaped = '[' + case + ']' + re.escape(self.old_image[1:])
- else:
- escaped = re.escape(self.old_image)
+ escaped = case_escape(namespace.case, self.old_image)
# Be careful, spaces and _ have been converted to '\ ' and '\_'
escaped = re.sub('\\\\[_ ]', '[_ ]', escaped)
@@ -102,17 +98,14 @@
image_regex = re.compile(r'' + escaped)
replacements = []
- if self.new_image:
- if not self.opt.loose:
- replacements.append((image_regex,
- '[[{}:{}\\g<parameters>]]'
- .format(
- self.site.namespaces.FILE.custom_name,
- self.new_image)))
- else:
- replacements.append((image_regex, self.new_image))
+ if not self.opt.loose and self.new_image:
+ replacements.append((image_regex,
+ '[[{}:{}\\g<parameters>]]'
+ .format(
+ self.site.namespaces.FILE.custom_name,
+ self.new_image)))
else:
- replacements.append((image_regex, ''))
+ replacements.append((image_regex, self.new_image))
super().__init__(generator, replacements,
always=self.opt.always,
@@ -128,21 +121,17 @@
:param args: command line arguments
"""
- old_image = None
- new_image = None
+ old_image = ''
+ new_image = ''
options = {}
- for arg in pywikibot.handle_args(args):
- if arg == '-always':
- options['always'] = True
- elif arg == '-loose':
- options['loose'] = True
- elif arg.startswith('-summary'):
- if len(arg) == len('-summary'):
- options['summary'] = pywikibot.input(
- 'Choose an edit summary: ')
- else:
- options['summary'] = arg[len('-summary:'):]
+ for argument in pywikibot.handle_args(args):
+ arg, _, value = argument.partition(':')
+ if arg in ('-always', '-loose'):
+ options[arg[1:]] = True
+ elif arg == '-summary':
+ options[arg[1:]] = value or pywikibot.input(
+ 'Choose an edit summary: ')
elif old_image:
new_image = arg
else:
--
To view, visit
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/755931
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I73735616310187290dd3ceac34e80689ce51ee30
Gerrit-Change-Number: 755931
Gerrit-PatchSet: 6
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged