Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/871293 )
Change subject: Use re.fullmatch instead of $ at the end of regex
......................................................................
Use re.fullmatch instead of $ at the end of regex
Change-Id: I4da10e242518c69d86416e4682de260be905f5d9
---
M pywikibot/tools/__init__.py
M tests/uploadscript_tests.py
M pywikibot/family.py
M scripts/upload.py
M pywikibot/time.py
M pywikibot/site/_siteinfo.py
M scripts/dataextend.py
M scripts/reflinks.py
8 files changed, 49 insertions(+), 38 deletions(-)
Approvals: Xqt: Verified; Looks good to me, approved
diff --git a/pywikibot/family.py b/pywikibot/family.py index 7f57190..981250b 100644 --- a/pywikibot/family.py +++ b/pywikibot/family.py @@ -6,7 +6,6 @@ # import collections import logging -import re import string import sys import types @@ -555,7 +554,7 @@ which would work with the given URL. """ parsed = urlparse.urlparse(url) - if not re.match('(https?)?$', parsed.scheme): + if parsed.scheme not in {'http', 'https', ''}: return None
path = parsed.path diff --git a/pywikibot/site/_siteinfo.py b/pywikibot/site/_siteinfo.py index 3e3afac..4eb1293 100644 --- a/pywikibot/site/_siteinfo.py +++ b/pywikibot/site/_siteinfo.py @@ -29,7 +29,7 @@ """
WARNING_REGEX = re.compile(r'Unrecognized values? for parameter ' - r'["']siprop["']: (.+?).?$') + r'["']siprop["']: (.+?).?')
# Until we get formatversion=2, we have to convert empty-string properties # into booleans so they are easier to use. @@ -139,7 +139,7 @@ """ def warn_handler(mod, message) -> bool: """Return True if the warning is handled.""" - matched = Siteinfo.WARNING_REGEX.match(message) + matched = Siteinfo.WARNING_REGEX.fullmatch(message) if mod == 'siteinfo' and matched: invalid_properties.extend( prop.strip() for prop in matched[1].split(',')) diff --git a/pywikibot/time.py b/pywikibot/time.py index e96a5f7..f760456 100644 --- a/pywikibot/time.py +++ b/pywikibot/time.py @@ -114,8 +114,8 @@
.. versionadded:: 7.5 """ - RE_MW = r'\d{14}$' # noqa: N806 - m = re.match(RE_MW, timestr) + RE_MW = r'\d{14}' # noqa: N806 + m = re.fullmatch(RE_MW, timestr)
if not m: msg = "time data '{timestr}' does not match MW format." @@ -134,9 +134,9 @@ """ RE_ISO8601 = (r'(?:\d{4}-\d{2}-\d{2})(?P<sep>[T ])' # noqa: N806 r'(?:\d{2}:\d{2}:\d{2})(?P<u>[.,]\d{1,6})?' - r'(?P<tz>Z|[+-]\d{2}:?\d{,2})?$' + r'(?P<tz>Z|[+-]\d{2}:?\d{,2})?' ) - m = re.match(RE_ISO8601, timestr) + m = re.fullmatch(RE_ISO8601, timestr)
if not m: msg = "time data '{timestr}' does not match ISO8601 format." @@ -174,8 +174,8 @@
.. versionadded:: 7.5 """ - RE_POSIX = r'(?P<S>-?\d{1,13})(?:.(?P<u>\d{1,6}))?$' # noqa: N806 - m = re.match(RE_POSIX, timestr) + RE_POSIX = r'(?P<S>-?\d{1,13})(?:.(?P<u>\d{1,6}))?' # noqa: N806 + m = re.fullmatch(RE_POSIX, timestr)
if not m: msg = "time data '{timestr}' does not match POSIX format." diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index 902653e..1c06290 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -438,7 +438,7 @@ """
MEDIAWIKI_VERSION = re.compile( - r'(\d+(?:.\d+)+)(-?wmf.?(\d+)|alpha|beta(\d+)|-?rc.?(\d+)|.*)?$') + r'(\d+(?:.\d+)+)(-?wmf.?(\d+)|alpha|beta(\d+)|-?rc.?(\d+)|.*)?')
def __init__(self, version_str: str) -> None: """ @@ -449,7 +449,8 @@ self._parse(version_str)
def _parse(self, version_str: str) -> None: - version_match = MediaWikiVersion.MEDIAWIKI_VERSION.match(version_str) + version_match = MediaWikiVersion.MEDIAWIKI_VERSION.fullmatch( + version_str)
if not version_match: raise ValueError(f'Invalid version number "{version_str}"') diff --git a/scripts/dataextend.py b/scripts/dataextend.py index fbde614..7f326bc 100644 --- a/scripts/dataextend.py +++ b/scripts/dataextend.py @@ -92,8 +92,8 @@
"""The Bot."""
- QRE = re.compile(r'Q\d+$') - PQRE = re.compile(r'[PQ]\d+$') + QRE = re.compile(r'Q\d+') + PQRE = re.compile(r'[PQ]\d+')
def __init__(self, **kwargs): """Initializer.""" @@ -358,7 +358,7 @@ return self.showtime(self.createdateclaim(title[6:])) if title.startswith('!q!'): return title[3:] - if not self.PQRE.match(title): + if not self.PQRE.fullmatch(title): return title
if title in self.labels: @@ -593,33 +593,34 @@ year = int(m[1]) month = int(m[2]) day = int(m[3]) - if re.match(r'\d{,4}(?:年頃|.)?$', text): - year = int(text) + m = re.fullmatch(r'(\d{1,4})(?:年頃|.)?', text) + if m: + year = int(m[1]) month = None day = None - if re.match(r'(?:1\d{3}|20[01]\d)[01]\d[0123]\d$', text): + if re.fullmatch(r'(?:1\d{3}|20[01]\d)[01]\d[0123]\d', text): year = int(text[:4]) month = int(text[4:6]) day = int(text[6:]) - if re.match(r'\d{4}-\d{2}$', text): + if re.fullmatch(r'\d{4}-\d{2}', text): year = int(text[:4]) month = int(text[-2:]) m = re.match(r'(\d{1,2})[-/](\d{4})', text) if m: year = int(m[2]) month = int(m[1]) - m = re.match(r'(\d+)[-./|](\d{1,2})[-./|](\d{1,2})$', text) + m = re.fullmatch(r'(\d+)[-./|](\d{1,2})[-./|](\d{1,2})', text) if m: year = int(m[1]) month = int(m[2]) day = int(m[3]) - m = re.match( - r'(\d{1,2})[-./|]\s*(\d{1,2})[-./|]\s*(\d{3,4}).?$', text) + m = re.fullmatch( + r'(\d{1,2})[-./|]\s*(\d{1,2})[-./|]\s*(\d{3,4}).?', text) if m: year = int(m[3]) month = int(m[2]) day = int(m[1]) - m = re.match(r'(\d{1,2})[-./\s]([iIvVxX]+)[-./\s](\d{4})$', text) + m = re.fullmatch(r'(\d{1,2})[-./\s]([iIvVxX]+)[-./\s](\d{4})', text) if m: year = int(m[3]) try: @@ -627,8 +628,8 @@ except KeyError: raise ValueError(f"Don't know month {m[2]}") day = int(m[1]) - m = re.match(r"(\d+)(?:.|er|eme|ème)?[\s.]\s*(?:d'|d[aei] )?" - r'([^\s.]{2,}).?[\s.]\s*(\d+)$', text) + m = re.fullmatch(r"(\d+)(?:.|er|eme|ème)?[\s.]\s*(?:d'|d[aei] )?" + r'([^\s.]{2,}).?[\s.]\s*(\d+)', text) if m: year = int(m[3]) try: @@ -636,7 +637,8 @@ except KeyError: raise ValueError(f"Don't know month {m[2]}") day = int(m[1]) - m = re.match(r'(\d{4}).?[\s.]\s*([^\s.]{3,}).?[\s.]\s*(\d+)$', text) + m = re.fullmatch( + r'(\d{4}).?[\s.]\s*([^\s.]{3,}).?[\s.]\s*(\d+)', text) if m: year = int(m[1]) try: @@ -652,15 +654,15 @@ except KeyError: raise ValueError(f"Don't know month {m[2]}") day = int(m[1]) - m = re.match(r'(\w*[a-zA-Z]\w*).? 
(\d+)$', text) + m = re.fullmatch(r'(\w*[a-zA-Z]\w*).? (\d+)', text) if m: year = int(m[2]) try: month = self.MONTHNUMBER[m[1].lower()] except KeyError: raise ValueError(f"Don't know month {m[1]}") - m = re.match(r'(\w+).? (\d{1,2})(?:st|nd|rd|th)?.?\s*,\s*(\d{3,4})$', - text) + m = re.fullmatch( + r'(\w+).? (\d{1,2})(?:st|nd|rd|th)?.?\s*,\s*(\d{3,4})', text) if m: year = int(m[3]) try: @@ -681,7 +683,7 @@ year = int(m[1]) month = int(m[2]) day = int(m[3]) - m = re.match(r'(\d+)年$', text) + m = re.fullmatch(r'(\d+)年', text) if m: year = int(m[1]) if day == 0: @@ -868,7 +870,7 @@
createdclaim = pywikibot.Claim(self.site, claim[0])
- if self.QRE.match(claim[1]): + if self.QRE.fullmatch(claim[1]): createdclaim.setTarget(pywikibot.ItemPage( self.site, claim[1]))
diff --git a/scripts/reflinks.py b/scripts/reflinks.py index eb673b2..2647f67 100755 --- a/scripts/reflinks.py +++ b/scripts/reflinks.py @@ -124,10 +124,10 @@ # matches an URL at the index of a website dirIndex = re.compile( r'\w+://[^/]+/((default|index).' - r'(asp|aspx|cgi|htm|html|phtml|mpx|mspx|php|shtml|var))?$', + r'(asp|aspx|cgi|htm|html|phtml|mpx|mspx|php|shtml|var))?', re.IGNORECASE) # Extracts the domain name -domain = re.compile(r'^(\w+)://(?:www.|)([^/]+)') +domain = re.compile(r'^(\w+)://(?:www\.|)([^/]+)')
globalbadtitles = r""" # is @@ -601,8 +601,8 @@ f'Redirect 404 : {ref.link} ') continue
- if dirIndex.match(redir) \ - and not dirIndex.match(ref.link): + if dirIndex.fullmatch(redir) \ + and not dirIndex.fullmatch(ref.link): pywikibot.info(f'<<lightyellow>>WARNING<<default>> : ' f'Redirect to root : {ref.link} ') continue diff --git a/scripts/upload.py b/scripts/upload.py index 1812b4a..fd1944b 100755 --- a/scripts/upload.py +++ b/scripts/upload.py @@ -67,7 +67,7 @@
CHUNK_SIZE_REGEX = re.compile( - r'-chunked(?::(\d+(?:.\d+)?)[ \t]*(k|ki|m|mi)?b?)?$', re.I) + r'-chunked(?::(\d+(?:.\d+)?)[ \t]*(k|ki|m|mi)?b?)?', re.I)
def get_chunk_size(match) -> int: @@ -150,7 +150,7 @@ else: ignorewarn = True elif arg == '-chunked': - match = CHUNK_SIZE_REGEX.match(option) + match = CHUNK_SIZE_REGEX.fullmatch(option) chunk_size = get_chunk_size(match) elif arg == '-async': asynchronous = True diff --git a/tests/uploadscript_tests.py b/tests/uploadscript_tests.py index 89c7309..aa0441e 100755 --- a/tests/uploadscript_tests.py +++ b/tests/uploadscript_tests.py @@ -21,7 +21,7 @@ option = '-chunked' if value: option += ':' + value - match = CHUNK_SIZE_REGEX.match(option) + match = CHUNK_SIZE_REGEX.fullmatch(option) return get_chunk_size(match)