jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/836138 )
Change subject: [IMPR] Simplify access to Match object
......................................................................
[IMPR] Simplify access to Match object
Since Python 3.6, the Match object has a __getitem__ method.
Change-Id: I2e3e4073d317eb23b7ba040642e82edfbe471cdd
---
M docs/conf.py
M docs/recipes.rst
M pywikibot/comms/http.py
M pywikibot/cosmetic_changes.py
M pywikibot/data/api/_login.py
M pywikibot/data/api/_requests.py
M pywikibot/date.py
M pywikibot/i18n.py
M pywikibot/page/_links.py
M pywikibot/page/_pages.py
M pywikibot/pagegenerators/_generators.py
M pywikibot/proofreadpage.py
M pywikibot/site/_apisite.py
M pywikibot/site/_siteinfo.py
M pywikibot/textlib.py
M pywikibot/time.py
M pywikibot/tools/__init__.py
M pywikibot/tools/_deprecate.py
M pywikibot/tools/djvu.py
M pywikibot/tools/threading.py
M pywikibot/xmlreader.py
M scripts/archivebot.py
M scripts/category.py
M scripts/checkimages.py
M scripts/commonscat.py
M scripts/dataextend.py
M scripts/fixing_redirects.py
M scripts/harvest_template.py
M scripts/misspelling.py
M scripts/noreferences.py
M scripts/pagefromfile.py
M scripts/redirect.py
M scripts/reflinks.py
M scripts/solve_disambiguation.py
M scripts/upload.py
M scripts/weblinkchecker.py
M tests/api_tests.py
M tests/aspects.py
M tests/textlib_tests.py
M tests/timestripper_tests.py
40 files changed, 218 insertions(+), 225 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
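The pattern behind this change, shown as a minimal standalone sketch (not taken from the patch itself): since Python 3.6, re.Match implements __getitem__, so m[0], m[1] and m['name'] behave like m.group(0), m.group(1) and m.group('name').

import re

m = re.match(r'(?P<title>\w+)\|(\w+)', 'Foo|bar')
assert m is not None

# Positional access: m[i] is equivalent to m.group(i).
assert m[0] == m.group(0) == 'Foo|bar'
assert m[1] == m.group(1) == 'Foo'

# Named groups: m['name'] is equivalent to m.group('name').
assert m['title'] == m.group('title') == 'Foo'

# m.group() still accepts several group numbers at once;
# subscript access takes exactly one key.
print(m.group(1, 2))  # ('Foo', 'bar')

One difference worth noting: calls such as m.group(1, 2) have no single-subscript counterpart, which is why the diff below only converts one-argument group() calls.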
diff --git a/docs/conf.py b/docs/conf.py index 3aabf0c..55fdafc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -447,7 +447,7 @@ # Indent options match = re.match(r'-[^ ]+? +', line) if match: - length = len(match.group(0)) + length = len(match[0]) lines[index] = ' ' + line elif length and line.startswith(' ' * length): # Indent descriptions of options (as options are indented) diff --git a/docs/recipes.rst b/docs/recipes.rst index 76e5b84..5e335ed 100644 --- a/docs/recipes.rst +++ b/docs/recipes.rst @@ -17,7 +17,7 @@
site = pywikibot.Site('wikipedia:en') # create a Site object page = pywikibot.Page(site, 'Deep learning') # create a Page object sect = textlib.extract_sections(page.text, site) # divide content into sections
- >>> links = sorted(link.group('title') for link in pywikibot.link_regex.finditer(sect.head)) + >>> links = sorted(link['title'] for link in pywikibot.link_regex.finditer(sect.header))
pages = [pywikibot.Page(site, title) for title in links]
``links`` is a list containing all link titles in alphabethical order. diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py index 8b492f6..48f74c8 100644 --- a/pywikibot/comms/http.py +++ b/pywikibot/comms/http.py @@ -410,7 +410,7 @@ m = CHARSET_RE.search(content_type) if not m: return None - charset = m.group('charset').strip('"' ').lower() + charset = m['charset'].strip('"' ').lower() # Convert to python correct encoding names if re.sub(r'[ _-]', '', charset) == 'xeucjp': charset = 'euc_jp' @@ -443,7 +443,7 @@ m = re.search( br'encoding=(["'])(?P<encoding>.+?)\1', header) if m: - header_encoding = m.group('encoding').decode('utf-8') + header_encoding = m['encoding'].decode('utf-8') else: header_encoding = 'utf-8' else: diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py index 484e784..569f081 100644 --- a/pywikibot/cosmetic_changes.py +++ b/pywikibot/cosmetic_changes.py @@ -194,7 +194,7 @@ raise NotImplementedError( 'ISBN functionality not available. Install stdnum package.')
- isbn = match.group('code') + isbn = match['code'] try: stdnum_isbn.validate(isbn) except stdnum_isbn.ValidationError as e: @@ -519,10 +519,10 @@ # helper function which works on one link and either returns it # unmodified, or returns a replacement. def handleOneLink(match: Match[str]) -> str: - titleWithSection = match.group('titleWithSection') - label = match.group('label') - trailingChars = match.group('linktrail') - newline = match.group('newline') + titleWithSection = match['titleWithSection'] + label = match['label'] + trailingChars = match['linktrail'] + newline = match['newline']
is_interwiki = self.site.isInterwikiLink(titleWithSection) if is_interwiki: @@ -819,11 +819,13 @@ if re.match(r'(?:{}):' .format('|'.join((*self.site.namespaces[6], *self.site.namespaces[14]))), - match.group('link')): + match['link']): replacement += ':' - replacement += match.group('link') - if match.group('title'): - replacement += '|' + match.group('title') + + replacement += match['link'] + if match['title']: + replacement += '|' + match['title'] + return replacement + ']]'
exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', @@ -898,8 +900,8 @@ """Relace html markups with wikitext markups.""" def replace_header(match: Match[str]) -> str: """Create a header string for replacing.""" - depth = int(match.group(1)) - return r'{0} {1} {0}'.format('=' * depth, match.group(2)) + depth = int(match[1]) + return r'{0} {1} {0}'.format('=' * depth, match[2])
# Everything case-insensitive (?i) # Keep in mind that MediaWiki automatically converts <br> to <br /> diff --git a/pywikibot/data/api/_login.py b/pywikibot/data/api/_login.py index cedc9a7..11de1ae 100644 --- a/pywikibot/data/api/_login.py +++ b/pywikibot/data/api/_login.py @@ -143,8 +143,7 @@ else: match = re.search(r'(\d+) (seconds|minutes)', fail_reason) if match: - delta = datetime.timedelta( - **{match.group(2): int(match.group(1))}) + delta = datetime.timedelta(**{match[2]: int(match[1])}) else: delta = datetime.timedelta() self._waituntil = datetime.datetime.now() + delta diff --git a/pywikibot/data/api/_requests.py b/pywikibot/data/api/_requests.py index 6e930da..70d1207 100644 --- a/pywikibot/data/api/_requests.py +++ b/pywikibot/data/api/_requests.py @@ -1028,7 +1028,7 @@ lag = error['lag'] except KeyError: lag = lagpattern.search(info) - lag = float(lag.group('lag')) if lag else 0.0 + lag = float(lag['lag']) if lag else 0.0
self.site.throttle.lag(lag * retries) continue diff --git a/pywikibot/date.py b/pywikibot/date.py index 5e167f2..3f35e63 100644 --- a/pywikibot/date.py +++ b/pywikibot/date.py @@ -515,8 +515,7 @@ m = compPattern.match(value) if m: # decode each found value using provided decoder - values = [decoder[2](m.group(i + 1)) - for i, decoder in enumerate(decoders)] + values = [decoder[2](m[i + 1]) for i, decoder in enumerate(decoders)] decValue = decf(values)
assert not isinstance(decValue, str), \ diff --git a/pywikibot/i18n.py b/pywikibot/i18n.py index 079b67f..f94a176 100644 --- a/pywikibot/i18n.py +++ b/pywikibot/i18n.py @@ -456,8 +456,8 @@ return plural_rule
def replace_plural(match: Match[str]) -> str: - selector = match.group(1) - variants = match.group(2) + selector = match[1] + variants = match[2] num = parameters[selector] if not isinstance(num, int): raise ValueError("'{}' must be a number, not a {} ({})" diff --git a/pywikibot/page/_links.py b/pywikibot/page/_links.py index 6b6a806..1dd4e3f 100644 --- a/pywikibot/page/_links.py +++ b/pywikibot/page/_links.py @@ -826,14 +826,14 @@ if textlib.isDisabled(match.string, match.start(), tags=exceptions): # match.string stores original text so we do not need # to pass it to handle_entity, ♥ Python - return match.group(0) + return match[0]
- if match.group('decimal'): - unicode_codepoint = int(match.group('decimal')) - elif match.group('hex'): - unicode_codepoint = int(match.group('hex'), 16) - elif match.group('name'): - name = match.group('name') + if match['decimal']: + unicode_codepoint = int(match['decimal']) + elif match['hex']: + unicode_codepoint = int(match['hex'], 16) + elif match['name']: + name = match['name'] unicode_codepoint = name2codepoint.get(name, False)
unicode_codepoint = _ILLEGAL_HTML_ENTITIES_MAPPING.get( @@ -843,6 +843,6 @@ return chr(unicode_codepoint)
# Leave the entity unchanged - return match.group(0) + return match[0]
return _ENTITY_SUB(handle_entity, text) diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py index 506243a..cb28f5e 100644 --- a/pywikibot/page/_pages.py +++ b/pywikibot/page/_pages.py @@ -1465,7 +1465,7 @@ text = self.text for linkmatch in pywikibot.link_regex.finditer( textlib.removeDisabledParts(text)): - linktitle = linkmatch.group('title') + linktitle = linkmatch['title'] link = Link(linktitle, self.site) # only yield links that are to a different site and that # are not language links @@ -2279,7 +2279,7 @@ old_text = '' result = redirect_regex.search(old_text) if result: - oldlink = result.group(1) + oldlink = result[1] if (keep_section and '#' in oldlink and target_page.section() is None): sectionlink = oldlink[oldlink.index('#'):] diff --git a/pywikibot/pagegenerators/_generators.py b/pywikibot/pagegenerators/_generators.py index 2f30bba..3d781a9 100644 --- a/pywikibot/pagegenerators/_generators.py +++ b/pywikibot/pagegenerators/_generators.py @@ -333,8 +333,8 @@ # This makes it possible to work on different wikis using a single # text file, but also could be dangerous because you might # inadvertently change pages on another wiki! - yield pywikibot.Page(pywikibot.Link(linkmatch.group('title'), - site)) + yield pywikibot.Page(pywikibot.Link(linkmatch['title'], site)) + if linkmatch is not None: return
@@ -855,7 +855,7 @@ for url in self.queryGoogle(local_query): m = re.search(pattern, url) if m: - page = pywikibot.Page(pywikibot.Link(m.group(1), self.site)) + page = pywikibot.Page(pywikibot.Link(m[1], self.site)) if page.site == self.site: yield page
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py index 5cdeedd..35ff2dd 100644 --- a/pywikibot/proofreadpage.py +++ b/pywikibot/proofreadpage.py @@ -121,10 +121,10 @@
m = self.p_header.search(self._text) if m: - self.ql = int(m.group('ql')) - self.user = m.group('user') - self.header = m.group('header') - if not m.group('has_div'): + self.ql = int(m['ql']) + self.user = m['user'] + self.header = m['header'] + if not m['has_div']: self._has_div = False else: self.ql = ProofreadPage.NOT_PROOFREAD @@ -867,7 +867,7 @@ r'/w/index.php?title=(.+?)&action=edit&redlink=1') title = p_href.search(href) if title: - return title.group(1).replace('_', ' ') + return title[1].replace('_', ' ') return None
def save(self, *args: Any, **kwargs: Any) -> None: # See Page.save(). diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py index 7e53bbf..2050db3 100644 --- a/pywikibot/site/_apisite.py +++ b/pywikibot/site/_apisite.py @@ -703,12 +703,12 @@ '"{}": No linktrail pattern extracted from "{}"' .format(self.code, linktrail))
- pattern = match.group('pattern') - letters = match.group('letters') + pattern = match['pattern'] + letters = match['letters']
if r'x{' in pattern: pattern = re.sub(r'\x{([A-F0-9]{4})}', - lambda match: chr(int(match.group(1), 16)), + lambda match: chr(int(match[1], 16)), pattern) if letters: pattern += ''.join(letters.split('|')) diff --git a/pywikibot/site/_siteinfo.py b/pywikibot/site/_siteinfo.py index c5d0a0a..4c1ab93 100644 --- a/pywikibot/site/_siteinfo.py +++ b/pywikibot/site/_siteinfo.py @@ -142,7 +142,7 @@ matched = Siteinfo.WARNING_REGEX.match(message) if mod == 'siteinfo' and matched: invalid_properties.extend( - prop.strip() for prop in matched.group(1).split(',')) + prop.strip() for prop in matched[1].split(',')) return True return False
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index d67b09f..ccc45b8 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -440,13 +440,13 @@ group_regex = re.compile(r'\(\d+)|\g<(.+?)>') last = 0 for group_match in group_regex.finditer(new): - group_id = group_match.group(1) or group_match.group(2) + group_id = group_match[1] or group_match[2] with suppress(ValueError): group_id = int(group_id)
try: replacement += new[last:group_match.start()] - replacement += match.group(group_id) or '' + replacement += match[group_id] or '' except IndexError: raise IndexError('Invalid group reference: {}\n' 'Groups found: {}' @@ -732,14 +732,15 @@ if not m: break
+ m_title = m['title'].strip() + # Ignore links to sections of the same page - if not m.group('title').strip(): + if not m_title: curpos = m.end() continue
# Ignore interwiki links - if (site.isInterwikiLink(m.group('title').strip()) - and not m.group('title').strip().startswith(':')): + if site.isInterwikiLink(m_title) and not m_title.startswith(':'): curpos = m.end() continue
@@ -752,8 +753,8 @@ # TODO: Unclosed link label, what happens there? curpos = m.end() continue - groups['label'] += groups['linktrail'] + extended_match.group(1) - groups['linktrail'] = extended_match.group(2) + groups['label'] += groups['linktrail'] + extended_match[1] + groups['linktrail'] = extended_match[2] end = extended_match.end() else: end = m.end() @@ -1338,19 +1339,18 @@ R = re.compile(r'[[\s*(?P<namespace>{})\s*:\s*(?P<rest>.+?)]]' .format(catNamespace), re.I) for match in R.finditer(text): - if expand_text and '{{' in match.group('rest'): - rest = site.expand_text(match.group('rest')) + match_rest = match['rest'] + if expand_text and '{{' in match_rest: + rest = site.expand_text(match_rest) else: - rest = match.group('rest') + rest = match_rest if '|' in rest: title, sortKey = rest.split('|', 1) else: title, sortKey = rest, None try: - cat = pywikibot.Category(pywikibot.Link( - '%s:%s' % - (match.group('namespace'), title), - site), + cat = pywikibot.Category(site, + '{}:{}'.format(match['namespace'], title), sort_key=sortKey) except InvalidTitleError: # Category title extracted contains invalid characters @@ -1749,7 +1749,7 @@ result = []
for match in NESTED_TEMPLATE_REGEX.finditer(text): - name, params = match.group(1), match.group(2) + name, params = match[1], match[2]
# Special case for {{a}} if params is None: @@ -1992,7 +1992,7 @@ # Recursion levels can be maximum two. If a comment is found, it # will not for sure be found in the next level. # Nested comments are excluded by design. - timestamp = self.timestripper(comment.group(1)) + timestamp = self.timestripper(comment[1]) most_recent.append(timestamp)
# Censor comments. @@ -2005,7 +2005,7 @@ # Recursion levels can be maximum two. If a link is found, it will # not for sure be found in the next level. # Nested links are excluded by design. - link, anchor = wikilink.group('link'), wikilink.group('anchor') + link, anchor = wikilink['link'], wikilink['anchor'] timestamp = self.timestripper(link) most_recent.append(timestamp) if anchor: diff --git a/pywikibot/time.py b/pywikibot/time.py index c129990..c9bfc57 100644 --- a/pywikibot/time.py +++ b/pywikibot/time.py @@ -143,18 +143,18 @@ strpfmt = '%Y-%m-%d{sep}%H:%M:%S'.format(sep=m.group('sep')) strpstr = timestr[:19]
- if m.group('u'): + if m['u']: strpfmt += '.%f' - strpstr += m.group('u').replace(',', '.') # .ljust(7, '0') + strpstr += m['u'].replace(',', '.') # .ljust(7, '0')
- if m.group('tz'): - if m.group('tz') == 'Z': + if m['tz']: + if m['tz'] == 'Z': strpfmt += 'Z' strpstr += 'Z' else: strpfmt += '%z' # strptime wants HHMM, without ':' - strpstr += (m.group('tz').replace(':', '')).ljust(5, '0') + strpstr += (m['tz'].replace(':', '')).ljust(5, '0')
ts = cls.strptime(strpstr, strpfmt) if ts.tzinfo is not None: @@ -179,15 +179,15 @@ msg = "time data '{timestr}' does not match POSIX format." raise ValueError(msg.format(timestr=timestr))
- sec = int(m.group('S')) - usec = m.group('u') + sec = int(m['S']) + usec = m['u'] usec = int(usec.ljust(6, '0')) if usec else 0 if sec < 0 < usec: sec = sec - 1 - usec = 1000000 - usec + usec = 1_000_000 - usec
- ts = (cls(1970, 1, 1) - + datetime.timedelta(seconds=sec, microseconds=usec)) + ts = cls(1970, 1, 1) + datetime.timedelta(seconds=sec, + microseconds=usec) return ts
@classmethod diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index 7aae16d..536d34f 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -406,32 +406,31 @@ if not version_match: raise ValueError(f'Invalid version number "{version_str}"')
- components = [int(n) for n in version_match.group(1).split('.')] + components = [int(n) for n in version_match[1].split('.')]
# The _dev_version numbering scheme might change. E.g. if a stage # between 'alpha' and 'beta' is added, 'beta', 'rc' and stable releases # are reassigned (beta=3, rc=4, stable=5).
- if version_match.group(3): # wmf version - self._dev_version = (0, int(version_match.group(3))) - elif version_match.group(4): - self._dev_version = (2, int(version_match.group(4))) - elif version_match.group(5): - self._dev_version = (3, int(version_match.group(5))) - elif version_match.group(2) in ('alpha', '-alpha'): + if version_match[3]: # wmf version + self._dev_version = (0, int(version_match[3])) + elif version_match[4]: + self._dev_version = (2, int(version_match[4])) + elif version_match[5]: + self._dev_version = (3, int(version_match[5])) + elif version_match[2] in ('alpha', '-alpha'): self._dev_version = (1, ) else: for handled in ('wmf', 'alpha', 'beta', 'rc'): # if any of those pops up here our parser has failed - assert handled not in version_match.group(2), \ - 'Found "{}" in "{}"'.format(handled, - version_match.group(2)) - if version_match.group(2): - pywikibot.logging.debug('Additional unused version part ' - '"{}"'.format(version_match.group(2))) + assert handled not in version_match[2], \ + f'Found "{handled}" in "{version_match[2]}"' + if version_match[2]: + pywikibot.logging.debug( + 'Additional unused version part {version_match[2]!r}') self._dev_version = (4, )
- self.suffix = version_match.group(2) or '' + self.suffix = version_match[2] or '' self.version = tuple(components)
@staticmethod diff --git a/pywikibot/tools/_deprecate.py b/pywikibot/tools/_deprecate.py index 4feb1c2..691c8a8 100644 --- a/pywikibot/tools/_deprecate.py +++ b/pywikibot/tools/_deprecate.py @@ -273,7 +273,7 @@ # Get docstring up to :params so deprecation notices for # parameters don't disrupt it trim_params = re.compile(r'^.*?((?=:param)|$)', re.DOTALL) - trimmed_doc = trim_params.match(wrapper.__doc__).group(0) + trimmed_doc = trim_params.match(wrapper.__doc__)[0]
if not deprecated_notice.search(trimmed_doc): # No notice add_docstring(wrapper) diff --git a/pywikibot/tools/djvu.py b/pywikibot/tools/djvu.py index 39e4ae7..3da4eda 100644 --- a/pywikibot/tools/djvu.py +++ b/pywikibot/tools/djvu.py @@ -160,7 +160,7 @@ if 'FORM:DJVU' in line: m = self._pat_form.search(line) if m: - key, id = int(m.group('n')), m.group('id') + key, id = int(m['n']), m['id'] else: # If djvu doc has only one page, # FORM:DJVU line in djvudump has no id @@ -169,7 +169,7 @@ if 'INFO' in line: m = self._pat_info.search(line) if m: - size, dpi = m.group('size'), int(m.group('dpi')) + size, dpi = m['size'], int(m['dpi']) else: size, dpi = None, None else: diff --git a/pywikibot/tools/threading.py b/pywikibot/tools/threading.py index 6d13cbe..b076a5f 100644 --- a/pywikibot/tools/threading.py +++ b/pywikibot/tools/threading.py @@ -70,7 +70,7 @@ """Return number of acquired locks.""" with self._block: counter = re.search(r'count=(\d+) ', repr(self)) - return int(counter.group(1)) + return int(counter[1])
def locked(self): """Return true if the lock is acquired.""" diff --git a/pywikibot/xmlreader.py b/pywikibot/xmlreader.py index 69712c1..d619d9f 100644 --- a/pywikibot/xmlreader.py +++ b/pywikibot/xmlreader.py @@ -40,10 +40,10 @@ moveRestriction = None editLockMatch = re.search('edit=([^:]*)', restrictions) if editLockMatch: - editRestriction = editLockMatch.group(1) + editRestriction = editLockMatch[1] moveLockMatch = re.search('move=([^:]*)', restrictions) if moveLockMatch: - moveRestriction = moveLockMatch.group(1) + moveRestriction = moveLockMatch[1] if restrictions == 'sysop': editRestriction = 'sysop' moveRestriction = 'sysop' diff --git a/scripts/archivebot.py b/scripts/archivebot.py index 668903d..2f69fcc 100755 --- a/scripts/archivebot.py +++ b/scripts/archivebot.py @@ -215,7 +215,7 @@ match = re.fullmatch(r'(\d{1,3}(?: \d{3})+|\d+) *([BkKMT]?)', string) if not match: raise MalformedConfigError(f"Couldn't parse size: {string}") - val, unit = (int(match.group(1).replace(' ', '')), match.group(2)) + val, unit = (int(match[1].replace(' ', '')), match[2]) if unit == 'M': val *= 1024 unit = 'K' @@ -588,11 +588,11 @@ return None # TODO: handle unsigned try: - maxage = str2timedelta(re_t.group(1), thread.timestamp) + maxage = str2timedelta(re_t[1], thread.timestamp) except ValueError as e: raise MalformedConfigError(e) from None if self.now - thread.timestamp > maxage: - duration = str2localized_duration(self.site, re_t.group(1)) + duration = str2localized_duration(self.site, re_t[1]) return ('duration', duration) # TODO: handle marked with template return None diff --git a/scripts/category.py b/scripts/category.py index fa64fb0..7c22710 100755 --- a/scripts/category.py +++ b/scripts/category.py @@ -465,7 +465,7 @@ brackets_regex = re.compile(r'(.*) (.+?)') match_object = brackets_regex.match(page_name) if match_object: - page_name = match_object.group(1) + page_name = match_object[1] split_string = page_name.rsplit(' ', 1) if len(split_string) > 1: # pull last part of the name to the beginning, and append the diff --git a/scripts/checkimages.py b/scripts/checkimages.py index 372b60e..d0fc38b 100755 --- a/scripts/checkimages.py +++ b/scripts/checkimages.py @@ -1089,14 +1089,14 @@
for number, m in enumerate(SETTINGS_REGEX.finditer(testo), start=1): - name = str(m.group(1)) - find_tipe = str(m.group(2)) - find = str(m.group(3)) - imagechanges = str(m.group(4)) - summary = str(m.group(5)) - head = str(m.group(6)) - text = str(m.group(7)) - mexcatched = str(m.group(8)) + name = str(m[1]) + find_tipe = str(m[2]) + find = str(m[3]) + imagechanges = str(m[4]) + summary = str(m[5]) + head = str(m[6]) + text = str(m[7]) + mexcatched = str(m[8]) tupla = [number, name, find_tipe, find, imagechanges, summary, head, text, mexcatched] self.settings_data += [tupla] @@ -1312,7 +1312,7 @@ regl = r"("|')(.*?)\1(?:,|])" pl = re.compile(regl) for xl in pl.finditer(raw): - word = xl.group(2).replace('\\', '\') + word = xl[2].replace('\\', '\') if word not in list_loaded: list_loaded.append(word) return list_loaded diff --git a/scripts/commonscat.py b/scripts/commonscat.py index dd1cd02..192046c 100755 --- a/scripts/commonscat.py +++ b/scripts/commonscat.py @@ -491,10 +491,10 @@ .format(commonsPage, loguser, logcomment)) break
- if m.group('newcat1'): - return self.checkCommonscatLink(m.group('newcat1')) - if m.group('newcat2'): - return self.checkCommonscatLink(m.group('newcat2')) + if m['newcat1']: + return self.checkCommonscatLink(m['newcat1']) + if m['newcat2']: + return self.checkCommonscatLink(m['newcat2'])
return ''
diff --git a/scripts/dataextend.py b/scripts/dataextend.py index 64e621d..fbde614 100644 --- a/scripts/dataextend.py +++ b/scripts/dataextend.py @@ -590,9 +590,9 @@ day = None m = re.search(r'[{|](\d{4})|(\d+)|(\d+)[|}]', text) if m: - year = int(m.group(1)) - month = int(m.group(2)) - day = int(m.group(3)) + year = int(m[1]) + month = int(m[2]) + day = int(m[3]) if re.match(r'\d{,4}(?:年頃|.)?$', text): year = int(text) month = None @@ -606,84 +606,84 @@ month = int(text[-2:]) m = re.match(r'(\d{1,2})[-/](\d{4})', text) if m: - year = int(m.group(2)) - month = int(m.group(1)) + year = int(m[2]) + month = int(m[1]) m = re.match(r'(\d+)[-./|](\d{1,2})[-./|](\d{1,2})$', text) if m: - year = int(m.group(1)) - month = int(m.group(2)) - day = int(m.group(3)) + year = int(m[1]) + month = int(m[2]) + day = int(m[3]) m = re.match( r'(\d{1,2})[-./|]\s*(\d{1,2})[-./|]\s*(\d{3,4}).?$', text) if m: - year = int(m.group(3)) - month = int(m.group(2)) - day = int(m.group(1)) + year = int(m[3]) + month = int(m[2]) + day = int(m[1]) m = re.match(r'(\d{1,2})[-./\s]([iIvVxX]+)[-./\s](\d{4})$', text) if m: - year = int(m.group(3)) + year = int(m[3]) try: - month = self.MONTHNUMBER[m.group(2).lower()] + month = self.MONTHNUMBER[m[2].lower()] except KeyError: - raise ValueError(f"Don't know month {m.group(2)}") - day = int(m.group(1)) + raise ValueError(f"Don't know month {m[2]}") + day = int(m[1]) m = re.match(r"(\d+)(?:.|er|eme|ème)?[\s.]\s*(?:d'|d[aei] )?" r'([^\s.]{2,}).?[\s.]\s*(\d+)$', text) if m: - year = int(m.group(3)) + year = int(m[3]) try: - month = self.MONTHNUMBER[m.group(2).lower()] + month = self.MONTHNUMBER[m[2].lower()] except KeyError: - raise ValueError(f"Don't know month {m.group(2)}") - day = int(m.group(1)) + raise ValueError(f"Don't know month {m[2]}") + day = int(m[1]) m = re.match(r'(\d{4}).?[\s.]\s*([^\s.]{3,}).?[\s.]\s*(\d+)$', text) if m: - year = int(m.group(1)) + year = int(m[1]) try: - month = self.MONTHNUMBER[m.group(2).lower()] + month = self.MONTHNUMBER[m[2].lower()] except KeyError: - raise ValueError(f"Don't know month {m.group(2)}") - day = int(m.group(3)) + raise ValueError(f"Don't know month {m[2]}") + day = int(m[3]) m = re.match(r"(\d+) (?:de |d')?(\w+[a-z]\w+) de (\d+)", text) if m: - year = int(m.group(3)) + year = int(m[3]) try: - month = self.MONTHNUMBER[m.group(2).lower()] + month = self.MONTHNUMBER[m[2].lower()] except KeyError: - raise ValueError(f"Don't know month {m.group(2)}") - day = int(m.group(1)) + raise ValueError(f"Don't know month {m[2]}") + day = int(m[1]) m = re.match(r'(\w*[a-zA-Z]\w*).? (\d+)$', text) if m: - year = int(m.group(2)) + year = int(m[2]) try: - month = self.MONTHNUMBER[m.group(1).lower()] + month = self.MONTHNUMBER[m[1].lower()] except KeyError: - raise ValueError(f"Don't know month {m.group(1)}") + raise ValueError(f"Don't know month {m[1]}") m = re.match(r'(\w+).? (\d{1,2})(?:st|nd|rd|th)?.?\s*,\s*(\d{3,4})$', text) if m: - year = int(m.group(3)) + year = int(m[3]) try: - month = self.MONTHNUMBER[m.group(1).lower()] + month = self.MONTHNUMBER[m[1].lower()] except KeyError: - raise ValueError(f"Don't know month {m.group(1)}") - day = int(m.group(2)) + raise ValueError(f"Don't know month {m[1]}") + day = int(m[2]) m = re.match(r'(\d{4}),? 
(\d{1,2}) (\w+)', text) if m: - year = int(m.group(1)) + year = int(m[1]) try: - month = self.MONTHNUMBER[m.group(3).lower()] + month = self.MONTHNUMBER[m[3].lower()] except KeyError: - raise ValueError(f"Don't know month {m.group(1)}") - day = int(m.group(2)) + raise ValueError(f"Don't know month {m[1]}") + day = int(m[2]) m = re.match(r'(\d+)年(\d+)月(\d+)日', text) if m: - year = int(m.group(1)) - month = int(m.group(2)) - day = int(m.group(3)) + year = int(m[1]) + month = int(m[2]) + day = int(m[3]) m = re.match(r'(\d+)年$', text) if m: - year = int(m.group(1)) + year = int(m[1]) if day == 0: day = None if day is None and month == 0: @@ -716,8 +716,8 @@
def createquantityclaim(self, text): m = re.match(r'(\d+(?:.\d+)?)\s*([a-z]\w*)', text.replace(',', '.')) - amount = m.group(1) - name = m.group(2).lower() + amount = m[1] + name = m[2].lower() return pywikibot.WbQuantity(amount, unit=pywikibot.ItemPage( self.site, self.QUANTITYTYPE[name]), @@ -1125,7 +1125,7 @@ if value.startswith('!date!'): value = value[6:] if value.startswith('!q!'): - value = re.search(r'\d+(?:.\d+)?', value).group(0) + value = re.search(r'\d+(?:.\d+)?', value)[0] elif value.startswith('!i!'): value = value[3:].strip()
@@ -1330,7 +1330,7 @@ if self.hrtre: match = re.compile('(?s)' + self.hrtre).search(self.html) if match: - text = match.group(1) + text = match[1] text = text.replace('\n', '\n') text = text.replace('\t', '\t') text = text.replace('\r', '\n') @@ -1981,16 +1981,16 @@ if dtype: alt = [dtype] + alt for alttype in alt: - if self.getdata(alttype, m.group(1), ask=False) \ - and self.getdata(alttype, m.group(1), ask=False) != 'XXX': - return self.getdata(alttype, m.group(1), ask=False) + if self.getdata(alttype, m[1], ask=False) \ + and self.getdata(alttype, m[1], ask=False) != 'XXX': + return self.getdata(alttype, m[1], ask=False) for skip in skips: - if self.getdata(skip, m.group(1), ask=False) \ - and self.getdata(skip, m.group(1), ask=False) != 'XXX': + if self.getdata(skip, m[1], ask=False) \ + and self.getdata(skip, m[1], ask=False) != 'XXX': return None if dtype: - return self.getdata(dtype, m.group(1)) - return m.group(1) + return self.getdata(dtype, m[1]) + return m[1]
def findallbyre(self, regex, html, dtype=None, skips=None, alt=None) -> List[str]: @@ -2552,9 +2552,7 @@ m = re.match(r'(\d+)[/-](\d+)[/-](\d+)', result) if m: result = '{}-{}-{}'.format( - m.group(2), - m.group(1), - m.group(3) if len(m.group(3)) > 2 else '19' + m.group(3) + m[2], m[1], m[3] if len(m[3]) > 2 else '19' + m[3] ) return result
@@ -2573,9 +2571,7 @@ m = re.match(r'(\d+)[/-](\d+)[/-](\d+)', result) if m: result = '{}-{}-{}'.format( - m.group(2), - m.group(1), - m.group(3) if len(m.group(3)) > 2 else '19' + m.group(3) + m[2], m[1], m[3] if len(m[3]) > 2 else '19' + m[3] ) return result
@@ -13885,21 +13881,22 @@
def prepare(self, html: str): def reworkwikilink(wikipart): - parts = wikipart.group(1).split('|') + parts = wikipart[1].split('|') return '[[{}]]'.format(parts[0] if ':' in parts[0] else parts[-1])
if not html: return None + f = codecs.open('result.html', 'w', 'utf-8') f.write(html) f.close() - html = re.search(self.mainRE, html).group(1) + html = re.search(self.mainRE, html)[1] html = re.sub(r'{{nowrap|([^{}]*)}}', r'\1', html) return re.sub(r'[[([^[]]*)]]', reworkwikilink, html)
@staticmethod def excludetemplatelight(text): - templatetype = re.search('([^{|]*)', text).group(0).lower().strip() + templatetype = re.search('([^{|]*)', text)[0].lower().strip() firstword = templatetype.split()[0] lastword = templatetype.split()[-1] return ( @@ -13996,7 +13993,7 @@
@staticmethod def excludetemplate(text): - templatetype = re.search('([^{|]+)', text).group(0).lower().strip() + templatetype = re.search('([^{|]+)', text)[0].lower().strip() firstword = templatetype.split()[0] lastword = templatetype.split()[-1] return ( diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py index e06e11b..98cbc59 100755 --- a/scripts/fixing_redirects.py +++ b/scripts/fixing_redirects.py @@ -84,16 +84,15 @@ # Make sure that next time around we will not find this same hit. curpos = m.start() + 1
- is_interwikilink = mysite.isInterwikiLink(m.group('title')) + is_interwikilink = mysite.isInterwikiLink(m['title'])
# ignore interwiki links, links in the disabled area # and links to sections of the same page - if (m.group('title').strip() == '' + if (m['title'].strip() == '' or is_interwikilink or isDisabled(text, m.start())): continue - actual_link_page = pywikibot.Page(target_page.site, - m.group('title')) + actual_link_page = pywikibot.Page(target_page.site, m['title']) # Check whether the link found is to page. try: actual_link_page.title() @@ -105,22 +104,22 @@
# The link looks like this: # [[page_title|link_text]]trailing_chars - page_title = m.group('title') - link_text = m.group('label') + page_title = m['title'] + link_text = m['label']
if not link_text: # or like this: [[page_title]]trailing_chars link_text = page_title - if m.group('section') is None: + if m['section'] is None: section = '' else: - section = m.group('section') + section = m['section'] if section and target_page.section(): pywikibot.warning( 'Source section {} and target section {} found. ' 'Skipping.'.format(section, target_page)) continue - trailing_chars = m.group('linktrail') + trailing_chars = m['linktrail'] if trailing_chars: link_text += trailing_chars
diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py index 2e589fd..e50668e 100755 --- a/scripts/harvest_template.py +++ b/scripts/harvest_template.py @@ -391,7 +391,7 @@ # Try to extract a valid page for match in pywikibot.link_regex.finditer(value): matched = True - link_text = match.group(1) + link_text = match[1] linked_item = self.template_link_target(item, site, link_text) if linked_item: yield linked_item @@ -431,8 +431,8 @@ start, end = match.span() since_prev_match = value[prev_end:start]
- title = match.group('title').strip() - text = match.group(2) + title = match['title'].strip() + text = match[2] if text: text = text[1:].strip() # remove '|'
@@ -480,7 +480,7 @@ .. versionadded:: 7.5 """ for match in self.linkR.finditer(value): - yield match.group('url') + yield match['url']
@staticmethod def handle_commonsmedia(value, site, diff --git a/scripts/misspelling.py b/scripts/misspelling.py index 71cca13..e72fd16 100755 --- a/scripts/misspelling.py +++ b/scripts/misspelling.py @@ -122,7 +122,7 @@ # misspelling is ambiguous, see for example: # https://de.wikipedia.org/wiki/Buthan for match in self.linkR.finditer(correct_spelling): - self.opt.pos.append(match.group('title')) + self.opt.pos.append(match['title'])
if not self.opt.pos: # There were no links in the parameter, so there is diff --git a/scripts/noreferences.py b/scripts/noreferences.py index 96570b6..7460069 100755 --- a/scripts/noreferences.py +++ b/scripts/noreferences.py @@ -642,7 +642,7 @@ 'Adding references section before {} section...\n' .format(section)) index = match.start() - ident = match.group('ident') + ident = match['ident'] return self.createReferenceSection(oldText, index, ident) else: diff --git a/scripts/pagefromfile.py b/scripts/pagefromfile.py index 0685092..4a564d5 100755 --- a/scripts/pagefromfile.py +++ b/scripts/pagefromfile.py @@ -248,12 +248,12 @@ if self.opt.include: contents = location.group() else: - contents = location.group(1) + contents = location[1]
title = self.opt.title if not title: try: - title = title_regex.search(contents).group(1) + title = title_regex.search(contents)[1] if self.opt.notitle: # Remove title (to allow creation of redirects) contents = title_regex.sub('', contents, count=1) diff --git a/scripts/redirect.py b/scripts/redirect.py index 4cc7790..ad2847b 100755 --- a/scripts/redirect.py +++ b/scripts/redirect.py @@ -168,7 +168,7 @@
m = redirR.match(entry.text) if m: - target = m.group(1) + target = m[1] # There might be redirects to another wiki. Ignore these. target_link = pywikibot.Link(target, self.site) try: diff --git a/scripts/reflinks.py b/scripts/reflinks.py index 6ef24d6..9fbd2f5 100755 --- a/scripts/reflinks.py +++ b/scripts/reflinks.py @@ -320,11 +320,11 @@
# Parse references for match in self.REFS.finditer(text): - content = match.group('content') + content = match['content'] if not content.strip(): continue
- params = match.group('params') + params = match['params'] group = self.GROUPS.search(params) or '' if group not in found_refs: found_refs[group] = {} @@ -338,8 +338,8 @@
found = self.NAMES.search(params) if found: - quoted = found.group('quote') in ['"', "'"] - name = found.group('name') + quoted = found['quote'] in ['"', "'"] + name = found['name']
if not v[IX.name]: # First name associated with this content @@ -555,12 +555,12 @@ # for each link to change for match in linksInRef.finditer(raw_text):
- link = match.group('url') + link = match['url'] if 'jstor.org' in link: # TODO: Clean URL blacklist continue
- ref = RefLink(link, match.group('name'), site=self.site) + ref = RefLink(link, match['name'], site=self.site)
try: r = comms.http.fetch( diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py index 2d57096..bc30d27 100755 --- a/scripts/solve_disambiguation.py +++ b/scripts/solve_disambiguation.py @@ -762,7 +762,7 @@ for line in page.text.splitlines(): found = reg.match(line) if found: - yield found.group(1) + yield found[1]
def firstize(self, page, links) -> List[pywikibot.Page]: """Call firstlinks and remove extra links. @@ -893,8 +893,7 @@ # Ensure that next time around we will not find this same hit. curpos = m.start() + 1 try: - foundlink = pywikibot.Link(m.group('title'), - disamb_page.site) + foundlink = pywikibot.Link(m['title'], disamb_page.site) foundlink.parse() except Error: continue @@ -911,7 +910,7 @@ except Error: # must be a broken link pywikibot.log('Invalid link [[{}]] in page [[{}]]' - .format(m.group('title'), ref_page.title())) + .format(m['title'], ref_page.title())) continue
n += 1 @@ -989,19 +988,19 @@
# The link looks like this: # [[page_title|link_text]]trailing_chars - page_title = m.group('title') - link_text = m.group('label') + page_title = m['title'] + link_text = m['label']
if not link_text: # or like this: [[page_title]]trailing_chars link_text = page_title
- if m.group('section') is None: + if m['section'] is None: section = '' else: - section = m.group('section') + section = m['section']
- trailing_chars = m.group('linktrail') + trailing_chars = m['linktrail'] if trailing_chars: link_text += trailing_chars
diff --git a/scripts/upload.py b/scripts/upload.py index 1a0d137..c654090 100755 --- a/scripts/upload.py +++ b/scripts/upload.py @@ -75,10 +75,10 @@ if not match: pywikibot.error('Chunk size parameter is not valid.') chunk_size = 0 - elif match.group(1): # number was in there - base = float(match.group(1)) - if match.group(2): # suffix too - suffix = match.group(2).lower() + elif match[1]: # number was in there + base = float(match[1]) + if match[2]: # suffix too + suffix = match[2].lower() if suffix == 'k': suffix = 1000 elif suffix == 'm': diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py index 0f1dccd..65c3790 100755 --- a/scripts/weblinkchecker.py +++ b/scripts/weblinkchecker.py @@ -226,10 +226,10 @@ text = textlib.removeDisabledParts(text) link_regex = textlib.compileLinkR(without_bracketed, only_bracketed) for m in link_regex.finditer(text): - if m.group('url'): - yield m.group('url') + if m['url']: + yield m['url'] else: - yield m.group('urlb') + yield m['urlb']
XmlDumpPageGenerator = partial( diff --git a/tests/api_tests.py b/tests/api_tests.py index 22753f2..e945abd 100755 --- a/tests/api_tests.py +++ b/tests/api_tests.py @@ -1004,7 +1004,7 @@ for info, time in patterns.items(): lag = api._requests.lagpattern.search(info) self.assertIsNotNone(lag) - self.assertEqual(float(lag.group('lag')), time) + self.assertEqual(float(lag['lag']), time)
if __name__ == '__main__': # pragma: no cover diff --git a/tests/aspects.py b/tests/aspects.py index b20b270..36709de 100644 --- a/tests/aspects.py +++ b/tests/aspects.py @@ -1440,7 +1440,7 @@ deprecation_messages = self.deprecation_messages for deprecation_message in deprecation_messages: match = self._generic_match.match(deprecation_message) - if (match and bool(match.group(1)) == (msg is self.INSTEAD) + if (match and bool(match[1]) == (msg is self.INSTEAD) or msg is None): break else: diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index be98cdd..c92e84d 100755 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -630,13 +630,13 @@ for pattern in patterns: m = func(pattern) self.assertIsNotNone(m) - self.assertIsNotNone(m.group(0)) - self.assertIsNone(m.group('name')) - self.assertIsNone(m.group(1)) - self.assertIsNone(m.group('params')) - self.assertIsNone(m.group(2)) - self.assertIsNotNone(m.group('unhandled_depth')) - self.assertTrue(m.group(0).endswith('foo {{bar}}')) + self.assertIsNotNone(m[0]) + self.assertIsNone(m['name']) + self.assertIsNone(m[1]) + self.assertIsNone(m['params']) + self.assertIsNone(m[2]) + self.assertIsNotNone(m['unhandled_depth']) + self.assertTrue(m[0].endswith('foo {{bar}}'))
class TestDisabledParts(DefaultDrySiteTestCase): diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py index 4929296..65bb9a8 100755 --- a/tests/timestripper_tests.py +++ b/tests/timestripper_tests.py @@ -285,8 +285,7 @@
txt_match = self.date + '<!--a test comment-->' exp_match = 'a test comment' - self.assertEqual(ts._comment_pat.search(txt_match).group(1), - exp_match) + self.assertEqual(ts._comment_pat.search(txt_match)[1], exp_match)
def test_timestripper_match_hyperlink(self): """Test that hyperlinks are correctly matched.""" @@ -304,9 +303,9 @@ txt_match = '[[wikilink|a wikilink with no date]]' exp_match_link = 'wikilink' exp_match_anchor = '|a wikilink with no date' - self.assertEqual(ts._wikilink_pat.search(txt_match).group('link'), + self.assertEqual(ts._wikilink_pat.search(txt_match)['link'], exp_match_link) - self.assertEqual(ts._wikilink_pat.search(txt_match).group('anchor'), + self.assertEqual(ts._wikilink_pat.search(txt_match)['anchor'], exp_match_anchor)
def test_timestripper_match_comment_with_date(self):