jenkins-bot submitted this change.

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
[IMPR] Simplify access to Match object

Since Python 3.6, the re.Match object has a __getitem__ method, so
groups can be accessed with match[group] instead of match.group(group),
for both numbered and named groups.
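
A minimal sketch of the equivalence this change relies on (the regex
below is illustrative only and does not appear in this patch):

    import re

    m = re.match(r'(?P<word>\w+) (\d+)', 'answer 42')
    assert m[0] == m.group(0)            # whole match: 'answer 42'
    assert m['word'] == m.group('word')  # named group: 'answer'
    assert m[2] == m.group(2)            # numbered group: '42'

Only single-group access has a subscript form: match.group() without
arguments and multi-group calls such as match.group(1, 2) have no
__getitem__ equivalent, so such call sites keep .group().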

Change-Id: I2e3e4073d317eb23b7ba040642e82edfbe471cdd
---
M docs/conf.py
M docs/recipes.rst
M pywikibot/comms/http.py
M pywikibot/cosmetic_changes.py
M pywikibot/data/api/_login.py
M pywikibot/data/api/_requests.py
M pywikibot/date.py
M pywikibot/i18n.py
M pywikibot/page/_links.py
M pywikibot/page/_pages.py
M pywikibot/pagegenerators/_generators.py
M pywikibot/proofreadpage.py
M pywikibot/site/_apisite.py
M pywikibot/site/_siteinfo.py
M pywikibot/textlib.py
M pywikibot/time.py
M pywikibot/tools/__init__.py
M pywikibot/tools/_deprecate.py
M pywikibot/tools/djvu.py
M pywikibot/tools/threading.py
M pywikibot/xmlreader.py
M scripts/archivebot.py
M scripts/category.py
M scripts/checkimages.py
M scripts/commonscat.py
M scripts/dataextend.py
M scripts/fixing_redirects.py
M scripts/harvest_template.py
M scripts/misspelling.py
M scripts/noreferences.py
M scripts/pagefromfile.py
M scripts/redirect.py
M scripts/reflinks.py
M scripts/solve_disambiguation.py
M scripts/upload.py
M scripts/weblinkchecker.py
M tests/api_tests.py
M tests/aspects.py
M tests/textlib_tests.py
M tests/timestripper_tests.py
40 files changed, 218 insertions(+), 225 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 3aabf0c..55fdafc 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -447,7 +447,7 @@
# Indent options
match = re.match(r'-[^ ]+? +', line)
if match:
- length = len(match.group(0))
+ length = len(match[0])
lines[index] = ' ' + line
elif length and line.startswith(' ' * length):
# Indent descriptions of options (as options are indented)
diff --git a/docs/recipes.rst b/docs/recipes.rst
index 76e5b84..5e335ed 100644
--- a/docs/recipes.rst
+++ b/docs/recipes.rst
@@ -17,7 +17,7 @@
>>> site = pywikibot.Site('wikipedia:en') # create a Site object
>>> page = pywikibot.Page(site, 'Deep learning') # create a Page object
>>> sect = textlib.extract_sections(page.text, site) # divide content into sections
- >>> links = sorted(link.group('title') for link in pywikibot.link_regex.finditer(sect.head))
+ >>> links = sorted(link['title'] for link in pywikibot.link_regex.finditer(sect.header))
>>> pages = [pywikibot.Page(site, title) for title in links]

``links`` is a list containing all link titles in alphabetical order.
diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 8b492f6..48f74c8 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -410,7 +410,7 @@
m = CHARSET_RE.search(content_type)
if not m:
return None
- charset = m.group('charset').strip('"\' ').lower()
+ charset = m['charset'].strip('"\' ').lower()
# Convert to python correct encoding names
if re.sub(r'[ _\-]', '', charset) == 'xeucjp':
charset = 'euc_jp'
@@ -443,7 +443,7 @@
m = re.search(
br'encoding=(["\'])(?P<encoding>.+?)\1', header)
if m:
- header_encoding = m.group('encoding').decode('utf-8')
+ header_encoding = m['encoding'].decode('utf-8')
else:
header_encoding = 'utf-8'
else:
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 484e784..569f081 100644
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -194,7 +194,7 @@
raise NotImplementedError(
'ISBN functionality not available. Install stdnum package.')

- isbn = match.group('code')
+ isbn = match['code']
try:
stdnum_isbn.validate(isbn)
except stdnum_isbn.ValidationError as e:
@@ -519,10 +519,10 @@
# helper function which works on one link and either returns it
# unmodified, or returns a replacement.
def handleOneLink(match: Match[str]) -> str:
- titleWithSection = match.group('titleWithSection')
- label = match.group('label')
- trailingChars = match.group('linktrail')
- newline = match.group('newline')
+ titleWithSection = match['titleWithSection']
+ label = match['label']
+ trailingChars = match['linktrail']
+ newline = match['newline']

is_interwiki = self.site.isInterwikiLink(titleWithSection)
if is_interwiki:
@@ -819,11 +819,13 @@
if re.match(r'(?:{}):'
.format('|'.join((*self.site.namespaces[6],
*self.site.namespaces[14]))),
- match.group('link')):
+ match['link']):
replacement += ':'
- replacement += match.group('link')
- if match.group('title'):
- replacement += '|' + match.group('title')
+
+ replacement += match['link']
+ if match['title']:
+ replacement += '|' + match['title']
+
return replacement + ']]'

exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace',
@@ -898,8 +900,8 @@
"""Relace html markups with wikitext markups."""
def replace_header(match: Match[str]) -> str:
"""Create a header string for replacing."""
- depth = int(match.group(1))
- return r'{0} {1} {0}'.format('=' * depth, match.group(2))
+ depth = int(match[1])
+ return r'{0} {1} {0}'.format('=' * depth, match[2])

# Everything case-insensitive (?i)
# Keep in mind that MediaWiki automatically converts <br> to <br />
diff --git a/pywikibot/data/api/_login.py b/pywikibot/data/api/_login.py
index cedc9a7..11de1ae 100644
--- a/pywikibot/data/api/_login.py
+++ b/pywikibot/data/api/_login.py
@@ -143,8 +143,7 @@
else:
match = re.search(r'(\d+) (seconds|minutes)', fail_reason)
if match:
- delta = datetime.timedelta(
- **{match.group(2): int(match.group(1))})
+ delta = datetime.timedelta(**{match[2]: int(match[1])})
else:
delta = datetime.timedelta()
self._waituntil = datetime.datetime.now() + delta
diff --git a/pywikibot/data/api/_requests.py b/pywikibot/data/api/_requests.py
index 6e930da..70d1207 100644
--- a/pywikibot/data/api/_requests.py
+++ b/pywikibot/data/api/_requests.py
@@ -1028,7 +1028,7 @@
lag = error['lag']
except KeyError:
lag = lagpattern.search(info)
- lag = float(lag.group('lag')) if lag else 0.0
+ lag = float(lag['lag']) if lag else 0.0

self.site.throttle.lag(lag * retries)
continue
diff --git a/pywikibot/date.py b/pywikibot/date.py
index 5e167f2..3f35e63 100644
--- a/pywikibot/date.py
+++ b/pywikibot/date.py
@@ -515,8 +515,7 @@
m = compPattern.match(value)
if m:
# decode each found value using provided decoder
- values = [decoder[2](m.group(i + 1))
- for i, decoder in enumerate(decoders)]
+ values = [decoder[2](m[i + 1]) for i, decoder in enumerate(decoders)]
decValue = decf(values)

assert not isinstance(decValue, str), \
diff --git a/pywikibot/i18n.py b/pywikibot/i18n.py
index 079b67f..f94a176 100644
--- a/pywikibot/i18n.py
+++ b/pywikibot/i18n.py
@@ -456,8 +456,8 @@
return plural_rule

def replace_plural(match: Match[str]) -> str:
- selector = match.group(1)
- variants = match.group(2)
+ selector = match[1]
+ variants = match[2]
num = parameters[selector]
if not isinstance(num, int):
raise ValueError("'{}' must be a number, not a {} ({})"
diff --git a/pywikibot/page/_links.py b/pywikibot/page/_links.py
index 6b6a806..1dd4e3f 100644
--- a/pywikibot/page/_links.py
+++ b/pywikibot/page/_links.py
@@ -826,14 +826,14 @@
if textlib.isDisabled(match.string, match.start(), tags=exceptions):
# match.string stores original text so we do not need
# to pass it to handle_entity, ♥ Python
- return match.group(0)
+ return match[0]

- if match.group('decimal'):
- unicode_codepoint = int(match.group('decimal'))
- elif match.group('hex'):
- unicode_codepoint = int(match.group('hex'), 16)
- elif match.group('name'):
- name = match.group('name')
+ if match['decimal']:
+ unicode_codepoint = int(match['decimal'])
+ elif match['hex']:
+ unicode_codepoint = int(match['hex'], 16)
+ elif match['name']:
+ name = match['name']
unicode_codepoint = name2codepoint.get(name, False)

unicode_codepoint = _ILLEGAL_HTML_ENTITIES_MAPPING.get(
@@ -843,6 +843,6 @@
return chr(unicode_codepoint)

# Leave the entity unchanged
- return match.group(0)
+ return match[0]

return _ENTITY_SUB(handle_entity, text)
diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py
index 506243a..cb28f5e 100644
--- a/pywikibot/page/_pages.py
+++ b/pywikibot/page/_pages.py
@@ -1465,7 +1465,7 @@
text = self.text
for linkmatch in pywikibot.link_regex.finditer(
textlib.removeDisabledParts(text)):
- linktitle = linkmatch.group('title')
+ linktitle = linkmatch['title']
link = Link(linktitle, self.site)
# only yield links that are to a different site and that
# are not language links
@@ -2279,7 +2279,7 @@
old_text = ''
result = redirect_regex.search(old_text)
if result:
- oldlink = result.group(1)
+ oldlink = result[1]
if (keep_section and '#' in oldlink
and target_page.section() is None):
sectionlink = oldlink[oldlink.index('#'):]
diff --git a/pywikibot/pagegenerators/_generators.py b/pywikibot/pagegenerators/_generators.py
index 2f30bba..3d781a9 100644
--- a/pywikibot/pagegenerators/_generators.py
+++ b/pywikibot/pagegenerators/_generators.py
@@ -333,8 +333,8 @@
# This makes it possible to work on different wikis using a single
# text file, but also could be dangerous because you might
# inadvertently change pages on another wiki!
- yield pywikibot.Page(pywikibot.Link(linkmatch.group('title'),
- site))
+ yield pywikibot.Page(pywikibot.Link(linkmatch['title'], site))
+
if linkmatch is not None:
return

@@ -855,7 +855,7 @@
for url in self.queryGoogle(local_query):
m = re.search(pattern, url)
if m:
- page = pywikibot.Page(pywikibot.Link(m.group(1), self.site))
+ page = pywikibot.Page(pywikibot.Link(m[1], self.site))
if page.site == self.site:
yield page

diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index 5cdeedd..35ff2dd 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -121,10 +121,10 @@

m = self.p_header.search(self._text)
if m:
- self.ql = int(m.group('ql'))
- self.user = m.group('user')
- self.header = m.group('header')
- if not m.group('has_div'):
+ self.ql = int(m['ql'])
+ self.user = m['user']
+ self.header = m['header']
+ if not m['has_div']:
self._has_div = False
else:
self.ql = ProofreadPage.NOT_PROOFREAD
@@ -867,7 +867,7 @@
r'/w/index\.php\?title=(.+?)&action=edit&redlink=1')
title = p_href.search(href)
if title:
- return title.group(1).replace('_', ' ')
+ return title[1].replace('_', ' ')
return None

def save(self, *args: Any, **kwargs: Any) -> None: # See Page.save().
diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py
index 7e53bbf..2050db3 100644
--- a/pywikibot/site/_apisite.py
+++ b/pywikibot/site/_apisite.py
@@ -703,12 +703,12 @@
'"{}": No linktrail pattern extracted from "{}"'
.format(self.code, linktrail))

- pattern = match.group('pattern')
- letters = match.group('letters')
+ pattern = match['pattern']
+ letters = match['letters']

if r'x{' in pattern:
pattern = re.sub(r'\\x\{([A-F0-9]{4})\}',
- lambda match: chr(int(match.group(1), 16)),
+ lambda match: chr(int(match[1], 16)),
pattern)
if letters:
pattern += ''.join(letters.split('|'))
diff --git a/pywikibot/site/_siteinfo.py b/pywikibot/site/_siteinfo.py
index c5d0a0a..4c1ab93 100644
--- a/pywikibot/site/_siteinfo.py
+++ b/pywikibot/site/_siteinfo.py
@@ -142,7 +142,7 @@
matched = Siteinfo.WARNING_REGEX.match(message)
if mod == 'siteinfo' and matched:
invalid_properties.extend(
- prop.strip() for prop in matched.group(1).split(','))
+ prop.strip() for prop in matched[1].split(','))
return True
return False

diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index d67b09f..ccc45b8 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -440,13 +440,13 @@
group_regex = re.compile(r'\\(\d+)|\\g<(.+?)>')
last = 0
for group_match in group_regex.finditer(new):
- group_id = group_match.group(1) or group_match.group(2)
+ group_id = group_match[1] or group_match[2]
with suppress(ValueError):
group_id = int(group_id)

try:
replacement += new[last:group_match.start()]
- replacement += match.group(group_id) or ''
+ replacement += match[group_id] or ''
except IndexError:
raise IndexError('Invalid group reference: {}\n'
'Groups found: {}'
@@ -732,14 +732,15 @@
if not m:
break

+ m_title = m['title'].strip()
+
# Ignore links to sections of the same page
- if not m.group('title').strip():
+ if not m_title:
curpos = m.end()
continue

# Ignore interwiki links
- if (site.isInterwikiLink(m.group('title').strip())
- and not m.group('title').strip().startswith(':')):
+ if site.isInterwikiLink(m_title) and not m_title.startswith(':'):
curpos = m.end()
continue

@@ -752,8 +753,8 @@
# TODO: Unclosed link label, what happens there?
curpos = m.end()
continue
- groups['label'] += groups['linktrail'] + extended_match.group(1)
- groups['linktrail'] = extended_match.group(2)
+ groups['label'] += groups['linktrail'] + extended_match[1]
+ groups['linktrail'] = extended_match[2]
end = extended_match.end()
else:
end = m.end()
@@ -1338,19 +1339,18 @@
R = re.compile(r'\[\[\s*(?P<namespace>{})\s*:\s*(?P<rest>.+?)\]\]'
.format(catNamespace), re.I)
for match in R.finditer(text):
- if expand_text and '{{' in match.group('rest'):
- rest = site.expand_text(match.group('rest'))
+ match_rest = match['rest']
+ if expand_text and '{{' in match_rest:
+ rest = site.expand_text(match_rest)
else:
- rest = match.group('rest')
+ rest = match_rest
if '|' in rest:
title, sortKey = rest.split('|', 1)
else:
title, sortKey = rest, None
try:
- cat = pywikibot.Category(pywikibot.Link(
- '%s:%s' %
- (match.group('namespace'), title),
- site),
+ cat = pywikibot.Category(site,
+ '{}:{}'.format(match['namespace'], title),
sort_key=sortKey)
except InvalidTitleError:
# Category title extracted contains invalid characters
@@ -1749,7 +1749,7 @@
result = []

for match in NESTED_TEMPLATE_REGEX.finditer(text):
- name, params = match.group(1), match.group(2)
+ name, params = match[1], match[2]

# Special case for {{a}}
if params is None:
@@ -1992,7 +1992,7 @@
# Recursion levels can be maximum two. If a comment is found, it
# will not for sure be found in the next level.
# Nested comments are excluded by design.
- timestamp = self.timestripper(comment.group(1))
+ timestamp = self.timestripper(comment[1])
most_recent.append(timestamp)

# Censor comments.
@@ -2005,7 +2005,7 @@
# Recursion levels can be maximum two. If a link is found, it will
# not for sure be found in the next level.
# Nested links are excluded by design.
- link, anchor = wikilink.group('link'), wikilink.group('anchor')
+ link, anchor = wikilink['link'], wikilink['anchor']
timestamp = self.timestripper(link)
most_recent.append(timestamp)
if anchor:
diff --git a/pywikibot/time.py b/pywikibot/time.py
index c129990..c9bfc57 100644
--- a/pywikibot/time.py
+++ b/pywikibot/time.py
@@ -143,18 +143,18 @@
strpfmt = '%Y-%m-%d{sep}%H:%M:%S'.format(sep=m.group('sep'))
strpstr = timestr[:19]

- if m.group('u'):
+ if m['u']:
strpfmt += '.%f'
- strpstr += m.group('u').replace(',', '.') # .ljust(7, '0')
+ strpstr += m['u'].replace(',', '.') # .ljust(7, '0')

- if m.group('tz'):
- if m.group('tz') == 'Z':
+ if m['tz']:
+ if m['tz'] == 'Z':
strpfmt += 'Z'
strpstr += 'Z'
else:
strpfmt += '%z'
# strptime wants HHMM, without ':'
- strpstr += (m.group('tz').replace(':', '')).ljust(5, '0')
+ strpstr += (m['tz'].replace(':', '')).ljust(5, '0')

ts = cls.strptime(strpstr, strpfmt)
if ts.tzinfo is not None:
@@ -179,15 +179,15 @@
msg = "time data '{timestr}' does not match POSIX format."
raise ValueError(msg.format(timestr=timestr))

- sec = int(m.group('S'))
- usec = m.group('u')
+ sec = int(m['S'])
+ usec = m['u']
usec = int(usec.ljust(6, '0')) if usec else 0
if sec < 0 < usec:
sec = sec - 1
- usec = 1000000 - usec
+ usec = 1_000_000 - usec

- ts = (cls(1970, 1, 1)
- + datetime.timedelta(seconds=sec, microseconds=usec))
+ ts = cls(1970, 1, 1) + datetime.timedelta(seconds=sec,
+ microseconds=usec)
return ts

@classmethod
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 7aae16d..536d34f 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -406,32 +406,31 @@
if not version_match:
raise ValueError(f'Invalid version number "{version_str}"')

- components = [int(n) for n in version_match.group(1).split('.')]
+ components = [int(n) for n in version_match[1].split('.')]

# The _dev_version numbering scheme might change. E.g. if a stage
# between 'alpha' and 'beta' is added, 'beta', 'rc' and stable releases
# are reassigned (beta=3, rc=4, stable=5).

- if version_match.group(3): # wmf version
- self._dev_version = (0, int(version_match.group(3)))
- elif version_match.group(4):
- self._dev_version = (2, int(version_match.group(4)))
- elif version_match.group(5):
- self._dev_version = (3, int(version_match.group(5)))
- elif version_match.group(2) in ('alpha', '-alpha'):
+ if version_match[3]: # wmf version
+ self._dev_version = (0, int(version_match[3]))
+ elif version_match[4]:
+ self._dev_version = (2, int(version_match[4]))
+ elif version_match[5]:
+ self._dev_version = (3, int(version_match[5]))
+ elif version_match[2] in ('alpha', '-alpha'):
self._dev_version = (1, )
else:
for handled in ('wmf', 'alpha', 'beta', 'rc'):
# if any of those pops up here our parser has failed
- assert handled not in version_match.group(2), \
- 'Found "{}" in "{}"'.format(handled,
- version_match.group(2))
- if version_match.group(2):
- pywikibot.logging.debug('Additional unused version part '
- '"{}"'.format(version_match.group(2)))
+ assert handled not in version_match[2], \
+ f'Found "{handled}" in "{version_match[2]}"'
+ if version_match[2]:
+ pywikibot.logging.debug(
+                f'Additional unused version part {version_match[2]!r}')
self._dev_version = (4, )

- self.suffix = version_match.group(2) or ''
+ self.suffix = version_match[2] or ''
self.version = tuple(components)

@staticmethod
diff --git a/pywikibot/tools/_deprecate.py b/pywikibot/tools/_deprecate.py
index 4feb1c2..691c8a8 100644
--- a/pywikibot/tools/_deprecate.py
+++ b/pywikibot/tools/_deprecate.py
@@ -273,7 +273,7 @@
# Get docstring up to :params so deprecation notices for
# parameters don't disrupt it
trim_params = re.compile(r'^.*?((?=:param)|$)', re.DOTALL)
- trimmed_doc = trim_params.match(wrapper.__doc__).group(0)
+ trimmed_doc = trim_params.match(wrapper.__doc__)[0]

if not deprecated_notice.search(trimmed_doc): # No notice
add_docstring(wrapper)
diff --git a/pywikibot/tools/djvu.py b/pywikibot/tools/djvu.py
index 39e4ae7..3da4eda 100644
--- a/pywikibot/tools/djvu.py
+++ b/pywikibot/tools/djvu.py
@@ -160,7 +160,7 @@
if 'FORM:DJVU' in line:
m = self._pat_form.search(line)
if m:
- key, id = int(m.group('n')), m.group('id')
+ key, id = int(m['n']), m['id']
else:
# If djvu doc has only one page,
# FORM:DJVU line in djvudump has no id
@@ -169,7 +169,7 @@
if 'INFO' in line:
m = self._pat_info.search(line)
if m:
- size, dpi = m.group('size'), int(m.group('dpi'))
+ size, dpi = m['size'], int(m['dpi'])
else:
size, dpi = None, None
else:
diff --git a/pywikibot/tools/threading.py b/pywikibot/tools/threading.py
index 6d13cbe..b076a5f 100644
--- a/pywikibot/tools/threading.py
+++ b/pywikibot/tools/threading.py
@@ -70,7 +70,7 @@
"""Return number of acquired locks."""
with self._block:
counter = re.search(r'count=(\d+) ', repr(self))
- return int(counter.group(1))
+ return int(counter[1])

def locked(self):
"""Return true if the lock is acquired."""
diff --git a/pywikibot/xmlreader.py b/pywikibot/xmlreader.py
index 69712c1..d619d9f 100644
--- a/pywikibot/xmlreader.py
+++ b/pywikibot/xmlreader.py
@@ -40,10 +40,10 @@
moveRestriction = None
editLockMatch = re.search('edit=([^:]*)', restrictions)
if editLockMatch:
- editRestriction = editLockMatch.group(1)
+ editRestriction = editLockMatch[1]
moveLockMatch = re.search('move=([^:]*)', restrictions)
if moveLockMatch:
- moveRestriction = moveLockMatch.group(1)
+ moveRestriction = moveLockMatch[1]
if restrictions == 'sysop':
editRestriction = 'sysop'
moveRestriction = 'sysop'
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index 668903d..2f69fcc 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -215,7 +215,7 @@
match = re.fullmatch(r'(\d{1,3}(?: \d{3})+|\d+) *([BkKMT]?)', string)
if not match:
raise MalformedConfigError(f"Couldn't parse size: {string}")
- val, unit = (int(match.group(1).replace(' ', '')), match.group(2))
+ val, unit = (int(match[1].replace(' ', '')), match[2])
if unit == 'M':
val *= 1024
unit = 'K'
@@ -588,11 +588,11 @@
return None
# TODO: handle unsigned
try:
- maxage = str2timedelta(re_t.group(1), thread.timestamp)
+ maxage = str2timedelta(re_t[1], thread.timestamp)
except ValueError as e:
raise MalformedConfigError(e) from None
if self.now - thread.timestamp > maxage:
- duration = str2localized_duration(self.site, re_t.group(1))
+ duration = str2localized_duration(self.site, re_t[1])
return ('duration', duration)
# TODO: handle marked with template
return None
diff --git a/scripts/category.py b/scripts/category.py
index fa64fb0..7c22710 100755
--- a/scripts/category.py
+++ b/scripts/category.py
@@ -465,7 +465,7 @@
brackets_regex = re.compile(r'(.*) \(.+?\)')
match_object = brackets_regex.match(page_name)
if match_object:
- page_name = match_object.group(1)
+ page_name = match_object[1]
split_string = page_name.rsplit(' ', 1)
if len(split_string) > 1:
# pull last part of the name to the beginning, and append the
diff --git a/scripts/checkimages.py b/scripts/checkimages.py
index 372b60e..d0fc38b 100755
--- a/scripts/checkimages.py
+++ b/scripts/checkimages.py
@@ -1089,14 +1089,14 @@

for number, m in enumerate(SETTINGS_REGEX.finditer(testo),
start=1):
- name = str(m.group(1))
- find_tipe = str(m.group(2))
- find = str(m.group(3))
- imagechanges = str(m.group(4))
- summary = str(m.group(5))
- head = str(m.group(6))
- text = str(m.group(7))
- mexcatched = str(m.group(8))
+ name = str(m[1])
+ find_tipe = str(m[2])
+ find = str(m[3])
+ imagechanges = str(m[4])
+ summary = str(m[5])
+ head = str(m[6])
+ text = str(m[7])
+ mexcatched = str(m[8])
tupla = [number, name, find_tipe, find, imagechanges,
summary, head, text, mexcatched]
self.settings_data += [tupla]
@@ -1312,7 +1312,7 @@
regl = r"(\"|\')(.*?)\1(?:,|\])"
pl = re.compile(regl)
for xl in pl.finditer(raw):
- word = xl.group(2).replace('\\\\', '\\')
+ word = xl[2].replace('\\\\', '\\')
if word not in list_loaded:
list_loaded.append(word)
return list_loaded
diff --git a/scripts/commonscat.py b/scripts/commonscat.py
index dd1cd02..192046c 100755
--- a/scripts/commonscat.py
+++ b/scripts/commonscat.py
@@ -491,10 +491,10 @@
.format(commonsPage, loguser, logcomment))
break

- if m.group('newcat1'):
- return self.checkCommonscatLink(m.group('newcat1'))
- if m.group('newcat2'):
- return self.checkCommonscatLink(m.group('newcat2'))
+ if m['newcat1']:
+ return self.checkCommonscatLink(m['newcat1'])
+ if m['newcat2']:
+ return self.checkCommonscatLink(m['newcat2'])

return ''

diff --git a/scripts/dataextend.py b/scripts/dataextend.py
index 64e621d..fbde614 100644
--- a/scripts/dataextend.py
+++ b/scripts/dataextend.py
@@ -590,9 +590,9 @@
day = None
m = re.search(r'[{\|](\d{4})\|(\d+)\|(\d+)[\|}]', text)
if m:
- year = int(m.group(1))
- month = int(m.group(2))
- day = int(m.group(3))
+ year = int(m[1])
+ month = int(m[2])
+ day = int(m[3])
if re.match(r'\d{,4}(?:年頃|\.)?$', text):
year = int(text)
month = None
@@ -606,84 +606,84 @@
month = int(text[-2:])
m = re.match(r'(\d{1,2})[-/](\d{4})', text)
if m:
- year = int(m.group(2))
- month = int(m.group(1))
+ year = int(m[2])
+ month = int(m[1])
m = re.match(r'(\d+)[-./|](\d{1,2})[-./|](\d{1,2})$', text)
if m:
- year = int(m.group(1))
- month = int(m.group(2))
- day = int(m.group(3))
+ year = int(m[1])
+ month = int(m[2])
+ day = int(m[3])
m = re.match(
r'(\d{1,2})[-./|]\s*(\d{1,2})[-./|]\s*(\d{3,4})\.?$', text)
if m:
- year = int(m.group(3))
- month = int(m.group(2))
- day = int(m.group(1))
+ year = int(m[3])
+ month = int(m[2])
+ day = int(m[1])
m = re.match(r'(\d{1,2})[-./\s]([iIvVxX]+)[-./\s](\d{4})$', text)
if m:
- year = int(m.group(3))
+ year = int(m[3])
try:
- month = self.MONTHNUMBER[m.group(2).lower()]
+ month = self.MONTHNUMBER[m[2].lower()]
except KeyError:
- raise ValueError(f"Don't know month {m.group(2)}")
- day = int(m.group(1))
+ raise ValueError(f"Don't know month {m[2]}")
+ day = int(m[1])
m = re.match(r"(\d+)(?:\.|er|eme|ème)?[\s.]\s*(?:d'|d[aei] )?"
r'([^\s.]{2,})\.?[\s.]\s*(\d+)$', text)
if m:
- year = int(m.group(3))
+ year = int(m[3])
try:
- month = self.MONTHNUMBER[m.group(2).lower()]
+ month = self.MONTHNUMBER[m[2].lower()]
except KeyError:
- raise ValueError(f"Don't know month {m.group(2)}")
- day = int(m.group(1))
+ raise ValueError(f"Don't know month {m[2]}")
+ day = int(m[1])
m = re.match(r'(\d{4})\.?[\s.]\s*([^\s.]{3,})\.?[\s.]\s*(\d+)$', text)
if m:
- year = int(m.group(1))
+ year = int(m[1])
try:
- month = self.MONTHNUMBER[m.group(2).lower()]
+ month = self.MONTHNUMBER[m[2].lower()]
except KeyError:
- raise ValueError(f"Don't know month {m.group(2)}")
- day = int(m.group(3))
+ raise ValueError(f"Don't know month {m[2]}")
+ day = int(m[3])
m = re.match(r"(\d+) (?:de |d')?(\w+[a-z]\w+) de (\d+)", text)
if m:
- year = int(m.group(3))
+ year = int(m[3])
try:
- month = self.MONTHNUMBER[m.group(2).lower()]
+ month = self.MONTHNUMBER[m[2].lower()]
except KeyError:
- raise ValueError(f"Don't know month {m.group(2)}")
- day = int(m.group(1))
+ raise ValueError(f"Don't know month {m[2]}")
+ day = int(m[1])
m = re.match(r'(\w*[a-zA-Z]\w*)\.? (\d+)$', text)
if m:
- year = int(m.group(2))
+ year = int(m[2])
try:
- month = self.MONTHNUMBER[m.group(1).lower()]
+ month = self.MONTHNUMBER[m[1].lower()]
except KeyError:
- raise ValueError(f"Don't know month {m.group(1)}")
+ raise ValueError(f"Don't know month {m[1]}")
m = re.match(r'(\w+)\.? (\d{1,2})(?:st|nd|rd|th)?\.?\s*,\s*(\d{3,4})$',
text)
if m:
- year = int(m.group(3))
+ year = int(m[3])
try:
- month = self.MONTHNUMBER[m.group(1).lower()]
+ month = self.MONTHNUMBER[m[1].lower()]
except KeyError:
- raise ValueError(f"Don't know month {m.group(1)}")
- day = int(m.group(2))
+ raise ValueError(f"Don't know month {m[1]}")
+ day = int(m[2])
m = re.match(r'(\d{4}),? (\d{1,2}) (\w+)', text)
if m:
- year = int(m.group(1))
+ year = int(m[1])
try:
- month = self.MONTHNUMBER[m.group(3).lower()]
+ month = self.MONTHNUMBER[m[3].lower()]
except KeyError:
- raise ValueError(f"Don't know month {m.group(1)}")
- day = int(m.group(2))
+ raise ValueError(f"Don't know month {m[1]}")
+ day = int(m[2])
m = re.match(r'(\d+)年(\d+)月(\d+)日', text)
if m:
- year = int(m.group(1))
- month = int(m.group(2))
- day = int(m.group(3))
+ year = int(m[1])
+ month = int(m[2])
+ day = int(m[3])
m = re.match(r'(\d+)年$', text)
if m:
- year = int(m.group(1))
+ year = int(m[1])
if day == 0:
day = None
if day is None and month == 0:
@@ -716,8 +716,8 @@

def createquantityclaim(self, text):
m = re.match(r'(\d+(?:\.\d+)?)\s*([a-z]\w*)', text.replace(',', '.'))
- amount = m.group(1)
- name = m.group(2).lower()
+ amount = m[1]
+ name = m[2].lower()
return pywikibot.WbQuantity(amount,
unit=pywikibot.ItemPage(
self.site, self.QUANTITYTYPE[name]),
@@ -1125,7 +1125,7 @@
if value.startswith('!date!'):
value = value[6:]
if value.startswith('!q!'):
- value = re.search(r'\d+(?:\.\d+)?', value).group(0)
+ value = re.search(r'\d+(?:\.\d+)?', value)[0]
elif value.startswith('!i!'):
value = value[3:].strip()

@@ -1330,7 +1330,7 @@
if self.hrtre:
match = re.compile('(?s)' + self.hrtre).search(self.html)
if match:
- text = match.group(1)
+ text = match[1]
text = text.replace('\\n', '\n')
text = text.replace('\\t', '\t')
text = text.replace('\\r', '\n')
@@ -1981,16 +1981,16 @@
if dtype:
alt = [dtype] + alt
for alttype in alt:
- if self.getdata(alttype, m.group(1), ask=False) \
- and self.getdata(alttype, m.group(1), ask=False) != 'XXX':
- return self.getdata(alttype, m.group(1), ask=False)
+ if self.getdata(alttype, m[1], ask=False) \
+ and self.getdata(alttype, m[1], ask=False) != 'XXX':
+ return self.getdata(alttype, m[1], ask=False)
for skip in skips:
- if self.getdata(skip, m.group(1), ask=False) \
- and self.getdata(skip, m.group(1), ask=False) != 'XXX':
+ if self.getdata(skip, m[1], ask=False) \
+ and self.getdata(skip, m[1], ask=False) != 'XXX':
return None
if dtype:
- return self.getdata(dtype, m.group(1))
- return m.group(1)
+ return self.getdata(dtype, m[1])
+ return m[1]

def findallbyre(self, regex, html, dtype=None, skips=None,
alt=None) -> List[str]:
@@ -2552,9 +2552,7 @@
m = re.match(r'(\d+)[/\-](\d+)[/\-](\d+)', result)
if m:
result = '{}-{}-{}'.format(
- m.group(2),
- m.group(1),
- m.group(3) if len(m.group(3)) > 2 else '19' + m.group(3)
+ m[2], m[1], m[3] if len(m[3]) > 2 else '19' + m[3]
)
return result

@@ -2573,9 +2571,7 @@
m = re.match(r'(\d+)[/\-](\d+)[/\-](\d+)', result)
if m:
result = '{}-{}-{}'.format(
- m.group(2),
- m.group(1),
- m.group(3) if len(m.group(3)) > 2 else '19' + m.group(3)
+ m[2], m[1], m[3] if len(m[3]) > 2 else '19' + m[3]
)
return result

@@ -13885,21 +13881,22 @@

def prepare(self, html: str):
def reworkwikilink(wikipart):
- parts = wikipart.group(1).split('|')
+ parts = wikipart[1].split('|')
return '[[{}]]'.format(parts[0] if ':' in parts[0] else parts[-1])

if not html:
return None
+
f = codecs.open('result.html', 'w', 'utf-8')
f.write(html)
f.close()
- html = re.search(self.mainRE, html).group(1)
+ html = re.search(self.mainRE, html)[1]
html = re.sub(r'{{nowrap\|([^{}]*)}}', r'\1', html)
return re.sub(r'\[\[([^\[\]]*)\]\]', reworkwikilink, html)

@staticmethod
def excludetemplatelight(text):
- templatetype = re.search('([^{|]*)', text).group(0).lower().strip()
+ templatetype = re.search('([^{|]*)', text)[0].lower().strip()
firstword = templatetype.split()[0]
lastword = templatetype.split()[-1]
return (
@@ -13996,7 +13993,7 @@

@staticmethod
def excludetemplate(text):
- templatetype = re.search('([^{|]+)', text).group(0).lower().strip()
+ templatetype = re.search('([^{|]+)', text)[0].lower().strip()
firstword = templatetype.split()[0]
lastword = templatetype.split()[-1]
return (
diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py
index e06e11b..98cbc59 100755
--- a/scripts/fixing_redirects.py
+++ b/scripts/fixing_redirects.py
@@ -84,16 +84,15 @@
# Make sure that next time around we will not find this same hit.
curpos = m.start() + 1

- is_interwikilink = mysite.isInterwikiLink(m.group('title'))
+ is_interwikilink = mysite.isInterwikiLink(m['title'])

# ignore interwiki links, links in the disabled area
# and links to sections of the same page
- if (m.group('title').strip() == ''
+ if (m['title'].strip() == ''
or is_interwikilink
or isDisabled(text, m.start())):
continue
- actual_link_page = pywikibot.Page(target_page.site,
- m.group('title'))
+ actual_link_page = pywikibot.Page(target_page.site, m['title'])
# Check whether the link found is to page.
try:
actual_link_page.title()
@@ -105,22 +104,22 @@

# The link looks like this:
# [[page_title|link_text]]trailing_chars
- page_title = m.group('title')
- link_text = m.group('label')
+ page_title = m['title']
+ link_text = m['label']

if not link_text:
# or like this: [[page_title]]trailing_chars
link_text = page_title
- if m.group('section') is None:
+ if m['section'] is None:
section = ''
else:
- section = m.group('section')
+ section = m['section']
if section and target_page.section():
pywikibot.warning(
'Source section {} and target section {} found. '
'Skipping.'.format(section, target_page))
continue
- trailing_chars = m.group('linktrail')
+ trailing_chars = m['linktrail']
if trailing_chars:
link_text += trailing_chars

diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py
index 2e589fd..e50668e 100755
--- a/scripts/harvest_template.py
+++ b/scripts/harvest_template.py
@@ -391,7 +391,7 @@
# Try to extract a valid page
for match in pywikibot.link_regex.finditer(value):
matched = True
- link_text = match.group(1)
+ link_text = match[1]
linked_item = self.template_link_target(item, site, link_text)
if linked_item:
yield linked_item
@@ -431,8 +431,8 @@
start, end = match.span()
since_prev_match = value[prev_end:start]

- title = match.group('title').strip()
- text = match.group(2)
+ title = match['title'].strip()
+ text = match[2]
if text:
text = text[1:].strip() # remove '|'

@@ -480,7 +480,7 @@
.. versionadded:: 7.5
"""
for match in self.linkR.finditer(value):
- yield match.group('url')
+ yield match['url']

@staticmethod
def handle_commonsmedia(value, site,
diff --git a/scripts/misspelling.py b/scripts/misspelling.py
index 71cca13..e72fd16 100755
--- a/scripts/misspelling.py
+++ b/scripts/misspelling.py
@@ -122,7 +122,7 @@
# misspelling is ambiguous, see for example:
# https://de.wikipedia.org/wiki/Buthan
for match in self.linkR.finditer(correct_spelling):
- self.opt.pos.append(match.group('title'))
+ self.opt.pos.append(match['title'])

if not self.opt.pos:
# There were no links in the parameter, so there is
diff --git a/scripts/noreferences.py b/scripts/noreferences.py
index 96570b6..7460069 100755
--- a/scripts/noreferences.py
+++ b/scripts/noreferences.py
@@ -642,7 +642,7 @@
'Adding references section before {} section...\n'
.format(section))
index = match.start()
- ident = match.group('ident')
+ ident = match['ident']
return self.createReferenceSection(oldText, index,
ident)
else:
diff --git a/scripts/pagefromfile.py b/scripts/pagefromfile.py
index 0685092..4a564d5 100755
--- a/scripts/pagefromfile.py
+++ b/scripts/pagefromfile.py
@@ -248,12 +248,12 @@
if self.opt.include:
contents = location.group()
else:
- contents = location.group(1)
+ contents = location[1]

title = self.opt.title
if not title:
try:
- title = title_regex.search(contents).group(1)
+ title = title_regex.search(contents)[1]
if self.opt.notitle:
# Remove title (to allow creation of redirects)
contents = title_regex.sub('', contents, count=1)
diff --git a/scripts/redirect.py b/scripts/redirect.py
index 4cc7790..ad2847b 100755
--- a/scripts/redirect.py
+++ b/scripts/redirect.py
@@ -168,7 +168,7 @@

m = redirR.match(entry.text)
if m:
- target = m.group(1)
+ target = m[1]
# There might be redirects to another wiki. Ignore these.
target_link = pywikibot.Link(target, self.site)
try:
diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index 6ef24d6..9fbd2f5 100755
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -320,11 +320,11 @@

# Parse references
for match in self.REFS.finditer(text):
- content = match.group('content')
+ content = match['content']
if not content.strip():
continue

- params = match.group('params')
+ params = match['params']
group = self.GROUPS.search(params) or ''
if group not in found_refs:
found_refs[group] = {}
@@ -338,8 +338,8 @@

found = self.NAMES.search(params)
if found:
- quoted = found.group('quote') in ['"', "'"]
- name = found.group('name')
+ quoted = found['quote'] in ['"', "'"]
+ name = found['name']

if not v[IX.name]:
# First name associated with this content
@@ -555,12 +555,12 @@
# for each link to change
for match in linksInRef.finditer(raw_text):

- link = match.group('url')
+ link = match['url']
if 'jstor.org' in link:
# TODO: Clean URL blacklist
continue

- ref = RefLink(link, match.group('name'), site=self.site)
+ ref = RefLink(link, match['name'], site=self.site)

try:
r = comms.http.fetch(
diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py
index 2d57096..bc30d27 100755
--- a/scripts/solve_disambiguation.py
+++ b/scripts/solve_disambiguation.py
@@ -762,7 +762,7 @@
for line in page.text.splitlines():
found = reg.match(line)
if found:
- yield found.group(1)
+ yield found[1]

def firstize(self, page, links) -> List[pywikibot.Page]:
"""Call firstlinks and remove extra links.
@@ -893,8 +893,7 @@
# Ensure that next time around we will not find this same hit.
curpos = m.start() + 1
try:
- foundlink = pywikibot.Link(m.group('title'),
- disamb_page.site)
+ foundlink = pywikibot.Link(m['title'], disamb_page.site)
foundlink.parse()
except Error:
continue
@@ -911,7 +910,7 @@
except Error:
# must be a broken link
pywikibot.log('Invalid link [[{}]] in page [[{}]]'
- .format(m.group('title'), ref_page.title()))
+ .format(m['title'], ref_page.title()))
continue

n += 1
@@ -989,19 +988,19 @@

# The link looks like this:
# [[page_title|link_text]]trailing_chars
- page_title = m.group('title')
- link_text = m.group('label')
+ page_title = m['title']
+ link_text = m['label']

if not link_text:
# or like this: [[page_title]]trailing_chars
link_text = page_title

- if m.group('section') is None:
+ if m['section'] is None:
section = ''
else:
- section = m.group('section')
+ section = m['section']

- trailing_chars = m.group('linktrail')
+ trailing_chars = m['linktrail']
if trailing_chars:
link_text += trailing_chars

diff --git a/scripts/upload.py b/scripts/upload.py
index 1a0d137..c654090 100755
--- a/scripts/upload.py
+++ b/scripts/upload.py
@@ -75,10 +75,10 @@
if not match:
pywikibot.error('Chunk size parameter is not valid.')
chunk_size = 0
- elif match.group(1): # number was in there
- base = float(match.group(1))
- if match.group(2): # suffix too
- suffix = match.group(2).lower()
+ elif match[1]: # number was in there
+ base = float(match[1])
+ if match[2]: # suffix too
+ suffix = match[2].lower()
if suffix == 'k':
suffix = 1000
elif suffix == 'm':
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 0f1dccd..65c3790 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -226,10 +226,10 @@
text = textlib.removeDisabledParts(text)
link_regex = textlib.compileLinkR(without_bracketed, only_bracketed)
for m in link_regex.finditer(text):
- if m.group('url'):
- yield m.group('url')
+ if m['url']:
+ yield m['url']
else:
- yield m.group('urlb')
+ yield m['urlb']


XmlDumpPageGenerator = partial(
diff --git a/tests/api_tests.py b/tests/api_tests.py
index 22753f2..e945abd 100755
--- a/tests/api_tests.py
+++ b/tests/api_tests.py
@@ -1004,7 +1004,7 @@
for info, time in patterns.items():
lag = api._requests.lagpattern.search(info)
self.assertIsNotNone(lag)
- self.assertEqual(float(lag.group('lag')), time)
+ self.assertEqual(float(lag['lag']), time)


if __name__ == '__main__': # pragma: no cover
diff --git a/tests/aspects.py b/tests/aspects.py
index b20b270..36709de 100644
--- a/tests/aspects.py
+++ b/tests/aspects.py
@@ -1440,7 +1440,7 @@
deprecation_messages = self.deprecation_messages
for deprecation_message in deprecation_messages:
match = self._generic_match.match(deprecation_message)
- if (match and bool(match.group(1)) == (msg is self.INSTEAD)
+ if (match and bool(match[1]) == (msg is self.INSTEAD)
or msg is None):
break
else:
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index be98cdd..c92e84d 100755
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -630,13 +630,13 @@
for pattern in patterns:
m = func(pattern)
self.assertIsNotNone(m)
- self.assertIsNotNone(m.group(0))
- self.assertIsNone(m.group('name'))
- self.assertIsNone(m.group(1))
- self.assertIsNone(m.group('params'))
- self.assertIsNone(m.group(2))
- self.assertIsNotNone(m.group('unhandled_depth'))
- self.assertTrue(m.group(0).endswith('foo {{bar}}'))
+ self.assertIsNotNone(m[0])
+ self.assertIsNone(m['name'])
+ self.assertIsNone(m[1])
+ self.assertIsNone(m['params'])
+ self.assertIsNone(m[2])
+ self.assertIsNotNone(m['unhandled_depth'])
+ self.assertTrue(m[0].endswith('foo {{bar}}'))


class TestDisabledParts(DefaultDrySiteTestCase):
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index 4929296..65bb9a8 100755
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -285,8 +285,7 @@

txt_match = self.date + '<!--a test comment-->'
exp_match = 'a test comment'
- self.assertEqual(ts._comment_pat.search(txt_match).group(1),
- exp_match)
+ self.assertEqual(ts._comment_pat.search(txt_match)[1], exp_match)

def test_timestripper_match_hyperlink(self):
"""Test that hyperlinks are correctly matched."""
@@ -304,9 +303,9 @@
txt_match = '[[wikilink|a wikilink with no date]]'
exp_match_link = 'wikilink'
exp_match_anchor = '|a wikilink with no date'
- self.assertEqual(ts._wikilink_pat.search(txt_match).group('link'),
+ self.assertEqual(ts._wikilink_pat.search(txt_match)['link'],
exp_match_link)
- self.assertEqual(ts._wikilink_pat.search(txt_match).group('anchor'),
+ self.assertEqual(ts._wikilink_pat.search(txt_match)['anchor'],
exp_match_anchor)

def test_timestripper_match_comment_with_date(self):

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I2e3e4073d317eb23b7ba040642e82edfbe471cdd
Gerrit-Change-Number: 836138
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged