jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/674613 )
Change subject: [bugfix] Avoid duplicate reference names ......................................................................
[bugfix] Avoid duplicate reference names
The old implementation failed if the autogenerated references did not start at 1. An iterator is now used which yields the next unused number (up to 999, which should be enough).
Other improvements: - always use double quotes with references - modify pattern to find single quotes in reference names - remove the redundant regex to clean the title (\s already includes [\n\r\t]) - rename the result of self.NAMES.match(params) to found - call the removeDisabledParts function only once
Bug: T278040 Change-Id: Ie082dea4334e62f6818a208b86704b3a8afcd0ad --- M scripts/reflinks.py 1 file changed, 25 insertions(+), 21 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/reflinks.py b/scripts/reflinks.py index f21b808..205f5c8 100755 --- a/scripts/reflinks.py +++ b/scripts/reflinks.py @@ -47,6 +47,7 @@ import subprocess import tempfile
+from contextlib import suppress from functools import partial from textwrap import shorten from urllib.error import URLError @@ -56,6 +57,7 @@ import pywikibot
from pywikibot import comms, i18n, pagegenerators, textlib +from pywikibot.backports import removeprefix from pywikibot.bot import ExistingPageBot, NoRedirectPageBot, SingleSiteBot from pywikibot import config2 as config from pywikibot.pagegenerators import ( @@ -233,7 +235,6 @@ self.title = re.sub(r'[.+-=]{4,}', ' ', self.title) # remove \n and \r and unicode spaces from titles self.title = re.sub(r'\s', ' ', self.title) - self.title = re.sub(r'[\n\r\t]', ' ', self.title) # remove extra whitespaces # remove leading and trailing ./;/,/-/_/+/ / self.title = re.sub(r' +', ' ', self.title.strip(r'=.;,-+_ ')) @@ -285,10 +286,9 @@ # Match references self.REFS = re.compile( r'(?i)<ref(?P<params>[^>/]*)>(?P<content>.*?)</ref>') - self.NAMES = re.compile( - r'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.+)\s*(?P=quote).*') - self.GROUPS = re.compile( - r'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.+)\s*(?P=quote).*') + fmt = r'(?i).*{0}\s*=\s*(?P<quote>["']?)\s*(?P<{0}>.+)\s*(?P=quote).*' + self.NAMES = re.compile(fmt.format('name')) + self.GROUPS = re.compile(fmt.format('group')) self.autogen = i18n.twtranslate(site, 'reflinks-autogen')
def process(self, text): @@ -320,10 +320,10 @@ else: v = [None, [match.group()], False, False]
- name = self.NAMES.match(params) - if name: - quoted = name.group('quote') == '"' - name = name.group('name') + found = self.NAMES.match(params) + if found: + quoted = found.group('quote') in ['"', "'"] + name = found.group('name') if v[0]: if v[0] != name: named_repl[name] = [v[0], v[2]] @@ -344,14 +344,20 @@ found_ref_names[name] = 1 groupdict[content] = v
- id_ = 1 - while self.autogen + str(id_) in found_ref_names: - id_ += 1 + used_numbers = set() + for name in found_ref_names: + number = removeprefix(name, self.autogen) + with suppress(ValueError): + used_numbers.add(int(number)) + + # iterator to give the next free number + free_number = iter({str(i) for i in range(1, 1000) # should be enough + if i not in used_numbers})
for (g, d) in found_refs.items(): group = '' if g: - group = 'group="{}" '.format(group) + group = 'group="{}" '.format(group)
for (k, v) in d.items(): if len(v[1]) == 1 and not v[3]: @@ -359,10 +365,9 @@
name = v[0] if not name: - name = '"{}{}"'.format(self.autogen, id_) - id_ += 1 + name = '"{}{}"'.format(self.autogen, next(free_number)) elif v[2]: - name = '{!r}'.format(name) + name = '"{}"'.format(name)
named = '<ref {}name={}>{}</ref>'.format(group, name, k) text = text.replace(v[1][0], named, 1) @@ -384,10 +389,10 @@ # TODO : Support ref groups name = v[0] if v[1]: - name = '{!r}'.format(name) + name = '"{}"'.format(name)
text = re.sub( - '<ref name\s*=\s*(?P<quote>"?)\s*{}\s*(?P=quote)\s*/>' + r'<ref name\s*=\s*(?P<quote>["']?)\s*{}\s*(?P=quote)\s*/>' .format(k), '<ref name={} />'.format(name), text) return text @@ -518,10 +523,9 @@ """Process one page.""" # Load the page's text from the wiki new_text = page.text - + raw_text = textlib.removeDisabledParts(new_text) # for each link to change - for match in linksInRef.finditer( - textlib.removeDisabledParts(page.get())): + for match in linksInRef.finditer(raw_text):
link = match.group('url') if 'jstor.org' in link:
pywikibot-commits@lists.wikimedia.org