jenkins-bot has submitted this change and it was merged.
Change subject: [IMPROV] Redirect: Use dynamic interwiki prefix
......................................................................
[IMPROV] Redirect: Use dynamic interwiki prefix
Instead of guessing that the interwiki prefix is the same as a language
in the family, it is parsing it using the Link class. This also
separates the link label, section and automatically chooses the correct
capitalization.
Change-Id: I36e5c0e9d3ca0af6813a70851159445bc3285253
---
M scripts/redirect.py
1 file changed, 27 insertions(+), 30 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/redirect.py b/scripts/redirect.py
index 225d231..c29fa88 100755
--- a/scripts/redirect.py
+++ b/scripts/redirect.py
@@ -82,6 +82,12 @@
from pywikibot import i18n, xmlreader, Bot
+def space_to_underscore(link):
+ """Convert spaces to underscore."""
+ # previous versions weren't expecting spaces but underscores
+ return link.canonical_title().replace(' ', '_')
+
+
class RedirectGenerator:
"""Redirect generator."""
@@ -130,45 +136,36 @@
not in self.namespaces:
continue
if alsoGetPageTitles:
- pageTitles.add(entry.title.replace(' ', '_'))
+ pageTitles.add(space_to_underscore(pywikibot.Link(entry.title,
self.site)))
m = redirR.match(entry.text)
if m:
target = m.group(1)
# There might be redirects to another wiki. Ignore these.
- for code in self.site.family.langs.keys():
- if target.startswith('%s:' % code) \
- or target.startswith(':%s:' % code):
- if code == self.site.language():
- # link to our wiki, but with the lang prefix
- target = target[(len(code) + 1):]
- if target.startswith(':'):
- target = target[1:]
- else:
- pywikibot.output(
- u'NOTE: Ignoring %s which is a redirect to %s:'
- % (entry.title, code))
- target = None
- break
+ target_link = pywikibot.Link(target, self.site)
+ try:
+ target_link.parse()
+ except pywikibot.SiteDefinitionError as e:
+ pywikibot.log(e)
+ pywikibot.output(
+ u'NOTE: Ignoring {0} which is a redirect ({1}) to an '
+ u'unknown site.'.format(entry.title, target))
+ target_link = None
+ else:
+ if target_link.site != self.site:
+ pywikibot.output(
+ u'NOTE: Ignoring {0} which is a redirect to '
+ u'another site {1}.'.format(entry.title,
target_link.site))
+ target_link = None
# if the redirect does not link to another wiki
- if target:
- source = entry.title.replace(' ', '_')
- target = target.replace(' ', '_')
- # remove leading and trailing whitespace
- target = target.strip('_')
- # capitalize the first letter
- if not pywikibot.Site().nocapitalize:
- source = source[:1].upper() + source[1:]
- target = target[:1].upper() + target[1:]
- if '#' in target:
- target =
target[:target.index('#')].rstrip("_")
- if '|' in target:
+ if target_link and target_link.title:
+ source = pywikibot.Link(entry.title, self.site)
+ if target_link.anchor:
pywikibot.output(
u'HINT: %s is a redirect with a pipelink.'
% entry.title)
- target =
target[:target.index('|')].rstrip("_")
- if target: # in case preceding steps left nothing
- redict[source] = target
+ redict[space_to_underscore(source)] = (
+ space_to_underscore(target_link))
if alsoGetPageTitles:
return redict, pageTitles
else:
--
To view, visit
https://gerrit.wikimedia.org/r/190658
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I36e5c0e9d3ca0af6813a70851159445bc3285253
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>