jenkins-bot has submitted this change and it was merged.
Change subject: solve_disambiguation.py: Correct the way main namespace is detected
......................................................................
solve_disambiguation.py: Correct the way main namespace is detected
Previously, passing the "-main" option caused the "ignore_title" dict to be
extended to include namespace numbers. But those numbers never match a
page.title(), so the non-main-namespace pages could not be detected.
In the new implementation the "ignore_title" dict is not extended; instead,
"namespaces=0" is passed as a keyword argument to getReferences.
Bug:T105892
Change-Id: Icde898746dd77309d3e42b2da6c40f9bc4ab1194
---
M scripts/solve_disambiguation.py
1 file changed, 18 insertions(+), 17 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py
index 0753773..2d92b05 100755
--- a/scripts/solve_disambiguation.py
+++ b/scripts/solve_disambiguation.py
@@ -81,7 +81,6 @@
#
import codecs
-import itertools
import os
import re
@@ -362,18 +361,23 @@
"""Referring Page generator, with an ignore
manager."""
- def __init__(self, disambPage, primary=False, minimum=0):
+ def __init__(self, disambPage, primary=False, minimum=0, main_only=False):
self.disambPage = disambPage
# if run with the -primary argument, enable the ignore manager
self.primaryIgnoreManager = PrimaryIgnoreManager(disambPage,
enabled=primary)
self.minimum = minimum
+ self.main_only = main_only
def __iter__(self):
# TODO: start yielding before all referring pages have been found
- refs = [page for page in
- self.disambPage.getReferences(follow_redirects=False,
- withTemplateInclusion=False)]
+ refs = [
+ page for page in self.disambPage.getReferences(
+ follow_redirects=False,
+ withTemplateInclusion=False,
+ namespaces=0 if self.main_only else None
+ )
+ ]
pywikibot.output(u"Found %d references." % len(refs))
# Remove ignorables
if self.disambPage.site.family.name in ignore_title and \
@@ -669,8 +673,9 @@
[('yes', 'y'), ('no', 'n'),
('change redirect', 'c')], 'n',
automatic_quit=False)
if choice == 'y':
- gen = ReferringPageGeneratorWithIgnore(refPage,
- self.primary)
+ gen = ReferringPageGeneratorWithIgnore(
+ refPage, self.primary, main_only=self.main_only
+ )
preloadingGen = pagegenerators.PreloadingGenerator(gen)
for refPage2 in preloadingGen:
# run until the user selected 'quit'
@@ -1021,14 +1026,6 @@
'count': len(new_targets)})
def run(self):
- if self.main_only:
- if self.mysite.family.name not in ignore_title:
- ignore_title[self.mysite.family.name] = {}
- if self.mylang not in ignore_title[self.mysite.family.name]:
- ignore_title[self.mysite.family.name][self.mylang] = []
-
- ignore_title[self.mysite.family.name][self.mylang] += [
- '%s:' % ns for ns in itertools.chain(self.mysite.namespaces)]
for disambPage in self.generator:
self.primaryIgnoreManager = PrimaryIgnoreManager(
@@ -1046,8 +1043,12 @@
self.alternatives.sort()
SequenceOutputter(self.alternatives).output()
- gen = ReferringPageGeneratorWithIgnore(disambPage, self.primary,
- minimum=self.minimum)
+ gen = ReferringPageGeneratorWithIgnore(
+ disambPage,
+ self.primary,
+ minimum=self.minimum,
+ main_only=self.main_only
+ )
preloadingGen = pagegenerators.PreloadingGenerator(gen)
for refPage in preloadingGen:
if not self.primaryIgnoreManager.isIgnored(refPage):
--
To view, visit
https://gerrit.wikimedia.org/r/268620
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Icde898746dd77309d3e42b2da6c40f9bc4ab1194
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Dalba <dalba.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>