Revision: 5972
Author: a_engels
Date: 2008-10-15 07:20:05 +0000 (Wed, 15 Oct 2008)
Log Message:
-----------
adding option -min, allowing one to work only on disambiguation pages with a certain
minimum number of links.
Modified Paths:
--------------
trunk/pywikipedia/solve_disambiguation.py
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2008-10-15 07:18:52 UTC (rev 5971)
+++ trunk/pywikipedia/solve_disambiguation.py 2008-10-15 07:20:05 UTC (rev 5972)
@@ -53,6 +53,9 @@
that is defined (to the bot) as the category containing disambiguation
pages, starting at XY. If only '-start' or '-start:' is
given, it starts
at the beginning.
+
+ -min:XX (XX being a number) only work on disambiguation pages for which
+ at least XX are to be worked on.
To complete a move of a page, one can use:
@@ -395,12 +398,13 @@
return linkupper
class ReferringPageGeneratorWithIgnore:
- def __init__(self, disambPage, primary=False):
+ def __init__(self, disambPage, primary=False, minimum = 0):
self.disambPage = disambPage
# if run with the -primary argument, enable the ignore manager
self.primaryIgnoreManager = PrimaryIgnoreManager(disambPage,
enabled=primary)
-
+ self.minimum = minimum
+
def __iter__(self):
# TODO: start yielding before all referring pages have been found
refs = [page for page in self.disambPage.getReferences(follow_redirects = False,
withTemplateInclusion = False)]
@@ -417,6 +421,9 @@
elif self.primaryIgnoreManager.isIgnored(refs[i]):
#wikipedia.output('Ignoring page %s because it was skipped
before' % refs[i].title())
del refs[i]
+ if len(refs) < self.minimum:
+ wikipedia.output(u"Found only %d pages to work on; skipping." %
len(refs))
+ return
wikipedia.output(u"Will work on %d pages." % len(refs))
for ref in refs:
yield ref
@@ -489,13 +496,14 @@
'hu': u'Egyért-redir',
}
- def __init__(self, always, alternatives, getAlternatives, generator, primary,
main_only):
+ def __init__(self, always, alternatives, getAlternatives, generator, primary,
main_only, minimum = 0):
self.always = always
self.alternatives = alternatives
self.getAlternatives = getAlternatives
self.generator = generator
self.primary = primary
self.main_only = main_only
+ self.minimum = minimum
self.mysite = wikipedia.getSite()
self.mylang = self.mysite.language()
@@ -898,7 +906,7 @@
self.alternatives.sort()
self.listAlternatives()
- gen = ReferringPageGeneratorWithIgnore(disambPage, self.primary)
+ gen = ReferringPageGeneratorWithIgnore(disambPage, self.primary, minimum =
self.minimum)
preloadingGen = pagegenerators.PreloadingGenerator(gen)
for refPage in preloadingGen:
if not self.primaryIgnoreManager.isIgnored(refPage):
@@ -926,6 +934,7 @@
# For sorting the linked pages, case can be ignored
ignoreCase = False
+ minimum = 0
for arg in wikipedia.handleArgs():
if arg.startswith('-primary:'):
@@ -958,6 +967,8 @@
getAlternatives = False
elif arg == '-main':
main_only = True
+ elif arg.startswith('-min:'):
+ minimum = int(arg[5:])
elif arg.startswith('-start'):
try:
if len(arg) <= len('-start:'):
@@ -989,7 +1000,7 @@
page = wikipedia.Page(wikipedia.getSite(), pageTitle)
generator = iter([page])
- bot = DisambiguationRobot(always, alternatives, getAlternatives, generator, primary,
main_only)
+ bot = DisambiguationRobot(always, alternatives, getAlternatives, generator, primary,
main_only, minimum = minimum)
bot.run()