Revision: 5972 Author: a_engels Date: 2008-10-15 07:20:05 +0000 (Wed, 15 Oct 2008)
Log Message: ----------- adding option -min, allowing one to work only on disambiguation pages with a certain minimum number of links.
Modified Paths: -------------- trunk/pywikipedia/solve_disambiguation.py
Modified: trunk/pywikipedia/solve_disambiguation.py =================================================================== --- trunk/pywikipedia/solve_disambiguation.py 2008-10-15 07:18:52 UTC (rev 5971) +++ trunk/pywikipedia/solve_disambiguation.py 2008-10-15 07:20:05 UTC (rev 5972) @@ -53,6 +53,9 @@ that is defined (to the bot) as the category containing disambiguation pages, starting at XY. If only '-start' or '-start:' is given, it starts at the beginning. + + -min:XX (XX being a number) only work on disambiguation pages for which + at least XX referring pages are to be worked on.
To complete a move of a page, one can use:
@@ -395,12 +398,13 @@ return linkupper
class ReferringPageGeneratorWithIgnore: - def __init__(self, disambPage, primary=False): + def __init__(self, disambPage, primary=False, minimum = 0): self.disambPage = disambPage # if run with the -primary argument, enable the ignore manager self.primaryIgnoreManager = PrimaryIgnoreManager(disambPage, enabled=primary) - + self.minimum = minimum + def __iter__(self): # TODO: start yielding before all referring pages have been found refs = [page for page in self.disambPage.getReferences(follow_redirects = False, withTemplateInclusion = False)] @@ -417,6 +421,9 @@ elif self.primaryIgnoreManager.isIgnored(refs[i]): #wikipedia.output('Ignoring page %s because it was skipped before' % refs[i].title()) del refs[i] + if len(refs) < self.minimum: + wikipedia.output(u"Found only %d pages to work on; skipping." % len(refs)) + return wikipedia.output(u"Will work on %d pages." % len(refs)) for ref in refs: yield ref @@ -489,13 +496,14 @@ 'hu': u'Egyért-redir', }
- def __init__(self, always, alternatives, getAlternatives, generator, primary, main_only): + def __init__(self, always, alternatives, getAlternatives, generator, primary, main_only, minimum = 0): self.always = always self.alternatives = alternatives self.getAlternatives = getAlternatives self.generator = generator self.primary = primary self.main_only = main_only + self.minimum = minimum
self.mysite = wikipedia.getSite() self.mylang = self.mysite.language() @@ -898,7 +906,7 @@ self.alternatives.sort() self.listAlternatives()
- gen = ReferringPageGeneratorWithIgnore(disambPage, self.primary) + gen = ReferringPageGeneratorWithIgnore(disambPage, self.primary, minimum = self.minimum) preloadingGen = pagegenerators.PreloadingGenerator(gen) for refPage in preloadingGen: if not self.primaryIgnoreManager.isIgnored(refPage): @@ -926,6 +934,7 @@
# For sorting the linked pages, case can be ignored ignoreCase = False + minimum = 0
for arg in wikipedia.handleArgs(): if arg.startswith('-primary:'): @@ -958,6 +967,8 @@ getAlternatives = False elif arg == '-main': main_only = True + elif arg.startswith('-min:'): + minimum = int(arg[5:]) elif arg.startswith('-start'): try: if len(arg) <= len('-start:'): @@ -989,7 +1000,7 @@ page = wikipedia.Page(wikipedia.getSite(), pageTitle) generator = iter([page])
- bot = DisambiguationRobot(always, alternatives, getAlternatives, generator, primary, main_only) + bot = DisambiguationRobot(always, alternatives, getAlternatives, generator, primary, main_only, minimum = minimum) bot.run()
pywikipedia-l@lists.wikimedia.org