Revision: 3928 Author: wikipedian Date: 2007-08-01 00:30:33 +0000 (Wed, 01 Aug 2007)
Log Message: ----------- Made it possible to use all the typical parameters such as -ref:, -links:, -file:, and -weblink:. Added -namespace: parameter.
Modified Paths: -------------- trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2007-07-31 16:03:31 UTC (rev 3927) +++ trunk/pywikipedia/weblinkchecker.py 2007-08-01 00:30:33 UTC (rev 3928) @@ -9,7 +9,7 @@
The bot will store all links found dead in a .dat file in the deadlinks subdirectory. To avoid the removing of links which are only temporarily -unavailable, the bot only reports links which were reported dead at least +unavailable, the bot ONLY reports links which were reported dead at least two times, with a time lag of at least one week. Such links will be logged to a .txt file in the deadlinks subdirectory.
@@ -29,7 +29,7 @@ python weblinkchecker.py -start:Example_page Loads all wiki pages using the Special:Allpages feature, starting at "Example page" - + python weblinkchecker.py Example page Only checks links found in the wiki page "Example page" """ @@ -91,22 +91,6 @@ re.compile('.*[./@]berlinonline.de(/.*)?'), # a de: user wants to fix them by hand and doesn't want them to be deleted, see [[de:Benutzer:BLueFiSH.as/BZ]]. ]
-class Global(object): - talk = config.report_dead_links_on_talk - - def handleArgs(self, args): - unhandledArguments = [] - for arg in args: - if arg == '-talk': - self.talk = True - elif arg == '-notalk': - self.talk = False - else: - unhandledArguments.append(arg) - return unhandledArguments - -globalvar = Global() - def weblinksIn(text, withoutBracketed = False, onlyBracketed = False): text = wikipedia.removeDisabledParts(text)
@@ -546,10 +530,9 @@ Robot which will use several LinkCheckThreads at once to search for dead weblinks on pages provided by the given generator. ''' - def __init__(self, generator, start ='!'): + def __init__(self, generator): self.generator = generator - self.start = start - if globalvar.talk: + if config.report_dead_links_on_talk: #wikipedia.output("Starting talk page thread") reportThread = DeadLinkReportThread() # thread dies when program terminates @@ -598,25 +581,37 @@
def main(): gen = None - start = '!' - pageTitle = [] - args = wikipedia.handleArgs() - args = globalvar.handleArgs(args) - - for arg in args: - if arg.startswith('-start:'): - start = arg[7:] + singlePageTitle = [] + # Which namespaces should be processed? + # default to [] which means all namespaces will be processed + namespaces = [] + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. + genFactory = pagegenerators.GeneratorFactory() + + for arg in wikipedia.handleArgs(): + if arg == '-talk': + config.report_dead_links_on_talk = True + elif arg == '-notalk': + config.report_dead_links_on_talk = False + elif arg.startswith('-namespace:'): + namespaces.append(int(arg[11:])) else: - pageTitle.append(arg) + generator = genFactory.handleArg(arg) + if generator: + gen = generator + else: + singlePageTitle.append(arg)
- if pageTitle: - pageTitle = ' '.join(pageTitle) - page = wikipedia.Page(wikipedia.getSite(), pageTitle) + if singlePageTitle: + singlePageTitle = ' '.join(singlePageTitle) + page = wikipedia.Page(wikipedia.getSite(), singlePageTitle) gen = iter([page]) - else: - gen = pagegenerators.AllpagesPageGenerator(start)
if gen: + if namespaces != []: + gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) gen = pagegenerators.PreloadingGenerator(gen, pageNumber = 240) gen = pagegenerators.RedirectFilterPageGenerator(gen) bot = WeblinkCheckerRobot(gen) @@ -651,7 +646,7 @@ bot.history.save() else: wikipedia.showHelp() - + if __name__ == "__main__": try: main()
pywikipedia-l@lists.wikimedia.org