Revision: 3928
Author: wikipedian
Date: 2007-08-01 00:30:33 +0000 (Wed, 01 Aug 2007)
Log Message:
-----------
Made it possible to use all the typical page-selection parameters, such as -ref:,
-links:, -file:, and -weblink:.
Added the -namespace: parameter.
Modified Paths:
--------------
trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2007-07-31 16:03:31 UTC (rev 3927)
+++ trunk/pywikipedia/weblinkchecker.py 2007-08-01 00:30:33 UTC (rev 3928)
@@ -9,7 +9,7 @@
The bot will store all links found dead in a .dat file in the deadlinks
subdirectory. To avoid the removing of links which are only temporarily
-unavailable, the bot only reports links which were reported dead at least
+unavailable, the bot ONLY reports links which were reported dead at least
two times, with a time lag of at least one week. Such links will be logged to a
.txt file in the deadlinks subdirectory.
@@ -29,7 +29,7 @@
python weblinkchecker.py -start:Example_page
Loads all wiki pages using the Special:Allpages feature, starting at
"Example page"
-
+
python weblinkchecker.py Example page
Only checks links found in the wiki page "Example page"
"""
@@ -91,22 +91,6 @@
re.compile('.*[\./@]berlinonline.de(/.*)?'), # a de: user wants to fix them
by hand and doesn't want them to be deleted, see [[de:Benutzer:BLueFiSH.as/BZ]].
]
-class Global(object):
- talk = config.report_dead_links_on_talk
-
- def handleArgs(self, args):
- unhandledArguments = []
- for arg in args:
- if arg == '-talk':
- self.talk = True
- elif arg == '-notalk':
- self.talk = False
- else:
- unhandledArguments.append(arg)
- return unhandledArguments
-
-globalvar = Global()
-
def weblinksIn(text, withoutBracketed = False, onlyBracketed = False):
text = wikipedia.removeDisabledParts(text)
@@ -546,10 +530,9 @@
Robot which will use several LinkCheckThreads at once to search for dead
weblinks on pages provided by the given generator.
'''
- def __init__(self, generator, start ='!'):
+ def __init__(self, generator):
self.generator = generator
- self.start = start
- if globalvar.talk:
+ if config.report_dead_links_on_talk:
#wikipedia.output("Starting talk page thread")
reportThread = DeadLinkReportThread()
# thread dies when program terminates
@@ -598,25 +581,37 @@
def main():
gen = None
- start = '!'
- pageTitle = []
- args = wikipedia.handleArgs()
- args = globalvar.handleArgs(args)
-
- for arg in args:
- if arg.startswith('-start:'):
- start = arg[7:]
+ singlePageTitle = []
+ # Which namespaces should be processed?
+ # default to [] which means all namespaces will be processed
+ namespaces = []
+ # This factory is responsible for processing command line arguments
+ # that are also used by other scripts and that determine on which pages
+ # to work on.
+ genFactory = pagegenerators.GeneratorFactory()
+
+ for arg in wikipedia.handleArgs():
+ if arg == '-talk':
+ config.report_dead_links_on_talk = True
+ elif arg == '-notalk':
+ config.report_dead_links_on_talk = False
+ elif arg.startswith('-namespace:'):
+ namespaces.append(int(arg[11:]))
else:
- pageTitle.append(arg)
+ generator = genFactory.handleArg(arg)
+ if generator:
+ gen = generator
+ else:
+ singlePageTitle.append(arg)
- if pageTitle:
- pageTitle = ' '.join(pageTitle)
- page = wikipedia.Page(wikipedia.getSite(), pageTitle)
+ if singlePageTitle:
+ singlePageTitle = ' '.join(singlePageTitle)
+ page = wikipedia.Page(wikipedia.getSite(), singlePageTitle)
gen = iter([page])
- else:
- gen = pagegenerators.AllpagesPageGenerator(start)
if gen:
+ if namespaces != []:
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
gen = pagegenerators.PreloadingGenerator(gen, pageNumber = 240)
gen = pagegenerators.RedirectFilterPageGenerator(gen)
bot = WeblinkCheckerRobot(gen)
@@ -651,7 +646,7 @@
bot.history.save()
else:
wikipedia.showHelp()
-
+
if __name__ == "__main__":
try:
main()
Show replies by date