[Pywikipedia-l] SVN: [6276] branches/rewrite/pywikibot
russblau at svn.wikimedia.org
russblau at svn.wikimedia.org
Thu Jan 22 14:53:20 UTC 2009
Revision: 6276
Author: russblau
Date: 2009-01-22 14:53:20 +0000 (Thu, 22 Jan 2009)
Log Message:
-----------
Merge recent page generators changes from trunk
Modified Paths:
--------------
branches/rewrite/pywikibot/pagegenerators.py
branches/rewrite/pywikibot/scripts/category.py
branches/rewrite/pywikibot/scripts/replace.py
branches/rewrite/pywikibot/scripts/touch.py
Property Changed:
----------------
branches/rewrite/pywikibot/pagegenerators.py
Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py 2009-01-21 20:21:19 UTC (rev 6275)
+++ branches/rewrite/pywikibot/pagegenerators.py 2009-01-22 14:53:20 UTC (rev 6276)
@@ -77,6 +77,10 @@
config.py for instructions.
Argument can also be given as "-google:searchstring".
+-namespace Filter the page generator to only yield pages in the
+ specified namespaces. Separate multiple namespace
+ numbers with commas.
+
-interwiki Work on the given page and all equivalent pages in other
languages. This can, for example, be used to fight
multi-site spamming.
@@ -145,21 +149,43 @@
class GeneratorFactory(object):
"""Process command line arguments and return appropriate page generator."""
+ def __init__(self):
+ self.gens = []
+ self.namespaces = []
- def setCategoryGen(self, arg, length, recurse = False):
+ def getCombinedGenerator(self):
+ """Return the combination of all accumulated generators.
+
+ Only call this after all arguments have been parsed.
+
+ """
+ if len(self.gens) == 0:
+ return None
+ elif len(self.gens) == 1:
+ gensList = self.gens[0]
+ else:
+ gensList = CombinedPageGenerator(self.gens)
+ genToReturn = DuplicateFilterPageGenerator(gensList)
+ if self.namespaces:
+ genToReturn = NamespaceFilterPageGenerator(genToReturn, map(int, self.namespaces))
+ return genToReturn
+
+ def getCategoryGen(self, arg, length, recurse = False):
if len(arg) == length:
categoryname = pywikibot.input(u'Please enter the category name:')
else:
categoryname = arg[length + 1:]
ind = categoryname.find('|')
+ startfrom = None
if ind > 0:
startfrom = categoryname[ind + 1:]
categoryname = categoryname[:ind]
- else:
- startfrom = None
- cat = pywikibot.Category(pywikibot.Link('Category:%s' % categoryname))
+ cat = pywikibot.Category(pywikibot.Link(categoryname,
+ defaultNamespace=14))
+ # Link constructor automatically prepends localized namespace
+ # if not included in user's input
return CategorizedPageGenerator(cat, start=startfrom, recurse=recurse)
def setSubCategoriesGen(self, arg, length, recurse=False):
@@ -175,10 +201,20 @@
else:
startfrom = None
- cat = pywikibot.Category(pywikibot.Link('Category:%s' % categoryname))
+ cat = pywikibot.Category(pywikibot.Link(categoryname,
+ defaultNamespace=14))
return SubCategoriesPageGenerator(cat, start=startfrom, recurse=recurse)
def handleArg(self, arg):
+ """Parse one argument at a time.
+
+ If it is recognized as an argument that specifies a generator, a
+ generator is created and added to the accumulation list, and the
+ function returns true. Otherwise, it returns false, so that caller
+ can try parsing the argument. Call getCombinedGenerator() after all
+ arguments have been parsed to get the final output generator.
+
+ """
gen = None
if arg.startswith('-filelinks'):
fileLinksPageTitle = arg[11:]
@@ -224,14 +260,35 @@
textfilename = pywikibot.input(
u'Please enter the local file name:')
gen = TextfilePageGenerator(textfilename)
+ elif arg.startswith('-namespace'):
+ if len(arg) == len('-namespace'):
+ self.namespaces.append(
+ pywikibot.input(u'What namespace are you filtering on?'))
+ else:
+ self.namespaces.extend(arg[len('-namespace:'):].split(","))
+ return True
elif arg.startswith('-catr'):
- gen = self.setCategoryGen(arg, 5, recurse = True)
+ gen = self.getCategoryGen(arg, len("-catr"), recurse = True)
+ elif arg.startswith('-category'):
+ gen = self.getCategoryGen(arg, len('-category'))
elif arg.startswith('-cat'):
- gen = self.setCategoryGen(arg, 4)
+ gen = self.getCategoryGen(arg, len("-cat"))
elif arg.startswith('-subcatsr'):
gen = self.setSubCategoriesGen(arg, 9, recurse = True)
elif arg.startswith('-subcats'):
gen = self.setSubCategoriesGen(arg, 8)
+ elif arg.startswith('-page'):
+ if len(arg) == len('-page'):
+ gen = [pywikibot.Page(
+ pywikibot.Link(
+ pywikibot.input(
+ u'What page do you want to use?'),
+ pywikibot.getSite())
+ )]
+ else:
+ gen = [pywikibot.Page(pywikibot.Link(arg[len('-page:'):],
+ pywikibot.getSite())
+ )]
elif arg.startswith('-uncatfiles'):
gen = UnCategorizedImageGenerator()
elif arg.startswith('-uncatcat'):
@@ -265,9 +322,10 @@
if not transclusionPageTitle:
transclusionPageTitle = pywikibot.input(
u'Pages that transclude which page should be processed?')
- transclusionPage = pywikibot.Page(pywikibot.Link(
- 'Template:%s' % transclusionPageTitle,
- pywikibot.Site()))
+ transclusionPage = pywikibot.Page(
+ pywikibot.Link(transclusionPageTitle,
+ defaultNamespace=10,
+ source=pywikibot.Site()))
gen = ReferringPageGenerator(transclusionPage,
onlyTemplateInclusion=True)
elif arg.startswith('-start'):
@@ -327,10 +385,12 @@
elif arg.startswith('-yahoo'):
gen = YahooSearchPageGenerator(arg[7:])
else:
- return None
- # make sure all yielded pages are unique
- gen = DuplicateFilterPageGenerator(gen)
- return gen
+ pass
+ if gen:
+ self.gens.append(gen)
+ return True
+ else:
+ return False
def AllpagesPageGenerator(start ='!', namespace=None, includeredirects=True,
@@ -490,6 +550,7 @@
def UserContributionsGenerator(username, number=250, namespaces=None,
site=None):
"""Yields number unique pages edited by user:username
+
namespaces : list of namespace numbers to fetch contribs from
"""
Property changes on: branches/rewrite/pywikibot/pagegenerators.py
___________________________________________________________________
Added: svn:mergeinfo
+ /trunk/pywikipedia/pagegenerators.py:6271-6272
Modified: branches/rewrite/pywikibot/scripts/category.py
===================================================================
--- branches/rewrite/pywikibot/scripts/category.py 2009-01-21 20:21:19 UTC (rev 6275)
+++ branches/rewrite/pywikibot/scripts/category.py 2009-01-22 14:53:20 UTC (rev 6276)
@@ -877,6 +877,8 @@
def main(*args):
+ global catDB
+
fromGiven = False
toGiven = False
batchMode = False
@@ -895,106 +897,124 @@
# The generator gives the pages that should be worked upon.
gen = None
- #If this is set to true then the custom edit summary given for removing
- #categories from articles will also be used as the deletion reason.
+ # If this is set to true then the custom edit summary given for removing
+ # categories from articles will also be used as the deletion reason.
useSummaryForDeletion = True
- try:
- catDB = CategoryDatabase()
- action = None
- sort_by_last_name = False
- restore = False
- for arg in pywikibot.handleArgs(*args):
- if arg == 'add':
- action = 'add'
- elif arg == 'remove':
- action = 'remove'
- elif arg == 'move':
- action = 'move'
- elif arg == 'tidy':
- action = 'tidy'
- elif arg == 'tree':
- action = 'tree'
- elif arg == 'listify':
- action = 'listify'
- elif arg == '-person':
- sort_by_last_name = True
- elif arg == '-rebuild':
- catDB.rebuild()
- elif arg.startswith('-from:'):
- oldCatTitle = arg[len('-from:'):].replace('_', ' ')
- fromGiven = True
- elif arg.startswith('-to:'):
- newCatTitle = arg[len('-to:'):].replace('_', ' ')
- toGiven = True
- elif arg == '-batch':
- batchMode = True
- elif arg == '-inplace':
- inPlace = True
- elif arg == '-delsum':
- # This parameter is kept for historical reasons, as it was not previously the default option.
- pass
- elif arg == '-nodelsum':
- useSummaryForDeletion = False
- elif arg == '-overwrite':
- overwrite = True
- elif arg == '-showimages':
- showImages = True
- elif arg.startswith('-summary:'):
- editSummary = arg[len('-summary:'):]
- elif arg.startswith('-match'):
- if len(arg) == len('-match'):
- titleRegex = pywikibot.input(u'Which regular expression should affected objects match?')
- else:
- titleRegex = arg[len('-match:'):]
- elif arg == '-talkpages':
- talkPages = True
- elif arg == '-recurse':
- recurse = True
+ catDB = CategoryDatabase()
+ action = None
+ sort_by_last_name = False
+ restore = False
+ for arg in pywikibot.handleArgs(*args):
+ if genFactory.handleArg(arg):
+ continue
+ if arg == 'add':
+ action = 'add'
+ elif arg == 'remove':
+ action = 'remove'
+ elif arg == 'move':
+ action = 'move'
+ elif arg == 'tidy':
+ action = 'tidy'
+ elif arg == 'tree':
+ action = 'tree'
+ elif arg == 'listify':
+ action = 'listify'
+ elif arg == '-person':
+ sort_by_last_name = True
+ elif arg == '-rebuild':
+ catDB.rebuild()
+ elif arg.startswith('-from:'):
+ oldCatTitle = arg[len('-from:'):].replace('_', ' ')
+ fromGiven = True
+ elif arg.startswith('-to:'):
+ newCatTitle = arg[len('-to:'):].replace('_', ' ')
+ toGiven = True
+ elif arg == '-batch':
+ batchMode = True
+ elif arg == '-inplace':
+ inPlace = True
+ elif arg == '-delsum':
+ # This parameter is kept for historical reasons,
+ # as it was previously not the default option.
+ pass
+ elif arg == '-nodelsum':
+ useSummaryForDeletion = False
+ elif arg == '-overwrite':
+ overwrite = True
+ elif arg == '-showimages':
+ showImages = True
+ elif arg.startswith('-summary:'):
+ editSummary = arg[len('-summary:'):]
+ elif arg.startswith('-match'):
+ if len(arg) == len('-match'):
+ titleRegex = pywikibot.input(
+ u'Which regular expression should affected objects match?')
else:
- gen = genFactory.handleArg(arg)
+ titleRegex = arg[len('-match:'):]
+ elif arg == '-talkpages':
+ talkPages = True
+ elif arg == '-recurse':
+ recurse = True
- if action == 'add':
- if not gen:
- gen = genFactory.handleArg('-links')
- # default for backwards compatibility
- # The preloading generator is responsible for downloading multiple
- # pages from the wiki simultaneously.
- gen = pagegenerators.PreloadingGenerator(gen)
- add_category(sort_by_last_name)
- elif action == 'remove':
- if (fromGiven == False):
- oldCatTitle = pywikibot.input(u'Please enter the name of the category that should be removed:')
- bot = CategoryRemoveRobot(oldCatTitle, batchMode, editSummary, useSummaryForDeletion, inPlace = inPlace)
- bot.run()
- elif action == 'move':
- if (fromGiven == False):
- oldCatTitle = pywikibot.input(u'Please enter the old name of the category:')
- if (toGiven == False):
- newCatTitle = pywikibot.input(u'Please enter the new name of the category:')
- bot = CategoryMoveRobot(oldCatTitle, newCatTitle, batchMode, editSummary, inPlace, titleRegex = titleRegex)
- bot.run()
- elif action == 'tidy':
- catTitle = pywikibot.input(u'Which category do you want to tidy up?')
- bot = CategoryTidyRobot(catTitle, catDB)
- bot.run()
- elif action == 'tree':
- catTitle = pywikibot.input(u'For which category do you want to create a tree view?')
- filename = pywikibot.input(u'Please enter the name of the file where the tree should be saved, or press enter to simply show the tree:')
- bot = CategoryTreeRobot(catTitle, catDB, filename)
- bot.run()
- elif action == 'listify':
- if (fromGiven == False):
- oldCatTitle = pywikibot.input(u'Please enter the name of the category to listify:')
- if (toGiven == False):
- newCatTitle = pywikibot.input(u'Please enter the name of the list to create:')
- bot = CategoryListifyRobot(oldCatTitle, newCatTitle, editSummary, overwrite, showImages, subCats = True, talkPages = talkPages, recurse = recurse)
- bot.run()
- else:
- pywikibot.showHelp('category')
+ gen = genFactory.getCombinedGenerator()
+ if action == 'add':
+ if not gen:
+ genFactory.handleArg('-links')
+ gen = genFactory.getCombinedGenerator()
+ # default for backwards compatibility
+ # The preloading generator is responsible for downloading multiple
+ # pages from the wiki simultaneously.
+ gen = pagegenerators.PreloadingGenerator(gen)
+ add_category(sort_by_last_name)
+ elif action == 'remove':
+ if (fromGiven == False):
+ oldCatTitle = pywikibot.input(
+ u'Please enter the name of the category that should be removed:')
+ bot = CategoryRemoveRobot(oldCatTitle, batchMode, editSummary,
+ useSummaryForDeletion, inPlace=inPlace)
+ bot.run()
+ elif action == 'move':
+ if (fromGiven == False):
+ oldCatTitle = pywikibot.input(
+ u'Please enter the old name of the category:')
+ if (toGiven == False):
+ newCatTitle = pywikibot.input(
+ u'Please enter the new name of the category:')
+ bot = CategoryMoveRobot(oldCatTitle, newCatTitle, batchMode,
+ editSummary, inPlace, titleRegex=titleRegex)
+ bot.run()
+ elif action == 'tidy':
+ catTitle = pywikibot.input(u'Which category do you want to tidy up?')
+ bot = CategoryTidyRobot(catTitle, catDB)
+ bot.run()
+ elif action == 'tree':
+ catTitle = pywikibot.input(
+ u'For which category do you want to create a tree view?')
+ filename = pywikibot.input(
+ u'Please enter the name of the file where the tree should be saved,\n'
+ u'or press enter to simply show the tree:')
+ bot = CategoryTreeRobot(catTitle, catDB, filename)
+ bot.run()
+ elif action == 'listify':
+ if (fromGiven == False):
+ oldCatTitle = pywikibot.input(
+ u'Please enter the name of the category to listify:')
+ if (toGiven == False):
+ newCatTitle = pywikibot.input(
+ u'Please enter the name of the list to create:')
+ bot = CategoryListifyRobot(oldCatTitle, newCatTitle, editSummary,
+ overwrite, showImages, subCats=True,
+ talkPages=talkPages, recurse=recurse)
+ bot.run()
+ else:
+ pywikibot.showHelp('category')
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ except pywikibot.Error:
+ pywikibot.logging.exception("Fatal error:")
finally:
catDB.dump()
pywikibot.stopme()
-
-
-if __name__ == "__main__":
- main()
Modified: branches/rewrite/pywikibot/scripts/replace.py
===================================================================
--- branches/rewrite/pywikibot/scripts/replace.py 2009-01-21 20:21:19 UTC (rev 6275)
+++ branches/rewrite/pywikibot/scripts/replace.py 2009-01-22 14:53:20 UTC (rev 6276)
@@ -71,12 +71,6 @@
Currently available predefined fixes are:
&fixes-help;
--namespace:n Number or name of namespace to process. The parameter can be
- used multiple times. It works in combination with all other
- parameters, except for the -start parameter. If you e.g.
- want to iterate over all categories starting at M, use
- -start:Category:M.
-
-always Don't prompt you for each replacement
-recursive Recurse replacement as long as possible. Be careful, this
@@ -497,9 +491,6 @@
dotall = False
# Will become True if the user inputs the commandline parameter -multiline
multiline = False
- # Which namespaces should be processed?
- # default to [] which means all namespaces will be processed
- namespaces = []
# Do all hits when they overlap
allowoverlap = False
# Do not recurse replacement
@@ -514,6 +505,8 @@
# Read commandline parameters.
for arg in pywikibot.handleArgs(*args):
+ if genFactory.handleArg(arg):
+ continue
if arg == '-regex':
regex = True
elif arg.startswith('-xmlstart'):
@@ -562,23 +555,15 @@
multiline = True
elif arg.startswith('-addcat:'):
add_cat = arg[len('-addcat:'):]
- elif arg.startswith('-namespace:'):
- try:
- namespaces.append(int(arg[11:]))
- except ValueError:
- namespaces.append(arg[11:])
elif arg.startswith('-summary:'):
edit_summary = arg[9:]
summary_commandline = True
elif arg.startswith('-allowoverlap'):
allowoverlap = True
else:
- generator = genFactory.handleArg(arg)
- if generator:
- gen = generator
- else:
- commandline_replacements.append(arg)
+ commandline_replacements.append(arg)
+ gen = genFactory.getCombinedGenerator()
if (len(commandline_replacements) % 2):
raise pywikibot.Error, 'require even number of replacements.'
elif (len(commandline_replacements) == 2 and fix == None):
@@ -706,8 +691,6 @@
# syntax error, show help text from the top of this file
pywikibot.showHelp('replace')
return
- if namespaces != []:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
if xmlFilename:
# XML parsing can be quite slow, so use smaller batches and
# longer lookahead.
Modified: branches/rewrite/pywikibot/scripts/touch.py
===================================================================
--- branches/rewrite/pywikibot/scripts/touch.py 2009-01-21 20:21:19 UTC (rev 6275)
+++ branches/rewrite/pywikibot/scripts/touch.py 2009-01-22 14:53:20 UTC (rev 6276)
@@ -56,38 +56,29 @@
gen = None
genFactory = pagegenerators.GeneratorFactory()
redirs = False
- namespaces = []
# If the user chooses to work on a single page, this temporary array is
# used to read the words from the page title. The words will later be
# joined with spaces to retrieve the full title.
pageTitle = []
for arg in pywikibot.handleArgs(*args):
+ if genFactory.handleArg(arg):
+ continue
if arg == '-redir':
redirs = True
- elif arg.startswith('-namespace:'):
- try:
- namespaces.append(int(arg[11:]))
- except ValueError:
- namespaces.append(arg[11:])
else:
- generator = genFactory.handleArg(arg)
- if generator:
- gen = generator
- else:
- pageTitle.append(arg)
+ pageTitle.append(arg)
- if pageTitle:
- # work on a single page
- page = pywikibot.Page(pywikibot.Link(' '.join(pageTitle)))
- gen = iter([page])
+ gen = genFactory.getCombinedGenerator()
if not gen:
- pywikibot.showHelp()
- else:
- if namespaces:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
- preloadingGen = pagegenerators.PreloadingGenerator(gen)
- bot = TouchBot(preloadingGen, redirs)
- bot.run()
+ if pageTitle:
+ # work on a single page
+ page = pywikibot.Page(pywikibot.Link(' '.join(pageTitle)))
+ gen = iter([page])
+ else:
+ pywikibot.showHelp()
+ preloadingGen = pagegenerators.PreloadingGenerator(gen)
+ bot = TouchBot(preloadingGen, redirs)
+ bot.run()
if __name__ == "__main__":
More information about the Pywikipedia-l
mailing list