Revision: 6276 Author: russblau Date: 2009-01-22 14:53:20 +0000 (Thu, 22 Jan 2009)
Log Message: ----------- Merge recent page generators changes from trunk
Modified Paths: -------------- branches/rewrite/pywikibot/pagegenerators.py branches/rewrite/pywikibot/scripts/category.py branches/rewrite/pywikibot/scripts/replace.py branches/rewrite/pywikibot/scripts/touch.py
Property Changed: ---------------- branches/rewrite/pywikibot/pagegenerators.py
Modified: branches/rewrite/pywikibot/pagegenerators.py =================================================================== --- branches/rewrite/pywikibot/pagegenerators.py 2009-01-21 20:21:19 UTC (rev 6275) +++ branches/rewrite/pywikibot/pagegenerators.py 2009-01-22 14:53:20 UTC (rev 6276) @@ -77,6 +77,10 @@ config.py for instructions. Argument can also be given as "-google:searchstring".
+-namespace Filter the page generator to only yield pages in the + specified namespaces. Separate multiple namespace + numbers with commas. + -interwiki Work on the given page and all equivalent pages in other languages. This can, for example, be used to fight multi-site spamming. @@ -145,21 +149,43 @@
class GeneratorFactory(object): """Process command line arguments and return appropriate page generator.""" + def __init__(self): + self.gens = [] + self.namespaces = []
- def setCategoryGen(self, arg, length, recurse = False): + def getCombinedGenerator(self): + """Return the combination of all accumulated generators. + + Only call this after all arguments have been parsed. + + """ + if len(self.gens) == 0: + return None + elif len(self.gens) == 1: + gensList = self.gens[0] + else: + gensList = CombinedPageGenerator(self.gens) + genToReturn = DuplicateFilterPageGenerator(gensList) + if self.namespaces: + genToReturn = NamespaceFilterPageGenerator(genToReturn, map(int, self.namespaces)) + return genToReturn + + def getCategoryGen(self, arg, length, recurse = False): if len(arg) == length: categoryname = pywikibot.input(u'Please enter the category name:') else: categoryname = arg[length + 1:]
ind = categoryname.find('|') + startfrom = None if ind > 0: startfrom = categoryname[ind + 1:] categoryname = categoryname[:ind] - else: - startfrom = None
- cat = pywikibot.Category(pywikibot.Link('Category:%s' % categoryname)) + cat = pywikibot.Category(pywikibot.Link(categoryname, + defaultNamespace=14)) + # Link constructor automatically prepends localized namespace + # if not included in user's input return CategorizedPageGenerator(cat, start=startfrom, recurse=recurse)
def setSubCategoriesGen(self, arg, length, recurse=False): @@ -175,10 +201,20 @@ else: startfrom = None
- cat = pywikibot.Category(pywikibot.Link('Category:%s' % categoryname)) + cat = pywikibot.Category(pywikibot.Link(categoryname, + defaultNamespace=14)) return SubCategoriesPageGenerator(cat, start=startfrom, recurse=recurse)
def handleArg(self, arg): + """Parse one argument at a time. + + If it is recognized as an argument that specifies a generator, a + generator is created and added to the accumulation list, and the + function returns true. Otherwise, it returns false, so that caller + can try parsing the argument. Call getCombinedGenerator() after all + arguments have been parsed to get the final output generator. + + """ gen = None if arg.startswith('-filelinks'): fileLinksPageTitle = arg[11:] @@ -224,14 +260,35 @@ textfilename = pywikibot.input( u'Please enter the local file name:') gen = TextfilePageGenerator(textfilename) + elif arg.startswith('-namespace'): + if len(arg) == len('-namespace'): + self.namespaces.append( + pywikibot.input(u'What namespace are you filtering on?')) + else: + self.namespaces.extend(arg[len('-namespace:'):].split(",")) + return True elif arg.startswith('-catr'): - gen = self.setCategoryGen(arg, 5, recurse = True) + gen = self.getCategoryGen(arg, len("-catr"), recurse = True) + elif arg.startswith('-category'): + gen = self.getCategoryGen(arg, len('-category')) elif arg.startswith('-cat'): - gen = self.setCategoryGen(arg, 4) + gen = self.getCategoryGen(arg, len("-cat")) elif arg.startswith('-subcatsr'): gen = self.setSubCategoriesGen(arg, 9, recurse = True) elif arg.startswith('-subcats'): gen = self.setSubCategoriesGen(arg, 8) + elif arg.startswith('-page'): + if len(arg) == len('-page'): + gen = [pywikibot.Page( + pywikibot.Link( + pywikibot.input( + u'What page do you want to use?'), + pywikibot.getSite()) + )] + else: + gen = [pywikibot.Page(pywikibot.Link(arg[len('-page:'):], + pywikibot.getSite()) + )] elif arg.startswith('-uncatfiles'): gen = UnCategorizedImageGenerator() elif arg.startswith('-uncatcat'): @@ -265,9 +322,10 @@ if not transclusionPageTitle: transclusionPageTitle = pywikibot.input( u'Pages that transclude which page should be processed?') - transclusionPage = pywikibot.Page(pywikibot.Link( - 'Template:%s' % transclusionPageTitle, - pywikibot.Site())) + transclusionPage = pywikibot.Page( + pywikibot.Link(transclusionPageTitle, + defaultNamespace=10, + source=pywikibot.Site())) gen = ReferringPageGenerator(transclusionPage, onlyTemplateInclusion=True) elif arg.startswith('-start'): @@ -327,10 +385,12 @@ elif arg.startswith('-yahoo'): gen = YahooSearchPageGenerator(arg[7:]) else: - return None - # make sure all yielded pages are unique - gen = DuplicateFilterPageGenerator(gen) - return gen + pass + if gen: + self.gens.append(gen) + return True + else: + return False
def AllpagesPageGenerator(start ='!', namespace=None, includeredirects=True, @@ -490,6 +550,7 @@ def UserContributionsGenerator(username, number=250, namespaces=None, site=None): """Yields number unique pages edited by user:username + namespaces : list of namespace numbers to fetch contribs from
"""
Property changes on: branches/rewrite/pywikibot/pagegenerators.py ___________________________________________________________________ Added: svn:mergeinfo + /trunk/pywikipedia/pagegenerators.py:6271-6272
Modified: branches/rewrite/pywikibot/scripts/category.py =================================================================== --- branches/rewrite/pywikibot/scripts/category.py 2009-01-21 20:21:19 UTC (rev 6275) +++ branches/rewrite/pywikibot/scripts/category.py 2009-01-22 14:53:20 UTC (rev 6276) @@ -877,6 +877,8 @@
def main(*args): + global catDB + fromGiven = False toGiven = False batchMode = False @@ -895,106 +897,124 @@ # The generator gives the pages that should be worked upon. gen = None
- #If this is set to true then the custom edit summary given for removing - #categories from articles will also be used as the deletion reason. + # If this is set to true then the custom edit summary given for removing + # categories from articles will also be used as the deletion reason. useSummaryForDeletion = True - try: - catDB = CategoryDatabase() - action = None - sort_by_last_name = False - restore = False - for arg in pywikibot.handleArgs(*args): - if arg == 'add': - action = 'add' - elif arg == 'remove': - action = 'remove' - elif arg == 'move': - action = 'move' - elif arg == 'tidy': - action = 'tidy' - elif arg == 'tree': - action = 'tree' - elif arg == 'listify': - action = 'listify' - elif arg == '-person': - sort_by_last_name = True - elif arg == '-rebuild': - catDB.rebuild() - elif arg.startswith('-from:'): - oldCatTitle = arg[len('-from:'):].replace('_', ' ') - fromGiven = True - elif arg.startswith('-to:'): - newCatTitle = arg[len('-to:'):].replace('_', ' ') - toGiven = True - elif arg == '-batch': - batchMode = True - elif arg == '-inplace': - inPlace = True - elif arg == '-delsum': - # This parameter is kept for historical reasons, as it was not previously the default option. - pass - elif arg == '-nodelsum': - useSummaryForDeletion = False - elif arg == '-overwrite': - overwrite = True - elif arg == '-showimages': - showImages = True - elif arg.startswith('-summary:'): - editSummary = arg[len('-summary:'):] - elif arg.startswith('-match'): - if len(arg) == len('-match'): - titleRegex = pywikibot.input(u'Which regular expression should affected objects match?') - else: - titleRegex = arg[len('-match:'):] - elif arg == '-talkpages': - talkPages = True - elif arg == '-recurse': - recurse = True + catDB = CategoryDatabase() + action = None + sort_by_last_name = False + restore = False + for arg in pywikibot.handleArgs(*args): + if genFactory.handleArg(arg): + continue + if arg == 'add': + action = 'add' + elif arg == 'remove': + action = 'remove' + elif arg == 'move': + action = 'move' + elif arg == 'tidy': + action = 'tidy' + elif arg == 'tree': + action = 'tree' + elif arg == 'listify': + action = 'listify' + elif arg == '-person': + sort_by_last_name = True + elif arg == '-rebuild': + catDB.rebuild() + elif arg.startswith('-from:'): + oldCatTitle = arg[len('-from:'):].replace('_', ' ') + fromGiven = True + elif arg.startswith('-to:'): + newCatTitle = arg[len('-to:'):].replace('_', ' ') + toGiven = True + elif arg == '-batch': + batchMode = True + elif arg == '-inplace': + inPlace = True + elif arg == '-delsum': + # This parameter is kept for historical reasons, + # as it was previously not the default option. + pass + elif arg == '-nodelsum': + useSummaryForDeletion = False + elif arg == '-overwrite': + overwrite = True + elif arg == '-showimages': + showImages = True + elif arg.startswith('-summary:'): + editSummary = arg[len('-summary:'):] + elif arg.startswith('-match'): + if len(arg) == len('-match'): + titleRegex = pywikibot.input( + u'Which regular expression should affected objects match?') else: - gen = genFactory.handleArg(arg) + titleRegex = arg[len('-match:'):] + elif arg == '-talkpages': + talkPages = True + elif arg == '-recurse': + recurse = True
- if action == 'add': - if not gen: - gen = genFactory.handleArg('-links') - # default for backwards compatibility - # The preloading generator is responsible for downloading multiple - # pages from the wiki simultaneously. - gen = pagegenerators.PreloadingGenerator(gen) - add_category(sort_by_last_name) - elif action == 'remove': - if (fromGiven == False): - oldCatTitle = pywikibot.input(u'Please enter the name of the category that should be removed:') - bot = CategoryRemoveRobot(oldCatTitle, batchMode, editSummary, useSummaryForDeletion, inPlace = inPlace) - bot.run() - elif action == 'move': - if (fromGiven == False): - oldCatTitle = pywikibot.input(u'Please enter the old name of the category:') - if (toGiven == False): - newCatTitle = pywikibot.input(u'Please enter the new name of the category:') - bot = CategoryMoveRobot(oldCatTitle, newCatTitle, batchMode, editSummary, inPlace, titleRegex = titleRegex) - bot.run() - elif action == 'tidy': - catTitle = pywikibot.input(u'Which category do you want to tidy up?') - bot = CategoryTidyRobot(catTitle, catDB) - bot.run() - elif action == 'tree': - catTitle = pywikibot.input(u'For which category do you want to create a tree view?') - filename = pywikibot.input(u'Please enter the name of the file where the tree should be saved, or press enter to simply show the tree:') - bot = CategoryTreeRobot(catTitle, catDB, filename) - bot.run() - elif action == 'listify': - if (fromGiven == False): - oldCatTitle = pywikibot.input(u'Please enter the name of the category to listify:') - if (toGiven == False): - newCatTitle = pywikibot.input(u'Please enter the name of the list to create:') - bot = CategoryListifyRobot(oldCatTitle, newCatTitle, editSummary, overwrite, showImages, subCats = True, talkPages = talkPages, recurse = recurse) - bot.run() - else: - pywikibot.showHelp('category') + gen = genFactory.getCombinedGenerator() + if action == 'add': + if not gen: + genFactory.handleArg('-links') + gen = genFactory.getCombinedGenerator() + # default for backwards compatibility + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. + gen = pagegenerators.PreloadingGenerator(gen) + add_category(sort_by_last_name) + elif action == 'remove': + if (fromGiven == False): + oldCatTitle = pywikibot.input( + u'Please enter the name of the category that should be removed:') + bot = CategoryRemoveRobot(oldCatTitle, batchMode, editSummary, + useSummaryForDeletion, inPlace=inPlace) + bot.run() + elif action == 'move': + if (fromGiven == False): + oldCatTitle = pywikibot.input( + u'Please enter the old name of the category:') + if (toGiven == False): + newCatTitle = pywikibot.input( + u'Please enter the new name of the category:') + bot = CategoryMoveRobot(oldCatTitle, newCatTitle, batchMode, + editSummary, inPlace, titleRegex=titleRegex) + bot.run() + elif action == 'tidy': + catTitle = pywikibot.input(u'Which category do you want to tidy up?') + bot = CategoryTidyRobot(catTitle, catDB) + bot.run() + elif action == 'tree': + catTitle = pywikibot.input( + u'For which category do you want to create a tree view?') + filename = pywikibot.input( + u'Please enter the name of the file where the tree should be saved,\n' + u'or press enter to simply show the tree:') + bot = CategoryTreeRobot(catTitle, catDB, filename) + bot.run() + elif action == 'listify': + if (fromGiven == False): + oldCatTitle = pywikibot.input( + u'Please enter the name of the category to listify:') + if (toGiven == False): + newCatTitle = pywikibot.input( + u'Please enter the name of the list to create:') + bot = CategoryListifyRobot(oldCatTitle, newCatTitle, editSummary, + overwrite, showImages, subCats=True, + talkPages=talkPages, recurse=recurse) + bot.run() + else: + pywikibot.showHelp('category') + + +if __name__ == "__main__": + try: + main() + except pywikibot.Error: + pywikibot.logging.exception("Fatal error:") finally: catDB.dump() pywikibot.stopme() - - -if __name__ == "__main__": - main()
Modified: branches/rewrite/pywikibot/scripts/replace.py =================================================================== --- branches/rewrite/pywikibot/scripts/replace.py 2009-01-21 20:21:19 UTC (rev 6275) +++ branches/rewrite/pywikibot/scripts/replace.py 2009-01-22 14:53:20 UTC (rev 6276) @@ -71,12 +71,6 @@ Currently available predefined fixes are: &fixes-help;
--namespace:n Number or name of namespace to process. The parameter can be - used multiple times. It works in combination with all other - parameters, except for the -start parameter. If you e.g. - want to iterate over all categories starting at M, use - -start:Category:M. - -always Don't prompt you for each replacement
-recursive Recurse replacement as long as possible. Be careful, this @@ -497,9 +491,6 @@ dotall = False # Will become True if the user inputs the commandline parameter -multiline multiline = False - # Which namespaces should be processed? - # default to [] which means all namespaces will be processed - namespaces = [] # Do all hits when they overlap allowoverlap = False # Do not recurse replacement @@ -514,6 +505,8 @@
# Read commandline parameters. for arg in pywikibot.handleArgs(*args): + if genFactory.handleArg(arg): + continue if arg == '-regex': regex = True elif arg.startswith('-xmlstart'): @@ -562,23 +555,15 @@ multiline = True elif arg.startswith('-addcat:'): add_cat = arg[len('-addcat:'):] - elif arg.startswith('-namespace:'): - try: - namespaces.append(int(arg[11:])) - except ValueError: - namespaces.append(arg[11:]) elif arg.startswith('-summary:'): edit_summary = arg[9:] summary_commandline = True elif arg.startswith('-allowoverlap'): allowoverlap = True else: - generator = genFactory.handleArg(arg) - if generator: - gen = generator - else: - commandline_replacements.append(arg) + commandline_replacements.append(arg)
+ gen = genFactory.getCombinedGenerator() if (len(commandline_replacements) % 2): raise pywikibot.Error, 'require even number of replacements.' elif (len(commandline_replacements) == 2 and fix == None): @@ -706,8 +691,6 @@ # syntax error, show help text from the top of this file pywikibot.showHelp('replace') return - if namespaces != []: - gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) if xmlFilename: # XML parsing can be quite slow, so use smaller batches and # longer lookahead.
Modified: branches/rewrite/pywikibot/scripts/touch.py =================================================================== --- branches/rewrite/pywikibot/scripts/touch.py 2009-01-21 20:21:19 UTC (rev 6275) +++ branches/rewrite/pywikibot/scripts/touch.py 2009-01-22 14:53:20 UTC (rev 6276) @@ -56,38 +56,29 @@ gen = None genFactory = pagegenerators.GeneratorFactory() redirs = False - namespaces = [] # If the user chooses to work on a single page, this temporary array is # used to read the words from the page title. The words will later be # joined with spaces to retrieve the full title. pageTitle = [] for arg in pywikibot.handleArgs(*args): + if genFactory.handleArg(arg): + continue if arg == '-redir': redirs = True - elif arg.startswith('-namespace:'): - try: - namespaces.append(int(arg[11:])) - except ValueError: - namespaces.append(arg[11:]) else: - generator = genFactory.handleArg(arg) - if generator: - gen = generator - else: - pageTitle.append(arg) + pageTitle.append(arg)
- if pageTitle: - # work on a single page - page = pywikibot.Page(pywikibot.Link(' '.join(pageTitle))) - gen = iter([page]) + gen = genFactory.getCombinedGenerator() if not gen: - pywikibot.showHelp() - else: - if namespaces: - gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) - preloadingGen = pagegenerators.PreloadingGenerator(gen) - bot = TouchBot(preloadingGen, redirs) - bot.run() + if pageTitle: + # work on a single page + page = pywikibot.Page(pywikibot.Link(' '.join(pageTitle))) + gen = iter([page]) + else: + pywikibot.showHelp() + preloadingGen = pagegenerators.PreloadingGenerator(gen) + bot = TouchBot(preloadingGen, redirs) + bot.run()
if __name__ == "__main__":
pywikipedia-l@lists.wikimedia.org