Revision: 8493 Author: xqt Date: 2010-09-08 10:14:53 +0000 (Wed, 08 Sep 2010)
Log Message: ----------- Use new class for the "add" action. Some formatting due to PEP 8.
Modified Paths: -------------- trunk/pywikipedia/category.py
Modified: trunk/pywikipedia/category.py =================================================================== --- trunk/pywikipedia/category.py 2010-09-07 20:23:34 UTC (rev 8492) +++ trunk/pywikipedia/category.py 2010-09-08 10:14:53 UTC (rev 8493) @@ -14,7 +14,26 @@ * listify - make a list of all of the articles that are in a category
and option can be one of these: - * -person - sort persons by their last name (for action 'add') + +Options for "add" action: + * -person - sort persons by their last name + * -create - If a page doesn't exist, do not skip it, create it instead +If action is "add", the following options are supported: + +&params; + +Options for "listify" action: + * -overwrite - This overwrites the current page with the list even if + something is already there. + * -showimages - This displays images rather than linking them in the list. + * -talkpages - This outputs the links to talk pages of the pages to be + listified in addition to the pages themselves. +Options for "remove" action: + * -nodelsum - This specifies not to use the custom edit summary as the + deletion reason. Instead, it uses the default deletion reason + for the language, which is "Category was disbanded" in English. + +Options for several actions: * -rebuild - reset the database * -from: - The category to move from (for the move option) Also, the category to remove from in the remove option @@ -30,27 +49,10 @@ * -summary: - Pick a custom edit summary for the bot. * -inplace - Use this flag to change categories in place rather than rearranging them. - * -nodelsum - An option for remove, this specifies not to use the custom - edit summary as the deletion reason. Instead, it uses the - default deletion reason for the language, which is "Category - was disbanded" in English. - * -overwrite - An option for listify, this overwrites the current page with - the list even if something is already there. - * -showimages - An option for listify, this displays images rather than - linking them in the list. - * -talkpages - An option for listify, this outputs the links to talk pages - of the pages to be listified in addition to the pages - themselves. * -recurse - Recurse through all subcategories of categories. * -match - Only work on pages whose titles match the given regex (for move and remove actions). 
- * -create - An option for add: if a page doesn't exist, do not skip it, - create it instead
-If action is "add", the following options are supported: - -&params; - For the actions tidy and tree, the bot will store the category structure locally in category.dump. This saves time and server load, but if it uses these data later, they may be outdated; use the -rebuild parameter in this @@ -73,6 +75,9 @@ # # (C) Rob W.W. Hooft, 2004 # (C) Daniel Herding, 2004 +# (C) Wikipedian, 2004-2008 +# (C) leogregianin, 2004-2008 +# (C) Cyde, 2006-2010 # (C) Anreas J Schwab, 2007 # (C) Pywikipedia team, 2008-2009 # @@ -122,6 +127,7 @@ 'no':u'Robot: Legger til [[Kategori:%s]]', 'nn':u'robot: la til [[Kategori:%s]]', 'pl':u'Robot dodaje [[Kategoria:%s]]', + 'pdc':u'Waddefresser: [[Kategorie:%s]] dezu geduh', 'pt':u'Bot: Adicionando [[Categoria:%s]]', 'ru':u'Робот: добавление [[Категория:%s]]', 'sk':u'Robot pridal [[Kategória:%s]]', @@ -160,7 +166,7 @@ 'nl':u'Bot: wijziging %(oldcat)s', 'no':u'Robot: Endrer %(oldcat)s', 'nn':u'robot: endra %(oldcat)s', - 'pdc':u'Waddefresser: Abdeeling von %(oldcat)s nooch %(newcat)s geennert', + 'pdc':u'Waddefresser: Abdeeling vun %(oldcat)s nooch %(newcat)s geennert', 'pt':u'Bot: Modificando [[%(oldcat)s]]', 'pl':u'Robot przenosi %(oldcat)s', 'ru':u'Робот: изменение %(oldcat)s', @@ -235,12 +241,13 @@ } }
+ class CategoryDatabase: + '''This is a temporary knowledge base saving for each category the contained + subcategories and articles, so that category pages do not need to be loaded + over and over again + ''' - This is a temporary knowledge base saving for each category the contained - subcategories and articles, so that category pages do not need to - be loaded over and over again - ''' def __init__(self, rebuild = False, filename = 'category.dump.bz2'): if rebuild: self.rebuild() @@ -267,12 +274,11 @@ self.superclassDB={}
def getSubcats(self, supercat): + '''For a given supercategory, return a list of Categorys for all its + subcategories. Saves this list in a temporary database so that it won't + be loaded from the server next time it's required. + ''' - For a given supercategory, return a list of Categorys for all its - subcategories. - Saves this list in a temporary database so that it won't be loaded from the - server next time it's required. - ''' # if we already know which subcategories exist here if supercat in self.catContentDB: return self.catContentDB[supercat][0] @@ -284,10 +290,10 @@ return subcatlist
def getArticles(self, cat): - ''' - For a given category, return a list of Pages for all its articles. + '''For a given category, return a list of Pages for all its articles. Saves this list in a temporary database so that it won't be loaded from the server next time it's required. + ''' # if we already know which articles exist here if cat in self.catContentDB: @@ -310,133 +316,190 @@ return supercatlist
def dump(self, filename = 'category.dump.bz2'): + '''Saves the contents of the dictionaries superclassDB and catContentDB + to disk. + ''' - Saves the contents of the dictionaries superclassDB and catContentDB to disk. - ''' if not os.path.isabs(filename): filename = pywikibot.config.datafilepath(filename) - pywikibot.output(u'Dumping to %s, please wait...' - % pywikibot.config.shortpath(filename)) - f = bz2.BZ2File(filename, 'w') - databases = { - 'catContentDB': self.catContentDB, - 'superclassDB': self.superclassDB - } - # store dump to disk in binary format - try: - pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL) - except pickle.PicklingError: - pass - f.close() + if self.catContentDB or self.superclassDB: + pywikibot.output(u'Dumping to %s, please wait...' + % pywikibot.config.shortpath(filename)) + f = bz2.BZ2File(filename, 'w') + databases = { + 'catContentDB': self.catContentDB, + 'superclassDB': self.superclassDB + } + # store dump to disk in binary format + try: + pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL) + except pickle.PicklingError: + pass + f.close() + else: + os.remove(filename) + pywikibot.output(u'Database is empty. %s removed' + % pywikibot.config.shortpath(filename))
-def sorted_by_last_name(catlink, pagelink): - '''Return a Category with key that sorts persons by their last names.
- Parameters: catlink - The Category to be linked - pagelink - the Page to be placed in the category +class AddCategory: + '''A robot to mass-add a category to a list of pages.'''
- Trailing words in brackets will be removed. Example: If - category_name is 'Author' and pl is a Page to [[Alexandre Dumas - (senior)]], this function will return this Category: - [[Category:Author|Dumas, Alexandre]] + def __init__(self, generator, sort=False, create=False, editSummary='', + dry=False): + self.generator = generator + self.sort = sort # sort by last name + self.create = create + self.site = pywikibot.getSite() + self.always = False + self.dry = dry + self.newcatTitle = None + self.editSummary = editSummary
- ''' - page_name = pagelink.title() - site = pagelink.site() - # regular expression that matches a name followed by a space and - # disambiguation brackets. Group 1 is the name without the rest. - bracketsR = re.compile('(.*) \(.+?\)') - match_object = bracketsR.match(page_name) - if match_object: - page_name = match_object.group(1) - split_string = page_name.split(' ') - if len(split_string) > 1: - # pull last part of the name to the beginning, and append the - # rest after a comma; e.g., "John von Neumann" becomes - # "Neumann, John von" - sorted_key = split_string[-1] + ', ' + ' '.join(split_string[:-1]) - # give explicit sort key - return pywikibot.Page(site, catlink.title() + '|' + sorted_key) - else: - return pywikibot.Page(site, catlink.title()) + def sorted_by_last_name(catlink, pagelink): + '''Return a Category with key that sorts persons by their last name.
-def add_category(sort_by_last_name = False, create_pages = False): - '''A robot to mass-add a category to a list of pages.''' - site = pywikibot.getSite() - if gen: - newcatTitle = pywikibot.input(u'Category to add (do not give namespace):') - if not site.nocapitalize: - newcatTitle = newcatTitle[:1].capitalize() + newcatTitle[1:] + Parameters: catlink - The Category to be linked + pagelink - the Page to be placed in the category
- # set edit summary message - editSummary = pywikibot.translate(site, msg_add) % newcatTitle + Trailing words in brackets will be removed. Example: If + category_name is 'Author' and pl is a Page to [[Alexandre Dumas + (senior)]], this function will return this Category: + [[Category:Author|Dumas, Alexandre]]
- cat_namespace = site.category_namespaces()[0] + ''' + page_name = pagelink.title() + site = pagelink.site() + # regular expression that matches a name followed by a space and + # disambiguation brackets. Group 1 is the name without the rest. + bracketsR = re.compile('(.*) \(.+?\)') + match_object = bracketsR.match(page_name) + if match_object: + page_name = match_object.group(1) + split_string = page_name.split(' ') + if len(split_string) > 1: + # pull last part of the name to the beginning, and append the + # rest after a comma; e.g., "John von Neumann" becomes + # "Neumann, John von" + sorted_key = split_string[-1] + ', ' + \ + ' '.join(split_string[:-1]) + # give explicit sort key + return pywikibot.Page(site, catlink.title() + '|' + sorted_key) + else: + return pywikibot.Page(site, catlink.title())
- answer = '' - for page in gen: - if answer != 'a': - answer = '' + def run(self): + self.newcatTitle = pywikibot.input( + u'Category to add (do not give namespace):') + if not self.site.nocapitalize: + self.newcatTitle = self.newcatTitle[:1].upper() + \ + self.newcatTitle[1:] + if not self.editSummary: + self.editSummary = pywikibot.translate(self.site, msg_add) \ + % self.newcatTitle + counter = 0 + for page in self.generator: + self.treat(page) + counter += 1 + pywikibot.output(u"%d page(s) processed." % counter)
- while answer not in ('y','n','a'): - answer = pywikibot.inputChoice(u'%s'% (page.aslink()), ['Yes', 'No', 'All'],['y', 'n', 'a'], 'n') - if answer == 'a': - confirm = pywikibot.inputChoice(u"""\ + def load(self, page): + """ + Loads the given page, does some changes, and saves it. + """ + try: + # Load the page + text = page.get() + except pywikibot.NoPage: + if self.create: + pywikibot.output(u"Page %s doesn't exist yet; creating." + % (page.title())) + text = '' + else: + pywikibot.output(u"Page %s does not exist; skipping." + % page.aslink()) + except pywikibot.IsRedirectPage: + redirTarget = pywikibot.Page(site, arg.args[0]) + pywikibot.output(u"WARNING: Page %s is a redirect to %s; skipping." + % (page.aslink(), redirTarget.aslink())) + else: + return text + return None + + def save(self, text, page, comment, minorEdit=True, botflag=True): + # only save if something was changed + if text != page.get(): + # show what was changed + pywikibot.showDiff(page.get(), text) + pywikibot.output(u'Comment: %s' %comment) + if not self.dry: + if not self.always: + confirm = 'y' + while True: + choice = pywikibot.inputChoice( + u'Do you want to accept these changes?', + ['Yes', 'No', 'Always'], ['y', 'N', 'a'], 'N') + if choice == 'a': + confirm = pywikibot.inputChoice(u"""\ This should be used if and only if you are sure that your links are correct! Are you sure?""", ['Yes', 'No'], ['y', 'n'], 'n') - if confirm == 'n': - answer = '' - - if answer == 'y' or answer == 'a': - try: - text = page.get() - except pywikibot.NoPage: - if create_pages: - pywikibot.output(u"%s doesn't exist yet. Creating." - % (page.title())) - text = '' - else: - pywikibot.output(u"%s doesn't exist yet. Ignoring." - % (page.title())) - continue - except pywikibot.IsRedirectPage, arg: - redirTarget = pywikibot.Page(site, arg.args[0]) - pywikibot.output( - u"WARNING: %s is redirect to %s. Ignoring." 
- % (page.title(), redirTarget.title())) - continue - cats = page.categories() - # Show the title of the page we're working on. - # Highlight the title in purple. - pywikibot.output( - u"\n\n>>> \03{lightpurple}%s\03{default} <<<" - % page.title()) - pywikibot.output(u"Current categories:") - for cat in cats: - pywikibot.output(u"* %s" % cat.title()) - catpl = pywikibot.Page(site, - cat_namespace + ':' + newcatTitle) - if sort_by_last_name: - catpl = sorted_by_last_name(catpl, page) - if catpl in cats: - pywikibot.output(u"%s is already in %s." - % (page.title(), catpl.title())) - else: - pywikibot.output(u'Adding %s' % catpl.aslink()) - cats.append(catpl) - text = pywikibot.replaceCategoryLinks(text, cats) + if confirm == 'y': + self.always = True + break + else: break + if self.always or choice == 'y': try: - page.put(text, comment = editSummary) + # Save the page + page.put(text, comment=comment, + minorEdit=minorEdit, botflag=botflag) + except pywikibot.LockedPage: + pywikibot.output(u"Page %s is locked; skipping." + % page.aslink()) except pywikibot.EditConflict: pywikibot.output( u'Skipping %s because of edit conflict' % (page.title())) + except pywikibot.SpamfilterError, error: + pywikibot.output( +u'Cannot change %s because of spam blacklist entry %s' + % (page.title(), error.url)) + else: + return True + return False
+ def treat(self, page): + text = self.load(page) + if text is None: + return + cats = page.categories() + # Show the title of the page we're working on. + # Highlight the title in purple. + pywikibot.output( + u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % page.title()) + pywikibot.output(u"Current categories:") + for cat in cats: + pywikibot.output(u"* %s" % cat.title()) + catpl = pywikibot.Page(self.site, self.newcatTitle, defaultNamespace=14) + if self.sort: + catpl = self.sorted_by_last_name(catpl, page) + if catpl in cats: + pywikibot.output(u"%s is already in %s." + % (page.title(), catpl.title())) + else: + pywikibot.output(u'Adding %s' % catpl.aslink()) + cats.append(catpl) + text = pywikibot.replaceCategoryLinks(text, cats) + if not self.save(text, page, self.editSummary): + pywikibot.output(u'Page %s not saved.' % page.aslink()) + + class CategoryMoveRobot: """Robot to move pages from one category to another.""" def __init__(self, oldCatTitle, newCatTitle, batchMode=False, editSummary='', inPlace=False, moveCatPage=True, - deleteEmptySourceCat=True, titleRegex=None, useSummaryForDeletion=True): + deleteEmptySourceCat=True, titleRegex=None, + useSummaryForDeletion=True): site = pywikibot.getSite() self.editSummary = editSummary self.oldCat = catlib.Category(site, oldCatTitle) @@ -522,9 +585,7 @@
class CategoryListifyRobot: - ''' - Creates a list containing all of the members in a category. - ''' + '''Creates a list containing all of the members in a category.''' listify_msg={ 'ar':u'روبوت: عرض من %s (%d مدخلة)', 'ca':u'Robot: Llistant de %s (%d entrades)', @@ -577,11 +638,12 @@ else: self.list.put(listString, comment=self.editSummary)
+ class CategoryRemoveRobot: - ''' - Removes the category tag from all pages in a given category and from the - category pages of all subcategories, without prompting. + '''Removes the category tag from all pages in a given category + and from the category pages of all subcategories, without prompting. Does not remove category tags pointing at subcategories. + ''' deletion_reason_remove = { 'ar':u'روبوت: التصنيف تم الاستغناء عنه', @@ -688,9 +750,9 @@ if (talkPage.exists()): talkPage.delete(reason=reason, prompt=not self.batchMode)
+ class CategoryTidyRobot: - """ - Script to help a human to tidy up a category by moving its articles into + """Script to help a human to tidy up a category by moving its articles into subcategories
Specify the category name on the command line. The program will pick up the @@ -711,6 +773,7 @@ Important: * this bot is written to work with the MonoBook skin, so make sure your bot account uses this skin + """ def __init__(self, catTitle, catDB): self.catTitle = catTitle @@ -842,9 +905,11 @@ else: preloadingGen = pagegenerators.PreloadingGenerator(iter(articles)) for article in preloadingGen: - pywikibot.output(u'\n===================================================================') + pywikibot.output('') + pywikibot.output(u'=' * 67) self.move_to_category(article, cat, cat)
+ class CategoryTreeRobot: ''' Robot to create tree overviews of the category structure. @@ -939,12 +1004,13 @@ return result
def run(self): - """ - Prints the multi-line string generated by treeview or saves it to a file. + """Prints the multi-line string generated by treeview or saves it to a + file.
Parameters: * catTitle - the title of the category which will be the tree's root * maxDepth - the limit beyond which no subcategories will be listed + """ cat = catlib.Category(pywikibot.getSite(), 'Category:' + self.catTitle) tree = self.treeview(cat) @@ -957,6 +1023,7 @@ else: pywikibot.output(tree, toStdout = True)
+ if __name__ == "__main__": fromGiven = False toGiven = False @@ -1013,7 +1080,8 @@ elif arg == '-inplace': inPlace = True elif arg == '-delsum': - # This parameter is kept for historical reasons, as it was not previously the default option. + # This parameter is kept for historical reasons, as it was not + # previously the default option. pass elif arg == '-nodelsum': useSummaryForDeletion = False @@ -1025,7 +1093,8 @@ editSummary = arg[len('-summary:'):] elif arg.startswith('-match'): if len(arg) == len('-match'): - titleRegex = pywikibot.input(u'Which regular expression should affected objects match?') + titleRegex = pywikibot.input( +u'Which regular expression should affected objects match?') else: titleRegex = arg[len('-match:'):] elif arg == '-talkpages': @@ -1038,44 +1107,60 @@ genFactory.handleArg(arg)
if action == 'add': - # Note that the add functionality is the only bot that actually uses the - # the generator factory. Every other bot creates its own generator exclusively - # from the command-line arguments that category.py understands. + # Note that the add functionality is the only bot that actually + # uses the the generator factory. Every other bot creates its own + # generator exclusively from the command-line arguments that + # category.py understands. if not gen: gen = genFactory.getCombinedGenerator() if not gen: - genFactory.handleArg('-links') #default for backwords compatibility - # The preloading generator is responsible for downloading multiple - # pages from the wiki simultaneously. - gen = pagegenerators.PreloadingGenerator(genFactory.getCombinedGenerator()) - add_category(sort_by_last_name, create_pages) + #default for backwords compatibility + genFactory.handleArg('-links') + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. 
+ gen = pagegenerators.PreloadingGenerator( + genFactory.getCombinedGenerator()) + bot = AddCategory(gen, sort_by_last_name, create_pages, editSummary) + bot.run() elif action == 'remove': if (fromGiven == False): - oldCatTitle = pywikibot.input(u'Please enter the name of the category that should be removed:') - bot = CategoryRemoveRobot(oldCatTitle, batchMode, editSummary, useSummaryForDeletion, inPlace = inPlace) + oldCatTitle = pywikibot.input( +u'Please enter the name of the category that should be removed:') + bot = CategoryRemoveRobot(oldCatTitle, batchMode, editSummary, + useSummaryForDeletion, inPlace=inPlace) bot.run() elif action == 'move': if (fromGiven == False): - oldCatTitle = pywikibot.input(u'Please enter the old name of the category:') + oldCatTitle = pywikibot.input( + u'Please enter the old name of the category:') if (toGiven == False): - newCatTitle = pywikibot.input(u'Please enter the new name of the category:') - bot = CategoryMoveRobot(oldCatTitle, newCatTitle, batchMode, editSummary, inPlace, titleRegex = titleRegex) + newCatTitle = pywikibot.input( + u'Please enter the new name of the category:') + bot = CategoryMoveRobot(oldCatTitle, newCatTitle, batchMode, + editSummary, inPlace, titleRegex=titleRegex) bot.run() elif action == 'tidy': - catTitle = pywikibot.input(u'Which category do you want to tidy up?') + catTitle = pywikibot.input( + u'Which category do you want to tidy up?') bot = CategoryTidyRobot(catTitle, catDB) bot.run() elif action == 'tree': - catTitle = pywikibot.input(u'For which category do you want to create a tree view?') - filename = pywikibot.input(u'Please enter the name of the file where the tree should be saved, or press enter to simply show the tree:') + catTitle = pywikibot.input( + u'For which category do you want to create a tree view?') + filename = pywikibot.input( +u'Please enter the name of the file where the tree should be saved, or press enter to simply show the tree:') bot = CategoryTreeRobot(catTitle, 
catDB, filename) bot.run() elif action == 'listify': if (fromGiven == False): - oldCatTitle = pywikibot.input(u'Please enter the name of the category to listify:') + oldCatTitle = pywikibot.input( + u'Please enter the name of the category to listify:') if (toGiven == False): - newCatTitle = pywikibot.input(u'Please enter the name of the list to create:') - bot = CategoryListifyRobot(oldCatTitle, newCatTitle, editSummary, overwrite, showImages, subCats = True, talkPages = talkPages, recurse = recurse) + newCatTitle = pywikibot.input( + u'Please enter the name of the list to create:') + bot = CategoryListifyRobot(oldCatTitle, newCatTitle, editSummary, + overwrite, showImages, subCats=True, + talkPages=talkPages, recurse=recurse) bot.run() else: pywikibot.showHelp('category')
pywikipedia-svn@lists.wikimedia.org