Revision: 8497 Author: xqt Date: 2010-09-08 13:34:55 +0000 (Wed, 08 Sep 2010)
Log Message: ----------- AddCategory class from trunk
Modified Paths: -------------- branches/rewrite/scripts/category.py
Modified: branches/rewrite/scripts/category.py =================================================================== --- branches/rewrite/scripts/category.py 2010-09-08 13:15:43 UTC (rev 8496) +++ branches/rewrite/scripts/category.py 2010-09-08 13:34:55 UTC (rev 8497) @@ -14,7 +14,28 @@ * listify - make a list of all of the articles that are in a category
and option can be one of these: - * -person - sort persons by their last name (for action 'add') + +Options for "add" action: + * -person - sort persons by their last name + * -create - If a page doesn't exist, do not skip it, create it instead + +If action is "add", the following options are supported: + +&params; + +Options for "listify" action: + * -overwrite - This overwrites the current page with the list even if + something is already there. + * -showimages - This displays images rather than linking them in the list. + * -talkpages - This outputs the links to talk pages of the pages to be + listified in addition to the pages themselves. + +Options for "remove" action: + * -nodelsum - This specifies not to use the custom edit summary as the + deletion reason. Instead, it uses the default deletion reason + for the language, which is "Category was disbanded" in English. + +Options for several actions: * -rebuild - reset the database * -from: - The category to move from (for the move option) Also, the category to remove from in the remove option @@ -30,25 +51,10 @@ * -summary: - Pick a custom edit summary for the bot. * -inplace - Use this flag to change categories in place rather than rearranging them. - * -nodelsum - An option for remove, this specifies not to use the custom - edit summary as the deletion reason. Instead, it uses the - default deletion reason for the language, which is "Category - was disbanded" in English. - * -overwrite - An option for listify, this overwrites the current page with - the list even if something is already there. - * -showimages - An option for listify, this displays images rather than - linking them in the list. - * -talkpages - An option for listify, this outputs the links to talk pages - of the pages to be listified in addition to the pages - themselves. * -recurse - Recurse through all subcategories of categories. * -match - Only work on pages whose titles match the given regex (for move and remove actions).
-If action is "add", the following options are supported: - -&params; - For the actions tidy and tree, the bot will store the category structure locally in category.dump. This saves time and server load, but if it uses these data later, they may be outdated; use the -rebuild parameter in this @@ -71,6 +77,9 @@ # # (C) Rob W.W. Hooft, 2004 # (C) Daniel Herding, 2004 +# (C) Wikipedian, 2004-2008 +# (C) leogregianin, 2004-2008 +# (C) Cyde, 2006-2010 # (C) Anreas J Schwab, 2007 # (C) Pywikipedia team, 2008-2009 # @@ -121,6 +130,7 @@ 'no':u'Robot: Legger til [[Kategori:%s]]', 'nn':u'robot: la til [[Kategori:%s]]', 'pl':u'Robot dodaje [[Kategoria:%s]]', + 'pdc':u'Waddefresser: [[Kategorie:%s]] dezu geduh', 'pt':u'Bot: Adicionando [[Categoria:%s]]', 'ru':u'Робот: добавление [[Категория:%s]]', 'sk':u'Robot pridal [[Kategória:%s]]', @@ -159,7 +169,7 @@ 'nl':u'Bot: wijziging %(oldcat)s', 'no':u'Robot: Endrer %(oldcat)s', 'nn':u'robot: endra %(oldcat)s', - 'pdc':u'Waddefresser: Abdeeling von %(oldcat)s nooch %(newcat)s geennert', + 'pdc':u'Waddefresser: Abdeeling vun %(oldcat)s nooch %(newcat)s geennert', 'pt':u'Bot: Modificando [[%(oldcat)s]]', 'pl':u'Robot przenosi %(oldcat)s', 'ru':u'Робот: изменение %(oldcat)s', @@ -241,12 +251,13 @@ } }
+ class CategoryDatabase: + '''This is a temporary knowledge base saving for each category the contained + subcategories and articles, so that category pages do not need to be loaded + over and over again + ''' - This is a temporary knowledge base saving for each category the contained - subcategories and articles, so that category pages do not need to - be loaded over and over again - ''' def __init__(self, rebuild = False, filename = 'category.dump.bz2'): if rebuild: self.rebuild() @@ -273,12 +284,11 @@ self.superclassDB={}
def getSubcats(self, supercat): + '''For a given supercategory, return a list of Categorys for all its + subcategories. Saves this list in a temporary database so that it won't + be loaded from the server next time it's required. + ''' - For a given supercategory, return a list of Categorys for all its - subcategories. - Saves this list in a temporary database so that it won't be loaded from the - server next time it's required. - ''' # if we already know which subcategories exist here if supercat in self.catContentDB: return self.catContentDB[supercat][0] @@ -290,10 +300,10 @@ return subcatset
def getArticles(self, cat): - ''' - For a given category, return a list of Pages for all its articles. + '''For a given category, return a list of Pages for all its articles. Saves this list in a temporary database so that it won't be loaded from the server next time it's required. + ''' # if we already know which articles exist here if cat in self.catContentDB: @@ -316,28 +326,53 @@ return supercatset
def dump(self, filename = 'category.dump.bz2'): + '''Saves the contents of the dictionaries superclassDB and catContentDB + to disk. + ''' - Saves the contents of the dictionaries superclassDB and catContentDB to disk. - ''' if not os.path.isabs(filename): filename = config.datafilepath(filename) - pywikibot.output(u'Dumping to %s, please wait...' - % config.shortpath(filename)) - f = bz2.BZ2File(filename, 'w') - databases = { - 'catContentDB': self.catContentDB, - 'superclassDB': self.superclassDB - } - # store dump to disk in binary format - try: - pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL) - except pickle.PicklingError: - pass - f.close() + if self.catContentDB or self.superclassDB: + pywikibot.output(u'Dumping to %s, please wait...' + % config.shortpath(filename)) + f = bz2.BZ2File(filename, 'w') + databases = { + 'catContentDB': self.catContentDB, + 'superclassDB': self.superclassDB + } + # store dump to disk in binary format + try: + pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL) + except pickle.PicklingError: + pass + f.close() + else: + try: + os.remove(filename) + except EnvironmentError: + pass + else: + pywikibot.output(u'Database is empty. %s removed' + % config.shortpath(filename))
-def sorted_by_last_name(catlink, pagelink): - '''Return a Category with key that sorts persons by their last names.
+class AddCategory: + '''A robot to mass-add a category to a list of pages.''' + + def __init__(self, generator, sort_by_last_name=False, create=False, + editSummary='', dry=False): + self.generator = generator + self.sort = sort_by_last_name + self.create = create + self.site = pywikibot.getSite() + self.always = False + self.dry = dry + self.newcatTitle = None + self.editSummary = editSummary + + def sorted_by_last_name(self, catlink, pagelink): + '''Return a Category with key that sorts persons by their last name. + Parameters: catlink - The Category to be linked pagelink - the Page to be placed in the category
@@ -360,86 +395,126 @@ # pull last part of the name to the beginning, and append the # rest after a comma; e.g., "John von Neumann" becomes # "Neumann, John von" - sorted_key = split_string[-1] + ', ' + ' '.join(split_string[:-1]) + sorted_key = split_string[-1] + ', ' + \ + ' '.join(split_string[:-1]) # give explicit sort key return pywikibot.Page(site, catlink.title() + '|' + sorted_key) else: return pywikibot.Page(site, catlink.title())
-def add_category(sort_by_last_name = False): - '''A robot to mass-add a category to a list of pages.''' - site = pywikibot.getSite() - if gen: - newcatTitle = pywikibot.input( + def run(self): + self.newcatTitle = pywikibot.input( u'Category to add (do not give namespace):') - if not site.nocapitalize: - newcatTitle = newcatTitle[:1].capitalize() + newcatTitle[1:] + if not self.site.nocapitalize: + self.newcatTitle = self.newcatTitle[:1].upper() + \ + self.newcatTitle[1:] + if not self.editSummary: + self.editSummary = pywikibot.translate(self.site, msg_add) \ + % self.newcatTitle + counter = 0 + for page in self.generator: + self.treat(page) + counter += 1 + pywikibot.output(u"%d page(s) processed." % counter)
- # set edit summary message - editSummary = pywikibot.translate(site, msg_add) % newcatTitle + def load(self, page): + """ + Loads the given page, does some changes, and saves it. + """ + try: + # Load the page + text = page.get() + except pywikibot.NoPage: + if self.create: + pywikibot.output(u"Page %s doesn't exist yet; creating." + % (page.title(asLink=True))) + text = '' + else: + pywikibot.output(u"Page %s does not exist; skipping." + % page.title(asLink=True)) + except pywikibot.IsRedirectPage: + redirTarget = pywikibot.Page(site, arg.args[0]) + pywikibot.output(u"WARNING: Page %s is a redirect to %s; skipping." + % (page.title(asLink=True), + redirTarget.title(asLink=True))) + else: + return text + return None
- cat_namespace = site.category_namespaces()[0] + def save(self, text, page, comment, minorEdit=True, botflag=True): + # only save if something was changed + if text != page.get(): + # show what was changed + pywikibot.showDiff(page.get(), text) + pywikibot.output(u'Comment: %s' %comment) + if not self.dry: + if not self.always: + confirm = 'y' + while True: + choice = pywikibot.inputChoice( + u'Do you want to accept these changes?', + ['Yes', 'No', 'Always'], ['y', 'N', 'a'], 'N') + if choice == 'a': + confirm = pywikibot.inputChoice(u"""\ +This should be used if and only if you are sure that your links are correct! +Are you sure?""", ['Yes', 'No'], ['y', 'n'], 'n') + if confirm == 'y': + self.always = True + break + else: break + if self.always or choice == 'y': + try: + # Save the page + page.put(text, comment=comment, + minorEdit=minorEdit, botflag=botflag) + except pywikibot.LockedPage: + pywikibot.output(u"Page %s is locked; skipping." + % page.aslink()) + except pywikibot.EditConflict: + pywikibot.output( + u'Skipping %s because of edit conflict' + % (page.title())) + except pywikibot.SpamfilterError, error: + pywikibot.output( +u'Cannot change %s because of spam blacklist entry %s' + % (page.title(), error.url)) + else: + return True + return False
- answer = '' - for page in gen: - if answer != 'a': - answer = '' + def treat(self, page): + text = self.load(page) + if text is None: + return + cats = page.categories() + # Show the title of the page we're working on. + # Highlight the title in purple. + pywikibot.output( + u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % page.title()) + pywikibot.output(u"Current categories:") + for cat in cats: + pywikibot.output(u"* %s" % cat.title()) + catpl = pywikibot.Page(self.site, self.newcatTitle, defaultNamespace=14) + if catpl in cats: + pywikibot.output(u"%s is already in %s." + % (page.title(), catpl.title())) + else: + if self.sort: + catpl = self.sorted_by_last_name(catpl, page) + pywikibot.output(u'Adding %s' % catpl.aslink()) + cats.append(catpl) + text = pywikibot.replaceCategoryLinks(text, cats) + if not self.save(text, page, self.editSummary): + pywikibot.output(u'Page %s not saved.' % page.aslink())
- while answer not in ('y','n','a'): - answer = pywikibot.input(u'%s [y/n/a(ll)]:' % (page.title(asLink=True))) - if answer == 'a': - confirm = '' - while confirm not in ('y','n'): - confirm = pywikibot.input(u"""\ -This should be used if and only if you are sure that your links are correct! -Are you sure? [y/n]:""") - if confirm == 'n': - answer = ''
- if answer == 'y' or answer == 'a': - try: - text = page.get() - except pywikibot.NoPage: - pywikibot.output(u"%s doesn't exist yet. Ignoring." - % (page.title())) - pass - except pywikibot.IsRedirectPage: - pywikibot.output( - u"WARNING: %s is redirect to. Ignoring." % page) - else: - cats = page.categories() - # Show the title of the page we're working on. - # Highlight the title in purple. - pywikibot.output( - u"\n\n>>> \03{lightpurple}%s\03{default} <<<" - % page.title()) - pywikibot.output(u"Current categories:") - for cat in cats: - pywikibot.output(u"* %s" % cat.title()) - catpl = pywikibot.Page(site, - cat_namespace + ':' + newcatTitle) - if sort_by_last_name: - catpl = sorted_by_last_name(catpl, page) - if catpl in cats: - pywikibot.output(u"%s is already in %s." - % (page.title(), catpl.title())) - else: - pywikibot.output(u'Adding %s' % catpl.title(asLink=True)) - cats.append(catpl) - text = page.get() - text = pywikibot.replaceCategoryLinks(text, cats) - try: - page.put(text, comment = editSummary) - except pywikibot.EditConflict: - pywikibot.output( - u'Skipping %s because of edit conflict' - % (page.title())) - class CategoryMoveRobot: """Robot to move pages from one category to another.""" def __init__(self, oldCatTitle, newCatTitle, batchMode=False, editSummary='', inPlace=False, moveCatPage=True, - deleteEmptySourceCat=True, titleRegex=None): + deleteEmptySourceCat=True, titleRegex=None, + useSummaryForDeletion=True): site = pywikibot.getSite() self.editSummary = editSummary self.oldCat = catlib.Category(pywikibot.Link('Category:' + oldCatTitle)) @@ -449,16 +524,17 @@ self.batchMode = batchMode self.deleteEmptySourceCat = deleteEmptySourceCat self.titleRegex = titleRegex + self.useSummaryForDeletion = useSummaryForDeletion + + def run(self): + site = pywikibot.getSite() + newCat = catlib.Category(pywikibot.Link('Category:' + self.newCatTitle)) # set edit summary message if not self.editSummary: self.editSummary = pywikibot.translate(site, 
msg_change) \ % {'oldcat':self.oldCat.title(), 'newcat':newCat.title()}
- def run(self): - site = pywikibot.getSite() - newCat = catlib.Category(pywikibot.Link('Category:' + self.newCatTitle)) - # Copy the category contents to the new category page copied = False oldMovedTalk = None @@ -523,9 +599,7 @@
class CategoryListifyRobot: - ''' - Creates a list containing all of the members in a category. - ''' + '''Creates a list containing all of the members in a category.''' listify_msg={ 'ar':u'روبوت: عرض من %s (%d مدخلة)', 'ca':u'Robot: Llistant de %s (%d entrades)', @@ -560,7 +634,8 @@ if self.subCats: setOfArticles += set(self.cat.subcategories()) if not self.editSummary: - self.editSummary = pywikibot.translate(self.site, self.listify_msg) % (self.cat.title(), len(setOfArticles)) + self.editSummary = pywikibot.translate(self.site, self.listify_msg) \ + % (self.cat.title(), len(setOfArticles))
listString = "" for article in setOfArticles: @@ -579,11 +654,12 @@ else: self.list.put(listString, comment=self.editSummary)
+ class CategoryRemoveRobot: - ''' - Removes the category tag from all pages in a given category and from the - category pages of all subcategories, without prompting. + '''Removes the category tag from all pages in a given category + and from the category pages of all subcategories, without prompting. Does not remove category tags pointing at subcategories. + ''' deletion_reason_remove = { 'ar':u'روبوت: التصنيف تم الاستغناء عنه', @@ -683,13 +759,17 @@ else: reason = pywikibot.translate(self.site, self.deletion_reason_remove) talkPage = self.cat.toggleTalkPage() - self.cat.delete(reason, not self.batchMode) + try: + self.cat.delete(reason, not self.batchMode) + except pywikibot.NoUsername: + pywikibot.output(u'You're not setup sysop info, category will not delete.' % self.cat.site()) + return if (talkPage.exists()): talkPage.delete(reason=reason, prompt=not self.batchMode)
+ class CategoryTidyRobot: - """ - Script to help a human to tidy up a category by moving its articles into + """Script to help a human to tidy up a category by moving its articles into subcategories
Specify the category name on the command line. The program will pick up the @@ -710,13 +790,14 @@ Important: * this bot is written to work with the MonoBook skin, so make sure your bot account uses this skin + """ def __init__(self, catTitle, catDB): self.catTitle = catTitle self.catDB = catDB self.site = pywikibot.getSite() self.editSummary = pywikibot.translate(self.site, msg_change)\ - % {'oldcat':cat.title(), 'newcat':u''} + % {'oldcat':self.catTitle, 'newcat':u''}
def move_to_category(self, article, original_cat, current_cat): ''' @@ -835,9 +916,11 @@ else: preloadingGen = pagegenerators.PreloadingGenerator(iter(articles)) for article in preloadingGen: - pywikibot.output(u'\n===================================================================') + pywikibot.output('') + pywikibot.output(u'=' * 67) self.move_to_category(article, cat, cat)
+ class CategoryTreeRobot: ''' Robot to create tree overviews of the category structure. @@ -860,6 +943,7 @@ self.filename = filename # TODO: make maxDepth changeable with a parameter or config file entry self.maxDepth = maxDepth + self.site = pywikibot.getSite()
def treeview(self, cat, currentDepth = 0, parent = None): ''' @@ -919,7 +1003,7 @@ # create a list of wiki links to the supercategories supercat_names.append('[[:%s|%s]]' % (supercats[i].title(), supercats[i].title().split(':', 1)[1])) # print this list, separated with commas, using translations given in also_in_cats - result += ' ' + pywikibot.translate(pywikibot.getSite(), also_in_cats) % ', '.join(supercat_names) + result += ' ' + pywikibot.translate(self.site, also_in_cats) % ', '.join(supercat_names) result += '\n' if currentDepth < self.maxDepth: for subcat in self.catDB.getSubcats(cat): @@ -932,12 +1016,13 @@ return result
def run(self): - """ - Prints the multi-line string generated by treeview or saves it to a file. + """Prints the multi-line string generated by treeview or saves it to a + file.
Parameters: * catTitle - the title of the category which will be the tree's root * maxDepth - the limit beyond which no subcategories will be listed + """ cat = catlib.Category(pywikibot.Link('Category:' + self.catTitle)) tree = self.treeview(cat) @@ -979,6 +1064,7 @@ action = None sort_by_last_name = False restore = False + create_pages = False for arg in pywikibot.handleArgs(*args): if genFactory.handleArg(arg): continue @@ -1030,6 +1116,8 @@ talkPages = True elif arg == '-recurse': recurse = True + elif arg == '-create': + create_pages = True
gen = genFactory.getCombinedGenerator() if action == 'add': @@ -1040,11 +1128,12 @@ # The preloading generator is responsible for downloading multiple # pages from the wiki simultaneously. gen = pagegenerators.PreloadingGenerator(gen) - add_category(sort_by_last_name) + bot = AddCategory(gen, sort_by_last_name, create_pages, editSummary) + bot.run() elif action == 'remove': if (fromGiven == False): oldCatTitle = pywikibot.input( - u'Please enter the name of the category that should be removed:') +u'Please enter the name of the category that should be removed:') bot = CategoryRemoveRobot(oldCatTitle, batchMode, editSummary, useSummaryForDeletion, inPlace=inPlace) bot.run() @@ -1066,8 +1155,8 @@ catTitle = pywikibot.input( u'For which category do you want to create a tree view?') filename = pywikibot.input( - u'Please enter the name of the file where the tree should be saved,\n' - u'or press enter to simply show the tree:') + u'Please enter the name of the file where the tree should be saved,\n' + u'or press enter to simply show the tree:') bot = CategoryTreeRobot(catTitle, catDB, filename) bot.run() elif action == 'listify':
pywikipedia-svn@lists.wikimedia.org