[Pywikipedia-l] SVN: [6229] branches/rewrite/pywikibot/scripts/category.py

russblau at svn.wikimedia.org russblau at svn.wikimedia.org
Thu Jan 8 16:13:30 UTC 2009


Revision: 6229
Author:   russblau
Date:     2009-01-08 16:13:30 +0000 (Thu, 08 Jan 2009)

Log Message:
-----------
Branch for conversion to new framework.

Added Paths:
-----------
    branches/rewrite/pywikibot/scripts/category.py

Copied: branches/rewrite/pywikibot/scripts/category.py (from rev 6214, trunk/pywikipedia/category.py)
===================================================================
--- branches/rewrite/pywikibot/scripts/category.py	                        (rev 0)
+++ branches/rewrite/pywikibot/scripts/category.py	2009-01-08 16:13:30 UTC (rev 6229)
@@ -0,0 +1,999 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+"""
+Scripts to manage categories.
+
+Syntax: python category.py action [-option]
+
+where action can be one of these:
+ * add         - mass-add a category to a list of pages
+ * remove      - remove category tag from all pages in a category
+ * move        - move all pages in a category to another category
+ * tidy        - tidy up a category by moving its articles into subcategories
+ * tree        - show a tree of subcategories of a given category
+ * listify     - make a list of all of the articles that are in a category
+
+and option can be one of these:
+ * -person     - sort persons by their last name (for action 'add')
+ * -rebuild    - reset the database
+ * -from:      - The category to move from (for the move option)
+                 Also, the category to remove from in the remove option
+                 Also, the category to make a list of in the listify option
+ * -to:        - The category to move to (for the move option)
+               - Also, the name of the list to make in the listify option
+         NOTE: If the category names have spaces in them you may need to use
+         a special syntax in your shell so that the names aren't treated as
+         separate parameters.  For instance, in BASH, use single quotes,
+         e.g. -from:'Polar bears'
+ * -batch      - Don't prompt to delete emptied categories (do it
+                 automatically).
+ * -summary:   - Pick a custom edit summary for the bot.
+ * -inplace    - Use this flag to change categories in place rather than
+                 rearranging them.
+ * -nodelsum   - An option for remove, this specifies not to use the custom
+                 edit summary as the deletion reason.  Instead, it uses the
+                 default deletion reason for the language, which is "Category
+                 was disbanded" in English.
+ * -overwrite  - An option for listify, this overwrites the current page with
+                 the list even if something is already there.
+ * -showimages - An option for listify, this displays images rather than
+                 linking them in the list.
+ * -talkpages  - An option for listify, this outputs the links to talk pages
+                 of the pages to be listified in addition to the pages
+                 themselves.
+ * -recurse    - Recurse through all subcategories of categories.
+ * -match      - Only work on pages whose titles match the given regex (for
+                 move and remove actions).
+
+If action is "add", the following options are supported:
+
+&params;
+
+For the actions tidy and tree, the bot will store the category structure
+locally in category.dump. This saves time and server load, but if it uses
+these data later, they may be outdated; use the -rebuild parameter in this
+case.
+
+For example, to create a new category from a list of persons, type:
+
+  python category.py add -person
+
+and follow the on-screen instructions.
+
+Or to do it all from the command-line, use the following syntax:
+
+  python category.py move -from:US -to:'United States'
+
+This will move all pages in the category US to the category United States.
+
+"""
+
+#
+# (C) Rob W.W. Hooft, 2004
+# (C) Daniel Herding, 2004
+# (C) Anreas J Schwab, 2007
+#
+__version__ = '$Id$'
+#
+# Distributed under the terms of the MIT license.
+#
+import os, re, sys, pickle, bz2
+import wikipedia, catlib, config, pagegenerators
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+# Maps the '&params;' placeholder used in the module docstring to the
+# generic help text for the page generator options.
+docuReplacements = {
+    '&params;': pagegenerators.parameterHelp
+}
+# Summary messages
+# Edit summaries used by add_category(), keyed by language code. The single
+# %s placeholder is filled with the title of the category being added, as
+# entered by the user without namespace.
+msg_add={
+    'ar':u'روبوت: إضافة [[تصنيف:%s]]',
+    'bat-smg':u'Robots: Pridedama [[Kateguorėjė:%s]]',
+    'ca':u'Robot: Afegint [[Categoria:%s]]',
+    'da':u'Robot: Tilføjer [[Kategori:%s]]',
+    'de':u'Bot: Ergänze [[Kategorie:%s]]',
+    'en':u'Robot: Adding [[Category:%s]]',
+    'es':u'Bot: Añadida [[Categoría:%s]]',
+    'id':u'Bot: Menambahkan [[Kategori:%s]]',
+    'fi':u'Botti lisäsi luokkaan [[Luokka:%s]]',
+    'fr':u'Robot : ajoute [[Catégorie:%s]]',
+    'he':u'בוט: מוסיף [[קטגוריה:%s]]',
+    'ia':u'Robot: Addition de [[Categoria:%s]]',
+    'is':u'Vélmenni: Bæti við [[Flokkur:%s]]',
+    'it':u'Bot: Aggiungo [[Categoria:%s]]',
+    'ja':u'ロボットによる: カテゴリ追加 [[Category:%s]]',
+    'kk':u'Бот: [[Санат:%s]] үстеді',
+    'ko': u'로봇: [[분류:%s]] 추가',
+    'ksh':u'Bot: [[Saachjropp:%s]] erinjedonn',
+    'lb': u'Bot: Derbäi setzen [[Kategorie:%s]]',
+    'lt':u'robotas: Pridedama [[Kategorija:%s]]',
+    'nds':u'Kat-Bot: [[Kategorie:%s]] rin',
+    'nds-nl':u'bot: [[kattegerie:%s]] derbie edaon',
+    'nl':u'Bot: [[Categorie:%s]] toegevoegd',
+    'no':u'Robot: Legger til [[Kategori:%s]]',
+    'nn':u'robot: la til [[Kategori:%s]]',
+    'pl':u'Robot dodaje [[Kategoria:%s]]',
+    'pt':u'Bot: Adicionando [[Categoria:%s]]',
+    'ru':u'Робот: добавление [[Категория:%s]]',
+    'sr':u'Бот: Додаје [[Категорија:%s]]',
+    'sv':u'Robot: Lägger till [[Kategori:%s]]',
+    'zh':u'機器人:新增目錄 [[Category:%s]]',
+    }
+
+# Edit summaries used by CategoryMoveRobot when no custom summary is given,
+# keyed by language code. The single %s placeholder is filled with the title
+# of the old (source) category.
+msg_change={
+    'ar':u'روبوت: تغيير %s',
+    'ca':u'Robot: Canviant %s',
+    'da':u'Robot: Ændrer %s',
+    'de':u'Bot: Ändere %s',
+    'en':u'Robot: Changing %s',
+    'es':u'Bot: Cambiada %s',
+    'id':u'Bot: Mengganti %s',
+    'fi':u'Botti muutti luokan %s',
+    'fr':u'Robot : modifie [[%s]]',
+    'he':u'בוט: משנה %s',
+    'ia':u'Robot: Modification de %s',
+    'is':u'Vélmenni: Breyti flokknum [[%s]]',
+    'it':u'Bot: Modifico %s',
+    'lt':u'robotas: Keičiama %s',
+    'ja':u'ロボットによる: カテゴリ変更 [[%s]]',
+    'kk':u'Бот: %s дегенді түзетті',
+    'ko': u'로봇: %s 수정',
+    'ksh':u'Bot: %s ußjewääßelt',
+    'nds':u'Kat-Bot: %s utwesselt',
+    'nds-nl':u'bot: wieziging %s',
+    'nl':u'Bot: Wijziging %s',
+    'no':u'Robot: Endrer %s',
+    'nn':u'robot: endra %s',
+    'pt':u'Bot: Modificando [[%s]]',
+    'pl':u'Robot przenosi %s',
+    'ru':u'Робот: изменение %s',
+    'sr':u'Бот: Измена категорије %s',
+    'sv':u'Robot: Ändrar %s',
+    'zh':u'機器人:變更目錄 [[%s]]',
+    }
+
+# Deletion/move reasons used by CategoryMoveRobot, keyed by language code.
+# Every entry is formatted with a 2-tuple (new category title, new category
+# title), so each message MUST contain exactly two %s placeholders; an entry
+# with a different number makes the % operator raise TypeError.
+deletion_reason_move = {
+    'ar':u'روبوت: التصنيف نقل إلى [[:تصنيف:%s|%s]]',
+    'bat-smg':u'Robots: Kateguorėjė bova parvadėnta i [[:Kateguorėjė:%s|%s]]',
+    'ca':u'Robot: La categoria s\'ha mogut a [[:Categoria:%s|%s]]',
+    'da':u'Robot: Kategori flyttet til [[:Category:%s|%s]]',
+    'de':u'Bot: Kategorie wurde nach [[:Category:%s|%s]] verschoben',
+    'en':u'Robot: Category was moved to [[:Category:%s|%s]]',
+    'es':u'Robot: La categoría ha sido movida a [[:Category:%s|%s]]',
+    'fi':u'Botti siirsi luokan nimelle [[:Luokka:%s|%s]]',
+    'fr':u'Robot : catégorie déplacée sur [[:Category:%s|%s]]',
+    'he':u'בוט: הקטגוריה הועברה לשם [[:קטגוריה:%s|%s]]',
+    'ia':u'Robot: Categoria transferite a [[:Category:%s|%s]]',
+    'id':u'Bot: Kategori dipindahkan ke [[:Category:%s|%s]]',
+    'it':u'Bot: La categoria è stata sostituita da [[:Categoria:%s|%s]]',
+    # Was '[[:Category:%s]]' (one placeholder), which raised TypeError when
+    # formatted with the 2-tuple used by CategoryMoveRobot.
+    'ja':u'ロボットによる: カテゴリ [[:Category:%s|%s]]へ移動',
+    'kk':u'Бот: Санат [[:Санат:%s|%s]] дегенге жылжытылды',
+    'ko': u'로봇: 분류가 [[:분류:%s|%s]]로 옮겨짐',
+    'ksh':u'Bot: Saachjropp noh [[:Category:%s|%s]] jeschovve',
+    'lb': u'Bot: Kategorie gouf gréckelt: Nei [[:Kategorie:%s|%s]]',
+    'lt':u'robotas: Kategorija pervadinta į [[:Category:%s|%s]]',
+    'nds':u'Kat-Bot: Kategorie na [[:Category:%s|%s]] schaven',
+    'nds-nl':u'Bot: kattegerie is herneumd naor [[:Kattegerie:%s|%s]]',
+    'nl':u'Bot: Categorie is hernoemd naar [[:Category:%s|%s]]',
+    'no':u'Robot: Kategorien ble flyttet til [[:Category:%s|%s]]',
+    'nn':u'robot: kategorien blei flytta til [[:Kategori:%s|%s]]',
+    'pt':u'Bot: Categoria [[:Category:%s|%s]] foi movida',
+    'pl':u'Robot przenosi kategorię do [[:Category:%s|%s]]',
+    'ru':u'Робот: категория переименована в [[:Категория:%s|%s]]',
+    'sr':u'Бот: Категорија премештена у [[:Category:%s|%s]]',
+    'sv':u'Robot: Kategori flyttades till [[:Category:%s|%s]]',
+    'zh':u'機器人:移動目錄至 [[:Category:%s|%s]]',
+    }
+
+# Template names per language code. CategoryMoveRobot.run() passes the list
+# for the current site to Category.copyAndKeep() when copying the category
+# page. NOTE(review): presumably these are the "categories for discussion"
+# templates that must not be carried over to the copy -- confirm in catlib.
+cfd_templates = {
+    'en':['cfd', 'cfr', 'cfru', 'cfr-speedy', 'cfm', 'cfdu'],
+    'fi':['roskaa', 'poistettava', 'korjattava/nimi', u'yhdistettäväLuokka'],
+    'he':[u'הצבעת מחיקה', u'למחוק'],
+    }
+
+class CategoryDatabase:
+    '''
+    Temporary knowledge base that remembers, for each category, the
+    subcategories and articles it contains as well as its supercategories,
+    so that category pages do not need to be loaded over and over again.
+
+    Attributes:
+        catContentDB - dict mapping a category to a 2-tuple
+                       (list of subcategories, list of articles)
+        superclassDB - dict mapping a category to the list of its
+                       supercategories
+    '''
+    def __init__(self, rebuild = False, filename = 'category.dump.bz2'):
+        '''
+        Load the database from a bz2-compressed pickle dump, or start empty.
+
+        Parameters: rebuild  - if True, ignore any existing dump and start
+                               with empty dictionaries
+                    filename - dump file; a relative name is resolved with
+                               wikipedia.config.datafilepath()
+
+        If the dump cannot be read for any reason, the database silently
+        starts empty.
+        '''
+        if rebuild:
+            self.rebuild()
+            return
+        try:
+            if not os.path.isabs(filename):
+                filename = wikipedia.config.datafilepath(filename)
+            f = bz2.BZ2File(filename, 'r')
+            try:
+                wikipedia.output(u'Reading dump from %s'
+                                 % wikipedia.config.shortpath(filename))
+                # NOTE: unpickling can execute arbitrary code; only dump
+                # files written by this script should ever be loaded here.
+                databases = pickle.load(f)
+            finally:
+                # close the file even when unpickling fails
+                f.close()
+            # keys are categories, values are 2-tuples with lists as entries.
+            self.catContentDB = databases['catContentDB']
+            # like the above, but for supercategories
+            self.superclassDB = databases['superclassDB']
+        except (KeyboardInterrupt, SystemExit):
+            # never swallow a user abort or interpreter shutdown
+            raise
+        except Exception:
+            # If something goes wrong, just rebuild the database
+            self.rebuild()
+
+    def rebuild(self):
+        '''Discard all cached data and start with empty dictionaries.'''
+        self.catContentDB={}
+        self.superclassDB={}
+
+    def _getContents(self, cat):
+        '''
+        Return the (subcategories, articles) 2-tuple for cat, asking the
+        category itself and caching the answer if it is not known yet.
+        '''
+        if cat not in self.catContentDB:
+            subcatlist = cat.subcategoriesList()
+            articlelist = cat.articlesList()
+            # add to dictionary
+            self.catContentDB[cat] = (subcatlist, articlelist)
+        return self.catContentDB[cat]
+
+    def getSubcats(self, supercat):
+        '''
+        For a given supercategory, return a list of Categorys for all its
+        subcategories.
+        Saves this list in a temporary database so that it won't be loaded from the
+        server next time it's required.
+        '''
+        return self._getContents(supercat)[0]
+
+    def getArticles(self, cat):
+        '''
+        For a given category, return a list of Pages for all its articles.
+        Saves this list in a temporary database so that it won't be loaded from the
+        server next time it's required.
+        '''
+        return self._getContents(cat)[1]
+
+    def getSupercats(self, subcat):
+        '''
+        For a given subcategory, return the list of its supercategories.
+        Saves this list in a temporary database so that it won't be loaded from the
+        server next time it's required.
+        '''
+        if subcat not in self.superclassDB:
+            # add to dictionary
+            self.superclassDB[subcat] = subcat.supercategoriesList()
+        return self.superclassDB[subcat]
+
+    def dump(self, filename = 'category.dump.bz2'):
+        '''
+        Saves the contents of the dictionaries superclassDB and catContentDB to disk.
+
+        A relative filename is resolved with wikipedia.config.datafilepath().
+        A pickling failure is reported but not raised; the file written in
+        that case is incomplete and will be discarded (database rebuilt) the
+        next time it is loaded.
+        '''
+        if not os.path.isabs(filename):
+            filename = wikipedia.config.datafilepath(filename)
+        wikipedia.output(u'Dumping to %s, please wait...'
+                         % wikipedia.config.shortpath(filename))
+        f = bz2.BZ2File(filename, 'w')
+        databases = {
+            'catContentDB': self.catContentDB,
+            'superclassDB': self.superclassDB
+        }
+        try:
+            # store dump to disk in binary format
+            try:
+                pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL)
+            except pickle.PicklingError:
+                wikipedia.output(u'WARNING: could not pickle the category '
+                                 u'database; the dump is incomplete.')
+        finally:
+            # always release the file handle, whatever pickle raised
+            f.close()
+
+def sorted_by_last_name(catlink, pagelink):
+    '''Return a category link with a key that sorts persons by last name.
+
+    Parameters: catlink - The Category to be linked
+                pagelink - the Page to be placed in the category
+
+    Returns a wikipedia.Page on pagelink's site whose title is catlink's
+    title, followed by '|' and the sort key when the page name consists of
+    more than one word.
+
+    Words in brackets after the name will be removed. Example: If
+    catlink is [[Category:Author]] and pagelink is a Page to [[Alexandre
+    Dumas (senior)]], this function will return a link for
+    [[Category:Author|Dumas, Alexandre]]
+
+    '''
+    page_name = pagelink.title()
+    site = pagelink.site()
+    # regular expression that matches a name followed by a space and
+    # disambiguation brackets. Group 1 is the name without the rest.
+    # Raw string, so the backslashes reach the regex engine unchanged.
+    bracketsR = re.compile(r'(.*) \(.+?\)')
+    match_object = bracketsR.match(page_name)
+    if match_object:
+        page_name = match_object.group(1)
+    split_string = page_name.split(' ')
+    if len(split_string) > 1:
+        # pull last part of the name to the beginning, and append the
+        # rest after a comma; e.g., "John von Neumann" becomes
+        # "Neumann, John von"
+        sorted_key = split_string[-1] + ', ' + ' '.join(split_string[:-1])
+        # give explicit sort key
+        return wikipedia.Page(site, catlink.title() + '|' + sorted_key)
+    # single-word names need no explicit sort key
+    return wikipedia.Page(site, catlink.title())
+
+def add_category(sort_by_last_name = False):
+    '''A robot to mass-add a category to a list of pages.
+
+    Parameters: sort_by_last_name - if True, the category link gets a sort
+                                    key built by sorted_by_last_name()
+
+    The pages are taken from the module-level page generator "gen", which
+    is not defined in this function; if it is empty or None, nothing is
+    done. The user is asked for the category title once, and then for each
+    page whether to add it (y), skip it (n), or add it to this and all
+    remaining pages (a, after an extra confirmation).
+    '''
+    site = wikipedia.getSite()
+    if not gen:
+        return
+    newcatTitle = wikipedia.input(
+        u'Category to add (do not give namespace):')
+    if not site.nocapitalize:
+        newcatTitle = newcatTitle[:1].capitalize() + newcatTitle[1:]
+
+    # set edit summary message
+    wikipedia.setAction(wikipedia.translate(site, msg_add) % newcatTitle)
+
+    cat_namespace = site.category_namespaces()[0]
+
+    answer = ''
+    for page in gen:
+        # a confirmed 'a' (all) sticks for the remaining pages; any other
+        # answer is forgotten so that the user is asked again
+        if answer != 'a':
+            answer = ''
+
+        while answer not in ('y','n','a'):
+            answer = wikipedia.input(u'%s [y/n/a(ll)]:' % (page.aslink()))
+            if answer == 'a':
+                confirm = ''
+                while confirm not in ('y','n'):
+                    confirm = wikipedia.input(u"""\
+This should be used if and only if you are sure that your links are correct!
+Are you sure? [y/n]:""")
+                if confirm == 'n':
+                    # not confirmed: ask again for this page
+                    answer = ''
+
+        if answer == 'n':
+            continue
+
+        try:
+            text = page.get()
+        except wikipedia.NoPage:
+            wikipedia.output(u"%s doesn't exist yet. Ignoring."
+                             % (page.title()))
+            continue
+        except wikipedia.IsRedirectPage, arg:
+            redirTarget = wikipedia.Page(site, arg.args[0])
+            wikipedia.output(
+                u"WARNING: %s is redirect to %s. Ignoring."
+                % (page.title(), redirTarget.title()))
+            continue
+
+        cats = page.categories()
+        # Show the title of the page we're working on.
+        # Highlight the title in purple.
+        wikipedia.output(
+            u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+            % page.title())
+        wikipedia.output(u"Current categories:")
+        for cat in cats:
+            wikipedia.output(u"* %s" % cat.title())
+        catpl = wikipedia.Page(site,
+                               cat_namespace + ':' + newcatTitle)
+        if sort_by_last_name:
+            catpl = sorted_by_last_name(catpl, page)
+        if catpl in cats:
+            wikipedia.output(u"%s is already in %s."
+                             % (page.title(), catpl.title()))
+            continue
+
+        wikipedia.output(u'Adding %s' % catpl.aslink())
+        cats.append(catpl)
+        # reuse the text fetched above instead of getting the page again
+        text = wikipedia.replaceCategoryLinks(text, cats)
+        try:
+            page.put(text)
+        except wikipedia.EditConflict:
+            wikipedia.output(
+                u'Skipping %s because of edit conflict'
+                % (page.title()))
+
+class CategoryMoveRobot:
+    """Robot to move pages from one category to another.
+
+    Optionally copies the old category page to the new title, moves its talk
+    page, re-tags all articles and subcategories, and finally deletes the
+    emptied source category.
+    """
+    def __init__(self, oldCatTitle, newCatTitle, batchMode=False,
+                 editSummary='', inPlace=False, moveCatPage=True,
+                 deleteEmptySourceCat=True, titleRegex=None):
+        """Store the options and set the edit summary.
+
+        Category titles are given without namespace. A non-empty editSummary
+        is used as is; otherwise the translated msg_change message for the
+        old category is used.
+        """
+        site = wikipedia.getSite()
+        self.oldCat = catlib.Category(site, 'Category:' + oldCatTitle)
+        self.newCatTitle = newCatTitle
+        self.batchMode = batchMode
+        self.editSummary = editSummary
+        self.inPlace = inPlace
+        self.moveCatPage = moveCatPage
+        self.deleteEmptySourceCat = deleteEmptySourceCat
+        self.titleRegex = titleRegex
+        # set edit summary message
+        summary = self.editSummary
+        if not summary:
+            summary = wikipedia.translate(site, msg_change) \
+                      % self.oldCat.title()
+        wikipedia.setAction(summary)
+
+    def _moveReason(self, site):
+        """Return the translated reason used for moves and deletions."""
+        return wikipedia.translate(site, deletion_reason_move) \
+               % (self.newCatTitle, self.newCatTitle)
+
+    def _moveTalkPage(self, site, newCat):
+        """Move the old category's talk page next to newCat.
+
+        Returns the old talk page if it was moved, otherwise None.
+        """
+        reason = self._moveReason(site)
+        oldTalk = self.oldCat.toggleTalkPage()
+        if not oldTalk.exists():
+            return None
+        newTalkTitle = newCat.toggleTalkPage().title()
+        try:
+            talkMoved = oldTalk.move(newTalkTitle, reason)
+        except (wikipedia.NoPage, wikipedia.PageNotSaved), e:
+            #in order :
+            #Source talk does not exist, or
+            #Target talk already exists
+            wikipedia.output(e.message)
+            return None
+        if talkMoved:
+            return oldTalk
+        return None
+
+    def run(self):
+        """Carry out the move as configured in the constructor."""
+        site = wikipedia.getSite()
+        newCat = catlib.Category(site, 'Category:' + self.newCatTitle)
+
+        # Copy the category contents to the new category page
+        copied = False
+        oldMovedTalk = None
+        if self.oldCat.exists() and self.moveCatPage:
+            copied = self.oldCat.copyAndKeep(
+                            self.newCatTitle,
+                            wikipedia.translate(site, cfd_templates))
+            # Also move the talk page
+            if copied:
+                oldMovedTalk = self._moveTalkPage(site, newCat)
+
+        # Move articles first, then subcategories
+        for makeGenerator in (pagegenerators.CategorizedPageGenerator,
+                              pagegenerators.SubCategoriesPageGenerator):
+            members = pagegenerators.PreloadingGenerator(
+                          makeGenerator(self.oldCat, recurse=False))
+            for member in members:
+                if self.titleRegex and not re.search(self.titleRegex,
+                                                     member.title()):
+                    # title does not match the filter, leave it alone
+                    continue
+                catlib.change_category(member, self.oldCat, newCat,
+                                       inPlace=self.inPlace)
+
+        # Delete the old category and its moved talk page
+        if not copied or not (self.deleteEmptySourceCat == True):
+            return
+        if not self.oldCat.isEmpty():
+            wikipedia.output('Couldn\'t delete %s - not empty.'
+                             % self.oldCat.title())
+            return
+        reason = self._moveReason(site)
+        # in batch mode, delete without asking
+        confirm = not self.batchMode
+        self.oldCat.delete(reason, confirm, mark = True)
+        if oldMovedTalk is not None:
+            oldMovedTalk.delete(reason, confirm, mark = True)
+
+
+class CategoryListifyRobot:
+    '''
+    Writes a wiki list of all members of a category to a given page.
+    '''
+    listify_msg={
+        'ar':u'روبوت: عرض من %s (%d مدخلة)',
+        'ca':u'Robot: Llistant de %s (%d entrades)',
+        'en':u'Robot: Listifying from %s (%d entries)',
+        'fi':u'Botti listasi luokan %s (%d jäsentä)',
+        'he':u'בוט: יוצר רשימה מהקטגוריה %s (%d דפים)',
+        'kk':u'Бот: %s дегеннен (%d буын) тізімдеді',
+        'nds-nl':u'Bot: lieste van %s (%d pagina\'s)',
+        'nl':u'Bot: Lijst van %s (%d pagina\'s)',
+        'sv':u'Robot: Skapar en lista från %s (%d)',
+        'pt':u'Bot: Listando de %s (%d entradas)',
+        'zh':u'機器人: 從%s提取列表(%d個項目)',
+    }
+
+    def __init__(self, catTitle, listTitle, editSummary, overwrite = False,
+                 showImages = False, subCats = False, talkPages = False,
+                 recurse = False):
+        '''
+        catTitle is the category to list (without namespace), listTitle the
+        title of the page that receives the list; the remaining arguments
+        are stored as options for run().
+        '''
+        site = wikipedia.getSite()
+        self.cat = catlib.Category(site, 'Category:' + catTitle)
+        self.list = wikipedia.Page(wikipedia.getSite(), listTitle)
+        self.editSummary = editSummary
+        self.overwrite = overwrite
+        self.showImages = showImages
+        self.subCats = subCats
+        self.talkPages = talkPages
+        self.recurse = recurse
+
+    def run(self):
+        '''
+        Build the list and save it, unless the target page already exists
+        and overwriting was not requested.
+        '''
+        members = self.cat.articlesList(recurse = self.recurse)
+        if self.subCats:
+            members += self.cat.subcategoriesList()
+
+        summary = self.editSummary
+        if not summary:
+            summary = wikipedia.translate(wikipedia.getSite(), self.listify_msg) \
+                      % (self.cat.title(), len(members))
+        wikipedia.setAction(summary)
+
+        entries = []
+        for member in members:
+            # categories, and images unless they are to be displayed, are
+            # linked with a leading colon
+            if (not member.isImage() or self.showImages) and not member.isCategory():
+                link = "*[[%s]]"
+            else:
+                link = "*[[:%s]]"
+            if self.talkPages and not member.isTalkPage():
+                entry = (link + " -- [[%s|talk]]\n") \
+                        % (member.title(), member.toggleTalkPage().title())
+            else:
+                entry = (link + "\n") % member.title()
+            entries.append(entry)
+        listString = "".join(entries)
+
+        if not self.list.exists() or self.overwrite:
+            self.list.put(listString)
+        else:
+            wikipedia.output(u'Page %s already exists, aborting.' % self.list.title())
+
+class CategoryRemoveRobot:
+    '''
+    Removes the category tag from all pages in a given category and from the
+    category pages of all subcategories, then deletes the category page (and
+    its talk page) if the category exists and is empty.
+    Does not remove category tags pointing at subcategories.
+
+    The deletions ask for confirmation unless batchMode is set.
+    '''
+    # Deletion reasons per language code; used when no custom edit summary
+    # is given or when useSummaryForDeletion is False.
+    deletion_reason_remove = {
+        'ar':u'روبوت: التصنيف تم الاستغناء عنه',
+        'ca':u'Robot: La categoria s\'ha eliminat',
+        'da':u'Robot: Kategorien blev opløst',
+        'de':u'Bot: Kategorie wurde aufgelöst',
+        'en':u'Robot: Category was disbanded',
+        'es':u'Robot: La categoría ha sido eliminada',
+        'fi':u'Botti tyhjensi luokan',
+        'he':u'בוט: הקטגוריה פורקה',
+        'ia':u'Robot: Categoria esseva dissolvite',
+        'kk':u'Бот: Санат тарқатылды',
+        'ksh':u'Bot: de Saachjropp is nu opjelööß',
+        'nds':u'Kat-Bot: Kategorie is nu oplööst',
+        'nds-nl':u'Bot: kattegerie besteet neet meer',
+        'nl':u'Bot: Categorie is opgeheven',
+        'no':u'Robot: Kategorien ble oppløst',
+        'nn':u'robot: kategorien blei løyst opp',
+        'pt':u'Bot: Categoria foi unida',
+        'ru':u'Робот: категория расформирована',
+        'sv':u'Robot: Kategorin upplöstes',
+        'zh':u'機器人:本目錄已解散',
+    }
+
+    # Edit summaries per language code. Each message is formatted with ONE
+    # value, the full title of the category (self.cat.title()), so it must
+    # contain exactly one %s placeholder.
+    # NOTE(review): the 'is', 'pt' and 'ru' entries prepend a namespace to
+    # %s; if title() already includes the namespace the resulting link is
+    # doubled -- confirm and adjust.
+    msg_remove={
+        'ar':u'روبوت: إزالة من %s',
+        'bat-smg':u'Robots: Trėnama ėš  %s',
+        'ca':u'Robot: Eliminant de %s',
+        'da':u'Robot: Fjerner fra %s',
+        'de':u'Bot: Entferne aus %s',
+        'en':u'Robot: Removing from %s',
+        'es':u'Bot: Eliminada de la %s',
+        'fi':u'Botti poisti luokasta %s',
+        'he':u'בוט: מסיר את הדף מהקטגוריה %s',
+        'ia':u'Robot: Eliminate de %s',
+        'is':u'Vélmenni: Fjarlægi [[Flokkur:%s]]',
+        'kk':u'Бот: %s дегеннен аластатты',
+        'ksh':u'Bot: uß de %s ußjedraare',
+        'lb': u'Bot: Ewech huele vun %s',
+        'nds':u'Kat-Bot: rut ut %s',
+        'nds-nl':u'Bot: vort-ehaold uut %s',
+        'nl':u'Bot: Verwijderd uit %s',
+        'no':u'Robot: Fjerner ifra %s',
+        'nn':u'robot: fjerna ifrå %s',
+        'pt':u'Bot: Removendo [[Categoria:%s]]',
+        'ru':u'Робот: исключение из [[Категория:%s]]',
+        # Was '[[Категорија:%s|%s]]' (two placeholders), which raised
+        # TypeError because only one value is supplied.
+        'sr':u'Бот: Уклањање из категорије [[:%s]]',
+        'sv':u'Robot: Tar bort från %s',
+        'zh':u'機器人:移除目錄%s',
+    }
+
+    def __init__(self, catTitle, batchMode = False, editSummary = '', useSummaryForDeletion = True, titleRegex = None, inPlace = False):
+        '''
+        Parameters: catTitle   - title of the category, without namespace
+                    batchMode  - if True, delete without asking
+                    editSummary - custom edit summary; if empty, the
+                                 translated msg_remove message is used
+                    useSummaryForDeletion - use editSummary as the deletion
+                                 reason as well
+                    titleRegex - if given, only articles whose title matches
+                                 this regex are changed
+                    inPlace    - passed on to catlib.change_category()
+        '''
+        self.editSummary = editSummary
+        self.cat = catlib.Category(wikipedia.getSite(), 'Category:' + catTitle)
+        self.useSummaryForDeletion = useSummaryForDeletion
+        self.batchMode = batchMode
+        self.titleRegex = titleRegex
+        self.inPlace = inPlace
+        # set edit summary message
+        if self.editSummary:
+            wikipedia.setAction(self.editSummary)
+        else:
+            wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg_remove) % self.cat.title())
+
+    def run(self):
+        '''Remove the category tag from all members, then delete the category.'''
+        articles = self.cat.articlesList(recurse = 0)
+        if not articles:
+            wikipedia.output(u'There are no articles in category %s' % self.cat.title())
+        for article in articles:
+            if not self.titleRegex or re.search(self.titleRegex,article.title()):
+                catlib.change_category(article, self.cat, None, inPlace = self.inPlace)
+        # Also removes the category tag from subcategories' pages
+        # (titleRegex is not applied to subcategories)
+        subcategories = self.cat.subcategoriesList(recurse = 0)
+        if not subcategories:
+            wikipedia.output(u'There are no subcategories in category %s' % self.cat.title())
+        for subcategory in subcategories:
+            catlib.change_category(subcategory, self.cat, None, inPlace = self.inPlace)
+        # Deletes the category page
+        if self.cat.exists() and self.cat.isEmpty():
+            if self.useSummaryForDeletion and self.editSummary:
+                reason = self.editSummary
+            else:
+                reason = wikipedia.translate(wikipedia.getSite(), self.deletion_reason_remove)
+            # look up the talk page before the category page is deleted
+            talkPage = self.cat.toggleTalkPage()
+            self.cat.delete(reason, not self.batchMode)
+            if talkPage.exists():
+                talkPage.delete(reason=reason, prompt=not self.batchMode)
+
+class CategoryTidyRobot:
+    """
+    Script to help a human to tidy up a category by moving its articles into
+    subcategories
+
+    Specify the category name on the command line. The program will pick up the
+    page, and look for all subcategories and supercategories, and show them with
+    a number adjacent to them. It will then automatically loop over all pages
+    in the category. It will ask you to type the number of the appropriate
+    replacement, and perform the change robotically.
+
+    If you don't want to move the article to a subcategory or supercategory, but to
+    another category, you can use the 'j' (jump) command.
+
+    Typing 's' will leave the complete page unchanged.
+
+    Typing '?' will show you the first few bytes of the current page, helping
+    you to find out what the article is about and in which other categories it
+    currently is.
+
+    Important:
+     * this bot is written to work with the MonoBook skin, so make sure your bot
+       account uses this skin
+    """
+    def __init__(self, catTitle, catDB):
+        '''
+        Store the arguments for later use.
+
+        Parameters: catTitle - title of the category to tidy up
+                    catDB    - object whose getSubcats() and getSupercats()
+                               methods are used by move_to_category();
+                               presumably a CategoryDatabase
+        '''
+        self.catTitle = catTitle
+        self.catDB = catDB
+
+    def move_to_category(self, article, original_cat, current_cat):
+        '''
+        Given an article which is in category original_cat, ask the user if
+        it should be moved to one of original_cat's subcategories.
+        Recursively run through subcategories' subcategories.
+        NOTE: current_cat is only used for internal recursion. You should
+        always use current_cat = original_cat.
+        '''
+        wikipedia.output(u'')
+        # Show the title of the page where the link was found.
+        # Highlight the title in purple.
+        wikipedia.output(u'Treating page \03{lightpurple}%s\03{default}, currently in \03{lightpurple}%s\03{default}' % (article.title(), current_cat.title()))
+
+        # Determine a reasonable amount of context to print
+        try:
+            full_text = article.get(get_redirect = True)
+        except wikipedia.NoPage:
+            wikipedia.output(u'Page %s not found.' % article.title())
+            return
+        try:
+            contextLength = full_text.index('\n\n')
+        except ValueError: # substring not found
+            contextLength = 500
+        if full_text.startswith(u'[['): # probably an image
+            # Add extra paragraph.
+            contextLength = full_text.find('\n\n', contextLength+2)
+        if contextLength > 1000 or contextLength < 0:
+            contextLength = 500
+        print
+        wikipedia.output(full_text[:contextLength])
+        print
+
+        subcatlist = self.catDB.getSubcats(current_cat)
+        supercatlist = self.catDB.getSupercats(current_cat)
+        print
+        if len(subcatlist) == 0:
+            print 'This category has no subcategories.'
+            print
+        if len(supercatlist) == 0:
+            print 'This category has no supercategories.'
+            print
+        # show subcategories as possible choices (with numbers)
+        for i in range(len(supercatlist)):
+            # layout: we don't expect a cat to have more than 10 supercats
+            wikipedia.output(u'u%d - Move up to %s' % (i, supercatlist[i].title()))
+        for i in range(len(subcatlist)):
+            # layout: we don't expect a cat to have more than 100 subcats
+            wikipedia.output(u'%2d - Move down to %s' % (i, subcatlist[i].title()))
+        print ' j - Jump to another category'
+        print ' s - Skip this article'
+        print ' r - Remove this category tag'
+        print ' ? - Print first part of the page (longer and longer)'
+        wikipedia.output(u'Enter - Save category as %s' % current_cat.title())
+
+        flag = False
+        while not flag:
+            print ''
+            choice=wikipedia.input(u'Choice:')
+            if choice in ['s', 'S']:
+                flag = True
+            elif choice == '':
+                wikipedia.output(u'Saving category as %s' % current_cat.title())
+                if current_cat == original_cat:
+                    print 'No changes necessary.'
+                else:
+                    catlib.change_category(article, original_cat, current_cat)
+                flag = True
+            elif choice in ['j', 'J']:
+                newCatTitle = wikipedia.input(u'Please enter the category the article should be moved to:')
+                newCat = catlib.Category(wikipedia.getSite(), 'Category:' + newCatTitle)
+                # recurse into chosen category
+                self.move_to_category(article, original_cat, newCat)
+                flag = True
+            elif choice in ['r', 'R']:
+                # remove the category tag
+                catlib.change_category(article, original_cat, None)
+                flag = True
+            elif choice == '?':
+                contextLength += 500
+                print
+                wikipedia.output(full_text[:contextLength])
+                print
+
+                # if categories possibly weren't visible, show them additionally
+                # (maybe this should always be shown?)
+                if len(full_text) > contextLength:
+                    print ''
+                    print 'Original categories: '
+                    for cat in article.categories():
+                        wikipedia.output(u'* %s' % cat.title())
+            elif choice[0] == 'u':
+                try:
+                    choice=int(choice[1:])
+                except ValueError:
+                    # user pressed an unknown command. Prompt him again.
+                    continue
+                self.move_to_category(article, original_cat, supercatlist[choice])
+                flag = True
+            else:
+                try:
+                    choice=int(choice)
+                except ValueError:
+                    # user pressed an unknown command. Prompt him again.
+                    continue
+                # recurse into subcategory
+                self.move_to_category(article, original_cat, subcatlist[choice])
+                flag = True
+
+    def run(self):
+        cat = catlib.Category(wikipedia.getSite(), 'Category:' + self.catTitle)
+
+        # get edit summary message
+        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg_change) % cat.title())
+
+        articles = cat.articlesList(recurse = False)
+        if len(articles) == 0:
+            wikipedia.output(u'There are no articles in category ' + catTitle)
+        else:
+            preloadingGen = pagegenerators.PreloadingGenerator(iter(articles))
+            for article in preloadingGen:
+                wikipedia.output(u'\n===================================================================')
+                self.move_to_category(article, cat, cat)
+
+class CategoryTreeRobot:
+    '''
+    Robot to create tree overviews of the category structure.
+
+    Parameters:
+        * catTitle - The category which will be the tree's root.
+        * catDB    - A CategoryDatabase object
+        * maxDepth - The limit beyond which no subcategories will be listed.
+                     This also guarantees that loops in the category structure
+                     won't be a problem.
+        * filename - The textfile where the tree should be saved; None to print
+                     the tree to stdout.
+    '''
+
+    def __init__(self, catTitle, catDB, filename = None, maxDepth = 10):
+        self.catTitle = catTitle
+        self.catDB = catDB
+        if filename and not os.path.isabs(filename):
+            filename = wikipedia.config.datafilepath(filename)
+        self.filename = filename
+        # TODO: make maxDepth changeable with a parameter or config file entry
+        self.maxDepth = maxDepth
+
+    def treeview(self, cat, currentDepth = 0, parent = None):
+        '''
+        Returns a multi-line string which contains a tree view of all subcategories
+        of cat, up to level maxDepth. Recursively calls itself.
+
+        Parameters:
+            * cat - the Category of the node we're currently opening
+            * currentDepth - the current level in the tree (for recursion)
+            * parent - the Category of the category we're coming from
+        '''
+
+        # Translations to say that the current category is in more categories than
+        # the one we're coming from
+        also_in_cats = {
+            'ar': u'(أيضا في %s)',
+            'ca': u'(també a %s)',
+            'da': u'(også i %s)',
+            'de': u'(auch in %s)',
+            'en': u'(also in %s)',
+            'es': u'(también en %s)',
+            'fi': u'(myös luokassa %s)',
+            'fr': u'(également dans %s)',
+            'he': u'(גם בקטגוריות %s)',
+            'ia': u'(equalmente in %s)',
+            'is': u'(einnig í %s)',
+            'kk': u'(тағы да %s дегенде)',
+            'nds-nl': u'(oek in %s)',
+            'nl': u'(ook in %s)',
+            'no': u'(også i %s)',
+            'nn': u'(òg i %s)',
+            'pt': u'(também em %s)',
+            'ru': u'(также в %s)',
+            'sv': u'(också i %s)',
+            'ср': u'(такође у %s)',
+            'zh': u'(也在 %s)',
+        }
+
+        result = u'#' * currentDepth
+        result += '[[:%s|%s]]' % (cat.title(), cat.title().split(':', 1)[1])
+        result += ' (%d)' % len(self.catDB.getArticles(cat))
+        # We will remove an element of this array, but will need the original array
+        # later, so we create a shallow copy with [:]
+        supercats = self.catDB.getSupercats(cat)[:]
+        # Find out which other cats are supercats of the current cat
+        try:
+            supercats.remove(parent)
+        except:
+            pass
+        if supercats != []:
+            supercat_names = []
+            for i in range(len(supercats)):
+                # create a list of wiki links to the supercategories
+                supercat_names.append('[[:%s|%s]]' % (supercats[i].title(), supercats[i].title().split(':', 1)[1]))
+                # print this list, separated with commas, using translations given in also_in_cats
+            result += ' ' + wikipedia.translate(wikipedia.getSite(), also_in_cats) % ', '.join(supercat_names)
+        result += '\n'
+        if currentDepth < self.maxDepth:
+            for subcat in self.catDB.getSubcats(cat):
+                # recurse into subdirectories
+                result += self.treeview(subcat, currentDepth + 1, parent = cat)
+        else:
+            if self.catDB.getSubcats(cat) != []:
+                # show that there are more categories beyond the depth limit
+                result += '#' * (currentDepth + 1) + '[...]\n'
+        return result
+
+    def run(self):
+        """
+        Prints the multi-line string generated by treeview or saves it to a file.
+
+        Parameters:
+            * catTitle - the title of the category which will be the tree's root
+            * maxDepth - the limit beyond which no subcategories will be listed
+        """
+        cat = catlib.Category(wikipedia.getSite(), 'Category:' + self.catTitle)
+        tree = self.treeview(cat)
+        if self.filename:
+            wikipedia.output(u'Saving results in %s' % self.filename)
+            import codecs
+            f = codecs.open(self.filename, 'a', 'utf-8')
+            f.write(tree)
+            f.close()
+        else:
+            wikipedia.output(tree, toStdout = True)
+
+if __name__ == "__main__":
+    fromGiven = False
+    toGiven = False
+    batchMode = False
+    editSummary = ''
+    inPlace = False
+    overwrite = False
+    showImages = False
+    talkPages = False
+    recurse = False
+    titleRegex = None
+
+    # This factory is responsible for processing command line arguments
+    # that are also used by other scripts and that determine on which pages
+    # to work on.
+    genFactory = pagegenerators.GeneratorFactory()
+    # The generator gives the pages that should be worked upon.
+    gen = None
+
+    #If this is set to true then the custom edit summary given for removing
+    #categories from articles will also be used as the deletion reason.
+    useSummaryForDeletion = True
+    try:
+        catDB = CategoryDatabase()
+        action = None
+        sort_by_last_name = False
+        restore = False
+        for arg in wikipedia.handleArgs():
+            if arg == 'add':
+                action = 'add'
+            elif arg == 'remove':
+                action = 'remove'
+            elif arg == 'move':
+                action = 'move'
+            elif arg == 'tidy':
+                action = 'tidy'
+            elif arg == 'tree':
+                action = 'tree'
+            elif arg == 'listify':
+                action = 'listify'
+            elif arg == '-person':
+                sort_by_last_name = True
+            elif arg == '-rebuild':
+                catDB.rebuild()
+            elif arg.startswith('-from:'):
+                oldCatTitle = arg[len('-from:'):].replace('_', ' ')
+                fromGiven = True
+            elif arg.startswith('-to:'):
+                newCatTitle = arg[len('-to:'):].replace('_', ' ')
+                toGiven = True
+            elif arg == '-batch':
+                batchMode = True
+            elif arg == '-inplace':
+                inPlace = True
+            elif arg == '-delsum':
+                # This parameter is kept for historical reasons, as it was not previously the default option.
+                pass
+            elif arg == '-nodelsum':
+                useSummaryForDeletion = False
+            elif arg == '-overwrite':
+                overwrite = True
+            elif arg == '-showimages':
+                showImages = True
+            elif arg.startswith('-summary:'):
+                editSummary = arg[len('-summary:'):]
+            elif arg.startswith('-match'):
+                if len(arg) == len('-match'):
+                    titleRegex = wikipedia.input(u'Which regular expression should affected objects match?')
+                else:
+                    titleRegex = arg[len('-match:'):]
+            elif arg == '-talkpages':
+                talkPages = True
+            elif arg == '-recurse':
+                recurse = True
+            else:
+                gen = genFactory.handleArg(arg)
+
+        if action == 'add':
+            if not gen:
+			    gen = genFactory.handleArg('-links') #default for backwords compatibility
+			# The preloading generator is responsible for downloading multiple
+			# pages from the wiki simultaneously.
+            gen = pagegenerators.PreloadingGenerator(gen)
+            add_category(sort_by_last_name)
+        elif action == 'remove':
+            if (fromGiven == False):
+                oldCatTitle = wikipedia.input(u'Please enter the name of the category that should be removed:')
+            bot = CategoryRemoveRobot(oldCatTitle, batchMode, editSummary, useSummaryForDeletion, inPlace = inPlace)
+            bot.run()
+        elif action == 'move':
+            if (fromGiven == False):
+                oldCatTitle = wikipedia.input(u'Please enter the old name of the category:')
+            if (toGiven == False):
+                newCatTitle = wikipedia.input(u'Please enter the new name of the category:')
+            bot = CategoryMoveRobot(oldCatTitle, newCatTitle, batchMode, editSummary, inPlace, titleRegex = titleRegex)
+            bot.run()
+        elif action == 'tidy':
+            catTitle = wikipedia.input(u'Which category do you want to tidy up?')
+            bot = CategoryTidyRobot(catTitle, catDB)
+            bot.run()
+        elif action == 'tree':
+            catTitle = wikipedia.input(u'For which category do you want to create a tree view?')
+            filename = wikipedia.input(u'Please enter the name of the file where the tree should be saved, or press enter to simply show the tree:')
+            bot = CategoryTreeRobot(catTitle, catDB, filename)
+            bot.run()
+        elif action == 'listify':
+            if (fromGiven == False):
+                oldCatTitle = wikipedia.input(u'Please enter the name of the category to listify:')
+            if (toGiven == False):
+                newCatTitle = wikipedia.input(u'Please enter the name of the list to create:')
+            bot = CategoryListifyRobot(oldCatTitle, newCatTitle, editSummary, overwrite, showImages, subCats = True, talkPages = talkPages, recurse = recurse)
+            bot.run()
+        else:
+            wikipedia.showHelp('category')
+    finally:
+        catDB.dump()
+        wikipedia.stopme()





More information about the Pywikipedia-l mailing list