Revision: 5221
Author: multichill
Date: 2008-04-15 19:20:01 +0000 (Tue, 15 Apr 2008)
Log Message:
-----------
First version of commonscat.py, a tool to add the commonscat template to wikipedia categories.
Added Paths:
-----------
trunk/pywikipedia/commonscat.py
Added: trunk/pywikipedia/commonscat.py
===================================================================
--- trunk/pywikipedia/commonscat.py (rev 0)
+++ trunk/pywikipedia/commonscat.py 2008-04-15 19:20:01 UTC (rev 5221)
@@ -0,0 +1,228 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+With this tool you can add the template {{commonscat}} to categories.
+The tool works by following the interwiki links. If the template is present on
+a page in another language, the bot will use it.
+
+You could probably use it on articles as well, but this isn't tested.
+
+This bot uses pagegenerators to get a list of pages. For example to go through all categories:
+commonscat.py -start:Category:!
+
+Commonscat bot:
+
+Take a page. Follow the interwiki links and look for the commonscat template.
+*Found zero templates. Done.
+*Found one template. Add this template
+*Found more templates. Ask the user <- still have to implement this
+
+TODO:
+*Update interwiki's at commons
+*Collect all possibilities also if local wiki already has link.
+*Better support for other templates (translations) / redundant templates.
+*Check mode, only check pages which already have the template
+*More efficient like interwiki.py
+*Possibility to update other languages in the same run
+
+"""
+
+#
+# (C) Multichill, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+
+import wikipedia, config, pagegenerators, add_text
+
+# Local name of the commonscat template, keyed by wiki language code.
+# Languages missing from this table fall back to u'Commonscat' in getTemplate().
+commonscatTemplates = {
+    'af': u'CommonsKategorie',
+    'als': u'Commonscat',
+    'az': u'CommonsKat',
+    'bg': u'Commonscat',
+    'ca': u'Commonscat',
+    'cs': u'Commonscat',
+    'da': u'Commonscat',
+    'de': u'Commonscat',
+    'en': u'Commonscat',
+    'eo': u'Commonscat',
+    'es': u'Commonscat',
+    'eu': u'Commonskat',
+    'fi': u'Commonscat',
+    'fr': u'Commonscat',
+    'hr': u'Commonscat',
+    'hu': u'Közvagyonkat',
+    'id': u'Commonscat',
+    'io': u'Commonscat',
+    'is': u'CommonsCat',
+    'it': u'Commonscat',
+    'ja': u'Commonscat',
+    'ko': u'Commonscat',
+    'lt': u'Commonscat',
+    'lv': u'Commonscat',
+    'mk': u'Ризница-врска',
+    'ms': u'Commonscat',
+    'nl': u'Commonscat',
+    'nn': u'Commonscat',
+    'no': u'Commonscat',
+    'oc': u'Commonscat',
+    'os': u'Commonscat',
+    'pl': u'Commonscat',
+    'pt': u'Commonscat',
+    'ro': u'Commonscat',
+    'ru': u'Commonscat',
+    'scn': u'Commonscat',
+    'sh': u'Commonscat',
+    'simple': u'Commonscat',
+    'sk': u'Commonscat',
+    'sl': u'Kategorija v Zbirki',
+    'sr': u'Commonscat',
+    'su': u'Commonscat',
+    'sv': u'Commonscat',
+    'th': u'Commonscat',
+    'tr': u'CommonsKat',
+    'uk': u'Commonscat',
+    'vi': u'Commonscat',
+    'zh': u'Commonscat',
+    'zh-yue': u'同享類',
+}
+
+def getTemplate(lang=None):
+    '''
+    Return the name of the commonscat template for the language code `lang`.
+
+    Language codes without an entry in commonscatTemplates (including the
+    default lang=None) get the generic name u'Commonscat'.
+    '''
+    # A single dict.get() replaces the deprecated has_key() test followed by
+    # a second lookup; the fallback value is unchanged.
+    return commonscatTemplates.get(lang, u'Commonscat')
+
+def updateInterwiki(wikipediaPage=None, commonsPage=None):
+    '''
+    Replace the interwiki links of a commons page with those of a wikipedia page.
+
+    The new link set consists of the interwiki links of wikipediaPage plus a
+    link to wikipediaPage itself, one page per site. The links already on
+    commonsPage are replaced, not merged. The commons page is only saved when
+    the resulting text differs from the current text.
+
+    This should probably be more intelligent: add all the interwiki links and
+    remove duplicates, or only remove language links if several links to the
+    same language exist.
+
+    This function is disabled for the moment (its calls in addCommonscat are
+    commented out) until the best way to update the interwiki links is clear.
+    '''
+    linkedPages = wikipediaPage.interwiki()
+    linkedPages.append(wikipediaPage)
+    # One page per site; a later page for the same site overrides an earlier one.
+    pagesBySite = dict([(linked.site(), linked) for linked in linkedPages])
+
+    currentText = commonsPage.get()
+    # The commons site object doesn't work with interwiki links, so the nl
+    # site is passed in instead.
+    updatedText = wikipedia.replaceLanguageLinks(currentText, pagesBySite,
+                                                 wikipedia.getSite(u'nl'))
+    editSummary = u''.join([u'Updating interwiki\'s from [[',
+                            wikipediaPage.site().language(),
+                            u':',
+                            wikipediaPage.title(),
+                            u']]'])
+
+    # NOTE (original author): this comparison doesn't seem to work, the new
+    # text has some trailing whitespace.
+    if updatedText == currentText:
+        return
+    wikipedia.showDiff(currentText, updatedText)
+    commonsPage.put(newtext=updatedText, comment=editSummary)
+
+
+
+
+def addCommonscat(page=None, summary=None, always=False):
+    '''
+    Add the commonscat template to `page`, using a category found through
+    its interwiki links.
+
+    If the page already carries the local commonscat template nothing is
+    changed. Otherwise every interwiki-linked page is checked for its own
+    commonscat template; each template whose first parameter resolves to a
+    commons category (see getCommonscat) is a candidate. When at least one
+    candidate exists, the last one found is added to the page via
+    add_text.add_text().
+
+    page    -- the page to work on
+    summary -- edit summary, passed on to add_text.add_text()
+    always  -- passed on to add_text.add_text()
+
+    Returns the tuple (True, always).
+    '''
+    wikipedia.output("Working on " + page.title())
+    localTemplate = getTemplate(page.site().language())
+
+    if localTemplate in page.templates():
+        wikipedia.output("Commonscat template is already on " + page.title())
+        # TODO: check that the existing template points to a valid commons
+        # category, update the interwiki links at commons (updateInterwiki)
+        # and remove the template if something is wrong.
+        return (True, always)
+
+    # Follow the interwiki links and collect every commons category found.
+    commonscats = []
+    for ipage in page.interwiki():
+        # Looked up once per linked page instead of once per template.
+        remoteLang = ipage.site().language()
+        remoteTemplate = getTemplate(remoteLang)
+        if remoteTemplate not in ipage.templates():
+            continue
+        for template in ipage.templatesWithParams():
+            # Only the commonscat template with its parameter set is of interest.
+            if template[0] != remoteTemplate or len(template[1]) == 0:
+                continue
+            commonscatpage = getCommonscat(template[1][0])
+            if commonscatpage is not None:
+                commonscats.append(commonscatpage)
+                wikipedia.output("Found link for " + page.title() + " at [["
+                                 + remoteLang + ":" + ipage.title() + "]] to "
+                                 + commonscatpage.title() + ".")
+
+    if commonscats:
+        # TODO: We should check if we found more than one different link.
+        commonscatpage = commonscats.pop()
+        templateText = ("{{" + localTemplate + "|"
+                        + commonscatpage.titleWithoutNamespace() + "}}")
+        add_text.add_text(page, templateText, summary, None, None, always)
+        # updateInterwiki(page, commonscatpage) is disabled for the moment.
+    return (True, always)
+
+
+def getCommonscat(name=""):
+    '''
+    Return the page object of the commons category called `name`.
+
+    Redirects are followed: for a real redirect page the redirect target is
+    returned, and for a page carrying {{Category redirect}} the category
+    named in the template's first parameter is looked up in turn.
+
+    Returns None if the category doesn't exist, is a disambiguation page,
+    carries a {{Category redirect}} without a target, or if the category
+    redirects lead back to a category that was already visited.
+    '''
+    site = wikipedia.getSite("commons", "commons")
+    # Names already looked up. Category redirects that point at each other
+    # would otherwise be followed forever.
+    visited = []
+    while name not in visited:
+        visited.append(name)
+        result = wikipedia.Page(site, "Category:" + name)
+        if not result.exists():
+            return None
+        if result.isRedirectPage():
+            return result.getRedirectTarget()
+        if "Category redirect" not in result.templates():
+            if result.isDisambig():
+                return None
+            return result
+        # Soft redirect: continue with the target given in the template.
+        for template in result.templatesWithParams():
+            if template[0] == "Category redirect" and len(template[1]) > 0:
+                name = template[1][0]
+                break
+        else:
+            # The template is present but has no target parameter.
+            return None
+    # Redirect loop detected.
+    return None
+
+
+def main():
+    '''
+    Parse the command line arguments and get a pagegenerator to work on.
+    Iterate through all the pages and run addCommonscat on each of them.
+
+    Recognised arguments:
+    -summary[:text]  edit summary (asked interactively when no text is given)
+    -page[:title]    work on a single page (asked interactively when no title is given)
+    -always          passed on to addCommonscat
+    Every other argument is handed to pagegenerators.GeneratorFactory.
+
+    Raises add_text.NoEnoughData when no generator was specified.
+    '''
+    summary = None
+    generator = None
+    always = False
+    # Load a lot of default generators
+    genFactory = pagegenerators.GeneratorFactory()
+
+    for arg in wikipedia.handleArgs():
+        if arg.startswith('-summary'):
+            if len(arg) == 8:
+                summary = wikipedia.input(u'What summary do you want to use?')
+            else:
+                summary = arg[9:]
+        elif arg.startswith('-page'):
+            if len(arg) == 5:
+                generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+            else:
+                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+        elif arg == '-always':
+            always = True
+        else:
+            # Keep a generator chosen by an earlier argument: handleArg()
+            # presumably returns nothing for an argument it doesn't recognise
+            # (verify against pagegenerators), and assigning that result
+            # unconditionally would discard the user's page selection.
+            newGenerator = genFactory.handleArg(arg)
+            if newGenerator:
+                generator = newGenerator
+    if not generator:
+        raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
+
+    pregenerator = pagegenerators.PreloadingGenerator(generator)
+
+    for page in pregenerator:
+        (status, always) = addCommonscat(page, summary, always)
+
+
+# Script entry point. stopme() is called in the finally clause so that it
+# runs even when main() raises.
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        wikipedia.stopme()