Revision: 5384 Author: multichill Date: 2008-05-16 13:19:35 +0000 (Fri, 16 May 2008)
Log Message: ----------- New program to tag media needing categories at commons.
Added Paths: ----------- trunk/pywikipedia/imageuncat.py
# -*- coding: utf-8 -*-
"""
Program to add uncat template to images without categories at commons.
See imagerecat.py (still working on that one) to add these images to categories.

"""
#
# (C) Multichill 2008
#
# Distributed under the terms of the MIT license.
#
#

import os
import sys
import re
import codecs

import wikipedia
import config
import pagegenerators

# Categories that do not count as "real" categorization.
# Probably unneeded because these are hidden categories. Have to figure it out.
ignoreCategories = [u'[[Category:CC-BY-SA-3.0]]',
                    u'[[Category:GFDL]]',
                    u'[[Category:Media for cleanup]]',
                    u'[[Category:Media lacking a description]]',
                    u'[[Category:Media lacking author information]]',
                    u'[[Category:Self-published work]]']

# Don't bother to put the template on an image with one of these templates.
ignoreTemplates = [u'Delete',
                   u'Nocat',
                   u'No license',
                   u'No permission since',
                   u'No source',
                   u'No source since',
                   u'Uncategorized',
                   u'Uncat']

# Wikitext appended to an uncategorized page, and the edit summary used for it.
puttext = u'\n{{Uncategorized|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}'
putcomment = u'Please add categories to this image'


def isUncat(page):
    '''
    Decide whether the page should be tagged as uncategorized.

    Returns False (skip the page) when the page has a category that is not
    in ignoreCategories, or when it carries a template listed in
    ignoreTemplates. Returns True otherwise.
    '''
    for category in page.categories():
        # NOTE(review): ignoreCategories holds wikitext strings such as
        # u'[[Category:GFDL]]'; confirm that the objects returned by
        # page.categories() actually compare equal to those strings,
        # otherwise every category counts as a real one.
        if category not in ignoreCategories:
            return False
    for template in page.templates():
        if template in ignoreTemplates:
            return False
    return True


def addUncat(page):
    '''
    Append the uncat template (puttext) to the page and save it.

    The diff is shown before saving. An edit conflict is not fatal: the
    page is reported and skipped.
    '''
    # Fetch the text once and reuse it for both the diff and the new text.
    oldtext = page.get()
    newtext = oldtext + puttext
    wikipedia.showDiff(oldtext, newtext)
    try:
        page.put(newtext, putcomment)
    except wikipedia.EditConflict:
        # Best effort: report the conflict and move on to the next page.
        wikipedia.output(u'Skipping %s because of an edit conflict'
                         % page.title())


def main(args):
    '''
    Grab a bunch of images and tag them if they are not categorized.

    args is accepted for the command-line entry point; the arguments that
    are actually processed are the ones returned by wikipedia.handleArgs().
    '''
    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                # Bare "-page": ask interactively for the title.
                title = wikipedia.input(u'What page do you want to use?')
            else:
                # "-page:Title": everything after the separator is the title.
                title = arg[6:]
            generator = [wikipedia.Page(site, title)]
        else:
            # Only replace the generator when the factory produced one, so an
            # argument it does not handle cannot discard an earlier selection.
            candidate = genFactory.handleArg(arg)
            if candidate:
                generator = candidate

    if not generator:
        wikipedia.output('You have to specify the generator you want to use for the program!')
        return

    for page in pagegenerators.PreloadingGenerator(generator):
        # Namespace 6 is the image (file) namespace; only existing,
        # non-redirect image pages are considered.
        if page.exists() and page.namespace() == 6 and not page.isRedirectPage():
            if isUncat(page):
                addUncat(page)


if __name__ == "__main__":
    try:
        main(sys.argv[1:])
    finally:
        wikipedia.stopme()
pywikipedia-l@lists.wikimedia.org