[Pywikipedia-l] SVN: [5384] trunk/pywikipedia/imageuncat.py

multichill at svn.wikimedia.org multichill at svn.wikimedia.org
Fri May 16 13:19:35 UTC 2008


Revision: 5384
Author:   multichill
Date:     2008-05-16 13:19:35 +0000 (Fri, 16 May 2008)

Log Message:
-----------
New program to tag media needing categories at commons. 

Added Paths:
-----------
    trunk/pywikipedia/imageuncat.py

Added: trunk/pywikipedia/imageuncat.py
===================================================================
--- trunk/pywikipedia/imageuncat.py	                        (rev 0)
+++ trunk/pywikipedia/imageuncat.py	2008-05-16 13:19:35 UTC (rev 5384)
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+"""
+Program to add the {{Uncategorized}} template to images without categories on Commons.
+See imagerecat.py (still a work in progress) for adding these images to categories.
+
+"""
+#
+#  (C) Multichill 2008
+#
+# Distributed under the terms of the MIT license.
+#
+#
+
+import os, sys, re, codecs
+import wikipedia, config, pagegenerators  
+
+# Categories that isUncat() does not count as a real categorization.
+# Probably unneeded because these are hidden categories. Have to figure it out.
+ignoreCategories = [u'[[Category:CC-BY-SA-3.0]]',
+                    u'[[Category:GFDL]]',
+                    u'[[Category:Media for cleanup]]',
+                    u'[[Category:Media lacking a description]]',
+                    u'[[Category:Media lacking author information]]',
+                    u'[[Category:Self-published work]]']
+
+# Don't bother to put the template on an image with one of these templates
+ignoreTemplates = [u'Delete',
+                   u'Nocat',
+                   u'No license',
+                   u'No permission since',
+                   u'No source',
+                   u'No source since',
+                   u'Uncategorized',
+                   u'Uncat']
+
+puttext = u'\n{{Uncategorized|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}'
+putcomment = u'Please add categories to this image'
+
+def isUncat(page):
+    '''
+    Decide whether the page still has to be tagged as uncategorized.
+
+    Returns False when the page has a category that is not listed in
+    ignoreCategories (it is categorized already) or uses a template listed in
+    ignoreTemplates; returns True otherwise. Categories are checked first.
+    '''
+    # NOTE(review): ignoreCategories holds wikilink strings; confirm that what
+    # page.categories() yields compares equal to them, else any category wins.
+    for cat in page.categories():
+        if cat in ignoreCategories:
+            continue
+        # A category outside the ignore list: the page is categorized.
+        return False
+    for tpl in page.templates():
+        if tpl in ignoreTemplates:
+            return False
+    return True
+
+def addUncat(page):
+    '''
+    Append the uncat template (puttext) to the page text and save it.
+    An edit conflict is reported and the page is skipped.
+    '''
+    oldtext = page.get()  # fetched once so the diff shown matches what is saved
+    newtext = oldtext + puttext
+    wikipedia.showDiff(oldtext, newtext)
+    try:
+        page.put(newtext, putcomment)
+    except wikipedia.EditConflict:
+        wikipedia.output(u'Skipping %s because of edit conflict' % page.title())
+
+def main(args):
+    '''
+    Grab a bunch of images and tag them if they are not categorized.
+    args is not read; the loop handles what wikipedia.handleArgs() returns.
+    '''
+    generator = None
+    genFactory = pagegenerators.GeneratorFactory()
+
+    site = wikipedia.getSite(u'commons', u'commons')
+    wikipedia.setSite(site)
+    for arg in wikipedia.handleArgs():
+        if arg == '-page':
+            generator = [wikipedia.Page(site, wikipedia.input(u'What page do you want to use?'))]
+        elif arg.startswith('-page'):
+            generator = [wikipedia.Page(site, arg[6:])]
+        else:
+            # Keep an earlier generator if this argument does not yield one.
+            generator = genFactory.handleArg(arg) or generator
+    if not generator:
+        wikipedia.output('You have to specify the generator you want to use for the program!')
+        return
+    for page in pagegenerators.PreloadingGenerator(generator):
+        if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()):
+            if isUncat(page):
+                addUncat(page)
+
+if __name__ == "__main__":
+    try:
+        main(sys.argv[1:])
+    finally:
+        wikipedia.stopme()  # runs whether main() returns normally or raises





More information about the Pywikipedia-l mailing list