[Pywikipedia-l] SVN: [6334] trunk/pywikipedia/imagerecat.py

multichill at svn.wikimedia.org multichill at svn.wikimedia.org
Fri Feb 6 16:42:41 UTC 2009


Revision: 6334
Author:   multichill
Date:     2009-02-06 16:42:40 +0000 (Fri, 06 Feb 2009)

Log Message:
-----------
Fixed a bug and added a new option:

-onlyuncat      Only work on uncategorized images. Will prevent the bot from working on an image multiple times.

Modified Paths:
--------------
    trunk/pywikipedia/imagerecat.py

Modified: trunk/pywikipedia/imagerecat.py
===================================================================
--- trunk/pywikipedia/imagerecat.py	2009-02-06 16:23:15 UTC (rev 6333)
+++ trunk/pywikipedia/imagerecat.py	2009-02-06 16:42:40 UTC (rev 6334)
@@ -9,6 +9,8 @@
 
 -onlyfilter     Don't use Commonsense to get categories, just filter the current categories
 
+-onlyuncat      Only work on uncategorized images. Will prevent the bot from working on an image multiple times.
+
 -hint           Give Commonsense a hint.
                 For example -hint:li.wikipedia.org
 
@@ -56,30 +58,34 @@
         countries.append(country.titleWithoutNamespace())
     return
 
-def categorizeImages(generator, onlyfilter):
+def categorizeImages(generator, onlyFilter, onlyUncat):
     '''
     Loop over all images in generator and try to categorize them. Get category suggestions from CommonSense.
     '''
     for page in generator:
         if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()):
             imagepage = wikipedia.ImagePage(page.site(), page.title())
-            #imagepage.get()
-            wikipedia.output(u'Working on ' + imagepage.title());
-            currentCats = getCurrentCats(imagepage)
-            if(onlyfilter):
-                commonshelperCats = []
-                usage = []
-                galleries = []
-            else:
-                (commonshelperCats, usage, galleries) = getCommonshelperCats(imagepage)
-            newcats = applyAllFilters(commonshelperCats+currentCats)
+            wikipedia.output(u'Working on ' + imagepage.title())
 
-            if (len(newcats) > 0 and not(set(currentCats)==set(newcats))):
-                for cat in newcats:
-                    wikipedia.output(u' Found new cat: ' + cat);
-                saveImagePage(imagepage, newcats, usage, galleries, onlyfilter)
+            if(onlyUncat and not(u'Uncategorized' in imagepage.templates())):
+                wikipedia.output(u'No Uncategorized template found')
+            else:                
+                currentCats = getCurrentCats(imagepage)
+                if(onlyFilter):
+                    commonshelperCats = []
+                    usage = []
+                    galleries = []
+                else:
+                    (commonshelperCats, usage, galleries) = getCommonshelperCats(imagepage)
+                newcats = applyAllFilters(commonshelperCats+currentCats)
 
+                if (len(newcats) > 0 and not(set(currentCats)==set(newcats))):
+                    for cat in newcats:
+                        wikipedia.output(u' Found new cat: ' + cat);
+                    saveImagePage(imagepage, newcats, usage, galleries, onlyFilter)
+            
 
+
 def getCurrentCats(imagepage):
     '''
     Get the categories currently on the image
@@ -169,7 +175,7 @@
 def applyAllFilters(categories):
     '''
     Apply all filters on categories.
-    ''''
+    '''
     result = []
     result = filterBlacklist(categories)
     result = filterDisambiguation(result)
@@ -275,19 +281,19 @@
     return result
 
 
-def saveImagePage(imagepage, newcats, usage, galleries, onlyfilter):
+def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
     '''
     Remove the old categories and add the new categories to the image.
     '''
     newtext = wikipedia.removeCategoryLinks(imagepage.get(), imagepage.site())    
 
-    if not(onlyfilter):
+    if not(onlyFilter):
         newtext = removeTemplates(newtext)
         newtext = newtext + getCheckCategoriesTemplate(usage, galleries, len(newcats))
     for category in newcats:
         newtext = newtext + u'[[Category:' + category + u']]\n'
 
-    if(onlyfilter):
+    if(onlyFilter):
         comment = u'Filtering categories'
     else:
         comment = u'Image is categorized by a bot using data from [[Commons:Tools#CommonSense|CommonSense]]'
@@ -336,7 +342,8 @@
     Main loop. Get a generator and options. Work on all images in the generator.
     '''
     generator = None
-    onlyfilter = False
+    onlyFilter = False
+    onlyUncat = False
     genFactory = pagegenerators.GeneratorFactory()
 
     global search_wikis
@@ -346,7 +353,9 @@
     wikipedia.setSite(site)
     for arg in wikipedia.handleArgs():
         if arg == '-onlyfilter':
-            onlyfilter = True
+            onlyFilter = True
+        elif arg == '-onlyuncat':
+            onlyUncat = True
         elif arg.startswith('-hint:'):
             hint_wiki = arg [len('-hint:'):]
         elif arg.startswith('-onlyhint'):
@@ -359,7 +368,7 @@
         generator = pagegenerators.CategorizedPageGenerator(catlib.Category(site, u'Category:Media needing categories'), recurse=True)
 
     initLists()
-    categorizeImages(generator, onlyfilter)
+    categorizeImages(generator, onlyFilter, onlyUncat)
 
     wikipedia.output(u'All done')
 





More information about the Pywikipedia-l mailing list