[Pywikipedia-l] SVN: [6073] trunk/pywikipedia/imagerecat.py

multichill at svn.wikimedia.org multichill at svn.wikimedia.org
Thu Nov 6 20:53:12 UTC 2008


Revision: 6073
Author:   multichill
Date:     2008-11-06 20:53:12 +0000 (Thu, 06 Nov 2008)

Log Message:
-----------
Include much more information in the check categories template.

Modified Paths:
--------------
    trunk/pywikipedia/imagerecat.py

Modified: trunk/pywikipedia/imagerecat.py
===================================================================
--- trunk/pywikipedia/imagerecat.py	2008-11-06 16:31:26 UTC (rev 6072)
+++ trunk/pywikipedia/imagerecat.py	2008-11-06 20:53:12 UTC (rev 6073)
@@ -53,13 +53,13 @@
             if(onlyfilter):
                 commonshelperCats = []
             else:
-                commonshelperCats = getCommonshelperCats(imagepage)
+                (commonshelperCats, usage, galleries) = getCommonshelperCats(imagepage)
             newcats = applyAllFilters(commonshelperCats+currentCats)
 
             if (len(newcats) > 0 and not(set(currentCats)==set(newcats))):
                 for cat in newcats:
                     wikipedia.output(u' Found new cat: ' + cat);
-                saveImagePage(imagepage, newcats, onlyfilter)
+                saveImagePage(imagepage, newcats, usage, galleries, onlyfilter)
 
 
 def getCurrentCats(imagepage):
@@ -76,9 +76,12 @@
     '''
     Get category suggestions from CommonSense. Parse them and return a list of suggestions.
     '''
-    result = []
+    commonshelperCats = []
+    usage = []
+    galleries = []
+    
     parameters = urllib.urlencode({'i' : imagepage.titleWithoutNamespace().encode('utf-8'), 'r' : 'on', 'go-clean' : 'Find+Categories', 'cl' : 'li'})
-    commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usage>(\d)+)\)(.*)#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s#GALLERIES(\s)+\((?P<galnum>(\d)+)\)(.*)#EOF$', re.MULTILINE + re.DOTALL)
+    commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usagenum>(\d)+)\)\s(?P<usage>(.*))\s#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s#GALLERIES(\s)+\((?P<galnum>(\d)+)\)\s(?P<gals>(.*))\s(.*)#EOF$', re.MULTILINE + re.DOTALL)
 
     gotInfo = False;
 
@@ -93,13 +96,51 @@
             wikipedia.output(u'Got a timeout, let\'s try again')
 
     if matches:
+        if(matches.group('usagenum') > 0):
+            used = matches.group('usage').splitlines()
+            for use in used:
+                usage= usage + getUsage(use)
+                #wikipedia.output(use)
         if(matches.group('catnum') > 0):
-            categories = matches.group('cats').splitlines()
-            for cat in categories:
-                result.append(cat.replace('_',' '))
+            cats = matches.group('cats').splitlines()
+            for cat in cats:
+                commonshelperCats.append(cat.replace('_',' '))
+                wikipedia.output(u'category : ' + cat)
+        if(matches.group('galnum') > 0):
+            gals = matches.group('gals').splitlines()
+            for gal in gals:
+                galleries.append(gal.replace('_',' '))
+                wikipedia.output(u'gallery : ' + gal)
+    commonshelperCats = list(set(commonshelperCats))
+    galleries = list(set(galleries))
+    for (lang, project, article) in usage:
+        wikipedia.output(lang + project + article)
+        
+    return (commonshelperCats, usage, galleries)
 
-    return list(set(result))
+def getUsage(use):
+    result = []
+    lang = ''
+    project = ''
+    article = ''
+    usageRe = re.compile('^(?P<lang>([\w]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
+    matches = usageRe.search(use)
+    if matches:
+        if(matches.group('lang')):
+            lang = matches.group('lang')
+            #wikipedia.output(lang)
+        if(matches.group('project')):
+            project = matches.group('project')
+            #wikipedia.output(project)
+        if(matches.group('articles')):
+            articles = matches.group('articles')
+            #wikipedia.output(articles)
+    for article in articles.split():
+        result.append((lang, project, article))
 
+    return result
+    
+                         
 
 def applyAllFilters(categories):
     result = []
@@ -203,7 +244,7 @@
     return result
 
 
-def saveImagePage(imagepage, newcats, onlyfilter):
+def saveImagePage(imagepage, newcats, usage, galleries, onlyfilter):
     '''
     Remove the old categories and add the new categories to the image.
     '''
@@ -211,7 +252,7 @@
 
     if not(onlyfilter):
         newtext = removeTemplates(newtext)
-        newtext = newtext + u'{{subst:chc}}\n'
+        newtext = newtext + getCheckCategoriesTemplate(usage, galleries)
     for category in newcats:
         newtext = newtext + u'[[Category:' + category + u']]\n'
 
@@ -235,7 +276,26 @@
     result = re.sub(u'\{\{\s*[Cc]heck categories[^}]*\}\}', u'', result)
     return result
 
+def getCheckCategoriesTemplate(usage, galleries):
+    result = u'{{Check categories|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}\n'
 
+    usageCounter = 1
+    for (lang, project, article) in usage:
+        result = result + u'|lang' + str(usageCounter) + u'=' + lang
+        result = result + u'|wiki' + str(usageCounter) + u'=' + project
+        result = result + u'|article' + str(usageCounter) + u'=' + article
+        result = result + u'\n'
+        usageCounter = usageCounter + 1
+    
+    galleryCounter = 1
+    for gallery in galleries:
+        result = result + u'|gallery' + str(galleryCounter) + u'=' + gallery + u'\n'
+        galleryCounter = galleryCounter + 1
+
+    result = result + u'}}\n'
+    return result    
+
+
 def main(args):
     '''
     Main loop. Get a generator. Set up the 3 threads and the 2 queue's and fire everything up.





More information about the Pywikipedia-l mailing list