Revision: 6333 Author: multichill Date: 2009-02-06 16:23:15 +0000 (Fri, 06 Feb 2009)
Log Message: ----------- Added -hint and -onlyhint options. Updated some documentation.
Modified Paths: -------------- trunk/pywikipedia/imagerecat.py
Modified: trunk/pywikipedia/imagerecat.py =================================================================== --- trunk/pywikipedia/imagerecat.py 2009-02-06 14:31:29 UTC (rev 6332) +++ trunk/pywikipedia/imagerecat.py 2009-02-06 16:23:15 UTC (rev 6333) @@ -3,14 +3,27 @@ Program to (re)categorize images at commons.
The program uses commonshelper for category suggestions. -It takes the suggestions and the current categories. Put the categories through some filters and add the result +It takes the suggestions and the current categories. Put the categories through some filters and adds the result.
+The following command line parameters are supported: + +-onlyfilter Don't use Commonsense to get categories, just filter the current categories + +-hint Give Commonsense a hint. + For example -hint:li.wikipedia.org + +-onlyhint Give Commonsense a hint. And only work on this hint. + Syntax is the same as -hint. Some special hints are possible: + _20 : Work on the top 20 wikipedia's + _80 : Work on the top 80 wikipedia's + wps : Work on all wikipedia's + """ __version__ = '$Id$' # # (C) Multichill 2008 -# (tkinter part loosely based on imagecopy.py) -# Distributed under the terms of the MIT license. +# +# Distributed under the terms of the MIT license. # # import os, sys, re, codecs @@ -24,6 +37,9 @@ category_blacklist = [] countries = []
+search_wikis=u'_20' +hint_wiki=u'' + def initLists(): ''' Get the list of countries & the blacklist from Commons. @@ -81,8 +97,11 @@ commonshelperCats = [] usage = [] galleries = [] + + global search_wikis + global hint_wiki
- parameters = urllib.urlencode({'i' : imagepage.titleWithoutNamespace().encode('utf-8'), 'r' : 'on', 'go-clean' : 'Find+Categories', 'cl' : 'li'}) + parameters = urllib.urlencode({'i' : imagepage.titleWithoutNamespace().encode('utf-8'), 'r' : 'on', 'go-clean' : 'Find+Categories', 'p' : search_wikis, 'cl' : hint_wiki}) commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+((?P<usagenum>(\d)+))\s(?P<usage>(.*))\s#KEYWORDS(\s)+((?P<keywords>(\d)+))(.*)#CATEGORIES(\s)+((?P<catnum>(\d)+))\s(?P<cats>(.*))\s#GALLERIES(\s)+((?P<galnum>(\d)+))\s(?P<gals>(.*))\s(.*)#EOF$', re.MULTILINE + re.DOTALL)
gotInfo = False; @@ -121,6 +140,9 @@ return (commonshelperCats, usage, galleries)
def getUsage(use): + ''' + Parse the Commonsense output to get the usage + ''' result = [] lang = '' project = '' @@ -145,6 +167,9 @@
def applyAllFilters(categories): + ''' + Apply all filters on categories. + '''' result = [] result = filterBlacklist(categories) result = filterDisambiguation(result) @@ -283,6 +308,9 @@ return result
def getCheckCategoriesTemplate(usage, galleries, ncats): + ''' + Build the check categories template with all parameters + ''' result = u'{{Check categories|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}\n'
usageCounter = 1 @@ -305,17 +333,24 @@
def main(args): ''' - Main loop. Get a generator. Set up the 3 threads and the 2 queue's and fire everything up. + Main loop. Get a generator and options. Work on all images in the generator. ''' generator = None onlyfilter = False genFactory = pagegenerators.GeneratorFactory()
+ global search_wikis + global hint_wiki + site = wikipedia.getSite(u'commons', u'commons') wikipedia.setSite(site) for arg in wikipedia.handleArgs(): if arg == '-onlyfilter': onlyfilter = True + elif arg.startswith('-hint:'): + hint_wiki = arg [len('-hint:'):] + elif arg.startswith('-onlyhint'): + search_wikis = arg [len('-onlyhint:'):] else: genFactory.handleArg(arg)