jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/551812 )
Change subject: [cleanup] Remove commonshelper parts
......................................................................
[cleanup] Remove commonshelper parts
Remove commonshelper parts because CommonSense isn't available anymore.
Part 1 detached from I28d72f2
Bug: T195079
Change-Id: I765754366939b435b54a0340a1e518583b0a6f07
---
M scripts/imagerecat.py
1 file changed, 12 insertions(+), 190 deletions(-)
Approvals:
  Framawiki: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py index 04e3fd0..3a143ea 100755 --- a/scripts/imagerecat.py +++ b/scripts/imagerecat.py @@ -1,29 +1,15 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ -Program to (re)categorize images at commons. +Program to re-categorize images at commons.
-The program uses commonshelper for category suggestions. -It takes the suggestions and the current categories. Put the categories through +The program uses read the current categories, put the categories through some filters and adds the result.
The following command line parameters are supported:
--onlyfilter Don't use Commonsense to get categories, just filter the - current categories - -onlyuncat Only work on uncategorized images. Will prevent the bot from working on an image multiple times. - --hint Give Commonsense a hint. - For example -hint:li.wikipedia.org - --onlyhint Give Commonsense a hint. And only work on this hint. - Syntax is the same as -hint. Some special hints are possible: - _20 : Work on the top 20 wikipedia's - _80 : Work on the top 80 wikipedia's - wps : Work on all wikipedia's - """ # # (C) Multichill, 2008-2011 @@ -33,7 +19,6 @@ # from __future__ import absolute_import, division, unicode_literals
-import re import socket import xml.etree.ElementTree
@@ -52,9 +37,6 @@ category_blacklist = [] countries = []
-search_wikis = '_20' -hint_wiki = '' -
def initLists(): """Get the list of countries & the blacklist from Commons.""" @@ -73,7 +55,7 @@ return
-def categorizeImages(generator, onlyFilter, onlyUncat): +def categorizeImages(generator, onlyUncat): """Loop over all images in generator and try to categorize them.
Get category suggestions from CommonSense. @@ -93,19 +75,12 @@ continue
currentCats = getCurrentCats(imagepage) - if onlyFilter: - commonshelperCats = [] - usage = [] - galleries = [] - else: - (commonshelperCats, usage, - galleries) = getCommonshelperCats(imagepage) - newcats = applyAllFilters(commonshelperCats + currentCats) + newcats = applyAllFilters(currentCats)
if newcats and set(currentCats) != set(newcats): for cat in newcats: pywikibot.output(' Found new cat: ' + cat) - saveImagePage(imagepage, newcats, usage, galleries, onlyFilter) + saveImagePage(imagepage, newcats)
def getCurrentCats(imagepage): @@ -116,91 +91,6 @@ return list(set(result))
-def getCommonshelperCats(imagepage): - """Get category suggestions from CommonSense. - - @rtype: list of unicode - - """ - commonshelperCats = [] - usage = [] - galleries = [] - - global search_wikis - global hint_wiki - site = imagepage.site - lang = site.code - family = site.family.name - if lang == 'commons' and family == 'commons': - parameters = urlencode( - {'i': imagepage.title(with_ns=False).encode('utf-8'), - 'r': 'on', - 'go-clean': 'Find+Categories', - 'p': search_wikis, - 'cl': hint_wiki}) - elif family == 'wikipedia': - parameters = urlencode( - {'i': imagepage.title(with_ns=False).encode('utf-8'), - 'r': 'on', - 'go-move': 'Find+Categories', - 'p': search_wikis, - 'cl': hint_wiki, - 'w': lang}) - else: - # Can't handle other sites atm - return [], [], [] - - commonsenseRe = re.compile( - r'^#COMMONSENSE(.*)#USAGE(\s)+((?P<usagenum>(\d)+))\s' - r'(?P<usage>(.*))\s' - r'#KEYWORDS(\s)+((?P<keywords>(\d)+))(.*)' - r'#CATEGORIES(\s)+((?P<catnum>(\d)+))\s(?P<cats>(.*))\s' - r'#GALLERIES(\s)+((?P<galnum>(\d)+))\s(?P<gals>(.*))\s(.*)#EOF$', - re.MULTILINE + re.DOTALL) - - gotInfo = False - matches = None - maxtries = 10 - tries = 0 - while not gotInfo: - try: - if tries < maxtries: - tries += 1 - commonsHelperPage = fetch( - 'https://toolserver.org/~daniel/WikiSense/CommonSense.php?' 
- + parameters) - matches = commonsenseRe.search( - commonsHelperPage.text) - gotInfo = True - else: - break - except IOError: - pywikibot.output("Got an IOError, let's try again") - except socket.timeout: - pywikibot.output("Got a timeout, let's try again") - - if matches and gotInfo: - if matches.group('usagenum') > 0: - used = matches.group('usage').splitlines() - for use in used: - usage = usage + getUsage(use) - if matches.group('catnum') > 0: - cats = matches.group('cats').splitlines() - for cat in cats: - commonshelperCats.append(cat.replace('_', ' ')) - pywikibot.output('category : ' + cat) - if matches.group('galnum') > 0: - gals = matches.group('gals').splitlines() - for gal in gals: - galleries.append(gal.replace('_', ' ')) - pywikibot.output('gallery : ' + gal) - commonshelperCats = list(set(commonshelperCats)) - galleries = list(set(galleries)) - for (lang, project, article) in usage: - pywikibot.output(lang + project + article) - return commonshelperCats, usage, galleries - - def getOpenStreetMapCats(latitude, longitude): """Get a list of location categories based on the OSM nomatim tool.""" result = [] @@ -282,27 +172,6 @@ return ''
-def getUsage(use): - """Parse the Commonsense output to get the usage.""" - result = [] - lang = '' - project = '' - articles = '' - usageRe = re.compile( - r'^(?P<lang>([\w-]+)).(?P<project>([\w]+)).org:(?P<articles>\s(.*))') - matches = usageRe.search(use) - if matches: - if matches.group('lang'): - lang = matches.group('lang') - if matches.group('project'): - project = matches.group('project') - if matches.group('articles'): - articles = matches.group('articles') - for article in articles.split(): - result.append((lang, project, article)) - return result - - def applyAllFilters(categories): """Apply all filters on categories.""" result = filterDisambiguation(categories) @@ -392,59 +261,22 @@ return categories
-def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter): +def saveImagePage(imagepage, newcats): """Remove the old categories and add the new categories to the image.""" newtext = textlib.removeCategoryLinks(imagepage.text, imagepage.site) - if not onlyFilter: - newtext = removeTemplates(newtext) - newtext = newtext + getCheckCategoriesTemplate(usage, galleries, - len(newcats)) newtext += '\n' + for category in newcats: newtext = newtext + '[[Category:' + category + ']]\n' - if onlyFilter: - comment = 'Filtering categories' - else: - comment = ('Image is categorized by a bot using data from ' - '[[Commons:Tools#CommonSense|CommonSense]]') + + comment = 'Filtering categories' + pywikibot.showDiff(imagepage.text, newtext) imagepage.text = newtext imagepage.save(comment) return
-def removeTemplates(oldtext=''): - """Remove {{Uncategorized}} and {{Check categories}} templates.""" - result = re.sub( - r'{{\s*([Uu]ncat(egori[sz]ed( image)?)?|' - r'[Nn]ocat|[Nn]eedscategory)[^}]*}}', - '', oldtext) - result = re.sub('<!-- Remove this line once you have added categories -->', - '', result) - result = re.sub(r'{{\s*[Cc]heck categories[^}]*}}', '', result) - return result - - -def getCheckCategoriesTemplate(usage, galleries, ncats): - """Build the check categories template with all parameters.""" - result = ('{{Check categories|year={{subst:CURRENTYEAR}}|month={{subst:' - 'CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}\n') - usageCounter = 1 - for (lang, project, article) in usage: - result += '|lang%d=%s' % (usageCounter, lang) - result += '|wiki%d=%s' % (usageCounter, project) - result += '|article%d=%s' % (usageCounter, article) - result += '\n' - usageCounter += 1 - galleryCounter = 1 - for gallery in galleries: - result += '|gallery{}={}'.format(galleryCounter, - gallery.replace('_', ' ')) + '\n' - galleryCounter += 1 - result += '|ncats={}\n}}\n'.format(ncats) - return result - - def main(*args): """ Process command line arguments and invoke bot. @@ -454,25 +286,15 @@ @param args: command line arguments @type args: str """ - onlyFilter = False onlyUncat = False
# Process global args and prepare generator args parser local_args = pywikibot.handle_args(args) genFactory = pagegenerators.GeneratorFactory()
- global search_wikis - global hint_wiki - for arg in local_args: - if arg == '-onlyfilter': - onlyFilter = True - elif arg == '-onlyuncat': + if arg == '-onlyuncat': onlyUncat = True - elif arg.startswith('-hint:'): - hint_wiki = arg[len('-hint:'):] - elif arg.startswith('-onlyhint'): - search_wikis = arg[len('-onlyhint:'):] else: genFactory.handleArg(arg)
@@ -484,7 +306,7 @@ recurse=True)
initLists() - categorizeImages(generator, onlyFilter, onlyUncat) + categorizeImages(generator, onlyUncat) pywikibot.output('All done')