jenkins-bot merged this change.
[cleanup] Remove commonshelper parts
Remove commonshelper parts because CommonSense isn't available anymore.
Part 1 detached from I28d72f2
Bug: T195079
Change-Id: I765754366939b435b54a0340a1e518583b0a6f07
---
M scripts/imagerecat.py
1 file changed, 12 insertions(+), 190 deletions(-)
diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py
index 04e3fd0..3a143ea 100755
--- a/scripts/imagerecat.py
+++ b/scripts/imagerecat.py
@@ -1,29 +1,15 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
-Program to (re)categorize images at commons.
+Program to re-categorize images at commons.
-The program uses commonshelper for category suggestions.
-It takes the suggestions and the current categories. Put the categories through
+The program reads the current categories, puts the categories through
some filters and adds the result.
The following command line parameters are supported:
--onlyfilter Don't use Commonsense to get categories, just filter the
- current categories
-
-onlyuncat Only work on uncategorized images. Will prevent the bot from
working on an image multiple times.
-
--hint Give Commonsense a hint.
- For example -hint:li.wikipedia.org
-
--onlyhint Give Commonsense a hint. And only work on this hint.
- Syntax is the same as -hint. Some special hints are possible:
- _20 : Work on the top 20 wikipedia's
- _80 : Work on the top 80 wikipedia's
- wps : Work on all wikipedia's
-
"""
#
# (C) Multichill, 2008-2011
@@ -33,7 +19,6 @@
#
from __future__ import absolute_import, division, unicode_literals
-import re
import socket
import xml.etree.ElementTree
@@ -52,9 +37,6 @@
category_blacklist = []
countries = []
-search_wikis = '_20'
-hint_wiki = ''
-
def initLists():
"""Get the list of countries & the blacklist from Commons."""
@@ -73,7 +55,7 @@
return
-def categorizeImages(generator, onlyFilter, onlyUncat):
+def categorizeImages(generator, onlyUncat):
"""Loop over all images in generator and try to categorize them.
Get category suggestions from CommonSense.
@@ -93,19 +75,12 @@
continue
currentCats = getCurrentCats(imagepage)
- if onlyFilter:
- commonshelperCats = []
- usage = []
- galleries = []
- else:
- (commonshelperCats, usage,
- galleries) = getCommonshelperCats(imagepage)
- newcats = applyAllFilters(commonshelperCats + currentCats)
+ newcats = applyAllFilters(currentCats)
if newcats and set(currentCats) != set(newcats):
for cat in newcats:
pywikibot.output(' Found new cat: ' + cat)
- saveImagePage(imagepage, newcats, usage, galleries, onlyFilter)
+ saveImagePage(imagepage, newcats)
def getCurrentCats(imagepage):
@@ -116,91 +91,6 @@
return list(set(result))
-def getCommonshelperCats(imagepage):
- """Get category suggestions from CommonSense.
-
- @rtype: list of unicode
-
- """
- commonshelperCats = []
- usage = []
- galleries = []
-
- global search_wikis
- global hint_wiki
- site = imagepage.site
- lang = site.code
- family = site.family.name
- if lang == 'commons' and family == 'commons':
- parameters = urlencode(
- {'i': imagepage.title(with_ns=False).encode('utf-8'),
- 'r': 'on',
- 'go-clean': 'Find+Categories',
- 'p': search_wikis,
- 'cl': hint_wiki})
- elif family == 'wikipedia':
- parameters = urlencode(
- {'i': imagepage.title(with_ns=False).encode('utf-8'),
- 'r': 'on',
- 'go-move': 'Find+Categories',
- 'p': search_wikis,
- 'cl': hint_wiki,
- 'w': lang})
- else:
- # Can't handle other sites atm
- return [], [], []
-
- commonsenseRe = re.compile(
- r'^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usagenum>(\d)+)\)\s'
- r'(?P<usage>(.*))\s'
- r'#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)'
- r'#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s'
- r'#GALLERIES(\s)+\((?P<galnum>(\d)+)\)\s(?P<gals>(.*))\s(.*)#EOF$',
- re.MULTILINE + re.DOTALL)
-
- gotInfo = False
- matches = None
- maxtries = 10
- tries = 0
- while not gotInfo:
- try:
- if tries < maxtries:
- tries += 1
- commonsHelperPage = fetch(
- 'https://toolserver.org/~daniel/WikiSense/CommonSense.php?'
- + parameters)
- matches = commonsenseRe.search(
- commonsHelperPage.text)
- gotInfo = True
- else:
- break
- except IOError:
- pywikibot.output("Got an IOError, let's try again")
- except socket.timeout:
- pywikibot.output("Got a timeout, let's try again")
-
- if matches and gotInfo:
- if matches.group('usagenum') > 0:
- used = matches.group('usage').splitlines()
- for use in used:
- usage = usage + getUsage(use)
- if matches.group('catnum') > 0:
- cats = matches.group('cats').splitlines()
- for cat in cats:
- commonshelperCats.append(cat.replace('_', ' '))
- pywikibot.output('category : ' + cat)
- if matches.group('galnum') > 0:
- gals = matches.group('gals').splitlines()
- for gal in gals:
- galleries.append(gal.replace('_', ' '))
- pywikibot.output('gallery : ' + gal)
- commonshelperCats = list(set(commonshelperCats))
- galleries = list(set(galleries))
- for (lang, project, article) in usage:
- pywikibot.output(lang + project + article)
- return commonshelperCats, usage, galleries
-
-
def getOpenStreetMapCats(latitude, longitude):
"""Get a list of location categories based on the OSM nomatim tool."""
result = []
@@ -282,27 +172,6 @@
return ''
-def getUsage(use):
- """Parse the Commonsense output to get the usage."""
- result = []
- lang = ''
- project = ''
- articles = ''
- usageRe = re.compile(
- r'^(?P<lang>([\w-]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
- matches = usageRe.search(use)
- if matches:
- if matches.group('lang'):
- lang = matches.group('lang')
- if matches.group('project'):
- project = matches.group('project')
- if matches.group('articles'):
- articles = matches.group('articles')
- for article in articles.split():
- result.append((lang, project, article))
- return result
-
-
def applyAllFilters(categories):
"""Apply all filters on categories."""
result = filterDisambiguation(categories)
@@ -392,59 +261,22 @@
return categories
-def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
+def saveImagePage(imagepage, newcats):
"""Remove the old categories and add the new categories to the image."""
newtext = textlib.removeCategoryLinks(imagepage.text, imagepage.site)
- if not onlyFilter:
- newtext = removeTemplates(newtext)
- newtext = newtext + getCheckCategoriesTemplate(usage, galleries,
- len(newcats))
newtext += '\n'
+
for category in newcats:
newtext = newtext + '[[Category:' + category + ']]\n'
- if onlyFilter:
- comment = 'Filtering categories'
- else:
- comment = ('Image is categorized by a bot using data from '
- '[[Commons:Tools#CommonSense|CommonSense]]')
+
+ comment = 'Filtering categories'
+
pywikibot.showDiff(imagepage.text, newtext)
imagepage.text = newtext
imagepage.save(comment)
return
-def removeTemplates(oldtext=''):
- """Remove {{Uncategorized}} and {{Check categories}} templates."""
- result = re.sub(
- r'{{\s*([Uu]ncat(egori[sz]ed( image)?)?|'
- r'[Nn]ocat|[Nn]eedscategory)[^}]*}}',
- '', oldtext)
- result = re.sub('<!-- Remove this line once you have added categories -->',
- '', result)
- result = re.sub(r'\{\{\s*[Cc]heck categories[^}]*\}\}', '', result)
- return result
-
-
-def getCheckCategoriesTemplate(usage, galleries, ncats):
- """Build the check categories template with all parameters."""
- result = ('{{Check categories|year={{subst:CURRENTYEAR}}|month={{subst:'
- 'CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}\n')
- usageCounter = 1
- for (lang, project, article) in usage:
- result += '|lang%d=%s' % (usageCounter, lang)
- result += '|wiki%d=%s' % (usageCounter, project)
- result += '|article%d=%s' % (usageCounter, article)
- result += '\n'
- usageCounter += 1
- galleryCounter = 1
- for gallery in galleries:
- result += '|gallery{}={}'.format(galleryCounter,
- gallery.replace('_', ' ')) + '\n'
- galleryCounter += 1
- result += '|ncats={}\n}}\n'.format(ncats)
- return result
-
-
def main(*args):
"""
Process command line arguments and invoke bot.
@@ -454,25 +286,15 @@
@param args: command line arguments
@type args: str
"""
- onlyFilter = False
onlyUncat = False
# Process global args and prepare generator args parser
local_args = pywikibot.handle_args(args)
genFactory = pagegenerators.GeneratorFactory()
- global search_wikis
- global hint_wiki
-
for arg in local_args:
- if arg == '-onlyfilter':
- onlyFilter = True
- elif arg == '-onlyuncat':
+ if arg == '-onlyuncat':
onlyUncat = True
- elif arg.startswith('-hint:'):
- hint_wiki = arg[len('-hint:'):]
- elif arg.startswith('-onlyhint'):
- search_wikis = arg[len('-onlyhint:'):]
else:
genFactory.handleArg(arg)
@@ -484,7 +306,7 @@
recurse=True)
initLists()
- categorizeImages(generator, onlyFilter, onlyUncat)
+ categorizeImages(generator, onlyUncat)
pywikibot.output('All done')
To view, visit change 551812. To unsubscribe, or for help writing mail filters, visit settings.