[Gerrit] PEP8-ify most of imagerecat.py - change (pywikibot/core) - Pywikibot-commits

30 Sep 2013

jenkins-bot has submitted this change and it was merged.
Change subject: PEP8-ify most of imagerecat.py
......................................................................
PEP8-ify most of imagerecat.py
Change-Id: I5a4a7dd85eadf7233fe7e388644dadd5ba1e5ffc
---
M scripts/imagerecat.py
1 file changed, 105 insertions(+), 92 deletions(-)
Approvals:
  Merlijn van Deen: Looks good to me, approved
  jenkins-bot: Verified

diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py
index 9a6ee16..df04190 100644
--- a/scripts/imagerecat.py
+++ b/scripts/imagerecat.py
@@ -45,13 +45,14 @@
 category_blacklist = []
 countries = []
-search_wikis=u'_20'
-hint_wiki=u''
+search_wikis = u'_20'
+hint_wiki = u''
+
 def initLists():
-    '''
+    """
     Get the list of countries & the blacklist from Commons.
-    '''
+    """
     global category_blacklist
     global countries
@@ -66,18 +67,19 @@
         countries.append(country.title(withNamespace=False))
     return
+
 def categorizeImages(generator, onlyFilter, onlyUncat):
-    ''' Loop over all images in generator and try to categorize them. Get
+    """ Loop over all images in generator and try to categorize them. Get
     category suggestions from CommonSense.
-    '''
+    """
     for page in generator:
         if page.exists() and (page.namespace() == 6) and \
            (not page.isRedirectPage()):
             imagepage = pywikibot.ImagePage(page.site(), page.title())
             pywikibot.output(u'Working on ' + imagepage.title())
-            if (onlyUncat and not(u'Uncategorized' in imagepage.templates())):
+            if onlyUncat and not(u'Uncategorized' in imagepage.templates()):
                 pywikibot.output(u'No Uncategorized template found')
             else:
                 currentCats = getCurrentCats(imagepage)
@@ -87,26 +89,28 @@
                     galleries = []
                 else:
                     (commonshelperCats, usage, galleries) = getCommonshelperCats(imagepage)
-                newcats = applyAllFilters(commonshelperCats+currentCats)
+                newcats = applyAllFilters(commonshelperCats + currentCats)
-                if (len(newcats) > 0 and not(set(currentCats)==set(newcats))):
+                if len(newcats) > 0 and not(set(currentCats) == set(newcats)):
                     for cat in newcats:
-                        pywikibot.output(u' Found new cat: ' + cat);
+                        pywikibot.output(u' Found new cat: ' + cat)
                     saveImagePage(imagepage, newcats, usage, galleries,
                                   onlyFilter)
+
 def getCurrentCats(imagepage):
-    ''' Get the categories currently on the image '''
+    """ Get the categories currently on the image """
     result = []
     for cat in imagepage.categories():
         result.append(cat.title(withNamespace=False))
     return list(set(result))
+
 def getCommonshelperCats(imagepage):
-    ''' Get category suggestions from CommonSense. Parse them and return a list
+    """ Get category suggestions from CommonSense. Parse them and return a list
     of suggestions.
-    '''
+    """
     commonshelperCats = []
     usage = []
     galleries = []
@@ -116,35 +120,35 @@
     site = imagepage.site
     lang = site.language()
     family = site.family.name
-    if lang==u'commons' and family==u'commons':
+    if lang == u'commons' and family == u'commons':
         parameters = urllib.urlencode(
-            {'i' : imagepage.title(withNamespace=False).encode('utf-8'),
-             'r' : 'on',
-             'go-clean' : 'Find+Categories',
-             'p' : search_wikis,
-             'cl' : hint_wiki})
-    elif family==u'wikipedia':
+            {'i': imagepage.title(withNamespace=False).encode('utf-8'),
+             'r': 'on',
+             'go-clean': 'Find+Categories',
+             'p': search_wikis,
+             'cl': hint_wiki})
+    elif family == u'wikipedia':
         parameters = urllib.urlencode(
-            {'i' : imagepage.title(withNamespace=False).encode('utf-8'),
-             'r' : 'on',
-             'go-move' : 'Find+Categories',
-             'p' : search_wikis,
-             'cl' : hint_wiki,
-             'w' : lang})
+            {'i': imagepage.title(withNamespace=False).encode('utf-8'),
+             'r': 'on',
+             'go-move': 'Find+Categories',
+             'p': search_wikis,
+             'cl': hint_wiki,
+             'w': lang})
     else:
         #Cant handle other sites atm
-        return ([], [], [])
+        return [], [], []
-    commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+((?P<usagenum>(\d)+))\s(?P<usage>(.*))\s#KEYWORDS(\s)+((?P<keywords>(\d)+))(.*)#CATEGORIES(\s)+((?P<catnum>(\d)+))\s(?P<cats>(.*))\s#GALLERIES(\s)+((?P<galnum>(\d)+))\s(?P<gals>(.*))\s(.*)#EOF$', re.MULTILINE + re.DOTALL)
+    commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+((?P<usagenum>(\d)+))\s(?P<usage>(.*))\s#KEYWORDS(\s)+((?P<keywords>(\d)+))(.*)#CATEGORIES(\s)+((?P<catnum>(\d)+))\s(?P<cats>(.*))\s#GALLERIES(\s)+((?P<galnum>(\d)+))\s(?P<gals>(.*))\s(.*)#EOF$', re.MULTILINE + re.DOTALL)  # noqa
     gotInfo = False
     matches = None
     maxtries = 10
     tries = 0
-    while(not gotInfo):
+    while not gotInfo:
         try:
-            if ( tries < maxtries ):
-                tries = tries + 1
+            if tries < maxtries:
+                tries += 1
                 commonsHelperPage = urllib.urlopen(
                     "http://toolserver.org/~daniel/WikiSense/CommonSense.php?%s" % parameters)
                 matches = commonsenseRe.search(
@@ -157,18 +161,18 @@
         except socket.timeout:
             pywikibot.output(u'Got a timeout, let\'s try again')
-    if (matches and gotInfo):
-        if (matches.group('usagenum') > 0):
+    if matches and gotInfo:
+        if matches.group('usagenum') > 0:
             used = matches.group('usage').splitlines()
             for use in used:
-                usage= usage + getUsage(use)
+                usage = usage + getUsage(use)
                 #pywikibot.output(use)
-        if (matches.group('catnum') > 0):
+        if matches.group('catnum') > 0:
             cats = matches.group('cats').splitlines()
             for cat in cats:
                 commonshelperCats.append(cat.replace('_', ' '))
                 pywikibot.output(u'category : ' + cat)
-        if (matches.group('galnum') > 0):
+        if matches.group('galnum') > 0:
             gals = matches.group('gals').splitlines()
             for gal in gals:
                 galleries.append(gal.replace('_', ' '))
@@ -177,41 +181,45 @@
     galleries = list(set(galleries))
     for (lang, project, article) in usage:
         pywikibot.output(lang + project + article)
-    return (commonshelperCats, usage, galleries)
+    return commonshelperCats, usage, galleries
+
 def getOpenStreetMapCats(latitude, longitude):
-    '''
+    """
     Get a list of location categories based on the OSM nomatim tool
-    '''
+    """
     result = []
     locationList = getOpenStreetMap(latitude, longitude)
     for i in range(0, len(locationList)):
         #print 'Working on ' + locationList[i]
-        if i <= len(locationList)-3:
-            category = getCategoryByName(name=locationList[i], parent=locationList[i+1], grandparent=locationList[i+2])
-        elif i == len(locationList)-2:
-            category = getCategoryByName(name=locationList[i], parent=locationList[i+1])
+        if i <= len(locationList) - 3:
+            category = getCategoryByName(name=locationList[i],
+                                         parent=locationList[i + 1],
+                                         grandparent=locationList[i + 2])
+        elif i == len(locationList) - 2:
+            category = getCategoryByName(name=locationList[i],
+                                         parent=locationList[i + 1])
         else:
             category = getCategoryByName(name=locationList[i])
-        if category and not category==u'':
+        if category and not category == u'':
             result.append(category)
     #print result
     return result
 def getOpenStreetMap(latitude, longitude):
-    '''
+    """
     Get the result from http://nominatim.openstreetmap.org/reverse
     and put it in a list of tuples to play around with
-    '''
+    """
     result = []
     gotInfo = False
-    parameters = urllib.urlencode({'lat' : latitude, 'lon' : longitude, 'accept-language' : 'en'})
-    while(not gotInfo):
+    parameters = urllib.urlencode({'lat': latitude, 'lon': longitude, 'accept-language': 'en'})
+    while not gotInfo:
         try:
             page = urllib.urlopen("http://nominatim.openstreetmap.org/reverse?format=xml&%s" % parameters)
             et = xml.etree.ElementTree.parse(page)
-            gotInfo=True
+            gotInfo = True
         except IOError:
             pywikibot.output(u'Got an IOError, let\'s try again')
             time.sleep(30)
@@ -233,30 +241,28 @@
     #print result
     return result
+
 def getCategoryByName(name, parent=u'', grandparent=u''):
-    if not parent==u'':
+    if not parent == u'':
         workname = name.strip() + u',_' + parent.strip()
-        workcat = pywikibot.Category(
-                    pywikibot.Site(u'commons', u'commons'), workname)
+        workcat = pywikibot.Category(pywikibot.Site(u'commons', u'commons'), workname)
         if workcat.exists():
             return workname
-    if not grandparent==u'':
+    if not grandparent == u'':
         workname = name.strip() + u',_' + grandparent.strip()
-        workcat = pywikibot.Category(
-                pywikibot.Site(u'commons', u'commons'), workname)
+        workcat = pywikibot.Category(pywikibot.Site(u'commons', u'commons'), workname)
         if workcat.exists():
             return workname
     workname = name.strip()
-    workcat = pywikibot.Category(
-                pywikibot.Site(u'commons', u'commons'), workname)
+    workcat = pywikibot.Category(pywikibot.Site(u'commons', u'commons'), workname)
     if workcat.exists():
         return workname
     return u''
 def getUsage(use):
-    ''' Parse the Commonsense output to get the usage '''
+    """ Parse the Commonsense output to get the usage """
     result = []
     lang = ''
     project = ''
@@ -265,21 +271,22 @@
         '^(?P<lang>([\w-]+)).(?P<project>([\w]+)).org:(?P<articles>\s(.*))')
     matches = usageRe.search(use)
     if matches:
-        if (matches.group('lang')):
+        if matches.group('lang'):
             lang = matches.group('lang')
             #pywikibot.output(lang)
-        if (matches.group('project')):
+        if matches.group('project'):
             project = matches.group('project')
             #pywikibot.output(project)
-        if (matches.group('articles')):
+        if matches.group('articles'):
             articles = matches.group('articles')
             #pywikibot.output(articles)
     for article in articles.split():
         result.append((lang, project, article))
     return result
+
 def applyAllFilters(categories):
-    ''' Apply all filters on categories. '''
+    """ Apply all filters on categories. """
     result = []
     result = filterDisambiguation(categories)
     result = followRedirects(result)
@@ -288,17 +295,19 @@
     result = filterParents(result)
     return result
+
 def filterBlacklist(categories):
-    ''' Filter out categories which are on the blacklist. '''
+    """ Filter out categories which are on the blacklist. """
     result = []
     for cat in categories:
         cat = cat.replace('_', ' ')
-        if (cat not in category_blacklist):
+        if not (cat in category_blacklist):
             result.append(cat)
     return list(set(result))
+
 def filterDisambiguation(categories):
-    ''' Filter out disambiguation categories. '''
+    """ Filter out disambiguation categories. """
     result = []
     for cat in categories:
         if (not pywikibot.Page(pywikibot.Site(u'commons', u'commons'),
@@ -306,8 +315,9 @@
             result.append(cat)
     return result
+
 def followRedirects(categories):
-    ''' If a category is a redirect, replace the category with the target. '''
+    """ If a category is a redirect, replace the category with the target. """
     result = []
     for cat in categories:
         categoryPage = pywikibot.Page(pywikibot.getSite(u'commons', u'commons'),
@@ -320,19 +330,20 @@
             result.append(cat)
     return result
+
 def filterCountries(categories):
-    ''' Try to filter out ...by country categories.
+    """ Try to filter out ...by country categories.
     First make a list of any ...by country categories and try to find some
     countries. If a by country category has a subcategoy containing one of the
     countries found, add it. The ...by country categories remain in the set and
     should be filtered out by filterParents.
-    '''
+    """
     result = categories
     listByCountry = []
     listCountries = []
     for cat in categories:
-        if (cat.endswith(u'by country')):
+        if cat.endswith(u'by country'):
             listByCountry.append(cat)
#If cat contains 'by country' add it to the list
@@ -341,27 +352,26 @@
             for country in countries:
                 if country in cat:
                     listCountries.append(country)
-    if(len(listByCountry) > 0):
+    if len(listByCountry) > 0:
         for bc in listByCountry:
             category = pywikibot.Category(
                 pywikibot.Site(u'commons', u'commons'), u'Category:' + bc)
             for subcategory in category.subcategories():
                 for country in listCountries:
-                    if (subcategory.title(withNamespace=False).endswith(country)):
+                    if subcategory.title(withNamespace=False).endswith(country):
                         result.append(subcategory.title(withNamespace=False))
     return list(set(result))
-def filterParents(categories):
-    ''' Remove all parent categories from the set to prevent overcategorization.
-    '''
+def filterParents(categories):
+    """ Remove all parent categories from the set to prevent overcategorization. """
     result = []
     toFilter = u''
     for cat in categories:
         cat = cat.replace('_', ' ')
         toFilter = toFilter + "[[Category:" + cat + "]]\n"
-    parameters = urllib.urlencode({'source' : toFilter.encode('utf-8'),
-                                   'bot' : '1'})
+    parameters = urllib.urlencode({'source': toFilter.encode('utf-8'),
+                                   'bot': '1'})
     filterCategoriesRe = re.compile('\[\[Category:([^\]]*)\]\]')
     try:
         filterCategoriesPage = urllib.urlopen(
@@ -377,17 +387,18 @@
         return categories
     return result
+
 def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
-    ''' Remove the old categories and add the new categories to the image. '''
+    """ Remove the old categories and add the new categories to the image. """
     newtext = pywikibot.removeCategoryLinks(imagepage.get(), imagepage.site())
-    if not(onlyFilter):
+    if not onlyFilter:
         newtext = removeTemplates(newtext)
         newtext = newtext + getCheckCategoriesTemplate(usage, galleries,
                                                        len(newcats))
-    newtext = newtext + u'\n'
+    newtext += u'\n'
     for category in newcats:
         newtext = newtext + u'[[Category:' + category + u']]\n'
-    if(onlyFilter):
+    if onlyFilter:
         comment = u'Filtering categories'
     else:
         comment = u'Image is categorized by a bot using data from [[Commons:Tools#CommonSense|CommonSense]]'
@@ -395,11 +406,11 @@
     imagepage.put(newtext, comment)
     return
-def removeTemplates(oldtext = u''):
-    '''
+
+def removeTemplates(oldtext=u''):
+    """
     Remove {{Uncategorized}} and {{Check categories}} templates
-    '''
-    result = u''
+    """
     result = re.sub(
         u'{{\s*([Uu]ncat(egori[sz]ed( image)?)?|[Nn]ocat|[Nn]eedscategory)[^}]*}}', u'', oldtext)
     result = re.sub(u'<!-- Remove this line once you have added categories -->',
@@ -407,10 +418,11 @@
     result = re.sub(u'{{\s*[Cc]heck categories[^}]*}}', u'', result)
     return result
+
 def getCheckCategoriesTemplate(usage, galleries, ncats):
-    '''
+    """
     Build the check categories template with all parameters
-    '''
+    """
     result = u'{{Check categories|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}\n'
     usageCounter = 1
     for (lang, project, article) in usage:
@@ -418,19 +430,20 @@
         result += u'|wiki%d=%s' % (usageCounter, project)
         result += u'|article%d=%s' % (usageCounter, article)
         result += u'\n'
-        usageCounter = usageCounter + 1
+        usageCounter += 1
     galleryCounter = 1
     for gallery in galleries:
         result += u'|gallery%d=%s' % (galleryCounter, gallery.replace('_', ' ')) + u'\n'
-        galleryCounter = galleryCounter + 1
+        galleryCounter += 1
     result += u'|ncats=%d\n' % ncats
     result += u'}}\n'
     return result
+
 def main(args):
-    '''
+    """
     Main loop. Get a generator and options. Work on all images in the generator.
-    '''
+    """
     generator = None
     onlyFilter = False
     onlyUncat = False
@@ -446,9 +459,9 @@
         elif arg == '-onlyuncat':
             onlyUncat = True
         elif arg.startswith('-hint:'):
-            hint_wiki = arg [len('-hint:'):]
+            hint_wiki = arg[len('-hint:'):]
         elif arg.startswith('-onlyhint'):
-            search_wikis = arg [len('-onlyhint:'):]
+            search_wikis = arg[len('-onlyhint:'):]
         else:
             genFactory.handleArg(arg)
-- 
To view, visit https://gerrit.wikimedia.org/r/86624
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5a4a7dd85eadf7233fe7e388644dadd5ba1e5ffc
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Legoktm legoktm.wikipedia@gmail.com
Gerrit-Reviewer: Ladsgroup ladsgroup@gmail.com
Gerrit-Reviewer: Merlijn van Deen valhallasw@arctus.nl
Gerrit-Reviewer: jenkins-bot