jenkins-bot has submitted this change and it was merged.
Change subject: PEP8-ify most of imagerecat.py
......................................................................
PEP8-ify most of imagerecat.py
Change-Id: I5a4a7dd85eadf7233fe7e388644dadd5ba1e5ffc
---
M scripts/imagerecat.py
1 file changed, 105 insertions(+), 92 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py
index 9a6ee16..df04190 100644
--- a/scripts/imagerecat.py
+++ b/scripts/imagerecat.py
@@ -45,13 +45,14 @@
category_blacklist = []
countries = []
-search_wikis=u'_20'
-hint_wiki=u''
+search_wikis = u'_20'
+hint_wiki = u''
+
def initLists():
- '''
+ """
Get the list of countries & the blacklist from Commons.
- '''
+ """
global category_blacklist
global countries
@@ -66,18 +67,19 @@
countries.append(country.title(withNamespace=False))
return
+
def categorizeImages(generator, onlyFilter, onlyUncat):
- ''' Loop over all images in generator and try to categorize them. Get
+ """ Loop over all images in generator and try to categorize them. Get
category suggestions from CommonSense.
- '''
+ """
for page in generator:
if page.exists() and (page.namespace() == 6) and \
(not page.isRedirectPage()):
imagepage = pywikibot.ImagePage(page.site(), page.title())
pywikibot.output(u'Working on ' + imagepage.title())
- if (onlyUncat and not(u'Uncategorized' in imagepage.templates())):
+ if onlyUncat and not(u'Uncategorized' in imagepage.templates()):
pywikibot.output(u'No Uncategorized template found')
else:
currentCats = getCurrentCats(imagepage)
@@ -87,26 +89,28 @@
galleries = []
else:
(commonshelperCats, usage, galleries) = getCommonshelperCats(imagepage)
- newcats = applyAllFilters(commonshelperCats+currentCats)
+ newcats = applyAllFilters(commonshelperCats + currentCats)
- if (len(newcats) > 0 and not(set(currentCats)==set(newcats))):
+ if len(newcats) > 0 and not(set(currentCats) == set(newcats)):
for cat in newcats:
- pywikibot.output(u' Found new cat: ' + cat);
+ pywikibot.output(u' Found new cat: ' + cat)
saveImagePage(imagepage, newcats, usage, galleries,
onlyFilter)
+
def getCurrentCats(imagepage):
- ''' Get the categories currently on the image '''
+ """ Get the categories currently on the image """
result = []
for cat in imagepage.categories():
result.append(cat.title(withNamespace=False))
return list(set(result))
+
def getCommonshelperCats(imagepage):
- ''' Get category suggestions from CommonSense. Parse them and return a list
+ """ Get category suggestions from CommonSense. Parse them and return a list
of suggestions.
- '''
+ """
commonshelperCats = []
usage = []
galleries = []
@@ -116,35 +120,35 @@
site = imagepage.site
lang = site.language()
family = site.family.name
- if lang==u'commons' and family==u'commons':
+ if lang == u'commons' and family == u'commons':
parameters = urllib.urlencode(
- {'i' : imagepage.title(withNamespace=False).encode('utf-8'),
- 'r' : 'on',
- 'go-clean' : 'Find+Categories',
- 'p' : search_wikis,
- 'cl' : hint_wiki})
- elif family==u'wikipedia':
+ {'i': imagepage.title(withNamespace=False).encode('utf-8'),
+ 'r': 'on',
+ 'go-clean': 'Find+Categories',
+ 'p': search_wikis,
+ 'cl': hint_wiki})
+ elif family == u'wikipedia':
parameters = urllib.urlencode(
- {'i' : imagepage.title(withNamespace=False).encode('utf-8'),
- 'r' : 'on',
- 'go-move' : 'Find+Categories',
- 'p' : search_wikis,
- 'cl' : hint_wiki,
- 'w' : lang})
+ {'i': imagepage.title(withNamespace=False).encode('utf-8'),
+ 'r': 'on',
+ 'go-move': 'Find+Categories',
+ 'p': search_wikis,
+ 'cl': hint_wiki,
+ 'w': lang})
else:
        #Can't handle other sites atm
- return ([], [], [])
+ return [], [], []
- commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usagenum>(\d)+)\)\s(?P<usage>(.*))\s#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s#GALLERIES(\s)+\((?P<galnum>(\d)+)\)\s(?P<gals>(.*))\s(.*)#EOF$', re.MULTILINE + re.DOTALL)
+ commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usagenum>(\d)+)\)\s(?P<usage>(.*))\s#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s#GALLERIES(\s)+\((?P<galnum>(\d)+)\)\s(?P<gals>(.*))\s(.*)#EOF$', re.MULTILINE + re.DOTALL) # noqa
gotInfo = False
matches = None
maxtries = 10
tries = 0
- while(not gotInfo):
+ while not gotInfo:
try:
- if ( tries < maxtries ):
- tries = tries + 1
+ if tries < maxtries:
+ tries += 1
commonsHelperPage = urllib.urlopen(
"http://toolserver.org/~daniel/WikiSense/CommonSense.php?%s" % parameters)
matches = commonsenseRe.search(
@@ -157,18 +161,18 @@
except socket.timeout:
pywikibot.output(u'Got a timeout, let\'s try again')
- if (matches and gotInfo):
- if (matches.group('usagenum') > 0):
+ if matches and gotInfo:
+ if matches.group('usagenum') > 0:
used = matches.group('usage').splitlines()
for use in used:
- usage= usage + getUsage(use)
+ usage = usage + getUsage(use)
#pywikibot.output(use)
- if (matches.group('catnum') > 0):
+ if matches.group('catnum') > 0:
cats = matches.group('cats').splitlines()
for cat in cats:
commonshelperCats.append(cat.replace('_', ' '))
pywikibot.output(u'category : ' + cat)
- if (matches.group('galnum') > 0):
+ if matches.group('galnum') > 0:
gals = matches.group('gals').splitlines()
for gal in gals:
galleries.append(gal.replace('_', ' '))
@@ -177,41 +181,45 @@
galleries = list(set(galleries))
for (lang, project, article) in usage:
pywikibot.output(lang + project + article)
- return (commonshelperCats, usage, galleries)
+ return commonshelperCats, usage, galleries
+
def getOpenStreetMapCats(latitude, longitude):
- '''
+ """
    Get a list of location categories based on the OSM nominatim tool
- '''
+ """
result = []
locationList = getOpenStreetMap(latitude, longitude)
for i in range(0, len(locationList)):
#print 'Working on ' + locationList[i]
- if i <= len(locationList)-3:
- category = getCategoryByName(name=locationList[i], parent=locationList[i+1], grandparent=locationList[i+2])
- elif i == len(locationList)-2:
- category = getCategoryByName(name=locationList[i], parent=locationList[i+1])
+ if i <= len(locationList) - 3:
+ category = getCategoryByName(name=locationList[i],
+ parent=locationList[i + 1],
+ grandparent=locationList[i + 2])
+ elif i == len(locationList) - 2:
+ category = getCategoryByName(name=locationList[i],
+ parent=locationList[i + 1])
else:
category = getCategoryByName(name=locationList[i])
- if category and not category==u'':
+ if category and not category == u'':
result.append(category)
#print result
return result
def getOpenStreetMap(latitude, longitude):
- '''
+ """
Get the result from http://nominatim.openstreetmap.org/reverse
and put it in a list of tuples to play around with
- '''
+ """
result = []
gotInfo = False
- parameters = urllib.urlencode({'lat' : latitude, 'lon' : longitude, 'accept-language' : 'en'})
- while(not gotInfo):
+ parameters = urllib.urlencode({'lat': latitude, 'lon': longitude, 'accept-language': 'en'})
+ while not gotInfo:
try:
page = urllib.urlopen("http://nominatim.openstreetmap.org/reverse?format=xml&%s" % parameters)
et = xml.etree.ElementTree.parse(page)
- gotInfo=True
+ gotInfo = True
except IOError:
pywikibot.output(u'Got an IOError, let\'s try again')
time.sleep(30)
@@ -233,30 +241,28 @@
#print result
return result
+
def getCategoryByName(name, parent=u'', grandparent=u''):
- if not parent==u'':
+ if not parent == u'':
workname = name.strip() + u',_' + parent.strip()
- workcat = pywikibot.Category(
- pywikibot.Site(u'commons', u'commons'), workname)
+ workcat = pywikibot.Category(pywikibot.Site(u'commons', u'commons'), workname)
if workcat.exists():
return workname
- if not grandparent==u'':
+ if not grandparent == u'':
workname = name.strip() + u',_' + grandparent.strip()
- workcat = pywikibot.Category(
- pywikibot.Site(u'commons', u'commons'), workname)
+ workcat = pywikibot.Category(pywikibot.Site(u'commons', u'commons'), workname)
if workcat.exists():
return workname
workname = name.strip()
- workcat = pywikibot.Category(
- pywikibot.Site(u'commons', u'commons'), workname)
+ workcat = pywikibot.Category(pywikibot.Site(u'commons', u'commons'), workname)
if workcat.exists():
return workname
return u''
def getUsage(use):
- ''' Parse the Commonsense output to get the usage '''
+ """ Parse the Commonsense output to get the usage """
result = []
lang = ''
project = ''
@@ -265,21 +271,22 @@
'^(?P<lang>([\w-]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
matches = usageRe.search(use)
if matches:
- if (matches.group('lang')):
+ if matches.group('lang'):
lang = matches.group('lang')
#pywikibot.output(lang)
- if (matches.group('project')):
+ if matches.group('project'):
project = matches.group('project')
#pywikibot.output(project)
- if (matches.group('articles')):
+ if matches.group('articles'):
articles = matches.group('articles')
#pywikibot.output(articles)
for article in articles.split():
result.append((lang, project, article))
return result
+
def applyAllFilters(categories):
- ''' Apply all filters on categories. '''
+ """ Apply all filters on categories. """
result = []
result = filterDisambiguation(categories)
result = followRedirects(result)
@@ -288,17 +295,19 @@
result = filterParents(result)
return result
+
def filterBlacklist(categories):
- ''' Filter out categories which are on the blacklist. '''
+ """ Filter out categories which are on the blacklist. """
result = []
for cat in categories:
cat = cat.replace('_', ' ')
- if (cat not in category_blacklist):
+ if not (cat in category_blacklist):
result.append(cat)
return list(set(result))
+
def filterDisambiguation(categories):
- ''' Filter out disambiguation categories. '''
+ """ Filter out disambiguation categories. """
result = []
for cat in categories:
if (not pywikibot.Page(pywikibot.Site(u'commons', u'commons'),
@@ -306,8 +315,9 @@
result.append(cat)
return result
+
def followRedirects(categories):
- ''' If a category is a redirect, replace the category with the target. '''
+ """ If a category is a redirect, replace the category with the target. """
result = []
for cat in categories:
categoryPage = pywikibot.Page(pywikibot.getSite(u'commons', u'commons'),
@@ -320,19 +330,20 @@
result.append(cat)
return result
+
def filterCountries(categories):
- ''' Try to filter out ...by country categories.
+ """ Try to filter out ...by country categories.
First make a list of any ...by country categories and try to find some
    countries. If a by country category has a subcategory containing one of the
countries found, add it. The ...by country categories remain in the set and
should be filtered out by filterParents.
- '''
+ """
result = categories
listByCountry = []
listCountries = []
for cat in categories:
- if (cat.endswith(u'by country')):
+ if cat.endswith(u'by country'):
listByCountry.append(cat)
#If cat contains 'by country' add it to the list
@@ -341,27 +352,26 @@
for country in countries:
if country in cat:
listCountries.append(country)
- if(len(listByCountry) > 0):
+ if len(listByCountry) > 0:
for bc in listByCountry:
category = pywikibot.Category(
pywikibot.Site(u'commons', u'commons'), u'Category:' + bc)
for subcategory in category.subcategories():
for country in listCountries:
- if (subcategory.title(withNamespace=False).endswith(country)):
+ if subcategory.title(withNamespace=False).endswith(country):
result.append(subcategory.title(withNamespace=False))
return list(set(result))
-def filterParents(categories):
- ''' Remove all parent categories from the set to prevent overcategorization.
- '''
+def filterParents(categories):
+ """ Remove all parent categories from the set to prevent overcategorization. """
result = []
toFilter = u''
for cat in categories:
cat = cat.replace('_', ' ')
toFilter = toFilter + "[[Category:" + cat + "]]\n"
- parameters = urllib.urlencode({'source' : toFilter.encode('utf-8'),
- 'bot' : '1'})
+ parameters = urllib.urlencode({'source': toFilter.encode('utf-8'),
+ 'bot': '1'})
filterCategoriesRe = re.compile('\[\[Category:([^\]]*)\]\]')
try:
filterCategoriesPage = urllib.urlopen(
@@ -377,17 +387,18 @@
return categories
return result
+
def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
- ''' Remove the old categories and add the new categories to the image. '''
+ """ Remove the old categories and add the new categories to the image. """
newtext = pywikibot.removeCategoryLinks(imagepage.get(), imagepage.site())
- if not(onlyFilter):
+ if not onlyFilter:
newtext = removeTemplates(newtext)
newtext = newtext + getCheckCategoriesTemplate(usage, galleries,
len(newcats))
- newtext = newtext + u'\n'
+ newtext += u'\n'
for category in newcats:
newtext = newtext + u'[[Category:' + category + u']]\n'
- if(onlyFilter):
+ if onlyFilter:
comment = u'Filtering categories'
else:
comment = u'Image is categorized by a bot using data from [[Commons:Tools#CommonSense|CommonSense]]'
@@ -395,11 +406,11 @@
imagepage.put(newtext, comment)
return
-def removeTemplates(oldtext = u''):
- '''
+
+def removeTemplates(oldtext=u''):
+ """
Remove {{Uncategorized}} and {{Check categories}} templates
- '''
- result = u''
+ """
result = re.sub(
u'\{\{\s*([Uu]ncat(egori[sz]ed( image)?)?|[Nn]ocat|[Nn]eedscategory)[^}]*\}\}', u'', oldtext)
result = re.sub(u'<!-- Remove this line once you have added categories -->',
@@ -407,10 +418,11 @@
result = re.sub(u'\{\{\s*[Cc]heck categories[^}]*\}\}', u'', result)
return result
+
def getCheckCategoriesTemplate(usage, galleries, ncats):
- '''
+ """
Build the check categories template with all parameters
- '''
+ """
result = u'{{Check categories|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}\n'
usageCounter = 1
for (lang, project, article) in usage:
@@ -418,19 +430,20 @@
result += u'|wiki%d=%s' % (usageCounter, project)
result += u'|article%d=%s' % (usageCounter, article)
result += u'\n'
- usageCounter = usageCounter + 1
+ usageCounter += 1
galleryCounter = 1
for gallery in galleries:
result += u'|gallery%d=%s' % (galleryCounter, gallery.replace('_', ' ')) + u'\n'
- galleryCounter = galleryCounter + 1
+ galleryCounter += 1
result += u'|ncats=%d\n' % ncats
result += u'}}\n'
return result
+
def main(args):
- '''
+ """
Main loop. Get a generator and options. Work on all images in the generator.
- '''
+ """
generator = None
onlyFilter = False
onlyUncat = False
@@ -446,9 +459,9 @@
elif arg == '-onlyuncat':
onlyUncat = True
elif arg.startswith('-hint:'):
- hint_wiki = arg [len('-hint:'):]
+ hint_wiki = arg[len('-hint:'):]
elif arg.startswith('-onlyhint'):
- search_wikis = arg [len('-onlyhint:'):]
+ search_wikis = arg[len('-onlyhint:'):]
else:
genFactory.handleArg(arg)
--
To view, visit https://gerrit.wikimedia.org/r/86624
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5a4a7dd85eadf7233fe7e388644dadd5ba1e5ffc
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: Port imagerecat.py from compat
......................................................................
Port imagerecat.py from compat
Change-Id: I9e5f5a1fb1823ec85378d3bf9d7c67592139face
---
A scripts/imagerecat.py
1 file changed, 465 insertions(+), 0 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py
new file mode 100644
index 0000000..7c91106
--- /dev/null
+++ b/scripts/imagerecat.py
@@ -0,0 +1,465 @@
+# -*- coding: utf-8 -*-
+"""
+Program to (re)categorize images at commons.
+
+The program uses commonshelper for category suggestions.
+It takes the suggestions and the current categories. Put the categories through
+some filters and adds the result.
+
+The following command line parameters are supported:
+
+-onlyfilter Don't use Commonsense to get categories, just filter the current
+ categories
+
+-onlyuncat Only work on uncategorized images. Will prevent the bot from
+ working on an image multiple times.
+
+-hint Give Commonsense a hint.
+ For example -hint:li.wikipedia.org
+
+-onlyhint Give Commonsense a hint. And only work on this hint.
+ Syntax is the same as -hint. Some special hints are possible:
+ _20 : Work on the top 20 wikipedia's
+ _80 : Work on the top 80 wikipedia's
+ wps : Work on all wikipedia's
+
+"""
+__version__ = '$Id$'
+#
+# (C) Multichill 2008-2011
+# (C) Pywikipedia bot team, 2008-2013
+#
+# Distributed under the terms of the MIT license.
+#
+#
+import os, sys, re, codecs
+import urllib, httplib, urllib2
+import time
+import socket
+import StringIO
+import pywikibot
+from pywikibot import config
+from pywikibot import pagegenerators
+import xml.etree.ElementTree
+
+category_blacklist = []
+countries = []
+
+search_wikis=u'_20'
+hint_wiki=u''
+
+def initLists():
+ '''
+ Get the list of countries & the blacklist from Commons.
+ '''
+ global category_blacklist
+ global countries
+
+ blacklistPage = pywikibot.Page(pywikibot.Site(u'commons', u'commons'),
+ u'User:Multichill/Category_blacklist')
+ for cat in blacklistPage.linkedPages():
+ category_blacklist.append(cat.title(withNamespace=False))
+
+ countryPage = pywikibot.Page(pywikibot.Site(u'commons', u'commons'),
+ u'User:Multichill/Countries')
+ for country in countryPage.linkedPages():
+ countries.append(country.title(withNamespace=False))
+ return
+
+def categorizeImages(generator, onlyFilter, onlyUncat):
+ ''' Loop over all images in generator and try to categorize them. Get
+ category suggestions from CommonSense.
+
+ '''
+ for page in generator:
+ if page.exists() and (page.namespace() == 6) and \
+ (not page.isRedirectPage()):
+ imagepage = pywikibot.ImagePage(page.site(), page.title())
+ pywikibot.output(u'Working on ' + imagepage.title())
+
+ if (onlyUncat and not(u'Uncategorized' in imagepage.templates())):
+ pywikibot.output(u'No Uncategorized template found')
+ else:
+ currentCats = getCurrentCats(imagepage)
+ if onlyFilter:
+ commonshelperCats = []
+ usage = []
+ galleries = []
+ else:
+ (commonshelperCats, usage, galleries) = getCommonshelperCats(imagepage)
+ newcats = applyAllFilters(commonshelperCats+currentCats)
+
+ if (len(newcats) > 0 and not(set(currentCats)==set(newcats))):
+ for cat in newcats:
+ pywikibot.output(u' Found new cat: ' + cat);
+ saveImagePage(imagepage, newcats, usage, galleries,
+ onlyFilter)
+
+def getCurrentCats(imagepage):
+ ''' Get the categories currently on the image '''
+ result = []
+ for cat in imagepage.categories():
+ result.append(cat.title(withNamespace=False))
+ return list(set(result))
+
+def getCommonshelperCats(imagepage):
+ ''' Get category suggestions from CommonSense. Parse them and return a list
+ of suggestions.
+
+ '''
+ commonshelperCats = []
+ usage = []
+ galleries = []
+
+ global search_wikis
+ global hint_wiki
+ site = imagepage.site
+ lang = site.language()
+ family = site.family.name
+ if lang==u'commons' and family==u'commons':
+ parameters = urllib.urlencode(
+ {'i' : imagepage.title(withNamespace=False).encode('utf-8'),
+ 'r' : 'on',
+ 'go-clean' : 'Find+Categories',
+ 'p' : search_wikis,
+ 'cl' : hint_wiki})
+ elif family==u'wikipedia':
+ parameters = urllib.urlencode(
+ {'i' : imagepage.title(withNamespace=False).encode('utf-8'),
+ 'r' : 'on',
+ 'go-move' : 'Find+Categories',
+ 'p' : search_wikis,
+ 'cl' : hint_wiki,
+ 'w' : lang})
+ else:
+        #Can't handle other sites atm
+ return ([], [], [])
+
+ commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usagenum>(\d)+)\)\s(?P<usage>(.*))\s#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s#GALLERIES(\s)+\((?P<galnum>(\d)+)\)\s(?P<gals>(.*))\s(.*)#EOF$', re.MULTILINE + re.DOTALL)
+
+ gotInfo = False
+ matches = None
+ maxtries = 10
+ tries = 0
+ while(not gotInfo):
+ try:
+ if ( tries < maxtries ):
+ tries = tries + 1
+ commonsHelperPage = urllib.urlopen(
+ "http://toolserver.org/~daniel/WikiSense/CommonSense.php?%s" % parameters)
+ matches = commonsenseRe.search(
+ commonsHelperPage.read().decode('utf-8'))
+ gotInfo = True
+ else:
+ break
+ except IOError:
+ pywikibot.output(u'Got an IOError, let\'s try again')
+ except socket.timeout:
+ pywikibot.output(u'Got a timeout, let\'s try again')
+
+ if (matches and gotInfo):
+ if (matches.group('usagenum') > 0):
+ used = matches.group('usage').splitlines()
+ for use in used:
+ usage= usage + getUsage(use)
+ #pywikibot.output(use)
+ if (matches.group('catnum') > 0):
+ cats = matches.group('cats').splitlines()
+ for cat in cats:
+ commonshelperCats.append(cat.replace('_', ' '))
+ pywikibot.output(u'category : ' + cat)
+ if (matches.group('galnum') > 0):
+ gals = matches.group('gals').splitlines()
+ for gal in gals:
+ galleries.append(gal.replace('_', ' '))
+ pywikibot.output(u'gallery : ' + gal)
+ commonshelperCats = list(set(commonshelperCats))
+ galleries = list(set(galleries))
+ for (lang, project, article) in usage:
+ pywikibot.output(lang + project + article)
+ return (commonshelperCats, usage, galleries)
+
+def getOpenStreetMapCats(latitude, longitude):
+ '''
+    Get a list of location categories based on the OSM nominatim tool
+ '''
+ result = []
+ locationList = getOpenStreetMap(latitude, longitude)
+ for i in range(0, len(locationList)):
+ #print 'Working on ' + locationList[i]
+ if i <= len(locationList)-3:
+ category = getCategoryByName(name=locationList[i], parent=locationList[i+1], grandparent=locationList[i+2])
+ elif i == len(locationList)-2:
+ category = getCategoryByName(name=locationList[i], parent=locationList[i+1])
+ else:
+ category = getCategoryByName(name=locationList[i])
+ if category and not category==u'':
+ result.append(category)
+ #print result
+ return result
+
+
+def getOpenStreetMap(latitude, longitude):
+ '''
+ Get the result from http://nominatim.openstreetmap.org/reverse
+ and put it in a list of tuples to play around with
+ '''
+ result = []
+ gotInfo = False
+ parameters = urllib.urlencode({'lat' : latitude, 'lon' : longitude, 'accept-language' : 'en'})
+ while(not gotInfo):
+ try:
+ page = urllib.urlopen("http://nominatim.openstreetmap.org/reverse?format=xml&%s" % parameters)
+ et = xml.etree.ElementTree.parse(page)
+ gotInfo=True
+ except IOError:
+ pywikibot.output(u'Got an IOError, let\'s try again')
+ time.sleep(30)
+ except socket.timeout:
+ pywikibot.output(u'Got a timeout, let\'s try again')
+ time.sleep(30)
+ validParts = [u'hamlet', u'village', u'city', u'county', u'country']
+ invalidParts = [u'path', u'road', u'suburb', u'state', u'country_code']
+ addressparts = et.find('addressparts')
+ #xml.etree.ElementTree.dump(et)
+
+ for addresspart in addressparts.getchildren():
+ if addresspart.tag in validParts:
+ result.append(addresspart.text)
+ elif addresspart.tag in invalidParts:
+ pywikibot.output(u'Dropping %s, %s' % (addresspart.tag, addresspart.text))
+ else:
+ pywikibot.warning(u'%s, %s is not in addressparts lists' % (addresspart.tag, addresspart.text))
+ #print result
+ return result
+
+def getCategoryByName(name, parent=u'', grandparent=u''):
+
+ if not parent==u'':
+ workname = name.strip() + u',_' + parent.strip()
+ workcat = pywikibot.Category(
+ pywikibot.Site(u'commons', u'commons'), workname)
+ if workcat.exists():
+ return workname
+ if not grandparent==u'':
+ workname = name.strip() + u',_' + grandparent.strip()
+ workcat = pywikibot.Category(
+ pywikibot.Site(u'commons', u'commons'), workname)
+ if workcat.exists():
+ return workname
+ workname = name.strip()
+ workcat = pywikibot.Category(
+ pywikibot.Site(u'commons', u'commons'), workname)
+ if workcat.exists():
+ return workname
+ return u''
+
+
+def getUsage(use):
+ ''' Parse the Commonsense output to get the usage '''
+ result = []
+ lang = ''
+ project = ''
+ article = ''
+ usageRe = re.compile(
+ '^(?P<lang>([\w-]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
+ matches = usageRe.search(use)
+ if matches:
+ if (matches.group('lang')):
+ lang = matches.group('lang')
+ #pywikibot.output(lang)
+ if (matches.group('project')):
+ project = matches.group('project')
+ #pywikibot.output(project)
+ if (matches.group('articles')):
+ articles = matches.group('articles')
+ #pywikibot.output(articles)
+ for article in articles.split():
+ result.append((lang, project, article))
+ return result
+
+def applyAllFilters(categories):
+ ''' Apply all filters on categories. '''
+ result = []
+ result = filterDisambiguation(categories)
+ result = followRedirects(result)
+ result = filterBlacklist(result)
+ result = filterCountries(result)
+ result = filterParents(result)
+ return result
+
+def filterBlacklist(categories):
+ ''' Filter out categories which are on the blacklist. '''
+ result = []
+ for cat in categories:
+ cat = cat.replace('_', ' ')
+ if (cat not in category_blacklist):
+ result.append(cat)
+ return list(set(result))
+
+def filterDisambiguation(categories):
+ ''' Filter out disambiguation categories. '''
+ result = []
+ for cat in categories:
+ if (not pywikibot.Page(pywikibot.Site(u'commons', u'commons'),
+ cat, ns=14).isDisambig()):
+ result.append(cat)
+ return result
+
+def followRedirects(categories):
+ ''' If a category is a redirect, replace the category with the target. '''
+ result = []
+ for cat in categories:
+ categoryPage = pywikibot.Page(pywikibot.getSite(u'commons', u'commons'),
+ cat, ns=14)
+ if categoryPage.isCategoryRedirect():
+ result.append(
+ categoryPage.getCategoryRedirectTarget().title(
+ withNamespace=False))
+ else:
+ result.append(cat)
+ return result
+
+def filterCountries(categories):
+ ''' Try to filter out ...by country categories.
+ First make a list of any ...by country categories and try to find some
+    countries. If a by country category has a subcategory containing one of the
+ countries found, add it. The ...by country categories remain in the set and
+ should be filtered out by filterParents.
+
+ '''
+ result = categories
+ listByCountry = []
+ listCountries = []
+ for cat in categories:
+ if (cat.endswith(u'by country')):
+ listByCountry.append(cat)
+
+ #If cat contains 'by country' add it to the list
+ #If cat contains the name of a country add it to the list
+ else:
+ for country in countries:
+ if country in cat:
+ listCountries.append(country)
+ if(len(listByCountry) > 0):
+ for bc in listByCountry:
+ category = pywikibot.Category(
+ pywikibot.Site(u'commons', u'commons'), u'Category:' + bc)
+ for subcategory in category.subcategories():
+ for country in listCountries:
+ if (subcategory.title(withNamespace=False).endswith(country)):
+ result.append(subcategory.title(withNamespace=False))
+ return list(set(result))
+
+def filterParents(categories):
+ ''' Remove all parent categories from the set to prevent overcategorization.
+
+ '''
+ result = []
+ toFilter = u''
+ for cat in categories:
+ cat = cat.replace('_', ' ')
+ toFilter = toFilter + "[[Category:" + cat + "]]\n"
+ parameters = urllib.urlencode({'source' : toFilter.encode('utf-8'),
+ 'bot' : '1'})
+ filterCategoriesRe = re.compile('\[\[Category:([^\]]*)\]\]')
+ try:
+ filterCategoriesPage = urllib.urlopen(
+ "http://toolserver.org/~multichill/filtercats.php?%s" % parameters)
+ result = filterCategoriesRe.findall(
+ filterCategoriesPage.read().decode('utf-8'))
+ except IOError:
+ #Something is wrong, forget about this filter and just return the input
+ return categories
+
+ if not result:
+        #Is empty, don't want to remove all categories
+ return categories
+ return result
+
+def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
+ ''' Remove the old categories and add the new categories to the image. '''
+ newtext = pywikibot.removeCategoryLinks(imagepage.get(), imagepage.site())
+ if not(onlyFilter):
+ newtext = removeTemplates(newtext)
+ newtext = newtext + getCheckCategoriesTemplate(usage, galleries,
+ len(newcats))
+ newtext = newtext + u'\n'
+ for category in newcats:
+ newtext = newtext + u'[[Category:' + category + u']]\n'
+ if(onlyFilter):
+ comment = u'Filtering categories'
+ else:
+ comment = u'Image is categorized by a bot using data from [[Commons:Tools#CommonSense|CommonSense]]'
+ pywikibot.showDiff(imagepage.get(), newtext)
+ imagepage.put(newtext, comment)
+ return
+
+def removeTemplates(oldtext = u''):
+ '''
+ Remove {{Uncategorized}} and {{Check categories}} templates
+ '''
+ result = u''
+ result = re.sub(
+ u'\{\{\s*([Uu]ncat(egori[sz]ed( image)?)?|[Nn]ocat|[Nn]eedscategory)[^}]*\}\}', u'', oldtext)
+ result = re.sub(u'<!-- Remove this line once you have added categories -->',
+ u'', result)
+ result = re.sub(u'\{\{\s*[Cc]heck categories[^}]*\}\}', u'', result)
+ return result
+
+def getCheckCategoriesTemplate(usage, galleries, ncats):
+ '''
+ Build the check categories template with all parameters
+ '''
+ result = u'{{Check categories|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}\n'
+ usageCounter = 1
+ for (lang, project, article) in usage:
+ result += u'|lang%d=%s' % (usageCounter, lang)
+ result += u'|wiki%d=%s' % (usageCounter, project)
+ result += u'|article%d=%s' % (usageCounter, article)
+ result += u'\n'
+ usageCounter = usageCounter + 1
+ galleryCounter = 1
+ for gallery in galleries:
+ result += u'|gallery%d=%s' % (galleryCounter, gallery.replace('_', ' ')) + u'\n'
+ galleryCounter = galleryCounter + 1
+ result += u'|ncats=%d\n' % ncats
+ result += u'}}\n'
+ return result
+
+def main(args):
+ '''
+ Main loop. Get a generator and options. Work on all images in the generator.
+ '''
+ generator = None
+ onlyFilter = False
+ onlyUncat = False
+ genFactory = pagegenerators.GeneratorFactory()
+
+ global search_wikis
+ global hint_wiki
+
+ site = pywikibot.getSite(u'commons', u'commons')
+ for arg in pywikibot.handleArgs():
+ if arg == '-onlyfilter':
+ onlyFilter = True
+ elif arg == '-onlyuncat':
+ onlyUncat = True
+ elif arg.startswith('-hint:'):
+ hint_wiki = arg [len('-hint:'):]
+ elif arg.startswith('-onlyhint'):
+ search_wikis = arg [len('-onlyhint:'):]
+ else:
+ genFactory.handleArg(arg)
+
+ generator = genFactory.getCombinedGenerator()
+ if not generator:
+ generator = pagegenerators.CategorizedPageGenerator(
+ pywikibot.Category(site, u'Category:Media needing categories'),
+ recurse=True)
+ initLists()
+ categorizeImages(generator, onlyFilter, onlyUncat)
+ pywikibot.output(u'All done')
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
--
To view, visit https://gerrit.wikimedia.org/r/86621
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9e5f5a1fb1823ec85378d3bf9d7c67592139face
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: Don't use set literals in tests
......................................................................
Don't use set literals in tests
Set literals are not available in Python 2.6.
This doesn't change the fact that the tests don't work on 2.6 but at
least it stops the message about illegal syntax popping up every time
one installs the module.
Change-Id: I9b8674af99bc3acd418b2a378679e7254e1dee79
---
M tests/page_tests.py
1 file changed, 2 insertions(+), 2 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/tests/page_tests.py b/tests/page_tests.py
index 48f40fe..5a60678 100644
--- a/tests/page_tests.py
+++ b/tests/page_tests.py
@@ -253,9 +253,9 @@
if not site.hasExtension('Disambiguator', False):
raise unittest.SkipTest('Disambiguator extension not loaded on test site')
pg = pywikibot.Page(site, 'Random')
- pg._pageprops = {'disambiguation', ''}
+ pg._pageprops = set(['disambiguation', ''])
self.assertTrue(pg.isDisambig())
- pg._pageprops = {}
+ pg._pageprops = set()
self.assertFalse(pg.isDisambig())
def testReferences(self):
--
To view, visit https://gerrit.wikimedia.org/r/86433
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9b8674af99bc3acd418b2a378679e7254e1dee79
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mineo <themineo(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot