Revision: 8670 Author: multichill Date: 2010-10-20 15:05:53 +0000 (Wed, 20 Oct 2010)
Log Message: ----------- Merged some code from https://fisheye.toolserver.org/browse/multichill/bot/geograph/geograph_lib.p... to be able to categorize images based on the provided location.
Modified Paths: -------------- trunk/pywikipedia/imagerecat.py
Modified: trunk/pywikipedia/imagerecat.py =================================================================== --- trunk/pywikipedia/imagerecat.py 2010-10-20 14:41:13 UTC (rev 8669) +++ trunk/pywikipedia/imagerecat.py 2010-10-20 15:05:53 UTC (rev 8670) @@ -40,6 +40,7 @@ import wikipedia as pywikibot import config import pagegenerators +import xml.etree.ElementTree
category_blacklist = [] countries = [] @@ -167,6 +168,82 @@ pywikibot.output(lang + project + article) return (commonshelperCats, usage, galleries)
def getOpenStreetMapCats(latitude, longitude):
    '''
    Get a list of location category names for the given coordinates.

    Asks getOpenStreetMap() for the reverse-geocoded chain of place names
    (most specific first) and, for each name, tries to resolve an existing
    Commons category via getCategoryByName(), passing the next one or two
    names in the chain as parent/grandparent disambiguators. Names that do
    not resolve to an existing category (empty string) are dropped.
    '''
    result = []
    locationList = getOpenStreetMap(latitude, longitude)
    for i, locname in enumerate(locationList):
        if i <= len(locationList) - 3:
            category = getCategoryByName(name=locname,
                                         parent=locationList[i + 1],
                                         grandparent=locationList[i + 2])
        elif i == len(locationList) - 2:
            category = getCategoryByName(name=locname,
                                         parent=locationList[i + 1])
        else:
            category = getCategoryByName(name=locname)
        if category and not category == u'':
            result.append(category)
    return result


def getOpenStreetMap(latitude, longitude):
    '''
    Reverse geocode the coordinates with the OSM Nominatim service.

    Queries http://nominatim.openstreetmap.org/reverse (XML format) and
    returns the address-part names whose tags are in validParts
    (hamlet/village/city/county/country), in the order Nominatim reports
    them. Retries every 30 seconds on IOError or socket timeout. Parts
    listed in invalidParts are dropped with a notice; unknown tags emit a
    warning so the two lists can be extended over time.
    '''
    result = []
    gotInfo = False
    parameters = urllib.urlencode({'lat': latitude, 'lon': longitude})
    while not gotInfo:
        try:
            page = urllib.urlopen(
                "http://nominatim.openstreetmap.org/reverse?format=xml&%s"
                % parameters)
            et = xml.etree.ElementTree.parse(page)
            gotInfo = True
        except IOError:
            # FIX: the original literal u'...let's try again' was a syntax
            # error (unescaped apostrophe inside a single-quoted string).
            pywikibot.output(u"Got an IOError, let's try again")
            time.sleep(30)
        except socket.timeout:
            pywikibot.output(u"Got a timeout, let's try again")
            time.sleep(30)
    validParts = [u'hamlet', u'village', u'city', u'county', u'country']
    invalidParts = [u'path', u'road', u'suburb', u'state', u'country_code']
    addressparts = et.find('addressparts')
    # NOTE(review): on a Nominatim error response the <addressparts> element
    # is absent and find() returns None; the original code then crashed with
    # AttributeError. Return an empty result instead.
    if addressparts is None:
        return result
    for addresspart in addressparts.getchildren():
        if addresspart.tag in validParts:
            result.append(addresspart.text)
        elif addresspart.tag in invalidParts:
            pywikibot.output(u'Dropping %s, %s'
                            % (addresspart.tag, addresspart.text))
        else:
            pywikibot.output(u'WARNING %s, %s is not in addressparts lists'
                            % (addresspart.tag, addresspart.text))
    return result


def getCategoryByName(name, parent=u'', grandparent=u''):
    '''
    Return the name of an existing Commons category for name, or u''.

    Tries the disambiguated forms '<name>,_<parent>' and
    '<name>,_<grandparent>' first, then the bare '<name>'; returns the
    first variant for which a category exists on Wikimedia Commons, or an
    empty unicode string when none does.
    '''
    if not parent == u'':
        workname = name.strip() + u',_' + parent.strip()
        workcat = catlib.Category(
            pywikibot.getSite(u'commons', u'commons'), workname)
        if workcat.exists():
            return workname
    if not grandparent == u'':
        workname = name.strip() + u',_' + grandparent.strip()
        workcat = catlib.Category(
            pywikibot.getSite(u'commons', u'commons'), workname)
        if workcat.exists():
            return workname
    workname = name.strip()
    workcat = catlib.Category(
        pywikibot.getSite(u'commons', u'commons'), workname)
    if workcat.exists():
        return workname
    return u''


def getUsage(use):
    ''' Parse the Commonsense output to get the usage '''
    result = []
pywikipedia-svn@lists.wikimedia.org