Revision: 5639
Author:   filnik
Date:     2008-06-26 19:39:16 +0000 (Thu, 26 Jun 2008)
Log Message:
-----------
Fixing the hash and duplicate functions for nowcommons detection
Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2008-06-26 17:04:25 UTC (rev 5638)
+++ trunk/pywikipedia/wikipedia.py	2008-06-26 19:39:16 UTC (rev 5639)
@@ -2637,8 +2637,10 @@
         except IndexError:
             raise NoPage(u'API Error, nothing found in the APIs')

-    def getDuplicates(self):
-        """ Function that uses APIs to give the duplicates of the given image """
+    def getHash(self):
+        """ Function that returns the hash of an image, in order to understand if
+        two images are the same or not.
+        """
         params = {
             'action' :'query',
             'titles' :self.title(),
@@ -2656,22 +2658,9 @@
         else:
             wikipedia.output(u'Image deleted before getting the Hash. Skipping...')
             return None
-        # Now get all the images with the same hash
-        #action=query&format=xml&list=allimages&aisha1=%s
-        image_namespace = "%s:" % self._site.image_namespace() # Image:
-        params = {
-            'action' :'query',
-            'list' :'allimages',
-            'aisha1' :hash_found,
-        }
-        data = query.GetData(params, useAPI = True, encodeTitle = False)
-        allimages = data['query']['allimages']
-        duplicates = list()
-        for imagedata in allimages:
-            image = imagedata[u'descriptionurl'].split('/wiki/%s' % image_namespace)[1]
-            duplicates.append(image)
-        return duplicates
-
+        else:
+            return hash_found
+
     def getFileVersionHistoryTable(self):
         """Return the version history in the form of a wiki table."""
         lines = []
@@ -5730,6 +5719,33 @@
     else:
         return False
+    def getImagesFromAnHash(self, hash_found = None):
+        """ Function that uses the APIs to return the images that have the same hash. Useful
+        to find duplicates or nowcommons.
+
+        NOTE: it also returns the image itself; if you don't want it, just
+        filter the returned list.
+
+        NOTE 2: it returns the image WITHOUT the image namespace.
+        """
+        if hash_found == None: # If the hash is None, return None instead of continuing
+            return None
+        # Now get all the images with the same hash
+        #action=query&format=xml&list=allimages&aisha1=%s
+        image_namespace = "%s:" % self.image_namespace() # Image:
+        params = {
+            'action' :'query',
+            'list' :'allimages',
+            'aisha1' :hash_found,
+        }
+        data = query.GetData(params, site = getSite(self.lang, self.family), useAPI = True, encodeTitle = False)
+        allimages = data['query']['allimages']
+        duplicates = list()
+        for imagedata in allimages:
+            image = imagedata[u'descriptionurl'].split('/wiki/%s' % image_namespace)[1]
+            duplicates.append(image)
+        return duplicates
+
 # Caches to provide faster access
 _sites = {}
 _namespaceCache = {}
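
For reference, a minimal usage sketch of the two methods touched by this revision (an illustration only, not part of the commit; it assumes a trunk/pywikipedia checkout at this revision, and the image title used below is a hypothetical placeholder):

    # -*- coding: utf-8  -*-
    # Sketch: fetch an image's SHA1 hash, then list every image on the site
    # that shares it. Assumes this revision of wikipedia.py is importable.
    import wikipedia

    site = wikipedia.getSite('en', 'wikipedia')
    image = wikipedia.ImagePage(site, u'Image:Example.jpg')  # hypothetical title

    # ImagePage.getHash() returns the SHA1 of the image via the API, or None
    # if the image was deleted before the hash could be fetched.
    hash_found = image.getHash()

    # Site.getImagesFromAnHash() returns every image title (without the image
    # namespace) that has the same hash, including the image itself.
    duplicates = site.getImagesFromAnHash(hash_found)
    if duplicates is not None:
        wikipedia.output(u'Images sharing this hash: %s' % u', '.join(duplicates))

Splitting the old getDuplicates() into ImagePage.getHash() and Site.getImagesFromAnHash() means the hash can be computed once and then looked up on any Site object, which is what the nowcommons/duplicate detection mentioned in the log message needs.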