Revision: 5639
Author: filnik
Date: 2008-06-26 19:39:16 +0000 (Thu, 26 Jun 2008)
Log Message:
-----------
Fixing the hash and duplicate functions for nowcommons detection
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-06-26 17:04:25 UTC (rev 5638)
+++ trunk/pywikipedia/wikipedia.py 2008-06-26 19:39:16 UTC (rev 5639)
@@ -2637,8 +2637,10 @@
except IndexError:
raise NoPage(u'API Error, nothing found in the APIs')
- def getDuplicates(self):
- """ Function that uses APIs to give the duplicates of the given
image """
+ def getHash(self):
+ """ Function that returns the Hash of an image in order to
understand if two
+ Images are the same or not.
+ """
params = {
'action' :'query',
'titles' :self.title(),
@@ -2656,22 +2658,9 @@
else:
wikipedia.output(u'Image deleted before getting the Hash.
Skipping...')
return None
- # Now get all the images with the same hash
- #action=query&format=xml&list=allimages&aisha1=%s
- image_namespace = "%s:" % self._site.image_namespace() # Image:
- params = {
- 'action' :'query',
- 'list' :'allimages',
- 'aisha1' :hash_found,
- }
- data = query.GetData(params, useAPI = True, encodeTitle = False)
- allimages = data['query']['allimages']
- duplicates = list()
- for imagedata in allimages:
- image = imagedata[u'descriptionurl'].split('/wiki/%s' %
image_namespace)[1]
- duplicates.append(image)
- return duplicates
-
+ else:
+ return hash_found
+
def getFileVersionHistoryTable(self):
"""Return the version history in the form of a wiki
table."""
lines = []
@@ -5730,6 +5719,33 @@
else:
return False
+ def getImagesFromAnHash(self, hash_found = None):
+ """ Function that uses APIs to give the images that have the same
hash. Useful
+ to find duplicates or nowcommons.
+
+ NOTE: it also returns the image itself; if you don't want it, just
+ filter the returned list.
+
+ NOTE 2: it returns the image WITHOUT the image namespace.
+ """
+ if hash_found == None: # If the hash is none return None and not continue
+ return None
+ # Now get all the images with the same hash
+ #action=query&format=xml&list=allimages&aisha1=%s
+ image_namespace = "%s:" % self.image_namespace() # Image:
+ params = {
+ 'action' :'query',
+ 'list' :'allimages',
+ 'aisha1' :hash_found,
+ }
+ data = query.GetData(params, site = getSite(self.lang, self.family), useAPI =
True, encodeTitle = False)
+ allimages = data['query']['allimages']
+ duplicates = list()
+ for imagedata in allimages:
+ image = imagedata[u'descriptionurl'].split('/wiki/%s' %
image_namespace)[1]
+ duplicates.append(image)
+ return duplicates
+
# Caches to provide faster access
_sites = {}
_namespaceCache = {}
Show replies by date