Revision: 5633
Author:   filnik
Date:     2008-06-26 14:26:14 +0000 (Thu, 26 Jun 2008)
Log Message:
-----------
Adding new functionality to getLatestUploader() (it now also returns the timestamp) and adding a new function, getDuplicates(), based on the API
Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py      2008-06-26 10:38:10 UTC (rev 5632)
+++ trunk/pywikipedia/wikipedia.py      2008-06-26 14:26:14 UTC (rev 5633)
@@ -215,6 +215,10 @@
 class CaptchaError(Error):
     """Captcha is asked and config.solve_captcha == False."""
+class NoHash(Error):
+    """The API did not return any hash for the image searched.
+    Really strange; better to raise an error."""
+
 SaxError = xml.sax._exceptions.SAXParseException
 # Pre-compile re expressions
@@ -2627,11 +2631,44 @@
         try:
             # We don't know the page's id; if anyone has a better idea, please change it
             pageid = data['query']['pages'].keys()[0]
-            nick = data['query']['pages'][pageid][u'imageinfo'][0]['user']
-            return nick
+            nick = data['query']['pages'][pageid][u'imageinfo'][0][u'user']
+            timestamp = data['query']['pages'][pageid][u'imageinfo'][0][u'timestamp']
+            return [nick, timestamp]
         except IndexError:
             raise NoPage(u'API Error, nothing found in the APIs')
+    def getDuplicates(self):
+        """Return a list of duplicates of this image (same SHA-1 hash), or None."""
+        params = {
+            'action': 'query',
+            'titles': self.title(),
+            'prop'  : 'imageinfo',
+            'iiprop': 'sha1',
+        }
+        data = query.GetData(params, useAPI = True, encodeTitle = False)
+        pageid = data['query']['pages'].keys()[0]
+        try:
+            hash_found = data['query']['pages'][pageid][u'imageinfo'][0][u'sha1']
+        except KeyError:
+            if self.exists():
+                raise NoHash('No hash found in the API reply! Maybe the regex to catch it is wrong or the API structure has changed.')
+            else:
+                output(u'Image deleted before getting the hash. Skipping...')
+                return None
+        # action=query&format=xml&list=allimages&aisha1=%s
+        image_namespace = "%s:" % self._site.image_namespace() # Image:
+        params = {
+            'action': 'query',
+            'list'  : 'allimages',
+            'aisha1': hash_found,
+        }
+        data = query.GetData(params, useAPI = True, encodeTitle = False)
+        allimages = data['query']['allimages']
+        duplicates = list()
+        for imagedata in allimages:
+            image = imagedata[u'descriptionurl'].split('/wiki/%s' % image_namespace)[1]
+            duplicates.append(image)
+        return duplicates
+
     def getFileVersionHistoryTable(self):
         """Return the version history in the form of a wiki table."""
         lines = []
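Note for callers: getLatestUploader() used to return a bare username string and now returns [nick, timestamp], so existing callers need updating. A minimal usage sketch of both changed methods, assuming a working pywikipedia checkout and user-config; the title u'Image:Example.jpg' is a hypothetical stand-in:

    import wikipedia

    site = wikipedia.getSite()
    image = wikipedia.ImagePage(site, u'Image:Example.jpg')

    # getLatestUploader() now returns [username, timestamp] rather than
    # just the username string.
    uploader, timestamp = image.getLatestUploader()
    wikipedia.output(u'Last uploaded by %s at %s' % (uploader, timestamp))

    # getDuplicates() returns the titles of all files sharing this image's
    # SHA-1 hash (typically including the image itself), or None if the
    # image was deleted before the hash could be fetched.
    duplicates = image.getDuplicates()
    if duplicates is not None:
        for dup in duplicates:
            wikipedia.output(dup)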
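For reference, getDuplicates() boils down to two MediaWiki API round-trips: prop=imageinfo&iiprop=sha1 to fetch the file's SHA-1, then list=allimages&aisha1=<hash> to enumerate every file with that hash. A standalone sketch of that flow using only the standard library (Python 3 style; the endpoint URL and file title are hypothetical stand-ins, and reading the 'name' field reflects today's API rather than the descriptionurl parsing in the diff above):

    import json
    import urllib.parse
    import urllib.request

    API = 'https://commons.wikimedia.org/w/api.php'

    def api_get(params):
        # Append format=json and perform a GET against the API endpoint.
        url = API + '?' + urllib.parse.urlencode(dict(params, format='json'))
        with urllib.request.urlopen(url) as f:
            return json.load(f)

    # Step 1: fetch the SHA-1 hash of the file we are checking.
    data = api_get({'action': 'query', 'titles': 'File:Example.jpg',
                    'prop': 'imageinfo', 'iiprop': 'sha1'})
    page = next(iter(data['query']['pages'].values()))
    sha1 = page['imageinfo'][0]['sha1']

    # Step 2: list every file whose content has that same hash.
    data = api_get({'action': 'query', 'list': 'allimages', 'aisha1': sha1})
    for img in data['query']['allimages']:
        print(img['name'])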