Revision: 5633
Author: filnik
Date: 2008-06-26 14:26:14 +0000 (Thu, 26 Jun 2008)
Log Message:
-----------
Adding new functionality to getLatestUploader() (it now also returns the timestamp) and
adding the new API-based function getDuplicates()
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-06-26 10:38:10 UTC (rev 5632)
+++ trunk/pywikipedia/wikipedia.py 2008-06-26 14:26:14 UTC (rev 5633)
@@ -215,6 +215,10 @@
class CaptchaError(Error):
"""Captcha is asked and config.solve_captcha ==
False."""
+class NoHash(Error):
+ """ The APIs don't return any Hash for the image searched.
+ Really Strange, better to raise an error. """
+
SaxError = xml.sax._exceptions.SAXParseException
# Pre-compile re expressions
@@ -2627,11 +2631,44 @@
try:
# We don't know the page's id, if any other better idea please change
it
pageid = data['query']['pages'].keys()[0]
- nick =
data['query']['pages'][pageid][u'imageinfo'][0]['user']
- return nick
+ nick =
data['query']['pages'][pageid][u'imageinfo'][0][u'user']
+ timestamp =
data['query']['pages'][pageid][u'imageinfo'][0][u'timestamp']
+ return [nick, timestamp]
except IndexError:
raise NoPage(u'API Error, nothing found in the APIs')
+ def getDuplicates(self):
+ params = {
+ 'action' :'query',
+ 'titles' :self.title(),
+ 'prop' :'imageinfo',
+ 'iiprop' :'sha1',
+ }
+ data = query.GetData(params, useAPI = True, encodeTitle = False)
+ pageid = data['query']['pages'].keys()[0]
+ try:
+ hash_found =
data['query']['pages'][pageid][u'imageinfo'][0][u'sha1']
+ except KeyError:
+ if self.exists():
+ raise NoHash('No Hash found in the APIs! Maybe the regex to catch it
is wrong or someone has changed the APIs structure.')
+ else:
+ wikipedia.output(u'Image deleted before getting the Hash.
Skipping...')
+ return None
+ #action=query&format=xml&list=allimages&aisha1=%s
+ image_namespace = "%s:" % self._site.image_namespace() # Image:
+ params = {
+ 'action' :'query',
+ 'list' :'allimages',
+ 'aisha1' :hash_found,
+ }
+ data = query.GetData(params, useAPI = True, encodeTitle = False)
+ allimages = data['query']['allimages']
+ duplicates = list()
+ for imagedata in allimages:
+ image = imagedata[u'descriptionurl'].split('/wiki/%s' %
image_namespace)[1]
+ duplicates.append(image)
+ return duplicates
+
def getFileVersionHistoryTable(self):
"""Return the version history in the form of a wiki
table."""
lines = []
Show replies by date