[Pywikipedia-svn] SVN: [7494] trunk/pywikipedia/wikipedia.py

18 Oct 2009

Revision: 7494
Author:   alexsh
Date:     2009-10-18 16:42:51 +0000 (Sun, 18 Oct 2009)
Log Message:
-----------
ImagePage():load page Info in one time load
Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================

--- trunk/pywikipedia/wikipedia.py	2009-10-18 06:23:41 UTC (rev 7493)
+++ trunk/pywikipedia/wikipedia.py	2009-10-18 16:42:51 UTC (rev 7494)
@@ -3406,7 +3406,7 @@
             raise NoPage(u'API Error, nothing found in the APIs')
# We don't know the page's id, if any other better idea please change it
-        return data[data.keys()[0]][u'revisions']
+        return data.values()[0][u'revisions']
class ImagePage(Page):
     """A subclass of Page representing an image descriptor wiki page.
@@ -3432,6 +3432,9 @@
         if self.namespace() != 6:
             raise ValueError(u'BUG: %s is not in the image namespace!' % title)
         self._imagePageHtml = None
+        self._local = True
+        self._latestInfo = {}
+        self._infoLoaded = False
def getImagePageHtml(self):
         """
@@ -3445,6 +3448,42 @@
             self._imagePageHtml = self.site().getUrl(path)
         return self._imagePageHtml
+    def _loadInfo(self, limit = 1):
+        params = {
+            'action': 'query',
+            'prop': 'imageinfo',
+            'titles': self.title(),
+            'iiprop': ['timestamp', 'user', 'comment', 'url', 'size', 'dimensions', 'sha1', 'mime', 'metadata', 'archivename', 'bitdepth'],
+            'iilimit': limit,
+        }
+        data = query.GetData(params, self.site())
+        if 'error' in data:
+            raise RuntimeError("%s" %data['error'])
+        count = 0
+        if data['query']['pages'].values()[0]["imagerepository"] == "shared":
+            self._local = False
+        infos = []
+        
+        while True:
+            for info in data['query']['pages'].values()[0]['imageinfo']:
+                count += 1
+                if count == 1 and 'iistart' not in params: 
+                    # count 1 and no iicontinue mean first image revision is latest.
+                    self.latestInfo = info
+                infos.append(info)
+                if count >= limit:
+                    break
+            
+            
+            if count < limit and 'query-continue' in data:
+                params['iistart'] = data['query-continue']['imageinfo']['iistart']
+            else:
+                break
+        
+        self._infoLoaded = True
+        if limit > 1:
+            return infos
+
     def fileUrl(self):
         """Return the URL for the image described on this page."""
         # There are three types of image pages:
@@ -3457,27 +3496,20 @@
         # is a full image link for .ogg and .mid files.
         #***********************
         #change to API query: action=query&titles=File:wiki.jpg&prop=imageinfo&iiprop=url
-        params = {
-            'action'    :'query',
-            'prop'      :'imageinfo',
-            'titles'    :self.title(),
-            'iiprop'    :'url',
-        }
-        imagedata = query.GetData(params, self.site(), encodeTitle = False)
-        try:
-            url = imagedata['query']['pages'].values()[0]['imageinfo'][0]['url']
-            return url
+        if not self._infoLoaded:
+            self._loadInfo()
+        
+        return self.latestInfo['url']
         #urlR = re.compile(r'<div class="fullImageLink" id="file">.*?<a href="(?P<url>[^ ]+?)"(?! class="image")|<span class="dangerousLink"><a href="(?P<url2>.+?)"', re.DOTALL)
         #m = urlR.search(self.getImagePageHtml())
#    url = m.group('url') or m.group('url2')
-        except KeyError:
-            raise NoPage(u'Image file URL for %s not found.' % self.aslink(forceInterwiki = True) )
-        return url
def fileIsOnCommons(self):
         """Return True if the image is stored on Wikimedia Commons"""
-        return self.fileUrl().startswith(u'http://upload.wikimedia.org/wikipedia/commons/')
+        if not self._infoLoaded:
+            self._loadInfo()
+        return not self._local
def fileIsShared(self):
         """Return True if image is stored on Wikitravel shared repository."""
@@ -3489,9 +3521,9 @@
     # (see bug #1795683).
     def getFileMd5Sum(self):
         """Return image file's MD5 checksum."""
-        uo = MyURLopener()
-        f = uo.open(self.fileUrl())
-        md5Checksum = md5(f.read()).hexdigest()
+        
+        f = self.site().getUrl(self.fileUrl(), no_hostname=True)
+        md5Checksum = md5(f).hexdigest()
         return md5Checksum
def getFileVersionHistory(self):
@@ -3526,18 +3558,11 @@
def getLatestUploader(self):
         """ Function that uses the APIs to detect the latest uploader of the image """
-        params = {
-            'action'    :'query',
-            'prop'      :'imageinfo',
-            'titles'    :self.title(),
-            }
-        data = query.GetData(params, self.site(), encodeTitle = False)
+        if not self._infoLoaded:
+            self._loadInfo()
         try:
             # We don't know the page's id, if any other better idea please change it
-            pageid = data['query']['pages'].keys()[0]
-            nick = data['query']['pages'][pageid][u'imageinfo'][0][u'user']
-            timestamp = data['query']['pages'][pageid][u'imageinfo'][0][u'timestamp']
-            return [nick, timestamp]
+            return [self.latestInfo['user'], self.latestInfo['timestamp']]
         except KeyError:
             raise NoPage(u'API Error, nothing found in the APIs')
@@ -3546,17 +3571,10 @@
             Files are the same or not.
             """
         if self.exists():
-            params = {
-                'action'    :'query',
-                'titles'    :self.title(),
-                'prop'      :'imageinfo',
-                'iiprop'    :'sha1',
-                }
-            # First of all we need the Hash that identify an image
-            data = query.GetData(params, self.site(), encodeTitle = False)
-            pageid = data['query']['pages'].keys()[0]
+            if not self._infoLoaded:
+                self._loadInfo()
             try:
-                hash_found = data['query']['pages'][pageid][u'imageinfo'][0][u'sha1']
+                return self._latestInfo['sha1']
             except (KeyError, IndexError):
                 try:
                     self.get()
@@ -3568,8 +3586,6 @@
                     return None
                 else:
                     raise NoHash('No Hash found in the APIs! Maybe the regex to catch it is wrong or someone has changed the APIs structure.')
-            else:
-                return hash_found
         else:
             output(u'File deleted before getting the Hash. Skipping...')
             return None

    

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

[Pywikipedia-svn] SVN: [7494] trunk/pywikipedia/wikipedia.py