[Pywikipedia-svn] SVN: [6954] branches/rewrite/pywikibot/page.py

11 Jun 2009

Revision: 6954
Author:   russblau
Date:     2009-06-11 20:24:06 +0000 (Thu, 11 Jun 2009)
Log Message:
-----------
Add 'step' and 'total' parameters to all applicable Page methods
Modified Paths:
--------------
    branches/rewrite/pywikibot/page.py
Modified: branches/rewrite/pywikibot/page.py
===================================================================

--- branches/rewrite/pywikibot/page.py	2009-06-11 18:38:08 UTC (rev 6953)
+++ branches/rewrite/pywikibot/page.py	2009-06-11 20:24:06 UTC (rev 6954)
@@ -32,7 +32,7 @@
This object only implements internally methods that do not require
     reading from or writing to the wiki.  All other methods are delegated
-    to the Site object. 
+    to the Site object.
"""
@@ -93,7 +93,7 @@
                 raise pywikibot.Error(
                       "Page object cannot be created from Site without title.")
             self._title = title
-        elif isinstance(source, Page): 
+        elif isinstance(source, Page):
             # copy all of source's attributes to this object
             self.__dict__ = source.__dict__
             if title:
@@ -286,7 +286,7 @@
         exceptions that should be caught by the calling code:
- NoPage: The page does not exist
-          - IsRedirectPage: The page is a redirect. 
+          - IsRedirectPage: The page is a redirect.
           - SectionError: The section does not exist on a page with a #
                 link
@@ -314,11 +314,13 @@
         return self._revisions[self._revid].text
def _getInternals(self, sysop):
-        """Helper function for get(). 
+        """Helper function for get().
+
         Stores latest revision in self if it doesn't contain it, doesn't think.
         * Raises exceptions from previous runs.
-        * Stores new exceptions in _getexception and raises them"""
+        * Stores new exceptions in _getexception and raises them.
+        """
         # Raise exceptions from previous runs
         if hasattr(self, '_getexception'):
             raise self._getexception
@@ -481,7 +483,7 @@
         If self is a talk page, returns the associated content page;
         otherwise, returns the associated talk page.  The returned page need
         not actually exist on the wiki.
-        
+
         Returns None if self is a special page.
"""
@@ -545,10 +547,10 @@
def getReferences(self, follow_redirects=True, withTemplateInclusion=True,
                       onlyTemplateInclusion=False, redirectsOnly=False,
-                      namespaces=None):
+                      namespaces=None, step=None, total=None):
         """Return an iterator all pages that refer to or embed the page.
-        If you need a full list of referring pages, use 
+        If you need a full list of referring pages, use
         C{pages = list(s.getReferences())}
@param follow_redirects: if True, also iterate pages that link to a
@@ -559,20 +561,26 @@
             is used as a template.
         @param redirectsOnly: if True, only iterate redirects to self.
         @param namespaces: only iterate pages in these namespaces
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
"""
         # N.B.: this method intentionally overlaps with backlinks() and
         # embeddedin(). Depending on the interface, it may be more efficient
         # to implement those methods in the site interface and then combine
         # the results for this method, or to implement this method and then
-        # split up the results for the others. 
+        # split up the results for the others.
         return self.site().pagereferences(
-                           self, follow_redirects, redirectsOnly,
-                           withTemplateInclusion, onlyTemplateInclusion,
-                           namespaces)
+                               self,
+                               followRedirects=follow_redirects,
+                               filterRedirects=redirectsOnly,
+                               withTemplateInclusion=withTemplateInclusion,
+                               onlyTemplateInclusion=onlyTemplateInclusion,
+                               namespaces=namespaces, step=step,
+                               total=total)
def backlinks(self, followRedirects=True, filterRedirects=None,
-                  namespaces=None):
+                  namespaces=None, step=None, total=None):
         """Return an iterator for pages that link to this page.
@param followRedirects: if True, also iterate pages that link to a
@@ -580,20 +588,31 @@
         @param filterRedirects: if True, only iterate redirects; if False,
             omit redirects; if None, do not filter
         @param namespaces: only iterate pages in these namespaces
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
"""
-        return self.site().pagebacklinks(self, followRedirects, filterRedirects,
-                                         namespaces)
+        return self.site().pagebacklinks(self,
+                                         followRedirects=followRedirects,
+                                         filterRedirects=filterRedirects,
+                                         namespaces=namespaces, step=step,
+                                         total=total)
-    def embeddedin(self, filter_redirects=None, namespaces=None):
+    def embeddedin(self, filter_redirects=None, namespaces=None, step=None,
+                   total=None):
         """Return an iterator for pages that embed this page as a template.
@param filterRedirects: if True, only iterate redirects; if False,
             omit redirects; if None, do not filter
         @param namespaces: only iterate pages in these namespaces
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
"""
-        return self.site().page_embeddedin(self, filter_redirects, namespaces)
+        return self.site().page_embeddedin(self,
+                                           filterRedirects=filter_redirects,
+                                           namespaces=namespaces,
+                                           step=step, total=total)
def canBeEdited(self):
         """Return bool indicating whether this page can be edited.
@@ -751,7 +770,7 @@
                         minorEdit=minorEdit, force=force, async=True,
                         callback=callback)
-    def linkedPages(self):
+    def linkedPages(self, namespaces=None, step=None, total=None):
         """Iterate Pages that this Page links to.
Only returns pages from "normal" internal links. Image and category
@@ -759,10 +778,14 @@
         omitted (but links within them are returned). All interwiki and
         external links are omitted.
+        @param namespaces: only iterate links in these namespaces
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
         @return: a generator that yields Page objects.
"""
-        return self.site().pagelinks(self)
+        return self.site().pagelinks(self, namespaces=namespaces, step=step,
+                                     total=total)
def interwiki(self, expand=True):
         """Iterate interwiki links in the page text, excluding language links.
@@ -798,7 +821,7 @@
                 continue
def langlinks(self):
-        """Returns a list of all interlanguage Links on this page.
+        """Return a list of all interlanguage Links on this page.
"""
         # Data might have been preloaded
@@ -807,9 +830,11 @@
return self._langlinks
-    def iterlanglinks(self):
+    def iterlanglinks(self, step=None, total=None):
         """Iterate all interlanguage links on this page.
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
         @return: a generator that yields Link objects.
"""
@@ -819,10 +844,10 @@
         # method is called. If we do this, we'll have to think
         # about what will happen if the generator is not completely
         # iterated upon.
-        return self.site().pagelanglinks(self)
+        return self.site().pagelanglinks(self, step=step, total=total)
def templates(self):
-        """Returns a list of Page objects for templates used on this Page.
+        """Return a list of Page objects for templates used on this Page.
Template parameters are ignored.  This method only returns embedded
         templates, not template pages that happen to be referenced through
@@ -835,27 +860,32 @@
return self._templates
-    def itertemplates(self):
+    def itertemplates(self, step=None, total=None):
         """Iterate Page objects for templates used on this Page.
Template parameters are ignored.  This method only returns embedded
         templates, not template pages that happen to be referenced through
         a normal link.
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
+
         """
         if hasattr(self, '_templates'):
             return iter(self._templates)
-        return self.site().pagetemplates(self)
+        return self.site().pagetemplates(self, step=step, total=total)
@deprecate_arg("followRedirects", None)
     @deprecate_arg("loose", None)
-    def imagelinks(self, followRedirects=None, loose=None):
+    def imagelinks(self, step=None, total=None):
         """Iterate ImagePage objects for images displayed on this Page.
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
         @return: a generator that yields ImagePage objects.
"""
-        return self.site().pageimages(self)
+        return self.site().pageimages(self, step=step, total=total)
def templatesWithParams(self):
         """Iterate templates used on this Page.
@@ -898,22 +928,27 @@
@deprecate_arg("nofollow_redirects", None)
     @deprecate_arg("get_redirect", None)
-    def categories(self, withSortKey=False):
+    def categories(self, withSortKey=False, step=None, total=None):
         """Iterate categories that the article is in.
@param withSortKey: if True, include the sort key in each Category.
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
         @return: a generator that yields Category objects.
"""
-        return self.site().pagecategories(self, withSortKey=withSortKey)
+        return self.site().pagecategories(self, withSortKey=withSortKey,
+                                          step=step, total=total)
-    def extlinks(self):
+    def extlinks(self, step=None, total=None):
         """Iterate all external URLs (not interwiki links) from this page.
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
         @return: a generator that yields unicode objects containing URLs.
"""
-        return self.site().page_extlinks(self)
+        return self.site().page_extlinks(self, step=step, total=total)
def getRedirectTarget(self):
         """Return a Page object for the target this Page redirects to.
@@ -924,9 +959,14 @@
         """
         return self.site().getredirtarget(self)
+    # BREAKING CHANGE: in the old framework, getVersionHistory returned no
+    #                  more than 500 revisions by default; now it iterates
+    #                  over all revisions unless the 'total' argument is used
     @deprecate_arg("forceReload", None)
-    def getVersionHistory(self, reverseOrder=False, getAll=False,
-                          revCount=500):
+    @deprecate_arg("revCount", "total")
+    @deprecate_arg("getAll", None)
+    def getVersionHistory(self, reverseOrder=False, step=None,
+                          total=None):
         """Load the version history page and return history information.
Return value is a list of tuples, where each tuple represents one
@@ -935,49 +975,58 @@
         reverseOrder is True. Defaults to getting the first revCount edits,
         unless getAll is True.
+        @param step: limit each API call to this number of revisions
+        @param total: iterate no more than this number of revisions in total
+
         """
-        if getAll:
-            limit = None
-        else:
-            limit = revCount
         self.site().loadrevisions(self, getText=False, rvdir=reverseOrder,
-                                  total=limit)
-        if getAll:
-            revCount = len(self._revisions)
+                                  step=step, total=total)
         return [ ( self._revisions[rev].revid,
                    self._revisions[rev].timestamp,
                    self._revisions[rev].user,
                    self._revisions[rev].comment
                  ) for rev in sorted(self._revisions,
-                                     reverse=not reverseOrder)[ : revCount]
+                                     reverse=not reverseOrder)
                ]
def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
-                               getAll=False, revCount=500):
+                               step=None, total=None):
         """Return the version history as a wiki table."""
+
         result = '{| border="1"\n'
         result += '! oldid || date/time || username || edit summary\n'
         for oldid, time, username, summary \
                 in self.getVersionHistory(forceReload=forceReload,
                                           reverseOrder=reverseOrder,
-                                          getAll=getAll, revCount=revCount):
+                                          step=step, total=total):
             result += '|----\n'
             result += '| %s || %s || %s || <nowiki>%s</nowiki>\n'\
                       % (oldid, time, username, summary)
         result += '|}\n'
         return result
-    def fullVersionHistory(self):
-        """Iterate all previous versions including wikitext.
+    def fullVersionHistory(self, reverseOrder=False, step=None,
+                          total=None):
+        """Iterate previous versions including wikitext.
+        Takes same arguments as getVersionHistory.
+
         @return: A generator that yields tuples consisting of revision ID,
             edit date/time, user name and content
+
         """
-        return self.site().loadrevisions(self, withText=True)
+        return self.site().loadrevisions(self, getText=True,
+                                         rvdir=reverseOrder,
+                                         step=step, total=total)
-    def contributingUsers(self):
-        """Return a set of usernames (or IPs) of users who edited this page."""
-        edits = self.getVersionHistory()
+    def contributingUsers(self, step=None, total=None):
+        """Return a set of usernames (or IPs) of users who edited this page.
+
+        @param step: limit each API call to this number of revisions
+        @param total: iterate no more than this number of revisions in total
+
+        """
+        edits = self.getVersionHistory(step=step, total=total)
         users = set([edit[2] for edit in edits])
         return users
@@ -1000,7 +1049,7 @@
             pywikibot.output(u'Moving %s to [[%s]].'
                              % (self.title(asLink=True), newtitle))
             reason = pywikibot.input(u'Please enter a reason for the move:')
-        # TODO: implement "safe" parameter (Is this necessary ?) 
+        # TODO: implement "safe" parameter (Is this necessary ?)
         # TODO: implement "sysop" parameter
         return self.site().movepage(self, newtitle, reason,
                                     movetalk=movetalkpage,
@@ -1035,21 +1084,28 @@
                 return self.site().deletepage(self, reason)
             except pywikibot.NoUsername, e:
                 if mark:
-                    raise NotImplementedError("marking pages for deletions is not yet available.")
+                    raise NotImplementedError(
+                        "Marking pages for deletion is not yet available.")
                 raise e
-
-    def loadDeletedRevisions(self):
+    # all these DeletedRevisions methods need to be reviewed and harmonized
+    # with the new framework; they do not appear functional
+    def loadDeletedRevisions(self, step=None, total=None):
         """Retrieve all deleted revisions for this Page from Special/Undelete.
Stores all revisions' timestamps, dates, editors and comments in
         self._deletedRevs attribute.
-        @return: list of timestamps (which can be used to retrieve revisions
-            later on).
+        @return: iterator of timestamps (which can be used to retrieve
+            revisions later on).
"""
-        return self.site().loadDeletedRevisions(self)
+        if not hasattr(self, "_deletedRevs"):
+            self._deletedRevs = {}
+        for item in self.site().deletedrevs(self, step=step, total=total):
+            for rev in item.get("revisions", []):
+                self._deletedRevs[rev['timestamp']] = rev
+                yield rev['timestamp']
def getDeletedRevision(self, timestamp, retrieveText=False):
         """Return a particular deleted revision by timestamp.
@@ -1060,8 +1116,17 @@
             None.
"""
-        return self.site().getDeletedRevision(self, timestamp,
-                                              getText=retrieveText)
+        if hasattr(self, "_deletedRevs"):
+            if timestamp in self._deletedRevs and (
+                    (not retrieveText)
+                    or "content" in self._deletedRevs["timestamp"]):
+                return self._deletedRevs["timestamp"]
+        for item in self.site().deletedrevs(self, start=timestamp,
+                                            get_text=retrieveText, total=1):
+            # should only be one item with one revision
+            if item['title'] == self.title:
+                if "revisions" in item:
+                    return item["revisions"][0]
def markDeletedRevision(self, timestamp, undelete=True):
         """Mark the revision identified by timestamp for undeletion.
@@ -1069,7 +1134,7 @@
         @param undelete: if False, mark the revision to remain deleted.
"""
-        if self._deletedRevs == None:
+        if not hasattr(self, "_deletedRevs"):
             self.loadDeletedRevisions()
         if timestamp not in self._deletedRevs:
             #TODO: Throw an exception?
@@ -1149,14 +1214,14 @@
oldCat and newCat should be Category objects.
         If newCat is None, the category will be removed.
-        
+
         comment: string to use as an edit summary
-        sortKey: sortKey to use for the added category. 
+        sortKey: sortKey to use for the added category.
         Unused if newCat is None, or if inPlace=True
-        
+
         """
-        #TODO: is inPlace necessary? 
+        #TODO: is inPlace necessary?
         site = self.site()
         changesMade = False
@@ -1405,11 +1470,16 @@
                          % (datetime, username, resolution, size, comment))
         return u'{| border="1"\n! date/time || username || resolution || size || edit summary\n|----\n' + u'\n|----\n'.join(lines) + '\n|}'
-    def usingPages(self):
-        """Yield Pages on which the image is displayed."""
-        return self.site().imageusage(self)
+    def usingPages(self, step=None, total=None):
+        """Yield Pages on which the image is displayed.
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in total
+        """
+        return self.site().imageusage(self, step=step, total=total)
+
+
 class Category(Page):
     """A page in the Category: namespace"""
@@ -1447,7 +1517,7 @@
@deprecate_arg("startFrom", None)
     @deprecate_arg("cacheResults", None)
-    def subcategories(self, recurse=False):
+    def subcategories(self, recurse=False, step=None, total=None):
         """Iterate all subcategories of the current category.
@param recurse: if not False or 0, also iterate subcategories of
@@ -1455,28 +1525,46 @@
             levels. (Example: recurse=1 will iterate direct subcats and
             first-level sub-sub-cats, but no deeper.)
         @type recurse: int or bool
+        @param step: limit each API call to this number of categories
+        @param total: iterate no more than this number of
+            subcategories in total (at all levels)
"""
         if not isinstance(recurse, bool) and recurse:
             recurse = recurse - 1
         if not hasattr(self, "_subcats"):
             self._subcats = []
-            for member in self.site().categorymembers(self, namespaces=[14]):
+            for member in self.site().categorymembers(self, namespaces=[14],
+                                                      step=step, total=total):
                 subcat = Category(self.site(), member.title())
                 self._subcats.append(subcat)
                 yield subcat
+                total -= 1
+                if not total:
+                    return
                 if recurse:
-                    for item in subcat.subcategories(recurse):
+                    for item in subcat.subcategories(recurse,
+                                                     step=step, total=total):
                         yield item
+                        total -= 1
+                        if not total:
+                            return
         else:
             for subcat in self._subcats:
                 yield subcat
+                total -= 1
+                if not total:
+                    return
                 if recurse:
-                    for item in subcat.subcategories(recurse):
+                    for item in subcat.subcategories(recurse,
+                                                     step=step, total=total):
                         yield item
+                        total -= 1
+                        if not total:
+                            return
@deprecate_arg("startFrom", None)
-    def articles(self, recurse=False):
+    def articles(self, recurse=False, step=None, total=None):
         """
         Yields all articles in the current category.
@@ -1485,31 +1573,50 @@
             levels. (Example: recurse=1 will iterate articles in first-level
             subcats, but no deeper.)
         @type recurse: int or bool
+        @param step: limit each API call to this number of pages
+        @param total: iterate no more than this number of pages in
+            total (at all levels)
"""
         namespaces = [x for x in self.site().namespaces()
                       if x>=0 and x!=14]
         for member in self.site().categorymembers(self,
-                                                  namespaces=namespaces):
+                                                  namespaces=namespaces,
+                                                  step=step, total=total):
             yield member
+            total -= 1
+            if not total:
+                return
         if recurse:
             if not isinstance(recurse, bool) and recurse:
                 recurse = recurse - 1
-            for subcat in self.subcategories():
-                for article in subcat.articles(recurse):
+            for subcat in self.subcategories(step=step):
+                for article in subcat.articles(recurse, step=step, total=total):
                     yield article
+                    total -= 1
+                    if not total:
+                        return
-    def members(self, recurse=False, namespaces=None):
+    def members(self, recurse=False, namespaces=None, step=None, total=None):
         """Yield all category contents (subcats, pages, and files)."""
-        for member in self.site().categorymembers(self, namespaces):
+
+        for member in self.site().categorymembers(self, namespaces,
+                                                  step=step, total=total):
             yield member
+            total -= 1
+            if not total:
+                return
         if recurse:
             if not isinstance(recurse, bool) and recurse:
                 recurse = recurse - 1
-            for subcat in self.subcategories():
-                for article in subcat.members(recurse, namespaces):
+            for subcat in self.subcategories(step=step):
+                for article in subcat.members(recurse, namespaces, step=step,
+                                              total=total):
                     yield article
-        
+                    total -= 1
+                    if not total:
+                        return
+
     def isEmptyCategory(self):
         """Return True if category has no members (including subcategories)."""
         for member in self.site().categorymembers(self, total=1):
@@ -1523,7 +1630,7 @@
         @param cat: New category title (without namespace) or Category object
         @type cat: unicode or Category
         @param message: message to use for category creation message
-        If two %s are provided in message, will be replaced 
+        If two %s are provided in message, will be replaced
         by (self.title, authorsList)
         @type message: unicode
         @return: True if copying was successful, False if target page
@@ -1676,7 +1783,7 @@
             u'''[^ %!"$&'()*,\-.\/0-9:;=?@A-Z\\^_`a-z~\u0080-\uFFFF+]'''
             # URL percent encoding sequences interfere with the ability
             # to round-trip titles -- you can't link to them consistently.
-            u'|%[0-9A-Fa-f]{2}' 
+            u'|%[0-9A-Fa-f]{2}'
             # XML/HTML character references produce similar issues.
             u'|&[A-Za-z0-9\x80-\xff]+;'
             u'|&#[0-9]+;'
@@ -1783,7 +1890,7 @@
def parse(self):
         """Parse text; called internally when accessing attributes"""
-        
+
         self._site = self._source
         self._namespace = self._defaultns
         t = self._text
@@ -2000,7 +2107,7 @@
         link._site = page.site()
         link._section = page.section()
         link._namespace = page.namespace()
-        link._title = page.title(withNamespace=False, 
+        link._title = page.title(withNamespace=False,
                                 allowInterwiki=False,
                                 withSection=False)
         link._anchor = None
@@ -2029,9 +2136,9 @@
                 link._namespace = ns
                 title = t
         link._title = title
-        
-        return link
+        return link
+
 # Utility functions for parsing page titles
def html2unicode(text, ignore = []):

    

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

[Pywikipedia-svn] SVN: [6954] branches/rewrite/pywikibot/page.py