Revision: 6954
Author: russblau
Date: 2009-06-11 20:24:06 +0000 (Thu, 11 Jun 2009)
Log Message:
-----------
Add 'step' and 'total' parameters to all applicable Page methods
Modified Paths:
--------------
branches/rewrite/pywikibot/page.py
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2009-06-11 18:38:08 UTC (rev 6953)
+++ branches/rewrite/pywikibot/page.py 2009-06-11 20:24:06 UTC (rev 6954)
@@ -32,7 +32,7 @@
This object only implements internally methods that do not require
reading from or writing to the wiki. All other methods are delegated
- to the Site object.
+ to the Site object.
"""
@@ -93,7 +93,7 @@
raise pywikibot.Error(
"Page object cannot be created from Site without title.")
self._title = title
- elif isinstance(source, Page):
+ elif isinstance(source, Page):
# copy all of source's attributes to this object
self.__dict__ = source.__dict__
if title:
@@ -286,7 +286,7 @@
exceptions that should be caught by the calling code:
- NoPage: The page does not exist
- - IsRedirectPage: The page is a redirect.
+ - IsRedirectPage: The page is a redirect.
- SectionError: The section does not exist on a page with a #
link
@@ -314,11 +314,13 @@
return self._revisions[self._revid].text
def _getInternals(self, sysop):
- """Helper function for get().
+ """Helper function for get().
+
Stores latest revision in self if it doesn't contain it, doesn't think.
* Raises exceptions from previous runs.
- * Stores new exceptions in _getexception and raises them"""
+ * Stores new exceptions in _getexception and raises them.
+ """
# Raise exceptions from previous runs
if hasattr(self, '_getexception'):
raise self._getexception
@@ -481,7 +483,7 @@
If self is a talk page, returns the associated content page;
otherwise, returns the associated talk page. The returned page need
not actually exist on the wiki.
-
+
Returns None if self is a special page.
"""
@@ -545,10 +547,10 @@
def getReferences(self, follow_redirects=True, withTemplateInclusion=True,
onlyTemplateInclusion=False, redirectsOnly=False,
- namespaces=None):
+ namespaces=None, step=None, total=None):
"""Return an iterator all pages that refer to or embed the page.
- If you need a full list of referring pages, use
+ If you need a full list of referring pages, use
C{pages = list(s.getReferences())}
@param follow_redirects: if True, also iterate pages that link to a
@@ -559,20 +561,26 @@
is used as a template.
@param redirectsOnly: if True, only iterate redirects to self.
@param namespaces: only iterate pages in these namespaces
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
"""
# N.B.: this method intentionally overlaps with backlinks() and
# embeddedin(). Depending on the interface, it may be more efficient
# to implement those methods in the site interface and then combine
# the results for this method, or to implement this method and then
- # split up the results for the others.
+ # split up the results for the others.
return self.site().pagereferences(
- self, follow_redirects, redirectsOnly,
- withTemplateInclusion, onlyTemplateInclusion,
- namespaces)
+ self,
+ followRedirects=follow_redirects,
+ filterRedirects=redirectsOnly,
+ withTemplateInclusion=withTemplateInclusion,
+ onlyTemplateInclusion=onlyTemplateInclusion,
+ namespaces=namespaces, step=step,
+ total=total)
def backlinks(self, followRedirects=True, filterRedirects=None,
- namespaces=None):
+ namespaces=None, step=None, total=None):
"""Return an iterator for pages that link to this page.
@param followRedirects: if True, also iterate pages that link to a
@@ -580,20 +588,31 @@
@param filterRedirects: if True, only iterate redirects; if False,
omit redirects; if None, do not filter
@param namespaces: only iterate pages in these namespaces
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
"""
- return self.site().pagebacklinks(self, followRedirects, filterRedirects,
- namespaces)
+ return self.site().pagebacklinks(self,
+ followRedirects=followRedirects,
+ filterRedirects=filterRedirects,
+ namespaces=namespaces, step=step,
+ total=total)
- def embeddedin(self, filter_redirects=None, namespaces=None):
+ def embeddedin(self, filter_redirects=None, namespaces=None, step=None,
+ total=None):
"""Return an iterator for pages that embed this page as a template.
@param filterRedirects: if True, only iterate redirects; if False,
omit redirects; if None, do not filter
@param namespaces: only iterate pages in these namespaces
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
"""
- return self.site().page_embeddedin(self, filter_redirects, namespaces)
+ return self.site().page_embeddedin(self,
+ filterRedirects=filter_redirects,
+ namespaces=namespaces,
+ step=step, total=total)
def canBeEdited(self):
"""Return bool indicating whether this page can be edited.
@@ -751,7 +770,7 @@
minorEdit=minorEdit, force=force, async=True,
callback=callback)
- def linkedPages(self):
+ def linkedPages(self, namespaces=None, step=None, total=None):
"""Iterate Pages that this Page links to.
Only returns pages from "normal" internal links. Image and category
@@ -759,10 +778,14 @@
omitted (but links within them are returned). All interwiki and
external links are omitted.
+ @param namespaces: only iterate links in these namespaces
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
@return: a generator that yields Page objects.
"""
- return self.site().pagelinks(self)
+ return self.site().pagelinks(self, namespaces=namespaces, step=step,
+ total=total)
def interwiki(self, expand=True):
"""Iterate interwiki links in the page text, excluding language links.
@@ -798,7 +821,7 @@
continue
def langlinks(self):
- """Returns a list of all interlanguage Links on this page.
+ """Return a list of all interlanguage Links on this page.
"""
# Data might have been preloaded
@@ -807,9 +830,11 @@
return self._langlinks
- def iterlanglinks(self):
+ def iterlanglinks(self, step=None, total=None):
"""Iterate all interlanguage links on this page.
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
@return: a generator that yields Link objects.
"""
@@ -819,10 +844,10 @@
# method is called. If we do this, we'll have to think
# about what will happen if the generator is not completely
# iterated upon.
- return self.site().pagelanglinks(self)
+ return self.site().pagelanglinks(self, step=step, total=total)
def templates(self):
- """Returns a list of Page objects for templates used on this Page.
+ """Return a list of Page objects for templates used on this Page.
Template parameters are ignored. This method only returns embedded
templates, not template pages that happen to be referenced through
@@ -835,27 +860,32 @@
return self._templates
- def itertemplates(self):
+ def itertemplates(self, step=None, total=None):
"""Iterate Page objects for templates used on this Page.
Template parameters are ignored. This method only returns embedded
templates, not template pages that happen to be referenced through
a normal link.
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
+
"""
if hasattr(self, '_templates'):
return iter(self._templates)
- return self.site().pagetemplates(self)
+ return self.site().pagetemplates(self, step=step, total=total)
@deprecate_arg("followRedirects", None)
@deprecate_arg("loose", None)
- def imagelinks(self, followRedirects=None, loose=None):
+ def imagelinks(self, step=None, total=None):
"""Iterate ImagePage objects for images displayed on this Page.
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
@return: a generator that yields ImagePage objects.
"""
- return self.site().pageimages(self)
+ return self.site().pageimages(self, step=step, total=total)
def templatesWithParams(self):
"""Iterate templates used on this Page.
@@ -898,22 +928,27 @@
@deprecate_arg("nofollow_redirects", None)
@deprecate_arg("get_redirect", None)
- def categories(self, withSortKey=False):
+ def categories(self, withSortKey=False, step=None, total=None):
"""Iterate categories that the article is in.
@param withSortKey: if True, include the sort key in each Category.
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
@return: a generator that yields Category objects.
"""
- return self.site().pagecategories(self, withSortKey=withSortKey)
+ return self.site().pagecategories(self, withSortKey=withSortKey,
+ step=step, total=total)
- def extlinks(self):
+ def extlinks(self, step=None, total=None):
"""Iterate all external URLs (not interwiki links) from this page.
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
@return: a generator that yields unicode objects containing URLs.
"""
- return self.site().page_extlinks(self)
+ return self.site().page_extlinks(self, step=step, total=total)
def getRedirectTarget(self):
"""Return a Page object for the target this Page redirects to.
@@ -924,9 +959,14 @@
"""
return self.site().getredirtarget(self)
+ # BREAKING CHANGE: in the old framework, getVersionHistory returned
+ # no more than 500 revisions by default; now it iterates
+ # all revisions unless the 'total' argument is used
@deprecate_arg("forceReload", None)
- def getVersionHistory(self, reverseOrder=False, getAll=False,
- revCount=500):
+ @deprecate_arg("revCount", "total")
+ @deprecate_arg("getAll", None)
+ def getVersionHistory(self, reverseOrder=False, step=None,
+ total=None):
"""Load the version history page and return history information.
Return value is a list of tuples, where each tuple represents one
@@ -935,49 +975,58 @@
reverseOrder is True. Defaults to getting the first revCount edits,
unless getAll is True.
+ @param step: limit each API call to this number of revisions
+ @param total: iterate no more than this number of revisions in total
+
"""
- if getAll:
- limit = None
- else:
- limit = revCount
self.site().loadrevisions(self, getText=False, rvdir=reverseOrder,
- total=limit)
- if getAll:
- revCount = len(self._revisions)
+ step=step, total=total)
return [ ( self._revisions[rev].revid,
self._revisions[rev].timestamp,
self._revisions[rev].user,
self._revisions[rev].comment
) for rev in sorted(self._revisions,
- reverse=not reverseOrder)[ : revCount]
+ reverse=not reverseOrder)
]
def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
- getAll=False, revCount=500):
+ step=None, total=None):
"""Return the version history as a wiki table."""
+
result = '{| border="1"\n'
result += '! oldid || date/time || username || edit summary\n'
for oldid, time, username, summary \
in self.getVersionHistory(forceReload=forceReload,
reverseOrder=reverseOrder,
- getAll=getAll, revCount=revCount):
+ step=step, total=total):
result += '|----\n'
result += '| %s || %s || %s || <nowiki>%s</nowiki>\n'\
% (oldid, time, username, summary)
result += '|}\n'
return result
- def fullVersionHistory(self):
- """Iterate all previous versions including wikitext.
+ def fullVersionHistory(self, reverseOrder=False, step=None,
+ total=None):
+ """Iterate previous versions including wikitext.
+ Takes same arguments as getVersionHistory.
+
@return: A generator that yields tuples consisting of revision ID,
edit date/time, user name and content
+
"""
- return self.site().loadrevisions(self, withText=True)
+ return self.site().loadrevisions(self, getText=True,
+ rvdir=reverseOrder,
+ step=step, total=total)
- def contributingUsers(self):
- """Return a set of usernames (or IPs) of users who edited this page."""
- edits = self.getVersionHistory()
+ def contributingUsers(self, step=None, total=None):
+ """Return a set of usernames (or IPs) of users who edited this page.
+
+ @param step: limit each API call to this number of revisions
+ @param total: iterate no more than this number of revisions in total
+
+ """
+ edits = self.getVersionHistory(step=step, total=total)
users = set([edit[2] for edit in edits])
return users
@@ -1000,7 +1049,7 @@
pywikibot.output(u'Moving %s to [[%s]].'
% (self.title(asLink=True), newtitle))
reason = pywikibot.input(u'Please enter a reason for the move:')
- # TODO: implement "safe" parameter (Is this necessary ?)
+ # TODO: implement "safe" parameter (Is this necessary ?)
# TODO: implement "sysop" parameter
return self.site().movepage(self, newtitle, reason,
movetalk=movetalkpage,
@@ -1035,21 +1084,28 @@
return self.site().deletepage(self, reason)
except pywikibot.NoUsername, e:
if mark:
- raise NotImplementedError("marking pages for deletions is not yet available.")
+ raise NotImplementedError(
+ "Marking pages for deletion is not yet available.")
raise e
-
- def loadDeletedRevisions(self):
+ # all these DeletedRevisions methods need to be reviewed and harmonized
+ # with the new framework; they do not appear functional
+ def loadDeletedRevisions(self, step=None, total=None):
"""Retrieve all deleted revisions for this Page from Special/Undelete.
Stores all revisions' timestamps, dates, editors and comments in
self._deletedRevs attribute.
- @return: list of timestamps (which can be used to retrieve revisions
- later on).
+ @return: iterator of timestamps (which can be used to retrieve
+ revisions later on).
"""
- return self.site().loadDeletedRevisions(self)
+ if not hasattr(self, "_deletedRevs"):
+ self._deletedRevs = {}
+ for item in self.site().deletedrevs(self, step=step, total=total):
+ for rev in item.get("revisions", []):
+ self._deletedRevs[rev['timestamp']] = rev
+ yield rev['timestamp']
def getDeletedRevision(self, timestamp, retrieveText=False):
"""Return a particular deleted revision by timestamp.
@@ -1060,8 +1116,17 @@
None.
"""
- return self.site().getDeletedRevision(self, timestamp,
- getText=retrieveText)
+ if hasattr(self, "_deletedRevs"):
+ if timestamp in self._deletedRevs and (
+ (not retrieveText)
+ or "content" in self._deletedRevs["timestamp"]):
+ return self._deletedRevs["timestamp"]
+ for item in self.site().deletedrevs(self, start=timestamp,
+ get_text=retrieveText, total=1):
+ # should only be one item with one revision
+ if item['title'] == self.title:
+ if "revisions" in item:
+ return item["revisions"][0]
def markDeletedRevision(self, timestamp, undelete=True):
"""Mark the revision identified by timestamp for undeletion.
@@ -1069,7 +1134,7 @@
@param undelete: if False, mark the revision to remain deleted.
"""
- if self._deletedRevs == None:
+ if not hasattr(self, "_deletedRevs"):
self.loadDeletedRevisions()
if timestamp not in self._deletedRevs:
#TODO: Throw an exception?
@@ -1149,14 +1214,14 @@
oldCat and newCat should be Category objects.
If newCat is None, the category will be removed.
-
+
comment: string to use as an edit summary
- sortKey: sortKey to use for the added category.
+ sortKey: sortKey to use for the added category.
Unused if newCat is None, or if inPlace=True
-
+
"""
- #TODO: is inPlace necessary?
+ #TODO: is inPlace necessary?
site = self.site()
changesMade = False
@@ -1405,11 +1470,16 @@
% (datetime, username, resolution, size, comment))
return u'{| border="1"\n! date/time || username || resolution || size || edit summary\n|----\n' + u'\n|----\n'.join(lines) + '\n|}'
- def usingPages(self):
- """Yield Pages on which the image is displayed."""
- return self.site().imageusage(self)
+ def usingPages(self, step=None, total=None):
+ """Yield Pages on which the image is displayed.
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in total
+ """
+ return self.site().imageusage(self, step=step, total=total)
+
+
class Category(Page):
"""A page in the Category: namespace"""
@@ -1447,7 +1517,7 @@
@deprecate_arg("startFrom", None)
@deprecate_arg("cacheResults", None)
- def subcategories(self, recurse=False):
+ def subcategories(self, recurse=False, step=None, total=None):
"""Iterate all subcategories of the current category.
@param recurse: if not False or 0, also iterate subcategories of
@@ -1455,28 +1525,46 @@
levels. (Example: recurse=1 will iterate direct subcats and
first-level sub-sub-cats, but no deeper.)
@type recurse: int or bool
+ @param step: limit each API call to this number of categories
+ @param total: iterate no more than this number of
+ subcategories in total (at all levels)
"""
if not isinstance(recurse, bool) and recurse:
recurse = recurse - 1
if not hasattr(self, "_subcats"):
self._subcats = []
- for member in self.site().categorymembers(self, namespaces=[14]):
+ for member in self.site().categorymembers(self, namespaces=[14],
+ step=step, total=total):
subcat = Category(self.site(), member.title())
self._subcats.append(subcat)
yield subcat
+ total -= 1
+ if not total:
+ return
if recurse:
- for item in subcat.subcategories(recurse):
+ for item in subcat.subcategories(recurse,
+ step=step, total=total):
yield item
+ total -= 1
+ if not total:
+ return
else:
for subcat in self._subcats:
yield subcat
+ total -= 1
+ if not total:
+ return
if recurse:
- for item in subcat.subcategories(recurse):
+ for item in subcat.subcategories(recurse,
+ step=step, total=total):
yield item
+ total -= 1
+ if not total:
+ return
@deprecate_arg("startFrom", None)
- def articles(self, recurse=False):
+ def articles(self, recurse=False, step=None, total=None):
"""
Yields all articles in the current category.
@@ -1485,31 +1573,50 @@
levels. (Example: recurse=1 will iterate articles in first-level
subcats, but no deeper.)
@type recurse: int or bool
+ @param step: limit each API call to this number of pages
+ @param total: iterate no more than this number of pages in
+ total (at all levels)
"""
namespaces = [x for x in self.site().namespaces()
if x>=0 and x!=14]
for member in self.site().categorymembers(self,
- namespaces=namespaces):
+ namespaces=namespaces,
+ step=step, total=total):
yield member
+ total -= 1
+ if not total:
+ return
if recurse:
if not isinstance(recurse, bool) and recurse:
recurse = recurse - 1
- for subcat in self.subcategories():
- for article in subcat.articles(recurse):
+ for subcat in self.subcategories(step=step):
+ for article in subcat.articles(recurse, step=step, total=total):
yield article
+ total -= 1
+ if not total:
+ return
- def members(self, recurse=False, namespaces=None):
+ def members(self, recurse=False, namespaces=None, step=None, total=None):
"""Yield all category contents (subcats, pages, and files)."""
- for member in self.site().categorymembers(self, namespaces):
+
+ for member in self.site().categorymembers(self, namespaces,
+ step=step, total=total):
yield member
+ total -= 1
+ if not total:
+ return
if recurse:
if not isinstance(recurse, bool) and recurse:
recurse = recurse - 1
- for subcat in self.subcategories():
- for article in subcat.members(recurse, namespaces):
+ for subcat in self.subcategories(step=step):
+ for article in subcat.members(recurse, namespaces, step=step,
+ total=total):
yield article
-
+ total -= 1
+ if not total:
+ return
+
def isEmptyCategory(self):
"""Return True if category has no members (including subcategories)."""
for member in self.site().categorymembers(self, total=1):
@@ -1523,7 +1630,7 @@
@param cat: New category title (without namespace) or Category object
@type cat: unicode or Category
@param message: message to use for category creation message
- If two %s are provided in message, will be replaced
+ If two %s are provided in message, will be replaced
by (self.title, authorsList)
@type message: unicode
@return: True if copying was successful, False if target page
@@ -1676,7 +1783,7 @@
u'''[^ %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\u0080-\uFFFF+]'''
# URL percent encoding sequences interfere with the ability
# to round-trip titles -- you can't link to them consistently.
- u'|%[0-9A-Fa-f]{2}'
+ u'|%[0-9A-Fa-f]{2}'
# XML/HTML character references produce similar issues.
u'|&[A-Za-z0-9\x80-\xff]+;'
u'|&#[0-9]+;'
@@ -1783,7 +1890,7 @@
def parse(self):
"""Parse text; called internally when accessing attributes"""
-
+
self._site = self._source
self._namespace = self._defaultns
t = self._text
@@ -2000,7 +2107,7 @@
link._site = page.site()
link._section = page.section()
link._namespace = page.namespace()
- link._title = page.title(withNamespace=False,
+ link._title = page.title(withNamespace=False,
allowInterwiki=False,
withSection=False)
link._anchor = None
@@ -2029,9 +2136,9 @@
link._namespace = ns
title = t
link._title = title
-
- return link
+ return link
+
# Utility functions for parsing page titles
def html2unicode(text, ignore = []):