Revision: 5268
Author: russblau
Date: 2008-04-24 13:50:10 +0000 (Thu, 24 Apr 2008)
Log Message:
-----------
further implementation of getrevisions; documentation; refactoring.
Modified Paths:
--------------
branches/rewrite/pywikibot/README-conversion.txt
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/README-conversion.txt
===================================================================
--- branches/rewrite/pywikibot/README-conversion.txt 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/README-conversion.txt 2008-04-24 13:50:10 UTC (rev 5268)
@@ -12,15 +12,27 @@
bot.
With pywikipedia scripts were importing "wikipedia" or "pagegenerators"
-librairies; pywikibot is now written as a standard module.
+libraries; pywikibot is now written as a standard package, and other modules
+are contained within it (e.g., pywikibot.site contains Site classes). However,
+most commonly-used names are imported into the pywikibot namespace, so that
+module names don't need to be used unless specified in the documentation.
+
(To use it, just import "pywikibot", assuming that pywikibot/ is in sys.path)
== Python librairies ==
-You will need, to run pywikibot, httplib2 and setuptools
-* httplib2 : http://code.google.com/p/httplib2/
+[Note: the goal will be to package pywikibot with setuptools easy_install,
+so that these dependencies will be loaded automatically when the package is
+installed, and users won't need to worry about this...]
+
+To run pywikibot, you will need the httplib2, simplejson, and setuptools packages--
+* httplib2 : http://code.google.com/p/httplib2/
* setuptools : http://pypi.python.org/pypi/setuptools/
+* simplejson :
http://https://svn.red-bean.com/bob/simplejson/tags/simplejson-1.7.1/docs/i…
+or, if you already have setuptools installed, just execute 'easy_install httplib2'
+and 'easy_install simplejson'
+
If you run into errors involving httplib2.urlnorm, update httplib2 to
0.4.0 (Ubuntu package python-httlib2 for example, is outdated)
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/__init__.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -60,7 +60,7 @@
getSite = Site # alias for backwards-compability
-from page import Page, ImagePage, Category
+from page import Page, ImagePage, Category, Link
# DEBUG
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -329,22 +329,7 @@
"""
p = pywikibot.Page(self.site, pagedata['title'], pagedata['ns'])
- if "pageid" in pagedata:
- self._pageid = int(pagedata['pageid'])
- elif "missing" in pagedata:
- self._pageid = 0 # Non-existent page
- else:
- raise AssertionError(
- "Page %s has neither 'pageid' nor 'missing' attribute"
- % pagedata['title'])
- if 'lastrevid' in pagedata:
- p._revid = pagedata['lastrevid']
- if 'touched' in pagedata:
- p._timestamp = pagedata['touched']
- if 'protection' in pagedata:
- p._protection = {}
- for item in pagedata['protection']:
- p._protection[item['type']] = item['level']
+ update_page(p, pagedata)
return p
@@ -366,8 +351,12 @@
class PropertyGenerator(object):
- """Generator for queries of type action=query&property=..."""
+ """Generator for queries of type action=query&property=...
+ Note that this generator yields one or more dict object(s) corresponding
+ to each "page" item(s) from the API response; the calling module has to
+ decide what to do with the contents of the dict."""
+
def __init__(self, prop, **kwargs):
"""
Required and optional parameters are as for C{Request}, except that
@@ -384,7 +373,7 @@
if self.limits[prop] and kwargs.pop("getAll", False):
self.request['g'+self.limits[generator]] = "max"
self.site = self.request.site
- self.resultkey = prop # element to look for in result
+ self.resultkey = prop
# dict mapping property types to their limit parameter names
limits = {'revisions': 'rvlimit',
@@ -410,21 +399,8 @@
if not ("query" in self.data and "pages" in self.data["query"]):
raise StopIteration
pagedata = self.data["query"]["pages"].values()
- assert len(pagedata)==1
- pagedata = pagedata[0]
- if not self.resultkey in pagedata:
- raise StopIteration
- if isinstance(pagedata[self.resultkey], dict):
- for v in pagedata[self.resultkey].itervalues():
- yield v
- elif isinstance(pagedata[self.resultkey], list):
- for v in pagedata[self.resultkey]:
- yield v
- else:
- raise APIError("Unknown",
- "Unknown format in ['%s'] value."
- % self.resultkey,
- data=pagedata[self.resultkey])
+ for item in pagedata:
+ yield item
if not "query-continue" in self.data:
return
if not self.resultkey in self.data["query-continue"]:
@@ -472,6 +448,32 @@
pywikibot.cookie_jar.save()
+def update_page(page, pagedict):
+ """Update attributes of Page object page, based on query data in pagequery
+
+ @param page: object to be updated
+ @type page: Page
+ @param pagedict: the contents of a "page" element of a query response
+ @type pagedict: dict
+
+ """
+ if "pageid" in pagedict:
+ page._pageid = int(pagedict['pageid'])
+ elif "missing" in pagedict:
+ page._pageid = 0 # Non-existent page
+ else:
+ raise AssertionError(
+ "Page %s has neither 'pageid' nor 'missing' attribute"
+ % pagedict['title'])
+ if 'lastrevid' in pagedict:
+ page._revid = pagedict['lastrevid']
+ if 'touched' in pagedict:
+ page._timestamp = pagedict['touched']
+ if 'protection' in pagedict:
+ page._protection = {}
+ for item in pagedict['protection']:
+ page._protection[item['type']] = item['level'], item['expiry']
+
if __name__ == "__main__":
from pywikibot import Site
logging.getLogger().setLevel(logging.DEBUG)
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/page.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -678,8 +678,8 @@
limit = None
else:
limit = revCount
- return self.site().getrevisions(self, withText=False,
- older=not reverseOrder, limit=limit)
+ return self.site().getrevisions(self, getText=False,
+ rvdir=not reverseOrder, limit=limit)
def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
getAll=False, revCount=500):
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/site.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -491,8 +491,12 @@
"info",
inprop="protection|talkid|subjectid",
titles=page.title(withSection=False
).encode(self.encoding()))
- for item in query():
- pass #FIXME
+ for pageitem in query:
+ if pageitem['title'] != page.title(withSection=False):
+ raise RuntimeError(
+ "page_exists: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ page._pageid = pageitem['pageid']
return page._pageid > 0
# following group of methods map more-or-less directly to API queries
@@ -621,30 +625,80 @@
return cmgen
def getrevisions(self, page=None, getText=False, revids=None,
- older=True, limit=None, sysop=False, user=None,
- excludeuser=None):
+ limit=None, startid=None, endid=None, starttime=None,
+ endtime=None, rvdir=None, user=None, excludeuser=None,
+ section=None, sysop=False):
"""Retrieve and store revision information.
- @param page: retrieve the history of this Page (required unless ids
+ By default, retrieves the last (current) revision of the page,
+ I{unless} any of the optional parameters revids, startid, endid,
+ starttime, endtime, rvdir, user, excludeuser, or limit are
+ specified. Unless noted below, all parameters not specified
+ default to False.
+
+ If rvdir is False or not specified, startid must be greater than
+ endid if both are specified; likewise, starttime must be greater
+ than endtime. If rvdir is True, these relationships are reversed.
+
+ @param page: retrieve revisions of this Page (required unless ids
is specified)
- @param getText: if True, retrieve the wiki-text of each revision as
- well
+ @param getText: if True, retrieve the wiki-text of each revision;
+ otherwise, only retrieve the revision metadata (default)
+ @param section: if specified, retrieve only this section of the text
+ (getText must be True); section must be given by number (top of
+ the article is section 0), not name
+ @type section: int
@param revids: retrieve only the specified revision ids (required
unless page is specified)
- @param older: if True, retrieve newest revisions first; otherwise,
- retrieve oldest revisions first
- @param limit: if specified, retrieve no more than this number of
- revisions (defaults to latest revision only)
+ @type revids: list of ints
+ @param limit: Retrieve no more than this number of revisions
@type limit: int
+ @param startid: retrieve revisions starting with this revid
+ @param endid: stop upon retrieving this revid
+ @param starttime: retrieve revisions starting at this timestamp
+ @param endtime: stop upon reaching this timestamp
+ @param rvdir: if false, retrieve newest revisions first (default);
+ if true, retrieve earliest first
@param user: retrieve only revisions authored by this user
@param excludeuser: retrieve all revisions not authored by this user
@param sysop: if True, switch to sysop account (if available) to
retrieve this page
"""
+ latest = (revids is None and
+ startid is None and
+ endid is None and
+ starttime is None and
+ endtime is None and
+ rvdir is None and
+ user is None and
+ excludeuser is None and
+ limit is None) # if True, we are retrieving current revision
+
+ # check for invalid argument combinations
if page is None and revids is None:
raise ValueError(
- "getrevisions needs either page or revids argument.")
+ "getrevisions: either page or revids argument required")
+ if (startid is not None or endid is not None) and \
+ (starttime is not None or endtime is not None):
+ raise ValueError(
+ "getrevisions: startid/endid combined with starttime/endtime")
+ if starttime is not None and endtime is not None:
+ if rvdir and starttime >= endtime:
+ raise ValueError(
+ "getrevisions: starttime > endtime with rvdir=True")
+ if (not rvdir) and endtime >= starttime:
+ raise ValueError(
+ "getrevisions: endtime > starttime with rvdir=False")
+ if startid is not None and endid is not None:
+ if rvdir and startid >= endid:
+ raise ValueError(
+ "getrevisions: startid > endid with rvdir=True")
+ if (not rvdir) and endid >= startid:
+ raise ValueError(
+ "getrevisions: endid > startid with rvdir=False")
+
+ # assemble API request
if revids is None:
rvtitle = page.title(withSection=False).encode(self.encoding())
rvgen = api.PropertyGenerator(u"revisions", titles=rvtitle)
@@ -654,28 +708,50 @@
if getText:
rvgen.request[u"rvprop"] = \
u"ids|flags|timestamp|user|comment|content"
- if page.section():
- rvgen.request[u"rvsection"] = unicode(page.section())
+ if section is not None:
+ rvgen.request[u"rvsection"] = unicode(section)
if limit:
rvgen.request[u"rvlimit"] = unicode(limit)
- if not older:
+ if rvdir:
rvgen.request[u"rvdir"] = u"newer"
+ elif rvdir is not None:
+ rvgen.request[u"rvdir"] = u"older"
+ if startid:
+ rvgen.request[u"rvstartid"] = startid
+ if endid:
+ rvgen.request[u"rvendid"] = endid
+ if starttime:
+ rvgen.request[u"rvstart"] = starttime
+ if endtime:
+ rvgen.request[u"rvend"] = endtime
if user:
rvgen.request[u"rvuser"] = user
elif excludeuser:
rvgen.request[u"rvexcludeuser"] = excludeuser
- # TODO if sysop:
- for rev in rvgen:
- revision = pywikibot.page.Revision(revid=rev['revid'],
- timestamp=rev['timestamp'],
- user=rev['user'],
- anon=rev.has_key('anon'),
- comment=rev.get('comment', u''),
- minor=rev.has_key('minor'),
- text=rev.get('*', None))
- page._revisions[revision.revid] = revision
- if revids is None and limit is None and user is None and excludeuser is None:
- page._revid = revision.revid
+ # TODO if sysop: something
+ for pagedata in rvgen:
+ if page is not None:
+ if pagedata['title'] != page.title(withSection=False):
+ raise RuntimeError(
+ "getrevisions: Query on %s returned data on '%s'"
+ % (page, pagedata['title']))
+ else:
+ page = Page(self, pagedata['title'])
+ api.update_page(page, pagedata)
+
+ for rev in pagedata['revisions']:
+ revision = pywikibot.page.Revision(
+ revid=rev['revid'],
+ timestamp=rev['timestamp'],
+ user=rev['user'],
+ anon=rev.has_key('anon'),
+ comment=rev.get('comment', u''),
+ minor=rev.has_key('minor'),
+ text=rev.get('*', None)
+ )
+ page._revisions[revision.revid] = revision
+ if latest:
+ page._revid = revision.revid
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####