Revision: 5268 Author: russblau Date: 2008-04-24 13:50:10 +0000 (Thu, 24 Apr 2008)
Log Message: ----------- further implementation of getrevisions; documentation; refactoring.
Modified Paths: -------------- branches/rewrite/pywikibot/README-conversion.txt branches/rewrite/pywikibot/__init__.py branches/rewrite/pywikibot/data/api.py branches/rewrite/pywikibot/page.py branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/README-conversion.txt =================================================================== --- branches/rewrite/pywikibot/README-conversion.txt 2008-04-23 22:00:06 UTC (rev 5267) +++ branches/rewrite/pywikibot/README-conversion.txt 2008-04-24 13:50:10 UTC (rev 5268) @@ -12,15 +12,27 @@ bot.
With pywikipedia scripts were importing "wikipedia" or "pagegenerators" -librairies; pywikibot is now written as a standard module. +libraries; pywikibot is now written as a standard package, and other modules +are contained within it (e.g., pywikibot.site contains Site classes). However, +most commonly-used names are imported into the pywikibot namespace, so that +module names don't need to be used unless specified in the documentation. + (To use it, just import "pywikibot", assuming that pywikibot/ is in sys.path)
== Python libraries ==
-You will need, to run pywikibot, httplib2 and setuptools -* httplib2 : http://code.google.com/p/httplib2/ +[Note: the goal will be to package pywikibot with setuptools easy_install, +so that these dependencies will be loaded automatically when the package is +installed, and users won't need to worry about this...] + +To run pywikibot, you will need the httplib2, simplejson, and setuptools packages-- +* httplib2 : http://code.google.com/p/httplib2/ * setuptools : http://pypi.python.org/pypi/setuptools/ +* simplejson : https://svn.red-bean.com/bob/simplejson/tags/simplejson-1.7.1/docs/in...
+or, if you already have setuptools installed, just execute 'easy_install httplib2' +and 'easy_install simplejson' + If you run into errors involving httplib2.urlnorm, update httplib2 to 0.4.0 (Ubuntu package python-httplib2 for example, is outdated)
Modified: branches/rewrite/pywikibot/__init__.py =================================================================== --- branches/rewrite/pywikibot/__init__.py 2008-04-23 22:00:06 UTC (rev 5267) +++ branches/rewrite/pywikibot/__init__.py 2008-04-24 13:50:10 UTC (rev 5268) @@ -60,7 +60,7 @@
getSite = Site # alias for backwards-compability
-from page import Page, ImagePage, Category +from page import Page, ImagePage, Category, Link
# DEBUG
Modified: branches/rewrite/pywikibot/data/api.py =================================================================== --- branches/rewrite/pywikibot/data/api.py 2008-04-23 22:00:06 UTC (rev 5267) +++ branches/rewrite/pywikibot/data/api.py 2008-04-24 13:50:10 UTC (rev 5268) @@ -329,22 +329,7 @@
""" p = pywikibot.Page(self.site, pagedata['title'], pagedata['ns']) - if "pageid" in pagedata: - self._pageid = int(pagedata['pageid']) - elif "missing" in pagedata: - self._pageid = 0 # Non-existent page - else: - raise AssertionError( - "Page %s has neither 'pageid' nor 'missing' attribute" - % pagedata['title']) - if 'lastrevid' in pagedata: - p._revid = pagedata['lastrevid'] - if 'touched' in pagedata: - p._timestamp = pagedata['touched'] - if 'protection' in pagedata: - p._protection = {} - for item in pagedata['protection']: - p._protection[item['type']] = item['level'] + update_page(p, pagedata) return p
@@ -366,8 +351,12 @@
class PropertyGenerator(object): - """Generator for queries of type action=query&property=...""" + """Generator for queries of type action=query&property=...
+ Note that this generator yields one or more dict object(s) corresponding + to each "page" item(s) from the API response; the calling module has to + decide what to do with the contents of the dict.""" + def __init__(self, prop, **kwargs): """ Required and optional parameters are as for C{Request}, except that @@ -384,7 +373,7 @@ if self.limits[prop] and kwargs.pop("getAll", False): self.request['g'+self.limits[generator]] = "max" self.site = self.request.site - self.resultkey = prop # element to look for in result + self.resultkey = prop
# dict mapping property types to their limit parameter names limits = {'revisions': 'rvlimit', @@ -410,21 +399,8 @@ if not ("query" in self.data and "pages" in self.data["query"]): raise StopIteration pagedata = self.data["query"]["pages"].values() - assert len(pagedata)==1 - pagedata = pagedata[0] - if not self.resultkey in pagedata: - raise StopIteration - if isinstance(pagedata[self.resultkey], dict): - for v in pagedata[self.resultkey].itervalues(): - yield v - elif isinstance(pagedata[self.resultkey], list): - for v in pagedata[self.resultkey]: - yield v - else: - raise APIError("Unknown", - "Unknown format in ['%s'] value." - % self.resultkey, - data=pagedata[self.resultkey]) + for item in pagedata: + yield item if not "query-continue" in self.data: return if not self.resultkey in self.data["query-continue"]: @@ -472,6 +448,32 @@ pywikibot.cookie_jar.save()
+def update_page(page, pagedict): + """Update attributes of Page object page, based on query data in pagequery + + @param page: object to be updated + @type page: Page + @param pagedict: the contents of a "page" element of a query response + @type pagedict: dict + + """ + if "pageid" in pagedict: + page._pageid = int(pagedict['pageid']) + elif "missing" in pagedict: + page._pageid = 0 # Non-existent page + else: + raise AssertionError( + "Page %s has neither 'pageid' nor 'missing' attribute" + % pagedict['title']) + if 'lastrevid' in pagedict: + page._revid = pagedict['lastrevid'] + if 'touched' in pagedict: + page._timestamp = pagedict['touched'] + if 'protection' in pagedict: + page._protection = {} + for item in pagedict['protection']: + page._protection[item['type']] = item['level'], item['expiry'] + if __name__ == "__main__": from pywikibot import Site logging.getLogger().setLevel(logging.DEBUG)
Modified: branches/rewrite/pywikibot/page.py =================================================================== --- branches/rewrite/pywikibot/page.py 2008-04-23 22:00:06 UTC (rev 5267) +++ branches/rewrite/pywikibot/page.py 2008-04-24 13:50:10 UTC (rev 5268) @@ -678,8 +678,8 @@ limit = None else: limit = revCount - return self.site().getrevisions(self, withText=False, - older=not reverseOrder, limit=limit) + return self.site().getrevisions(self, getText=False, + rvdir=not reverseOrder, limit=limit)
def getVersionHistoryTable(self, forceReload=False, reverseOrder=False, getAll=False, revCount=500):
Modified: branches/rewrite/pywikibot/site.py =================================================================== --- branches/rewrite/pywikibot/site.py 2008-04-23 22:00:06 UTC (rev 5267) +++ branches/rewrite/pywikibot/site.py 2008-04-24 13:50:10 UTC (rev 5268) @@ -491,8 +491,12 @@ "info", inprop="protection|talkid|subjectid", titles=page.title(withSection=False ).encode(self.encoding())) - for item in query(): - pass #FIXME + for pageitem in query: + if pageitem['title'] != page.title(withSection=False): + raise RuntimeError( + "page_exists: Query on %s returned data on '%s'" + % (page, pageitem['title'])) + page._pageid = pageitem['pageid'] return page._pageid > 0
# following group of methods map more-or-less directly to API queries @@ -621,30 +625,80 @@ return cmgen
def getrevisions(self, page=None, getText=False, revids=None, - older=True, limit=None, sysop=False, user=None, - excludeuser=None): + limit=None, startid=None, endid=None, starttime=None, + endtime=None, rvdir=None, user=None, excludeuser=None, + section=None, sysop=False): """Retrieve and store revision information.
- @param page: retrieve the history of this Page (required unless ids + By default, retrieves the last (current) revision of the page, + I{unless} any of the optional parameters revids, startid, endid, + starttime, endtime, rvdir, user, excludeuser, or limit are + specified. Unless noted below, all parameters not specified + default to False. + + If rvdir is False or not specified, startid must be greater than + endid if both are specified; likewise, starttime must be greater + than endtime. If rvdir is True, these relationships are reversed. + + @param page: retrieve revisions of this Page (required unless ids is specified) - @param getText: if True, retrieve the wiki-text of each revision as - well + @param getText: if True, retrieve the wiki-text of each revision; + otherwise, only retrieve the revision metadata (default) + @param section: if specified, retrieve only this section of the text + (getText must be True); section must be given by number (top of + the article is section 0), not name + @type section: int @param revids: retrieve only the specified revision ids (required unless page is specified) - @param older: if True, retrieve newest revisions first; otherwise, - retrieve oldest revisions first - @param limit: if specified, retrieve no more than this number of - revisions (defaults to latest revision only) + @type revids: list of ints + @param limit: Retrieve no more than this number of revisions @type limit: int + @param startid: retrieve revisions starting with this revid + @param endid: stop upon retrieving this revid + @param starttime: retrieve revisions starting at this timestamp + @param endtime: stop upon reaching this timestamp + @param rvdir: if false, retrieve newest revisions first (default); + if true, retrieve earliest first @param user: retrieve only revisions authored by this user @param excludeuser: retrieve all revisions not authored by this user @param sysop: if True, switch to sysop account (if available) to retrieve this page
""" + latest = (revids is None and + startid is None and + endid is None and + starttime is None and + endtime is None and + rvdir is None and + user is None and + excludeuser is None and + limit is None) # if True, we are retrieving current revision + + # check for invalid argument combinations if page is None and revids is None: raise ValueError( - "getrevisions needs either page or revids argument.") + "getrevisions: either page or revids argument required") + if (startid is not None or endid is not None) and \ + (starttime is not None or endtime is not None): + raise ValueError( + "getrevisions: startid/endid combined with starttime/endtime") + if starttime is not None and endtime is not None: + if rvdir and starttime >= endtime: + raise ValueError( + "getrevisions: starttime > endtime with rvdir=True") + if (not rvdir) and endtime >= starttime: + raise ValueError( + "getrevisions: endtime > starttime with rvdir=False") + if startid is not None and endid is not None: + if rvdir and startid >= endid: + raise ValueError( + "getrevisions: startid > endid with rvdir=True") + if (not rvdir) and endid >= startid: + raise ValueError( + "getrevisions: endid > startid with rvdir=False") + + # assemble API request if revids is None: rvtitle = page.title(withSection=False).encode(self.encoding()) rvgen = api.PropertyGenerator(u"revisions", titles=rvtitle) @@ -654,28 +708,50 @@ if getText: rvgen.request[u"rvprop"] = \ u"ids|flags|timestamp|user|comment|content" - if page.section(): - rvgen.request[u"rvsection"] = unicode(page.section()) + if section is not None: + rvgen.request[u"rvsection"] = unicode(section) if limit: rvgen.request[u"rvlimit"] = unicode(limit) - if not older: + if rvdir: rvgen.request[u"rvdir"] = u"newer" + elif rvdir is not None: + rvgen.request[u"rvdir"] = u"older" + if startid: + rvgen.request[u"rvstartid"] = startid + if endid: + rvgen.request[u"rvendid"] = endid + if starttime: + rvgen.request[u"rvstart"] = starttime + if endtime: + 
rvgen.request[u"rvend"] = endtime if user: rvgen.request[u"rvuser"] = user elif excludeuser: rvgen.request[u"rvexcludeuser"] = excludeuser - # TODO if sysop: - for rev in rvgen: - revision = pywikibot.page.Revision(revid=rev['revid'], - timestamp=rev['timestamp'], - user=rev['user'], - anon=rev.has_key('anon'), - comment=rev.get('comment', u''), - minor=rev.has_key('minor'), - text=rev.get('*', None)) - page._revisions[revision.revid] = revision - if revids is None and limit is None and user is None and excludeuser is None: - page._revid = revision.revid + # TODO if sysop: something + for pagedata in rvgen: + if page is not None: + if pagedata['title'] != page.title(withSection=False): + raise RuntimeError( + "getrevisions: Query on %s returned data on '%s'" + % (page, pagedata['title'])) + else: + page = Page(self, pagedata['title']) + api.update_page(page, pagedata) + + for rev in pagedata['revisions']: + revision = pywikibot.page.Revision( + revid=rev['revid'], + timestamp=rev['timestamp'], + user=rev['user'], + anon=rev.has_key('anon'), + comment=rev.get('comment', u''), + minor=rev.has_key('minor'), + text=rev.get('*', None) + ) + page._revisions[revision.revid] = revision + if latest: + page._revid = revision.revid
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
pywikipedia-l@lists.wikimedia.org