Revision: 5204
Author: russblau
Date: 2008-04-11 20:29:11 +0000 (Fri, 11 Apr 2008)
Log Message:
-----------
implemented getrevisions() [incomplete], and made modest changes elsewhere
Modified Paths:
--------------
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/date.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
branches/rewrite/pywikibot/throttle.py
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-11 13:36:23 UTC (rev 5203)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-11 20:29:11 UTC (rev 5204)
@@ -145,19 +145,23 @@
while True:
# TODO catch http errors
try:
- if self.params.get("action", "") in ("login",):
- rawdata = http.request(self.site, uri, method="POST",
- headers={'Content-Type':
- 'application/x-www-form-urlencoded'},
- body=params)
- else:
- uri = uri + "?" + params
- rawdata = http.request(self.site, uri)
- except Exception, e: #TODO: what exceptions can occur here?
- logging.warning(traceback.format_exc())
- print uri, params
- self.wait()
- continue
+ self.site.sitelock.acquire()
+ try:
+ if self.params.get("action", "") in ("login",):
+ rawdata = http.request(self.site, uri, method="POST",
+ headers={'Content-Type':
+ 'application/x-www-form-urlencoded'},
+ body=params)
+ else:
+ uri = uri + "?" + params
+ rawdata = http.request(self.site, uri)
+ except Exception, e: #TODO: what exceptions can occur here?
+ logging.warning(traceback.format_exc())
+ print uri, params
+ self.wait()
+ continue
+ finally:
+ self.site.sitelock.release()
if rawdata.startswith(u"unknown_action"):
raise APIError(rawdata[:14], rawdata[16:])
try:
@@ -197,7 +201,7 @@
if lag:
logging.info(
"Pausing due to database lag: " + info)
- self.wait(int(lag.group("lag")))
+ self.lag_wait(int(lag.group("lag")))
continue
if code in (u'internal_api_error_DBConnectionError', ):
self.wait()
@@ -208,25 +212,32 @@
except TypeError:
raise RuntimeError(result)
- def wait(self, lag=None):
+ def wait(self):
"""Determine how long to wait after a failed request."""
self.max_retries -= 1
if self.max_retries < 0:
raise TimeoutError("Maximum retries attempted without success.")
- wait = self.retry_wait
- if lag is not None:
- # in case of database lag, wait half the lag time,
- # but not less than 5 or more than 120 seconds
- wait = max(5, min(lag // 2, 120))
logging.warn("Waiting %s seconds before retrying." % wait)
- time.sleep(wait)
- if lag is None:
- self.retry_wait = min(120, self.retry_wait * 2)
+ time.sleep(self.retry_wait)
+ # double the next wait, but do not exceed 120 seconds
+ self.retry_wait = min(120, self.retry_wait * 2)
+ def lag_wait(self, lag):
+ """Wait due to server lag."""
+ # unlike regular wait, this shuts down all access to site
+ self.site.sitelock.acquire()
+ try:
+ # wait at least 5 seconds, no more than 120
+ wait = max(5, min(120, lag//2))
+ logging.warn("Pausing %s seconds due to server lag." % wait)
+ time.sleep(wait)
+ finally:
+ self.site.sitelock.release()
+
class PageGenerator(object):
"""Iterator for response to a request of type action=query&generator=foo."""
- def __init__(self, generator="", **kwargs):
+ def __init__(self, generator, **kwargs):
"""
Required and optional parameters are as for C{Request}, except that
action=query is assumed and generator is required.
@@ -235,8 +246,6 @@
@type generator: str
"""
- if not generator:
- raise ValueError("generator argument is required.")
if generator not in self.limits:
raise ValueError("Unrecognized generator '%s'" % generator)
self.request = Request(action="query", generator=generator, **kwargs)
@@ -261,7 +270,6 @@
self.resultkey = "pages" # element to look for in result
# dict mapping generator types to their limit parameter names
-
limits = {'links': None,
'images': None,
'templates': None,
@@ -348,6 +356,75 @@
return image
+class PropertyGenerator(object):
+ """Generator for queries of type action=query&property=..."""
+
+ def __init__(self, prop, **kwargs):
+ """
+ Required and optional parameters are as for C{Request}, except that
+ action=query is assumed and prop is required.
+
+ @param prop: the "property=" type from api.php
+ @type prop: str
+
+ """
+ self.request = Request(action="query", prop=prop, **kwargs)
+ if prop not in self.limits:
+ raise ValueError("Unrecognized property '%s'" % prop)
+ # set limit to max, if applicable
+ if self.limits[prop] and kwargs.pop("getAll", False):
+ self.request['g'+self.limits[generator]] = "max"
+ self.site = self.request.site
+ self.resultkey = prop # element to look for in result
+
+ # dict mapping property types to their limit parameter names
+ limits = {'revisions': 'rvlimit',
+ 'imageinfo': 'iilimit',
+ 'info': None,
+ 'links': None,
+ 'langlinks': None,
+ 'images': None,
+ 'imageinfo': None,
+ 'templates': None,
+ 'categories': None,
+ 'extlinks': None,
+ }
+
+ def __iter__(self):
+ """Iterate objects for elements found in response."""
+ # this looks for the resultkey ''inside'' a <page> entry
+ while True:
+ self.site.get_throttle()
+ self.data = self.request.submit()
+ if not self.data or not isinstance(self.data, dict):
+ raise StopIteration
+ if not ("query" in self.data and "pages" in self.data["query"]):
+ raise StopIteration
+ pagedata = self.data["query"]["pages"].values()
+ assert len(pagedata)==1
+ pagedata = pagedata[0]
+ if not self.resultkey in pagedata:
+ raise StopIteration
+ if isinstance(pagedata[self.resultkey], dict):
+ for v in pagedata[self.resultkey].itervalues():
+ yield v
+ elif isinstance(pagedata[self.resultkey], list):
+ for v in pagedata[self.resultkey]:
+ yield v
+ else:
+ raise APIError("Unknown",
+ "Unknown format in ['%s'] value."
+ % self.resultkey,
+ data=pagedata[self.resultkey])
+ if not "query-continue" in self.data:
+ return
+ if not self.resultkey in self.data["query-continue"]:
+ raise APIError("Unknown",
+ "Missing '%s' key in ['query-continue'] value.",
+ data=self.data["query-continue"])
+ self.request.update(self.data["query-continue"][self.resultkey])
+
+
class LoginManager(login.LoginManager):
"""Supplies getCookie() method to use API interface."""
def getCookie(self, remember=True, captchaId=None, captchaAnswer=None):
Modified: branches/rewrite/pywikibot/date.py
===================================================================
--- branches/rewrite/pywikibot/date.py 2008-04-11 13:36:23 UTC (rev 5203)
+++ branches/rewrite/pywikibot/date.py 2008-04-11 20:29:11 UTC (rev 5204)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
"""
This file is not runnable, but it only consists of various
lists which are required by some other programs.
@@ -17,9 +17,7 @@
# used for date recognition
import types
import re
-import wikipedia
-
#
# Different collections of well known formats
#
@@ -1523,7 +1521,7 @@
"""
"""
for s in makeMonthNamedList( lang, pattern, capitalize ):
- wikipedia.output( s )
+ print( s )
def testMapEntry( formatName, showAll = True, value = None ):
@@ -1542,7 +1540,7 @@
if value is not None:
start, stop = value, value+1
if showAll:
- wikipedia.output(u"Processing %s with limits from %d to %d and step %d" % (formatName, start,stop-1,step))
+ print(u"Processing %s with limits from %d to %d and step %d" % (formatName, start,stop-1,step))
for code, convFunc in formats[formatName].iteritems():
# import time
@@ -1555,18 +1553,21 @@
if newValue != value:
raise AssertionError(" %s != %s: assert failed, values didn't match" % (newValue, value))
if showAll:
- wikipedia.output(u"date.formats['%s']['%s'](%d): '%s' -> %d" % (formatName, code, value, convFunc(value), newValue))
+ print(u"date.formats['%s']['%s'](%d): '%s' -> %d" % (formatName, code, value, convFunc(value), newValue))
except:
- wikipedia.output(u"********** Error in date.formats['%s']['%s'](%d)" % (formatName, code, value))
+ print(u"********** Error in date.formats['%s']['%s'](%d)" % (formatName, code, value))
raise
-# wikipedia.output( u"%s\t%s\t%f" % (formatName, code, time.clock() - startClock) )
+# print( u"%s\t%s\t%f" % (formatName, code, time.clock() - startClock) )
def test(quick = False, showAll = False):
- """This is a test function, to be used interactivelly to test entire format convesion map at once
+ """This is a test function, to be used interactively to test entire
+ format conversion map at once
+
Usage example:
run python interpreter
>>> import date
>>> date.test()
+
"""
for formatName in formats.keys():
@@ -1574,13 +1575,13 @@
testMapEntry( formatName, showAll, formatLimits[formatName][1] ) # Only test the first value in the test range
else:
testMapEntry( formatName, showAll ) # Extensive test! # Test decade rounding
- wikipedia.output(u"'%s' complete." % formatName)
+ print(u"'%s' complete." % formatName)
if quick:
- #wikipedia.output(u'Date module quick consistency test passed')
+ #print(u'Date module quick consistency test passed')
pass
else:
- wikipedia.output(u'Date module has been fully tested')
+ print(u'Date module has been fully tested')
#
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-04-11 13:36:23 UTC (rev 5203)
+++ branches/rewrite/pywikibot/page.py 2008-04-11 20:29:11 UTC (rev 5204)
@@ -283,7 +283,7 @@
raise self._getexception
if force or not hasattr(self, "_revid") \
or not self._revid in self._revisions:
- self.site().getrevisions(self, getText=True, ids=None, sysop=sysop)
+ self.site().getrevisions(self, getText=True, sysop=sysop)
# TODO: Exception handling for no-page, redirects, etc.
return self._revisions[self._revid].text
@@ -307,7 +307,8 @@
"Page.getOldVersion(change_edit_time) option is deprecated.")
if force or not oldid in self._revisions:
self.site().getrevisions(self, getText=True, ids=oldid,
- redirs=get_redirect, sysop=sysop)
+ sysop=sysop)
+ # TODO: what about redirects, errors?
return self._revisions[oldid].text
def permalink(self):
@@ -678,7 +679,7 @@
else:
limit = revCount
return self.site().getrevisions(self, withText=False,
- older=reverseOrder, limit=limit)
+ older=not reverseOrder, limit=limit)
def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
getAll=False, revCount=500):
@@ -701,8 +702,7 @@
@return: A generator that yields tuples consisting of revision ID,
edit date/time, user name and content
"""
- return self.site().getrevisions(self, withText=True,
- older=reverseOrder, limit=None)
+ return self.site().getrevisions(self, withText=True)
def contributingUsers(self):
"""Return a set of usernames (or IPs) of users who edited this page."""
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-11 13:36:23 UTC (rev 5203)
+++ branches/rewrite/pywikibot/site.py 2008-04-11 20:29:11 UTC (rev 5204)
@@ -100,11 +100,12 @@
self._username = user
# following are for use with lock_page and unlock_page methods
- self._mutex = threading.Lock()
+ self._pagemutex = threading.Lock()
self._locked_pages = []
pt_min = min(config.minthrottle, config.put_throttle)
- self.put_throttle = Throttle(self, pt_min, config.maxthrottle)
+ self.put_throttle = Throttle(self, pt_min, config.maxthrottle,
+ verbosedelay=True)
self.put_throttle.setDelay(config.put_throttle)
gt_min = min(config.minthrottle, config.get_throttle)
@@ -203,7 +204,6 @@
else:
return self.family().redirect.get(self.language(), None)
-
def lock_page(self, page, block=True):
"""Lock page for writing. Must be called before writing any page.
@@ -216,7 +216,7 @@
otherwise, raise an exception if page can't be locked
"""
- self._mutex.acquire()
+ self._pagemutex.acquire()
try:
while page in self._locked_pages:
if not block:
@@ -224,7 +224,7 @@
time.sleep(.25)
self._locked_pages.append(page.title(withSection=False))
finally:
- self._mutex.release()
+ self._pagemutex.release()
def unlock_page(self, page):
"""Unlock page. Call as soon as a write operation has completed.
@@ -233,11 +233,11 @@
@type page: pywikibot.Page
"""
- self._mutex.acquire()
+ self._pagemutex.acquire()
try:
self._locked_pages.remove(page.title(withSection=False))
finally:
- self._mutex.release()
+ self._pagemutex.release()
class APISite(BaseSite):
@@ -338,6 +338,7 @@
14: [u"Category"],
15: [u"Category talk"],
}
+ self.sitelock = threading.Lock()
return
# ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__()
@@ -600,14 +601,71 @@
"Cannot get category members of non-Category page '%s'"
% category.title())
cmtitle = category.title(withSection=False)
- cmgen = api.PageGenerator("categorymembers", gcmtitle=cmtitle,
+ cmgen = api.PageGenerator(u"categorymembers", gcmtitle=cmtitle,
gcmprop="ids|title|sortkey")
if namespaces is not None:
- cmgen.request["gcmnamespace"] = u"|".join(unicode(ns)
+ cmgen.request[u"gcmnamespace"] = u"|".join(unicode(ns)
for ns in namespaces)
return cmgen
+ def getrevisions(self, page=None, getText=False, revids=None,
+ older=True, limit=None, sysop=False, user=None,
+ excludeuser=None):
+ """Retrieve and store revision information.
+ @param page: retrieve the history of this Page (required unless ids
+ is specified)
+ @param getText: if True, retrieve the wiki-text of each revision as
+ well
+ @param revids: retrieve only the specified revision ids (required
+ unless page is specified)
+ @param older: if True, retrieve newest revisions first; otherwise,
+ retrieve oldest revisions first
+ @param limit: if specified, retrieve no more than this number of
+ revisions (defaults to latest revision only)
+ @type limit: int
+ @param user: retrieve only revisions authored by this user
+ @param excludeuser: retrieve all revisions not authored by this user
+ @param sysop: if True, switch to sysop account (if available) to
+ retrieve this page
+
+ """
+ if page is None and revids is None:
+ raise ValueError(
+ "getrevisions needs either page or revids argument.")
+ if page is not None:
+ rvtitle = page.title(withSection=False)
+ rvgen = api.PropertyGenerator(u"revisions", titles=rvtitle)
+ else:
+ ids = u"|".join(unicode(r) for r in revids)
+ rvgen = api.PropertyGenerator(u"revisions", revids=ids)
+ if getText:
+ rvgen.request[u"rvprop"] = \
+ u"ids|flags|timestamp|user|comment|content"
+ if page.section():
+ rvgen.request[u"rvsection"] = unicode(page.section())
+ if limit:
+ rvgen.request[u"rvlimit"] = unicode(limit)
+ if not older:
+ rvgen.request[u"rvdir"] = u"newer"
+ if user:
+ rvgen.request[u"rvuser"] = user
+ elif excludeuser:
+ rvgen.request[u"rvexcludeuser"] = excludeuser
+ # TODO if sysop:
+ for rev in rvgen:
+ revision = pywikibot.page.Revision(revid=rev['revid'],
+ timestamp=rev['timestamp'],
+ user=rev['user'],
+ anon=rev.has_key('anon'),
+ comment=rev.get('comment', u''),
+ minor=rev.has_key('minor'),
+ text=rev.get('*', None))
+ page._revisions[revision.revid] = revision
+ if revids is None and limit is None and user is None and excludeuser is None:
+ page._revid = revision.revid
+
+
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
class NotImplementedYet:
Modified: branches/rewrite/pywikibot/throttle.py
===================================================================
--- branches/rewrite/pywikibot/throttle.py 2008-04-11 13:36:23 UTC (rev 5203)
+++ branches/rewrite/pywikibot/throttle.py 2008-04-11 20:29:11 UTC (rev 5204)
@@ -35,7 +35,7 @@
"""
def __init__(self, site, mindelay=config.minthrottle,
maxdelay=config.maxthrottle,
- multiplydelay=True):
+ multiplydelay=True, verbosedelay=False):
self.lock = threading.RLock()
self.mysite = str(site)
self.mindelay = mindelay
@@ -48,6 +48,7 @@
self.releasepid = 1800 # Free the process id after this many seconds
self.lastwait = 0.0
self.delay = 0
+ self.verbosedelay = verbosedelay
if multiplydelay:
self.checkMultiplicity()
self.setDelay(mindelay)
@@ -106,9 +107,10 @@
f.write("%(pid)s %(time)s %(site)s\n" % p)
f.close()
self.process_multiplicity = count
- pywikibot.output(
+ if self.verbosedelay:
+ pywikibot.output(
u"Found %s processes running, including the current process."
- % count)
+ % count)
finally:
self.lock.release()