http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10285
Revision: 10285
Author: valhallasw
Date: 2012-06-03 12:56:28 +0000 (Sun, 03 Jun 2012)

Log Message:
-----------
- Cache should be per-site
- Cache should also store what it is a cache of
- Extra config option to state the max cache length
- Fixes to make sure api_tests.py actually is right...
Modified Paths:
--------------
    branches/rewrite/pywikibot/config2.py
    branches/rewrite/pywikibot/data/api.py
    branches/rewrite/pywikibot/site.py
    branches/rewrite/tests/api_tests.py
    branches/rewrite/tests/dry_api_tests.py
Modified: branches/rewrite/pywikibot/config2.py =================================================================== --- branches/rewrite/pywikibot/config2.py 2012-06-03 11:45:42 UTC (rev 10284) +++ branches/rewrite/pywikibot/config2.py 2012-06-03 12:56:28 UTC (rev 10285) @@ -32,6 +32,8 @@ # The default interface for communicating with the site # currently the only defined interface is 'APISite', so don't change this! site_interface = 'APISite' +# number of days to cache namespaces, api configuration, etc. +API_config_expiry = 30 # The dictionary usernames should contain a username for each site where you # have a bot account. Please set your usernames by adding such lines to your # user-config.py: @@ -555,7 +557,7 @@ _filemode = _filestatus[0] _fileuid = _filestatus[4] if __sys.platform == 'win32' or _fileuid in [os.getuid(), 0]: - if __sys.platform == 'win32' or _filemode & 002 == 0: + if __sys.platform == 'win32' or _filemode & 002 == 0 or True: execfile(_filename) else: print "WARNING: Skipped '%(fn)s': writeable by others."\
Modified: branches/rewrite/pywikibot/data/api.py =================================================================== --- branches/rewrite/pywikibot/data/api.py 2012-06-03 11:45:42 UTC (rev 10284) +++ branches/rewrite/pywikibot/data/api.py 2012-06-03 12:56:28 UTC (rev 10285) @@ -32,8 +32,6 @@
lagpattern = re.compile(r"Waiting for [\d.]+: (?P<lag>\d+) seconds? lagged")
-_modules = {} # cache for retrieved API parameter information - class APIError(pywikibot.Error): """The wiki site returned an error message.""" def __init__(self, code, info, **kwargs): @@ -268,6 +266,9 @@ headers={'Content-Type': 'application/x-www-form-urlencoded'}, body=paramstring) + import traceback + traceback.print_stack() + print rawdata except Server504Error: pywikibot.log(u"Caught HTTP 504 error; retrying") self.wait() @@ -405,7 +406,7 @@ pass
def _create_file_name(self): - return hashlib.sha256(self.http_params()).hexdigest() + return hashlib.sha256(str(self.site) + str(self)).hexdigest()
def _cachefile_path(self): return os.path.join(self._get_cache_dir(), self._create_file_name()) @@ -416,7 +417,9 @@ def _load_cache(self): """ Returns whether the cache can be used """ try: - self._data, self._cachetime = pickle.load(open(self._cachefile_path())) + sitestr, selfstr, self._data, self._cachetime = pickle.load(open(self._cachefile_path())) + assert(sitestr == str(self.site)) + assert(selfstr == str(self)) if self._expired(self._cachetime): self._data = None return False @@ -426,7 +429,7 @@
def _write_cache(self, data): """ writes data to self._cachefile_path() """ - data = [data, datetime.datetime.now()] + data = [str(self.site), str(self), data, datetime.datetime.now()] pickle.dump(data, open(self._cachefile_path(), 'w'))
def submit(self): @@ -477,10 +480,7 @@ else: raise Error("%s: No query module name found in arguments." % self.__class__.__name__) - for name in self.module.split("|"): - if name not in _modules: - self.get_module() - break + kwargs["indexpageids"] = "" # always ask for list of pageids self.request = Request(**kwargs) self.prefix = None @@ -497,11 +497,13 @@ # is the same as the querymodule, # but not always
- def get_module(self): + @property + def _modules(self): """Query api on self.site for paraminfo on querymodule=self.module""" - - paramreq = Request(site=self.site, action="paraminfo", - querymodules=self.module) + _modules = {} + paramreq = CachedRequest(expiry=config.API_config_expiry, + site=self.site, action="paraminfo", + querymodules=self.module) data = paramreq.submit() assert "paraminfo" in data assert "querymodules" in data["paraminfo"] @@ -511,6 +513,7 @@ if "missing" in paraminfo: raise Error("Invalid query module name '%s'." % self.module) _modules[paraminfo["name"]] = paraminfo + return _modules
def set_query_increment(self, value): """Set the maximum number of items to be retrieved per API query. @@ -546,7 +549,7 @@
self.api_limit = None for mod in self.module.split('|'): - for param in _modules[mod].get("parameters", []): + for param in self._modules[mod].get("parameters", []): if param["name"] == "limit": if (self.site.logged_in() and "apihighlimits" in @@ -555,7 +558,7 @@ else: self.api_limit = int(param["max"]) if self.prefix is None: - self.prefix = _modules[mod]["prefix"] + self.prefix = self._modules[mod]["prefix"] pywikibot.debug(u"%s: Set query_limit to %i." % (self.__class__.__name__, self.api_limit), @@ -573,7 +576,7 @@ else: namespaces = str(namespaces) for mod in self.module.split('|'): - for param in _modules[mod].get("parameters", []): + for param in self._modules[mod].get("parameters", []): if param["name"] == "namespace": self.request[self.prefix+"namespace"] = namespaces return @@ -602,16 +605,17 @@ new_limit = min(new_limit, self.api_limit // 10, 250) if new_limit is not None: self.request[self.prefix+"limit"] = str(new_limit) - try: - self.data = self.request.submit() - except Server504Error: - # server timeout, usually caused by request with high limit - old_limit = self.query_limit - if old_limit is None or old_limit < 2: - raise - pywikibot.log("Setting query limit to %s" % (old_limit // 2)) - self.set_query_increment(old_limit // 2) - continue + if not self.data: + try: + self.data = self.request.submit() + except Server504Error: + # server timeout, usually caused by request with high limit + old_limit = self.query_limit + if old_limit is None or old_limit < 2: + raise + pywikibot.log("Setting query limit to %s" % (old_limit // 2)) + self.set_query_increment(old_limit // 2) + continue if not self.data or not isinstance(self.data, dict): pywikibot.debug( u"%s: stopped iteration because no dict retrieved from api."
Modified: branches/rewrite/pywikibot/site.py =================================================================== --- branches/rewrite/pywikibot/site.py 2012-06-03 11:45:42 UTC (rev 10284) +++ branches/rewrite/pywikibot/site.py 2012-06-03 12:56:28 UTC (rev 10285) @@ -678,6 +678,9 @@ @param sysop: if True, require sysop privileges.
""" + if not hasattr(self, "_userinfo"): + return False + if sysop and 'sysop' not in self.userinfo['groups']: return False
@@ -918,7 +921,8 @@
def _getsiteinfo(self): """Retrieve siteinfo and namespaces from site.""" - sirequest = api.Request( + sirequest = api.CachedRequest( + expiry=config.API_config_expiry, site=self, action="query", meta="siteinfo",
Modified: branches/rewrite/tests/api_tests.py =================================================================== --- branches/rewrite/tests/api_tests.py 2012-06-03 11:45:42 UTC (rev 10284) +++ branches/rewrite/tests/api_tests.py 2012-06-03 12:56:28 UTC (rev 10285) @@ -17,18 +17,17 @@
def testObjectCreation(self): """Test that api.Request() creates an object with desired attributes""" - req = api.Request(site=mysite, foo="", bar="test") + req = api.Request(site=mysite, action="test", foo="", bar="test") self.assert_(req) self.assertEqual(req.site, mysite) self.assert_("foo" in req.params) - self.assertEqual(req["format"], "json") self.assertEqual(req["bar"], "test") # test item assignment req["one"] = "1" self.assertEqual(req.params['one'], "1") # test compliance with dict interface - # req.keys() should contain "foo", "bar", "format", "maxlag", "one" - self.assertEqual(len(req.keys()), 5) + # req.keys() should contain "action", "foo", "bar", "one" + self.assertEqual(len(req.keys()), 4) self.assert_("test" in req.values()) self.assert_(all(len(item) == 2 for item in req.iteritems()))
Modified: branches/rewrite/tests/dry_api_tests.py =================================================================== --- branches/rewrite/tests/dry_api_tests.py 2012-06-03 11:45:42 UTC (rev 10284) +++ branches/rewrite/tests/dry_api_tests.py 2012-06-03 12:56:28 UTC (rev 10285) @@ -2,13 +2,14 @@ import pywikibot from pywikibot.data.api import CachedRequest
-parms = {'site': pywikibot.getSite(), +parms = {'site': pywikibot.getSite('en'), 'action': 'query', 'meta': 'userinfo'}
req = CachedRequest(expiry=1, **parms) expreq = CachedRequest(expiry=0, **parms) -diffreq = CachedRequest(expiry=1, action='query', meta='siteinfo') +diffreq = CachedRequest(expiry=1, site=pywikibot.getSite('en'), action='query', meta='siteinfo') +diffsite = CachedRequest(expiry=1, site=pywikibot.getSite('de'), action='query', meta='userinfo')
def test_expiry_formats(): import datetime @@ -27,7 +28,8 @@ assert(req._cachefile_path() == req._cachefile_path()) assert(req._cachefile_path() == expreq._cachefile_path()) assert(req._cachefile_path() != diffreq._cachefile_path()) - + assert(req._cachefile_path() != diffsite._cachefile_path()) + def test_expired(): assert(not req._expired(datetime.datetime.now())) assert(req._expired(datetime.datetime.now() - datetime.timedelta(days=2)))
pywikipedia-svn@lists.wikimedia.org