jenkins-bot has submitted this change and it was merged.
Change subject: Add maintenance script for the API cache
......................................................................
Add maintenance script for the API cache
Replace cache entry deletion in TestRequest with a new
script 'cache.py' which reports cache entries with a password
and allows them to be fixed.
cache.py also allows cache entries to be queried and deleted
based on other conditions.
Change-Id: I727af9b6fa10d2231e5d777a1bc6f8602ca03f50
---
M pywikibot/data/api.py
A scripts/maintenance/cache.py
M tests/__init__.py
3 files changed, 340 insertions(+), 7 deletions(-)
Approvals:
John Vandenberg: Looks good to me, but someone else must approve
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index bb5c1cb..1410205 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -454,6 +454,12 @@
pass
def _uniquedescriptionstr(self):
+ """ Unique description for the cache entry.
+
+ If this is modified, please also update
+ scripts/maintenance/cache.py to support
+ the new key and all previous keys. """
+
login_status = self.site._loginstatus
if login_status > pywikibot.site.LoginStatus.NOT_LOGGED_IN and \
diff --git a/scripts/maintenance/cache.py b/scripts/maintenance/cache.py
new file mode 100644
index 0000000..702e034
--- /dev/null
+++ b/scripts/maintenance/cache.py
@@ -0,0 +1,334 @@
+# -*- coding: utf-8 -*-
+"""
+This script runs commands on each entry in the API caches.
+
+Syntax: cache.py [-password] [-delete] [-c '...'] [dir ...]
+
+If no directory are specified, it will detect the API caches.
+
+If no command is specified, it will print the filename of all entries.
+If only -delete is specified, it will delete all entries.
+
+The option '-c' must be followed by a command in python syntax.
+
+Example commands:
+ Print the filename of any entry with 'wikidata' in the key:
+
+ entry if "wikidata" in entry._uniquedescriptionstr() else None
+
+ Customised output if the site code is 'ar':
+
+ entry.site.code == "ar" and \
+pywikibot.output("%s" % entry._uniquedescriptionstr())
+
+ Or the state of the login
+ entry.site._loginstatus == LoginStatus.NOT_ATTEMPTED and \
+pywikibot.output("%s" % entry._uniquedescriptionstr())
+
+ These functions can be used as a command:
+ has_password(entry)
+ is_logout(entry)
+ empty_response(entry)
+ not_accessed(entry)
+ incorrect_hash(entry)
+ older_than_one_day(entry)
+ recent(entry)
+
+ There are helper functions which can be part of a command:
+ older_than(entry, interval)
+ newer_than(entry, interval)
+"""
+#
+# (C) Pywikibot team, 2014
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import os
+import datetime
+import pickle
+import hashlib
+import pywikibot
+from pywikibot.data import api
+
+from pywikibot.site import APISite, DataSite, LoginStatus # noqa
+from pywikibot.page import User # noqa
+
+
+class ParseError(Exception):
+ """ Error parsing. """
+
+
+class CacheEntry(api.CachedRequest):
+
+ def __init__(self, directory, filename):
+ """ Constructor. """
+ self.directory = directory
+ self.filename = filename
+
+ def __str__(self):
+ return self.filename
+
+ def __repr__(self):
+ return self._cachefile_path()
+
+ def _create_file_name(self):
+ """ Filename of the cached entry. """
+ return self.filename
+
+ def _get_cache_dir(self):
+ """ Directory of the cached entry. """
+ return self.directory
+
+ def _load_cache(self):
+ """ Load the cache entry. """
+ with open(self._cachefile_path(), 'rb') as f:
+ self.key, self._data, self._cachetime = pickle.load(f)
+ return True
+
+ def parse_key(self):
+ """ Parse the key loaded from the cache entry. """
+
+ # find the start of the first parameter
+ start = self.key.index('(')
+ # find the end of the first object
+ end = self.key.index(')')
+
+ if not end:
+ raise ParseError('End of Site() keyword not found: %s' % self.key)
+
+ if 'Site' not in self.key[0:start]:
+ raise ParseError('Site() keyword not found at start of key: %s'
+ % self.key)
+
+ site = self.key[0:end + 1]
+ if site[0:5] == 'Site(':
+ site = 'APISite(' + site[5:]
+
+ username = None
+ login_status = None
+
+ start = end + 1
+ if self.key[start:start + 5] == 'User(':
+ # The addition of user to the cache key used:
+ # repr(User)
+ # which includes namespaces resulting in:
+ # User(User:<username>)
+ # This also accepts User(<username>)
+ if self.key[start:start + 10] == 'User(User:':
+ start += 10
+ else:
+ start += 5
+
+ end = self.key.index(')', start + 5)
+ if not end:
+ raise ParseError('End of User() keyword not found: %s'
+ % self.key)
+ username = self.key[start:end]
+ elif self.key[start:start + 12] == 'LoginStatus(':
+ end = self.key.index(')', start + 12)
+ if not end:
+ raise ParseError('End of LoginStatus() keyword not found: %s'
+ % self.key)
+ login_status = self.key[start:end + 1]
+ # If the key does not contain User(..) or LoginStatus(..),
+ # it must be the old key format which only contains Site and params
+ elif self.key[start:start + 3] != "[('":
+ raise ParseError('Keyword after Site not recognised: %s...'
+ % self.key)
+
+ start = end + 1
+
+ params = self.key[start:]
+
+ self._parsed_key = (site, username, login_status, params)
+ return self._parsed_key
+
+ def _rebuild(self):
+ """ Reconstruct the original Request from the key. """
+ if hasattr(self, '_parsed_key'):
+ (site, username, login_status, params) = self._parsed_key
+ else:
+ (site, username, login_status, params) = self.parse_key()
+ if site:
+ self.site = eval(site)
+ if login_status:
+ self.site._loginstatus = eval('LoginStatus.%s'
+ % login_status[12:-1])
+ if username:
+ self.site._username = [username, username]
+ if params:
+ self.params = dict(eval(params))
+
+ def _delete(self):
+ """ Delete the cache entry. """
+ os.remove(self._cachefile_path())
+
+
+def process_entries(cache_dir, func):
+ """ Check the contents of the cache. """
+
+ # This program tries to use file access times to determine
+ # whether cache files are being used.
+ # However file access times are not always usable.
+ # On many modern filesystems, they have been disabled.
+ # On unix, check the filesystem mount options. You may
+ # need to remount with 'strictatime'.
+ # - None = detect
+ # - False = dont use
+ # - True = always use
+ use_accesstime = None
+
+ if not cache_dir:
+ cache_dir = os.path.join(pywikibot.config2.base_dir, 'apicache')
+ for filename in os.listdir(cache_dir):
+ filepath = os.path.join(cache_dir, filename)
+ if use_accesstime is not False:
+ stinfo = os.stat(filepath)
+
+ entry = CacheEntry(cache_dir, filename)
+ entry._load_cache()
+
+ if use_accesstime is None:
+ stinfo2 = os.stat(filepath)
+ use_accesstime = stinfo.st_atime != stinfo2.st_atime
+
+ if use_accesstime:
+ # Reset access times to values before loading cache entry.
+ os.utime(filepath, (stinfo.st_atime, stinfo.st_mtime))
+ entry.stinfo = stinfo
+
+ try:
+ entry.parse_key()
+ except ParseError:
+ pywikibot.error(u'Problems parsing %s with key %s'
+ % (entry.filename, entry.key))
+ pywikibot.exception()
+ continue
+
+ try:
+ entry._rebuild()
+ except Exception:
+ pywikibot.error(u'Problems loading %s with key %s, %r'
+ % (entry.filename, entry.key, entry._parsed_key))
+ pywikibot.exception()
+ continue
+
+ func(entry)
+
+
+def has_password(entry):
+ """ has a password in the entry """
+ if 'lgpassword' in entry._uniquedescriptionstr():
+ return entry
+
+
+def is_logout(entry):
+ """ is a logout entry """
+ if not entry._data and 'logout' in entry.key:
+ return entry
+
+
+def empty_response(entry):
+ """ has no data """
+ if not entry._data and 'logout' not in entry.key:
+ return entry
+
+
+def not_accessed(entry):
+ """ has never been accessed """
+ if not hasattr(entry, 'stinfo'):
+ return
+
+ if entry.stinfo.st_atime <= entry.stinfo.st_mtime:
+ return entry
+
+
+def incorrect_hash(entry):
+ if hashlib.sha256(entry.key.encode('utf-8')).hexdigest() != entry.filename:
+ return entry
+
+
+def older_than(entry, interval):
+ if entry._cachetime + interval < datetime.datetime.now():
+ return entry
+
+
+def newer_than(entry, interval):
+ if entry._cachetime + interval >= datetime.datetime.now():
+ return entry
+
+
+def older_than_one_day(entry):
+ if older_than(entry, datetime.timedelta(days=1)):
+ return entry
+
+
+def recent(entry):
+ if newer_than(entry, datetime.timedelta(hours=1)):
+ return entry
+
+
+def main():
+ local_args = pywikibot.handleArgs()
+ cache_dirs = None
+ delete = False
+ command = None
+
+ for arg in local_args:
+ if command == '':
+ command = arg
+ elif arg == '-delete':
+ delete = True
+ elif arg == '-password':
+ command = 'has_password(entry)'
+ elif arg == '-c':
+ if command:
+ pywikibot.error('Only one command may be executed.')
+ exit(1)
+ command = ''
+ else:
+ cache_dir = [arg]
+
+ func = None
+
+ if not cache_dirs:
+ cache_dirs = ['apicache', 'tests/apicache']
+
+ # Also process the base directory, if it isnt the current directory
+ if os.path.abspath(os.getcwd()) != pywikibot.config2.base_dir:
+ cache_dirs += [
+ os.path.join(pywikibot.config2.base_dir, 'apicache')]
+
+ # Also process the user home cache, if it isnt the config directory
+ if os.path.expanduser('~/.pywikibot') != pywikibot.config2.base_dir:
+ cache_dirs += [
+ os.path.join(os.path.expanduser('~/.pywikibot'), 'apicache')]
+
+ if delete:
+ action_func = lambda entry: entry._delete()
+ else:
+ action_func = lambda entry: pywikibot.output(entry)
+
+ if command:
+ try:
+ command_func = eval('lambda entry: ' + command)
+ except:
+ pywikibot.exception()
+ pywikibot.error(u'Can not compile command: %s' % command)
+ exit(1)
+
+ func = lambda entry: command_func(entry) and action_func(entry)
+ else:
+ func = action_func
+
+ for cache_dir in cache_dirs:
+ if os.path.isdir(cache_dir):
+ if len(cache_dirs) > 1:
+ pywikibot.output(u'Processing %s' % cache_dir)
+ process_entries(cache_dir, func)
+
+if __name__ == '__main__':
+ main()
diff --git a/tests/__init__.py b/tests/__init__.py
index 3ac2361..4d3ce5a 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -34,17 +34,10 @@
return False
if 'lgpassword' in self._uniquedescriptionstr():
- self._delete_cache()
self._data = None
return False
return True
-
- def _delete_cache(self):
- """Delete cached response if it exists."""
- self._load_cache()
- if self._cachetime:
- os.remove(self._cachefile_path())
def _write_cache(self, data):
"""Write data except login details."""
--
To view, visit
https://gerrit.wikimedia.org/r/144144
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I727af9b6fa10d2231e5d777a1bc6f8602ca03f50
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>