jenkins-bot has submitted this change and it was merged.
Change subject: ParamInfo support for 1.11-1.14
ParamInfo support for 1.11-1.14
Paraminfo was introduced in 1.12, and until 1.15 it did not include the 'main' module, which is used to obtain the list of action modules, or the 'pageset' module.
To work around this, the API help is parsed to obtain the basic parameter information that pywikibot depends on.
Also, paraminfo before 1.14 does not include the 'limit' attribute on parameters that can have multiple values.
Change-Id: I8a7ae7147554e87b9b097684d4381988ea541b03 --- M pywikibot/data/api.py M tests/api_tests.py 2 files changed, 273 insertions(+), 13 deletions(-)
Approvals: XZise: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index 9a9c030..5cc1e87 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -139,6 +139,12 @@
Provides cache aware fetching of parameter information.
+ Full support for MW 1.12+, when 'paraminfo' was introduced to the API. + Partially supports MW 1.11, using data extracted from API 'help'. + MW 1.10 not supported as module prefixes are not extracted from API 'help'. + + TODO: Rewrite help parser to support earlier releases. + TODO: establish a data structure in the class which prefills the param information available for a site given its version, using the API information available for each @@ -168,7 +174,7 @@ @type preloaded_modules: set of string @param modules_only_mode: use the 'modules' only syntax for API request @type: modules_only_mode: bool or None to only use default, which True - if the site is 1.25wm4+ + if the site is 1.25wmf4+ """ self.site = site
@@ -194,6 +200,10 @@
def _init(self): _mw_ver = MediaWikiVersion(self.site.version()) + + if _mw_ver < MediaWikiVersion('1.15'): + self._parse_help(_mw_ver) + # The paraminfo api deprecated the old request syntax of # querymodules='info'; to avoid warnings sites with 1.25wmf4+ # must only use 'modules' parameter. @@ -271,12 +281,14 @@ self.__inited = True
def _emulate_pageset(self): + """Emulate the pageset module, which existed in MW 1.15-1.24.""" # pageset isnt a module in the new system, so it is emulated, with # the paraminfo from the query module. assert('query' in self._paraminfo)
self._paraminfo['pageset'] = { 'name': 'pageset', + 'path': 'pageset', 'classname': 'ApiPageSet', 'prefix': '', 'readrights': '', @@ -284,9 +296,209 @@ 'parameters': self._paraminfo['query']['parameters'] }
+ def _parse_help(self, _mw_ver): + """Emulate paraminfo data using help.""" + # 1.14 paraminfo 'main' module doesnt exist. + # paraminfo only exists 1.12+. + + # Request need ParamInfo to determine use_get + request = CachedRequest(expiry=config.API_config_expiry, + use_get=True, site=self.site, action='help') + result = request.submit() + + assert('help' in result) + assert(isinstance(result['help'], dict)) + assert('mime' in result['help']) + assert(result['help']['mime'] == 'text/plain') + assert('help' in result['help']) + assert(isinstance(result['help']['help'], basestring)) + + help_text = result['help']['help'] + + start = help_text.find('What action you would like to perform') + start = help_text.find('One value: ', start) + len('One value: ') + end = help_text.find('\n', start) + + action_modules = help_text[start:end].split(', ') + + self._paraminfo['main'] = { + 'name': 'main', + 'path': 'main', + 'classname': 'ApiMain', + 'prefix': '', + 'readrights': '', + 'helpurls': [], + 'parameters': [ + { + "name": "action", + 'type': action_modules, + }, + ], + } + + if _mw_ver >= MediaWikiVersion('1.12'): + return + + query_help_list_prefix = "Values (separate with '|'): " + + start = help_text.find('Which properties to get') + start = help_text.find(query_help_list_prefix, start) + start += len(query_help_list_prefix) + end = help_text.find('\n', start) + + prop_modules = help_text[start:end].split(', ') + + start = help_text.find('Which lists to get') + start = help_text.find(query_help_list_prefix, start) + start += len(query_help_list_prefix) + end = help_text.find('\n', start) + + list_modules = help_text[start:end].split(', ') + + start = help_text.find('Which meta data to get') + start = help_text.find(query_help_list_prefix, start) + start += len(query_help_list_prefix) + end = help_text.find('\n', start) + + meta_modules = help_text[start:end].split(', ') + + start = help_text.find('Use the output of a list as the input') + start = 
help_text.find(query_help_list_prefix, start) + start += len(query_help_list_prefix) + end = help_text.find('\n', start) + + gen_modules = help_text[start:end].split(', ') + + self._paraminfo['paraminfo'] = { + 'name': 'paraminfo', + 'path': 'paraminfo', + 'classname': 'ApiParamInfo', + 'prefix': '', + 'readrights': '', + 'helpurls': [], + 'parameters': [ + { + 'name': 'querymodules', + 'type': (prop_modules + list_modules + + meta_modules + gen_modules), + 'limit': 50, + }, + ], + } + + self._paraminfo['query'] = { + 'name': 'query', + 'path': 'query', + 'classname': 'ApiQuery', + 'prefix': '', + 'readrights': '', + 'helpurls': [], + 'parameters': [ + { + 'name': 'prop', + 'type': prop_modules, + }, + { + 'name': 'list', + 'type': list_modules, + }, + { + 'name': 'meta', + 'type': meta_modules, + }, + { + 'name': 'generator', + 'type': gen_modules, + }, + ], + } + + # TODO: rewrite 'help' parser to determine prefix from the parameter + # names, as API 1.10 help does not include prefix on the first line. + + for mod_type in ['action', 'prop', 'list', 'meta', 'generator']: + if mod_type == 'action': + submodules = self.parameter('main', mod_type)['type'] + path_prefix = '' + else: + submodules = self.parameter('query', mod_type) + submodules = submodules['type'] + path_prefix = 'query+' + + for submodule in submodules: + mod_begin_string = '* %s=%s' % (mod_type, submodule) + start = help_text.find(mod_begin_string) + assert(start) + start += len(mod_begin_string) + + if help_text[start + 1] == '(' and help_text[start + 4] == ')': + prefix = help_text[start + 2:start + 4] + else: + prefix = '' + + path = path_prefix + submodule + + # query is added above; some query modules appear as both + # prop and generator, and the generator doesnt have a + # prefix in the help. 
+ if path not in self._paraminfo: + php_class = 'Api' + if mod_type == 'action': + php_class += 'Query' + # This doesnt correctly derive PHP class names where there + # are additional uppercase letters in the class name. + php_class += submodule.title() + + self._paraminfo[path] = { + 'name': submodule, + 'path': path, + 'classname': php_class, + 'prefix': prefix, + 'readrights': '', + 'helpurls': [], + 'parameters': [], + } + + if not prefix: + continue + + params = {} + + # Check existence of parameters used frequently by pywikibot. + # TODO: for each parameter, parse list of values ('type') + if prefix + 'limit' in help_text: + params['limit'] = { + 'name': 'limit', + 'type': 'limit', + 'max': 50, + } + + if prefix + 'namespace' in help_text: + params['namespace'] = { + 'name': 'namespace', + 'type': 'namespace', + } + if not submodule.startswith('all'): + params['namespace']['multi'] = '' + + for param_name in ['token', 'prop', 'type', 'show']: + if prefix + param_name in help_text: + params[param_name] = { + 'name': param_name, + 'type': [], + 'multi': '', + } + + self._paraminfo[path]['parameters'] = params.values() + + self._emulate_pageset() + def fetch(self, modules, _init=False): """ Fetch paraminfo for multiple modules. + + No exception is raised when paraminfo for a module does not exist. + Use __getitem__ to cause an exception if a module does not exist.
@param modules: API modules to load @type modules: set @@ -307,10 +519,20 @@
assert(self._query_modules or _init)
+ if MediaWikiVersion(self.site.version()) < MediaWikiVersion("1.12"): + # When the help is parsed, all paraminfo should already be loaded + # and the caller is responsible for detecting missing modules. + pywikibot.log('ParamInfo did not detect modules: %s' + % modules, _logger=_logger) + return + # This can be further optimised, by grouping them in more stable # subsets, which are unlikely to change. i.e. first request core # modules which have been a stable part of the API for a long time. # Also detecting extension based modules may help. + # Also, when self.modules_only_mode is disabled, both modules and + # querymodules may each be filled with self._limit items, doubling the + # number of modules that may be processed in a single batch. for module_batch in itergroup(sorted(modules), self._limit): if self.modules_only_mode and 'pageset' in module_batch: pywikibot.debug('paraminfo fetch: removed pageset', _logger) @@ -350,7 +572,7 @@
self._paraminfo.update(normalized_result)
- if self.modules_only_mode and 'pageset' in modules: + if 'pageset' in modules and 'pageset' not in self._paraminfo: self._emulate_pageset()
def _normalize_modules(self, modules): @@ -455,19 +677,21 @@ return False
def __len__(self): - """Obtain length of the iterable.""" + """Return number of cached modules.""" return len(self._paraminfo)
def parameter(self, module, param_name): """ Get details about one modules parameter.
+ Returns None if the parameter does not exist. + @param module: API module name @type module: str @param param_name: parameter name in the module @type param_name: str @return: metadata that describes how the parameter may be used - @rtype: dict + @rtype: dict or None """ # TODO: the 'description' field of each parameter is not in the default # output of v1.25, and cant removed from previous API versions. @@ -488,7 +712,16 @@ params = module['parameters'] param_data = [param for param in params if param['name'] == param_name] - return param_data[0] if len(param_data) else None + + if not param_data: + return None + + assert(len(param_data) == 1) + param_data = param_data[0] + # pre 1.14 doesnt provide limit attribute on parameters + if 'multi' in param_data and 'limit' not in param_data: + param_data['limit'] = self._limit + return param_data
@property def modules(self): diff --git a/tests/api_tests.py b/tests/api_tests.py index 8a495fc..28b8194 100644 --- a/tests/api_tests.py +++ b/tests/api_tests.py @@ -70,8 +70,9 @@
self.assertIn('main', pi._paraminfo) self.assertIn('paraminfo', pi._paraminfo) - self.assertEqual(len(pi), - len(pi.preloaded_modules)) + if MediaWikiVersion(self.site.version()) >= MediaWikiVersion("1.12"): + self.assertEqual(len(pi), + len(pi.preloaded_modules))
self.assertIn('info', pi._query_modules) self.assertIn('login', pi._action_modules) @@ -87,6 +88,9 @@ self.assertIn('main', pi._paraminfo) self.assertIn('paraminfo', pi._paraminfo) self.assertIn('pageset', pi._paraminfo) + + if MediaWikiVersion(self.site.version()) < MediaWikiVersion("1.12"): + return
if 'query' in pi.preloaded_modules: self.assertIn('query', pi._paraminfo) @@ -130,8 +134,9 @@
self.assertIn('main', pi._paraminfo) self.assertIn('paraminfo', pi._paraminfo) - self.assertEqual(len(pi), - 1 + len(pi.preloaded_modules)) + if MediaWikiVersion(self.site.version()) >= MediaWikiVersion("1.12"): + self.assertEqual(len(pi), + 1 + len(pi.preloaded_modules))
self.assertEqual(pi['info']['prefix'], 'in')
@@ -142,6 +147,10 @@ self.assertNotIn('deprecated', param)
self.assertIsInstance(param['type'], list) + + if MediaWikiVersion(self.site.version()) < MediaWikiVersion("1.12"): + return + self.assertIn('protection', param['type'])
def test_with_module_revisions(self): @@ -153,8 +162,9 @@
self.assertIn('main', pi._paraminfo) self.assertIn('paraminfo', pi._paraminfo) - self.assertEqual(len(pi), - 1 + len(pi.preloaded_modules)) + if MediaWikiVersion(self.site.version()) >= MediaWikiVersion("1.12"): + self.assertEqual(len(pi), + 1 + len(pi.preloaded_modules))
self.assertEqual(pi['revisions']['prefix'], 'rv')
@@ -165,6 +175,10 @@ self.assertNotIn('deprecated', param)
self.assertIsInstance(param['type'], list) + + if MediaWikiVersion(self.site.version()) < MediaWikiVersion("1.12"): + return + self.assertIn('user', param['type'])
def test_multiple_modules(self): @@ -177,6 +191,10 @@
self.assertIn('main', pi._paraminfo) self.assertIn('paraminfo', pi._paraminfo) + + if MediaWikiVersion(self.site.version()) < MediaWikiVersion("1.12"): + return + self.assertEqual(len(pi), 2 + len(pi.preloaded_modules))
@@ -187,8 +205,14 @@ pi.fetch('foobar') self.assertNotIn('foobar', pi._paraminfo)
+ self.assertRaises(KeyError, pi.__getitem__, 'foobar') + self.assertIn('main', pi._paraminfo) self.assertIn('paraminfo', pi._paraminfo) + + if MediaWikiVersion(self.site.version()) < MediaWikiVersion("1.12"): + return + self.assertEqual(len(pi), len(pi.preloaded_modules))
@@ -222,8 +246,10 @@
self.assertIn('main', pi._paraminfo) self.assertIn('paraminfo', pi._paraminfo) - self.assertEqual(len(pi), - 1 + len(pi.preloaded_modules)) + + if MediaWikiVersion(self.site.version()) >= MediaWikiVersion("1.12"): + self.assertEqual(len(pi), + 1 + len(pi.preloaded_modules))
self.assertIn('revisions', pi.prefixes)
@@ -239,6 +265,7 @@
self.assertIn('main', pi._paraminfo) self.assertIn('paraminfo', pi._paraminfo) + self.assertEqual(len(pi), 1 + len(pi.preloaded_modules))