jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Request: Support simplified continuation
......................................................................
[FEAT] Request: Support simplified continuation
With version 1.25wmf5 a warning is emitted that the continue parameter
is going to change (af11268d05d98d27d99cfee9264d649a62c2c816). It is
possible to avoid that warning by explicitly telling the server which
continuation mode to expect.
This adds the ability of QueryGenerator to use the simplified
continuation method as soon as possible.
If the server is using a version of at least 1.25wmf5 it'll
automatically add 'rawcontinue' to the request if neither 'continue' nor
'rawcontinue' are set. This will avoid any warning emitted and in the
future (if by default it uses the simplified continuation mode) it'll
use the old continuation mode for which the scripts are probably
written. To avoid an infinite loop, because requesting the server
version requires a request which requires (raw-)continue in the
parameters, siteinfo uses the simplified continuation mode by always
adding "continue=''" to the parameters.
Change-Id: Iddbf4922c60c0f3900b88cb81e4c176855e33fc3
---
M pywikibot/data/api.py
M pywikibot/site.py
2 files changed, 51 insertions(+), 21 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index 8a0dda3..0ecaeae 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -196,6 +196,8 @@
>> data = r.submit() # doctest: +IGNORE_UNICODE
>> isinstance(data, dict)
True
+ >>> set(['query', 'batchcomplete',
'warnings']).issuperset(data.keys())
+ True
>> 'query' in data
True
>> sorted(data[u'query'].keys()) #
doctest: +IGNORE_UNICODE
@@ -376,6 +378,13 @@
inprop = self._params.get("inprop", [])
info = set(inprop + ["protection", "talkid",
"subjectid"])
self._params["info"] = list(info)
+ # When neither 'continue' nor 'rawcontinue' is present and
the
+ # version number is at least 1.25wmf5 we add a dummy rawcontinue
+ # parameter. Querying siteinfo is safe as it adds 'continue'.
+ if ('continue' not in self._params and
+ 'rawcontinue' not in self._params and
+ LV(self.site.version()) >= LV('1.25wmf5')):
+ self._params['rawcontinue'] = ['']
if "maxlag" not in self._params and config.maxlag:
self._params["maxlag"] = [str(config.maxlag)]
if "format" not in self._params:
@@ -828,7 +837,7 @@
"""Base class for iterators that handle responses to API
action=query.
By default, the iterator will iterate each item in the query response,
- and use the query-continue element, if present, to continue iterating as
+ and use the (query-)continue element, if present, to continue iterating as
long as the wiki returns additional values. However, if the iterator's
limit attribute is set to a positive int, the iterator will stop after
iterating that many values. If limit is negative, the limit parameter
@@ -868,6 +877,14 @@
% self.__class__.__name__)
kwargs["indexpageids"] = "" # always ask for list of
pageids
+ if LV(self.site.version()) < LV('1.21'):
+ self.continue_name = 'query-continue'
+ self.continue_update = self._query_continue
+ else:
+ self.continue_name = 'continue'
+ self.continue_update = self._continue
+ # Explicitly enable the simplified continuation
+ kwargs['continue'] = ''
self.request = Request(**kwargs)
self.prefix = None
self.api_limit = None
@@ -881,7 +898,7 @@
else: # to look for when iterating
self.resultkey = self.module
- # usually the query-continue key is the same as the querymodule,
+ # usually the (query-)continue key is the same as the querymodule,
# but not always
# API can return more than one query-continue key, if multiple properties
# are requested by the query, e.g.
@@ -1003,6 +1020,26 @@
self.request[self.prefix + "namespace"] = namespaces
return
+ def _query_continue(self):
+ if all(key not in self.data[self.continue_name]
+ for key in self.continuekey):
+ pywikibot.log(
+ u"Missing '%s' key(s) in ['%s'] value."
+ % (self.continuekey, self.continue_name))
+ return True
+ for query_continue_pair in self.data['query-continue'].values():
+ self._add_continues(query_continue_pair)
+
+ def _continue(self):
+ self._add_continues(self.data['continue'])
+
+ def _add_continues(self, continue_pair):
+ for key, value in continue_pair.items():
+ # query-continue can return ints (continue too?)
+ if isinstance(value, int):
+ value = str(value)
+ self.request[key] = value
+
def __iter__(self):
"""Submit request and iterate the response based on
self.resultkey.
@@ -1023,7 +1060,7 @@
# self.resultkey in data in last request.submit()
new_limit = min(self.query_limit, self.limit - count)
else:
- # only "query-continue" returned. See Bug 72209.
+ # only "(query-)continue" returned. See Bug 72209.
# increase new_limit to advance faster until new
# useful data are found again.
new_limit = min(new_limit * 2, self.query_limit)
@@ -1117,35 +1154,25 @@
# self.resultkey in data in last request.submit()
previous_result_had_data = True
else:
- # if query-continue is present, self.resultkey might not have been
- # fetched yet
- if "query-continue" not in self.data:
+ # if (query-)continue is present, self.resultkey might not have
+ # been fetched yet
+ if self.continue_name not in self.data:
# No results.
return
# self.resultkey not in data in last request.submit()
- # only "query-continue" was retrieved.
+ # only "(query-)continue" was retrieved.
previous_result_had_data = False
if self.module == "random" and self.limit:
- # "random" module does not return "query-continue"
+ # "random" module does not return "(query-)continue"
# now we loop for a new random query
del self.data # a new request is needed
continue
- if "query-continue" not in self.data:
+ if self.continue_name not in self.data:
return
- if all(key not in self.data["query-continue"] for key in
self.continuekey):
- pywikibot.log(
- u"Missing '%s' key(s) in ['query-continue']
value."
- % self.continuekey)
+ if self.continue_update():
return
- query_continue_pairs = self.data["query-continue"].values()
- for query_continue_pair in query_continue_pairs:
- for key, value in query_continue_pair.items():
- # query-continue can return ints
- if isinstance(value, int):
- value = str(value)
- self.request[key] = value
- del self.data # a new request with query-continue is needed
+ del self.data # a new request with (query-)continue is needed
def result(self, data):
"""Process result data as needed for particular
subclass."""
diff --git a/pywikibot/site.py b/pywikibot/site.py
index a17b205..dffd696 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -1058,6 +1058,9 @@
action='query',
meta='siteinfo',
siprop='|'.join(props))
+ # With 1.25wmf5 it'll require continue or rawcontinue. As we don't
+ # continue anyway we just always use continue.
+ request['continue'] = ''
# warnings are handled later
request._warning_handler = warn_handler
data = request.submit()
--
To view, visit
https://gerrit.wikimedia.org/r/168529
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Iddbf4922c60c0f3900b88cb81e4c176855e33fc3
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>