jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/371697 )
Change subject: Add retry logic for timed-out requests ......................................................................
Add retry logic for timed-out requests
Reuses the same basic retry logic as api.py. The retry logic is needed for SPARQL queries that time out at the HTTP level but actually complete successfully, so their results are already cached the next time the same query is made.
Change-Id: I2e4feff5338eef3c669ec4f0e5bef8412f12bbfb --- M pywikibot/data/api.py M pywikibot/data/sparql.py M pywikibot/exceptions.py 3 files changed, 57 insertions(+), 18 deletions(-)
Approvals: jenkins-bot: Verified Xqt: Looks good to me, approved
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index 94be83c..6dc86d6 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -35,8 +35,7 @@ from pywikibot.comms import http from pywikibot.exceptions import ( Server504Error, Server414Error, FatalServerError, NoUsername, - Error, - InvalidTitle + Error, TimeoutError, InvalidTitle ) from pywikibot.tools import ( MediaWikiVersion, deprecated, itergroup, ip, PY2, getargspec, @@ -1241,11 +1240,6 @@ def __len__(self): """Return the number of enabled and disabled options.""" return len(self._enabled) + len(self._disabled) - - -class TimeoutError(Error): - - """API request failed with a timeout error."""
class EnableSSLSiteWrapper(object): diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py index b577bb0..392f8a8 100644 --- a/pywikibot/data/sparql.py +++ b/pywikibot/data/sparql.py @@ -9,14 +9,18 @@
import json import sys +import time if sys.version_info[0] > 2: from urllib.parse import quote else: from urllib2 import quote
-from pywikibot import Site, Error +from requests.exceptions import Timeout + +from pywikibot import config, warning, Site from pywikibot.comms import http from pywikibot.tools import UnicodeMixin, py2_encode_utf_8 +from pywikibot.exceptions import Error, TimeoutError
DEFAULT_HEADERS = {'cache-control': 'no-cache', 'Accept': 'application/sparql-results+json'} @@ -29,7 +33,8 @@ This class allows to run SPARQL queries against any SPARQL endpoint. """
- def __init__(self, endpoint=None, entity_url=None, repo=None): + def __init__(self, endpoint=None, entity_url=None, repo=None, + max_retries=None, retry_wait=None): """ Create endpoint.
@@ -38,9 +43,16 @@ @param entity_url: URL prefix for any entities returned in a query. @type entity_url: string @param repo: The Wikibase site which we want to run queries on. If - provided this overrides any value in endpoint and entity_url. - Defaults to Wikidata. + provided this overrides any value in endpoint and entity_url. + Defaults to Wikidata. @type repo: pywikibot.site.DataSite + @param max_retries: (optional) Maximum number of times to retry after + errors, defaults to config.max_retries. + @type max_retries: int + @param retry_wait: (optional) Minimum time in seconds to wait after an + error, defaults to config.retry_wait seconds (doubles each retry + until max of 120 seconds is reached). + @type retry_wait: float """ # default to Wikidata if not repo and not endpoint: @@ -67,6 +79,15 @@ self.entity_url = entity_url
self.last_response = None + + if max_retries is None: + self.max_retries = config.max_retries + else: + self.max_retries = max_retries + if retry_wait is None: + self.retry_wait = config.retry_wait + else: + self.retry_wait = retry_wait
def get_last_response(self): """ @@ -120,13 +141,28 @@ @type query: string """ url = '%s?query=%s' % (self.endpoint, quote(query)) - self.last_response = http.fetch(url, headers=headers) - if not self.last_response.content: - return None - try: - return json.loads(self.last_response.content) - except ValueError: - return None + while True: + try: + self.last_response = http.fetch(url, headers=headers) + if not self.last_response.content: + return None + try: + return json.loads(self.last_response.content) + except ValueError: + return None + except Timeout: + self.wait() + continue + + def wait(self): + """Determine how long to wait after a failed request.""" + self.max_retries -= 1 + if self.max_retries < 0: + raise TimeoutError('Maximum retries attempted without success.') + warning('Waiting {0} seconds before retrying.'.format(self.retry_wait)) + time.sleep(self.retry_wait) + # double the next wait, but do not exceed 120 seconds + self.retry_wait = min(120, self.retry_wait * 2)
def ask(self, query, headers=DEFAULT_HEADERS): """ diff --git a/pywikibot/exceptions.py b/pywikibot/exceptions.py index aa249c7..d9dff19 100644 --- a/pywikibot/exceptions.py +++ b/pywikibot/exceptions.py @@ -58,6 +58,8 @@ - CoordinateGlobeUnknownException: globe is not implemented yet. - EntityTypeUnknownException: entity type is not available on the site.
+TimeoutError: request failed with a timeout + DeprecationWarning: old functionality replaced by new functionality
PendingDeprecationWarning: problematic code which has not yet been @@ -549,6 +551,13 @@ pass
+class TimeoutError(Error): + + """Request failed with a timeout error.""" + + pass + + @__deprecated class DeprecatedPageNotFoundError(Error):