jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/329360 )
Change subject: Query string dictionary parameter for pwb.comms.http.fetch and friends ......................................................................
Query string dictionary parameter for pwb.comms.http.fetch and friends
This will break usages of the affected methods that rely on the order of the parameters, rather than specifying the parameters dictionary-style.
* New "params" parameter to pass unencoded query string parameters as a dictionary to pywikibot.comms.http.{fetch,request,_enqueue} * PetScanPageGenerator has been updated to use this new parameter. * "data" parameter added to fetch/request/_enqueue as an alias of "body" to make the method parameters correspond to requests.Session.request * Unit tests for "params" and "data" parameters
Bug: T153559 Change-Id: I96da6d4c719aba24d35e58dd5f0694e628be86a3 --- M pywikibot/comms/http.py M pywikibot/comms/threadedhttp.py M pywikibot/pagegenerators.py M tests/http_tests.py 4 files changed, 105 insertions(+), 27 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py index 7bf6235..3006dde 100644 --- a/pywikibot/comms/http.py +++ b/pywikibot/comms/http.py @@ -276,8 +276,8 @@
@deprecate_arg('ssl', None) -def request(site=None, uri=None, method='GET', body=None, headers=None, - **kwargs): +def request(site=None, uri=None, method='GET', params=None, body=None, + headers=None, data=None, **kwargs): """ Request to Site with default error handling and response decoding.
@@ -299,12 +299,17 @@ @return: The received data @rtype: a unicode string """ + # body and data parameters both map to the data parameter of + # requests.Session.request. + if data: + body = data + assert(site or uri) if not site: # +1 because of @deprecate_arg issue_deprecation_warning( 'Invoking http.request without argument site', 'http.fetch()', 3) - r = fetch(uri, method, body, headers, **kwargs) + r = fetch(uri, method, params, body, headers, **kwargs) return r.content
baseuri = site.base_url(uri) @@ -320,7 +325,7 @@
headers['user-agent'] = user_agent(site, format_string)
- r = fetch(baseuri, method, body, headers, **kwargs) + r = fetch(baseuri, method, params, body, headers, **kwargs) return r.content
@@ -350,6 +355,7 @@ def _http_process(session, http_request): method = http_request.method uri = http_request.uri + params = http_request.params body = http_request.body headers = http_request.headers if PY2 and headers: @@ -370,8 +376,8 @@ # Note that the connections are pooled which mean that a future # HTTPS request can succeed even if the certificate is invalid and # verify=True, when a request with verify=False happened before - response = session.request(method, uri, data=body, headers=headers, - auth=auth, timeout=timeout, + response = session.request(method, uri, params=params, data=body, + headers=headers, auth=auth, timeout=timeout, verify=not ignore_validation) except Exception as e: http_request.data = e @@ -407,7 +413,8 @@ warning('Http response status {0}'.format(request.data.status_code))
-def _enqueue(uri, method="GET", body=None, headers=None, **kwargs): +def _enqueue(uri, method="GET", params=None, body=None, headers=None, data=None, + **kwargs): """ Enqueue non-blocking threaded HTTP request with callback.
@@ -432,6 +439,11 @@ @type callbacks: list of callable @rtype: L{threadedhttp.HttpRequest} """ + # body and data parameters both map to the data parameter of + # requests.Session.request. + if data: + body = data + default_error_handling = kwargs.pop('default_error_handling', None) callback = kwargs.pop('callback', None)
@@ -451,13 +463,14 @@ all_headers['user-agent'] = user_agent(None, user_agent_format_string)
request = threadedhttp.HttpRequest( - uri, method, body, all_headers, callbacks, **kwargs) + uri, method, params, body, all_headers, callbacks, **kwargs) _http_process(session, request) return request
-def fetch(uri, method="GET", body=None, headers=None, - default_error_handling=True, use_fake_user_agent=False, **kwargs): +def fetch(uri, method="GET", params=None, body=None, headers=None, + default_error_handling=True, use_fake_user_agent=False, data=None, + **kwargs): """ Blocking HTTP request.
@@ -474,6 +487,11 @@ overridden by domain in config. @rtype: L{threadedhttp.HttpRequest} """ + # body and data parameters both map to the data parameter of + # requests.Session.request. + if data: + body = data + # Change user agent depending on fake UA settings. # Set header to new UA if needed. headers = headers or {} @@ -489,7 +507,7 @@ elif use_fake_user_agent is True: headers['user-agent'] = fake_user_agent()
- request = _enqueue(uri, method, body, headers, **kwargs) + request = _enqueue(uri, method, params, body, headers, **kwargs) assert(request._data is not None) # if there's no data in the answer we're in trouble # Run the error handling callback in the callers thread so exceptions # may be caught. diff --git a/pywikibot/comms/threadedhttp.py b/pywikibot/comms/threadedhttp.py index a166929..03386cf 100644 --- a/pywikibot/comms/threadedhttp.py +++ b/pywikibot/comms/threadedhttp.py @@ -31,7 +31,7 @@ * an exception """
- def __init__(self, uri, method="GET", body=None, headers=None, + def __init__(self, uri, method="GET", params=None, body=None, headers=None, callbacks=None, charset=None, **kwargs): """ Constructor. @@ -40,6 +40,7 @@ """ self.uri = uri self.method = method + self.params = params self.body = body self.headers = headers if isinstance(charset, codecs.CodecInfo): diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 3db16a7..61dacdb 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -47,7 +47,6 @@ intersect_generators, IteratorNextMixin, filter_unique, - PY2, )
from pywikibot import date, config, i18n, xmlreader @@ -55,13 +54,6 @@ from pywikibot.exceptions import ArgumentDeprecationWarning, UnknownExtension from pywikibot.logentries import LogEntryFactory from pywikibot.proofreadpage import ProofreadPage - -if PY2: - from urllib import urlencode - import urlparse -else: - import urllib.parse as urlparse - from urllib.parse import urlencode
if sys.version_info[0] > 2: basestring = (str, ) @@ -2840,14 +2832,9 @@
def query(self): """Query PetScan.""" - url = urlparse.urlunparse(('https', # scheme - 'petscan.wmflabs.org', # netloc - '', # path - '', # params - urlencode(self.opts), # query - '')) # fragment + url = 'https://petscan.wmflabs.org'
- req = http.fetch(url) + req = http.fetch(url, params=self.opts) j = json.loads(req.content) raw_pages = j['*'][0]['a']['*'] for raw_page in raw_pages: diff --git a/tests/http_tests.py b/tests/http_tests.py index 0cb28bd..26663f1 100644 --- a/tests/http_tests.py +++ b/tests/http_tests.py @@ -9,6 +9,7 @@
__version__ = '$Id$'
+import json import re import warnings
@@ -556,6 +557,77 @@ self.assertIs(main_module_cookie_jar, http.cookie_jar)
+class QueryStringParamsTestCase(TestCase): + + """ + Test the query string parameter of request methods. + + The /get endpoint of httpbin returns JSON that can include an 'args' key with + urldecoded query string parameters. + """ + + sites = { + 'httpbin': { + 'hostname': 'httpbin.org', + }, + } + + def test_no_params(self): + """Test fetch method with no parameters.""" + r = http.fetch(uri='https://httpbin.org/get', params={}) + self.assertEqual(r.status, 200) + + content = json.loads(r.content) + self.assertDictEqual(content['args'], {}) + + def test_unencoded_params(self): + """ + Test fetch method with unencoded parameters, which should be encoded internally. + + HTTPBin returns the args in their urldecoded form, so what we put in should be + the same as what we get out. + """ + r = http.fetch(uri='https://httpbin.org/get', params={'fish&chips': 'delicious'}) + self.assertEqual(r.status, 200) + + content = json.loads(r.content) + self.assertDictEqual(content['args'], {'fish&chips': 'delicious'}) + + def test_encoded_params(self): + """ + Test fetch method with encoded parameters, which should be re-encoded internally. + + HTTPBin returns the args in their urldecoded form, so what we put in should be + the same as what we get out. + """ + r = http.fetch(uri='https://httpbin.org/get', + params={'fish%26chips': 'delicious'}) + self.assertEqual(r.status, 200) + + content = json.loads(r.content) + self.assertDictEqual(content['args'], {'fish%26chips': 'delicious'}) + + +class DataBodyParameterTestCase(TestCase): + """Test that the data and body parameters of fetch/request methods are equivalent.""" + + sites = { + 'httpbin': { + 'hostname': 'httpbin.org', + }, + } + + def test_fetch(self): + """Test that using the data parameter and body parameter produce same results.""" + r_data = http.fetch(uri='https://httpbin.org/post', method='POST', + data={'fish&chips': 'delicious'}) + r_body = http.fetch(uri='https://httpbin.org/post', method='POST', + body={'fish&chips': 'delicious'}) + + self.assertDictEqual(json.loads(r_data.content), + json.loads(r_body.content)) + + if __name__ == '__main__': # pragma: no cover try: unittest.main()