XZise has submitted this change and it was merged.
Change subject: Use unicode for Request params ......................................................................
Use unicode for Request params
Request.http_params() optionally decodes values in the Request.params dict, and then always re-encodes params each time it is called. As a result the type of the values differs throughout the life of the Request object.
Split Request.http_params() into __setitem__, _add_defaults, _http_param_string. Rename Request.params to _params as Request has a dict interface for it.
__setitem__ forces all param values to always be unicode (or str for ascii in Python 2), and be stored as a list for each key.
_add_defaults adds param values to the pending request. It now only peforms these additions the first invocation, and is a noop when invoked again.
_http_param_string prepares a URL for the site's API, but does not store the encoded values in the Request object's state.
Request.action is added as this parameter is mandatory, may only have one value, and is frequently used in the logic.
Change-Id: Id13622e72623024166cec4ac0a5c9c4b3d511755 --- M pywikibot/data/api.py M pywikibot/site.py M tests/api_tests.py M tox.ini 4 files changed, 152 insertions(+), 82 deletions(-)
Approvals: XZise: Looks good to me, approved
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index 63e47cb..47cb74d 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -27,7 +27,7 @@
import pywikibot from pywikibot import config, login -from pywikibot.tools import MediaWikiVersion as LV +from pywikibot.tools import MediaWikiVersion as LV, deprecated from pywikibot.exceptions import Server504Error, FatalServerError, Error
import sys @@ -144,8 +144,8 @@ >>> # add a new parameter >>> r['siprop'] = "namespaces" >>> # note that "uiprop" param gets added automatically - >>> r.params - {'action': 'query', 'meta': 'userinfo|siteinfo', 'siprop': 'namespaces'} + >>> r._params + {'action': [u'query'], 'meta': [u'userinfo', u'siteinfo'], 'siprop': [u'namespaces']} >>> data = r.submit() >>> type(data) <type 'dict'> @@ -185,15 +185,16 @@ self.throttle = kwargs.pop('throttle', True) self.max_retries = kwargs.pop("max_retries", pywikibot.config.max_retries) self.retry_wait = kwargs.pop("retry_wait", pywikibot.config.retry_wait) - self.params = {} + self._params = {} if "action" not in kwargs: raise ValueError("'action' specification missing from Request.") + self.action = kwargs['action'] self.update(**kwargs) self._warning_handler = None # Actions that imply database updates on the server, used for various # things like throttling or skipping actions when we're in simulation # mode - self.write = self.params["action"] in ( + self.write = self.action in ( "edit", "move", "rollback", "delete", "undelete", "protect", "block", "unblock", "watch", "patrol", "import", "userrights", "upload", "emailuser", @@ -216,44 +217,65 @@ # This situation is only tripped when one of the first actions # on the site is a write action and the extension isn't installed. if ((self.write and LV(self.site.version()) >= LV("1.23")) or - (self.params['action'] == 'edit' and + (self.action == 'edit' and self.site.has_extension('AssertEdit'))): pywikibot.debug(u"Adding user assertion", _logger) - self.params["assert"] = "user" # make sure user is logged in + self["assert"] = 'user' # make sure user is logged in
if (self.site.protocol() == 'http' and (config.use_SSL_always or ( - self.params["action"] == "login" and config.use_SSL_onlogin)) + self.action == 'login' and config.use_SSL_onlogin)) and self.site.family.name in config.available_ssl_project): self.site = EnableSSLSiteWrapper(self.site)
# implement dict interface def __getitem__(self, key): - return self.params[key] + return self._params[key]
def __setitem__(self, key, value): - self.params[key] = value + """Set MediaWiki API request parameter. + + @param key: param key + @type key: basestring + @param value: param value + @type value: list of unicode, unicode, or str in site encoding + Any string type may use a |-separated list + """ + # Allow site encoded bytes (note: str is a subclass of bytes in py2) + if isinstance(value, bytes): + value = value.decode(self.site.encoding()) + + if isinstance(value, unicode): + value = value.split("|") + + try: + iter(value) + except TypeError: + # convert any non-iterable value into a single-element list + value = [unicode(value)] + + self._params[key] = value
def __delitem__(self, key): - del self.params[key] + del self._params[key]
def keys(self): - return list(self.params.keys()) + return list(self._params.keys())
def __contains__(self, key): - return self.params.__contains__(key) + return self._params.__contains__(key)
def __iter__(self): - return self.params.__iter__() + return self._params.__iter__()
def __len__(self): - return len(self.params) + return len(self._params)
def iteritems(self): - return iter(self.params.items()) + return iter(self._params.items())
def items(self): """Return a list of tuples containg the parameters in any order.""" - return list(self.params.items()) + return list(self._params.items())
@property def mime(self): @@ -272,62 +294,102 @@ except TypeError: self.mime_params = {} if value else None
+ @deprecated('_http_param_string') def http_params(self): """Return the parameters formatted for inclusion in an HTTP request.
- self.params MUST be either - list of unicode - unicode (may be |-separated list) - str in site encoding (may be |-separated list) + DEPRECATED. See _encoded_items for explanation of encoding used. """ - if self.mime_params and set(self.params.keys()) & set(self.mime_params.keys()): + self._add_defaults() + return self._http_param_string() + + def _add_defaults(self): + """ + Add default parameters to the API request. + + This method will only add them once. + """ + if hasattr(self, '__defaulted'): + return + + if self.mime_params and set(self._params.keys()) & set(self.mime_params.keys()): raise ValueError('The mime_params and params may not share the ' 'same keys.') - for key in self.params: - if isinstance(self.params[key], bytes): - self.params[key] = self.params[key].decode(self.site.encoding()) - if isinstance(self.params[key], basestring): - # convert a stringified sequence into a list - self.params[key] = self.params[key].split("|") - try: - iter(self.params[key]) - except TypeError: - # convert any non-iterable value into a single-element list - self.params[key] = [str(self.params[key])] - if self.params["action"] == ['query']: - meta = self.params.get("meta", []) + + if self.action == 'query': + meta = self._params.get("meta", []) if "userinfo" not in meta: meta.append("userinfo") - self.params["meta"] = meta - uiprop = self.params.get("uiprop", []) + self._params["meta"] = meta + uiprop = self._params.get("uiprop", []) uiprop = set(uiprop + ["blockinfo", "hasmsg"]) - self.params["uiprop"] = list(sorted(uiprop)) - if "properties" in self.params: - if "info" in self.params["properties"]: - inprop = self.params.get("inprop", []) + self._params["uiprop"] = list(sorted(uiprop)) + if "properties" in self._params: + if "info" in self._params["properties"]: + inprop = self._params.get("inprop", []) info = set(inprop + ["protection", "talkid", "subjectid"]) - self.params["info"] = list(info) - if "maxlag" not in self.params and config.maxlag: - self.params["maxlag"] = [str(config.maxlag)] - if "format" not in self.params: - self.params["format"] = ["json"] - if self.params['format'] != ["json"]: + self._params["info"] = list(info) + if "maxlag" not in self._params and config.maxlag: + self._params["maxlag"] = [str(config.maxlag)] + if "format" not in self._params: + self._params["format"] = ["json"] + elif self._params['format'] != ["json"]: raise TypeError("Query format '%s' cannot be parsed." - % self.params['format']) - for key in self.params: + % self._params['format']) + + self.__defaulted = True + + def _encoded_items(self): + """ + Build a dict of params with minimal encoding needed for the site. + + This helper method only prepares params for serialisation or + transmission, so it only encodes values which are not ASCII, + requiring callers to consider how to handle ASCII vs other values, + however the output is designed to enable __str__ and __repr__ to + do the right thing in most circumstances. + + Servers which use an encoding that is not a superset of ASCII + are not supported. + + @return: Parameters either in the site encoding, or ASCII strings + @rtype: dict with values of either str or bytes + """ + params = {} + for key, value in self._params.items(): + value = u"|".join(value) + # If the value is encodable as ascii, do not encode it. + # This means that any value which can be encoded as ascii + # is presumed to be ascii, and servers using a site encoding + # which is not a superset of ascii may be problematic. try: - self.params[key] = "|".join(self.params[key]) - self.params[key] = self.params[key].encode(self.site.encoding()) - except Exception: - pywikibot.error( - u"http_params: Key '%s' could not be encoded to '%s'; params=%r" - % (key, self.site.encoding(), self.params[key])) - return urlencode(self.params) + value.encode('ascii') + # In Python 2, ascii API params should be represented as 'foo' + # rather than u'foo' + if sys.version_info[0] == 2: + value = str(value) + except UnicodeError: + try: + value = value.encode(self.site.encoding()) + except Exception: + pywikibot.error( + u"_encoded_items: '%s' could not be encoded as '%s':" + u" %r" % (key, self.site.encoding(), value)) + params[key] = value + return params + + def _http_param_string(self): + """ + Return the parameters as a HTTP URL query fragment. + + URL encodes the parameters provided by _encoded_items() + """ + return urlencode(self._encoded_items())
def __str__(self): return unquote(self.site.scriptpath() + "/api.php?" - + self.http_params()) + + self._http_param_string())
def __repr__(self): return "%s.%s<%s->%r>" % (self.__class__.__module__, self.__class__.__name__, self.site, str(self)) @@ -404,26 +466,27 @@ @return: a dict containing data retrieved from api.php
""" + self._add_defaults() while True: - paramstring = self.http_params() - action = self.params.get("action", "") - simulate = self._simulate(action) + paramstring = self._http_param_string() + simulate = self._simulate(self.action) if simulate: return simulate if self.throttle: self.site.throttle(write=self.write) else: - pywikibot.log("Action '{0}' is submitted not throttled.".format(action)) + pywikibot.log( + "Submitting unthrottled action '{0}'.".format(self.action)) uri = self.site.scriptpath() + "/api.php" try: if self.mime: # construct a MIME message containing all API key/values container = MIMEMultipart(_subtype='form-data') - for key in self.params: + for key, value in self._encoded_items().items(): # key "file" requires special treatment in a multipart # message if key == "file": - local_filename = self.params[key] + local_filename = value filetype = mimetypes.guess_type(local_filename)[0] \ or 'application/octet-stream' file_content = file(local_filename, "rb").read() @@ -432,7 +495,7 @@ {'filename': local_filename}) else: submsg = Request._generate_MIME_part( - key, self.params[key], None, None) + key, value, None, None) container.attach(submsg) for key, value in self.mime_params.items(): container.attach(Request._generate_MIME_part(key, *value)) @@ -484,13 +547,14 @@ % self.site) pywikibot.debug(rawdata, _logger) # there might also be an overflow, so try a smaller limit - for param in self.params: + for param in self._params: if param.endswith("limit"): - value = self.params[param] + # param values are stored a list of str + value = self._params[param][0] try: - self.params[param] = str(int(value) // 2) + self._params[param] = [str(int(value) // 2)] pywikibot.output(u"Set %s = %s" - % (param, self.params[param])) + % (param, self._params[param])) except: pass self.wait() @@ -502,7 +566,7 @@ "Unable to process query response of type %s." % type(result), data=result) - if self['action'] == 'query': + if self.action == 'query': if 'userinfo' in result.get('query', ()): if hasattr(self.site, '_userinfo'): self.site._userinfo.update(result['query']['userinfo']) @@ -553,7 +617,7 @@
pywikibot.log(u"MediaWiki exception %s: query=\n%s" % (class_name, - pprint.pformat(self.params))) + pprint.pformat(self._params))) pywikibot.log(u" response=\n%s" % result)
raise APIMWException(class_name, info, **result["error"]) @@ -561,14 +625,14 @@ # bugs 46535, 62126, 64494, 66619 # maybe removed when it 46535 is solved if code == "failed-save" and \ - action == 'wbeditentity' and \ + self.action == 'wbeditentity' and \ self._is_wikibase_error_retryable(result["error"]): self.wait() continue # raise error try: pywikibot.log(u"API Error: query=\n%s" - % pprint.pformat(self.params)) + % pprint.pformat(self._params)) pywikibot.log(u" response=\n%s" % result)
@@ -638,7 +702,6 @@ scripts/maintenance/cache.py to support the new key and all previous keys. """ - login_status = self.site._loginstatus
if login_status > pywikibot.site.LoginStatus.NOT_LOGGED_IN and \ @@ -653,10 +716,10 @@ max(login_status, pywikibot.site.LoginStatus.NOT_LOGGED_IN)) user_key = repr(user_key)
- return repr(self.site) + user_key + repr(sorted(self.items())) + request_key = repr(sorted(list(self._encoded_items().items()))) + return repr(self.site) + user_key + request_key
def _create_file_name(self): - self.http_params() # normalize self.params return hashlib.sha256( self._uniquedescriptionstr().encode('utf-8') ).hexdigest() diff --git a/pywikibot/site.py b/pywikibot/site.py index f159130..8ba6e52 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -4337,11 +4337,12 @@ while True: f.seek(offset) chunk = f.read(chunk_size) + file_page_title = filepage.title(withNamespace=False) req = api.Request(site=self, action='upload', token=token, stash='1', offset=offset, filesize=filesize, - filename=filepage.title(withNamespace=False), + filename=file_page_title, mime_params={}, throttle=throttle) - req.mime_params['chunk'] = (chunk, None, {'filename': req.params['filename']}) + req.mime_params['chunk'] = (chunk, None, {'filename': file_page_title}) if file_key: req['filekey'] = file_key try: diff --git a/tests/api_tests.py b/tests/api_tests.py index 9639ba8..9f00a2b 100644 --- a/tests/api_tests.py +++ b/tests/api_tests.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +"""API test module.""" # # (C) Pywikibot team, 2007-2014 # @@ -14,29 +15,33 @@
class TestApiFunctions(DefaultSiteTestCase):
+ """API Request object test class.""" + cached = True
def testObjectCreation(self): - """Test that api.Request() creates an object with desired attributes""" + """Test api.Request() constructor.""" mysite = self.get_site() req = api.Request(site=mysite, action="test", foo="", bar="test") self.assertTrue(req) self.assertEqual(req.site, mysite) - self.assertIn("foo", req.params) - self.assertEqual(req["bar"], "test") + self.assertIn("foo", req._params) + self.assertEqual(req["bar"], ["test"]) # test item assignment req["one"] = "1" - self.assertEqual(req.params['one'], "1") + self.assertEqual(req._params['one'], ["1"]) # test compliance with dict interface # req.keys() should contain "action", "foo", "bar", "one" self.assertEqual(len(req.keys()), 4) - self.assertIn("test", req.values()) + self.assertIn("test", req._encoded_items().values()) for item in req.items(): self.assertEqual(len(item), 2, item)
class TestPageGenerator(TestCase):
+ """API PageGenerator object test class.""" + family = 'wikipedia' code = 'en'
diff --git a/tox.ini b/tox.ini index 59868cc..2049adb 100644 --- a/tox.ini +++ b/tox.ini @@ -54,6 +54,7 @@ ./scripts/illustrate_wikidata.py \ ./scripts/newitem.py \ ./tests/aspects.py \ + ./tests/api_tests.py \ ./tests/dry_api_tests.py \ ./tests/upload_tests.py
pywikibot-commits@lists.wikimedia.org