jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/842442 )
Change subject: [IMPR] make charset parameter of http._decide_encoding optional
......................................................................
[IMPR] make charset parameter of http._decide_encoding optional
Change-Id: I2ea0f364e5811456c9fc46c37679214d2b1ff5d1
---
M pywikibot/comms/http.py
M tests/http_tests.py
2 files changed, 16 insertions(+), 23 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py index 48f74c8..5c37b84 100644 --- a/pywikibot/comms/http.py +++ b/pywikibot/comms/http.py @@ -425,7 +425,9 @@ return charset
-def _get_encoding_from_response_headers(response) -> Optional[str]: +def _get_encoding_from_response_headers( + response: requests.Response +) -> Optional[str]: """Return charset given by the response header.""" content_type = response.headers.get('content-type')
@@ -452,9 +454,10 @@ return header_encoding
-def _decide_encoding(response, charset) -> Optional[str]: +def _decide_encoding(response: requests.Response, + charset: Optional[str] = None) -> Optional[str]: """Detect the response encoding.""" - def _try_decode(content, encoding): + def _try_decode(content: bytes, encoding: Optional[str]) -> Optional[str]: """Helper function to try decoding.""" if encoding is None: return None diff --git a/tests/http_tests.py b/tests/http_tests.py index f9ef04a..f19732f 100755 --- a/tests/http_tests.py +++ b/tests/http_tests.py @@ -327,42 +327,38 @@
def test_no_content_type(self): """Test decoding without content-type (and then no charset).""" - charset = None resp = CharsetTestCase._create_response( headers={}, data=CharsetTestCase.LATIN1_BYTES) - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp) self.assertEqual('latin1', resp.encoding) self.assertEqual(resp.content, CharsetTestCase.LATIN1_BYTES) self.assertEqual(resp.text, CharsetTestCase.STR)
def test_no_charset(self): """Test decoding without explicit charset.""" - charset = None resp = CharsetTestCase._create_response( headers={'content-type': ''}, data=CharsetTestCase.LATIN1_BYTES) - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp) self.assertEqual('latin1', resp.encoding) self.assertEqual(resp.content, CharsetTestCase.LATIN1_BYTES) self.assertEqual(resp.text, CharsetTestCase.STR)
def test_content_type_application_json_without_charset(self): """Test decoding without explicit charset but JSON content.""" - charset = None resp = CharsetTestCase._create_response( headers={'content-type': 'application/json'}, data=CharsetTestCase.UTF8_BYTES) - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp) self.assertEqual('utf-8', resp.encoding)
def test_content_type_sparql_json_without_charset(self): """Test decoding without explicit charset but JSON content.""" - charset = None resp = CharsetTestCase._create_response( headers={'content-type': 'application/sparql-results+json'}, data=CharsetTestCase.UTF8_BYTES) - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp) self.assertEqual('utf-8', resp.encoding)
def test_content_type_xml(self): @@ -381,17 +377,15 @@ ('Test xml content with latin1 encoding given in content', b"<?xml version='1.0' encoding='latin1'?>", 'latin1') ] - charset = None for msg, data, result in tests: with self.subTest(msg=msg): resp = CharsetTestCase._create_response( headers={'content-type': 'application/xml'}, data=data) - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp) self.assertEqual(resp.encoding, result)
def test_charset_not_last(self): """Test charset not last part of content-type header.""" - charset = None resp = CharsetTestCase._create_response( headers={ 'content-type': ( @@ -400,32 +394,29 @@ ) }, data=CharsetTestCase.UTF8_BYTES) - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp) self.assertEqual('utf-8', resp.encoding)
def test_server_charset(self): """Test decoding with server explicit charset.""" - charset = None resp = CharsetTestCase._create_response() - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp) self.assertEqual('utf-8', resp.encoding) self.assertEqual(resp.content, CharsetTestCase.UTF8_BYTES) self.assertEqual(resp.text, CharsetTestCase.STR)
def test_same_charset(self): """Test decoding with explicit and equal charsets.""" - charset = 'utf-8' resp = CharsetTestCase._create_response() - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp, 'utf-8') self.assertEqual('utf-8', resp.encoding) self.assertEqual(resp.content, CharsetTestCase.UTF8_BYTES) self.assertEqual(resp.text, CharsetTestCase.STR)
def test_header_charset(self): """Test decoding with different charsets and valid header charset.""" - charset = 'latin1' resp = CharsetTestCase._create_response() - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp, 'latin1') # Ignore WARNING: Encoding "latin1" requested but "utf-8" received with patch('pywikibot.warning'): self.assertEqual('utf-8', resp.encoding) @@ -434,10 +425,9 @@
def test_code_charset(self): """Test decoding with different charsets and invalid header charset.""" - charset = 'latin1' resp = CharsetTestCase._create_response( data=CharsetTestCase.LATIN1_BYTES) - resp.encoding = http._decide_encoding(resp, charset) + resp.encoding = http._decide_encoding(resp, 'latin1') # Ignore WARNING: Encoding "latin1" requested but "utf-8" received with patch('pywikibot.warning'): self.assertEqual('latin1', resp.encoding)