jenkins-bot submitted this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified

[IMPR] make charset parameter of http._decide_encoding optional

Change-Id: I2ea0f364e5811456c9fc46c37679214d2b1ff5d1
---
M pywikibot/comms/http.py
M tests/http_tests.py
2 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 48f74c8..5c37b84 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -425,7 +425,9 @@
return charset


-def _get_encoding_from_response_headers(response) -> Optional[str]:
+def _get_encoding_from_response_headers(
+ response: requests.Response
+) -> Optional[str]:
"""Return charset given by the response header."""
content_type = response.headers.get('content-type')

@@ -452,9 +454,10 @@
return header_encoding


-def _decide_encoding(response, charset) -> Optional[str]:
+def _decide_encoding(response: requests.Response,
+ charset: Optional[str] = None) -> Optional[str]:
"""Detect the response encoding."""
- def _try_decode(content, encoding):
+ def _try_decode(content: bytes, encoding: Optional[str]) -> Optional[str]:
"""Helper function to try decoding."""
if encoding is None:
return None
diff --git a/tests/http_tests.py b/tests/http_tests.py
index f9ef04a..f19732f 100755
--- a/tests/http_tests.py
+++ b/tests/http_tests.py
@@ -327,42 +327,38 @@

def test_no_content_type(self):
"""Test decoding without content-type (and then no charset)."""
- charset = None
resp = CharsetTestCase._create_response(
headers={},
data=CharsetTestCase.LATIN1_BYTES)
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp)
self.assertEqual('latin1', resp.encoding)
self.assertEqual(resp.content, CharsetTestCase.LATIN1_BYTES)
self.assertEqual(resp.text, CharsetTestCase.STR)

def test_no_charset(self):
"""Test decoding without explicit charset."""
- charset = None
resp = CharsetTestCase._create_response(
headers={'content-type': ''},
data=CharsetTestCase.LATIN1_BYTES)
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp)
self.assertEqual('latin1', resp.encoding)
self.assertEqual(resp.content, CharsetTestCase.LATIN1_BYTES)
self.assertEqual(resp.text, CharsetTestCase.STR)

def test_content_type_application_json_without_charset(self):
"""Test decoding without explicit charset but JSON content."""
- charset = None
resp = CharsetTestCase._create_response(
headers={'content-type': 'application/json'},
data=CharsetTestCase.UTF8_BYTES)
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp)
self.assertEqual('utf-8', resp.encoding)

def test_content_type_sparql_json_without_charset(self):
"""Test decoding without explicit charset but JSON content."""
- charset = None
resp = CharsetTestCase._create_response(
headers={'content-type': 'application/sparql-results+json'},
data=CharsetTestCase.UTF8_BYTES)
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp)
self.assertEqual('utf-8', resp.encoding)

def test_content_type_xml(self):
@@ -381,17 +377,15 @@
('Test xml content with latin1 encoding given in content',
b"<?xml version='1.0' encoding='latin1'?>", 'latin1')
]
- charset = None
for msg, data, result in tests:
with self.subTest(msg=msg):
resp = CharsetTestCase._create_response(
headers={'content-type': 'application/xml'}, data=data)
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp)
self.assertEqual(resp.encoding, result)

def test_charset_not_last(self):
"""Test charset not last part of content-type header."""
- charset = None
resp = CharsetTestCase._create_response(
headers={
'content-type': (
@@ -400,32 +394,29 @@
)
},
data=CharsetTestCase.UTF8_BYTES)
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp)
self.assertEqual('utf-8', resp.encoding)

def test_server_charset(self):
"""Test decoding with server explicit charset."""
- charset = None
resp = CharsetTestCase._create_response()
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp)
self.assertEqual('utf-8', resp.encoding)
self.assertEqual(resp.content, CharsetTestCase.UTF8_BYTES)
self.assertEqual(resp.text, CharsetTestCase.STR)

def test_same_charset(self):
"""Test decoding with explicit and equal charsets."""
- charset = 'utf-8'
resp = CharsetTestCase._create_response()
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp, 'utf-8')
self.assertEqual('utf-8', resp.encoding)
self.assertEqual(resp.content, CharsetTestCase.UTF8_BYTES)
self.assertEqual(resp.text, CharsetTestCase.STR)

def test_header_charset(self):
"""Test decoding with different charsets and valid header charset."""
- charset = 'latin1'
resp = CharsetTestCase._create_response()
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp, 'latin1')
# Ignore WARNING: Encoding "latin1" requested but "utf-8" received
with patch('pywikibot.warning'):
self.assertEqual('utf-8', resp.encoding)
@@ -434,10 +425,9 @@

def test_code_charset(self):
"""Test decoding with different charsets and invalid header charset."""
- charset = 'latin1'
resp = CharsetTestCase._create_response(
data=CharsetTestCase.LATIN1_BYTES)
- resp.encoding = http._decide_encoding(resp, charset)
+ resp.encoding = http._decide_encoding(resp, 'latin1')
# Ignore WARNING: Encoding "latin1" requested but "utf-8" received
with patch('pywikibot.warning'):
self.assertEqual('latin1', resp.encoding)

To view, visit change 842442. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I2ea0f364e5811456c9fc46c37679214d2b1ff5d1
Gerrit-Change-Number: 842442
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged