jenkins-bot submitted this change.

View Change


Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
[IMPR] return 'https' scheme with Family.protocol()

- return 'https' scheme with Family.protocol() by default
- update all related family files
- enable http.request to pass a given protocol to site.base_url()
- try to switch the scheme if a request fails and has no json result;
this ensures that we have no breaking change with the new default
'https' scheme
- raise a FatalServerError to stop the loop if requests raises one of the
MissingSchema, InvalidSchema, InvalidURL or InvalidHeader exceptions

Bug: T326046
Change-Id: I10859e11b569ee3a4441272439342792038567aa
---
M pywikibot/families/wikihow_family.py
M pywikibot/family.py
M pywikibot/comms/http.py
M pywikibot/families/wikispore_family.py
M pywikibot/families/osm_family.py
M pywikibot/families/vikidia_family.py
M tests/http_tests.py
M pywikibot/families/i18n_family.py
M pywikibot/data/api/_requests.py
9 files changed, 72 insertions(+), 45 deletions(-)

diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 60a9322..8fed6a6 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -249,7 +249,7 @@
return UserAgent().random


-def request(site,
+def request(site: 'pywikibot.site.BaseSite',
uri: Optional[str] = None,
headers: Optional[dict] = None,
**kwargs) -> requests.Response:
@@ -261,13 +261,16 @@
The optional uri is a relative uri from site base uri including the
document root '/'.

+ .. versionchanged:: 8.2
+ a *protocol* parameter can be given which is passed to the
+ :meth:`family.Family.base_url` method.
+
:param site: The Site to connect to
- :type site: pywikibot.site.BaseSite
:param uri: the URI to retrieve
- :keyword charset: Either a valid charset (usable for str.decode()) or None
- to automatically chose the charset from the returned header (defaults
- to latin-1)
- :type charset: CodecInfo, str, None
+ :keyword Optional[CodecInfo, str] charset: Either a valid charset
+ (usable for str.decode()) or None to automatically choose the
+ charset from the returned header (defaults to latin-1)
+ :keyword Optional[str] protocol: a url scheme
:return: The received data Response
"""
kwargs.setdefault('verify', site.verify_SSL_certificate())
@@ -278,7 +281,7 @@
format_string = headers.get('user-agent')
headers['user-agent'] = user_agent(site, format_string)

- baseuri = site.base_url(uri)
+ baseuri = site.base_url(uri, protocol=kwargs.pop('protocol', None))
r = fetch(baseuri, headers=headers, **kwargs)
site.throttle.retry_after = int(r.headers.get('retry-after', 0))
return r
@@ -331,6 +334,10 @@
if isinstance(response, requests.Timeout):
raise ServerError(response)

+ if isinstance(response, ValueError):
+ # MissingSchema, InvalidSchema, InvalidURL, InvalidHeader
+ raise FatalServerError(str(response))
+
if isinstance(response, Exception):
with suppress(Exception):
# request exception may contain response and request attribute
diff --git a/pywikibot/data/api/_requests.py b/pywikibot/data/api/_requests.py
index 892c5a2..25f7e57 100644
--- a/pywikibot/data/api/_requests.py
+++ b/pywikibot/data/api/_requests.py
@@ -17,7 +17,7 @@
from email.mime.nonmultipart import MIMENonMultipart
from pathlib import Path
from typing import Any, Optional, Union
-from urllib.parse import unquote, urlencode
+from urllib.parse import unquote, urlencode, urlparse
from warnings import warn

import pywikibot
@@ -674,13 +674,23 @@
paramstring) -> tuple:
"""Get or post a http request with exception handling.

+ .. versionchanged:: 8.2
+ change the scheme if the previous request didn't have json
+ content.
+
:return: a tuple containing requests.Response object from
http.request and use_get value
"""
+ kwargs = {}
+ schemes = ('http', 'https')
+ if self.json_warning and self.site.protocol() in schemes:
+ # retry with other scheme
+ kwargs['protocol'] = schemes[self.site.protocol() == 'http']
+
try:
response = http.request(self.site, uri=uri,
method='GET' if use_get else 'POST',
- data=data, headers=headers)
+ data=data, headers=headers, **kwargs)
except Server504Error:
pywikibot.log('Caught HTTP 504 error; retrying')
except Client414Error:
@@ -708,6 +718,10 @@
def _json_loads(self, response) -> Optional[dict]:
"""Return a dict from requests.Response.

+ .. versionchanged:: 8.2
+ show a warning to add a ``protocol()`` method to the family
+ file if suitable.
+
:param response: a requests.Response object
:type response: requests.Response
:return: a data dict
@@ -753,7 +767,18 @@
self[param] = [str(int(value) // 2)]
pywikibot.info(f'Set {param} = {self[param]}')
else:
+ scheme = urlparse(response.url).scheme
+ if self.json_warning and scheme != self.site.protocol():
+ warn(f"""
+Your {self.site.family} family uses a wrong scheme {self.site.protocol()!r}
+but {scheme!r} is required. Please add the following code to your family file:
+
+ def protocol(self, code: str) -> str:
+ return '{scheme}'
+
+""", stacklevel=2)
return result or {}
+
self.wait()
return None

diff --git a/pywikibot/families/i18n_family.py b/pywikibot/families/i18n_family.py
index 67c780d..93be54e 100644
--- a/pywikibot/families/i18n_family.py
+++ b/pywikibot/families/i18n_family.py
@@ -1,6 +1,6 @@
"""Family module for Translate Wiki."""
#
-# (C) Pywikibot team, 2007-2022
+# (C) Pywikibot team, 2007-2023
#
# Distributed under the terms of the MIT license.
#
@@ -14,7 +14,3 @@

name = 'i18n'
domain = 'translatewiki.net'
-
- def protocol(self, code) -> str:
- """Return https as the protocol for this family."""
- return 'https'
diff --git a/pywikibot/families/osm_family.py b/pywikibot/families/osm_family.py
index f5c2962..81b14cc 100644
--- a/pywikibot/families/osm_family.py
+++ b/pywikibot/families/osm_family.py
@@ -1,6 +1,6 @@
"""Family module for OpenStreetMap wiki."""
#
-# (C) Pywikibot team, 2009-2022
+# (C) Pywikibot team, 2009-2023
#
# Distributed under the terms of the MIT license.
#
@@ -43,7 +43,3 @@
edit_restricted_templates = {
'en': ('In Bearbeitung',),
}
-
- def protocol(self, code) -> str:
- """Return https as the protocol for this family."""
- return 'https'
diff --git a/pywikibot/families/vikidia_family.py b/pywikibot/families/vikidia_family.py
index 2878681..6280258 100644
--- a/pywikibot/families/vikidia_family.py
+++ b/pywikibot/families/vikidia_family.py
@@ -21,7 +21,3 @@

# Sites we want to edit but not count as real languages
test_codes = ['central', 'test']
-
- def protocol(self, code) -> str:
- """Return https as the protocol for this family."""
- return 'https'
diff --git a/pywikibot/families/wikihow_family.py b/pywikibot/families/wikihow_family.py
index d49b642..2343870 100644
--- a/pywikibot/families/wikihow_family.py
+++ b/pywikibot/families/wikihow_family.py
@@ -60,7 +60,3 @@
def scriptpath(self, code) -> str:
"""Return the script path for this family."""
return ''
-
- def protocol(self, code) -> str:
- """Return 'https' as the protocol."""
- return 'https'
diff --git a/pywikibot/families/wikispore_family.py b/pywikibot/families/wikispore_family.py
index 427280e..a0deedb 100644
--- a/pywikibot/families/wikispore_family.py
+++ b/pywikibot/families/wikispore_family.py
@@ -3,7 +3,7 @@
.. versionadded:: 4.1
"""
#
-# (C) Pywikibot team, 2020-2022
+# (C) Pywikibot team, 2020-2023
#
# Distributed under the terms of the MIT license.
#
@@ -22,6 +22,3 @@
'en': 'wikispore.wmflabs.org',
'test': 'wikispore-test.wmflabs.org',
}
-
- def protocol(self, code) -> str:
- return 'https'
diff --git a/pywikibot/family.py b/pywikibot/family.py
index 1d9d0b8..ed75706 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -428,15 +428,18 @@

# Methods
def protocol(self, code: str) -> str:
- """
- The protocol to use to connect to the site.
+ """The protocol to use to connect to the site.

- May be overridden to return 'https'. Other protocols are not supported.
+ May be overridden to return 'http'. Other protocols are not
+ supported.
+
+ .. versionchanged:: 8.2
+ ``https`` is returned instead of ``http``.

:param code: language code
:return: protocol that this family uses
"""
- return 'http'
+ return 'https'

def verify_SSL_certificate(self, code: str) -> bool:
"""
@@ -828,10 +831,6 @@

return {code: cls.domain for code in codes}

- def protocol(self, code) -> str:
- """Return 'https' as the protocol."""
- return 'https'
-
def scriptpath(self, code):
"""Return the script path for this family."""
return '' if code == 'en' else ('/' + code)
@@ -980,10 +979,6 @@
"""Return Wikimedia Commons as the shared image repository."""
return ('commons', 'commons')

- def protocol(self, code) -> str:
- """Return 'https' as the protocol."""
- return 'https'
-
def eventstreams_host(self, code) -> str:
"""Return 'https://stream.wikimedia.org' as the stream hostname."""
return 'https://stream.wikimedia.org'
diff --git a/tests/http_tests.py b/tests/http_tests.py
index 73036a5..eee230b 100755
--- a/tests/http_tests.py
+++ b/tests/http_tests.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""Tests for http module."""
#
-# (C) Pywikibot team, 2014-2022
+# (C) Pywikibot team, 2014-2023
#
# Distributed under the terms of the MIT license.
#
@@ -153,7 +153,7 @@
"""Test invalid scheme."""
# A InvalidSchema is raised within requests
with self.assertRaisesRegex(
- requests.exceptions.InvalidSchema,
+ FatalServerError,
"No connection adapters were found for 'invalid://url'"):
http.fetch('invalid://url')


To view, visit change 874903. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I10859e11b569ee3a4441272439342792038567aa
Gerrit-Change-Number: 874903
Gerrit-PatchSet: 5
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged