jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/874903 )
Change subject: [IMPR] return 'https' scheme with Family.protocol() ......................................................................
[IMPR] return 'https' scheme with Family.protocol()
- return 'https' scheme with Family.protocol() by default - update all related family files - enable http.request to pass a given protocol to site.base_url() - try to switch the scheme if a request fails and has no JSON result; this ensures that we have no breaking change with the new default 'https' scheme - raise a FatalServerError to stop the loop if requests raises one of the MissingSchema, InvalidSchema, InvalidURL, InvalidHeader exceptions
Bug: T326046 Change-Id: I10859e11b569ee3a4441272439342792038567aa --- M pywikibot/families/wikihow_family.py M pywikibot/family.py M pywikibot/comms/http.py M pywikibot/families/wikispore_family.py M pywikibot/families/osm_family.py M pywikibot/families/vikidia_family.py M tests/http_tests.py M pywikibot/families/i18n_family.py M pywikibot/data/api/_requests.py 9 files changed, 72 insertions(+), 45 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py index 60a9322..8fed6a6 100644 --- a/pywikibot/comms/http.py +++ b/pywikibot/comms/http.py @@ -249,7 +249,7 @@ return UserAgent().random
-def request(site, +def request(site: 'pywikibot.site.BaseSite', uri: Optional[str] = None, headers: Optional[dict] = None, **kwargs) -> requests.Response: @@ -261,13 +261,16 @@ The optional uri is a relative uri from site base uri including the document root '/'.
+ .. versionchanged:: 8.2 + a *protocol* parameter can be given which is passed to the + :meth:`family.Family.base_url` method. + :param site: The Site to connect to - :type site: pywikibot.site.BaseSite :param uri: the URI to retrieve - :keyword charset: Either a valid charset (usable for str.decode()) or None - to automatically chose the charset from the returned header (defaults - to latin-1) - :type charset: CodecInfo, str, None + :keyword Optional[CodecInfo, str] charset: Either a valid charset + (usable for str.decode()) or None to automatically chose the + charset from the returned header (defaults to latin-1) + :keyword Optional[str] protocol: a url scheme :return: The received data Response """ kwargs.setdefault('verify', site.verify_SSL_certificate()) @@ -278,7 +281,7 @@ format_string = headers.get('user-agent') headers['user-agent'] = user_agent(site, format_string)
- baseuri = site.base_url(uri) + baseuri = site.base_url(uri, protocol=kwargs.pop('protocol', None)) r = fetch(baseuri, headers=headers, **kwargs) site.throttle.retry_after = int(r.headers.get('retry-after', 0)) return r @@ -331,6 +334,10 @@ if isinstance(response, requests.Timeout): raise ServerError(response)
+ if isinstance(response, ValueError): + # MissingSchema, InvalidSchema, InvalidURL, InvalidHeader + raise FatalServerError(str(response)) + if isinstance(response, Exception): with suppress(Exception): # request exception may contain response and request attribute diff --git a/pywikibot/data/api/_requests.py b/pywikibot/data/api/_requests.py index 892c5a2..25f7e57 100644 --- a/pywikibot/data/api/_requests.py +++ b/pywikibot/data/api/_requests.py @@ -17,7 +17,7 @@ from email.mime.nonmultipart import MIMENonMultipart from pathlib import Path from typing import Any, Optional, Union -from urllib.parse import unquote, urlencode +from urllib.parse import unquote, urlencode, urlparse from warnings import warn
import pywikibot @@ -674,13 +674,23 @@ paramstring) -> tuple: """Get or post a http request with exception handling.
+ .. versionchanged:: 8.2 + change the scheme if the previous request didn't have json + content. + :return: a tuple containing requests.Response object from http.request and use_get value """ + kwargs = {} + schemes = ('http', 'https') + if self.json_warning and self.site.protocol() in schemes: + # retry with other scheme + kwargs['protocol'] = schemes[self.site.protocol() == 'http'] + try: response = http.request(self.site, uri=uri, method='GET' if use_get else 'POST', - data=data, headers=headers) + data=data, headers=headers, **kwargs) except Server504Error: pywikibot.log('Caught HTTP 504 error; retrying') except Client414Error: @@ -708,6 +718,10 @@ def _json_loads(self, response) -> Optional[dict]: """Return a dict from requests.Response.
+ .. versionchanged:: 8.2 + show a warning to add a ``protocoll()`` method to the family + file if suitable. + :param response: a requests.Response object :type response: requests.Response :return: a data dict @@ -753,7 +767,18 @@ self[param] = [str(int(value) // 2)] pywikibot.info(f'Set {param} = {self[param]}') else: + scheme = urlparse(response.url).scheme + if self.json_warning and scheme != self.site.protocol(): + warn(f""" +Your {self.site.family} family uses a wrong scheme {self.site.protocol()!r} +but {scheme!r} is required. Please add the following code to your family file: + + def protocol(self, code: str) -> str: + return '{scheme}' + +""", stacklevel=2) return result or {} + self.wait() return None
diff --git a/pywikibot/families/i18n_family.py b/pywikibot/families/i18n_family.py index 67c780d..93be54e 100644 --- a/pywikibot/families/i18n_family.py +++ b/pywikibot/families/i18n_family.py @@ -1,6 +1,6 @@ """Family module for Translate Wiki.""" # -# (C) Pywikibot team, 2007-2022 +# (C) Pywikibot team, 2007-2023 # # Distributed under the terms of the MIT license. # @@ -14,7 +14,3 @@
name = 'i18n' domain = 'translatewiki.net' - - def protocol(self, code) -> str: - """Return https as the protocol for this family.""" - return 'https' diff --git a/pywikibot/families/osm_family.py b/pywikibot/families/osm_family.py index f5c2962..81b14cc 100644 --- a/pywikibot/families/osm_family.py +++ b/pywikibot/families/osm_family.py @@ -1,6 +1,6 @@ """Family module for OpenStreetMap wiki.""" # -# (C) Pywikibot team, 2009-2022 +# (C) Pywikibot team, 2009-2023 # # Distributed under the terms of the MIT license. # @@ -43,7 +43,3 @@ edit_restricted_templates = { 'en': ('In Bearbeitung',), } - - def protocol(self, code) -> str: - """Return https as the protocol for this family.""" - return 'https' diff --git a/pywikibot/families/vikidia_family.py b/pywikibot/families/vikidia_family.py index 2878681..6280258 100644 --- a/pywikibot/families/vikidia_family.py +++ b/pywikibot/families/vikidia_family.py @@ -21,7 +21,3 @@
# Sites we want to edit but not count as real languages test_codes = ['central', 'test'] - - def protocol(self, code) -> str: - """Return https as the protocol for this family.""" - return 'https' diff --git a/pywikibot/families/wikihow_family.py b/pywikibot/families/wikihow_family.py index d49b642..2343870 100644 --- a/pywikibot/families/wikihow_family.py +++ b/pywikibot/families/wikihow_family.py @@ -60,7 +60,3 @@ def scriptpath(self, code) -> str: """Return the script path for this family.""" return '' - - def protocol(self, code) -> str: - """Return 'https' as the protocol.""" - return 'https' diff --git a/pywikibot/families/wikispore_family.py b/pywikibot/families/wikispore_family.py index 427280e..a0deedb 100644 --- a/pywikibot/families/wikispore_family.py +++ b/pywikibot/families/wikispore_family.py @@ -3,7 +3,7 @@ .. versionadded:: 4.1 """ # -# (C) Pywikibot team, 2020-2022 +# (C) Pywikibot team, 2020-2023 # # Distributed under the terms of the MIT license. # @@ -22,6 +22,3 @@ 'en': 'wikispore.wmflabs.org', 'test': 'wikispore-test.wmflabs.org', } - - def protocol(self, code) -> str: - return 'https' diff --git a/pywikibot/family.py b/pywikibot/family.py index 1d9d0b8..ed75706 100644 --- a/pywikibot/family.py +++ b/pywikibot/family.py @@ -428,15 +428,18 @@
# Methods def protocol(self, code: str) -> str: - """ - The protocol to use to connect to the site. + """The protocol to use to connect to the site.
- May be overridden to return 'https'. Other protocols are not supported. + May be overridden to return 'http'. Other protocols are not + supported. + + .. versionchanged:: 8.2 + ``https`` is returned instead of ``http``.
:param code: language code :return: protocol that this family uses """ - return 'http' + return 'https'
def verify_SSL_certificate(self, code: str) -> bool: """ @@ -828,10 +831,6 @@
return {code: cls.domain for code in codes}
- def protocol(self, code) -> str: - """Return 'https' as the protocol.""" - return 'https' - def scriptpath(self, code): """Return the script path for this family.""" return '' if code == 'en' else ('/' + code) @@ -980,10 +979,6 @@ """Return Wikimedia Commons as the shared image repository.""" return ('commons', 'commons')
- def protocol(self, code) -> str: - """Return 'https' as the protocol.""" - return 'https' - def eventstreams_host(self, code) -> str: """Return 'https://stream.wikimedia.org' as the stream hostname.""" return 'https://stream.wikimedia.org' diff --git a/tests/http_tests.py b/tests/http_tests.py index 73036a5..eee230b 100755 --- a/tests/http_tests.py +++ b/tests/http_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for http module.""" # -# (C) Pywikibot team, 2014-2022 +# (C) Pywikibot team, 2014-2023 # # Distributed under the terms of the MIT license. # @@ -153,7 +153,7 @@ """Test invalid scheme.""" # A InvalidSchema is raised within requests with self.assertRaisesRegex( - requests.exceptions.InvalidSchema, + FatalServerError, "No connection adapters were found for 'invalid://url'"): http.fetch('invalid://url')