jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942624 )
Change subject: [IMPR] use urllib.parse.unquote() for tools.chars.url2string() function ......................................................................
[IMPR] use urllib.parse.unquote() for tools.chars.url2string() function
Simplify the tools.chars.url2string() function by using urllib.parse.unquote() instead of calling urllib.parse.unquote_to_bytes() and manually encoding/decoding the strings around it.
Change-Id: I49bf4fec45f6f67ddab75f7248b8b1a9eadc6d8a --- M pywikibot/tools/chars.py 1 file changed, 30 insertions(+), 9 deletions(-)
Approvals: Matěj Suchánek: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/tools/chars.py b/pywikibot/tools/chars.py index c64c84d..47bfb5a 100644 --- a/pywikibot/tools/chars.py +++ b/pywikibot/tools/chars.py @@ -8,7 +8,7 @@ import sys from contextlib import suppress from typing import Union -from urllib.parse import unquote_to_bytes +from urllib.parse import unquote
from pywikibot.backports import Iterable from pywikibot.tools._unidata import _category_cf @@ -98,10 +98,22 @@ encodings: Union[str, Iterable[str]] = 'utf-8') -> str: """Convert URL-encoded text to unicode using several encoding.
- Uses the first encoding that doesn't cause an error. + Uses the first encoding that doesn't cause an error. Raises the + first exception if all encodings fail. + + For a single *encodings* string this function is equivalent to + :samp:`urllib.parse.unquote(title, encodings, errors='strict')` + + .. versionchanged:: 8.4 + Ignore *LookupError* and try other encodings. + + .. seealso:: :python:`urllib.parse.unquote + <library/urllib.parse.html#urllib.parse.unquote>`
**Example:**
+ >>> url2string('abc%20def') + 'abc def' >>> url2string('/El%20Ni%C3%B1o/') '/El Niño/' >>> url2string('/El%20Ni%C3%B1o/', 'ascii') @@ -118,19 +130,15 @@ :raise LookupError: unknown encoding """ if isinstance(encodings, str): - encodings = [encodings] + return unquote(title, encodings, errors='strict')
first_exception = None for enc in encodings: try: - t = title.encode(enc) - t = unquote_to_bytes(t) - result = t.decode(enc) - except UnicodeError as e: + return unquote(title, enc, errors='strict') + except (UnicodeError, LookupError) as e: if not first_exception: first_exception = e - else: - return result
# Couldn't convert, raise the first exception raise first_exception
pywikibot-commits@lists.wikimedia.org