jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/811738 )
Change subject: [bugfix] Fix cp encodings
[bugfix] Fix cp encodings
Bug: T312230 Change-Id: Iae0be98ebf3cbe6cee42bc979b4e8bad68474a75 --- M pywikibot/comms/http.py M tests/http_tests.py 2 files changed, 4 insertions(+), 2 deletions(-)
Approvals: Rubin: Looks good to me, but someone else must approve Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py index a828e41..c5c4b11 100644 --- a/pywikibot/comms/http.py +++ b/pywikibot/comms/http.py @@ -415,12 +415,12 @@ if re.sub(r'[ _-]', '', charset) == 'xeucjp': charset = 'euc_jp' else: - # fix cp encodings (T304830, T307760) + # fix cp encodings (T304830, T307760, T312230) # remove delimiter in front of the code number # replace win/windows with cp # remove language code in font of win/windows charset = re.sub( - r'\A(?:cp[ _-]|(?:[a-z]+[_-]?)?win(?:dows[_-]?)?)(\d{3,4})', + r'\A(?:cp[ _-]|(?:[a-z]+[_-]?)?win(?:dows)?[_-]?)(\d{3,4})', r'cp\1', charset) return charset
diff --git a/tests/http_tests.py b/tests/http_tests.py index 95c67d6..962aa97 100755 --- a/tests/http_tests.py +++ b/tests/http_tests.py @@ -481,6 +481,8 @@ self.assertEqual( http.get_charset_from_content_type('charset="cp-1251"'), 'cp1251') self.assertEqual( + http.get_charset_from_content_type('charset="win-1251"'), 'cp1251') + self.assertEqual( http.get_charset_from_content_type('charset="ru-win1251"'), 'cp1251')