jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Replace nicepath by APISite.article_path
......................................................................
[FEAT] Replace nicepath by APISite.article_path
This replaces Family.nicepath and Family.nice_get_address and uses the info
from the siteinfo. Family.nicepath should be replaced by APISite.article_path
and Family.nice_get_address is already replaced by APISite.nice_get_address.
The 'articlepath' property is only present in the siteinfo since MediaWiki
version 1.16.0, so for older versions it is parsed from the 'base' property and cached inside the siteinfo.
Bug: T89451
Change-Id: Ie9f8680d1cb82d3f322c7d6efb9ffba4be489f48
---
M pywikibot/family.py
M pywikibot/page.py
M pywikibot/pagegenerators.py
M pywikibot/site.py
M scripts/add_text.py
M scripts/blockpageschecker.py
6 files changed, 28 insertions(+), 11 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/family.py b/pywikibot/family.py
index a4634fc..31526ea 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -1076,6 +1076,8 @@
def apipath(self, code):
return '%s/api.php' % self.scriptpath(code)
+ # TODO: @deprecated('APISite.article_path')
+ # As soon as from_url does not need nicepath anymore
def nicepath(self, code):
return '/wiki/'
@@ -1114,6 +1116,7 @@
def get_address(self, code, title):
return '%s?title=%s&redirect=no' % (self.path(code), title)
+ @deprecated('APISite.nice_get_address(title)')
def nice_get_address(self, code, title):
return '%s%s' % (self.nicepath(code), title)
diff --git a/pywikibot/page.py b/pywikibot/page.py
index b364488..b47badd 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -300,8 +300,8 @@
def full_url(self):
"""Return the full URL."""
- return self.site.base_url(self.site.nice_get_address(self.title(
- asUrl=True)))
+ return self.site.base_url(self.site.article_path +
+ self.title(asUrl=True))
def autoFormat(self):
"""Return L{date.getAutoFormat} dictName and value, if any.
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 3dff94f..df3382c 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -2118,7 +2118,7 @@
# restrict query to local site
localQuery = '%s site:%s' % (self.query, self.site.hostname())
base = 'http://%s%s' % (self.site.hostname(),
- self.site.nice_get_address(''))
+ self.site.article_path)
for url in self.queryYahoo(localQuery):
if url[:len(base)] == base:
title = url[len(base):]
@@ -2184,7 +2184,7 @@
# restrict query to local site
localQuery = '%s site:%s' % (self.query, self.site.hostname())
base = 'http://%s%s' % (self.site.hostname(),
- self.site.nice_get_address(''))
+ self.site.article_path)
for url in self.queryGoogle(localQuery):
if url[:len(base)] == base:
title = url[len(base):]
diff --git a/pywikibot/site.py b/pywikibot/site.py
index f1901da..c4d53ce 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -1209,6 +1209,16 @@
else:
return pywikibot.tools.EMPTY_DEFAULT
+ def _post_process(self, prop, data):
+ """Do some default handling of data. Directly modifies data."""
+ # Be careful with version tests inside this here as it might need to
+ # query this method to actually get the version number
+ if prop == 'general':
+ if 'articlepath' not in data: # Introduced in 1.16.0
+ # Old version of MediaWiki, extract from base
+ path = urlparse(data['base'])[2].rsplit('/', 1)[0] + '/$1'
+ data['articlepath'] = path
+
def _get_siteinfo(self, prop, expiry):
"""
Retrieve a siteinfo property.
@@ -1286,6 +1296,7 @@
cache_time = datetime.datetime.utcnow()
for prop in props:
if prop in data['query']:
+ self._post_process(prop, data['query'][prop])
result[prop] = (data['query'][prop], cache_time)
return result
@@ -1896,6 +1907,14 @@
return set(ns for ns in self.namespaces.values() if ns.id >= 0 and
self._useroptions['searchNs{0}'.format(ns.id)] in ['1', True])
+ @property
+ def article_path(self):
+ """Get the nice article path without $1."""
+ # Assert and remove the trailing $1 and assert that it'll end in /
+ assert self.siteinfo['general']['articlepath'].endswith('/$1'), \
+ 'articlepath must end with /$1'
+ return self.siteinfo['general']['articlepath'][:-2]
+
def assert_valid_iter_params(self, msg_prefix, start, end, reverse):
"""Validate iterating API parameters."""
if reverse:
diff --git a/scripts/add_text.py b/scripts/add_text.py
index b4b6532..7c03d85 100755
--- a/scripts/add_text.py
+++ b/scripts/add_text.py
@@ -155,7 +155,7 @@
# Understand if the bot has to skip the page or not
# In this way you can use both -except and -excepturl
if regexSkipUrl is not None:
- url = site.nice_get_address(page.title(asUrl=True))
+ url = page.full_url()
result = re.findall(regexSkipUrl, site.getUrl(url))
if result != []:
pywikibot.output(
diff --git a/scripts/blockpageschecker.py b/scripts/blockpageschecker.py
index bb416c7..7a671ee 100755
--- a/scripts/blockpageschecker.py
+++ b/scripts/blockpageschecker.py
@@ -192,13 +192,8 @@
u'Do you want to open the page?',
[('with browser', 'b'), ('with gui', 'g'), ('no', 'n')], 'n',
automatic_quit=False)
- site = page.site
- url = '%s://%s%s?redirect=no' % (site.protocol(),
- site.hostname(),
- site.nice_get_address(
- page.title(asUrl=True)))
if quest == 'b':
- webbrowser.open(url)
+ webbrowser.open('%s?redirect=no' % page.full_url())
elif quest == 'g':
from pywikibot import editor as editarticle
editor = editarticle.TextEditor()
--
To view, visit https://gerrit.wikimedia.org/r/221439
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie9f8680d1cb82d3f322c7d6efb9ffba4be489f48
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: [bugfix] Workaround UnicodeDecodeError on api error
......................................................................
[bugfix] Workaround UnicodeDecodeError on api error
When an API error happens, the code tries to log the request parameters but
fails because they are stored in a dict, and both str and repr of a dict use
the repr of each key and value. If a value is a Page instance, its repr in
Python 2 returns bytes encoded with the console encoding. When the str or repr
of the dict is then inserted into a unicode string, those bytes must be decoded,
which defaults to ASCII and fails if the Page's title contains non-ASCII characters.
This patch works around the problem by manually decoding the bytes in Python 2
using the console encoding whenever an API error happens. It does not actually
fix Page's repr method, so the error may still occur elsewhere. It assumes that
the rest of the str is encoded either with the console encoding or with ASCII, and
Bug: T66958
Change-Id: I298b7594599dd189211a8c268c7e094d042f40e6
---
M pywikibot/data/api.py
M tests/api_tests.py
2 files changed, 15 insertions(+), 4 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index 539eed3..3e5d960 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -1809,11 +1809,17 @@
pywikibot.error("Detected MediaWiki API exception %s%s"
% (class_name,
"; retrying" if retry else "; raising"))
+ # Due to bug T66958, Page's repr may return non ASCII bytes
+ # Get as bytes in PY2 and decode with the console encoding as
+ # the rest should be ASCII anyway.
+ param_repr = str(self._params)
+ if PY2:
+ param_repr = param_repr.decode(config.console_encoding)
pywikibot.log(u"MediaWiki exception %s details:\n"
u" query=\n%s\n"
u" response=\n%s"
% (class_name,
- pprint.pformat(self._params),
+ pprint.pformat(param_repr),
result))
if retry:
@@ -1869,8 +1875,14 @@
for e in user_tokens.items())))
# raise error
try:
+ # Due to bug T66958, Page's repr may return non ASCII bytes
+ # Get as bytes in PY2 and decode with the console encoding as
+ # the rest should be ASCII anyway.
+ param_repr = str(self._params)
+ if PY2:
+ param_repr = param_repr.decode(config.console_encoding)
pywikibot.log(u"API Error: query=\n%s"
- % pprint.pformat(self._params))
+ % pprint.pformat(param_repr))
pywikibot.log(u" response=\n%s"
% result)
diff --git a/tests/api_tests.py b/tests/api_tests.py
index 4331e35..c3aa17f 100644
--- a/tests/api_tests.py
+++ b/tests/api_tests.py
@@ -29,7 +29,7 @@
DefaultSiteTestCase,
DefaultDrySiteTestCase,
)
-from tests.utils import allowed_failure, expected_failure_if, FakeLoginManager
+from tests.utils import allowed_failure, FakeLoginManager
if not PY2:
from urllib.parse import unquote_to_bytes
@@ -151,7 +151,6 @@
with PatchedRequest(self._dummy_request):
self.assertRaises(api.APIMWException, req.submit)
- @expected_failure_if(PY2)
def test_API_error_encoding_Unicode(self):
"""Test a Page instance as parameter using non-ASCII chars."""
page = pywikibot.page.Page(self.site, 'Ümlä üt')
--
To view, visit https://gerrit.wikimedia.org/r/219618
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I298b7594599dd189211a8c268c7e094d042f40e6
Gerrit-PatchSet: 7
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>