jenkins-bot has submitted this change and it was merged.
Change subject: User-agent graceful degradation ......................................................................
User-agent graceful degradation
The newly formatted user-agent string is not compliant with RFC 7231: it puts the username at the end when it should be at the beginning, and it leaves '(User:)' in the user-agent if the request was not made to a site.
Using a default format string of '{script}/{version} ..' invites the end user to build a non-compliant user-agent. We should give them properly formatted products and product comments.
Provide Pywikibot, httplib2 and Python release versions as products, and provide script_product and script_comments, populated with the information currently known about the script.
In the user-agent builder, include in the user-agent whichever components are available at the time of the request, allowing http.py to be used by version.py without cyclic dependency runtime exceptions.
Avoid percent-encoding or utf-8 encoding the username unless necessary.
Bug: 55016 Bug: 66102 Change-Id: Iedf28f5e0a216ba23a015924e564ae0537e64f0d --- M pywikibot/comms/http.py M pywikibot/config2.py M tests/dry_site_tests.py M tests/http_tests.py 4 files changed, 187 insertions(+), 21 deletions(-)
Approvals: John Vandenberg: Looks good to me, but someone else must approve XZise: Looks good to me, but someone else must approve Mpaa: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py index 564026a..22a430d 100644 --- a/pywikibot/comms/http.py +++ b/pywikibot/comms/http.py @@ -123,6 +123,85 @@ # export cookie_jar to global namespace pywikibot.cookie_jar = cookie_jar
+USER_AGENT_PRODUCTS = { + 'python': 'Python/' + '.'.join([str(i) for i in sys.version_info]), + 'httplib2': 'httplib2/' + httplib2.__version__, + 'pwb': 'Pywikibot/' + pywikibot.__release__, +} + + +def user_agent_username(username=None): + """ + Reduce username to a representation permitted in HTTP headers. + + To achieve that, this function: + 1) replaces spaces (' ') with '_' + 2) encodes the username as 'utf-8' and if the username is not ASCII + 3) URL encodes the username if it is not ASCII, or contains '%' + """ + if not username: + return '' + username = username.replace(' ', '_') # Avoid spaces or %20. + try: + username = username.encode('ascii') + # % is legal in the default $wgLegalTitleChars + # This is so that ops know the real pywikibot will not + # allow a useragent in the username to allow through a hand-coded + # percent-encoded value. + if '%' in username: + return quote(username) + else: + return username + except UnicodeEncodeError: + pass + username = quote(username.encode('utf-8')) + return username + + +def user_agent(site=None, format_string=None): + values = USER_AGENT_PRODUCTS.copy() + + # This is the Pywikibot revision; also map it to {version} at present. + if pywikibot.version.cache: + values['revision'] = pywikibot.version.cache['rev'] + else: + values['revision'] = '' + values['version'] = values['revision'] + + values['script'] = pywikibot.calledModuleName() + + # TODO: script_product should add the script version, if known + values['script_product'] = pywikibot.calledModuleName() + + script_comments = [] + username = '' + if site: + script_comments.append(str(site)) + + # TODO: there are several ways of identifying a user, and username + # is not the best for a HTTP header if the username isnt ASCII. 
+ if site.username(): + username = user_agent_username(site.username()) + script_comments.append( + 'User:' + username) + + values.update({ + 'family': site.family.name if site else '', + 'code': site.code if site else '', + 'lang': site.code if site else '', # TODO: use site.lang, if known + 'site': str(site) if site else '', + 'username': username, + 'script_comments': '; '.join(script_comments) + }) + + if not format_string: + format_string = config.user_agent_format + + formatted = format_string.format(**values) + # clean up after any blank components + formatted = formatted.replace(u'()', u'').replace(u' ', u' ').strip() + return formatted +
def request(site, uri, ssl=False, *args, **kwargs): """Queue a request to be submitted to Site. @@ -151,24 +230,10 @@ baseuri = urlparse.urljoin("%s://%s" % (proto, host), uri) else: baseuri = uri - if "headers" not in kwargs: - kwargs["headers"] = {} - if site: - username = site.username() - if not username: - username = ""
- kwargs["headers"]["user-agent"] = config.USER_AGENT_FORMAT.format( - script=pywikibot.calledModuleName(), - version=pywikibot.version.getversiondict()['rev'], - username=quote(username.encode('utf-8')), - lang=site.code, - family=site.family.name) - else: - USER_AGENT_FORMAT = '{script}/{version} Pywikibot/2.0' - kwargs["headers"]["user-agent"] = USER_AGENT_FORMAT.format( - script=pywikibot.calledModuleName(), - version=pywikibot.version.getversiondict()['rev']) + format_string = kwargs.setdefault("headers", {}).get("user-agent") + kwargs["headers"]["user-agent"] = user_agent(site, format_string) + request = threadedhttp.HttpRequest(baseuri, *args, **kwargs) http_queue.put(request) while not request.lock.acquire(False): diff --git a/pywikibot/config2.py b/pywikibot/config2.py index 6ea3c45..0fe5f38 100644 --- a/pywikibot/config2.py +++ b/pywikibot/config2.py @@ -76,9 +76,7 @@ # User agent format. # For the meaning and more help in customization see: # https://www.mediawiki.org/wiki/Manual:Pywikibot/User-agent -# The default is script, revision number and user name -# For more information see https://meta.wikimedia.org/wiki/User-agent_policy -USER_AGENT_FORMAT = '{script}/{version} Pywikibot/2.0 (User:{username})' +user_agent_format = '{script_product} ({script_comments}) {pwb} ({revision}) {httplib2} {python}'
# The default interface for communicating with the site # currently the only defined interface is 'APISite', so don't change this! diff --git a/tests/dry_site_tests.py b/tests/dry_site_tests.py index ff75020..c3e12aa 100644 --- a/tests/dry_site_tests.py +++ b/tests/dry_site_tests.py @@ -9,6 +9,7 @@
import pywikibot from pywikibot.site import must_be, need_version +from pywikibot.comms.http import user_agent
from tests.utils import unittest, NoSiteTestCase, DummySiteinfo
@@ -44,6 +45,56 @@ self.assertTrue(x.logged_in(True)) self.assertFalse(x.logged_in(False))
+ def test_user_agent(self): + x = DrySite('en', 'wikipedia') + + x._userinfo = {'name': 'foo'} + x._username = ('foo', None) + + self.assertEqual('Pywikibot/' + pywikibot.__release__, + user_agent(x, format_string='{pwb}')) + + self.assertEqual(x.family.name, + user_agent(x, format_string='{family}')) + self.assertEqual(x.code, + user_agent(x, format_string='{lang}')) + self.assertEqual(x.family.name + ' ' + x.code, + user_agent(x, format_string='{family} {lang}')) + + self.assertEqual(x.username(), + user_agent(x, format_string='{username}')) + + x._userinfo = {'name': u'!'} + x._username = (u'!', None) + + self.assertEqual('!', user_agent(x, format_string='{username}')) + + x._userinfo = {'name': u'foo bar'} + x._username = (u'foo bar', None) + + self.assertEqual('foo_bar', user_agent(x, format_string='{username}')) + + old_config = '{script}/{version} Pywikibot/2.0 (User:{username})' + + pywikibot.version.getversiondict() + script_value = pywikibot.calledModuleName() + '/' + pywikibot.version.cache['rev'] + + self.assertEqual(script_value + ' Pywikibot/2.0 (User:foo_bar)', + user_agent(x, format_string=old_config)) + + x._userinfo = {'name': u'⁂'} + x._username = (u'⁂', None) + + self.assertEqual('%E2%81%82', + user_agent(x, format_string='{username}')) + + x._userinfo = {'name': u'127.0.0.1'} + x._username = (None, None) + + self.assertEqual('Foo', user_agent(x, format_string='Foo {username}')) + self.assertEqual('Foo (wikipedia:en)', + user_agent(x, format_string='Foo ({script_comments})')) +
class TestMustBe(NoSiteTestCase):
diff --git a/tests/http_tests.py b/tests/http_tests.py index e139fbb..9229cdd 100644 --- a/tests/http_tests.py +++ b/tests/http_tests.py @@ -7,8 +7,10 @@ # __version__ = '$Id$'
- +import sys +import pywikibot from pywikibot.comms import http, threadedhttp +from pywikibot import config2 as config from tests.utils import unittest, NoSiteTestCase
@@ -45,6 +47,56 @@ self.assertIn('-content-encoding', r[0]) self.assertEqual(r[0]['-content-encoding'], 'gzip')
+ def test_user_agent(self): + self.assertEqual('', http.user_agent(format_string=' ')) + self.assertEqual('', http.user_agent(format_string=' ')) + self.assertEqual('a', http.user_agent(format_string=' a ')) + + # if there is no site, these can't have a value + self.assertEqual('', http.user_agent(format_string='{username}')) + self.assertEqual('', http.user_agent(format_string='{family}')) + self.assertEqual('', http.user_agent(format_string='{lang}')) + + self.assertEqual('Pywikibot/' + pywikibot.__release__, + http.user_agent(format_string='{pwb}')) + self.assertNotIn(' ', http.user_agent(format_string=' {pwb} ')) + + self.assertIn('Pywikibot/' + pywikibot.__release__, + http.user_agent(format_string='SVN/1.7.5 {pwb}')) + + def test_user_agent_username(self): + self.assertEqual('%25', http.user_agent_username('%')) + self.assertEqual('%2525', http.user_agent_username('%25')) + self.assertEqual(';', http.user_agent_username(';')) + self.assertEqual('-', http.user_agent_username('-')) + self.assertEqual('.', http.user_agent_username('.')) + self.assertEqual("'", http.user_agent_username("'")) + self.assertEqual('foo_bar', http.user_agent_username('foo bar')) + + self.assertEqual('%E2%81%82', http.user_agent_username(u'⁂')) + + +class DefaultUserAgentTestCase(NoSiteTestCase): + + def setUp(self): + self.orig_format = config.user_agent_format + config.user_agent_format = '{script_product} ({script_comments}) {pwb} ({revision}) {httplib2} {python}' + + def tearDown(self): + config.user_agent_format = self.orig_format + + def test_default_user_agent(self): + """ Config defined format string test. 
""" + self.assertTrue(http.user_agent().startswith( + pywikibot.calledModuleName())) + self.assertIn('Pywikibot/' + pywikibot.__release__, http.user_agent()) + self.assertNotIn(' ', http.user_agent()) + self.assertNotIn('()', http.user_agent()) + self.assertNotIn('(;', http.user_agent()) + self.assertNotIn(';)', http.user_agent()) + self.assertIn('httplib2/', http.user_agent()) + self.assertIn('Python/' + str(sys.version_info[0]), http.user_agent()) +
if __name__ == '__main__': try:
pywikibot-commits@lists.wikimedia.org