jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] color_format: Only handle unicode strings ......................................................................
[FIX] color_format: Only handle unicode strings
When using `unicode.format` it does return a `unicode` instance, but `color_format` is based on Python's `Formatter` class which doesn't apply that rule.
So it needs to workaround the issue that Python's `Formatter` will return the `bytes` representation of an object when there is no further specification (aka nothing after the colon). The default implementation will also return `bytes` when there is no literal text and it might cause `UnicodeDecodeError`s when there is literal text and the bytes representation does use non-ASCII characters.
Bug: T113411 Change-Id: I0f8e0f1ac3e64c57918d99e9716c486270a87766 --- M pywikibot/tools/formatter.py M tests/tools_formatter_tests.py 2 files changed, 69 insertions(+), 10 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/tools/formatter.py b/pywikibot/tools/formatter.py index 151e4d9..ae9c530 100644 --- a/pywikibot/tools/formatter.py +++ b/pywikibot/tools/formatter.py @@ -15,6 +15,7 @@ from string import Formatter
from pywikibot.logging import output +from pywikibot.tools import UnicodeType from pywikibot.userinterfaces.terminal_interface_base import colors
@@ -103,11 +104,41 @@ if previous_literal: yield previous_literal, None, None, None
+ def _vformat(self, *args, **kwargs): + """ + Override original `_vformat` to prevent that it changes into `bytes`. + + The original `_vformat` is returning `bytes` under certain + curcumstances. It happens when the `format_string` is empty, when there + is no literal text around it or when the field value is not a `unicode` + already. + + @rtype: unicode + """ + result = super(_ColorFormatter, self)._vformat(*args, **kwargs) + if not isinstance(result, UnicodeType): + assert result == b'' + result = '' # This is changing it into a unicode + return result + def vformat(self, format_string, args, kwargs): - """Return the normal format result but verify no colors are keywords.""" + """ + Return the normal format result but verify no colors are keywords. + + @param format_string: The format template string + @type format_string: unicode + @param args: The positional field values + @type args: sequence + @param kwargs: The named field values + @type kwargs: dict + @return: The formatted string + @rtype: unicode + """ if self.colors.intersection(kwargs): # kwargs use colors raise ValueError('Keyword argument(s) use valid color(s): ' + '", "'.join(self.colors.intersection(kwargs))) + if not isinstance(format_string, UnicodeType): + raise TypeError('expected str, got {0}'.format(type(format_string))) return super(_ColorFormatter, self).vformat(format_string, args, kwargs)
@@ -118,5 +149,10 @@
It is automatically adding \03 in front of color fields so it's unnecessary to add them manually. Any other \03 in the text is disallowed. + + @param text: The format template string + @type text: unicode + @return: The formatted string + @rtype: unicode """ return _ColorFormatter().format(text, *args, **kwargs) diff --git a/tests/tools_formatter_tests.py b/tests/tools_formatter_tests.py index a441e35..bd3a2df 100644 --- a/tests/tools_formatter_tests.py +++ b/tests/tools_formatter_tests.py @@ -10,6 +10,7 @@ __version__ = '$Id$' # from pywikibot.tools import formatter +from pywikibot.tools import UnicodeMixin
from tests.aspects import unittest, TestCase
@@ -35,20 +36,31 @@
"""Test color_format function in bot module."""
+ class DummyUnicode(UnicodeMixin): + + def __unicode__(self): + return 'ä' + net = False + + def assert_format(self, format_string, expected, *args, **kwargs): + """Assert that color_format returns the expected string and type.""" + result = formatter.color_format(format_string, *args, **kwargs) + self.assertEqual(result, expected) + self.assertIsInstance(result, type(expected))
def test_no_colors(self): """Test without colors in template string.""" - self.assertEqual(formatter.color_format('42'), '42') - self.assertEqual(formatter.color_format('{0}', 42), '42') - self.assertEqual(formatter.color_format('{ans}', ans=42), '42') + self.assert_format('', '') + self.assert_format('42', '42') + self.assert_format('{0}', '42', 42) + self.assert_format('before {0} after', 'before 42 after', 42) + self.assert_format('{ans}', '42', ans=42)
def test_colors(self): """Test with colors in template string.""" - self.assertEqual(formatter.color_format('{0}{black}', 42), - '42\03{black}') - self.assertEqual(formatter.color_format('{ans}{black}', ans=42), - '42\03{black}') + self.assert_format('{0}{black}', '42\03{black}', 42) + self.assert_format('{ans}{black}', '42\03{black}', ans=42) self.assertRaisesRegex( ValueError, r'.*conversion.*', formatter.color_format, '{0}{black!r}', 42) @@ -58,8 +70,7 @@
def test_marker(self): r"""Test that the \03 marker is only allowed in front of colors.""" - self.assertEqual(formatter.color_format('{0}\03{black}', 42), - '42\03{black}') + self.assert_format('{0}\03{black}', '42\03{black}', 42) # literal before a normal field self.assertRaisesRegex( ValueError, r'.*\03', formatter.color_format, @@ -74,6 +85,18 @@ self.assertRaises(ValueError, formatter.color_format, '{aqua}{black}', aqua=42)
+ def test_non_ascii(self): + """Test non-ASCII replacements.""" + self.assert_format('{0}', 'ä', 'ä') + self.assert_format('{black}{0}', '\03{black}ä', 'ä') + self.assert_format('{0}', 'ä', self.DummyUnicode()) + self.assert_format('{black}{0}', '\03{black}ä', self.DummyUnicode()) + + def test_bytes_format(self): + """Test that using `bytes` is not allowed.""" + self.assertRaises(TypeError, formatter.color_format, b'{0}', 'a') + self.assertRaises(TypeError, formatter.color_format, b'{black}{0}', 'a') +
if __name__ == '__main__': try: