jenkins-bot has submitted this change and it was merged.
Change subject: Python issue #10254
......................................................................
Python issue #10254
Pywikibot had problems with NFC normalizing unicode,
resulting in Python issue #10254, which was resolved
in Python 2.7.2 and backported to Python 2.6.7.
It is unlikely that supporting Python 2.7.0 and 2.7.1 is still
necessary, so decommission support for Python 2.7.0 and 2.7.1.
Work around the problem using the unicodedata2 backport of unicodedata,
which includes Unicode 7.0 support.
If unicodedata2 isn't available on Python 2.6.6, the Link constructor
raises UnicodeError for titles containing combining characters,
which will allow normal usage for many languages, but will fail
predictably for page titles which could encounter this error.
Moves the unicode test from the version script to the test suite,
as the only supported platform with this bug is Python 2.6.6.
Bug: T102461
Change-Id: If4a8e8eed682b837dd468b02a5d98b01ebf37584
---
M .appveyor.yml
M pwb.py
M pywikibot/bot.py
M pywikibot/page.py
M pywikibot/version.py
M requirements.txt
M scripts/version.py
M setup.py
M tests/link_tests.py
A tests/python_tests.py
10 files changed, 193 insertions(+), 41 deletions(-)
Approvals:
John Vandenberg: Looks good to me, but someone else must approve
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/.appveyor.yml b/.appveyor.yml
index acd7c07..1befd5d 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -15,9 +15,9 @@
matrix:
- # Pywikibot support matrix suggests 'should run' on Python 2.6.5+
- # Test the lowest release of each major Python version.
+ # Test the lowest supported release of each major Python version.
+ # Pywikibot support matrix suggests 'should run' on Python 2.6.5+
# Only Python 2.6.6 is able to be set up on Appveyor.
#
https://github.com/ogrisel/python-appveyor-demo/issues/10
# fwiw, Redhat Enterprise Linux ships with 2.6.6.
@@ -26,11 +26,8 @@
PYTHON_VERSION: "2.6.6"
PYTHON_ARCH: "64"
- # Python 2.7.0 needs "2.7" as the version instead of "2.7.0"
- #
https://github.com/ogrisel/python-appveyor-demo/issues/9
-
- - PYTHON: "C:\\Python270-x64"
- PYTHON_VERSION: "2.7"
+ - PYTHON: "C:\\Python272-x64"
+ PYTHON_VERSION: "2.7.2"
PYTHON_ARCH: "64"
- PYTHON: "C:\\Python330-x64"
diff --git a/pwb.py b/pwb.py
index 0efba76..3266a85 100755
--- a/pwb.py
+++ b/pwb.py
@@ -29,6 +29,30 @@
from warnings import warn
+PYTHON_VERSION = sys.version_info[:3]
+PY2 = (PYTHON_VERSION[0] == 2)
+PY26 = (PYTHON_VERSION < (2, 7))
+
+versions_required_message = """
+Pywikibot not available on:
+%s
+
+Pywikibot is only supported under Python 2.6.5+, 2.7.2+ or 3.3+
+"""
+
+
+def python_is_supported():
+ """Check that Python is supported."""
+ # Any change to this must be copied to setup.py
+ return (PYTHON_VERSION >= (3, 3, 0) or
+ (PY2 and PYTHON_VERSION >= (2, 7, 2)) or
+ (PY26 and PYTHON_VERSION >= (2, 6, 5)))
+
+
+if not python_is_supported():
+ print(versions_required_message % sys.version)
+ sys.exit(1)
+
pwb = None
@@ -106,17 +130,6 @@
path = path[0].upper() + path[1:]
return path
-
-if sys.version_info[0] not in (2, 3):
- raise RuntimeError("ERROR: Pywikibot only runs under Python 2 "
- "or Python 3")
-version = tuple(sys.version_info)[:3]
-if version < (2, 6, 5):
- raise RuntimeError("ERROR: Pywikibot only runs under Python 2.6.5 "
- "or higher")
-if version >= (3, ) and version < (3, 3):
- raise RuntimeError("ERROR: Pywikibot only runs under Python 3.3 "
- "or higher")
# Establish a normalised path for the directory containing pwb.py.
# Either it is '.' if the user's current working directory is the same,
diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index a110ea1..6c334b6 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -367,7 +367,11 @@
all_modules = sys.modules.keys()
# These are the main dependencies of pywikibot.
- check_package_list = ['requests', 'mwparserfromhell']
+ check_package_list = [
+ 'requests',
+ 'mwparserfromhell',
+ 'unicodedata', 'unicodedata2', # T102461
+ ]
# report all imported packages
if config.verbose_output:
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 278ed54..479eace 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -24,7 +24,11 @@
import logging
import re
import sys
-import unicodedata
+
+try:
+ import unicodedata2 as unicodedata
+except ImportError:
+ import unicodedata
from collections import defaultdict, namedtuple
from warnings import warn
@@ -53,6 +57,7 @@
UserRightsError,
)
from pywikibot.tools import (
+ PYTHON_VERSION,
MediaWikiVersion, UnicodeMixin, ComparableMixin, DotReadableDict,
deprecated, deprecate_arg, deprecated_args, issue_deprecation_warning,
first_upper, remove_last_args, _NotImplementedWarning,
@@ -4632,6 +4637,10 @@
contain one (defaults to 0)
@type defaultNamespace: int
+ @raises UnicodeError: text could not be converted to unicode.
+ On Python 2.6.6 without unicodedata2, this could also be raised
+ if the text contains combining characters.
+ See
https://phabricator.wikimedia.org/T102461
"""
source_is_page = isinstance(source, BasePage)
@@ -4663,10 +4672,17 @@
t = html2unicode(self._text)
# Normalize unicode string to a NFC (composed) format to allow
- # proper string comparisons. According to
- #
https://svn.wikimedia.org/viewvc/mediawiki/branches/REL1_6/phase3/includes/…
- # the MediaWiki code normalizes everything to NFC, not NFKC
- # (which might result in information loss).
+ # proper string comparisons to strings output from MediaWiki API.
+ # Due to Python issue 10254, this is not possible on Python 2.6.6
+ # if the string contains combining characters. See T102461.
+ if (PYTHON_VERSION == (2, 6, 6) and
+ unicodedata.__name__ != 'unicodedata2' and
+ any(unicodedata.combining(c) for c in t)):
+ raise UnicodeError(
+ 'Link(%r, %s): combining characters detected, which are '
+ 'not supported by Pywikibot on Python 2.6.6. See '
+ 'https://phabricator.wikimedia.org/T102461'
+ % (t, self._source))
t = unicodedata.normalize('NFC', t)
# This code was adapted from Title.php : secureAndSplit()
diff --git a/pywikibot/version.py b/pywikibot/version.py
index 45cec8b..4cd65da 100644
--- a/pywikibot/version.py
+++ b/pywikibot/version.py
@@ -531,6 +531,8 @@
if '__version__' in package.__dict__:
info['ver'] = package.__version__
+ elif name.startswith('unicodedata'):
+ info['ver'] = package.unidata_version
elif name == 'mwlib': # mwlib 0.14.3 does not include a __init__.py
module = __import__(name + '._version',
fromlist=['_version'], level=0)
diff --git a/requirements.txt b/requirements.txt
index 7febc63..4410981 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -34,6 +34,9 @@
ipaddr>=2.1.10 ; python_version < '3'
+git+https://github.com/jayvdb/unicodedata2@issue_2#egg=unicodedata2-7.0.0 ; python_version < '2.7'
+unicodedata2 ; python_version >= '2.7'
+
# core interwiki_graph.py:
git+https://github.com/nlhepler/pydot#egg=pydot-1.0.29
diff --git a/scripts/version.py b/scripts/version.py
index 7a90fd8..3d42a86 100755
--- a/scripts/version.py
+++ b/scripts/version.py
@@ -63,13 +63,7 @@
' Please reinstall requests!')
pywikibot.output('Python: %s' % sys.version)
- normalize_text = u'\u092e\u093e\u0930\u094d\u0915
\u091c\u093c\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917'
- if normalize_text != __import__('unicodedata').normalize(
- 'NFC', normalize_text):
- pywikibot.output(u' unicode test: triggers problem #3081100')
- else:
- pywikibot.output(u' unicode test: ok')
check_environ('PYWIKIBOT2_DIR')
check_environ('PYWIKIBOT2_DIR_PWB')
check_environ('PYWIKIBOT2_NO_USER_CONFIG')
diff --git a/setup.py b/setup.py
index 0e9dab9..01920d1 100644
--- a/setup.py
+++ b/setup.py
@@ -10,6 +10,29 @@
import os
import sys
+PYTHON_VERSION = sys.version_info[:3]
+PY2 = (PYTHON_VERSION[0] == 2)
+PY26 = (PYTHON_VERSION < (2, 7))
+
+versions_required_message = """
+Pywikibot not available on:
+%s
+
+Pywikibot is only supported under Python 2.6.5+, 2.7.2+ or 3.3+
+"""
+
+
+def python_is_supported():
+ """Check that Python is supported."""
+ # Any change to this must be copied to pwb.py
+ return (PYTHON_VERSION >= (3, 3, 0) or
+ (PY2 and PYTHON_VERSION >= (2, 7, 2)) or
+ (PY26 and PYTHON_VERSION >= (2, 6, 5)))
+
+
+if not python_is_supported():
+ raise RuntimeError(versions_required_message % sys.version)
+
test_deps = []
dependencies = ['requests']
@@ -28,9 +51,10 @@
# 0.6.1 supports socket.io 1.0, but WMF is using 0.9 (T91393 and T85716)
'rcstream': ['socketIO-client<0.6.1'],
'security': ['requests[security]'],
+ 'unicode7': ['unicodedata2'],
}
-if sys.version_info[0] == 2:
+if PY2:
# Additional core library dependencies which are only available on Python 2
extra_deps.update({
'csv': ['unicodecsv'],
@@ -69,21 +93,23 @@
]
if sys.version_info[0] == 2:
- if sys.version_info < (2, 6, 5):
- raise RuntimeError("ERROR: Pywikibot only runs under Python 2.6.5 or
higher")
- elif sys.version_info[1] == 6:
+ if PY26:
# requests security extra includes pyOpenSSL. cryptography is the
# dependency of pyOpenSSL. 0.8.2 is the newest and compatible version
# for Python 2.6, which won't raise unexpected DeprecationWarning.
extra_deps['security'].append('cryptography<=0.8.2')
# work around distutils hardcoded unittest dependency
import unittest # noqa
- if 'test' in sys.argv and sys.version_info < (2, 7):
+ if 'test' in sys.argv:
import unittest2
sys.modules['unittest'] = unittest2
script_deps['replicate_wiki.py'] = ['argparse']
dependencies.append('future') # provides collections backports
+ dependency_links.append(
+
'git+https://github.com/jayvdb/unicodedata2@issue_2#egg=unicodedata2-7.0.0')
+
+ dependencies += extra_deps['unicode7'] # T102461 workaround
# tools.ip does not have a hard dependency on an IP address module,
# as it falls back to using regexes if one is not available.
@@ -108,11 +134,6 @@
# mwlib is not available for py3
script_deps['patrol'] = ['mwlib']
-
-if sys.version_info[0] == 3:
- if sys.version_info[1] < 3:
- print("ERROR: Python 3.3 or higher is required!")
- sys.exit(1)
# Some of the ui_tests depend on accessing the console window's menu
# to set the console font and copy and paste, achieved using pywinauto
diff --git a/tests/link_tests.py b/tests/link_tests.py
index f271bb5b..34cd32c 100644
--- a/tests/link_tests.py
+++ b/tests/link_tests.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""Test Link functionality."""
#
-# (C) Pywikibot team, 2014
+# (C) Pywikibot team, 2014-2015
#
# Distributed under the terms of the MIT license.
#
@@ -10,9 +10,12 @@
__version__ = '$Id$'
import pywikibot
+
from pywikibot import config2 as config
from pywikibot.page import Link, Page
from pywikibot.exceptions import Error, InvalidTitle
+from pywikibot.tools import PYTHON_VERSION
+
from tests.aspects import (
unittest,
AlteredDefaultSiteTestCase as LinkTestCase,
@@ -154,6 +157,35 @@
l = Link('/bar', self.get_site())
self.assertEquals(l.title, '/bar')
+
+class Issue10254TestCase(DefaultDrySiteTestCase):
+
+ """Test T102461 (Python issue 10254)."""
+
+ def setUp(self):
+ """Set up test case."""
+ super(Issue10254TestCase, self).setUp()
+ self._orig_unicodedata = pywikibot.page.unicodedata
+
+ def tearDown(self):
+ """Tear down test case."""
+ pywikibot.page.unicodedata = self._orig_unicodedata
+ super(Issue10254TestCase, self).tearDown()
+
+ def test_no_change(self):
+ """Test T102461 (Python issue 10254) is not encountered."""
+ title = 'Li̍t-sṳ́'
+ l = Link(title, self.site)
+ self.assertEqual(l.title, 'Li̍t-sṳ́')
+
+ @unittest.skipIf(PYTHON_VERSION != (2, 6, 6), 'Python 2.6.6-only test')
+ def test_py266_bug_exception(self):
+ """Test Python issue 10254 causes an exception."""
+ pywikibot.page.unicodedata = __import__('unicodedata')
+ title = 'Li̍t-sṳ́'
+ self.assertRaises(UnicodeError, Link, title, self.site)
+
+
# ---- The first set of tests are explicit links, starting with a ':'.
diff --git a/tests/python_tests.py b/tests/python_tests.py
new file mode 100755
index 0000000..218cc40
--- /dev/null
+++ b/tests/python_tests.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Tests Python features."""
+#
+# (C) Pywikibot team, 2015
+#
+# Distributed under the terms of the MIT license.
+from __future__ import unicode_literals
+
+__version__ = '$Id$'
+
+import unicodedata
+try:
+ import unicodedata2
+except ImportError:
+ unicodedata2 = None
+
+from pywikibot.tools import PYTHON_VERSION
+
+from tests.aspects import TestCase, unittest
+from tests.utils import expected_failure_if
+
+# TODO:
+# very old
+#
http://bugs.python.org/issue2517
+#
+# unicode
+#
http://sourceforge.net/p/pywikipediabot/bugs/1246/
+#
http://bugs.python.org/issue10254
+#
+# ip
+#
http://bugs.python.org/issue22282
+#
+#
http://bugs.python.org/issue7559
+#
+# diff
+#
http://bugs.python.org/issue2142
+#
http://bugs.python.org/issue11747
+#
http://sourceforge.net/p/pywikipediabot/bugs/509/
+#
https://phabricator.wikimedia.org/T57329
+#
http://bugs.python.org/issue1528074
+#
http://bugs.python.org/issue1678345
+
+
+class PythonTestCase(TestCase):
+
+ """Test Python bugs and features."""
+
+ net = False
+
+ @expected_failure_if((2, 7, 0) <= PYTHON_VERSION < (2, 7, 2) or
+ PYTHON_VERSION == (2, 6, 6))
+ def test_issue_10254(self):
+ """Test Python issue #10254."""
+ # Python 2.6.6, 2.7.0 and 2.7.1 have a bug in this routine.
+ # See T102461 and
http://bugs.python.org/issue10254
+ text = 'Li̍t-sṳ́'
+ self.assertEqual(text, unicodedata.normalize('NFC', text))
+
+ @unittest.skipIf(not unicodedata2, 'unicodedata2 not found')
+ def test_issue_10254_unicodedata2(self):
+ text = 'Li̍t-sṳ́'
+ self.assertEqual(text, unicodedata2.normalize('NFC', text))
+
+
+if __name__ == '__main__':
+ try:
+ unittest.main()
+ except SystemExit:
+ pass
--
To view, visit
https://gerrit.wikimedia.org/r/218884
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: If4a8e8eed682b837dd468b02a5d98b01ebf37584
Gerrit-PatchSet: 16
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>