jenkins-bot has submitted this change and it was merged.
Change subject: Use ipaddress module if installed ......................................................................
Use ipaddress module if installed
page.py includes a regexp to check if a username is an IPv4 or IPv6 address, for User.isAnonymous().
There are other instances where pywikibot should be validating strings which may contain a IP address, such as config parsing, and a client side assert to prevent writing to the server as an anonymous user.
Python 3 includes a ipaddress module, and there are functional backports available.
Two of the IP6 tests fail with the Python 3 ipaddress module and backport, which is http://bugs.python.org/issue22282 . These are only corner cases, unlikely to be a serious problem.
If the ipaddress exists and is functional, use it, otherwise fall back to the old regex approach.
Bug: T76286 Change-Id: I438632608c07f2b7ed196e8b2a536481fcfa3e8e --- M pywikibot/page.py M pywikibot/tools/__init__.py A pywikibot/tools/ip.py M setup.py M tests/ipregex_tests.py M tox.ini 6 files changed, 226 insertions(+), 43 deletions(-)
Approvals: XZise: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py index 1741107..e7a4a91 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -41,6 +41,7 @@ import pywikibot
from pywikibot import config +from pywikibot.comms import http from pywikibot.family import Family from pywikibot.site import Namespace from pywikibot.exceptions import ( @@ -53,6 +54,8 @@ remove_last_args, _NotImplementedWarning, OrderedDict, Counter, ) +from pywikibot.tools.ip import ip_regexp # noqa & deprecated +from pywikibot.tools.ip import is_IP from pywikibot import textlib
@@ -2031,7 +2034,6 @@ same FilePage object, the page will only be downloaded once. """ if not hasattr(self, '_imagePageHtml'): - from pywikibot.comms import http path = "%s/index.php?title=%s" \ % (self.site.scriptpath(), self.title(asUrl=True)) self._imagePageHtml = http.request(self.site, path) @@ -2537,15 +2539,6 @@ return sorted(list(set(self.categories())))
-ip_regexp = re.compile(r'^(?:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?).){3}' - r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|' - r'(((?=(?=(.*?(::)))\3(?!.+\4)))\4?|[\dA-F]{1,4}:)' - r'([\dA-F]{1,4}(\4|:\b)|\2){5}' - r'(([\dA-F]{1,4}(\4|:\b|$)|\2){2}|' - r'(((2[0-4]|1\d|[1-9])?\d|25[0-5]).?\b){4}))\Z', - re.IGNORECASE) - - class User(Page):
"""A class that represents a Wiki user. @@ -2620,7 +2613,7 @@
@return: bool """ - return ip_regexp.match(self.username) is not None + return is_IP(self.username)
def getprops(self, force=False): """Return a properties about the user. diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index 2cb16ee..6bcb0c6 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -193,6 +193,54 @@ return u'{0} ({1}):'.format(message, option_msg)
+class LazyRegex(object): + + """Regex object that compiles the regex on usage.""" + + def __init__(self): + """Constructor.""" + self._raw = None + self._flags = None + self._compiled = None + super(LazyRegex, self).__init__() + + @property + def raw(self): + """Get raw property.""" + return self._raw + + @raw.setter + def raw(self, value): + """Set raw property.""" + self._raw = value + self._compiled = None + + @property + def flags(self): + """Get flags property.""" + return self._flags + + @flags.setter + def flags(self, value): + """Set flags property.""" + self._flags = value + self._compiled = None + + def __getattr__(self, attr): + """Compile the regex and delegate all attribute to the regex.""" + if self._raw: + if not self._compiled: + self._compiled = re.compile(self._raw, self._flags) + + if hasattr(self._compiled, attr): + return getattr(self._compiled, attr) + + raise AttributeError('%s: attr %s not recognised' + % (self.__class__.__name__, attr)) + else: + raise AttributeError('%s.raw not set' % self.__class__.__name__) + + class MediaWikiVersion(Version):
""" diff --git a/pywikibot/tools/ip.py b/pywikibot/tools/ip.py new file mode 100644 index 0000000..4b11c5c --- /dev/null +++ b/pywikibot/tools/ip.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +"""IP address tools module.""" +# +# (C) Pywikibot team, 2015 +# +# Distributed under the terms of the MIT license. +# +# Note that this module _must_ not import future.unicode_literals +# otherwise it will not be able to detect the defective ipaddress module. +__version__ = '$Id$' + +import re +import sys + +from warnings import warn + +from pywikibot.tools import LazyRegex + +try: + from ipaddress import ip_address + if sys.version_info[0] < 3: + # This backport fails many tests + # https://pypi.python.org/pypi/py2-ipaddress + # It accepts u'1111' as a valid IP address. + try: + ip_address(u'1111') + ip_address = None + raise ImportError('ipaddress backport is broken') + except ValueError: + pass + + # This backport only fails a few tests if given a unicode object + # https://pypi.python.org/pypi/ipaddress + # However while it rejects u'1111', it will consider '1111' valid + try: + ip_address('1111') + warn('ipaddress backport is defective; patching.', ImportWarning) + orig_ip_address = ip_address + # force all input to be a unicode object so it validates correctly + + def ip_address(IP): + """Safe ip_address.""" + return orig_ip_address(unicode(IP)) # noqa + except ValueError: + # This means ipaddress has correctly determined '1111' is invalid + pass +except ImportError as e: + warn('Importing ipaddress.ip_address failed: %s' % e, + ImportWarning) + + def ip_address(IP): + """Fake ip_address method.""" + warn('ipaddress backport not available.', DeprecationWarning) + if ip_regexp.match(IP) is None: + raise ValueError('Invalid IP address') + + # The following flag is used by the unit tests + ip_address.__fake__ = True + +# deprecated IP detector +ip_regexp = LazyRegex() +ip_regexp.flags = re.IGNORECASE +ip_regexp.raw = ( + r'^(?:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?).){3}' + r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|' + r'(((?=(?=(.*?(::)))\3(?!.+\4)))\4?|[\dA-F]{1,4}:)' + r'([\dA-F]{1,4}(\4|:\b)|\2){5}' + r'(([\dA-F]{1,4}(\4|:\b|$)|\2){2}|' + r'(((2[0-4]|1\d|[1-9])?\d|25[0-5]).?\b){4}))\Z') + + +def is_IP(IP): + """ + Verify the IP address provided is valid. + + No logging is performed. Use ip_address instead to catch errors. + + @param IP: IP address + @type IP: unicode + @rtype: bool + """ + try: + ip_address(IP) + return True + except ValueError: + return False diff --git a/setup.py b/setup.py index 7ba08ae..a3b8241 100644 --- a/setup.py +++ b/setup.py @@ -68,6 +68,13 @@ script_deps['replicate_wiki.py'] = ['argparse'] dependencies.append('future') # provides collections backports
+ # tools.ip does not depend on an ipaddress module, as it falls back to + # using regexes if not available, however the pywikibot package should use + # the functional backport of py3 ipaddress, which is: + # https://pypi.python.org/pypi/ipaddress + # Other backports are likely broken. + dependencies.append('ipaddress') + if sys.version_info[0] == 3: if sys.version_info[1] < 3: print("ERROR: Python 3.3 or higher is required!") diff --git a/tests/ipregex_tests.py b/tests/ipregex_tests.py index 75fe6a2..098fdc7 100644 --- a/tests/ipregex_tests.py +++ b/tests/ipregex_tests.py @@ -7,39 +7,46 @@ # Distributed under the terms of the MIT license. __version__ = '$Id$'
+from pywikibot.tools import ip + from tests.aspects import unittest, TestCase -from pywikibot.page import ip_regexp
-class PyWikiIpRegexCase(TestCase): +class TestIPBase(TestCase):
- """Unit test class for ip_regexp.""" + """Unit test class base for IP matching."""
net = False
def setUp(self): + """Set up test.""" self.total = 0 self.fail = 0 - super(PyWikiIpRegexCase, self).setUp() + self.errors = [] + super(TestIPBase, self).setUp()
def tearDown(self): - super(PyWikiIpRegexCase, self).tearDown() - print('%d tests done, %d failed' % (self.total, self.fail)) - if self.fail: - raise AssertionError + """Tear down test.""" + super(TestIPBase, self).tearDown() + if not self.fail: + print('%d tests done' % self.total) + else: + print('%d of %d tests failed:\n%s' + % (self.fail, self.total, '\n'.join(self.errors)))
def ipv6test(self, result, IP): + """Perform one IP test.""" self.total += 1 - failed = False try: - self.assertEqual(result, bool(ip_regexp.match(IP))) + self.assertEqual(result, self._do_ip_test(IP)) except AssertionError: self.fail += 1 - failed = True - if failed: - print('"%s" should match %s - not OK' % (IP, result)) + self.errors.append( + '"%s" match should be %s - not OK' + % (IP, result))
- def test_IP(self): + def _run_tests(self): + """Test various IP.""" # test from http://download.dartware.com/thirdparty/test-ipv6-regex.pl self.ipv6test(False, "") # empty string self.ipv6test(True, "::1") # loopback, compressed, non-routable @@ -61,9 +68,9 @@ self.ipv6test(True, "0000:0000:0000:0000:0000:0000:0000:0000") self.ipv6test(False, "02001:0000:1234:0000:0000:C1C0:ABCD:0876") # extra 0 not allowed! self.ipv6test(False, "2001:0000:1234:0000:00001:C1C0:ABCD:0876") # extra 0 not allowed! - # self.ipv6test(True, " 2001:0000:1234:0000:0000:C1C0:ABCD:0876") # leading space - # self.ipv6test(True, "2001:0000:1234:0000:0000:C1C0:ABCD:0876") # trailing space - # self.ipv6test(True, " 2001:0000:1234:0000:0000:C1C0:ABCD:0876 ") # leading and trailing space + self.ipv6test(False, " 2001:0000:1234:0000:0000:C1C0:ABCD:0876") # leading space + self.ipv6test(False, "2001:0000:1234:0000:0000:C1C0:ABCD:0876 ") # trailing space + self.ipv6test(False, " 2001:0000:1234:0000:0000:C1C0:ABCD:0876 ") # leading and trailing space self.ipv6test(False, "2001:0000:1234:0000:0000:C1C0:ABCD:0876 0") # junk after valid address self.ipv6test(False, "2001:0000:1234: 0000:0000:C1C0:ABCD:0876") # internal space
@@ -187,7 +194,6 @@ self.ipv6test(True, "::ffff:12.34.56.78") self.ipv6test(False, "::ffff:2.3.4") self.ipv6test(False, "::ffff:257.1.2.3") - # self.ipv6test(False, "1.2.3.4") # IPv4 address
self.ipv6test(False, "1.2.3.4:1111:2222:3333:4444::5555") # Aeron self.ipv6test(False, "1.2.3.4:1111:2222:3333::5555") @@ -204,20 +210,18 @@ self.ipv6test(False, "fe80:0000:0000:0000:0204:61ff:254.157.241.086") self.ipv6test(True, "::ffff:192.0.2.128") # but this is OK, since there's a single digit self.ipv6test(False, "XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:1.2.3.4") - self.ipv6test(False, "1111:2222:3333:4444:5555:6666:00.00.00.00") - self.ipv6test(False, "1111:2222:3333:4444:5555:6666:000.000.000.000") self.ipv6test(False, "1111:2222:3333:4444:5555:6666:256.256.256.256")
- # Not testing address with subnet mask - # self.ipv6test(True, "2001:0DB8:0000:CD30:0000:0000:0000:0000/60") # full, with prefix - # self.ipv6test(True, "2001:0DB8::CD30:0:0:0:0/60") # compressed, with prefix - # self.ipv6test(True, "2001:0DB8:0:CD30::/60") # compressed, with prefix #2 - # self.ipv6test(True, "::/128") # compressed, unspecified address type, non-routable - # self.ipv6test(True, "::1/128") # compressed, loopback address type, non-routable - # self.ipv6test(True, "FF00::/8") # compressed, multicast address type - # self.ipv6test(True, "FE80::/10") # compressed, link-local unicast, non-routable - # self.ipv6test(True, "FEC0::/10") # compressed, site-local unicast, deprecated - # self.ipv6test(False, "124.15.6.89/60") # standard IPv4, prefix not allowed + # Subnet mask not accepted + self.ipv6test(False, "2001:0DB8:0000:CD30:0000:0000:0000:0000/60") # full, with prefix + self.ipv6test(False, "2001:0DB8::CD30:0:0:0:0/60") # compressed, with prefix + self.ipv6test(False, "2001:0DB8:0:CD30::/60") # compressed, with prefix #2 + self.ipv6test(False, "::/128") # compressed, unspecified address type, non-routable + self.ipv6test(False, "::1/128") # compressed, loopback address type, non-routable + self.ipv6test(False, "FF00::/8") # compressed, multicast address type + self.ipv6test(False, "FE80::/10") # compressed, link-local unicast, non-routable + self.ipv6test(False, "FEC0::/10") # compressed, site-local unicast, deprecated + self.ipv6test(False, "124.15.6.89/60") # standard IPv4, prefix not allowed
self.ipv6test(True, "fe80:0000:0000:0000:0204:61ff:fe9d:f156") self.ipv6test(True, "fe80:0:0:0:204:61ff:fe9d:f156") @@ -450,7 +454,6 @@ self.ipv6test(False, "1111:2222:3333:1.2.3.4") self.ipv6test(False, "1111:2222:1.2.3.4") self.ipv6test(False, "1111:1.2.3.4") - # self.ipv6test(False, "1.2.3.4") # ipv4 address
# Missing : self.ipv6test(False, "11112222:3333:4444:5555:6666:1.2.3.4") @@ -608,13 +611,57 @@ self.ipv6test(False, "::3333:4444:5555:6666:7777:8888:") self.ipv6test(False, "::2222:3333:4444:5555:6666:7777:8888:")
- # Additional cases: http://crisp.tweakblogs.net/blog/2031/ipv6-validation-%28and-caveats%29.html + # Additional cases: + # http://crisp.tweakblogs.net/blog/2031/ipv6-validation-%28and-caveats%29.html self.ipv6test(True, "0:a:b:c:d:e:f::") self.ipv6test(True, "::0:a:b:c:d:e:f") # syntactically correct, but bad form (::0:... could be combined) self.ipv6test(True, "a:b:c:d:e:f:0::") self.ipv6test(False, "':10.0.0.1")
+class IPRegexTestCase(TestIPBase): + + """Test IP regex.""" + + def _do_ip_test(self, address): + return bool(ip.ip_regexp.match(address)) + + def test_regex(self): + """Test IP regex.""" + self._run_tests() + # The following only work with the IP regex. See below. + self.ipv6test(False, "1111:2222:3333:4444:5555:6666:00.00.00.00") + self.ipv6test(False, "1111:2222:3333:4444:5555:6666:000.000.000.000") + self.assertEqual(self.fail, 0) + + +class IPAddressModuleTestCase(TestIPBase): + + """Test ipaddress module.""" + + def _do_ip_test(self, address): + return ip.is_IP(address) + + @classmethod + def setUpClass(cls): + """Check ipaddress module is available.""" + if hasattr(ip.ip_address, '__fake__'): + raise unittest.SkipTest('module ipaddress not available') + super(IPAddressModuleTestCase, cls).setUpClass() + + def test_ipaddress_module(self): + """Test ipaddress module.""" + self._run_tests() + self.assertEqual(self.fail, 0) + + @unittest.expectedFailure + def test_ipaddress_module_failures(self): + """Test known bugs in the ipaddress module.""" + # The following fail with the ipaddress module. See T76286 + self.ipv6test(False, "1111:2222:3333:4444:5555:6666:00.00.00.00") + self.ipv6test(False, "1111:2222:3333:4444:5555:6666:000.000.000.000") + self.assertEqual(self.fail, 0) + if __name__ == "__main__": try: unittest.main() diff --git a/tox.ini b/tox.ini index fa3385e..0dfda1d 100644 --- a/tox.ini +++ b/tox.ini @@ -56,6 +56,7 @@ pywikibot/plural.py \ pywikibot/throttle.py \ pywikibot/tools/__init__.py \ + pywikibot/tools/ip.py \ pywikibot/userinterfaces/__init__.py \ pywikibot/userinterfaces/terminal_interface.py \ pywikibot/weblib.py \ @@ -104,6 +105,7 @@ tests/file_tests.py \ tests/i18n/ \ tests/l10n_tests.py \ + tests/ipregex_tests.py \ tests/pwb/ \ tests/pwb_tests.py \ tests/script_tests.py \
pywikibot-commits@lists.wikimedia.org