jenkins-bot has submitted this change and it was merged.
Change subject: Use ipaddress module if installed
......................................................................
Use ipaddress module if installed
page.py includes a regexp to check if a username is an IPv4 or IPv6
address, for User.isAnonymous().
There are other instances where pywikibot should be validating strings
which may contain a IP address, such as config parsing, and a client
side assert to prevent writing to the server as an anonymous user.
Python 3 includes a ipaddress module, and there are functional backports
available.
Two of the IP6 tests fail with the Python 3 ipaddress module and backport,
which is
http://bugs.python.org/issue22282 . These are only corner cases,
unlikely to be a serious problem.
If the ipaddress exists and is functional, use it, otherwise
fall back to the old regex approach.
Bug: T76286
Change-Id: I438632608c07f2b7ed196e8b2a536481fcfa3e8e
---
M pywikibot/page.py
M pywikibot/tools/__init__.py
A pywikibot/tools/ip.py
M setup.py
M tests/ipregex_tests.py
M tox.ini
6 files changed, 226 insertions(+), 43 deletions(-)
Approvals:
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 1741107..e7a4a91 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -41,6 +41,7 @@
import pywikibot
from pywikibot import config
+from pywikibot.comms import http
from pywikibot.family import Family
from pywikibot.site import Namespace
from pywikibot.exceptions import (
@@ -53,6 +54,8 @@
remove_last_args, _NotImplementedWarning,
OrderedDict, Counter,
)
+from pywikibot.tools.ip import ip_regexp # noqa & deprecated
+from pywikibot.tools.ip import is_IP
from pywikibot import textlib
@@ -2031,7 +2034,6 @@
same FilePage object, the page will only be downloaded once.
"""
if not hasattr(self, '_imagePageHtml'):
- from pywikibot.comms import http
path = "%s/index.php?title=%s" \
% (self.site.scriptpath(), self.title(asUrl=True))
self._imagePageHtml = http.request(self.site, path)
@@ -2537,15 +2539,6 @@
return sorted(list(set(self.categories())))
-ip_regexp = re.compile(r'^(?:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
- r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|'
- r'(((?=(?=(.*?(::)))\3(?!.+\4)))\4?|[\dA-F]{1,4}:)'
- r'([\dA-F]{1,4}(\4|:\b)|\2){5}'
- r'(([\dA-F]{1,4}(\4|:\b|$)|\2){2}|'
- r'(((2[0-4]|1\d|[1-9])?\d|25[0-5])\.?\b){4}))\Z',
- re.IGNORECASE)
-
-
class User(Page):
"""A class that represents a Wiki user.
@@ -2620,7 +2613,7 @@
@return: bool
"""
- return ip_regexp.match(self.username) is not None
+ return is_IP(self.username)
def getprops(self, force=False):
"""Return a properties about the user.
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 2cb16ee..6bcb0c6 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -193,6 +193,54 @@
return u'{0} ({1}):'.format(message, option_msg)
+class LazyRegex(object):
+
+ """Regex object that compiles the regex on usage."""
+
+ def __init__(self):
+ """Constructor."""
+ self._raw = None
+ self._flags = None
+ self._compiled = None
+ super(LazyRegex, self).__init__()
+
+ @property
+ def raw(self):
+ """Get raw property."""
+ return self._raw
+
+ @raw.setter
+ def raw(self, value):
+ """Set raw property."""
+ self._raw = value
+ self._compiled = None
+
+ @property
+ def flags(self):
+ """Get flags property."""
+ return self._flags
+
+ @flags.setter
+ def flags(self, value):
+ """Set flags property."""
+ self._flags = value
+ self._compiled = None
+
+ def __getattr__(self, attr):
+ """Compile the regex and delegate all attribute to the
regex."""
+ if self._raw:
+ if not self._compiled:
+ self._compiled = re.compile(self._raw, self._flags)
+
+ if hasattr(self._compiled, attr):
+ return getattr(self._compiled, attr)
+
+ raise AttributeError('%s: attr %s not recognised'
+ % (self.__class__.__name__, attr))
+ else:
+ raise AttributeError('%s.raw not set' % self.__class__.__name__)
+
+
class MediaWikiVersion(Version):
"""
diff --git a/pywikibot/tools/ip.py b/pywikibot/tools/ip.py
new file mode 100644
index 0000000..4b11c5c
--- /dev/null
+++ b/pywikibot/tools/ip.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+"""IP address tools module."""
+#
+# (C) Pywikibot team, 2015
+#
+# Distributed under the terms of the MIT license.
+#
+# Note that this module _must_ not import future.unicode_literals
+# otherwise it will not be able to detect the defective ipaddress module.
+__version__ = '$Id$'
+
+import re
+import sys
+
+from warnings import warn
+
+from pywikibot.tools import LazyRegex
+
+try:
+ from ipaddress import ip_address
+ if sys.version_info[0] < 3:
+ # This backport fails many tests
+ #
https://pypi.python.org/pypi/py2-ipaddress
+ # It accepts u'1111' as a valid IP address.
+ try:
+ ip_address(u'1111')
+ ip_address = None
+ raise ImportError('ipaddress backport is broken')
+ except ValueError:
+ pass
+
+ # This backport only fails a few tests if given a unicode object
+ #
https://pypi.python.org/pypi/ipaddress
+ # However while it rejects u'1111', it will consider '1111'
valid
+ try:
+ ip_address('1111')
+ warn('ipaddress backport is defective; patching.', ImportWarning)
+ orig_ip_address = ip_address
+ # force all input to be a unicode object so it validates correctly
+
+ def ip_address(IP):
+ """Safe ip_address."""
+ return orig_ip_address(unicode(IP)) # noqa
+ except ValueError:
+ # This means ipaddress has correctly determined '1111' is invalid
+ pass
+except ImportError as e:
+ warn('Importing ipaddress.ip_address failed: %s' % e,
+ ImportWarning)
+
+ def ip_address(IP):
+ """Fake ip_address method."""
+ warn('ipaddress backport not available.', DeprecationWarning)
+ if ip_regexp.match(IP) is None:
+ raise ValueError('Invalid IP address')
+
+ # The following flag is used by the unit tests
+ ip_address.__fake__ = True
+
+# deprecated IP detector
+ip_regexp = LazyRegex()
+ip_regexp.flags = re.IGNORECASE
+ip_regexp.raw = (
+ r'^(?:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
+ r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|'
+ r'(((?=(?=(.*?(::)))\3(?!.+\4)))\4?|[\dA-F]{1,4}:)'
+ r'([\dA-F]{1,4}(\4|:\b)|\2){5}'
+ r'(([\dA-F]{1,4}(\4|:\b|$)|\2){2}|'
+ r'(((2[0-4]|1\d|[1-9])?\d|25[0-5])\.?\b){4}))\Z')
+
+
+def is_IP(IP):
+ """
+ Verify the IP address provided is valid.
+
+ No logging is performed. Use ip_address instead to catch errors.
+
+ @param IP: IP address
+ @type IP: unicode
+ @rtype: bool
+ """
+ try:
+ ip_address(IP)
+ return True
+ except ValueError:
+ return False
diff --git a/setup.py b/setup.py
index 7ba08ae..a3b8241 100644
--- a/setup.py
+++ b/setup.py
@@ -68,6 +68,13 @@
script_deps['replicate_wiki.py'] = ['argparse']
dependencies.append('future') # provides collections backports
+ # tools.ip does not depend on an ipaddress module, as it falls back to
+ # using regexes if not available, however the pywikibot package should use
+ # the functional backport of py3 ipaddress, which is:
+ #
https://pypi.python.org/pypi/ipaddress
+ # Other backports are likely broken.
+ dependencies.append('ipaddress')
+
if sys.version_info[0] == 3:
if sys.version_info[1] < 3:
print("ERROR: Python 3.3 or higher is required!")
diff --git a/tests/ipregex_tests.py b/tests/ipregex_tests.py
index 75fe6a2..098fdc7 100644
--- a/tests/ipregex_tests.py
+++ b/tests/ipregex_tests.py
@@ -7,39 +7,46 @@
# Distributed under the terms of the MIT license.
__version__ = '$Id$'
+from pywikibot.tools import ip
+
from tests.aspects import unittest, TestCase
-from pywikibot.page import ip_regexp
-class PyWikiIpRegexCase(TestCase):
+class TestIPBase(TestCase):
- """Unit test class for ip_regexp."""
+ """Unit test class base for IP matching."""
net = False
def setUp(self):
+ """Set up test."""
self.total = 0
self.fail = 0
- super(PyWikiIpRegexCase, self).setUp()
+ self.errors = []
+ super(TestIPBase, self).setUp()
def tearDown(self):
- super(PyWikiIpRegexCase, self).tearDown()
- print('%d tests done, %d failed' % (self.total, self.fail))
- if self.fail:
- raise AssertionError
+ """Tear down test."""
+ super(TestIPBase, self).tearDown()
+ if not self.fail:
+ print('%d tests done' % self.total)
+ else:
+ print('%d of %d tests failed:\n%s'
+ % (self.fail, self.total, '\n'.join(self.errors)))
def ipv6test(self, result, IP):
+ """Perform one IP test."""
self.total += 1
- failed = False
try:
- self.assertEqual(result, bool(ip_regexp.match(IP)))
+ self.assertEqual(result, self._do_ip_test(IP))
except AssertionError:
self.fail += 1
- failed = True
- if failed:
- print('"%s" should match %s - not OK' % (IP, result))
+ self.errors.append(
+ '"%s" match should be %s - not OK'
+ % (IP, result))
- def test_IP(self):
+ def _run_tests(self):
+ """Test various IP."""
# test from
http://download.dartware.com/thirdparty/test-ipv6-regex.pl
self.ipv6test(False, "") # empty string
self.ipv6test(True, "::1") # loopback, compressed, non-routable
@@ -61,9 +68,9 @@
self.ipv6test(True, "0000:0000:0000:0000:0000:0000:0000:0000")
self.ipv6test(False, "02001:0000:1234:0000:0000:C1C0:ABCD:0876") #
extra 0 not allowed!
self.ipv6test(False, "2001:0000:1234:0000:00001:C1C0:ABCD:0876") #
extra 0 not allowed!
- # self.ipv6test(True, " 2001:0000:1234:0000:0000:C1C0:ABCD:0876") #
leading space
- # self.ipv6test(True, "2001:0000:1234:0000:0000:C1C0:ABCD:0876") #
trailing space
- # self.ipv6test(True, " 2001:0000:1234:0000:0000:C1C0:ABCD:0876 ") #
leading and trailing space
+ self.ipv6test(False, " 2001:0000:1234:0000:0000:C1C0:ABCD:0876") #
leading space
+ self.ipv6test(False, "2001:0000:1234:0000:0000:C1C0:ABCD:0876 ") #
trailing space
+ self.ipv6test(False, " 2001:0000:1234:0000:0000:C1C0:ABCD:0876 ") #
leading and trailing space
self.ipv6test(False, "2001:0000:1234:0000:0000:C1C0:ABCD:0876 0") #
junk after valid address
self.ipv6test(False, "2001:0000:1234: 0000:0000:C1C0:ABCD:0876") #
internal space
@@ -187,7 +194,6 @@
self.ipv6test(True, "::ffff:12.34.56.78")
self.ipv6test(False, "::ffff:2.3.4")
self.ipv6test(False, "::ffff:257.1.2.3")
- # self.ipv6test(False, "1.2.3.4") # IPv4 address
self.ipv6test(False, "1.2.3.4:1111:2222:3333:4444::5555") # Aeron
self.ipv6test(False, "1.2.3.4:1111:2222:3333::5555")
@@ -204,20 +210,18 @@
self.ipv6test(False, "fe80:0000:0000:0000:0204:61ff:254.157.241.086")
self.ipv6test(True, "::ffff:192.0.2.128") # but this is OK, since
there's a single digit
self.ipv6test(False, "XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:1.2.3.4")
- self.ipv6test(False, "1111:2222:3333:4444:5555:6666:00.00.00.00")
- self.ipv6test(False, "1111:2222:3333:4444:5555:6666:000.000.000.000")
self.ipv6test(False, "1111:2222:3333:4444:5555:6666:256.256.256.256")
- # Not testing address with subnet mask
- # self.ipv6test(True, "2001:0DB8:0000:CD30:0000:0000:0000:0000/60") #
full, with prefix
- # self.ipv6test(True, "2001:0DB8::CD30:0:0:0:0/60") # compressed, with
prefix
- # self.ipv6test(True, "2001:0DB8:0:CD30::/60") # compressed, with
prefix #2
- # self.ipv6test(True, "::/128") # compressed, unspecified address
type, non-routable
- # self.ipv6test(True, "::1/128") # compressed, loopback address type,
non-routable
- # self.ipv6test(True, "FF00::/8") # compressed, multicast address
type
- # self.ipv6test(True, "FE80::/10") # compressed, link-local unicast,
non-routable
- # self.ipv6test(True, "FEC0::/10") # compressed, site-local unicast,
deprecated
- # self.ipv6test(False, "124.15.6.89/60") # standard IPv4, prefix not
allowed
+ # Subnet mask not accepted
+ self.ipv6test(False, "2001:0DB8:0000:CD30:0000:0000:0000:0000/60") #
full, with prefix
+ self.ipv6test(False, "2001:0DB8::CD30:0:0:0:0/60") # compressed, with
prefix
+ self.ipv6test(False, "2001:0DB8:0:CD30::/60") # compressed, with
prefix #2
+ self.ipv6test(False, "::/128") # compressed, unspecified address type,
non-routable
+ self.ipv6test(False, "::1/128") # compressed, loopback address type,
non-routable
+ self.ipv6test(False, "FF00::/8") # compressed, multicast address type
+ self.ipv6test(False, "FE80::/10") # compressed, link-local unicast,
non-routable
+ self.ipv6test(False, "FEC0::/10") # compressed, site-local unicast,
deprecated
+ self.ipv6test(False, "124.15.6.89/60") # standard IPv4, prefix not
allowed
self.ipv6test(True, "fe80:0000:0000:0000:0204:61ff:fe9d:f156")
self.ipv6test(True, "fe80:0:0:0:204:61ff:fe9d:f156")
@@ -450,7 +454,6 @@
self.ipv6test(False, "1111:2222:3333:1.2.3.4")
self.ipv6test(False, "1111:2222:1.2.3.4")
self.ipv6test(False, "1111:1.2.3.4")
- # self.ipv6test(False, "1.2.3.4") # ipv4 address
# Missing :
self.ipv6test(False, "11112222:3333:4444:5555:6666:1.2.3.4")
@@ -608,13 +611,57 @@
self.ipv6test(False, "::3333:4444:5555:6666:7777:8888:")
self.ipv6test(False, "::2222:3333:4444:5555:6666:7777:8888:")
- # Additional cases:
http://crisp.tweakblogs.net/blog/2031/ipv6-validation-%28and-caveats%29.html
+ # Additional cases:
+ #
http://crisp.tweakblogs.net/blog/2031/ipv6-validation-%28and-caveats%29.html
self.ipv6test(True, "0:a:b:c:d:e:f::")
self.ipv6test(True, "::0:a:b:c:d:e:f") # syntactically correct, but
bad form (::0:... could be combined)
self.ipv6test(True, "a:b:c:d:e:f:0::")
self.ipv6test(False, "':10.0.0.1")
+class IPRegexTestCase(TestIPBase):
+
+ """Test IP regex."""
+
+ def _do_ip_test(self, address):
+ return bool(ip.ip_regexp.match(address))
+
+ def test_regex(self):
+ """Test IP regex."""
+ self._run_tests()
+ # The following only work with the IP regex. See below.
+ self.ipv6test(False, "1111:2222:3333:4444:5555:6666:00.00.00.00")
+ self.ipv6test(False, "1111:2222:3333:4444:5555:6666:000.000.000.000")
+ self.assertEqual(self.fail, 0)
+
+
+class IPAddressModuleTestCase(TestIPBase):
+
+ """Test ipaddress module."""
+
+ def _do_ip_test(self, address):
+ return ip.is_IP(address)
+
+ @classmethod
+ def setUpClass(cls):
+ """Check ipaddress module is available."""
+ if hasattr(ip.ip_address, '__fake__'):
+ raise unittest.SkipTest('module ipaddress not available')
+ super(IPAddressModuleTestCase, cls).setUpClass()
+
+ def test_ipaddress_module(self):
+ """Test ipaddress module."""
+ self._run_tests()
+ self.assertEqual(self.fail, 0)
+
+ @unittest.expectedFailure
+ def test_ipaddress_module_failures(self):
+ """Test known bugs in the ipaddress module."""
+ # The following fail with the ipaddress module. See T76286
+ self.ipv6test(False, "1111:2222:3333:4444:5555:6666:00.00.00.00")
+ self.ipv6test(False, "1111:2222:3333:4444:5555:6666:000.000.000.000")
+ self.assertEqual(self.fail, 0)
+
if __name__ == "__main__":
try:
unittest.main()
diff --git a/tox.ini b/tox.ini
index fa3385e..0dfda1d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -56,6 +56,7 @@
pywikibot/plural.py \
pywikibot/throttle.py \
pywikibot/tools/__init__.py \
+ pywikibot/tools/ip.py \
pywikibot/userinterfaces/__init__.py \
pywikibot/userinterfaces/terminal_interface.py \
pywikibot/weblib.py \
@@ -104,6 +105,7 @@
tests/file_tests.py \
tests/i18n/ \
tests/l10n_tests.py \
+ tests/ipregex_tests.py \
tests/pwb/ \
tests/pwb_tests.py \
tests/script_tests.py \
--
To view, visit
https://gerrit.wikimedia.org/r/155698
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I438632608c07f2b7ed196e8b2a536481fcfa3e8e
Gerrit-PatchSet: 17
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>