jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Move compression detection to tools ......................................................................
[FEAT] Move compression detection to tools
Instead of keeping the logic that determines which compression module to use inside xmlreader, this change moves it into tools so that other modules can benefit from it too.
Change-Id: I008754a55dd3d2e0a1cb346826981d83d79c1b61 --- M README.rst M pywikibot/tools/__init__.py M pywikibot/xmlreader.py A tests/data/xml/article-pyrus.xml.7z A tests/data/xml/article-pyrus.xml.gz A tests/data/xml/article-pyrus.xml_invalid.7z A tests/tools_tests.py M tox.ini 8 files changed, 216 insertions(+), 19 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/README.rst b/README.rst index 19e685e..e3dc096 100644 --- a/README.rst +++ b/README.rst @@ -35,6 +35,13 @@ page.text = page.text.replace('foo', 'bar') page.save('Replacing "foo" with "bar"') # Saves the page
+Required external programs +--------------------------- + +It may require the following programs to function properly: + +* `7za`: To extract 7z files + Contributing ------------
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index 53513ac..41714aa 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -8,9 +8,12 @@ from __future__ import print_function, unicode_literals __version__ = '$Id$'
+import bz2 import collections +import gzip import inspect import re +import subprocess import sys import threading import time @@ -660,6 +663,101 @@ return self.next()
+class ContextManagerWrapper(object): + + """ + Wraps an object in a context manager. + + It is redirecting all access to the wrapped object and executes 'close' when + used as a context manager in with-statements. In such statements the value + set via 'as' is directly the wrapped object. For example: + + wrapped = ContextManagerWrapper(an_object) + with wrapped as another_object: + assert(another_object is an_object) + + It does not subclass the object though, so isinstance checks will fail + outside a with-statement. + """ + + def __init__(self, wrapped): + """Create a new wrapper.""" + super(ContextManagerWrapper, self).__init__() + super(ContextManagerWrapper, self).__setattr__('_wrapped', wrapped) + + def __enter__(self): + """Enter a context manager and use the wrapped object directly.""" + return self._wrapped + + def __exit__(self, exc_type, exc_value, traceback): + """Call close on the wrapped object when exiting a context manager.""" + self._wrapped.close() + + def __getattr__(self, name): + """Get the attribute from the wrapped object.""" + return getattr(self._wrapped, name) + + def __setattr__(self, name, value): + """Set the attribute in the wrapped object.""" + setattr(self._wrapped, name, value) + + +def open_compressed(filename): + """ + Open a file and uncompress it if needed. + + This function supports bzip2, gzip and 7zip as compression containers. It + uses the packages available in the standard library for bzip2 and gzip so + they are always available. 7zip is only available when a 7za program is + available. + + The compression is selected via the file ending. + + @param filename: The filename. + @type filename: str + @raises ValueError: When 7za is not available. + @raises OSError: When it's not a 7z archive but the file extension is 7z. + It is also raised by bz2 when its content is invalid. gzip does not + immediately raise that error but only on reading it. + @return: A file like object returning the uncompressed data in binary mode. 
+ Before Python 2.7 it's wrapping the object returned by BZ2File and gzip + in a ContextManagerWrapper so it's advantages/disadvantages apply there. + @rtype: file like object + """ + def wrap(wrapped): + """Wrap in a wrapper when this is below Python version 2.7.""" + if sys.version_info < (2, 7): + return ContextManagerWrapper(wrapped) + else: + return wrapped + + if filename.endswith('.bz2'): + return wrap(bz2.BZ2File(filename)) + elif filename.endswith('.gz'): + return wrap(gzip.open(filename)) + elif filename.endswith('.7z'): + try: + process = subprocess.Popen(['7za', 'e', '-bd', '-so', filename], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + bufsize=65535) + except OSError: + raise ValueError('7za is not installed and can not ' + 'uncompress "{0}"'.format(filename)) + else: + stderr = process.stderr.read() + process.stderr.close() + if b'Everything is Ok' not in stderr: + process.stdout.close() + # OSError is also raised when bz2 is invalid + raise OSError('Invalid 7z archive.') + else: + return process.stdout + else: + # assume it's an uncompressed XML file + return open(filename, 'rb') + + # Decorators # # Decorator functions without parameters are _invoked_ differently from diff --git a/pywikibot/xmlreader.py b/pywikibot/xmlreader.py index 2b77234..86185cf 100644 --- a/pywikibot/xmlreader.py +++ b/pywikibot/xmlreader.py @@ -23,6 +23,8 @@ from xml.etree.cElementTree import iterparse import xml.sax
+from pywikibot.tools import open_compressed +
def parseRestrictions(restrictions): """ @@ -116,23 +118,7 @@
def parse(self): """Generator using cElementTree iterparse function.""" - if self.filename.endswith('.bz2'): - import bz2 - source = bz2.BZ2File(self.filename) - elif self.filename.endswith('.gz'): - import gzip - source = gzip.open(self.filename) - elif self.filename.endswith('.7z'): - import subprocess - source = subprocess.Popen('7za e -bd -so %s 2>/dev/null' - % self.filename, - shell=True, - stdout=subprocess.PIPE, - bufsize=65535).stdout - else: - # assume it's an uncompressed XML file - source = open(self.filename, 'rb') - try: + with open_compressed(self.filename) as source: # iterparse's event must be a str but they are unicode with # unicode_literals in Python 2 context = iterparse(source, events=(str('start'), str('end'), @@ -148,8 +134,6 @@ continue for rev in self._parse(event, elem): yield rev - finally: - source.close()
def _parse_only_latest(self, event, elem): """Parser that yields only the latest revision.""" diff --git a/tests/data/xml/article-pyrus.xml.7z b/tests/data/xml/article-pyrus.xml.7z new file mode 100644 index 0000000..4e7404e --- /dev/null +++ b/tests/data/xml/article-pyrus.xml.7z Binary files differ diff --git a/tests/data/xml/article-pyrus.xml.gz b/tests/data/xml/article-pyrus.xml.gz new file mode 100644 index 0000000..825b719 --- /dev/null +++ b/tests/data/xml/article-pyrus.xml.gz Binary files differ diff --git a/tests/data/xml/article-pyrus.xml_invalid.7z b/tests/data/xml/article-pyrus.xml_invalid.7z new file mode 100644 index 0000000..7d25ee8 --- /dev/null +++ b/tests/data/xml/article-pyrus.xml_invalid.7z @@ -0,0 +1,2 @@ +This file is not a valid XML file (but that is not important) and not a valid +7z file (which is important). diff --git a/tests/tools_tests.py b/tests/tools_tests.py new file mode 100644 index 0000000..9ece411 --- /dev/null +++ b/tests/tools_tests.py @@ -0,0 +1,105 @@ +#!/usr/bin/python +"""Test tools package alone which don't fit into other tests.""" +# -*- coding: utf-8 -*- +# +# (C) Pywikibot team, 2015 +# +# Distributed under the terms of the MIT license. 
+from __future__ import unicode_literals + +__version__ = '$Id$' + +import os.path +import subprocess + +from pywikibot import tools + +from tests import _data_dir +from tests.aspects import unittest, TestCase + +_xml_data_dir = os.path.join(_data_dir, 'xml') + + +class ContextManagerWrapperTestCase(TestCase): + + """Test that ContextManagerWrapper is working correctly.""" + + net = False + + def test_wrapper(self): + """Create a test instance and verify the wrapper redirects.""" + class DummyClass(object): + + """A dummy class which has some values and a close method.""" + + class_var = 42 + + def __init__(self): + """Create instance with dummy values.""" + self.instance_var = 1337 + self.closed = False + + def close(self): + """Just store that it has been closed.""" + self.closed = True + + obj = DummyClass() + wrapped = tools.ContextManagerWrapper(obj) + self.assertIs(wrapped.class_var, obj.class_var) + self.assertIs(wrapped.instance_var, obj.instance_var) + self.assertIs(wrapped._wrapped, obj) + self.assertFalse(obj.closed) + with wrapped as unwrapped: + self.assertFalse(obj.closed) + self.assertIs(unwrapped, obj) + self.assertTrue(obj.closed) + + +class OpenCompressedTestCase(TestCase): + + """ + Unit test class for tools. + + The tests for open_compressed requires that article-pyrus.xml* contain all + the same content after extraction. The content itself is not important. + The file article-pyrus.xml_invalid.7z is not a valid 7z file and + open_compressed will fail extracting it using 7za. 
+ """ + + net = False + + @classmethod + def setUpClass(cls): + """Define base_file and original_content.""" + super(OpenCompressedTestCase, cls).setUpClass() + cls.base_file = os.path.join(_xml_data_dir, 'article-pyrus.xml') + with open(cls.base_file, 'rb') as f: + cls.original_content = f.read() + + @staticmethod + def _get_content(*args): + """Use open_compressed and return content using a with-statement.""" + with tools.open_compressed(*args) as f: + return f.read() + + def test_open_compressed(self): + """Test open_compressed with all compressors in the standard library.""" + self.assertEqual(self._get_content(self.base_file), self.original_content) + self.assertEqual(self._get_content(self.base_file + '.bz2'), self.original_content) + self.assertEqual(self._get_content(self.base_file + '.gz'), self.original_content) + + def test_open_compressed_7z(self): + """Test open_compressed with 7za if installed.""" + try: + subprocess.Popen(['7za'], stdout=subprocess.PIPE).stdout.close() + except OSError: + raise unittest.SkipTest('7za not installed') + self.assertEqual(self._get_content(self.base_file + '.7z'), self.original_content) + self.assertRaises(OSError, self._get_content, self.base_file + '_invalid.7z') + + +if __name__ == '__main__': + try: + unittest.main() + except SystemExit: + pass diff --git a/tox.ini b/tox.ini index ef3fa9c..412f9a0 100644 --- a/tox.ini +++ b/tox.ini @@ -110,6 +110,7 @@ tests/pwb/ \ tests/pwb_tests.py \ tests/script_tests.py \ + tests/tools_tests.py \ tests/upload_tests.py \ tests/wikidataquery_tests.py