jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/353931 )
Change subject: djvu.py: add replace_page() and delete_page() ......................................................................
djvu.py: add replace_page() and delete_page()
Add: replace_page() (named whiten_page() in the diff below) to replace a page with a white page, and delete_page() to remove a page.
Also added: __repr__(), __str__() and __unicode__().
Added (limited) tests.
Change-Id: I475df22d42a00dacaca5d55e05f6784f25e407fd --- M pywikibot/tools/djvu.py M tests/djvu_tests.py 2 files changed, 152 insertions(+), 14 deletions(-)
Approvals: jenkins-bot: Verified Xqt: Looks good to me, approved
diff --git a/pywikibot/tools/djvu.py b/pywikibot/tools/djvu.py index 2355a45..3629611 100644 --- a/pywikibot/tools/djvu.py +++ b/pywikibot/tools/djvu.py @@ -20,6 +20,7 @@ Counter, deprecated, deprecated_args, StringTypes, + UnicodeType, )
@@ -80,17 +81,41 @@ @param file: filename (including path) to djvu file @type file: string/unicode """ - file = os.path.expanduser(file) - file = os.path.abspath(file) + self._filename = file + filename = os.path.expanduser(file) + filename = os.path.abspath(file) # Check file exists and has read permissions. - with open(file): - self.file = file - self.dirname = os.path.dirname(file) + with open(filename): + self.file = filename + self.dirname = os.path.dirname(filename)
# pattern for parsing of djvudump output. self._pat_form = re.compile( r' *?FORM:DJVU *?[\d+] *?(?P<id>{[^}]*?})? *?[P(?P<n>\d+)]') self._pat_info = re.compile(r'DjVu.*?(?P<size>\d+x\d+).*?(?P<dpi>\d+) dpi') + + def __repr__(self): + """Return a more complete string representation.""" + filename = self._filename + if not isinstance(filename, str): + filename = self._filename.encode('utf-8') + return str("{0}.{1}('{2}')").format(self.__module__, + self.__class__.__name__, + filename) + + def __str__(self): + """Return a string representation.""" + filename = self._filename + if not isinstance(filename, str): + filename = self._filename.encode('utf-8') + return str("{0}('{1}')").format(self.__class__.__name__, filename) + + def __unicode__(self): + """Return a unicode representation.""" + _str = self.__str__() + if not isinstance(_str, UnicodeType): + _str = _str.decode('utf-8') + return _str
@property @deprecated('DjVuFile.file') @@ -149,7 +174,7 @@ @param force: if True, refresh the cached data @type force: bool """ - if not hasattr(self, '_page_info'): + if not hasattr(self, '_page_info') or force: self._get_page_info(force=force) return self._page_info[n]
@@ -243,6 +268,77 @@ return False return self._remove_control_chars(stdoutdata)
+ @check_page_number + def whiten_page(self, n): + """Replace page 'n' of djvu file with a blank page.""" + # tmp files for creation/insertion of a white page. + white_ppm = os.path.join(self.dirname, 'white_page.ppm') + white_djvu = os.path.join(self.dirname, 'white_page.djvu') + + n_tot = self.number_of_images() + + # Check n is in valid range and set ref_page number for final checks. + ref_page = 2 if n == 1 else n - 1 + + size, dpi = self.get_most_common_info() + + # Generate white_page. + res, data = _call_cmd(['convert', '-size', size, 'xc:white', white_ppm], + lib='ImageMagik') + if not res: + return False + + # Convert white_page to djvu. + res, data = _call_cmd(['c44', white_ppm, '-dpi', dpi]) + os.unlink(white_ppm) # rm white_page.ppm before retuning. + if not res: + return False + + # Delete page n. + # Get ref page info for later checks. + info_ref_page = self.page_info(ref_page) + res, data = _call_cmd(['djvm', '-d', self.file, n]) + if not res: + return False + + # Insert new page + res, data = _call_cmd(['djvm', '-i', self.file, white_djvu, n]) + os.unlink(white_djvu) # rm white_page.djvu before returning. + if not res: + return False + + # Check if page processing is as expected. + expected_id = '{%s}' % os.path.basename(white_djvu) + assert self.number_of_images(force=True) == n_tot + assert self.page_info(n) == (expected_id, (size, dpi)) # white page id. + assert self.page_info(ref_page) == info_ref_page # ref page info. + + return True + + @check_page_number + def delete_page(self, n): + """Delete page 'n' of djvu file .""" + n_tot = self.number_of_images() + + # Check n is in valid range and set ref_page number for final checks. + ref_page = n - 1 if n == n_tot else n + 1 + new_ref_page = n - 1 if n == n_tot else n + + # Delete page n. + # Get ref page info for later checks. + info_ref_page = self.page_info(ref_page) + res, data = _call_cmd(['djvm', '-d', self.file, n]) + if not res: + return False + + # Check if page processing is as expected. 
+ # ref page info. + assert self.page_info(new_ref_page, force=True) == info_ref_page + if n_tot > 1: + assert self.number_of_images() == n_tot - 1 + + return True + # This is to be used only if this class is subclassed and the decorators # needs to be used by the child. check_page_number = staticmethod(check_page_number) diff --git a/tests/djvu_tests.py b/tests/djvu_tests.py index 31a6fb0..62604a3 100644 --- a/tests/djvu_tests.py +++ b/tests/djvu_tests.py @@ -10,14 +10,19 @@
from __future__ import absolute_import, unicode_literals
+import os import subprocess
from pywikibot.tools.djvu import DjVuFile +from pywikibot.tools import PY2
from tests import join_data_path, create_path_func from tests.aspects import unittest, TestCase
join_djvu_data_path = create_path_func(join_data_path, 'djvu') + +if not PY2: + unicode = str
class TestDjVuFile(TestCase): @@ -36,15 +41,42 @@ """Setup tests.""" super(TestDjVuFile, cls).setUpClass() try: - subprocess.Popen(['djvudump'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + dp = subprocess.Popen(['djvudump'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdoutdata, stderrdata = dp.communicate() except OSError: raise unittest.SkipTest('djvulibre library not installed.') + + def test_repr_method(self): + """Test __repr__() method.""" + djvu = DjVuFile(self.file_djvu) + expected = "pywikibot.tools.djvu.DjVuFile('%s')" % self.file_djvu + if PY2: + expected = expected.encode('utf-8') + self.assertEqual(repr(djvu), expected) + + def test_str_method(self): + """Test __str__() method.""" + djvu = DjVuFile(self.file_djvu) + expected = "DjVuFile('%s')" % self.file_djvu + if PY2: + expected = expected.encode('utf-8') + self.assertEqual(str(djvu), expected) + + def test_unicode_method(self): + """Test __unicode__() method.""" + djvu = DjVuFile(self.file_djvu) + expected = "DjVuFile('%s')" % self.file_djvu + if PY2: + self.assertEqual(unicode(djvu), expected) + else: + self.assertEqual(djvu.__unicode__(), expected)
def test_file_existance(self): """Test file existence checks.""" djvu = DjVuFile(self.file_djvu) - self.assertEqual(self.file_djvu, djvu.file) + self.assertEqual(os.path.abspath(self.file_djvu), djvu.file) self.assertRaises(IOError, DjVuFile, self.file_djvu_not_existing)
def test_number_of_images(self): @@ -59,7 +91,7 @@ ('{myfile.djvu}', ('1092x221', 600)))
def test_get_most_common_info(self): - """Test page number generator.""" + """Test that most common (size, dpi) are returned.""" djvu = DjVuFile(self.file_djvu) self.assertEqual(djvu.get_most_common_info(), ('1092x221', 600))
@@ -71,23 +103,33 @@ self.assertFalse(djvu.has_text())
def test_get_existing_page_number(self): - """Test if djvu file contains text.""" + """Test text is returned for existing page number.""" djvu = DjVuFile(self.file_djvu) self.assertTrue(djvu.has_text()) txt = djvu.get_page(1) self.assertEqual(txt, self.test_txt)
def test_get_not_existing_page_number(self): - """Test if djvu file contains text.""" + """Test error is raised if djvu page number is out of range.""" djvu = DjVuFile(self.file_djvu) self.assertTrue(djvu.has_text()) self.assertRaises(ValueError, djvu.get_page, 100)
def test_get_not_existing_page(self): - """Test if djvu file contains text.""" + """Test error is raised if djvu file has no text.""" djvu = DjVuFile(self.file_djvu_wo_text) self.assertFalse(djvu.has_text()) - self.assertRaises(ValueError, djvu.get_page, 100) + self.assertRaises(ValueError, djvu.get_page, 1) + + def test_whiten_not_existing_page_number(self): + """Test djvu page cannot be whitend if page number is out of range.""" + djvu = DjVuFile(self.file_djvu) + self.assertRaises(ValueError, djvu.whiten_page, 100) + + def test_delete_not_existing_page_number(self): + """Test djvu page cannot be deleted if page number is out of range.""" + djvu = DjVuFile(self.file_djvu) + self.assertRaises(ValueError, djvu.delete_page, 100)
def test_clear_cache(self): """Test if djvu file contains text."""
pywikibot-commits@lists.wikimedia.org