jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/339317 )
Change subject: tools: make general function to compute file sha
......................................................................
tools: make general function to compute file sha
It can be reused in several places:
- site.upload()
- Filepage.download() [if/when it is merged]
Change-Id: I756c4d127274f7f6031920127850f30de3964597
---
M pywikibot/site.py
M pywikibot/tools/__init__.py
M tests/tools_tests.py
3 files changed, 89 insertions(+), 10 deletions(-)
Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved
diff --git a/pywikibot/site.py b/pywikibot/site.py index bdf1fb4..52e6927 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -18,7 +18,6 @@ import copy import datetime import functools -import hashlib import heapq import itertools import json @@ -68,6 +67,7 @@ from pywikibot.family import WikimediaFamily from pywikibot.throttle import Throttle from pywikibot.tools import ( + compute_file_hash, itergroup, UnicodeMixin, ComparableMixin, SelfCallMixin, SelfCallString, deprecated, deprecate_arg, deprecated_args, remove_last_args, redirect_func, issue_deprecation_warning, @@ -6027,15 +6027,7 @@ # The SHA1 was also requested so calculate and compare it assert 'sha1' in stash_info, \ 'sha1 not in stash info: {0}'.format(stash_info) - sha1 = hashlib.sha1() - bytes_to_read = offset - with open(source_filename, 'rb') as f: - while bytes_to_read > 0: - read_bytes = f.read(min(bytes_to_read, 1 << 20)) - assert read_bytes # make sure we actually read bytes - bytes_to_read -= len(read_bytes) - sha1.update(read_bytes) - sha1 = sha1.hexdigest() + sha1 = compute_file_hash(source_filename, bytes_to_read=offset) if sha1 != stash_info['sha1']: raise ValueError( 'The SHA1 of {0} bytes of the stashed "{1}" is {2} ' diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index 526fd93..261f5ac 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -10,6 +10,7 @@
import collections import gzip +import hashlib import inspect import itertools import os @@ -1714,3 +1715,41 @@ # re-read and check changes if os.stat(filename).st_mode != st_mode: warn(warn_str.format(filename, st_mode - stat.S_IFREG, mode)) + + +def compute_file_hash(filename, sha='sha1', bytes_to_read=None): + """Compute file hash. + + Result is expressed as hexdigest(). + + @param filename: filename path + @type filename: basestring + + @param func: hashing function among the following in hashlib: + md5(), sha1(), sha224(), sha256(), sha384(), and sha512() + function name shall be passed as string, e.g. 'sha1'. + @type filename: basestring + + @param bytes_to_read: only the first bytes_to_read will be considered; + if file size is smaller, the whole file will be considered. + @type bytes_to_read: None or int + + """ + size = os.path.getsize(filename) + if bytes_to_read is None: + bytes_to_read = size + else: + bytes_to_read = min(bytes_to_read, size) + step = 1 << 20 + + shas = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'] + assert sha in shas + sha = getattr(hashlib, sha)() # sha instance + + with open(filename, 'rb') as f: + while bytes_to_read > 0: + read_bytes = f.read(min(bytes_to_read, step)) + assert read_bytes # make sure we actually read bytes + bytes_to_read -= len(read_bytes) + sha.update(read_bytes) + return sha.hexdigest() diff --git a/tests/tools_tests.py b/tests/tools_tests.py index 6e97772..c450966 100644 --- a/tests/tools_tests.py +++ b/tests/tools_tests.py @@ -754,6 +754,54 @@ self.chmod.assert_called_once_with(self.file, 0o600)
+class TestFileShaCalculator(TestCase): + + """Test calculator of sha of a file.""" + + net = False + + filename = join_xml_data_path('article-pear-0.10.xml') + + def setUp(self): + """Setup tests.""" + super(TestFileShaCalculator, self).setUp() + + def test_md5_complete_calculation(self): + """"Test md5 of complete file.""" + res = tools.compute_file_hash(self.filename, sha='md5') + self.assertEqual(res, '5d7265e290e6733e1e2020630262a6f3') + + def test_md5_partial_calculation(self): + """"Test md5 of partial file (1024 bytes).""" + res = tools.compute_file_hash(self.filename, sha='md5', + bytes_to_read=1024) + self.assertEqual(res, 'edf6e1accead082b6b831a0a600704bc') + + def test_sha1_complete_calculation(self): + """"Test sha1 of complete file.""" + res = tools.compute_file_hash(self.filename, sha='sha1') + self.assertEqual(res, '1c12696e1119493a625aa818a35c41916ce32d0c') + + def test_sha1_partial_calculation(self): + """"Test sha1 of partial file (1024 bytes).""" + res = tools.compute_file_hash(self.filename, sha='sha1', + bytes_to_read=1024) + self.assertEqual(res, 'e56fa7bd5cfdf6bb7e2d8649dd9216c03e7271e6') + + def test_sha224_complete_calculation(self): + """"Test sha224 of complete file.""" + res = tools.compute_file_hash(self.filename, sha='sha224') + self.assertEqual( + res, '3d350d9d9eca074bd299cb5ffe1b325a9f589b2bcd7ba1c033ab4d33') + + def test_sha224_partial_calculation(self): + """"Test sha224 of partial file (1024 bytes).""" + res = tools.compute_file_hash(self.filename, sha='sha224', + bytes_to_read=1024) + self.assertEqual( + res, 'affa8cb79656a9b6244a079f8af91c9271e382aa9d5aa412b599e169') + + class Foo(object):
"""Test class to verify classproperty decorator."""
pywikibot-commits@lists.wikimedia.org