incremental block hasher (for zfs-verify)
This commit is contained in:
39
zfs_autobackup/util.py
Normal file
39
zfs_autobackup/util.py
Normal file
@ -0,0 +1,39 @@
|
||||
import hashlib
|
||||
|
||||
# root@psyt14s:/home/psy/zfs_autobackup# ls -lh /home/psy/Downloads/carimage.zip
|
||||
# -rw-rw-r-- 1 psy psy 990M Nov 26 2020 /home/psy/Downloads/carimage.zip
|
||||
# root@psyt14s:/home/psy/zfs_autobackup# time sha1sum /home/psy/Downloads/carimage.zip
|
||||
# a682e1a36e16fe0d0c2f011104f4a99004f19105 /home/psy/Downloads/carimage.zip
|
||||
#
|
||||
# real 0m2.558s
|
||||
# user 0m2.105s
|
||||
# sys 0m0.448s
|
||||
# root@psyt14s:/home/psy/zfs_autobackup# time python3 -m zfs_autobackup.ZfsCheck
|
||||
#
|
||||
# real 0m1.459s
|
||||
# user 0m0.993s
|
||||
# sys 0m0.462s
|
||||
|
||||
# NOTE: surprisingly sha1 in via python3 is faster than the native sha1sum utility, even in the way we use below!
|
||||
def block_hash(fname, count=10000, bs=4006):
|
||||
"""yields sha1 hash per count blocks.
|
||||
yields(chunk_nr, hexdigest)
|
||||
|
||||
yields nothing for empty files.
|
||||
"""
|
||||
|
||||
with open(fname, "rb") as f:
|
||||
hash = hashlib.sha1()
|
||||
block_nr = 0
|
||||
chunk_nr = 0
|
||||
for block in iter(lambda: f.read(4096), b""):
|
||||
hash.update(block)
|
||||
block_nr = block_nr + 1
|
||||
if block_nr % count == 0:
|
||||
yield (chunk_nr, hash.hexdigest())
|
||||
chunk_nr = chunk_nr + 1
|
||||
hash = hashlib.sha1()
|
||||
|
||||
# yield last (incomplete) block
|
||||
if block_nr % count != 0:
|
||||
yield (chunk_nr, hash.hexdigest())
|
||||
Reference in New Issue
Block a user