Files
zfs_autobackup/zfs_autobackup/util.py
Edwin Eefting f397e7be59 python2 compat
2022-02-21 11:01:07 +01:00

103 lines
3.0 KiB
Python

import hashlib
# root@psyt14s:/home/psy/zfs_autobackup# ls -lh /home/psy/Downloads/carimage.zip
# -rw-rw-r-- 1 psy psy 990M Nov 26 2020 /home/psy/Downloads/carimage.zip
# root@psyt14s:/home/psy/zfs_autobackup# time sha1sum /home/psy/Downloads/carimage.zip
# a682e1a36e16fe0d0c2f011104f4a99004f19105 /home/psy/Downloads/carimage.zip
#
# real 0m2.558s
# user 0m2.105s
# sys 0m0.448s
# root@psyt14s:/home/psy/zfs_autobackup# time python3 -m zfs_autobackup.ZfsCheck
#
# real 0m1.459s
# user 0m0.993s
# sys 0m0.462s
# NOTE: surprisingly, sha1 via python3 is faster than the native sha1sum utility, even in the way we use it below!
import os
import platform
import sys
import time
def block_hash(fname, count=10000, bs=4096):
    """Checksum a file or block device in chunks instead of as a whole.

    This exists to checksum huge files and block devices (TB's): rather than
    one sha1sum of the whole file, a separate sha1 is generated for every
    chunk of `count` blocks, where each block is read with a size of `bs`
    bytes.

    Yields (chunk_nr, hexdigest) tuples, with chunk_nr starting at 0. The
    last chunk may cover fewer than `count` blocks.
    Yields nothing for empty files.
    """
    with open(fname, "rb") as stream:
        digest = hashlib.sha1()
        blocks_read = 0
        chunk_index = 0

        while True:
            data = stream.read(bs)
            if data == b"":
                # end of file
                break

            digest.update(data)
            blocks_read += 1

            # chunk is complete: emit it and continue with a fresh digest
            if blocks_read % count == 0:
                yield (chunk_index, digest.hexdigest())
                chunk_index += 1
                digest = hashlib.sha1()

        # emit the last (incomplete) chunk, if there is one
        if blocks_read % count != 0:
            yield (chunk_index, digest.hexdigest())
def block_hash_tree(start_path, count=10000, bs=4096):
    """block_hash every file in a tree, yielding the results.

    Note that it only checks the contents of actual files. It ignores metadata
    like permissions and mtimes. It also ignores empty directories, symlinks
    and special files.

    Yields (file_path, chunk_nr, hexdigest) tuples. file_path is relative to
    start_path and starts with "./" (for example "./dir/file"), chunk_nr and
    hexdigest are what block_hash() yields for that file with the given
    `count` and `bs`.

    Errors encountered while listing directories are raised instead of being
    silently skipped.

    The working directory of the process is left untouched: this is a
    generator, so changing directory here would leak into the caller's code
    for as long as the generator is suspended (or abandoned half-way).
    """

    def walkerror(e):
        # os.walk() ignores listing errors unless onerror is given; abort instead.
        raise e

    for (dirpath, dirnames, filenames) in os.walk(start_path, onerror=walkerror):
        # Build the directory name as it is reported to the caller: relative
        # to start_path and prefixed with "./", without changing directory.
        rel_dir = os.path.relpath(dirpath, start_path)
        if rel_dir == os.curdir:
            shown_dir = os.curdir
        else:
            shown_dir = os.path.join(os.curdir, rel_dir)

        for f in filenames:
            real_path = os.path.join(dirpath, f)
            # only regular files: no symlinks, no special files
            if (not os.path.islink(real_path)) and os.path.isfile(real_path):
                shown_path = os.path.join(shown_dir, f)
                for (chunk_nr, hexdigest) in block_hash(real_path, count, bs):
                    yield (shown_path, chunk_nr, hexdigest)
def tmp_name(suffix=""):
    """Create a temporary name that is unique to this process and node.

    The name is built as "<program>-<node>-<pid>" followed by `suffix`, where
    <program> is the basename of sys.argv[0] with spaces replaced by
    underscores, <node> is platform.node() and <pid> is the current process id.
    """
    # we could use uuids but those are ugly and confusing
    program = os.path.basename(sys.argv[0]).replace(" ", "_")
    parts = [program, platform.node(), str(os.getpid())]
    return "-".join(parts) + suffix
def get_tmp_clone_name(snapshot):
    """Return "<pool name>/<tmp_name()>" for the pool of the given snapshot.

    The pool is looked up with snapshot.zfs_node.get_pool(snapshot); its
    `name` attribute is used as the prefix.
    """
    owning_pool = snapshot.zfs_node.get_pool(snapshot)
    prefix = owning_pool.name + "/"
    return prefix + tmp_name()
def output_redir():
    """Use this after a BrokenPipeError to prevent further exceptions.

    Redirects stdout/err to /dev/null by duplicating a descriptor opened on
    os.devnull over the descriptors of sys.stdout and sys.stderr.

    Raises whatever sys.stdout.fileno() / sys.stderr.fileno(), os.open() or
    os.dup2() raise.
    """
    # Resolve the targets first, so a failure here cannot leak the descriptor.
    stdout_fd = sys.stdout.fileno()
    stderr_fd = sys.stderr.fileno()

    devnull = os.open(os.devnull, os.O_WRONLY)
    try:
        os.dup2(devnull, stdout_fd)
        os.dup2(devnull, stderr_fd)
    finally:
        # The duplicates keep /dev/null open; the extra descriptor is no longer
        # needed. Keep it only if it *is* one of the targets (possible when
        # that descriptor number was free at the time of os.open()).
        if devnull not in (stdout_fd, stderr_fd):
            os.close(devnull)