replaced tar verification with much better find/md5sum.

This commit is contained in:
Edwin Eefting
2022-01-24 23:25:55 +01:00
parent 02dca218b8
commit fa3f44a045
4 changed files with 69 additions and 33 deletions

View File

@ -86,12 +86,12 @@ class TestZfsEncryption(unittest2.TestCase):
runchecked("rsync, local", "test test_target1 --verbose --exclude-received --fs-compare=rsync") runchecked("rsync, local", "test test_target1 --verbose --exclude-received --fs-compare=rsync")
runchecked("tar, remote source and remote target", runchecked("tar, remote source and remote target",
"test test_target1 --ssh-source=localhost --ssh-target=localhost --verbose --exclude-received --fs-compare=tar") "test test_target1 --ssh-source=localhost --ssh-target=localhost --verbose --exclude-received --fs-compare=find")
runchecked("tar, remote source", runchecked("tar, remote source",
"test test_target1 --ssh-source=localhost --verbose --exclude-received --fs-compare=tar") "test test_target1 --ssh-source=localhost --verbose --exclude-received --fs-compare=find")
runchecked("tar, remote target", runchecked("tar, remote target",
"test test_target1 --ssh-target=localhost --verbose --exclude-received --fs-compare=tar") "test test_target1 --ssh-target=localhost --verbose --exclude-received --fs-compare=find")
runchecked("tar, local", "test test_target1 --verbose --exclude-received --fs-compare=tar") runchecked("tar, local", "test test_target1 --verbose --exclude-received --fs-compare=find")
with self.subTest("no common snapshot"): with self.subTest("no common snapshot"):
#destroy common snapshot, now 3 should fail #destroy common snapshot, now 3 should fail

View File

@ -9,7 +9,7 @@ class ZfsAuto(object):
"""Common Base class, this class is always used subclassed. Look at ZfsAutobackup and ZfsAutoverify .""" """Common Base class, this class is always used subclassed. Look at ZfsAutobackup and ZfsAutoverify ."""
# also used by setup.py # also used by setup.py
VERSION = "3.2-alpha1" VERSION = "3.2-alpha2"
HEADER = "{} v{} - (c)2021 E.H.Eefting (edwin@datux.nl)".format(os.path.basename(sys.argv[0]), VERSION) HEADER = "{} v{} - (c)2021 E.H.Eefting (edwin@datux.nl)".format(os.path.basename(sys.argv[0]), VERSION)
def __init__(self, argv, print_arguments=True): def __init__(self, argv, print_arguments=True):

View File

@ -16,34 +16,60 @@ def tmp_name(suffix=""):
name=name+suffix name=name+suffix
return name return name
def hash_tree_tar(node, path): #NO!
"""calculate md5sum of a directory tree, using tar""" # def hash_tree_tar(node, path):
# """calculate md5sum of a directory tree, using tar"""
#
# node.debug("Hashing filesystem {} ".format(path))
#
# cmd=[ "tar", "-cf", "-", "-C", path, "--warning=none", ".",
# ExecuteNode.PIPE, "md5sum"]
#
#
# stdout = node.run(cmd)
#
# if node.readonly:
# hashed=None
# else:
# hashed = stdout[0].split(" ")[0]
#
# node.debug("Hash of {} filesytem is {}".format(path, hashed))
#
# return hashed
node.debug("Hashing filesystem {} ".format(path)) # try to be as unix compatible as possible, while still having decent performance
def compare_trees_find(source_node, source_path, target_node, target_path):
# find /tmp/zfstmp_pve1_1993135target/ -xdev -type f -print0 | xargs -0 md5sum | md5sum -c
cmd=[ "tar", "-cf", "-", "-C", path, ".", #verify tree has at least one file
ExecuteNode.PIPE, "md5sum"]
stdout = node.run(cmd) stdout=source_node.run(["find", ".", "-type", "f",
ExecuteNode.PIPE, "head", "-n1",
], cwd=source_path)
if node.readonly: if not stdout:
hashed=None source_node.debug("No files, skipping check")
else: else:
hashed = stdout[0].split(" ")[0] pipe=source_node.run(["find", ".", "-type", "f", "-print0",
ExecuteNode.PIPE, "xargs", "-0", "md5sum"
], pipe=True, cwd=source_path)
stdout=target_node.run([ "md5sum", "-c", "--quiet"], inp=pipe, cwd=target_path, valid_exitcodes=[0,1])
node.debug("Hash of {} filesytem is {}".format(path, hashed)) if len(stdout):
for line in stdout:
target_node.error("md5sum: "+line)
return hashed raise(Exception("Some files have checksum errors"))
#NOTE: horrible idea, don't use
def compare_trees_tar(source_node, source_path, target_node, target_path): # def compare_trees_tar(source_node, source_path, target_node, target_path):
"""compare two trees using tar. compatible and simple""" # """compare two trees using tar. compatible and simple"""
#
source_hash= hash_tree_tar(source_node, source_path) # source_hash= hash_tree_tar(source_node, source_path)
target_hash= hash_tree_tar(target_node, target_path) # target_hash= hash_tree_tar(target_node, target_path)
#
if source_hash != target_hash: # if source_hash != target_hash:
raise Exception("md5hash difference: {} != {}".format(source_hash, target_hash)) # raise Exception("md5hash difference: {} != {}".format(source_hash, target_hash))
def compare_trees_rsync(source_node, source_path, target_node, target_path): def compare_trees_rsync(source_node, source_path, target_node, target_path):
@ -51,7 +77,7 @@ def compare_trees_rsync(source_node, source_path, target_node, target_path):
Advantage is that we can see which individual files differ. Advantage is that we can see which individual files differ.
But requires rsync and can't do remote to remote.""" But requires rsync and can't do remote to remote."""
cmd = ["rsync", "-rcn", "--info=COPY,DEL,MISC,NAME,SYMSAFE", "--msgs2stderr", "--delete" ] cmd = ["rsync", "-rcnq", "--info=COPY,DEL,MISC,NAME,SYMSAFE", "--msgs2stderr", "--delete" ]
#local #local
if source_node.ssh_to is None and target_node.ssh_to is None: if source_node.ssh_to is None and target_node.ssh_to is None:
@ -94,6 +120,8 @@ def verify_filesystem(source_snapshot, source_mnt, target_snapshot, target_mnt,
compare_trees_rsync(source_snapshot.zfs_node, source_mnt, target_snapshot.zfs_node, target_mnt) compare_trees_rsync(source_snapshot.zfs_node, source_mnt, target_snapshot.zfs_node, target_mnt)
elif method == 'tar': elif method == 'tar':
compare_trees_tar(source_snapshot.zfs_node, source_mnt, target_snapshot.zfs_node, target_mnt) compare_trees_tar(source_snapshot.zfs_node, source_mnt, target_snapshot.zfs_node, target_mnt)
elif method == 'find':
compare_trees_find(source_snapshot.zfs_node, source_mnt, target_snapshot.zfs_node, target_mnt)
else: else:
raise(Exception("program errror, unknown method")) raise(Exception("program errror, unknown method"))
@ -191,7 +219,7 @@ def activate_volume_snapshot(snapshot):
def deacitvate_volume_snapshot(snapshot): def deacitvate_volume_snapshot(snapshot):
clone_name=get_tmp_clone_name(snapshot) clone_name=get_tmp_clone_name(snapshot)
clone=snapshot.zfs_node.get_dataset(clone_name) clone=snapshot.zfs_node.get_dataset(clone_name)
clone.destroy() clone.destroy(deferred=True, verbose=False)
def verify_volume(source_dataset, source_snapshot, target_dataset, target_snapshot): def verify_volume(source_dataset, source_snapshot, target_dataset, target_snapshot):
"""compare the contents of two zfs volume snapshots""" """compare the contents of two zfs volume snapshots"""
@ -254,8 +282,8 @@ class ZfsAutoverify(ZfsAuto):
parser=super(ZfsAutoverify, self).get_parser() parser=super(ZfsAutoverify, self).get_parser()
group=parser.add_argument_group("Verify options") group=parser.add_argument_group("Verify options")
group.add_argument('--fs-compare', metavar='METHOD', default="tar", choices=["tar", "rsync"], group.add_argument('--fs-compare', metavar='METHOD', default="find", choices=["find", "rsync"],
help='Compare method to use for filesystems. (tar, rsync) Default: %(default)s ') help='Compare method to use for filesystems. (find, rsync) Default: %(default)s ')
return parser return parser

View File

@ -260,7 +260,7 @@ class ZfsDataset:
self.force_exists = True self.force_exists = True
def destroy(self, fail_exception=False): def destroy(self, fail_exception=False, deferred=False, verbose=True):
"""destroy the dataset. by default failures are not an exception, so we """destroy the dataset. by default failures are not an exception, so we
can continue making backups can continue making backups
@ -268,13 +268,20 @@ class ZfsDataset:
:type fail_exception: bool :type fail_exception: bool
""" """
self.verbose("Destroying") if verbose:
self.verbose("Destroying")
else:
self.debug("Destroying")
if self.is_snapshot: if self.is_snapshot:
self.release() self.release()
try: try:
self.zfs_node.run(["zfs", "destroy", self.name]) if deferred and self.is_snapshot:
self.zfs_node.run(["zfs", "destroy", "-d", self.name])
else:
self.zfs_node.run(["zfs", "destroy", self.name])
self.invalidate() self.invalidate()
self.force_exists = False self.force_exists = False
return True return True
@ -1113,9 +1120,10 @@ class ZfsDataset:
self.debug("Unmounting") self.debug("Unmounting")
cmd = [ cmd = [
"umount", self.name "umount", "-l", self.name
] ]
self.zfs_node.run(cmd=cmd, valid_exitcodes=[0]) self.zfs_node.run(cmd=cmd, valid_exitcodes=[0])
def clone(self, name): def clone(self, name):