comparing input now functions
This commit is contained in:
@ -8,6 +8,8 @@ class BlockHasher():
|
|||||||
The chunksize is count*bs (bs is the read blocksize from disk)
|
The chunksize is count*bs (bs is the read blocksize from disk)
|
||||||
|
|
||||||
It's also possible to only read a certain percentage of blocks to just check a sample.
|
It's also possible to only read a certain percentage of blocks to just check a sample.
|
||||||
|
|
||||||
|
Input and output generators are in the format ( chunk_nr, hexdigest )
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1):
|
def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1):
|
||||||
@ -39,7 +41,11 @@ class BlockHasher():
|
|||||||
yield (chunk_nr, hash.hexdigest())
|
yield (chunk_nr, hash.hexdigest())
|
||||||
|
|
||||||
def compare(self, fname, generator):
|
def compare(self, fname, generator):
|
||||||
"""reads from generator and compares blocks, yields mismatches"""
|
"""reads from generator and compares blocks
|
||||||
|
Yields mismatches in the form: ( chunk_nr, hexdigest, actual_hexdigest)
|
||||||
|
Yields errors in the form: ( chunk_nr, hexdigest, "message" )
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
checked = 0
|
checked = 0
|
||||||
@ -49,7 +55,7 @@ class BlockHasher():
|
|||||||
|
|
||||||
checked = checked + 1
|
checked = checked + 1
|
||||||
hash = self.hash_class()
|
hash = self.hash_class()
|
||||||
f.seek(chunk_nr * self.bs * self.count)
|
f.seek(int(chunk_nr) * self.bs * self.count)
|
||||||
block_nr = 0
|
block_nr = 0
|
||||||
for block in iter(lambda: f.read(self.bs), b""):
|
for block in iter(lambda: f.read(self.bs), b""):
|
||||||
hash.update(block)
|
hash.update(block)
|
||||||
|
|||||||
@ -3,7 +3,11 @@ import os
|
|||||||
|
|
||||||
|
|
||||||
class TreeHasher():
|
class TreeHasher():
|
||||||
"""uses BlockHasher recursively on a directory tree"""
|
"""uses BlockHasher recursively on a directory tree
|
||||||
|
|
||||||
|
Input and output generators are in the format: ( relative-filepath, chunk_nr, hexdigest)
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, block_hasher):
|
def __init__(self, block_hasher):
|
||||||
"""
|
"""
|
||||||
@ -19,44 +23,37 @@ class TreeHasher():
|
|||||||
It also ignores empty directories, symlinks and special files.
|
It also ignores empty directories, symlinks and special files.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
cwd=os.getcwd()
|
|
||||||
os.chdir(start_path)
|
|
||||||
|
|
||||||
def walkerror(e):
|
def walkerror(e):
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
try:
|
for (dirpath, dirnames, filenames) in os.walk(start_path, onerror=walkerror):
|
||||||
for (dirpath, dirnames, filenames) in os.walk(".", onerror=walkerror):
|
for f in filenames:
|
||||||
for f in filenames:
|
file_path=os.path.join(dirpath, f)
|
||||||
file_path=os.path.join(dirpath, f)[2:]
|
|
||||||
|
|
||||||
if (not os.path.islink(file_path)) and os.path.isfile(file_path):
|
if (not os.path.islink(file_path)) and os.path.isfile(file_path):
|
||||||
for (chunk_nr, hash) in self.block_hasher.generate(file_path):
|
for (chunk_nr, hash) in self.block_hasher.generate(file_path):
|
||||||
yield ( file_path, chunk_nr, hash )
|
yield ( os.path.relpath(file_path,start_path), chunk_nr, hash )
|
||||||
finally:
|
|
||||||
os.chdir(cwd)
|
|
||||||
|
|
||||||
|
|
||||||
def compare(self, start_path, generator):
|
def compare(self, start_path, generator):
|
||||||
"""reads from generator and compares blocks, raises exception on error
|
"""reads from generator and compares blocks
|
||||||
|
|
||||||
|
yields mismatches in the form: ( relative_filename, chunk_nr, compare_hexdigest, actual_hexdigest )
|
||||||
|
yields errors in the form: ( relative_filename, chunk_nr, compare_hexdigest, "message" )
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
cwd=os.getcwd()
|
|
||||||
os.chdir(start_path)
|
|
||||||
count=0
|
count=0
|
||||||
try:
|
|
||||||
|
|
||||||
def filter_file_name( file_name, chunk_nr, hexdigest):
|
def filter_file_name( file_name, chunk_nr, hexdigest):
|
||||||
return ( chunk_nr, hexdigest )
|
return ( chunk_nr, hexdigest )
|
||||||
|
|
||||||
|
|
||||||
for file_name, group_generator in itertools.groupby(generator, lambda x: x[0]):
|
for file_name, group_generator in itertools.groupby(generator, lambda x: x[0]):
|
||||||
count=count+1
|
count=count+1
|
||||||
block_generator=itertools.starmap(filter_file_name, group_generator)
|
block_generator=itertools.starmap(filter_file_name, group_generator)
|
||||||
for ( chunk_nr, compare_hexdigest, actual_hexdigest) in self.block_hasher.compare(file_name, block_generator):
|
for ( chunk_nr, compare_hexdigest, actual_hexdigest) in self.block_hasher.compare(os.path.join(start_path,file_name), block_generator):
|
||||||
yield ( file_name, chunk_nr, compare_hexdigest, actual_hexdigest )
|
yield ( file_name, chunk_nr, compare_hexdigest, actual_hexdigest )
|
||||||
finally:
|
|
||||||
os.chdir(cwd)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -14,17 +14,18 @@ class ZfsCheck(CliBase):
|
|||||||
|
|
||||||
def __init__(self, argv, print_arguments=True):
|
def __init__(self, argv, print_arguments=True):
|
||||||
|
|
||||||
# NOTE: common options and parameters are in ZfsAuto
|
# NOTE: common options and argument parsing are in CliBase
|
||||||
super(ZfsCheck, self).__init__(argv, print_arguments)
|
super(ZfsCheck, self).__init__(argv, print_arguments)
|
||||||
|
|
||||||
self.node = ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output)
|
self.node = ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output)
|
||||||
|
self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size)
|
||||||
|
|
||||||
def get_parser(self):
|
def get_parser(self):
|
||||||
|
|
||||||
parser = super(ZfsCheck, self).get_parser()
|
parser = super(ZfsCheck, self).get_parser()
|
||||||
|
|
||||||
# positional arguments
|
# positional arguments
|
||||||
parser.add_argument('snapshot', metavar='SNAPSHOT', default=None, nargs='?', help='Snapshot to checksum')
|
parser.add_argument('target', metavar='TARGET', default=None, nargs='?', help='Target to checksum. (can be blockdevice, directory or ZFS snapshot)')
|
||||||
|
|
||||||
group = parser.add_argument_group('Hasher options')
|
group = parser.add_argument_group('Hasher options')
|
||||||
|
|
||||||
@ -45,13 +46,13 @@ class ZfsCheck(CliBase):
|
|||||||
self.warning("TEST MODE - NOT DOING ANYTHING USEFULL")
|
self.warning("TEST MODE - NOT DOING ANYTHING USEFULL")
|
||||||
self.log.show_debug = True # show at least what we would do
|
self.log.show_debug = True # show at least what we would do
|
||||||
|
|
||||||
if args.snapshot is None:
|
if args.target is None:
|
||||||
self.error("Please specify SNAPSHOT")
|
self.error("Please specify TARGET")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|
||||||
def hash_filesystem(self, snapshot, count, bs):
|
def generate_zfs_filesystem(self, snapshot, input_generator):
|
||||||
""" recursively hash all files in this snapshot, using block_hash_tree()
|
""" recursively hash all files in this snapshot, using block_hash_tree()
|
||||||
|
|
||||||
:type snapshot: ZfsDataset.ZfsDataset
|
:type snapshot: ZfsDataset.ZfsDataset
|
||||||
@ -64,19 +65,16 @@ class ZfsCheck(CliBase):
|
|||||||
|
|
||||||
snapshot.mount(mnt)
|
snapshot.mount(mnt)
|
||||||
|
|
||||||
tree_hasher=TreeHasher(BlockHasher(count=count, bs=bs))
|
tree_hasher=TreeHasher(self.block_hasher)
|
||||||
|
|
||||||
self.debug("Hashing tree: {}".format(mnt))
|
self.debug("Hashing tree: {}".format(mnt))
|
||||||
if not self.args.test:
|
if not self.args.test:
|
||||||
|
if input_generator:
|
||||||
# generator=tree_hasher.generate(mnt)
|
for i in tree_hasher.compare(mnt, input_generator):
|
||||||
# tree_hasher.compare(mnt, generator)
|
yield i
|
||||||
|
else:
|
||||||
|
for i in tree_hasher.generate(mnt):
|
||||||
for (file, block, hash) in tree_hasher.generate(mnt):
|
yield i
|
||||||
print("{}\t{}\t{}".format(file, block, hash))
|
|
||||||
sys.stdout.flush() #important, to generate SIGPIPES on ssh disconnect
|
|
||||||
|
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
snapshot.unmount()
|
snapshot.unmount()
|
||||||
@ -119,24 +117,26 @@ class ZfsCheck(CliBase):
|
|||||||
clone = snapshot.zfs_node.get_dataset(clone_name)
|
clone = snapshot.zfs_node.get_dataset(clone_name)
|
||||||
clone.destroy(deferred=True, verbose=False)
|
clone.destroy(deferred=True, verbose=False)
|
||||||
|
|
||||||
def hash_volume(self, snapshot, count, bs):
|
def generate_zfs_volume(self, snapshot, input_generator):
|
||||||
try:
|
try:
|
||||||
dev=self.activate_volume_snapshot(snapshot)
|
dev=self.activate_volume_snapshot(snapshot)
|
||||||
block_hasher=BlockHasher(count=count, bs=bs)
|
|
||||||
|
|
||||||
self.debug("Hashing dev: {}".format(dev))
|
self.debug("Hashing dev: {}".format(dev))
|
||||||
if not self.args.test:
|
if not self.args.test:
|
||||||
for (block, hash) in block_hasher.generate(dev):
|
if input_generator:
|
||||||
print("{}\t{}".format(block, hash))
|
for i in self.block_hasher.compare(dev, input_generator):
|
||||||
sys.stdout.flush() #important, to generate SIGPIPES on ssh disconnect
|
yield i
|
||||||
|
else:
|
||||||
|
for i in self.block_hasher.generate(dev):
|
||||||
|
yield i
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
self.deacitvate_volume_snapshot(snapshot)
|
self.deacitvate_volume_snapshot(snapshot)
|
||||||
|
|
||||||
def run(self):
|
def generate_zfs_target(self, input_generator):
|
||||||
|
"""specified target is a ZFS snapshot"""
|
||||||
snapshot = self.node.get_dataset(self.args.snapshot)
|
|
||||||
|
|
||||||
|
snapshot = self.node.get_dataset(self.args.target)
|
||||||
if not snapshot.exists:
|
if not snapshot.exists:
|
||||||
snapshot.error("Snapshot not found")
|
snapshot.error("Snapshot not found")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@ -147,15 +147,64 @@ class ZfsCheck(CliBase):
|
|||||||
|
|
||||||
dataset_type = snapshot.parent.properties['type']
|
dataset_type = snapshot.parent.properties['type']
|
||||||
|
|
||||||
snapshot.verbose("Generating checksums...")
|
|
||||||
|
|
||||||
if dataset_type == 'volume':
|
if dataset_type == 'volume':
|
||||||
self.hash_volume(snapshot, self.args.count, self.args.block_size)
|
return self.generate_zfs_volume(snapshot, input_generator)
|
||||||
elif dataset_type == 'filesystem':
|
elif dataset_type == 'filesystem':
|
||||||
self.hash_filesystem(snapshot, self.args.count, self.args.block_size)
|
return self.generate_zfs_filesystem(snapshot, input_generator)
|
||||||
else:
|
else:
|
||||||
raise Exception("huh?")
|
raise Exception("huh?")
|
||||||
|
|
||||||
|
def generate(self, input_generator=None):
|
||||||
|
"""generate checksums or compare (and generate error messages)"""
|
||||||
|
|
||||||
|
if '@' in self.args.target:
|
||||||
|
self.verbose("Assuming target {} is ZFS snapshot.".format(self.args.target))
|
||||||
|
return self.generate_zfs_target(input_generator)
|
||||||
|
elif os.path.isdir(self.args.target):
|
||||||
|
self.verbose("Target {} is directory, checking recursively.".format(self.args.target))
|
||||||
|
return self.check_path(input_generator)
|
||||||
|
elif os.path.isfile(self.args.target):
|
||||||
|
self.verbose("Target {} is single file or blockdevice.".format(self.args.target))
|
||||||
|
|
||||||
|
def input_parser(self, file_name):
|
||||||
|
"""parse input lines and generate items to use in compare functions"""
|
||||||
|
with open(file_name, 'r') as input_fh:
|
||||||
|
for line in input_fh:
|
||||||
|
i=line.rstrip().split("\t")
|
||||||
|
#ignores lines without tabs
|
||||||
|
if (len(i)>1):
|
||||||
|
yield i
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
|
||||||
|
try:
|
||||||
|
#run as generator
|
||||||
|
if self.args.check==None:
|
||||||
|
for i in self.generate(input_generator=None):
|
||||||
|
if len(i)==3:
|
||||||
|
print("{}\t{}\t{}".format(*i))
|
||||||
|
else:
|
||||||
|
print("{}\t{}".format(*i))
|
||||||
|
sys.stdout.flush()
|
||||||
|
#run as compare
|
||||||
|
else:
|
||||||
|
input_generator=self.input_parser(self.args.check)
|
||||||
|
for i in self.generate(input_generator):
|
||||||
|
if len(i)==4:
|
||||||
|
(file_name, chunk_nr, compare_hexdigest, actual_hexdigest)=i
|
||||||
|
self.log.error("{}\t{}\t{}\t{}".format(file_name, chunk_nr, compare_hexdigest, actual_hexdigest))
|
||||||
|
else:
|
||||||
|
(chunk_nr, compare_hexdigest, actual_hexdigest) = i
|
||||||
|
self.log.error("{}\t{}\t{}".format(chunk_nr, compare_hexdigest, actual_hexdigest))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.error("Exception: " + str(e))
|
||||||
|
if self.args.debug:
|
||||||
|
raise
|
||||||
|
return 255
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
self.error("Aborted")
|
||||||
|
return 255
|
||||||
|
|
||||||
def cli():
|
def cli():
|
||||||
import sys
|
import sys
|
||||||
@ -163,22 +212,6 @@ def cli():
|
|||||||
|
|
||||||
sys.exit(ZfsCheck(sys.argv[1:], False).run())
|
sys.exit(ZfsCheck(sys.argv[1:], False).run())
|
||||||
|
|
||||||
# block_hasher=BlockHasher()
|
|
||||||
|
|
||||||
# if sys.argv[1]=="s":
|
|
||||||
# for ( fname, nr, hash ) in TreeHasher(block_hasher).generate("/usr/src/linux-headers-5.14.14-051414"):
|
|
||||||
# print("{}\t{}\t{}".format(fname, nr, hash))
|
|
||||||
#
|
|
||||||
# if sys.argv[1]=="r":
|
|
||||||
#
|
|
||||||
# def gen():
|
|
||||||
# for line in sys.stdin:
|
|
||||||
# ( fname, nr, hash)=line.rstrip().split('\t')
|
|
||||||
# yield (fname, int(nr), hash)
|
|
||||||
#
|
|
||||||
# TreeHasher(block_hasher).compare("/usr/src/linux-headers-5.14.14-051414", gen())
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
cli()
|
cli()
|
||||||
|
|||||||
Reference in New Issue
Block a user