zfs check initial version (wip)
@@ -1,11 +1,9 @@
-import hashlib
-
-from .util import block_hash
from .ZfsNode import ZfsNode
+from .util import *
from .CliBase import CliBase




class ZfsCheck(CliBase):

    def __init__(self, argv, print_arguments=True):
@@ -13,15 +11,78 @@ class ZfsCheck(CliBase):
        # NOTE: common options and parameters are in ZfsAuto
        super(ZfsCheck, self).__init__(argv, print_arguments)

        self.node=ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output)


    def get_parser(self):

        parser=super(ZfsCheck, self).get_parser()

        #positional arguments
        parser.add_argument('snapshot', metavar='SNAPSHOT', default=None, nargs='?',
                            help='Snapshot to checksum')


        group=parser.add_argument_group('Hasher options')

        group.add_argument('--block-size', metavar="BYTES", default=4096, help="Read block-size, default %(default)s", type=int)
        group.add_argument('--count', metavar="COUNT", default=int((100*(1024**2))/4096), help="Generate a hash for every COUNT blocks. default %(default)s", type=int) #100MiB
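        # (the default COUNT works out to (100*1024**2)/4096 = 25600 blocks, i.e. one digest per 100MiB read)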

        return parser

    def parse_args(self, argv):
        args=super(ZfsCheck, self).parse_args(argv)

        if args.test:
            self.warning("TEST MODE - NOT DOING ANYTHING USEFUL")
            self.log.show_debug=True #show at least what we would do

        return args

    def hash_filesystem(self, snapshot):
        """
        :type snapshot: ZfsDataset.ZfsDataset
        """
        mnt="/tmp/"+tmp_name()

        try:
            self.debug("Create temporary mount point {}".format(mnt))
            self.node.run(["mkdir", mnt])

            snapshot.mount(mnt)

            self.debug("Hashing tree: {}".format(mnt))
            if not self.args.test:
                for (file, block, hash) in block_hash_tree(mnt):
                    print("{}\t{}\t{}".format(file, block, hash))

        finally:
            self.debug("Cleaning up temporary mount point")
            snapshot.unmount()
            self.node.run(["rmdir", mnt], hide_errors=True, valid_exitcodes=[])
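
    # Output format: one tab-separated line per hashed chunk, i.e.
    # <relative path> <chunk_nr> <sha1 hexdigest>. A hypothetical example:
    #
    #   etc/passwd	0	5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8
    #   var/lib/big.img	1	2fd4e1c67a2d28fced849ee1bb76e7391b93eb12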


    def run(self):

        snapshot=self.node.get_dataset(self.args.snapshot)

        if not snapshot.exists:
            snapshot.error("Dataset not found")
            sys.exit(1)

-        # print(sha1sum("/home/psy/Downloads/carimage.zip"))
-        for (block, h ) in block_hash("/home/psy/Downloads/carimage.zip" , count=10000):
-            print(block)
-            print (h)
        if not snapshot.is_snapshot:
            snapshot.error("Dataset should be a snapshot")
            sys.exit(1)

        dataset_type=snapshot.parent.properties['type']

        if dataset_type=='volume':
            self.checksum_volume(snapshot)
        elif dataset_type=='filesystem':
            self.hash_filesystem(snapshot)
        else:
            raise Exception("Unexpected dataset type: {}".format(dataset_type))

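Note: checksum_volume() is called above but is not defined anywhere in this commit (wip); it will presumably build on activate_volume_snapshot() from util.py below. A minimal sketch of driving the checker programmatically, assuming the package is importable as zfs_autobackup and that rpool/data@backup1 exists (both names hypothetical):

    from zfs_autobackup.ZfsCheck import ZfsCheck
    ZfsCheck(["rpool/data@backup1", "--count", "25600"]).run()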

@@ -17,7 +17,7 @@ from .ExecuteNode import ExecuteError
class ZfsNode(ExecuteNode):
    """a node that contains zfs datasets. implements global (systemwide/pool wide) zfs commands"""

-    def __init__(self, snapshot_time_format, hold_name, logger, ssh_config=None, ssh_to=None, readonly=False,
+    def __init__(self, logger, snapshot_time_format="", hold_name="", ssh_config=None, ssh_to=None, readonly=False,
                 description="",
                 debug_output=False, thinner=None):

@@ -32,9 +32,9 @@
            self.verbose("Using custom SSH config: {}".format(ssh_config))

        if ssh_to:
-            self.verbose("Datasets on: {}".format(ssh_to))
-        else:
-            self.verbose("Datasets are local")
+            self.verbose("SSH to: {}".format(ssh_to))
+        # else:
+        #     self.verbose("Datasets are local")

        if thinner is not None:
            rules = thinner.human_rules()

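(These ZfsNode changes support the new tool: snapshot_time_format and hold_name become optional keyword arguments, so ZfsCheck.__init__ above can construct a node by passing only a logger.)
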
@@ -15,11 +15,21 @@ import hashlib
# sys	0m0.462s

# NOTE: surprisingly, sha1 via python3 is faster than the native sha1sum utility, even in the way we use it below!
+import os
+import platform
+import sys
+import time
+
+import pathlib as pathlib


def block_hash(fname, count=10000, bs=4096):
    """yields a sha1 hash per `count` blocks.
    yields (chunk_nr, hexdigest)

+    yields nothing for empty files.

    This function was created to checksum huge files and block devices (TBs)
    """

    with open(fname, "rb") as f:
@@ -37,3 +47,65 @@ def block_hash(fname, count=10000, bs=4096):
        # yield last (incomplete) block
        if block_nr % count != 0:
            yield (chunk_nr, hash.hexdigest())
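
(The body of block_hash is elided by the hunk above. For readability, a minimal sketch of the chunked hashing it implements, reconstructed from the docstring and the visible tail rather than copied from the actual code:)

    def block_hash_sketch(fname, count=10000, bs=4096):
        with open(fname, "rb") as f:
            hash = hashlib.sha1()
            block_nr = 0
            chunk_nr = 0
            while True:
                block = f.read(bs)
                if not block:
                    break
                hash.update(block)
                block_nr = block_nr + 1
                # emit one digest per `count` blocks, then start a fresh hash
                if block_nr % count == 0:
                    yield (chunk_nr, hash.hexdigest())
                    chunk_nr = chunk_nr + 1
                    hash = hashlib.sha1()

            # yield last (incomplete) block
            if block_nr % count != 0:
                yield (chunk_nr, hash.hexdigest())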

def block_hash_tree(start_path, count=10000, bs=4096):
    """block_hash every file in a tree, yielding results"""

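    # change the working directory so the glob below yields paths relative to start_path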
    os.chdir(start_path)

    for f in pathlib.Path('.').glob('**/*'):
        if f.is_file() and not f.is_symlink():
            for (chunk_nr, hash) in block_hash(f, count, bs):
                yield (f, chunk_nr, hash)


def tmp_name(suffix=""):
    """create temporary name unique to this process and node"""
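    # e.g. "zfs-check_myhost_12345" (hypothetical script name, hostname and pid)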

    #we could use uuids but those are ugly and confusing
    name="{}_{}_{}".format(
        os.path.basename(sys.argv[0]),
        platform.node(),
        os.getpid())
    name=name+suffix
    return name


def get_tmp_clone_name(snapshot):
    pool=snapshot.zfs_node.get_pool(snapshot)
    return pool.name+"/"+tmp_name()
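    # e.g. "rpool/zfs-check_myhost_12345" (hypothetical pool and values)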


#NOTE: https://www.google.com/search?q=Mount+Path+Limit+freebsd
#FreeBSD has limitations regarding path length, so we have to clone the snapshot so the path stays short
def activate_volume_snapshot(snapshot):
    """clones the volume, waits for its /dev path to appear and returns it, in a compatible way (linux/freebsd/smartos)"""

    clone_name=get_tmp_clone_name(snapshot)
    clone=snapshot.clone(clone_name)

    #NOTE: add smartos location to this list as well
    locations=[
        "/dev/zvol/" + clone_name
    ]

    clone.debug("Waiting for /dev entry to appear...")
    time.sleep(0.1)

    start_time=time.time()
    while time.time()-start_time<10:
        for location in locations:
            stdout, stderr, exit_code=clone.zfs_node.run(["test", "-e", location], return_all=True, valid_exitcodes=[0,1])

            #fake it in testmode
            if clone.zfs_node.readonly:
                return location

            if exit_code==0:
                return location
        time.sleep(1)

    raise Exception("Timeout while waiting for {} entry to appear.".format(locations))
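
With activate_volume_snapshot() in place, the checksum_volume() that ZfsCheck.run() expects could plausibly look like this minimal sketch (hypothetical, not part of this commit; cleanup of the temporary clone is omitted):

    def checksum_volume(self, snapshot):
        dev=activate_volume_snapshot(snapshot)
        for (chunk_nr, hash) in block_hash(dev, count=self.args.count, bs=self.args.block_size):
            print("{}\t{}".format(chunk_nr, hash))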