wip
This commit is contained in:
		| @ -1,4 +1,6 @@ | ||||
| import hashlib | ||||
| import os | ||||
| from random import random | ||||
|  | ||||
|  | ||||
| class BlockHasher(): | ||||
| @ -12,14 +14,45 @@ class BlockHasher(): | ||||
|     Input and output generators are in the format ( chunk_nr, hexdigest ) | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, coverage=1): | ||||
|     def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, skip=0): | ||||
|         self.count = count | ||||
|         self.bs = bs | ||||
|         self.chunk_size=bs*count | ||||
|         self.hash_class = hash_class | ||||
|         self.coverage=1 | ||||
|  | ||||
|         self.stats_total=0 | ||||
|         self.stats_checked=0 | ||||
|         # self.coverage=coverage | ||||
|         self.skip=skip | ||||
|         self._skip_count=0 | ||||
|  | ||||
|         self.stats_total_bytes=0 | ||||
|  | ||||
|  | ||||
|     def _seek_next_chunk(self, fh, fsize): | ||||
|         """seek fh to next chunk and update skip counter. | ||||
|         returns chunk_nr | ||||
|         return false it should skip the rest of the file""" | ||||
|  | ||||
|         #ignore rempty files | ||||
|         if fsize==0: | ||||
|             return False | ||||
|  | ||||
|         # need to skip chunks? | ||||
|         if self._skip_count > 0: | ||||
|             chunks_left = ((fsize - fh.tell()) // self.chunk_size) + 1 | ||||
|             # not enough chunks left in this file? | ||||
|             if self._skip_count >= chunks_left: | ||||
|                 # skip rest of this file | ||||
|                 self._skip_count = self._skip_count - chunks_left | ||||
|                 return False | ||||
|             else: | ||||
|                 # seek to next chunk, reset skip count | ||||
|                 fh.seek(self.chunk_size * self._skip_count, os.SEEK_CUR) | ||||
|                 self._skip_count = self.skip | ||||
|                 return  fh.tell()//self.chunk_size | ||||
|         else: | ||||
|             # should read this chunk, reset skip count | ||||
|             self._skip_count = self.skip | ||||
|             return fh.tell() // self.chunk_size | ||||
|  | ||||
|     def generate(self, fname): | ||||
|         """Generates checksums | ||||
| @ -28,20 +61,34 @@ class BlockHasher(): | ||||
|  | ||||
|         yields nothing for empty files. | ||||
|         """ | ||||
|         with open(fname, "rb") as f: | ||||
|         with os.open(fname, os.O_RDONLY) as fh: | ||||
|             print (os.lseek(fh, 0, os.SEEK_END)) | ||||
|  | ||||
|  | ||||
|         with os.openopen(fname, "rb") as fh: | ||||
|  | ||||
|             # print(os.path.getsize(fname)) | ||||
|             print(os.lseek(fh, 0, os.SEEK_END)) | ||||
|  | ||||
|             fsize = fh.seek(0, os.SEEK_END) | ||||
|             fh.seek(0) | ||||
|  | ||||
|             while fh.tell()<fsize: | ||||
|  | ||||
|                 chunk_nr=self._seek_next_chunk(fh, fsize) | ||||
|                 if chunk_nr is False: | ||||
|                     return | ||||
|  | ||||
|                 #read chunk | ||||
|                 hash = self.hash_class() | ||||
|                 block_nr = 0 | ||||
|             chunk_nr = 0 | ||||
|             for block in iter(lambda: f.read(self.bs), b""): | ||||
|                 while block_nr != self.count: | ||||
|                     block=fh.read(self.bs) | ||||
|                     if block==b"": | ||||
|                         break | ||||
|                     hash.update(block) | ||||
|                     block_nr = block_nr + 1 | ||||
|                 if block_nr % self.count == 0: | ||||
|                     yield (chunk_nr, hash.hexdigest()) | ||||
|                     chunk_nr = chunk_nr + 1 | ||||
|                     hash = self.hash_class() | ||||
|  | ||||
|             # yield last (incomplete) block | ||||
|             if block_nr % self.count != 0: | ||||
|                 yield (chunk_nr, hash.hexdigest()) | ||||
|  | ||||
|     def  compare(self, fname, generator): | ||||
|  | ||||
| @ -20,10 +20,7 @@ class ZfsCheck(CliBase): | ||||
|  | ||||
|         self.node = ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output) | ||||
|  | ||||
|         if self.args.check is None: | ||||
|             self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size) | ||||
|         else: | ||||
|             self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, coverage=self.args.percentage) | ||||
|         self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, skip=self.args.skip) | ||||
|  | ||||
|     def get_parser(self): | ||||
|  | ||||
| @ -37,13 +34,13 @@ class ZfsCheck(CliBase): | ||||
|         group.add_argument('--block-size', metavar="BYTES", default=4096, help="Read block-size, default %(default)s", | ||||
|                            type=int) | ||||
|         group.add_argument('--count', metavar="COUNT", default=int((100 * (1024 ** 2)) / 4096), | ||||
|                            help="Hash chunks of COUNT blocks. Default %(default)s . (Chunk size is BYTES * COUNT) ", type=int)  # 100MiB | ||||
|                            help="Hash chunks of COUNT blocks. Default %(default)s . (CHUNK size is BYTES * COUNT) ", type=int)  # 100MiB | ||||
|  | ||||
|         group.add_argument('--check', '-c', metavar="FILE", default=None, const=True, nargs='?', | ||||
|                            help="Read hashes from STDIN (or FILE) and compare them") | ||||
|  | ||||
|         group.add_argument('--percentage', '-p', metavar="NUMBER", default=100, type=float, | ||||
|                            help="Generate/compare only this percentage of hashes. Default %(default)s") | ||||
|         group.add_argument('--skip', '-s', metavar="NUMBER", default=0, type=float, | ||||
|                            help="Skip this number of chunks after every hash. %(default)s") | ||||
|  | ||||
|         return parser | ||||
|  | ||||
| @ -61,10 +58,9 @@ class ZfsCheck(CliBase): | ||||
|         self.verbose("Block size           : {} bytes".format(args.block_size)) | ||||
|         self.verbose("Block count          : {}".format(args.count)) | ||||
|         self.verbose("Effective chunk size : {} bytes".format(args.count*args.block_size)) | ||||
|         self.verbose("Percentage to check  : {} %".format(args.percentage)) | ||||
|         self.verbose("Skip chunk count     : {} (checks {:.2f}% of data)".format(args.skip, 100/(1+args.skip))) | ||||
|         self.verbose("") | ||||
|  | ||||
|         args.percentage=args.percentage/100 | ||||
|  | ||||
|         return args | ||||
|  | ||||
| @ -216,28 +212,30 @@ class ZfsCheck(CliBase): | ||||
|  | ||||
|         last_progress_time = time.time() | ||||
|         progress_checked = 0 | ||||
|         progress_total = 0 | ||||
|         progress_skipped = 0 | ||||
|  | ||||
|         line=input_fh.readline() | ||||
|         skip=0 | ||||
|         while line: | ||||
|             i=line.rstrip().split("\t") | ||||
|             #ignores lines without tabs | ||||
|             if (len(i)>1): | ||||
|  | ||||
|                 if self.args.percentage==1 or self.args.percentage>random(): | ||||
|                 if skip==0: | ||||
|                     progress_checked=progress_checked+1 | ||||
|                     yield i | ||||
|  | ||||
|                 progress_total=progress_total+1 | ||||
|                     skip=self.args.skip | ||||
|                 else: | ||||
|                     skip=skip-1 | ||||
|                     progress_skipped=progress_skipped+1 | ||||
|  | ||||
|                 if self.args.progress and time.time() - last_progress_time > 1: | ||||
|                     last_progress_time = time.time() | ||||
|                     self.progress("Checked {}/{} hashes. ({:.2f}% coverage)".format(progress_checked, progress_total, (float(progress_checked)/progress_total)*100)) | ||||
|                     self.progress("Checked {} hashes (skipped {})".format(progress_checked, progress_skipped)) | ||||
|  | ||||
|             line=input_fh.readline() | ||||
|  | ||||
|         self.verbose("Checked {}/{} hashes. ({:.2f}% coverage)".format(progress_checked, progress_total, ( | ||||
|                     float(progress_checked) / progress_total) * 100)) | ||||
|         self.verbose("Checked {} hashes (skipped {})".format(progress_checked, progress_skipped)) | ||||
|  | ||||
|     def run(self): | ||||
|  | ||||
|  | ||||
| @ -0,0 +1,70 @@ | ||||
| import os.path | ||||
| import os | ||||
| import time | ||||
| from random import random | ||||
|  | ||||
# Scratch script with throw-away experiments for the chunk skipping logic.

# Local import: the script calls sys.exit() below but never imported sys,
# which made it fail with a NameError instead of exiting cleanly.
import sys

# Experiment 1: time one million seeks to the end of an open file, as a way
# of getting the file size (the commented-out lines are the alternatives).
with open('test.py', 'rb') as fh:

    # fsize = fh.seek(10000, os.SEEK_END)
    # print(fsize)

    start=time.time()
    for i in range(0,1000000):
        # fh.seek(0, 0)
        fsize=fh.seek(0, os.SEEK_END)
        # fsize=fh.tell()
        # os.path.getsize('test.py')
    print(time.time()-start)


    print(fh.tell())

# NOTE: the script stops here, everything below is currently unreachable.
sys.exit(0)



checked=1
skipped=1
coverage=0.1

max_skip=0


# Experiment 2: decide per chunk, at random, whether to skip it, so that on
# average a fraction `coverage` of the chunks is checked. Tracks the longest
# run of consecutive skips in max_skip. This loop never terminates.
skipinarow=0
while True:
    total=checked+skipped

    skip=coverage<random()
    if skip:
        skipped = skipped + 1
        print("S {:.2f}%".format(checked * 100 / total))

        skipinarow = skipinarow+1
        if skipinarow>max_skip:
            max_skip=skipinarow
    else:
        skipinarow=0
        checked=checked+1
        print("C {:.2f}%".format(checked * 100 / total))

    print(max_skip)

# Experiment 3: count down a skip counter and, after every checked chunk, add
# a random number of chunks to skip: between 0 and 2*((1/coverage)-1).
# Tracks the largest skip counter seen in max_skip. This loop never
# terminates either (and is never reached, since the loop above never ends).
skip=0
while True:

    total=checked+skipped
    if skip>0:
        skip=skip-1
        skipped = skipped + 1
        print("S {:.2f}%".format(checked * 100 / total))
    else:
        checked=checked+1
        print("C {:.2f}%".format(checked * 100 / total))

        #calc new skip
        skip=skip+((1/coverage)-1)*(random()*2)
        # print(skip)
        if skip> max_skip:
            max_skip=skip

    print(max_skip)
|  | ||||
		Reference in New Issue
	
	Block a user