zfs_autobackup 2.4: try to continue on non-fatal errors
This commit is contained in:
@ -18,7 +18,7 @@ It has the following features:
|
||||
* Supports resuming of interrupted transfers. (via the zfs extensible_dataset feature)
|
||||
* Backups and snapshots can be named to prevent conflicts. (multiple backups from and to the same filesystems are no problem)
|
||||
* Always creates a new snapshot before starting.
|
||||
* Checks everything and aborts on errors.
|
||||
* Checks everything but tries to continue on non-fatal errors when possible. (Reports error-count when done)
|
||||
* Ability to 'finish' aborted backups to see what goes wrong.
|
||||
* Easy to debug and has a test-mode. Actual unix commands are printed.
|
||||
* Keeps latest X snapshots remote and locally. (default 30, configurable)
|
||||
@ -42,7 +42,7 @@ usage: zfs_autobackup [-h] [--ssh-source SSH_SOURCE] [--ssh-target SSH_TARGET]
|
||||
[--debug]
|
||||
backup_name target_path
|
||||
|
||||
ZFS autobackup v2.3
|
||||
ZFS autobackup v2.4
|
||||
|
||||
positional arguments:
|
||||
backup_name Name of the backup (you should set the zfs property
|
||||
@ -108,6 +108,9 @@ optional arguments:
|
||||
(still does all read-only operations)
|
||||
--verbose verbose output
|
||||
--debug debug output (shows commands that are executed)
|
||||
|
||||
When a filesystem fails, zfs_autobackup will continue and report the number of
|
||||
failures at the end. Also the exit code will indicate the number of failures.
|
||||
```
|
||||
|
||||
Backup example
|
||||
|
||||
@ -13,18 +13,20 @@ import time
|
||||
def error(txt):
    """Print txt on stderr instead of stdout."""
    print(txt, file=sys.stderr)
|
||||
|
||||
|
||||
|
||||
def verbose(txt):
    """Print txt on stdout, but only when args.verbose is set."""
    if not args.verbose:
        return
    print(txt)
|
||||
|
||||
|
||||
|
||||
def debug(txt):
    """Print txt on stdout, but only when args.debug is set."""
    if not args.debug:
        return
    print(txt)
|
||||
|
||||
#fatal abort execution, exit code 255
|
||||
def abort(txt):
    """Report txt through error() and terminate the process with exit code 255."""
    error(txt)
    sys.exit(255)
|
||||
|
||||
|
||||
|
||||
"""run a command. specify ssh user@host to run remotely"""
|
||||
def run(cmd, input=None, ssh_to="local", tab_split=False, valid_exitcodes=[ 0 ], test=False):
|
||||
@ -473,6 +475,15 @@ def zfs_get_unchanged_filesystems(ssh_to, filesystems):
|
||||
|
||||
|
||||
|
||||
#fugly..
|
||||
failures=0
|
||||
#something failed, but we try to continue with the rest
|
||||
def failed(txt):
    """Record one non-fatal failure and report it.

    Bumps the module-level ``failures`` counter and passes txt, prefixed with
    "FAILURE: ", to error(). Does not stop execution.
    """
    global failures
    failures += 1
    error("FAILURE: " + txt + "\n")
|
||||
|
||||
|
||||
def zfs_autobackup():
|
||||
|
||||
############## data gathering section
|
||||
@ -490,8 +501,7 @@ def zfs_autobackup():
|
||||
|
||||
#nothing todo
|
||||
if not source_filesystems:
|
||||
error("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source))
|
||||
sys.exit(1)
|
||||
abort("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source))
|
||||
|
||||
if args.ignore_replicated:
|
||||
replicated_filesystems=zfs_get_unchanged_filesystems(args.ssh_source, source_filesystems)
|
||||
@ -540,7 +550,6 @@ def zfs_autobackup():
|
||||
|
||||
|
||||
### get existing source snapshots
|
||||
|
||||
verbose("Getting source snapshot-list from {0}".format(args.ssh_source))
|
||||
source_snapshots=zfs_get_snapshots(args.ssh_source, source_filesystems, args.backup_name)
|
||||
debug("Source snapshots:\n" + str(pprint.pformat(source_snapshots)))
|
||||
@ -588,6 +597,7 @@ def zfs_autobackup():
|
||||
|
||||
#determine which snapshots to send for each filesystem
|
||||
for source_filesystem in source_filesystems:
|
||||
try:
|
||||
target_filesystem=args.target_path + "/" + lstrip_path(source_filesystem, args.strip_path)
|
||||
|
||||
if source_filesystem not in source_snapshots:
|
||||
@ -604,7 +614,7 @@ def zfs_autobackup():
|
||||
|
||||
if latest_target_snapshot not in source_snapshots[source_filesystem]:
|
||||
#cant find latest target anymore. find first common snapshot and inform user
|
||||
error_msg="Cant find latest target snapshot on source, did you destroy/rename it?"
|
||||
error_msg="Cant find latest target snapshot on source for '{}', did you destroy/rename it?".format(source_filesystem)
|
||||
error_msg=error_msg+"\nLatest on target : "+target_filesystem+"@"+latest_target_snapshot
|
||||
error_msg=error_msg+"\nMissing on source: "+source_filesystem+"@"+latest_target_snapshot
|
||||
found=False
|
||||
@ -617,7 +627,7 @@ def zfs_autobackup():
|
||||
error_msg=error_msg+"\nAlso could not find an earlier common snapshot to rollback to."
|
||||
else:
|
||||
if args.ignore_new:
|
||||
verbose("* Skipping source filesystem {0}, target already has newer snapshots.".format(source_filesystem))
|
||||
verbose("* Skipping source filesystem '{0}', target already has newer snapshots.".format(source_filesystem))
|
||||
continue
|
||||
|
||||
raise(Exception(error_msg))
|
||||
@ -695,7 +705,9 @@ def zfs_autobackup():
|
||||
|
||||
|
||||
latest_target_snapshot=send_snapshot
|
||||
|
||||
# failed, skip this source_filesystem
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
|
||||
|
||||
############## cleanup section
|
||||
@ -730,23 +742,28 @@ def zfs_autobackup():
|
||||
source_destroys=determine_destroy_list(source_obsolete_snapshots, args.keep_source)
|
||||
if source_destroys:
|
||||
verbose("Destroying old snapshots on source {0}:\n{1}".format(args.ssh_source, "\n".join(source_destroys)))
|
||||
try:
|
||||
zfs_destroy_snapshots(ssh_to=args.ssh_source, snapshots=source_destroys)
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
|
||||
|
||||
target_destroys=determine_destroy_list(target_obsolete_snapshots, args.keep_target)
|
||||
if target_destroys:
|
||||
verbose("Destroying old snapshots on target {0}:\n{1}".format(args.ssh_target, "\n".join(target_destroys)))
|
||||
try:
|
||||
zfs_destroy_snapshots(ssh_to=args.ssh_target, snapshots=target_destroys)
|
||||
|
||||
|
||||
verbose("All done")
|
||||
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
|
||||
|
||||
################################################################## ENTRY POINT
|
||||
|
||||
# parse arguments
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description='ZFS autobackup v2.3')
|
||||
# The epilog is printed below the argument list in --help; it describes the
# continue-on-failure behaviour and how the exit code relates to failures.
parser = argparse.ArgumentParser(
    description='ZFS autobackup v2.4',
    epilog='When a filesystem fails, zfs_autobackup will continue and report the number of failures at the end. Also the exit code will indicate the number of failures.')
|
||||
parser.add_argument('--ssh-source', default="local", help='Source host to get backup from. (user@hostname) Default %(default)s.')
|
||||
parser.add_argument('--ssh-target', default="local", help='Target host to push backup to. (user@hostname) Default %(default)s.')
|
||||
parser.add_argument('--keep-source', type=int, default=30, help='Number of days to keep old snapshots on source. Default %(default)s.')
|
||||
@ -782,17 +799,22 @@ parser.add_argument('--debug', action='store_true', help='debug output (shows co
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.ignore_replicated and args.allow_empty:
|
||||
print("Cannot use allow_empty with ignore_replicated.")
|
||||
sys.exit(1)
|
||||
abort("Cannot use allow_empty with ignore_replicated.")
|
||||
|
||||
|
||||
try:
|
||||
zfs_autobackup()
|
||||
if not failures:
|
||||
verbose("All operations completed succesfully.")
|
||||
sys.exit(0)
|
||||
else:
|
||||
verbose("{} OPERATION(S) FAILED!".format(failures))
|
||||
#exit with the number of failures.
|
||||
# the failure count is capped at 255 so it still fits in a process exit status
sys.exit(min(255, failures))
|
||||
|
||||
except Exception as e:
|
||||
if args.debug:
|
||||
raise
|
||||
else:
|
||||
print("ABORTED")
|
||||
print(str(e))
|
||||
sys.exit(1)
|
||||
|
||||
abort("FATAL ERROR")
|
||||
|
||||
Reference in New Issue
Block a user