zfs_autobackup 2.4: try to continue on non-fatal errors
This commit is contained in:
@ -18,7 +18,7 @@ It has the following features:
|
|||||||
* Supports resuming of interrupted transfers. (via the zfs extensible_dataset feature)
|
* Supports resuming of interrupted transfers. (via the zfs extensible_dataset feature)
|
||||||
* Backups and snapshots can be named to prevent conflicts. (multiple backups from and to the same filesystems are no problem)
|
* Backups and snapshots can be named to prevent conflicts. (multiple backups from and to the same filesystems are no problem)
|
||||||
* Always creates a new snapshot before starting.
|
* Always creates a new snapshot before starting.
|
||||||
* Checks everything and aborts on errors.
|
* Checks everything but tries to continue on non-fatal errors when possible. (Reports the error count when done)
|
||||||
* Ability to 'finish' aborted backups to see what goes wrong.
|
* Ability to 'finish' aborted backups to see what goes wrong.
|
||||||
* Easy to debug and has a test-mode. Actual unix commands are printed.
|
* Easy to debug and has a test-mode. Actual unix commands are printed.
|
||||||
* Keeps latest X snapshots remote and locally. (default 30, configurable)
|
* Keeps latest X snapshots remote and locally. (default 30, configurable)
|
||||||
@ -42,7 +42,7 @@ usage: zfs_autobackup [-h] [--ssh-source SSH_SOURCE] [--ssh-target SSH_TARGET]
|
|||||||
[--debug]
|
[--debug]
|
||||||
backup_name target_path
|
backup_name target_path
|
||||||
|
|
||||||
ZFS autobackup v2.3
|
ZFS autobackup v2.4
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
backup_name Name of the backup (you should set the zfs property
|
backup_name Name of the backup (you should set the zfs property
|
||||||
@ -108,6 +108,9 @@ optional arguments:
|
|||||||
(still does all read-only operations)
|
(still does all read-only operations)
|
||||||
--verbose verbose output
|
--verbose verbose output
|
||||||
--debug debug output (shows commands that are executed)
|
--debug debug output (shows commands that are executed)
|
||||||
|
|
||||||
|
When a filesystem fails, zfs_autobackup will continue and report the number of
|
||||||
|
failures at the end. Also the exit code will indicate the number of failures.
|
||||||
```
|
```
|
||||||
|
|
||||||
Backup example
|
Backup example
|
||||||
|
|||||||
@ -13,18 +13,20 @@ import time
|
|||||||
def error(txt):
    """Print txt as a message on standard error instead of standard output."""
    err_stream = sys.stderr
    print(txt, file=err_stream)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def verbose(txt):
    """Print txt to standard output, but only when args.verbose is set."""
    # Stay silent unless verbose output was requested.
    if not args.verbose:
        return
    print(txt)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def debug(txt):
    """Print txt to standard output, but only when args.debug is set."""
    # Stay silent unless debug output was requested.
    if not args.debug:
        return
    print(txt)
|
||||||
|
|
||||||
|
# Fatal error: report it and stop the whole run with exit code 255.
def abort(txt):
    """Report txt through error() and terminate the process with status 255."""
    error(txt)
    # Raising SystemExit directly is what sys.exit(255) does under the hood.
    raise SystemExit(255)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""run a command. specifiy ssh user@host to run remotely"""
|
"""run a command. specifiy ssh user@host to run remotely"""
|
||||||
def run(cmd, input=None, ssh_to="local", tab_split=False, valid_exitcodes=[ 0 ], test=False):
|
def run(cmd, input=None, ssh_to="local", tab_split=False, valid_exitcodes=[ 0 ], test=False):
|
||||||
@ -473,6 +475,15 @@ def zfs_get_unchanged_filesystems(ssh_to, filesystems):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level counter of non-fatal failures recorded by failed().
# (A global is not pretty, but it is what the rest of the script reads.)
failures=0


# Something failed, but we try to continue with the rest.
def failed(txt):
    """Count one non-fatal failure and report it on the error channel.

    txt is the failure description; it is concatenated into the message,
    so it has to be a string.
    """
    global failures
    message = "FAILURE: " + txt + "\n"
    failures += 1
    error(message)
|
||||||
|
|
||||||
|
|
||||||
def zfs_autobackup():
|
def zfs_autobackup():
|
||||||
|
|
||||||
############## data gathering section
|
############## data gathering section
|
||||||
@ -490,8 +501,7 @@ def zfs_autobackup():
|
|||||||
|
|
||||||
#nothing todo
|
#nothing todo
|
||||||
if not source_filesystems:
|
if not source_filesystems:
|
||||||
error("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source))
|
abort("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source))
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
if args.ignore_replicated:
|
if args.ignore_replicated:
|
||||||
replicated_filesystems=zfs_get_unchanged_filesystems(args.ssh_source, source_filesystems)
|
replicated_filesystems=zfs_get_unchanged_filesystems(args.ssh_source, source_filesystems)
|
||||||
@ -540,7 +550,6 @@ def zfs_autobackup():
|
|||||||
|
|
||||||
|
|
||||||
### get eixsting source snapshots
|
### get eixsting source snapshots
|
||||||
|
|
||||||
verbose("Getting source snapshot-list from {0}".format(args.ssh_source))
|
verbose("Getting source snapshot-list from {0}".format(args.ssh_source))
|
||||||
source_snapshots=zfs_get_snapshots(args.ssh_source, source_filesystems, args.backup_name)
|
source_snapshots=zfs_get_snapshots(args.ssh_source, source_filesystems, args.backup_name)
|
||||||
debug("Source snapshots:\n" + str(pprint.pformat(source_snapshots)))
|
debug("Source snapshots:\n" + str(pprint.pformat(source_snapshots)))
|
||||||
@ -588,6 +597,7 @@ def zfs_autobackup():
|
|||||||
|
|
||||||
#determine which snapshots to send for each filesystem
|
#determine which snapshots to send for each filesystem
|
||||||
for source_filesystem in source_filesystems:
|
for source_filesystem in source_filesystems:
|
||||||
|
try:
|
||||||
target_filesystem=args.target_path + "/" + lstrip_path(source_filesystem, args.strip_path)
|
target_filesystem=args.target_path + "/" + lstrip_path(source_filesystem, args.strip_path)
|
||||||
|
|
||||||
if source_filesystem not in source_snapshots:
|
if source_filesystem not in source_snapshots:
|
||||||
@ -604,7 +614,7 @@ def zfs_autobackup():
|
|||||||
|
|
||||||
if latest_target_snapshot not in source_snapshots[source_filesystem]:
|
if latest_target_snapshot not in source_snapshots[source_filesystem]:
|
||||||
#cant find latest target anymore. find first common snapshot and inform user
|
#cant find latest target anymore. find first common snapshot and inform user
|
||||||
error_msg="Cant find latest target snapshot on source, did you destroy/rename it?"
|
error_msg="Cant find latest target snapshot on source for '{}', did you destroy/rename it?".format(source_filesystem)
|
||||||
error_msg=error_msg+"\nLatest on target : "+target_filesystem+"@"+latest_target_snapshot
|
error_msg=error_msg+"\nLatest on target : "+target_filesystem+"@"+latest_target_snapshot
|
||||||
error_msg=error_msg+"\nMissing on source: "+source_filesystem+"@"+latest_target_snapshot
|
error_msg=error_msg+"\nMissing on source: "+source_filesystem+"@"+latest_target_snapshot
|
||||||
found=False
|
found=False
|
||||||
@ -617,7 +627,7 @@ def zfs_autobackup():
|
|||||||
error_msg=error_msg+"\nAlso could not find an earlier common snapshot to rollback to."
|
error_msg=error_msg+"\nAlso could not find an earlier common snapshot to rollback to."
|
||||||
else:
|
else:
|
||||||
if args.ignore_new:
|
if args.ignore_new:
|
||||||
verbose("* Skipping source filesystem {0}, target already has newer snapshots.".format(source_filesystem))
|
verbose("* Skipping source filesystem '{0}', target already has newer snapshots.".format(source_filesystem))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
raise(Exception(error_msg))
|
raise(Exception(error_msg))
|
||||||
@ -695,7 +705,9 @@ def zfs_autobackup():
|
|||||||
|
|
||||||
|
|
||||||
latest_target_snapshot=send_snapshot
|
latest_target_snapshot=send_snapshot
|
||||||
|
# failed, skip this source_filesystem
|
||||||
|
except Exception as e:
|
||||||
|
failed(str(e))
|
||||||
|
|
||||||
|
|
||||||
############## cleanup section
|
############## cleanup section
|
||||||
@ -730,23 +742,28 @@ def zfs_autobackup():
|
|||||||
source_destroys=determine_destroy_list(source_obsolete_snapshots, args.keep_source)
|
source_destroys=determine_destroy_list(source_obsolete_snapshots, args.keep_source)
|
||||||
if source_destroys:
|
if source_destroys:
|
||||||
verbose("Destroying old snapshots on source {0}:\n{1}".format(args.ssh_source, "\n".join(source_destroys)))
|
verbose("Destroying old snapshots on source {0}:\n{1}".format(args.ssh_source, "\n".join(source_destroys)))
|
||||||
|
try:
|
||||||
zfs_destroy_snapshots(ssh_to=args.ssh_source, snapshots=source_destroys)
|
zfs_destroy_snapshots(ssh_to=args.ssh_source, snapshots=source_destroys)
|
||||||
|
except Exception as e:
|
||||||
|
failed(str(e))
|
||||||
|
|
||||||
|
|
||||||
target_destroys=determine_destroy_list(target_obsolete_snapshots, args.keep_target)
|
target_destroys=determine_destroy_list(target_obsolete_snapshots, args.keep_target)
|
||||||
if target_destroys:
|
if target_destroys:
|
||||||
verbose("Destroying old snapshots on target {0}:\n{1}".format(args.ssh_target, "\n".join(target_destroys)))
|
verbose("Destroying old snapshots on target {0}:\n{1}".format(args.ssh_target, "\n".join(target_destroys)))
|
||||||
|
try:
|
||||||
zfs_destroy_snapshots(ssh_to=args.ssh_target, snapshots=target_destroys)
|
zfs_destroy_snapshots(ssh_to=args.ssh_target, snapshots=target_destroys)
|
||||||
|
except Exception as e:
|
||||||
|
failed(str(e))
|
||||||
verbose("All done")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
################################################################## ENTRY POINT
|
################################################################## ENTRY POINT
|
||||||
|
|
||||||
# parse arguments
|
# parse arguments
|
||||||
import argparse
|
import argparse
|
||||||
parser = argparse.ArgumentParser(description='ZFS autobackup v2.3')
|
parser = argparse.ArgumentParser(
|
||||||
|
description='ZFS autobackup v2.4',
|
||||||
|
epilog='When a filesystem fails, zfs_backup will continue and report the number of failures at that end. Also the exit code will indicate the number of failures.')
|
||||||
parser.add_argument('--ssh-source', default="local", help='Source host to get backup from. (user@hostname) Default %(default)s.')
|
parser.add_argument('--ssh-source', default="local", help='Source host to get backup from. (user@hostname) Default %(default)s.')
|
||||||
parser.add_argument('--ssh-target', default="local", help='Target host to push backup to. (user@hostname) Default %(default)s.')
|
parser.add_argument('--ssh-target', default="local", help='Target host to push backup to. (user@hostname) Default %(default)s.')
|
||||||
parser.add_argument('--keep-source', type=int, default=30, help='Number of days to keep old snapshots on source. Default %(default)s.')
|
parser.add_argument('--keep-source', type=int, default=30, help='Number of days to keep old snapshots on source. Default %(default)s.')
|
||||||
@ -782,17 +799,22 @@ parser.add_argument('--debug', action='store_true', help='debug output (shows co
|
|||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.ignore_replicated and args.allow_empty:
|
if args.ignore_replicated and args.allow_empty:
|
||||||
print("Cannot use allow_empty with ignore_replicated.")
|
abort("Cannot use allow_empty with ignore_replicated.")
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
zfs_autobackup()
|
zfs_autobackup()
|
||||||
|
# Report the overall outcome once zfs_autobackup() has returned.
# `failures` is the module-level counter that failed() increments.
if not failures:
    verbose("All operations completed succesfully.")
    sys.exit(0)
else:
    verbose("{} OPERATION(S) FAILED!".format(failures))
    # Exit with the number of failures, capped at 255.
    # This must use the `failures` counter: the previous code passed the
    # `failed` function to min(), which is not a number (TypeError on
    # Python 3), so the exit status never reflected the failure count.
    sys.exit(min(255, failures))
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if args.debug:
|
if args.debug:
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
print("ABORTED")
|
|
||||||
print(str(e))
|
print(str(e))
|
||||||
sys.exit(1)
|
abort("FATAL ERROR")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user