zfs_autobackup/zfs_autobackup

#!/usr/bin/env python2
# -*- coding: utf8 -*-

#(C)edwin@datux.nl -- Edwin Eefting
#Release under GPL.

from __future__ import print_function
import os
import sys
import re
import traceback
import subprocess
import pprint
import time


def error(txt):
    """Print txt on stderr (instead of stdout), followed by a newline."""
    print(txt, file=sys.stderr)

def verbose(txt):
    """Print txt on stdout, but only when the global args.verbose is set."""
    if not args.verbose:
        return
    print(txt)

def debug(txt):
    """Print txt on stdout, but only when the global args.debug is set."""
    if not args.debug:
        return
    print(txt)

#fatal: report the problem and stop execution with exit code 255
def abort(txt):
    """Show txt via error() and terminate the program with exit code 255."""
    error(txt)
    #same effect as sys.exit(255), which raises exactly this exception
    raise SystemExit(255)


"""run a command. specifiy ssh user@host to run remotely"""
def run(cmd, input=None, ssh_to="local", tab_split=False, valid_exitcodes=(0,), test=False):
    """Run a command locally or remotely via ssh, and return its stdout.

    cmd             -- list of arguments, program name first
    input           -- optional string that is written to the stdin of the command
    ssh_to          -- "local" to run on this machine, otherwise the user@host to ssh to
    tab_split       -- when True, every output line is split on tabs
    valid_exitcodes -- exit codes that are not considered an error
    test            -- when True, the command is only shown via debug() and not executed

    Returns the list of output lines (a list of lists when tab_split is True),
    or None in test mode.
    Raises subprocess.CalledProcessError if the exit code is not in valid_exitcodes.
    """

    encoded_cmd=[]

    #use ssh?
    if ssh_to != "local":
        encoded_cmd.extend(["ssh", ssh_to])

        for arg in cmd:
            #remote commands are executed in a shell, so add single quotes to support
            #spaces and other weird stuff. A single quote inside the argument has to be
            #written as '\'' (close quote, escaped quote, reopen quote), otherwise it
            #would end the quoting and the rest would be interpreted by the remote shell.
            quoted="'"+arg.replace("'", "'\\''")+"'"

            #make sure the command gets all the data in utf8 format:
            #(this is necessary if LC_ALL=en_US.utf8 is not set in the environment)
            encoded_cmd.append(quoted.encode('utf-8'))

    else:
        for arg in cmd:
            encoded_cmd.append(arg.encode('utf-8'))


    #the accurate way of displaying it would be: print encoded_cmd
    #However, we use the more human-readable way, but this is not always properly escaped!
    #(most of the time it should be copypastable however.)
    debug_txt="# "+" ".join(encoded_cmd)

    if test:
        debug("[TEST] "+debug_txt)
    else:
        debug(debug_txt)

    if input:
        debug("INPUT:\n"+input.rstrip())
        stdin=subprocess.PIPE
    else:
        stdin=None

    #test mode: the command has been shown, dont execute it
    if test:
        return

    p=subprocess.Popen(encoded_cmd, env=os.environ, stdout=subprocess.PIPE, stdin=stdin)
    output=p.communicate(input=input)[0]
    if p.returncode not in valid_exitcodes:
        raise(subprocess.CalledProcessError(p.returncode, encoded_cmd))

    lines=output.splitlines()
    if not tab_split:
        return(lines)

    return([ line.split("\t") for line in lines ])


"""determine filesystems that should be backupped by looking at the special autobackup-property"""
def zfs_get_selected_filesystems(ssh_to, backup_name):
    """Return the names of the filesystems/volumes that are selected for backup_name.

    Selection is read from the zfs property autobackup:<backup_name>:
    - "false" disables a dataset.
    - a locally set "true" selects the dataset itself and the datasets inheriting from it.
    - a locally set "child" selects only the datasets inheriting from it.
    - a value inherited from a dataset that was not directly selected is ignored
      (reported as "already a backup").
    """
    inherit_prefix="inherited from "

    #get all source filesystems that have the backup property
    rows=run(ssh_to=ssh_to, tab_split=True, cmd=[
        "zfs", "get", "-t",  "volume,filesystem", "-o", "name,value,source", "-s", "local,inherited", "-H", "autobackup:"+backup_name
    ])

    selected=[]
    #datasets on which the property was set locally to true or child
    direct=[]
    for (name, value, source) in rows:
        if value=="false":
            verbose("* Ignored : {0} (disabled)".format(name))
            continue

        is_local=(source=="local")
        wants_backup=value in ("true", "child")

        if is_local and wants_backup:
            direct.append(name)

        if is_local and value=="true":
            selected.append(name)
            verbose("* Selected: {0} (direct selection)".format(name))
        elif source.startswith(inherit_prefix) and wants_backup:
            #only follow the inheritance if the parent was selected directly
            if source[len(inherit_prefix):] in direct:
                selected.append(name)
                verbose("* Selected: {0} (inherited selection)".format(name))
            else:
                verbose("* Ignored : {0} (already a backup)".format(name))
        else:
            verbose("* Ignored : {0} (only childs)".format(name))

    return(selected)


"""determine filesystems that can be resumed via receive_resume_token"""
def zfs_get_resumable_filesystems(ssh_to, filesystems):
    """Return a dict that maps filesystem name to its receive_resume_token.

    Only filesystems whose receive_resume_token property has a value other
    than '-' are included. Exit code 1 of zfs get is accepted as well.
    """
    query=[ "zfs", "get", "-t",  "volume,filesystem", "-o", "name,value", "-H", "receive_resume_token" ]
    query.extend(filesystems)

    rows=run(ssh_to=ssh_to, tab_split=True, cmd=query, valid_exitcodes= [ 0,1 ] )

    return({ name: token for (name, token) in rows if token!='-' })


"""deferred destroy list of snapshots (in @format). """
def zfs_destroy_snapshots(ssh_to, snapshots):
    """Deferred-destroy (zfs destroy -d) every snapshot name in the list snapshots.

    Nothing is executed when args.test is set.
    """
    #zfs can only destroy one filesystem at once, so the names are fed
    #NUL-separated via stdin to xargs, which calls zfs once per name
    xargs_cmd=[ "xargs", "-0", "-n", "1", "zfs", "destroy", "-d" ]
    names="\0".join(snapshots)

    run(ssh_to=ssh_to, test=args.test, input=names, cmd=xargs_cmd)

"""destroy list of filesystems """
def zfs_destroy(ssh_to, filesystems, recursive=False):
    """Destroy every filesystem in the list, with -r when recursive is True.

    Nothing is executed when args.test is set.
    """
    recursive_opts=[ "-r" ] if recursive else []
    xargs_cmd=[ "xargs", "-0", "-n", "1", "zfs", "destroy" ] + recursive_opts

    #zfs can only destroy one filesystem at once, so the names are fed
    #NUL-separated via stdin to xargs, which calls zfs once per name
    names="\0".join(filesystems)
    run(ssh_to=ssh_to, test=args.test, input=names, cmd=xargs_cmd)

#simulated snapshots for the --test option.
#NOTE: the visible code that filled and read this dict (in zfs_create_snapshot and
#zfs_get_snapshots) is commented out, so it currently stays empty.
test_snapshots={}


"""create snapshot on multiple filesystems at once (atomicly per pool)"""
def zfs_create_snapshot(ssh_to, filesystems, snapshot):
    """Create the snapshot named snapshot on all filesystems, one zfs call per pool.

    Nothing is executed when args.test is set.
    """
    #group the filesystems by pool (the first part of the name):
    #zfs can only take atomic snapshots per pool
    per_pool={}
    for filesystem in filesystems:
        per_pool.setdefault(filesystem.split('/')[0], []).append(filesystem)

    for pool in per_pool:
        snapshot_names=[ filesystem+"@"+snapshot for filesystem in per_pool[pool] ]

        #NOTE: in test mode the snapshots are not simulated, run() only shows the command
        run(ssh_to=ssh_to, tab_split=False, cmd=[ "zfs", "snapshot" ]+snapshot_names, test=args.test)


"""get names of all snapshots for specified filesystems belonging to backup_name

return[filesystem_name]=[ "snashot1", "snapshot2", ... ]
"""
def zfs_get_snapshots(ssh_to, filesystems, backup_name):
    """Get the names of all snapshots of the specified filesystems that belong to backup_name.

    A snapshot belongs to backup_name if its name is "<backup_name>-<digits>".

    Returns a dict: ret[filesystem_name]=[ "snapshot1", "snapshot2", ... ]
    in the order zfs list reports them. Filesystems without matching
    snapshots are not in the dict. An empty filesystems list gives {} without
    running any command.
    """

    ret={}

    if not filesystems:
        return(ret)

    cmd=[
        "zfs", "list", "-d", "1", "-r", "-t" ,"snapshot", "-H", "-o", "name"
    ]
    cmd.extend(filesystems)

    snapshots=run(ssh_to=ssh_to, tab_split=False, cmd=cmd, valid_exitcodes=[ 0 ])

    #backup_name is a literal name, not a pattern: escape it, otherwise a name with
    #a '+' in it never matches its own snapshots and a '.' matches any character.
    own_snapshot=re.compile("^"+re.escape(backup_name)+"-[0-9]*$")

    for snapshot in snapshots:
        (filesystem, snapshot_name)=snapshot.split("@")
        if own_snapshot.match(snapshot_name):
            ret.setdefault(filesystem, []).append(snapshot_name)

    return(ret)


def default_tag():
    """Return the hold tag that is used when no tag is specified: zfs_autobackup:<backup_name>."""
    prefix="zfs_autobackup:"
    return(prefix+args.backup_name)

"""hold a snapshot so it cant be destroyed accidently by admin or other processes"""
def zfs_hold_snapshot(ssh_to, snapshot, tag=None):
    """Put a hold on snapshot (zfs hold), using default_tag() when no tag is given.

    Exit code 1 of zfs hold is accepted. Nothing is executed when args.test is set.
    """
    hold_tag=tag if tag else default_tag()

    run(ssh_to=ssh_to, test=args.test, tab_split=False, valid_exitcodes=[ 0, 1 ],
        cmd=[ "zfs", "hold", hold_tag, snapshot ])


"""release a snapshot"""
def zfs_release_snapshot(ssh_to, snapshot, tag=None):
    """Release the hold on snapshot (zfs release), using default_tag() when no tag is given.

    Exit code 1 of zfs release is accepted. Nothing is executed when args.test is set.
    """
    hold_tag=tag if tag else default_tag()

    run(ssh_to=ssh_to, test=args.test, tab_split=False, valid_exitcodes=[ 0, 1 ],
        cmd=[ "zfs", "release", hold_tag, snapshot ])


"""transfer a zfs snapshot from source to target. both can be either local or via ssh.


TODO:

(parially implemented, local buffer is a bit more annoying to do)

buffering: specify buffer_size to use mbuffer (or alike) to apply buffering where neccesary

local to local:
local send -> local buffer -> local receive

local to remote and remote to local:
local send -> local buffer -> ssh -> remote buffer -> remote receive
remote send -> remote buffer -> ssh -> local buffer -> local receive

remote to remote:
remote send -> remote buffer -> ssh -> local buffer -> ssh -> remote buffer -> remote receive


"""
def zfs_transfer(ssh_source, source_filesystem, first_snapshot, second_snapshot,
                 ssh_target, target_filesystem, resume_token=None, buffer_size=None):
    """Transfer a zfs snapshot from source to target by piping zfs send into zfs recv.

    Both sides can be either "local" or a user@host that is reached via ssh.

    ssh_source, ssh_target -- "local" or the ssh destination of that side
    source_filesystem      -- filesystem to send from
    first_snapshot         -- snapshot to send incrementally from, or None for a full send
    second_snapshot        -- snapshot to send
    target_filesystem      -- filesystem to receive into (its parent is created if needed)
    resume_token           -- when set, the send is resumed with this token (zfs send -t)
    buffer_size            -- currently unused, buffering is controlled by args.buffer

    In test mode (args.test) the pipe is only shown via debug() and nothing is transferred.

    Raises subprocess.CalledProcessError when the send fails, when the receive
    fails with an exit code other than 1 (both unless args.ignore_transfer_errors
    is set), or when the snapshot cant be found on the target afterwards.

    TODO: local buffering is not implemented: mbuffer is only added on sides that
    are reached via ssh.
    """

    #### build source command
    source_cmd=[]

    if ssh_source != "local":
        source_cmd.extend([ "ssh", ssh_source ])

    source_cmd.extend(["zfs", "send",  ])

    #all kind of performance options:
    source_cmd.append("-L") # large block support
    source_cmd.append("-e") # WRITE_EMBEDDED, more compact stream
    source_cmd.append("-c") # use compressed WRITE records
    if not args.resume:
        source_cmd.append("-D") # dedupped stream, sends less duplicate data


    #only verbose in debug mode, lots of output
    if args.debug :
        source_cmd.append("-v")


    if not first_snapshot:
        txt=">>> Transfer: "+source_filesystem+"@"+second_snapshot
    else:
        txt=">>> Transfer: "+source_filesystem+"@"+first_snapshot+"...@"+second_snapshot

    if resume_token:
        #a resumed send is described completely by the token
        source_cmd.extend([ "-t", resume_token ])
        txt=txt+" [RESUMED]"

    else:
        source_cmd.append("-p")

        if first_snapshot:
            source_cmd.extend([ "-i", first_snapshot ])

        #the remote side is executed in a shell, so quote the name there
        if ssh_source != "local":
            source_cmd.append("'" + source_filesystem + "@" + second_snapshot + "'")
        else:
            source_cmd.append(source_filesystem + "@" + second_snapshot)

    verbose(txt)

    #use the ssh_source parameter of this transfer, not the global setting
    if args.buffer and ssh_source!="local":
        source_cmd.append("|mbuffer -m {}".format(args.buffer))


    #### build target command
    target_cmd=[]

    if ssh_target != "local":
        target_cmd.extend([ "ssh", ssh_target ])

    target_cmd.extend(["zfs", "recv", "-u" ])

    # filter certain properties on receive (useful for linux->freebsd in some cases)
    if args.filter_properties:
        for filter_property in args.filter_properties:
            target_cmd.extend([ "-x" , filter_property ])

    if args.debug:
        target_cmd.append("-v")

    if args.resume:
        target_cmd.append("-s")


    if ssh_target!="local":
        target_cmd.append("'" + target_filesystem + "'")
    else:
        target_cmd.append(target_filesystem)

    #use the ssh_target parameter of this transfer, not the global setting
    if args.buffer and ssh_target!="local":
        target_cmd.append("|mbuffer -m {}".format(args.buffer))


    #### make sure parent on target exists
    parent_filesystem= "/".join(target_filesystem.split("/")[:-1])
    run(ssh_to=ssh_target, cmd=[ "zfs", "create" ,"-p", parent_filesystem ], test=args.test)

    ### execute pipe
    debug_txt="# "+source_cmd[0]+" '"+("' '".join(source_cmd[1:]))+"'" + " | " + target_cmd[0]+" '"+("' '".join(target_cmd[1:]))+"'"

    if args.test:
        debug("[TEST] "+debug_txt)
        return
    else:
        debug(debug_txt)

    source_proc=subprocess.Popen(source_cmd, env=os.environ, stdout=subprocess.PIPE)
    target_proc=subprocess.Popen(target_cmd, env=os.environ, stdin=source_proc.stdout)
    source_proc.stdout.close() # Allow source_proc to receive a SIGPIPE if target_proc exits.
    target_proc.communicate()

    #communicate() only waits for the target. Without an explicit wait() the returncode
    #of the source stays None, and a failed zfs send would go unnoticed.
    source_proc.wait()

    if not args.ignore_transfer_errors:
        if source_proc.returncode:
            raise(subprocess.CalledProcessError(source_proc.returncode, source_cmd))

        #zfs recv sometimes gives an exitcode 1 while the transfer was successful, therefore we ignore exit 1's and do an extra check to see if the snapshot is there.
        if target_proc.returncode and target_proc.returncode!=1:
            raise(subprocess.CalledProcessError(target_proc.returncode, target_cmd))

    debug("Verifying if snapshot exists on target")
    run(ssh_to=ssh_target, cmd=["zfs", "list", target_filesystem+"@"+second_snapshot ])


#NOTE: unreliable when using with autobackup:bla=child
# """get filesystems that where already backupped to a target. """
# def zfs_get_backupped_filesystems(ssh_to, backup_name, target_path):
#     #get all target filesystems that have received or inherited the backup propert, under the target_path tree
#     ret=run(ssh_to=ssh_to, tab_split=False, valid_exitcodes=[ 0,1 ], cmd=[
#         "zfs", "get", "-r", "-t",  "volume,filesystem", "-o", "name", "-s", "received,inherited", "-H", "autobackup:"+backup_name, target_path
#     ])
#
#     return(ret)

"""get existing filesystems """
def zfs_get_existing_filesystems(ssh_to, target_path):
    """Return the names of all filesystems and volumes that exist under target_path (recursive).

    Exit code 1 of zfs list is accepted as well.
    """
    list_cmd=[
        "zfs", "list", "-r", "-t",  "volume,filesystem", "-o", "name", "-H", target_path
    ]

    return(run(ssh_to=ssh_to, tab_split=False, valid_exitcodes=[ 0,1 ], cmd=list_cmd))


"""get filesystems that where once backupped to target but are no longer selected on source

these are filesystems that are not in the list in target_filesystems.

this happens when filesystems are destroyed or unselected on the source.
"""
def get_stale_backupped_filesystems(backup_name, target_path, target_filesystems, existing_target_filesystems):
    """Return the existing target filesystems that are not wanted as target anymore.

    These are the entries of existing_target_filesystems that are not in
    target_filesystems, in their original order. backup_name and target_path
    are not used.
    """
    return([
        existing
        for existing in existing_target_filesystems
        if existing not in target_filesystems
    ])


#reference time for all age calculations, taken once when the module is loaded
now=time.time()
def determine_destroy_list(snapshots, days):
    """Determine which snapshots are older than days days.

    snapshots -- dict: filesystem name -> list of snapshot names ending in -<digits>
    days      -- maximum age in days

    Returns a list of names in filesystem@snapshot format.
    """
    max_age_secs=24 * 3600 * days

    def snapshot_time(snapshot):
        #the digits after the last dash are the timestamp of the snapshot
        stamp=re.findall("^.*-([0-9]*)$", snapshot)[0]
        if len(stamp)!=14:
            #anything else is used as a number of seconds
            return(int(stamp))
        #new format: 14 digits, YYYYmmddHHMMSS in local time
        return(time.mktime(time.strptime(stamp,"%Y%m%d%H%M%S")))

    return([
        filesystem+"@"+snapshot
        for filesystem in snapshots
        for snapshot in snapshots[filesystem]
        if (now-snapshot_time(snapshot)) > max_age_secs
    ])


def lstrip_path(path, count):
    """Return path without its first count components (components are separated by /)."""
    components=path.split("/")
    remaining=components[count:]
    return("/".join(remaining))


"""get list of filesystems that are changed, compared to specified latest snapshot. """
def zfs_get_unchanged_snapshots(ssh_to, snapshots):
    """Return the filesystems for which zfs reports nothing written since their latest snapshot.

    snapshots -- dict: filesystem name -> list of snapshot names, the last one is
                 used as the latest snapshot

    One zfs get call is done per filesystem, reading the written@<snapshot> property.
    """
    unchanged=[]
    for filesystem in snapshots:
        latest_snapshot=snapshots[filesystem][-1]

        output=run(ssh_to=ssh_to, tab_split=False, valid_exitcodes=[ 0 ], cmd=[
            "zfs", "get","-H" ,"-ovalue", "written@"+latest_snapshot, filesystem
        ])

        #the value is reported as 0B or 0 when nothing was written
        if output[0] in ("0B", "0"):
            unchanged.append(filesystem)

    return(unchanged)

"""get filesytems that are have changed since any snapshot."""
def zfs_get_unchanged_filesystems(ssh_to, filesystems):
    """Return the filesystems whose zfs property written is reported as 0B or 0.

    All filesystems are queried with a single zfs get call.
    """
    query=[ "zfs", "get","-H" ,"-oname,value", "written" ]
    query.extend(filesystems)
    rows=run(ssh_to=ssh_to, tab_split=True, cmd=query, valid_exitcodes=[ 0 ])

    return([ filesystem for ( filesystem , written ) in rows if written in ("0B", "0") ])


#fugly.. module-level counter of the failures so far
failures=0
def failed(txt):
    """Something failed, but we try to continue with the rest: count it and report txt on stderr."""
    global failures
    failures+=1
    error("FAILURE: "+txt+"\n")


def zfs_autobackup():
    """Run one complete backup: gather data, snapshot the source, send the snapshots and clean up.

    All settings are read from the global args.

    A failure while handling a single source filesystem, or while destroying old
    snapshots, is recorded via failed() and the run continues. Calls abort() when
    no source filesystems are selected, and sys.exit(0) when all selected
    filesystems were removed by --ignore-replicated.
    """

    ############## data gathering section

    if args.test:
        #test mode implies verbose output
        args.verbose=True
        verbose("RUNNING IN TEST-MODE, NOT MAKING ACTUAL BACKUP!")


    ### getting and determining source/target filesystems

    # get selected filesystems on backup source
    verbose("Getting selected source filesystems for backup {0} on {1}".format(args.backup_name,args.ssh_source))
    source_filesystems=zfs_get_selected_filesystems(args.ssh_source, args.backup_name)

    #nothing todo
    if not source_filesystems:
        abort("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source))

    #drop the filesystems that have no changes since any snapshot
    if args.ignore_replicated:
        replicated_filesystems=zfs_get_unchanged_filesystems(args.ssh_source, source_filesystems)
        for replicated_filesystem in replicated_filesystems:
            if replicated_filesystem in source_filesystems:
                source_filesystems.remove(replicated_filesystem)
                verbose("* Already replicated: {}".format(replicated_filesystem))

    if not source_filesystems:
        verbose("Nothing to do, all filesystems are already replicated.")
        sys.exit(0)

    # determine target filesystems
    target_filesystems=[]
    for source_filesystem in source_filesystems:
        #append args.target_path prefix and strip args.strip_path paths from source_filesystem
        target_filesystems.append(args.target_path + "/" + lstrip_path(source_filesystem, args.strip_path))
    debug("Wanted target filesystems:\n"+str(pprint.pformat(target_filesystems)))

    # get actual existing target filesystems. (including ones that might not be in the backupset anymore)
    verbose("Getting existing target filesystems")
    existing_target_filesystems=zfs_get_existing_filesystems(ssh_to=args.ssh_target, target_path=args.target_path)
    debug("Existing target filesystems:\n"+str(pprint.pformat(existing_target_filesystems)))
    common_target_filesystems=list(set(target_filesystems) & set(existing_target_filesystems))
    debug("Common target filesystems (target filesystems that also exist on source):\n"+str(pprint.pformat(common_target_filesystems)))


    ### get resumable transfers from target
    resumable_target_filesystems={}
    if args.resume:
        verbose("Checking for aborted transfers that can be resumed")
        #Important: use target_filesystems, not existing_target_filesystems (during the initial transfer it is resumable but doesnt exist yet)
        resumable_target_filesystems=zfs_get_resumable_filesystems(args.ssh_target, target_filesystems)
        debug("Resumable filesystems:\n"+str(pprint.pformat(resumable_target_filesystems)))


    ### get existing target snapshots
    target_snapshots={}
    if common_target_filesystems:
        verbose("Getting target snapshot-list from {0}".format(args.ssh_target))
        target_snapshots=zfs_get_snapshots(args.ssh_target, common_target_filesystems, args.backup_name)
        # except subprocess.CalledProcessError:
        #     verbose("(ignoring errors, probably initial backup for this filesystem)")
        #     pass
        debug("Target snapshots:\n" + str(pprint.pformat(target_snapshots)))


    ### get existing source snapshots
    verbose("Getting source snapshot-list from {0}".format(args.ssh_source))
    source_snapshots=zfs_get_snapshots(args.ssh_source, source_filesystems, args.backup_name)
    debug("Source snapshots:\n" + str(pprint.pformat(source_snapshots)))


    ### create new snapshots on source
    if not args.no_snapshot:
        #determine which filesystems changed since last snapshot
        if not args.allow_empty and not args.ignore_replicated:
            #determine which filesystems are unchanged since OUR snapshots. (not since ANY snapshot)
            unchanged_filesystems=zfs_get_unchanged_snapshots(args.ssh_source, source_snapshots)

        else:
            unchanged_filesystems=[]

        snapshot_filesystems=[]
        for source_filesystem in source_filesystems:
            if source_filesystem not in unchanged_filesystems:
                snapshot_filesystems.append(source_filesystem)
            else:
                verbose("* Not snapshotting {}, no changes found.".format(source_filesystem))

        #create snapshots
        if snapshot_filesystems:
            new_snapshot_name=args.backup_name+"-"+time.strftime("%Y%m%d%H%M%S")
            verbose("Creating source snapshots {0} on {1} ".format(new_snapshot_name, args.ssh_source))
            zfs_create_snapshot(args.ssh_source, snapshot_filesystems, new_snapshot_name)
        else:
            verbose("No changes at all, not creating snapshot.")

        #add the new snapshot to the list of source snapshots
        #(new_snapshot_name is only set if snapshot_filesystems is not empty, in which case this loop does nothing)
        for snapshot_filesystem in snapshot_filesystems:
            source_snapshots.setdefault(snapshot_filesystem,[]).append(new_snapshot_name)


    #obsolete snapshots that may be removed
    source_obsolete_snapshots={}
    target_obsolete_snapshots={}


    ############## backup section

    #determine which snapshots to send for each filesystem
    for source_filesystem in source_filesystems:
        try:
            target_filesystem=args.target_path + "/" + lstrip_path(source_filesystem, args.strip_path)

            if source_filesystem not in source_snapshots:
                #this happens if you use --no-snapshot and there are new filesystems without snapshots
                verbose("* Skipping source filesystem {0}, no snapshots found".format(source_filesystem))
            else:

                #incremental or initial send?
                if target_filesystem in target_snapshots and target_snapshots[target_filesystem]:
                    #incremental mode, determine what to send and what is obsolete

                    #latest successfully sent snapshot, should be common on both source and target
                    latest_target_snapshot=target_snapshots[target_filesystem][-1]

                    if latest_target_snapshot not in source_snapshots[source_filesystem]:
                        #cant find latest target anymore. find first common snapshot and inform user
                        error_msg="Cant find latest target snapshot on source for '{}', did you destroy/rename it?".format(source_filesystem)
                        error_msg=error_msg+"\nLatest on target : "+target_filesystem+"@"+latest_target_snapshot
                        error_msg=error_msg+"\nMissing on source: "+source_filesystem+"@"+latest_target_snapshot
                        found=False
                        #search from newest to oldest (this reuses the latest_target_snapshot variable)
                        for latest_target_snapshot in reversed(target_snapshots[target_filesystem]):
                            if latest_target_snapshot in source_snapshots[source_filesystem]:
                                error_msg=error_msg+"\nYou could solve this by rolling back to this common snapshot on target: "+target_filesystem+"@"+latest_target_snapshot
                                found=True
                                break
                        if not found:
                            error_msg=error_msg+"\nAlso could not find an earlier common snapshot to rollback to."
                        else:
                            #with --ignore-new this is not an error, the filesystem is just skipped
                            if args.ignore_new:
                                verbose("* Skipping source filesystem '{0}', target already has newer snapshots.".format(source_filesystem))
                                continue

                        raise(Exception(error_msg))

                    #send all new source snapshots that come AFTER the last target snapshot
                    latest_source_index=source_snapshots[source_filesystem].index(latest_target_snapshot)
                    send_snapshots=source_snapshots[source_filesystem][latest_source_index+1:]

                    #source snapshots that come BEFORE last target snapshot are obsolete
                    source_obsolete_snapshots[source_filesystem]=source_snapshots[source_filesystem][0:latest_source_index]

                    #target snapshots that come BEFORE last target snapshot are obsolete
                    latest_target_index=target_snapshots[target_filesystem].index(latest_target_snapshot)
                    target_obsolete_snapshots[target_filesystem]=target_snapshots[target_filesystem][0:latest_target_index]
                else:
                    #initial mode, send all snapshots, nothing is obsolete:
                    latest_target_snapshot=None
                    send_snapshots=source_snapshots[source_filesystem]
                    target_obsolete_snapshots[target_filesystem]=[]
                    source_obsolete_snapshots[source_filesystem]=[]

                #now actually send the snapshots
                if not args.no_send:

                    if send_snapshots and args.rollback and latest_target_snapshot:
                        #roll back any changes on target
                        debug("Rolling back target to latest snapshot.")
                        run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "rollback", target_filesystem+"@"+latest_target_snapshot ])


                    for send_snapshot in send_snapshots:

                        #resumable? (pop: the token is only used for the first transfer of this filesystem)
                        if target_filesystem in resumable_target_filesystems:
                            resume_token=resumable_target_filesystems.pop(target_filesystem)
                        else:
                            resume_token=None

                        #hold the snapshot we're sending on the source
                        if not args.no_holds:
                            zfs_hold_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+send_snapshot)

                        zfs_transfer(
                            ssh_source=args.ssh_source, source_filesystem=source_filesystem,
                            first_snapshot=latest_target_snapshot, second_snapshot=send_snapshot,
                            ssh_target=args.ssh_target, target_filesystem=target_filesystem,
                            resume_token=resume_token
                        )

                        #hold the snapshot we just sent to the target
                        zfs_hold_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+send_snapshot)


                        #now that we successfully transferred this snapshot, the previous snapshot is obsolete:
                        if latest_target_snapshot:
                            zfs_release_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+latest_target_snapshot)
                            target_obsolete_snapshots[target_filesystem].append(latest_target_snapshot)

                            #NOTE(review): with --no-holds the previous source snapshot is not added to the obsolete list either - confirm this is intended
                            if not args.no_holds:
                                zfs_release_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+latest_target_snapshot)
                                source_obsolete_snapshots[source_filesystem].append(latest_target_snapshot)
                        #we just received a new filesystem?
                        else:
                            if args.clear_refreservation:
                                debug("Clearing refreservation to save space.")

                                run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "refreservation=none", target_filesystem ])


                            if args.clear_mountpoint:
                                debug("Setting canmount=noauto to prevent auto-mounting in the wrong place. (ignoring errors)")

                                run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "canmount=noauto", target_filesystem ], valid_exitcodes= [0, 1] )


                        #the next snapshot is sent incrementally from the one we just sent
                        latest_target_snapshot=send_snapshot
        # failed, skip this source_filesystem
        except Exception as e:
            failed(str(e))


    ############## cleanup section
    #we only do cleanups after everything is complete, to keep everything consistent (same snapshots everywhere)


    if not args.ignore_replicated:
        #find stale backups on target that have become obsolete

        stale_target_filesystems=get_stale_backupped_filesystems(backup_name=args.backup_name, target_path=args.target_path, target_filesystems=target_filesystems, existing_target_filesystems=existing_target_filesystems)
        debug("Stale target filesystems: {0}".format("\n".join(stale_target_filesystems)))

        stale_target_snapshots=zfs_get_snapshots(args.ssh_target, stale_target_filesystems, args.backup_name)
        debug("Stale target snapshots: " + str(pprint.pformat(stale_target_snapshots)))
        #all snapshots of stale filesystems are candidates for removal (still subject to the age check below)
        target_obsolete_snapshots.update(stale_target_snapshots)

        #determine stale filesystems that have no snapshots left (they can be destroyed)
        stale_target_destroys=[]
        for stale_target_filesystem in stale_target_filesystems:
            if stale_target_filesystem not in stale_target_snapshots:
                stale_target_destroys.append(stale_target_filesystem)

        if stale_target_destroys:
            #NOTE: dont destroy automaticly..not safe enough.
            # if args.destroy_stale:
            #     verbose("Destroying stale filesystems on target {0}:\n{1}".format(args.ssh_target, "\n".join(stale_target_destroys)))
            #     zfs_destroy(ssh_to=args.ssh_target, filesystems=stale_target_destroys, recursive=True)
            # else:
            verbose("Stale filesystems on {0}:\n{1}".format(args.ssh_target, "\n".join(stale_target_destroys)))
    else:
        verbose("NOTE: Cant determine stale target filesystems while using ignore_replicated.")


    #now actually destroy the old snapshots (only the obsolete ones older than --keep-source / --keep-target days)
    source_destroys=determine_destroy_list(source_obsolete_snapshots, args.keep_source)
    if source_destroys:
        verbose("Destroying old snapshots on source {0}:\n{1}".format(args.ssh_source, "\n".join(source_destroys)))
        try:
            zfs_destroy_snapshots(ssh_to=args.ssh_source, snapshots=source_destroys)
        except Exception as e:
            failed(str(e))


    target_destroys=determine_destroy_list(target_obsolete_snapshots, args.keep_target)
    if target_destroys:
        verbose("Destroying old snapshots on target {0}:\n{1}".format(args.ssh_target, "\n".join(target_destroys)))
        try:
            zfs_destroy_snapshots(ssh_to=args.ssh_target, snapshots=target_destroys)
        except Exception as e:
            failed(str(e))


################################################################## ENTRY POINT

# parse arguments
# (the order of the add_argument calls below is also the order in the --help output)
import argparse
parser = argparse.ArgumentParser(
    description='ZFS autobackup v2.4',
    epilog='When a filesystem fails, zfs_backup will continue and report the number of failures at that end. Also the exit code will indicate the number of failures.')
parser.add_argument('--ssh-source', default="local", help='Source host to get backup from. (user@hostname) Default %(default)s.')
parser.add_argument('--ssh-target', default="local", help='Target host to push backup to. (user@hostname) Default  %(default)s.')
parser.add_argument('--keep-source', type=int, default=30, help='Number of days to keep old snapshots on source. Default %(default)s.')
parser.add_argument('--keep-target', type=int, default=30, help='Number of days to keep old snapshots on target. Default %(default)s.')
parser.add_argument('backup_name',    help='Name of the backup (you should set the zfs property "autobackup:backup-name" to true on filesystems you want to backup')
parser.add_argument('target_path',    help='Target path')

# options that control snapshotting and sending
parser.add_argument('--no-snapshot', action='store_true', help='dont create new snapshot (usefull for finishing uncompleted backups, or cleanups)')
parser.add_argument('--no-send', action='store_true', help='dont send snapshots (usefull to only do a cleanup)')
parser.add_argument('--allow-empty', action='store_true', help='if nothing has changed, still create empty snapshots.')
parser.add_argument('--ignore-replicated', action='store_true',  help='Ignore datasets that seem to be replicated some other way. (No changes since lastest snapshot. Usefull for proxmox HA replication)')
parser.add_argument('--no-holds', action='store_true',  help='Dont lock snapshots on the source. (Usefull to allow proxmox HA replication to switches nodes)')
parser.add_argument('--ignore-new', action='store_true',  help='Ignore filesystem if there are already newer snapshots for it on the target (use with caution)')

# options that control the transfer itself
parser.add_argument('--resume', action='store_true', help='support resuming of interrupted transfers by using the zfs extensible_dataset feature (both zpools should have it enabled) Disadvantage is that you need to use zfs recv -A if another snapshot is created on the target during a receive. Otherwise it will keep failing.')
parser.add_argument('--strip-path', default=0, type=int, help='number of directory to strip from path (use 1 when cloning zones between 2 SmartOS machines)')
parser.add_argument('--buffer', default="",  help='Use mbuffer with specified size to speedup zfs transfer. (e.g. --buffer 1G) Will also show nice progress output.')


# options that control what happens on the target
# parser.add_argument('--destroy-stale', action='store_true', help='Destroy stale backups that have no more snapshots. Be sure to verify the output before using this! ')
parser.add_argument('--clear-refreservation', action='store_true', help='Set refreservation property to none for new filesystems. Usefull when backupping SmartOS volumes. (recommended)')
parser.add_argument('--clear-mountpoint', action='store_true', help='Sets canmount=noauto property, to prevent the received filesystem from mounting over existing filesystems. (recommended)')
parser.add_argument('--filter-properties', action='append', help='Filter properties when receiving filesystems. Can be specified multiple times. (Example: If you send data from Linux to FreeNAS, you should filter xattr)')
parser.add_argument('--rollback', action='store_true', help='Rollback changes on the target before starting a backup. (normally you can prevent changes by setting the readonly property on the target_path to on)')
parser.add_argument('--ignore-transfer-errors', action='store_true', help='Ignore transfer errors (still checks if received filesystem exists. usefull for acltype errors)')


# output and test options
parser.add_argument('--test', action='store_true', help='dont change anything, just show what would be done (still does all read-only operations)')
parser.add_argument('--verbose', action='store_true', help='verbose output')
parser.add_argument('--debug', action='store_true', help='debug output (shows commands that are executed)')

#note args is the only global variable we use, since its a global readonly setting anyway
#(zfs_autobackup() does set args.verbose when --test is used)
args = parser.parse_args()

#these two options contradict each other
if args.ignore_replicated and args.allow_empty:
    abort("Cannot use allow_empty with ignore_replicated.")


try:
    zfs_autobackup()
    if not failures:
        verbose("All operations completed succesfully.")
        sys.exit(0)
    else:
        verbose("{} OPERATION(S) FAILED!".format(failures))
        #exit with the number of failures. (capped at 255)
        sys.exit(min(255,failures))

#sys.exit() raises SystemExit, which is not an Exception subclass, so it is not caught here
except Exception as e:
    if args.debug:
        #show the full traceback in debug mode
        raise
    else:
        print(str(e))
        abort("FATAL ERROR")