#!/usr/bin/env python
# -*- coding: utf8 -*-

from __future__ import print_function

import os
import sys
import re
import traceback
import subprocess
import pprint
# import cStringIO
import time


def error(txt):
    print(txt, file=sys.stderr)


def verbose(txt):
    if args.verbose:
        print(txt)


def debug(txt):
    if args.debug:
        print(txt)


# fatal: abort execution, exit code 255
def abort(txt):
    error(txt)
    sys.exit(255)


# class TreeNode():
#     """generic tree implementation, with parent/child and prev/next relations"""
#     def __init__(self, name, parent=None, next=None, prev=None, *args, **kwargs):
#         self.childs={}
#
#         self.name=name
#         self.parent=parent
#         if parent:
#             if name in parent.childs:
#                 raise(Exception("parent {} already has child {}".format(parent.name, name)))
#             parent.childs[name]=self
#
#         self.next=next
#         if next:
#             if next.prev:
#                 raise(Exception("{} already has a previous item".format(next.name)))
#             next.prev=self
#
#         self.prev=prev
#         if prev:
#             if prev.next:
#                 raise(Exception("{} already has a next item".format(prev.name)))
#             prev.next=self
#
#
#     def remove(self):
#         """remove the item from other referenced TreeNodes. call this before you actually delete a tree object"""
#
#         if self.parent:
#             self.parent.childs.remove(self.name)
#
#         # let previous and next objects point to each other
#         if self.next:
#             self.next.prev=self.prev
#
#         if self.prev:
#             self.prev.next=self.next
#
#         self.parent=None
#         self.next=None
#         self.prev=None


class cached_property(object):
    """A property that is only computed once per instance and then cached.

    This variant stores computed values in obj._cached_properties, so the cache
    can be reset by clearing that dict (see ZfsDataset.invalidate).

    Source: https://github.com/bottlepy/bottle/commit/fa7733e075da0d790d809aa3d2f53071897e6f76
    """

    def __init__(self, func):
        self.__doc__ = getattr(func, '__doc__')
        self.func = func

    def __get__(self, obj, cls):
        if obj is None:
            return self

        propname=self.func.__name__

        #store the value in a dict on the instance so it's cached from now on
        if not hasattr(obj, '_cached_properties'):
            obj._cached_properties={}

        if not propname in obj._cached_properties:
            obj._cached_properties[propname]=self.func(obj)

        return obj._cached_properties[propname]
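
# Usage sketch for cached_property (illustrative only, not part of the script's
# flow): the decorated method runs once per instance and the result is reused
# until obj._cached_properties is cleared.
#
#   class Example(object):
#       @cached_property
#       def answer(self):
#           print("computing...")
#           return 42
#
#   e = Example()
#   e.answer   # prints "computing...", returns 42
#   e.answer   # returns 42 from the cache, no recomputation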


class ExecuteNode:
    """an endpoint to execute local or remote commands via ssh"""

    def __init__(self, ssh_to=None, readonly=False):
        """ssh_to: server you want to ssh to. None means local.
        readonly: only execute commands that don't make any changes (useful for test runs)
        """

        self.ssh_to=ssh_to
        self.readonly=readonly


    def run(self, cmd, input=None, tab_split=False, valid_exitcodes=[ 0 ], readonly=False):
        """run a command on the node

        readonly: set this to True if the command doesn't make any changes and is safe to execute in test mode
        """

        encoded_cmd=[]

        #use ssh?
        if self.ssh_to != None:
            encoded_cmd.extend(["ssh", self.ssh_to])

            #make sure the command gets all the data in utf8 format:
            #(this is necessary if LC_ALL=en_US.utf8 is not set in the environment)
            for arg in cmd:
                #add single quotes for remote commands to support spaces and other weird stuff (remote commands are executed in a shell)
                encoded_cmd.append( ("'"+arg+"'").encode('utf-8'))

        else:
            for arg in cmd:
                encoded_cmd.append(arg.encode('utf-8'))

        #debug and test stuff
        debug_txt="# "+" ".join(encoded_cmd)

        if self.readonly and not readonly:
            debug("[NOT EXECUTING (readonly mode)] "+debug_txt)
        else:
            debug(debug_txt)

        if input:
            debug("INPUT:\n"+input.rstrip())
            stdin=subprocess.PIPE
        else:
            stdin=None

        if self.readonly and not readonly:
            return

        #execute and parse/return results
        p=subprocess.Popen(encoded_cmd, env=os.environ, stdout=subprocess.PIPE, stdin=stdin)
        output=p.communicate(input=input)[0]
        if p.returncode not in valid_exitcodes:
            raise(subprocess.CalledProcessError(p.returncode, encoded_cmd))

        lines=output.splitlines()
        if not tab_split:
            return(lines)
        else:
            ret=[]
            for line in lines:
                ret.append(line.split("\t"))
            return(ret)

    def __repr__(self):
        #str() so that a local node (ssh_to=None) still has a valid repr
        return(str(self.ssh_to))
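
# Usage sketch for ExecuteNode (hypothetical host name, illustrative only):
#
#   local = ExecuteNode()                             # run commands locally
#   remote = ExecuteNode(ssh_to="root@backupserver")  # run commands via ssh
#   remote.run(["zfs", "list", "-H", "-o", "name"], readonly=True)
#
# With readonly=True on the node (as set by --test), only commands that are
# themselves marked readonly=True are actually executed.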


class ZfsDataset():
    """a zfs dataset (filesystem/volume/snapshot/clone)"""

    def __init__(self, zfs_node, name):
        """name: full path of the zfs dataset"""
        self.zfs_node=zfs_node
        self.name=name

    def __repr__(self):
        return("{}: {}".format(self.zfs_node, self.name))

    def __str__(self):
        return(self.name)

    def verbose(self,txt):
        self.zfs_node.verbose("{}: {}".format(self.name, txt))

    def debug(self,txt):
        self.zfs_node.debug("{}: {}".format(self.name, txt))


    def invalidate(self):
        """clear cache"""
        #TODO: nicer
        self._cached_properties={}


    @property
    def filesystem_name(self):
        """filesystem part of the name"""
        (filesystem, snapshot_name)=self.name.split("@")
        return(filesystem)

    @property
    def snapshot_name(self):
        """snapshot part of the name"""
        (filesystem, snapshot_name)=self.name.split("@")
        return(snapshot_name)

    @cached_property
    def properties(self):
        """all zfs properties"""

        cmd=[
            "zfs", "get", "all", "-H", "-o", "property,value", self.name
        ]

        return(dict(self.zfs_node.run(tab_split=True, cmd=cmd, readonly=True, valid_exitcodes=[ 0 ])))

    def is_changed(self):
        """is the dataset changed since ANY latest snapshot?"""

        if self.properties['written']=="0B" or self.properties['written']=="0":
            return(False)
        else:
            return(True)

    def is_ours(self):
        """return True if this snapshot was created by this backup_name"""
        if re.match("^"+self.zfs_node.backup_name+"-[0-9]*$", self.snapshot_name):
            return(True)
        else:
            return(False)
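
    # Note on is_changed() above: `zfs get written <dataset>` reports "0" ("0B"
    # on newer zfs versions) when nothing was written to the dataset since its
    # newest snapshot.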

    def from_names(self, names):
        """convert a list of names to a list of ZfsDatasets for this zfs_node"""
        ret=[]
        for name in names:
            ret.append(ZfsDataset(self.zfs_node, name))

        return(ret)

    @cached_property
    def snapshots(self):
        """get all snapshots of this dataset"""

        cmd=[
            "zfs", "list", "-d", "1", "-r", "-t", "snapshot", "-H", "-o", "name", self.name
        ]

        names=self.zfs_node.run(cmd=cmd, readonly=True)
        return(self.from_names(names))

    @cached_property
    def our_snapshots(self):
        """get list of snapshots of this dataset that were created by us"""
        ret=[]
        for snapshot in self.snapshots:
            if snapshot.is_ours():
                ret.append(snapshot)

        return(ret)

    @cached_property
    def is_changed_ours(self):
        """is the dataset changed since OUR latest snapshot?"""

        if not self.our_snapshots:
            return(True)

        latest_snapshot=self.our_snapshots[-1]

        cmd=[ "zfs", "get", "-H", "-ovalue", "written@"+str(latest_snapshot), self.name ]
        output=self.zfs_node.run(readonly=True, tab_split=False, cmd=cmd, valid_exitcodes=[ 0 ])
        if output[0]=="0B" or output[0]=="0":
            return(False)

        return(True)

    @cached_property
    def recursive_datasets(self, types="filesystem,volume"):
        """get all datasets recursively under us"""
        #note: accessed via cached_property, so the types default is always used

        names=self.zfs_node.run(tab_split=False, readonly=True, valid_exitcodes=[ 0 ], cmd=[
            "zfs", "list", "-r", "-t", types, "-o", "name", "-H", self.name
        ])

        #the first name is this dataset itself, only return the datasets below it
        return(self.from_names(names[1:]))
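
# Usage sketch for ZfsDataset (hypothetical names, illustrative only; ZfsNode is
# defined below):
#
#   node = ZfsNode("offsite1", ssh_to="root@fileserver")
#   dataset = ZfsDataset(node, "rpool/data")
#   dataset.our_snapshots    # snapshots named offsite1-YYYYMMDDHHMMSS
#   dataset.is_changed_ours  # True if data was written since our latest snapshot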


class ZfsNode(ExecuteNode):
    """a node that contains zfs datasets. implements global lowlevel zfs commands"""

    def __init__(self, backup_name, ssh_to=None, readonly=False, description=""):
        self.backup_name=backup_name
        if not description:
            self.description=ssh_to
        else:
            self.description=description

        ExecuteNode.__init__(self, ssh_to=ssh_to, readonly=readonly)

    def verbose(self,txt):
        verbose("{}: {}".format(self.description, txt))

    def debug(self,txt):
        debug("{}: {}".format(self.description, txt))

    def new_snapshotname(self):
        """determine a unique new snapshot name (e.g. backupname-20190218120000)"""
        return(self.backup_name+"-"+time.strftime("%Y%m%d%H%M%S"))


    def consistent_snapshot(self, datasets, snapshot_name, allow_empty=True):
        """create a consistent (atomic) snapshot of the specified datasets.

        allow_empty: also snapshot datasets that have no changes (compared to our latest snapshot)
        """

        cmd=[ "zfs", "snapshot" ]

        noop=True
        for dataset in datasets:
            if not allow_empty:
                if not dataset.is_changed_ours:
                    dataset.verbose("No changes, not snapshotting")
                    continue

            cmd.append(str(dataset)+"@"+snapshot_name)
            dataset.invalidate()
            noop=False

        if noop:
            self.verbose("No changes, not creating snapshot.")
        else:
            self.verbose("Creating snapshot {}".format(snapshot_name))
            self.run(cmd, readonly=False)
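
    # Example (hypothetical dataset and backup names): for two changed datasets
    # the resulting command is one atomic call such as
    #   zfs snapshot rpool/data@offsite1-20190218120000 rpool/var@offsite1-20190218120000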

    @cached_property
    def selected_datasets(self):
        """determine which filesystems should be backed up by looking at the special autobackup property, systemwide

        returns: list of ZfsDataset
        """
        #get all source filesystems that have the backup property
        lines=self.run(tab_split=True, readonly=True, cmd=[
            "zfs", "get", "-t", "volume,filesystem", "-o", "name,value,source", "-s", "local,inherited", "-H", "autobackup:"+self.backup_name
        ])

        #determine the filesystems that should actually be backed up
        selected_filesystems=[]
        direct_filesystems=[]
        for line in lines:
            (name,value,source)=line
            dataset=ZfsDataset(self, name)

            if value=="false":
                dataset.verbose("Ignored (disabled)")

            else:
                if source=="local" and ( value=="true" or value=="child"):
                    direct_filesystems.append(name)

                if source=="local" and value=="true":
                    selected_filesystems.append(ZfsDataset(self, name))
                    verbose("* Selected: {0} (direct selection)".format(name))
                elif source.find("inherited from ")==0 and (value=="true" or value=="child"):
                    inherited_from=re.sub("^inherited from ", "", source)
                    if inherited_from in direct_filesystems:
                        selected_filesystems.append(ZfsDataset(self, name))
                        verbose("* Selected: {0} (inherited selection)".format(name))
                    else:
                        verbose("* Ignored : {0} (already a backup)".format(name))
                else:
                    verbose("* Ignored : {0} (only children)".format(name))

        return(selected_filesystems)
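
# How datasets get selected (example zfs commands, assuming the backup name
# "offsite1"): value "true" selects a dataset and whatever inherits from it,
# "child" selects only its children, "false" excludes a dataset again.
#
#   zfs set autobackup:offsite1=true rpool/data       # rpool/data and its descendants
#   zfs set autobackup:offsite1=child rpool           # only the children of rpool
#   zfs set autobackup:offsite1=false rpool/data/tmp  # exclude this one again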


################################################################## ENTRY POINT

# parse arguments
import argparse
parser = argparse.ArgumentParser(
    description='ZFS autobackup v2.4',
    epilog='When a filesystem fails, zfs_backup will continue and report the number of failures at the end. The exit code will also indicate the number of failures.')
parser.add_argument('--ssh-source', default=None, help='Source host to get backup from. (user@hostname) Default %(default)s.')
parser.add_argument('--ssh-target', default=None, help='Target host to push backup to. (user@hostname) Default %(default)s.')
parser.add_argument('--keep-source', type=int, default=30, help='Number of days to keep old snapshots on source. Default %(default)s.')
parser.add_argument('--keep-target', type=int, default=30, help='Number of days to keep old snapshots on target. Default %(default)s.')
parser.add_argument('backup_name', help='Name of the backup (you should set the zfs property "autobackup:backup-name" to true on filesystems you want to backup)')
parser.add_argument('target_path', help='Target ZFS filesystem')

parser.add_argument('--no-snapshot', action='store_true', help='Do not create a new snapshot (useful for finishing uncompleted backups, or cleanups)')
parser.add_argument('--no-send', action='store_true', help='Do not send snapshots (useful to only do a cleanup)')
parser.add_argument('--allow-empty', action='store_true', help='If nothing has changed, still create empty snapshots.')
parser.add_argument('--ignore-replicated', action='store_true', help='Ignore datasets that seem to be replicated some other way. (No changes since latest snapshot. Useful for Proxmox HA replication)')
parser.add_argument('--no-holds', action='store_true', help='Do not lock snapshots on the source. (Useful to allow Proxmox HA replication to switch nodes)')
parser.add_argument('--ignore-new', action='store_true', help='Ignore filesystem if there are already newer snapshots for it on the target (use with caution)')

parser.add_argument('--resume', action='store_true', help='Support resuming of interrupted transfers by using the zfs extensible_dataset feature (both zpools should have it enabled). Disadvantage is that you need to use zfs recv -A if another snapshot is created on the target during a receive. Otherwise it will keep failing.')
parser.add_argument('--strip-path', default=0, type=int, help='Number of directories to strip from the path (use 1 when cloning zones between 2 SmartOS machines)')
parser.add_argument('--buffer', default="", help='Use mbuffer with the specified size to speed up the zfs transfer. (e.g. --buffer 1G) Will also show nice progress output.')


# parser.add_argument('--destroy-stale', action='store_true', help='Destroy stale backups that have no more snapshots. Be sure to verify the output before using this!')
parser.add_argument('--properties', default=None, help='Comma separated list of zfs properties that should be synced to the target. (Quotas are always disabled temporarily)')
parser.add_argument('--rollback', action='store_true', help='Rollback changes on the target before starting a backup. (Normally you can prevent changes by setting the readonly property on the target_path to on)')
parser.add_argument('--ignore-transfer-errors', action='store_true', help='Ignore transfer errors (still checks if the received filesystem exists. Useful for acltype errors)')


parser.add_argument('--test', action='store_true', help='Do not change anything, just show what would be done (still does all read-only operations)')
parser.add_argument('--verbose', action='store_true', help='verbose output')
parser.add_argument('--debug', action='store_true', help='debug output (shows commands that are executed)')

#note: args is the only global variable we use, since it's a global readonly setting anyway
args = parser.parse_args()
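
# Example invocation (hypothetical hosts and pool names; assumes this script is
# installed on the PATH as zfs_autobackup):
#
#   zfs_autobackup --ssh-source root@fileserver --verbose offsite1 backuppool/fileserver
#
# This selects every dataset on fileserver that has autobackup:offsite1 set and
# creates a new consistent snapshot set named offsite1-<timestamp>.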


source_node=ZfsNode(args.backup_name, ssh_to=args.ssh_source, readonly=args.test)
target_node=ZfsNode(args.backup_name, ssh_to=args.ssh_target, readonly=args.test)


source_datasets=source_node.selected_datasets

if not source_datasets:
    abort("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source))

source_node.consistent_snapshot(source_datasets, source_node.new_snapshotname(), allow_empty=args.allow_empty)

# for source_dataset in source_datasets:
#     print(source_dataset)
#     print(source_dataset.recursive_datasets)
#
#
# pprint.pprint(ZfsDataset(node, "rpool").recursive_datasets)