429 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			429 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python
 | |
| # -*- coding: utf8 -*-
 | |
| from __future__ import print_function
 | |
| import os
 | |
| import sys
 | |
| import re
 | |
| import traceback
 | |
| import subprocess
 | |
| import pprint
 | |
| # import cStringIO
 | |
| import time
 | |
| 
 | |
class Log:
    """Console logger that can prefix verbose messages with a chain of titles.

    When consecutive messages share leading titles, the repeated titles are
    replaced by spaces of the same width so related lines are aligned.
    """

    def __init__(self):
        # titles that were passed to the previous titled_str() call
        self.titles = []

    def titled_str(self, txt, titles):
        """Return txt prefixed by titles, blanking titles repeated from the previous call.

        txt: the message text
        titles: list of title strings, outermost first

        Every title (or its blank placeholder) is followed by ": ".
        """
        parts = []
        for position, title in enumerate(titles):
            if position < len(self.titles) and self.titles[position] == title:
                # same title at the same position as last time: keep only its width
                parts.append(" " * len(title))
            else:
                parts.append(title)
            parts.append(": ")
        parts.append(txt)

        # keep a copy: if the caller changes its list afterwards, our record of
        # the previous titles must not change with it
        self.titles = list(titles)
        return "".join(parts)

    def error(self, txt, titles=None):
        """Print txt to stderr. titles is accepted for symmetry but not used."""
        print(txt, file=sys.stderr)

    def verbose(self, txt, titles=None):
        """Print txt with its titles to stdout, only when args.verbose is set."""
        if args.verbose:
            print(self.titled_str(txt, titles or []))

    def debug(self, txt, titles=None):
        """Print txt to stdout, only when args.debug is set. titles is not used."""
        if args.debug:
            print(txt)
 | |
| 
 | |
| 
 | |
# the one logger instance shared by the whole script
log = Log()


def abort(txt):
    """Fatal error: report txt through log.error and exit with code 255."""
    log.error(txt)
    sys.exit(255)
 | |
| 
 | |
| 
 | |
class cached_property(object):
    """Descriptor that evaluates the wrapped method once per instance.

    Results are stored in the instance's ``_cached_properties`` dict, keyed by
    the name of the wrapped function. Replacing or emptying that dict makes
    the next access compute the value again.

    Based on: https://github.com/bottlepy/bottle/commit/fa7733e075da0d790d809aa3d2f53071897e6f76
    """

    def __init__(self, func):
        self.func = func
        self.__doc__ = getattr(func, '__doc__')

    def __get__(self, obj, cls):
        # accessed on the class instead of an instance: return the descriptor
        if obj is None:
            return self

        # the per-instance cache dict is created lazily on first access
        if not hasattr(obj, '_cached_properties'):
            obj._cached_properties = {}

        key = self.func.__name__
        if key in obj._cached_properties:
            return obj._cached_properties[key]

        # first access: compute, remember and return
        obj._cached_properties[key] = self.func(obj)
        return obj._cached_properties[key]
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
class ExecuteNode:
    """an endpoint to execute local or remote commands via ssh"""

    def __init__(self, ssh_to=None, readonly=False):
        """ssh_to: server you want to ssh to. None means local
           readonly: only execute commands that dont make any changes (useful for testing-runs)
        """
        self.ssh_to = ssh_to
        self.readonly = readonly

    @staticmethod
    def _shell_quote(arg):
        """Single-quote arg for a remote shell, escaping embedded single quotes."""
        # inside single quotes nothing is special except the quote itself, so
        # close the quoting, emit an escaped quote and reopen:  '  ->  '\''
        return "'" + arg.replace("'", "'\\''") + "'"

    def run(self, cmd, input=None, tab_split=False, valid_exitcodes=(0,), readonly=False):
        """run a command on the node

        cmd: the command as a list of arguments
        input: optional data that is fed to the stdin of the command
        tab_split: split every output line on tabs (result is a list of lists)
        valid_exitcodes: exit codes that are not treated as an error
        readonly: make this True if the command doesnt make any changes and is safe to execute in testmode

        Returns the output lines of the command. Returns None without
        executing anything when the node is readonly and the command is not
        marked readonly.
        Raises subprocess.CalledProcessError on an exit code that is not in valid_exitcodes.
        """

        # make sure the command gets all the data in utf8 format:
        # (this is necessary if LC_ALL=en_US.utf8 is not set in the environment)
        # NOTE(review): on Python 3 encode() yields bytes and the join for the
        # debug text below would fail; this looks written for Python 2 - confirm.
        if self.ssh_to is not None:
            encoded_cmd = ["ssh", self.ssh_to]
            # remote commands are executed in a shell: quote every argument to
            # support spaces and other weird stuff
            encoded_cmd.extend(self._shell_quote(arg).encode('utf-8') for arg in cmd)
        else:
            encoded_cmd = [arg.encode('utf-8') for arg in cmd]

        # a changing command on a readonly node is logged but never executed
        skip_execution = self.readonly and not readonly

        # debug and test stuff
        debug_txt = "# " + " ".join(encoded_cmd)
        if skip_execution:
            log.debug("[NOT EXECUTING (readonly mode)] " + debug_txt)
        else:
            log.debug(debug_txt)

        if input:
            log.debug("INPUT:\n" + input.rstrip())
            stdin = subprocess.PIPE
        else:
            stdin = None

        if skip_execution:
            return

        # execute and parse/return results
        p = subprocess.Popen(encoded_cmd, env=os.environ, stdout=subprocess.PIPE, stdin=stdin)
        output = p.communicate(input=input)[0]
        if p.returncode not in valid_exitcodes:
            raise subprocess.CalledProcessError(p.returncode, encoded_cmd)

        lines = output.splitlines()
        if tab_split:
            return [line.split("\t") for line in lines]
        return lines

    def __repr__(self):
        # __repr__ has to return a string, and ssh_to is None for a local node
        if self.ssh_to is None:
            return "(local)"
        return self.ssh_to
 | |
| 
 | |
| 
 | |
| 
 | |
class ZfsDataset():
    """a zfs dataset (filesystem/volume/snapshot/clone)"""

    def __init__(self, zfs_node, name):
        """zfs_node: the node this dataset lives on (used for run/verbose/debug and backup_name)
           name: full path of the zfs dataset
        """
        self.zfs_node = zfs_node
        self.name = name

    def __repr__(self):
        return "{}: {}".format(self.zfs_node, self.name)

    def __str__(self):
        return self.name

    def verbose(self, txt):
        """log a verbose message via the node, titled with the name of this dataset"""
        self.zfs_node.verbose(txt, [self.name])

    def debug(self, txt):
        """log a debug message via the node, titled with the name of this dataset"""
        self.zfs_node.debug(txt, [self.name])

    def invalidate(self):
        """clear cache"""
        #TODO: nicer
        # all cached_property values of this object are stored in this dict
        self._cached_properties = {}

    @property
    def filesystem_name(self):
        """filesystem part of the name

        Raises ValueError when the name does not contain exactly one "@".
        """
        (filesystem, snapshot_name) = self.name.split("@")
        return filesystem

    @property
    def snapshot_name(self):
        """snapshot part of the name

        Raises ValueError when the name does not contain exactly one "@".
        """
        (filesystem, snapshot_name) = self.name.split("@")
        return snapshot_name

    @cached_property
    def properties(self):
        """all zfs properties, as a dict of property name to value"""
        cmd = [
            "zfs", "get", "all", "-H", "-o", "property,value", self.name
        ]

        # every output line is a (property, value) pair
        return dict(self.zfs_node.run(tab_split=True, cmd=cmd, readonly=True, valid_exitcodes=[ 0 ]))

    def is_changed(self):
        """dataset is changed since ANY latest snapshot ?"""
        # both spellings of zero are treated as unchanged
        return self.properties['written'] not in ("0B", "0")

    def is_ours(self):
        """return true if this snapshot is created by this backup_name"""
        # escape the backup name, so regex special characters in it are matched literally
        pattern = "^" + re.escape(self.zfs_node.backup_name) + "-[0-9]*$"
        return re.match(pattern, self.snapshot_name) is not None

    def from_names(self, names):
        """convert a list of names to a list ZfsDatasets for this zfs_node"""
        return [ZfsDataset(self.zfs_node, name) for name in names]

    @cached_property
    def snapshots(self):
        """get all snapshots of this dataset"""
        cmd = [
            "zfs", "list", "-d", "1", "-r", "-t", "snapshot", "-H", "-o", "name", self.name
        ]

        names = self.zfs_node.run(cmd=cmd, readonly=True)
        return self.from_names(names)

    @cached_property
    def our_snapshots(self):
        """get list of snapshots created by us of this dataset"""
        return [snapshot for snapshot in self.snapshots if snapshot.is_ours()]

    @cached_property
    def is_changed_ours(self):
        """dataset is changed since OUR latest snapshot?"""

        # without a snapshot of our own, everything counts as changed
        if not self.our_snapshots:
            return True

        # compare against the last snapshot that is ours, not the last snapshot
        # of any origin
        latest_snapshot = self.our_snapshots[-1]

        cmd = [ "zfs", "get", "-H", "-ovalue", "written@" + str(latest_snapshot), self.name ]
        output = self.zfs_node.run(readonly=True, tab_split=False, cmd=cmd, valid_exitcodes=[ 0 ])

        return output[0] not in ("0B", "0")

    @cached_property
    def recursive_datasets(self, types="filesystem,volume"):
        """get all datasets recursively under us

        Accessed as a property, so types always has its default value.
        """
        names = self.zfs_node.run(tab_split=False, readonly=True, valid_exitcodes=[ 0 ], cmd=[
            "zfs", "list", "-r", "-t", types, "-o", "name", "-H", self.name
        ])

        # skip the first line of the listing (presumably this dataset itself - verify)
        return self.from_names(names[1:])
 | |
| 
 | |
| 
 | |
class ZfsNode(ExecuteNode):
    """a node that contains zfs datasets. implements global lowlevel zfs commands"""

    def __init__(self, backup_name, ssh_to=None, readonly=False, description=""):
        """backup_name: name of the backup, used in snapshot names and in the autobackup property
           ssh_to: server to ssh to. None means local
           readonly: only execute commands that dont make any changes
           description: title for log messages. Defaults to ssh_to
        """
        self.backup_name = backup_name
        # always set a usable string: the given description, otherwise the ssh
        # target, otherwise a fixed label for the local node
        self.description = description or ssh_to or "(local)"

        ExecuteNode.__init__(self, ssh_to=ssh_to, readonly=readonly)

    def verbose(self, txt, titles=None):
        """log a verbose message, titled with the description of this node"""
        # build a new list: never modify the list of the caller or a shared default
        log.verbose(txt, [self.description] + list(titles or []))

    def debug(self, txt, titles=None):
        """log a debug message, titled with the description of this node"""
        log.debug(txt, [self.description] + list(titles or []))

    def new_snapshotname(self):
        """determine unique new snapshotname: backup_name plus a timestamp with a resolution of seconds"""
        return self.backup_name + "-" + time.strftime("%Y%m%d%H%M%S")

    def consistent_snapshot(self, datasets, snapshot_name, allow_empty=True):
        """create a consistent (atomic) snapshot of specified datasets.

        datasets: list of ZfsDataset to snapshot
        snapshot_name: name of the snapshot (the part after the @)
        allow_empty: Allow empty snapshots. (compared to our latest snapshot)
        """

        # all datasets go into a single zfs snapshot command
        cmd = [ "zfs", "snapshot" ]

        for dataset in datasets:
            if not allow_empty and not dataset.is_changed_ours:
                dataset.verbose("No changes, not snapshotting")
                continue

            cmd.append(str(dataset) + "@" + snapshot_name)
            # the cached snapshot list of this dataset is about to become outdated
            dataset.invalidate()

        # nothing was added after "zfs" and "snapshot"
        if len(cmd) == 2:
            self.verbose("No changes, not creating snapshot.")
        else:
            self.verbose("Creating snapshot {}".format(snapshot_name))
            self.run(cmd, readonly=False)

    @cached_property
    def selected_datasets(self):
        """determine filesystems that should be backupped by looking at the special autobackup-property, systemwide

           returns: list of ZfsDataset
        """
        # get all source filesystems that have the backup property
        lines = self.run(tab_split=True, readonly=True, cmd=[
            "zfs", "get", "-t", "volume,filesystem", "-o", "name,value,source", "-s", "local,inherited", "-H", "autobackup:" + self.backup_name
        ])

        inherited_prefix = "inherited from "

        # determine filesystems that should be actually backupped
        selected_filesystems = []
        # names of datasets that have the property set locally to true or child
        direct_filesystems = set()
        for (name, value, source) in lines:
            dataset = ZfsDataset(self, name)

            if value == "false":
                dataset.verbose("Ignored (disabled)")
                continue

            enabled = value in ("true", "child")
            if source == "local" and enabled:
                direct_filesystems.add(name)

            if source == "local" and value == "true":
                selected_filesystems.append(dataset)
                dataset.verbose("Selected (direct selection)")
            elif source.startswith(inherited_prefix) and enabled:
                # only follow inheritance from a dataset that was set locally on this node
                inherited_from = source[len(inherited_prefix):]
                if inherited_from in direct_filesystems:
                    selected_filesystems.append(dataset)
                    dataset.verbose("Selected (inherited selection)")
                else:
                    dataset.verbose("Ignored (already a backup)")
            else:
                dataset.verbose("Ignored (only childs)")

        return selected_filesystems
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| ################################################################## ENTRY POINT
 | |
| 
 | |
# parse arguments
import argparse

parser = argparse.ArgumentParser(
    description='ZFS autobackup v2.4',
    epilog='When a filesystem fails, zfs_backup will continue and report the number of failures at that end. Also the exit code will indicate the number of failures.',
)

# source/target hosts and retention
parser.add_argument(
    '--ssh-source', default=None,
    help='Source host to get backup from. (user@hostname) Default %(default)s.')
parser.add_argument(
    '--ssh-target', default=None,
    help='Target host to push backup to. (user@hostname) Default  %(default)s.')
parser.add_argument(
    '--keep-source', type=int, default=30,
    help='Number of days to keep old snapshots on source. Default %(default)s.')
parser.add_argument(
    '--keep-target', type=int, default=30,
    help='Number of days to keep old snapshots on target. Default %(default)s.')

# positional arguments
parser.add_argument(
    'backup_name',
    help='Name of the backup (you should set the zfs property "autobackup:backup-name" to true on filesystems you want to backup')
parser.add_argument(
    'target_path',
    help='Target ZFS filesystem')

# snapshot and send behaviour
parser.add_argument(
    '--no-snapshot', action='store_true',
    help='dont create new snapshot (usefull for finishing uncompleted backups, or cleanups)')
parser.add_argument(
    '--no-send', action='store_true',
    help='dont send snapshots (usefull to only do a cleanup)')
parser.add_argument(
    '--allow-empty', action='store_true',
    help='if nothing has changed, still create empty snapshots.')
parser.add_argument(
    '--ignore-replicated', action='store_true',
    help='Ignore datasets that seem to be replicated some other way. (No changes since lastest snapshot. Usefull for proxmox HA replication)')
parser.add_argument(
    '--no-holds', action='store_true',
    help='Dont lock snapshots on the source. (Usefull to allow proxmox HA replication to switches nodes)')
parser.add_argument(
    '--ignore-new', action='store_true',
    help='Ignore filesystem if there are already newer snapshots for it on the target (use with caution)')

# transfer options
parser.add_argument(
    '--resume', action='store_true',
    help='support resuming of interrupted transfers by using the zfs extensible_dataset feature (both zpools should have it enabled) Disadvantage is that you need to use zfs recv -A if another snapshot is created on the target during a receive. Otherwise it will keep failing.')
parser.add_argument(
    '--strip-path', default=0, type=int,
    help='number of directory to strip from path (use 1 when cloning zones between 2 SmartOS machines)')
parser.add_argument(
    '--buffer', default="",
    help='Use mbuffer with specified size to speedup zfs transfer. (e.g. --buffer 1G) Will also show nice progress output.')

# parser.add_argument('--destroy-stale', action='store_true', help='Destroy stale backups that have no more snapshots. Be sure to verify the output before using this! ')
parser.add_argument(
    '--properties', default=None,
    help='Comma seperated list of zfs properties that should be synced to target. (Quotas are always disabled temporarily)')
parser.add_argument(
    '--rollback', action='store_true',
    help='Rollback changes on the target before starting a backup. (normally you can prevent changes by setting the readonly property on the target_path to on)')
parser.add_argument(
    '--ignore-transfer-errors', action='store_true',
    help='Ignore transfer errors (still checks if received filesystem exists. usefull for acltype errors)')

# test mode and output level
parser.add_argument(
    '--test', action='store_true',
    help='dont change anything, just show what would be done (still does all read-only operations)')
parser.add_argument(
    '--verbose', action='store_true',
    help='verbose output')
parser.add_argument(
    '--debug', action='store_true',
    help='debug output (shows commands that are executed)')

# note: args is the only global variable we use, since it is a global readonly setting anyway
args = parser.parse_args()

# in test mode both nodes only execute commands that are marked readonly
source_node = ZfsNode(args.backup_name, ssh_to=args.ssh_source, readonly=args.test)
target_node = ZfsNode(args.backup_name, ssh_to=args.ssh_target, readonly=args.test)

source_datasets = source_node.selected_datasets

if not source_datasets:
    abort("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source))

source_node.consistent_snapshot(
    source_datasets,
    source_node.new_snapshotname(),
    allow_empty=args.allow_empty,
)
 | |
| 
 | |
| # for source_dataset in source_datasets:
 | |
| #     print(source_dataset)
 | |
| #     print(source_dataset.recursive_datasets)
 | |
| #
 | |
| #
 | |
| # pprint.pprint(ZfsDataset(node, "rpool").recursive_datasets)
 |