#!/usr/bin/env python
# -*- coding: utf8 -*-

# (c)edwin@datux.nl - Released under GPL
#
# Greetings from eth0 2019 :)

from __future__ import print_function
import os
import sys
import re
import traceback
import subprocess
import pprint
# import cStringIO
import time
import argparse
from pprint import pprint as p
import select

import imp

try:
    import colorama
    use_color=True
except ImportError:
    use_color=False

class Log:
    def __init__(self, show_debug=False, show_verbose=False):
        self.last_log=""
        self.show_debug=show_debug
        self.show_verbose=show_verbose

    # def titled_str(self, txt, titles):
    #     """magic to make our log messages indent and become more clear"""
    #     str=""
    #     count=0
    #     changed=False
    #     for title in titles:
    #         if not self.show_debug and not changed and len(self.titles)>count and self.titles[count]==title:
    #             str=str+ ( " " * len(title))+ " "
    #         else:
    #             str=str+title+": "
    #             changed=True
    #         # str=str+": "
    #         count=count+1
    #
    #     str=str+txt
    #     self.titles=list(titles)
    #     return(str)

    def error(self, txt):
        if use_color:
            print(colorama.Fore.RED+colorama.Style.BRIGHT+ "! "+txt+colorama.Style.RESET_ALL, file=sys.stderr)
        else:
            print("! "+txt, file=sys.stderr)

    def verbose(self, txt):
        if self.show_verbose:
            if use_color:
                print(colorama.Style.NORMAL+ " "+txt+colorama.Style.RESET_ALL)
            else:
                print(" "+txt)

    def debug(self, txt):
        if self.show_debug:
            if use_color:
                print(colorama.Fore.GREEN+ "# "+txt+colorama.Style.RESET_ALL)
            else:
                print("# "+txt)


# fatal: abort execution with exit code 255
def abort(txt):
    print(txt, file=sys.stderr)
    sys.exit(255)

class cached_property(object):
    """ A property that is only computed once per instance and then cached.

    Unlike the original implementation it stores the value in obj._cached_properties
    instead of replacing itself with an ordinary attribute, so the cache can be
    cleared explicitly (see ZfsDataset.invalidate). Clearing the cache resets the
    property.

    Source: https://github.com/bottlepy/bottle/commit/fa7733e075da0d790d809aa3d2f53071897e6f76
    """

    def __init__(self, func):
        self.__doc__ = getattr(func, '__doc__')
        self.func = func

    def __get__(self, obj, cls):
        if obj is None:
            return self

        propname=self.func.__name__

        #store in obj._cached_properties so it stays cached from now on
        if not hasattr(obj, '_cached_properties'):
            obj._cached_properties={}

        if not propname in obj._cached_properties:
            obj._cached_properties[propname]=self.func(obj)
            # value = obj.__dict__[propname] = self.func(obj)

        return obj._cached_properties[propname]

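# Illustrative sketch (not part of the original script): how cached_property behaves.
# The class "Demo" and its property below are hypothetical.
#
#   class Demo(object):
#       @cached_property
#       def expensive(self):
#           print("computed")
#           return 42
#
#   d=Demo()
#   d.expensive              # prints "computed", caches 42 in d._cached_properties
#   d.expensive              # returns the cached 42 without recomputing
#   d._cached_properties={}  # clearing the dict (what ZfsDataset.invalidate does) forces recomputation
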
class ExecuteNode:
    """an endpoint to execute local or remote commands via ssh"""

    def __init__(self, ssh_to=None, readonly=False, debug_output=False):
        """ssh_to: server you want to ssh to. None means local.
        readonly: only execute commands that don't make any changes (useful for test-runs)
        debug_output: show output and exit codes of commands in debugging output.
        """

        self.ssh_to=ssh_to
        self.readonly=readonly
        self.debug_output=debug_output

    def run(self, cmd, input=None, tab_split=False, valid_exitcodes=[ 0 ], readonly=False, hide_errors=False, pipe=False):
        """run a command on the node

        readonly: set this to True if the command doesn't make any changes and is safe to execute in testmode
        pipe: Instead of executing, return a pipe-handle to be used as input to another run() command. (just like a | in linux)
        input: Can be None, a string or a pipe-handle you got from another run()
        """

        encoded_cmd=[]

        #use ssh?
        if self.ssh_to != None:
            encoded_cmd.extend(["ssh", self.ssh_to])

            #make sure the command gets all the data in utf8 format:
            #(this is necessary if LC_ALL=en_US.utf8 is not set in the environment)
            for arg in cmd:
                #add single quotes for remote commands to support spaces and other weird stuff (remote commands are executed in a shell)
                encoded_cmd.append( ("'"+arg+"'").encode('utf-8'))

        else:
            for arg in cmd:
                encoded_cmd.append(arg.encode('utf-8'))

        #debug and test stuff
        debug_txt=" ".join(encoded_cmd)
        if pipe:
            debug_txt=debug_txt+" |"

        if self.readonly and not readonly:
            self.debug("SKIP > "+ debug_txt)
        else:
            if pipe:
                self.debug("PIPE > "+ debug_txt)
            else:
                self.debug("RUN > "+ debug_txt)

        #determine stdin
        if input==None:
            stdin=None
        elif isinstance(input,str):
            self.debug("INPUT > \n"+input.rstrip())
            stdin=subprocess.PIPE
        elif isinstance(input, subprocess.Popen):
            self.debug("Piping input")
            stdin=input.stdout
        else:
            abort("Incompatible input")

        if self.readonly and not readonly:
            #todo: what happens if input is piped?
            return

        #execute and parse/return results
        p=subprocess.Popen(encoded_cmd, env=os.environ, stdout=subprocess.PIPE, stdin=stdin, stderr=subprocess.PIPE)

        #Note: make streaming?
        if isinstance(input,str):
            p.stdin.write(input)
            #close stdin so the command sees EOF after the input string
            p.stdin.close()

        if pipe:
            return(p)

        #handle all outputs
        if isinstance(input, subprocess.Popen):
            selectors=[p.stdout, p.stderr, input.stderr ]
            input.stdout.close() #otherwise the input process won't exit when ours does
        else:
            selectors=[p.stdout, p.stderr ]

        output_lines=[]
        while True:
            (read_ready, write_ready, ex_ready)=select.select(selectors, [], [])
            eof_count=0
            if p.stdout in read_ready:
                line=p.stdout.readline()
                if line!="":
                    output_lines.append(line.rstrip())
                    if self.debug_output:
                        self.debug("STDOUT > "+line.rstrip())
                else:
                    eof_count=eof_count+1
            if p.stderr in read_ready:
                line=p.stderr.readline()
                if line!="":
                    if hide_errors:
                        self.debug("STDERR > "+line.rstrip())
                    else:
                        self.error("STDERR > "+line.rstrip())
                else:
                    eof_count=eof_count+1
            if isinstance(input, subprocess.Popen) and (input.stderr in read_ready):
                line=input.stderr.readline()
                if line!="":
                    if hide_errors:
                        self.debug("STDERR|> "+line.rstrip())
                    else:
                        self.error("STDERR|> "+line.rstrip())
                else:
                    eof_count=eof_count+1

            #stop if both processes are done and all filehandles are EOF:
            if p.poll()!=None and ((not isinstance(input, subprocess.Popen)) or input.poll()!=None) and eof_count==len(selectors):
                break

        #handle piped process error output and exit codes
        if isinstance(input, subprocess.Popen):
            if self.debug_output:
                self.debug("EXIT |> {}".format(input.returncode))
            if input.returncode not in valid_exitcodes:
                raise(subprocess.CalledProcessError(input.returncode, "(pipe)"))

        if self.debug_output:
            self.debug("EXIT > {}".format(p.returncode))
        if p.returncode not in valid_exitcodes:
            raise(subprocess.CalledProcessError(p.returncode, encoded_cmd))

        if not tab_split:
            return(output_lines)
        else:
            ret=[]
            for line in output_lines:
                ret.append(line.split("\t"))
            return(ret)

    def __repr__(self):
        return(self.ssh_to)

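# Illustrative sketch (not part of the original script): how run() calls are combined.
# run() logs through self.debug()/self.error(), which are provided by the ZfsNode
# subclass below; the hostnames, dataset names and the zfs_autobackup/source_node/
# target_node variables used here are placeholders.
#
#   node=ZfsNode("offsite1", zfs_autobackup, ssh_to="root@backupserver")
#   names=node.run(["zfs", "list", "-H", "-o", "name"], readonly=True)
#
#   # pipe=True returns the subprocess.Popen instead of waiting for output, and a
#   # later run() can take it as input= to build a "zfs send | ssh ... zfs recv" chain:
#   handle=source_node.run(["zfs", "send", "rpool/data@snap"], pipe=True)
#   target_node.run(["zfs", "recv", "backuppool/data"], input=handle)
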
class ZfsDataset():
    """a zfs dataset (filesystem/volume/snapshot/clone)

    Note that a dataset doesn't have to actually exist (yet/anymore).
    Also, most properties are cached for performance reasons, but also to allow --test to function correctly.
    """

    def __init__(self, zfs_node, name):
        """name: full path of the zfs dataset"""
        self.zfs_node=zfs_node
        self.name=name #full name

    def __repr__(self):
        return("{}: {}".format(self.zfs_node, self.name))

    def __str__(self):
        return(self.name)

    def __eq__(self, obj):
        return(self.name == obj.name)

    def verbose(self,txt):
        self.zfs_node.verbose("{}: {}".format(self.name, txt))

    def error(self,txt):
        self.zfs_node.error("{}: {}".format(self.name, txt))

    def debug(self,txt):
        self.zfs_node.debug("{}: {}".format(self.name, txt))

    def invalidate(self):
        """clear the property cache"""
        #TODO: nicer?
        self._cached_properties={}

    def lstrip_path(self,count):
        """return name with the first count path components stripped"""
        return("/".join(self.name.split("/")[count:]))

    def rstrip_path(self,count):
        """return name with the last count path components stripped"""
        return("/".join(self.name.split("/")[:-count]))

    @property
    def filesystem_name(self):
        """filesystem part of the name (before the @)"""
        (filesystem, snapshot_name)=self.name.split("@")
        return(filesystem)

    @property
    def snapshot_name(self):
        """snapshot part of the name (after the @)"""
        (filesystem, snapshot_name)=self.name.split("@")
        return(snapshot_name)

    @property
    def is_snapshot(self):
        """true if this dataset is a snapshot"""
        return(self.name.find("@")!=-1)

    @cached_property
    def parent(self):
        """get the zfs parent of this dataset.

        for snapshots this means the filesystem/volume the snapshot belongs to. otherwise it returns the parent according to the path.

        we cache this so everything that is cached in the parent also stays cached.
        """
        if self.is_snapshot:
            return(ZfsDataset(self.zfs_node, self.filesystem_name))
        else:
            return(ZfsDataset(self.zfs_node, self.rstrip_path(1)))

    @cached_property
    def exists(self):
        """check if the dataset exists"""
        self.debug("Checking if filesystem exists")
        return(self.zfs_node.run(tab_split=True, cmd=[ "zfs", "list", self.name], readonly=True, valid_exitcodes=[ 0,1 ], hide_errors=True) and True)

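    # Illustrative sketch (not part of the original script): what the name helpers
    # return. "node" and the dataset names below are hypothetical.
    #
    #   snap=ZfsDataset(node, "rpool/data/projects@offsite1-20190307000000")
    #   snap.filesystem_name   # "rpool/data/projects"
    #   snap.snapshot_name     # "offsite1-20190307000000"
    #   snap.is_snapshot       # True
    #   snap.parent.name       # "rpool/data/projects"
    #
    #   fs=ZfsDataset(node, "rpool/data/projects")
    #   fs.lstrip_path(1)      # "data/projects"  (used with --strip-path to build the target name)
    #   fs.rstrip_path(1)      # "rpool/data"     (how the parent of a filesystem is determined)
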
    def create_filesystem(self, parents=False):
        """create a filesystem"""
        if parents:
            self.verbose("Creating filesystem and parents")
            self.zfs_node.run(["zfs", "create", "-p", self.name ])
        else:
            self.verbose("Creating filesystem")
            self.zfs_node.run(["zfs", "create", self.name ])

        #update cache
        self.exists=1

    def destroy(self):
        self.debug("Destroying")
        self.zfs_node.run(["zfs", "destroy", self.name])
        self.invalidate()

    @cached_property
    def properties(self):
        """all zfs properties"""
        self.debug("Getting zfs properties")

        cmd=[
            "zfs", "get", "-H", "-o", "property,value", "all", self.name
        ]

        return(dict(self.zfs_node.run(tab_split=True, cmd=cmd, readonly=True, valid_exitcodes=[ 0 ])))

    def is_changed(self):
        """has the dataset changed since ANY latest snapshot?"""
        self.debug("Checking if dataset is changed")

        if self.properties['written']=="0B" or self.properties['written']=="0":
            return(False)
        else:
            return(True)

    def is_ours(self):
        """return True if this snapshot was created by this backup_name"""
        if re.match("^"+self.zfs_node.backup_name+"-[0-9]*$", self.snapshot_name):
            return(True)
        else:
            return(False)

    def from_names(self, names):
        """convert a list of names to a list of ZfsDatasets for this zfs_node"""
        ret=[]
        for name in names:
            ret.append(ZfsDataset(self.zfs_node, name))

        return(ret)

    @cached_property
    def snapshots(self):
        """get all snapshots of this dataset"""
        self.debug("Getting snapshots")

        if not self.exists:
            return([])

        cmd=[
            "zfs", "list", "-d", "1", "-r", "-t" ,"snapshot", "-H", "-o", "name", self.name
        ]

        names=self.zfs_node.run(cmd=cmd, readonly=True)
        return(self.from_names(names))

    @property
    def our_snapshots(self):
        """get list of snapshots of this dataset that were created by us"""
        ret=[]
        for snapshot in self.snapshots:
            if snapshot.is_ours():
                ret.append(snapshot)

        return(ret)

    # def progressive_thinning(self, schedule):
    #     """cleanup snapshots by progressive thinning schedule"""

    def find_snapshot(self, snapshot_name):
        """find snapshot by snapshot_name"""
        for snapshot in self.our_snapshots:
            if snapshot.snapshot_name==snapshot_name:
                return(snapshot)

        return(None)

    @cached_property
    def is_changed_ours(self):
        """has the dataset changed since OUR latest snapshot?"""

        self.debug("Checking if dataset is changed since our snapshot")

        if not self.our_snapshots:
            return(True)

        latest_snapshot=self.snapshots[-1]

        cmd=[ "zfs", "get","-H" ,"-ovalue", "written@"+str(latest_snapshot), self.name ]
        output=self.zfs_node.run(readonly=True, tab_split=False, cmd=cmd, valid_exitcodes=[ 0 ])
        if output[0]=="0B" or output[0]=="0":
            return(False)

        return(True)

    @cached_property
    def recursive_datasets(self, types="filesystem,volume"):
        """get all datasets recursively under us"""

        self.debug("Getting all datasets under us")

        names=self.zfs_node.run(tab_split=False, readonly=True, valid_exitcodes=[ 0 ], cmd=[
            "zfs", "list", "-r", "-t", types, "-o", "name", "-H", self.name
        ])

        return(self.from_names(names[1:]))

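    # Illustrative sketch (not part of the original script): the snapshot naming that
    # is_ours() matches. With backup_name "offsite1", new_snapshotname() (defined on
    # ZfsNode below) produces names like "offsite1-20190307143000", so:
    #
    #   ZfsDataset(node, "rpool/data@offsite1-20190307143000").is_ours()   # True
    #   ZfsDataset(node, "rpool/data@manual-snapshot").is_ours()           # False (left alone)
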
    def send_pipe(self, prev_snapshot=None, resume=True, resume_token=None, show_progress=False):
        """returns a pipe with zfs send output for this snapshot

        resume: Use resuming (both sides need to support it)
        resume_token: resume sending from this token. (in that case we don't need to know snapshot names)
        """
        #### build source command
        cmd=[]

        cmd.extend(["zfs", "send", ])

        #all kinds of performance options:
        cmd.append("-L") # large block support
        cmd.append("-e") # WRITE_EMBEDDED, more compact stream
        cmd.append("-c") # use compressed WRITE records
        if not resume:
            cmd.append("-D") # dedupped stream, sends less duplicate data

        #progress output
        if show_progress:
            cmd.append("-v")
            cmd.append("-P")

        #resume a previous send? (don't need more parameters in that case)
        if resume_token:
            cmd.extend([ "-t", resume_token ])

        else:
            #send properties
            cmd.append("-p")

            #incremental?
            if prev_snapshot:
                cmd.extend([ "-i", prev_snapshot.snapshot_name ])

            cmd.append(self.name)

        # if args.buffer and args.ssh_source!="local":
        #     cmd.append("|mbuffer -m {}".format(args.buffer))

        #NOTE: this doesn't start the send yet, it only returns a subprocess pipe
        return(self.zfs_node.run(cmd, pipe=True))

    def recv_pipe(self, pipe, resume=True):
        """starts a zfs recv on this dataset and uses pipe as input"""
        #### build target command
        cmd=[]

        cmd.extend(["zfs", "recv"])

        #don't mount the filesystem that is received
        cmd.append("-u")

        # filter certain properties on receive (useful for linux->freebsd in some cases)
        # if args.filter_properties:
        #     for filter_property in args.filter_properties:
        #         cmd.extend([ "-x" , filter_property ])

        #verbose output
        cmd.append("-v")

        if resume:
            #support resuming
            cmd.append("-s")

        cmd.append(self.name)

        self.zfs_node.run(cmd, input=pipe)

        # if args.buffer and args.ssh_target!="local":
        #     cmd.append("|mbuffer -m {}".format(args.buffer))

    def transfer_snapshot(self, target_dataset, prev_snapshot=None, resume=True, resume_token=None, show_progress=False):
        """transfer this snapshot to target_dataset. specify prev_snapshot for incremental transfer

        connects a send_pipe() to a recv_pipe()
        """

        self.debug("Transfer snapshot to {}".format(target_dataset))

        if resume_token:
            target_dataset.verbose("resuming")

        #initial or resume
        if not prev_snapshot:
            target_dataset.verbose("receiving @{} (new)".format(self.snapshot_name))
        else:
            #incremental
            target_dataset.verbose("receiving @{}".format(self.snapshot_name))

        #do it
        pipe=self.send_pipe(resume=resume, show_progress=show_progress, resume_token=resume_token, prev_snapshot=prev_snapshot)
        target_dataset.recv_pipe(pipe)

        #update cache
        target_dataset.snapshots.append(ZfsDataset(target_dataset.zfs_node, target_dataset.name+"@"+self.snapshot_name))

    def sync_snapshots(self, target_dataset, show_progress=False):
        """sync our snapshots to target_dataset"""

        # initial transfer
        resume_token=None
        if not target_dataset.exists:
            self.debug("Sync snapshots: Initial transfer")
            self.our_snapshots[0].transfer_snapshot(target_dataset, show_progress=show_progress)
        else:
            #filesystem exists, need to resume something?
            if 'receive_resume_token' in target_dataset.properties:
                self.debug("Sync snapshots: Found resume token")
                resume_token=target_dataset.properties['receive_resume_token']

            #resume initial transfer?
            if len(target_dataset.our_snapshots)==0:
                self.debug("Sync snapshots: Resuming initial transfer")
                self.our_snapshots[0].transfer_snapshot(target_dataset, show_progress=show_progress, resume_token=resume_token)
                resume_token=None

        self.debug("Sync snapshots: Incremental transfer")
        latest_common_snapshot=None
        for source_snapshot in self.our_snapshots:
            target_snapshot=target_dataset.find_snapshot(source_snapshot.snapshot_name)
            #already transferred
            if target_snapshot:
                latest_common_snapshot=source_snapshot
            else:
                if latest_common_snapshot:
                    #transfer it
                    source_snapshot.transfer_snapshot(target_dataset, latest_common_snapshot, show_progress=True, resume_token=resume_token)
                    resume_token=None
                    latest_common_snapshot=source_snapshot

        if not latest_common_snapshot:
            raise(Exception("Can't find a common snapshot. (hint: zfs destroy {})".format(target_dataset)))

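# Illustrative sketch (not part of the original script): the shell pipeline that
# send_pipe()/recv_pipe() effectively build for an incremental transfer when both
# --ssh-source and --ssh-target are given (hosts and dataset names are hypothetical,
# flags taken from the code above):
#
#   ssh root@nas1 'zfs send -L -e -c -D -p -i offsite1-20190306000000 rpool/data@offsite1-20190307000000' | \
#       ssh root@backupserver 'zfs recv -u -v backuppool/backups/nas1/rpool/data'
#
# With --resume, -D is dropped, the receive gets -s, and an interrupted transfer is
# restarted later with 'zfs send -t <receive_resume_token>'.
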
class ZfsNode(ExecuteNode):
    """a node that contains zfs datasets. implements global (systemwide/pool wide) zfs commands"""

    def __init__(self, backup_name, zfs_autobackup, ssh_to=None, readonly=False, description="", debug_output=False):
        self.backup_name=backup_name
        if not description:
            self.description=ssh_to
        else:
            self.description=description

        self.zfs_autobackup=zfs_autobackup #for logging

        ExecuteNode.__init__(self, ssh_to=ssh_to, readonly=readonly, debug_output=debug_output)

    def verbose(self,txt):
        self.zfs_autobackup.verbose("{} {}".format(self.description, txt))

    def error(self,txt,titles=[]):
        self.zfs_autobackup.error("{} {}".format(self.description, txt))

    def debug(self,txt, titles=[]):
        self.zfs_autobackup.debug("{} {}".format(self.description, txt))

    def new_snapshotname(self):
        """determine a unique new snapshot name"""
        return(self.backup_name+"-"+time.strftime("%Y%m%d%H%M%S"))

    def consistent_snapshot(self, datasets, snapshot_name, allow_empty=True):
        """create a consistent (atomic) snapshot of the specified datasets.

        allow_empty: Allow empty snapshots. (compared to our latest snapshot)
        """

        cmd=[ "zfs", "snapshot" ]

        noop=True
        for dataset in datasets:
            if not allow_empty:
                if not dataset.is_changed_ours:
                    dataset.verbose("No changes since {}".format(dataset.our_snapshots[-1].snapshot_name))
                    continue

            snapshot=ZfsDataset(dataset.zfs_node, dataset.name+"@"+snapshot_name)
            cmd.append(str(snapshot))

            #add snapshot to the cache (also useful in testmode)
            dataset.snapshots.append(snapshot)

            noop=False

        if noop:
            self.verbose("No changes, not creating snapshot.")
        else:
            self.verbose("Creating snapshot {}".format(snapshot_name))
            self.run(cmd, readonly=False)

    @cached_property
    def selected_datasets(self):
        """determine filesystems that should be backed up by looking at the special autobackup-property, systemwide

        returns: list of ZfsDataset
        """
        #get all source filesystems that have the backup property
        lines=self.run(tab_split=True, readonly=True, cmd=[
            "zfs", "get", "-t", "volume,filesystem", "-o", "name,value,source", "-s", "local,inherited", "-H", "autobackup:"+self.backup_name
        ])

        #determine which filesystems should actually be backed up
        selected_filesystems=[]
        direct_filesystems=[]
        for line in lines:
            (name,value,source)=line
            dataset=ZfsDataset(self, name)

            if value=="false":
                dataset.verbose("Ignored (disabled)")

            else:
                if source=="local" and ( value=="true" or value=="child"):
                    direct_filesystems.append(name)

                if source=="local" and value=="true":
                    dataset.verbose("Selected (direct selection)")
                    selected_filesystems.append(dataset)
                elif source.find("inherited from ")==0 and (value=="true" or value=="child"):
                    inherited_from=re.sub("^inherited from ", "", source)
                    if inherited_from in direct_filesystems:
                        selected_filesystems.append(dataset)
                        dataset.verbose("Selected (inherited selection)")
                    else:
                        dataset.verbose("Ignored (already a backup)")
                else:
                    dataset.verbose("Ignored (only childs)")

        return(selected_filesystems)

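# Illustrative sketch (not part of the original script): how datasets are selected.
# Pool/dataset names are hypothetical; "offsite1" is the backup_name given on the
# command line, and selected_datasets above reads the corresponding zfs property:
#
#   zfs set autobackup:offsite1=true rpool/data        # select rpool/data and everything below it
#   zfs set autobackup:offsite1=false rpool/data/tmp   # ...but exclude this subtree
#   zfs set autobackup:offsite1=child rpool            # select only the children of rpool, not rpool itself
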
class ZfsAutobackup:
    """main class"""

    def __init__(self):

        parser = argparse.ArgumentParser(
            description='ZFS autobackup v3.0',
            epilog='When a filesystem fails, zfs_backup will continue and report the number of failures at the end. Also the exit code will indicate the number of failures.')
        parser.add_argument('--ssh-source', default=None, help='Source host to get backup from. (user@hostname) Default %(default)s.')
        parser.add_argument('--ssh-target', default=None, help='Target host to push backup to. (user@hostname) Default %(default)s.')
        parser.add_argument('--keep-source', type=int, default=30, help='Number of days to keep old snapshots on source. Default %(default)s.')
        parser.add_argument('--keep-target', type=int, default=30, help='Number of days to keep old snapshots on target. Default %(default)s.')
        parser.add_argument('backup_name', help='Name of the backup (you should set the zfs property "autobackup:backup-name" to true on filesystems you want to backup)')
        parser.add_argument('target_path', help='Target ZFS filesystem')

        parser.add_argument('--no-snapshot', action='store_true', help="Don't create a new snapshot (useful for finishing uncompleted backups, or cleanups)")
        parser.add_argument('--no-send', action='store_true', help="Don't send snapshots (useful to only do a cleanup)")
        parser.add_argument('--allow-empty', action='store_true', help='If nothing has changed, still create empty snapshots.')
        parser.add_argument('--ignore-replicated', action='store_true', help='Ignore datasets that seem to be replicated some other way. (No changes since latest snapshot. Useful for proxmox HA replication)')
        parser.add_argument('--no-holds', action='store_true', help="Don't lock snapshots on the source. (Useful to allow proxmox HA replication to switch nodes)")
        parser.add_argument('--ignore-new', action='store_true', help='Ignore filesystem if there are already newer snapshots for it on the target (use with caution)')

        parser.add_argument('--resume', action='store_true', help='Support resuming of interrupted transfers by using the zfs extensible_dataset feature (both zpools should have it enabled). Disadvantage is that you need to use zfs recv -A if another snapshot is created on the target during a receive. Otherwise it will keep failing.')
        parser.add_argument('--strip-path', default=0, type=int, help='Number of directories to strip from the path (use 1 when cloning zones between 2 SmartOS machines)')
        parser.add_argument('--buffer', default="", help='Use mbuffer with the specified size to speed up the zfs transfer. (e.g. --buffer 1G) Will also show nice progress output.')

        # parser.add_argument('--destroy-stale', action='store_true', help='Destroy stale backups that have no more snapshots. Be sure to verify the output before using this! ')
        parser.add_argument('--properties', default=None, help='Comma separated list of zfs properties that should be synced to the target. (Quotas are always disabled temporarily)')
        parser.add_argument('--rollback', action='store_true', help='Rollback changes on the target before starting a backup. (Normally you can prevent changes by setting the readonly property on the target_path to on)')
        parser.add_argument('--ignore-transfer-errors', action='store_true', help='Ignore transfer errors (still checks if the received filesystem exists. Useful for acltype errors)')

        parser.add_argument('--test', action='store_true', help="Don't change anything, just show what would be done (still does all read-only operations)")
        parser.add_argument('--verbose', action='store_true', help='Verbose output')
        parser.add_argument('--debug', action='store_true', help='Show zfs commands that are executed.')
        parser.add_argument('--debug-output', action='store_true', help='Show zfs commands and their output/exit codes. (noisy)')
        parser.add_argument('--progress', action='store_true', help='Show zfs progress output (to stderr)')

        #note: args is the only global variable we use, since it's a global readonly setting anyway
        args = parser.parse_args()

        self.args=args

        if args.debug_output:
            args.debug=True

        self.log=Log(show_debug=self.args.debug, show_verbose=self.args.verbose)

    def verbose(self,txt,titles=[]):
        self.log.verbose(txt)

    def error(self,txt,titles=[]):
        self.log.error(txt)

    def debug(self,txt, titles=[]):
        self.log.debug(txt)

    def set_title(self, title):
        self.log.verbose("")
        self.log.verbose("#### "+title)

    def run(self):
        description="[Source]"
        source_node=ZfsNode(self.args.backup_name, self, ssh_to=self.args.ssh_source, readonly=self.args.test, debug_output=self.args.debug_output, description=description)

        description="[Target]"
        target_node=ZfsNode(self.args.backup_name, self, ssh_to=self.args.ssh_target, readonly=self.args.test, debug_output=self.args.debug_output, description=description)
        # target_node.run(["/root/outputtest"], readonly=True)

        self.set_title("Selecting")
        source_datasets=source_node.selected_datasets
        if not source_datasets:
            abort("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(self.args.backup_name, self.args.ssh_source))

        if not self.args.no_snapshot:
            self.set_title("Snapshotting")
            source_node.consistent_snapshot(source_datasets, source_node.new_snapshotname(), allow_empty=self.args.allow_empty)

        self.set_title("Transferring")

        for source_dataset in source_datasets:

            try:
                if self.args.ignore_replicated and not source_dataset.is_changed():
                    source_dataset.verbose("Already replicated")
                else:
                    #determine the corresponding target_dataset
                    target_name=self.args.target_path + "/" + source_dataset.lstrip_path(self.args.strip_path)
                    target_dataset=ZfsDataset(target_node, target_name)

                    #ensure parents exist
                    if not target_dataset.parent.exists:
                        target_dataset.parent.create_filesystem(parents=True)

                    source_dataset.sync_snapshots(target_dataset, show_progress=self.args.progress)
            except Exception as e:
                source_dataset.error(str(e))
                if self.args.debug:
                    raise

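# Illustrative sketch (not part of the original script): a typical invocation, with
# hypothetical hostnames and pool names (the script filename is also assumed).
# backup_name and target_path are the two positional arguments defined above; the
# commented-out entry point at the bottom of the file would normally run
# ZfsAutobackup().run().
#
#   ./zfs_autobackup --ssh-source root@nas1 --verbose offsite1 backuppool/backups/nas1
#
# This selects every dataset on nas1 that has autobackup:offsite1=true (or inherits it),
# snapshots the selection atomically as @offsite1-YYYYMMDDHHMMSS, and sends it below
# backuppool/backups/nas1 on the local machine.
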
# Experimental progressive-thinning code below: it simulates snapshot creation over
# time and applies the thinning schedule. (Press enter to advance the simulated clock.)

times=[]

time_blocks={
    'years'   : 3600 * 24 * 365.25,
    'months'  : 3600 * 24 * 30,
    'weeks'   : 3600 * 24 * 7,
    'days'    : 3600 * 24,
    'hours'   : 3600,
    'minutes' : 60,
}

now=int(time.time())

def thin(schedule, snapshots):
    if len(snapshots)==0:
        return(snapshots)

    ret=[]

    time_blocks={}

    for ( period, ttl ) in schedule:
        time_blocks[period]={}

    # for snapshot in list(reversed(snapshots)):
    #always keep the latest
    for snapshot in snapshots[:-1]:

        snapshot_time=snapshot
        age=now-snapshot_time

        keeps=""
        for ( period, ttl ) in schedule:
            block_nr=int(snapshot_time/period)
            if age<=ttl:
                if not block_nr in time_blocks[period]:
                    time_blocks[period][block_nr]=snapshot_time
                    keeps=keeps+" ({}days, block{}) ".format(int(period/(3600*24)), block_nr)

        struct=time.localtime(snapshot_time)
        if keeps:
            ret.append(snapshot)
            print("{} {} {}days".format(time.strftime("%Y-%m-%d %H:%M:%S",struct),keeps,int(age/(3600*24))))
        # else:
        #     print("{}".format(time.strftime("%Y-%m-%d %H:%M:%S",struct)))

    # return(list(reversed(ret)))

    #always keep the latest!
    # if not keeps and snapshots:
    #     ret.append(snapshots[:-1])

    ret.append(snapshots[-1])
    struct=time.localtime(snapshots[-1])
    print("{}".format(time.strftime("%Y-%m-%d %H:%M:%S",struct)))
    return(ret)

# snapshots=range(now-400*24*3600, now, 24*3600)

schedule=[
    #every ...                 keep for ...
    ( 1*time_blocks['days']  , 7 * time_blocks['days'] ),
    ( 1*time_blocks['weeks'] , 4 * time_blocks['weeks'] ),
    ( 1*time_blocks['months'], (12 * time_blocks['months']) ),
    ( 1*time_blocks['years'] , 2 * time_blocks['years'] ),
]

import random
msnapshots=[]

while True:
    print("#################### {}".format(time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(now))))

    # if random.random()>0.5:
    msnapshots.append(now)
    msnapshots=thin(schedule, msnapshots)

    sys.stdin.readline()
    now=now+random.randint(0,80000)
    # msnapshots.insert(0,now)

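# Illustrative note (not part of the original script): with the schedule above, thin()
# keeps a snapshot if it is the first one in its day-block within 7 days, in its
# week-block within 4 weeks, in its month-block within 12 months or in its year-block
# within 2 years; the newest snapshot is always kept.
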
#
#
#
# now=time.time()
# time_str=re.findall("^.*-([0-9]*)$", snapshot)[0]
# if len(time_str)==14:
#     #new format:
#     time_secs=time.mktime(time.strptime(time_str,"%Y%m%d%H%M%S"))
#
#
# zfs_autobackup=ZfsAutobackup()
# zfs_autobackup.run()