Commit fce7c7c7 authored by Mike Hibler's avatar Mike Hibler

Implement an "on server" strategy for copying persistent datasets.

This is implemented as a variant of createdataset. If you do:

    createdataset -F pid/old pid/new

It will create a new dataset, initializing it with the contents of old.
The new dataset will of course have the same size, type, and filesystem type
(if any). Right now the old and new both have to be in the same project, and
new gets placed in the same pool on the same server (i.e., this is a local
"zfs send | zfs recv" pipeline).

Implementing copy as a variant of create will hopefully make it easy for
Leigh in the portal interface as he doesn't have to treat it any different
than a normal create: fire it off in the background and wait til the lease
state becomes "valid".

Since a copy could take hours or even days, there are plenty of opportunities
for failure that I have not considered too much yet, e.g., the storage server
rebooting in the middle or boss rebooting in the middle. These things could
happen already, but we have just made the window of opportunity much larger.

Anyway, this mechanism can serve as the basis for creating persistent datasets
from clones or other ephemeral datasets.
parent 3aab6343
#!/usr/bin/perl -wT
#
# Copyright (c) 2013-2018 University of Utah and the Flux Group.
# Copyright (c) 2013-2019 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -43,6 +43,8 @@ sub usage()
print STDERR " Create a snapshot of <pool>/<vol> with timestamp <tstamp>\n";
print STDERR " clone <pool> <ovol> <nvol> [ <tstamp> ]\n";
print STDERR " Create a clone of <pool>/<vol> called <nvol> from the snapshot at <tstamp> (most recent if not specified)\n";
print STDERR " copy <pool> <ovol> <nvol>\n";
print STDERR " Create a copy of <pool>/<vol> called <nvol>\n";
print STDERR " destroy <pool> <vol>\n";
print STDERR " Destroy <vol> in <pool>\n";
print STDERR " desnapshot <pool> <vol> [ <tstamp> ]\n";
......@@ -94,6 +96,7 @@ my %cmds = (
"targets" => \&targets,
"assocs" => \&assocs,
"desnapshotall" => \&desnapshotall,
"copy" => \&copy,
);
#
......@@ -495,3 +498,48 @@ sub declone($$$)
return freenasVolumeDeclone($pool, $vol, 1);
}
#
# Create a deep copy of a dataset using "zfs send | zfs recv".
#
# Arguments:
#   $pool - pool containing the origin volume
#   $ovol - name of the existing (origin) volume
#   $nvol - name of the new volume to create
#
# Each argument must be a non-empty string of word characters and dashes;
# the regex capture is also what untaints the value.
#
# Returns 1 if an argument is bogus, otherwise whatever freenasVolumeCopy
# returns.
#
sub copy($$$)
{
    my ($pool,$ovol,$nvol) = @_;

    if (defined($pool) && $pool =~ /^([-\w]+)$/) {
        $pool = $1;
    } else {
        print STDERR "bscontrol_proxy: bogus pool arg\n";
        return 1;
    }
    if (defined($ovol) && $ovol =~ /^([-\w]+)$/) {
        $ovol = $1;
    } else {
        print STDERR "bscontrol_proxy: bogus origin volume arg\n";
        return 1;
    }
    if (defined($nvol) && $nvol =~ /^([-\w]+)$/) {
        $nvol = $1;
    } else {
        # This is the destination of a copy, not a clone.
        print STDERR "bscontrol_proxy: bogus new volume arg\n";
        return 1;
    }

    # Final argument asks freenasVolumeCopy to do its own locking.
    return freenasVolumeCopy($pool, $ovol, $nvol, 1);
}
#
# Report the progress of a copy.
#
# On boss, dataset copy is in progress (or did not complete) if lease
# "copyfrom" attribute is set. It is still in progress if a webtask
# exists?
#
# From the blockstore server perspective, a copy is in progress (or did
# not complete) if the "receive_resume_token" property is set on zfs dataset.
# It is still in progress if send/recv processes exist. We should write some
# state to disk (a "pid file") to make this detection easier.
#
# The "referenced" attribute tells how much data has been copied, ala:
# zfs get -Hp referenced persist-1/lease-200
#
......@@ -75,6 +75,7 @@ use Exporter;
freenasVolumeCreate freenasVolumeDestroy freenasFSCreate
freenasVolumeSnapshot freenasVolumeClone
freenasVolumeDesnapshot freenasVolumeDeclone
freenasVolumeCopy
freenasParseListing freenasRequest
freenasLock freenasUnlock
$FREENAS_API_RESOURCE_IFACE $FREENAS_API_RESOURCE_IST_EXTENT
......@@ -170,6 +171,7 @@ sub freenasVolumeSnapshot($$;$$);
sub freenasVolumeDesnapshot($$;$$$);
sub freenasVolumeClone($$$;$$);
sub freenasVolumeDeclone($$;$);
sub freenasVolumeCopy($$$;$);
#
# Local Functions
......@@ -932,6 +934,140 @@ sub freenasVolumeClone($$$;$$)
return 0;
}
#
# Create a deep copy of a ZFS volume:
#  - Snapshot the source volume
#  - Run a resumable "zfs send | zfs recv" pipeline to do the copy
#  - Remove the snapshot (in both old and new volumes)
#
#    zfs snapshot persist-1/lease-546@copy
#    zfs send -R persist-1/lease-546@copy | zfs recv -Fs persist-1/lease-546-new
#    zfs destroy persist-1/lease-546@copy
#    zfs destroy persist-1/lease-546-new@copy
#
# If the send or recv are interrupted, the target volume will have a
# receive_resume_token attribute that can be used to continue the copy:
#
#    zfs send -t <token> | zfs recv -s persist-1/lease-546-new
#
# Arguments:
#   $pool     - pool holding the source volume; the copy is created in
#               the same pool
#   $ovolname - name of the existing source volume
#   $nvolname - name of the volume to create (must not already exist)
#   $dolock   - optional; when true (the default) the freenas lock is
#               taken around the API calls
#
# Returns 0 on success and -1 on failure. Failing to remove the temporary
# snapshots after a successful copy only produces a warning.
#
# The lock is NOT held while the send/recv pipeline runs. If the pipeline
# fails, the source snapshot is left in place.
#
sub freenasVolumeCopy($$$;$)
{
    my ($pool, $ovolname, $nvolname, $dolock) = @_;

    # Untaint arguments that are passed to a command execution
    $pool     = untaintHostname($pool);
    $ovolname = untaintHostname($ovolname);
    $nvolname = untaintHostname($nvolname);
    if (!$pool || !$ovolname || !$nvolname) {
        warn("*** ERROR: freenasVolumeCopy: ".
             "Invalid arguments");
        return -1;
    }

    $dolock = 1
        if (!defined($dolock));

    freenasLock()
        if ($dolock);

    # Get volume and snapshot info
    my $vollist = freenasVolumeList(0, 2);

    # The source volume must exist
    my $vref = $vollist->{$ovolname};
    if (!$vref || $vref->{'pool'} ne $pool) {
        warn("*** ERROR: freenasVolumeCopy: ".
             "Source volume '$ovolname' does not exist in pool '$pool'");
        freenasUnlock()
            if ($dolock);
        return -1;
    }

    # The destination volume must NOT exist
    my $nvref = $vollist->{$nvolname};
    if ($nvref && $nvref->{'pool'} eq $pool) {
        warn("*** ERROR: freenasVolumeCopy: ".
             "Destination volume '$nvolname' already exists in pool '$pool'");
        freenasUnlock()
            if ($dolock);
        return -1;
    }

    # The snapshot must not exist
    my $sname = "C" . time();
    my $snapshot = "$ovolname\@$sname";
    if (exists($vref->{'snapshots'})) {
        foreach my $existing (split(',', $vref->{'snapshots'})) {
            if ($snapshot eq $existing) {
                warn("*** ERROR: freenasVolumeCopy: ".
                     "Source snapshot '$snapshot' already exists");
                freenasUnlock()
                    if ($dolock);
                return -1;
            }
        }
    }

    # Create the snapshot
    my $res = freenasRequest($FREENAS_API_RESOURCE_SNAPSHOT, "POST", undef,
                             {"dataset" => "$pool/$ovolname",
                              "name" => "$sname"});
    if (!$res) {
        warn("*** ERROR: freenasVolumeCopy: could not create snapshot");
        freenasUnlock()
            if ($dolock);
        return -1;
    }

    freenasUnlock()
        if ($dolock);

    #
    # Do the send/recv pipeline.
    # This could take a really, really long time so we leave things unlocked
    # while we do it.
    #
    # NOTE(review): this runs through the shell, which reports only the exit
    # status of the last pipeline stage (zfs recv).
    #
    my $cmd = "$ZFS_CMD send -R $pool/$snapshot | ".
        "$ZFS_CMD recv -Fs $pool/$nvolname";

    TBDebugTimeStampWithDate("freenasVolumeCopy: starting send/recv")
        if ($debug);
    if (system($cmd)) {
        TBDebugTimeStampWithDate("freenasVolumeCopy: send/recv FAILED")
            if ($debug);
        # The resume hint must name the full dataset, pool included.
        warn("*** ERROR: ".
             "'$cmd' ".
             "failed, may be able to finish with:\n".
             "'$ZFS_CMD send -t <token> | $ZFS_CMD recv -s $pool/$nvolname'\n");
        return -1;
    }
    TBDebugTimeStampWithDate("freenasVolumeCopy: finished send/recv")
        if ($debug);

    freenasLock()
        if ($dolock);

    # Remove the snapshot in both the original and copy datasets
    my $msg;
    my $resource = "$FREENAS_API_RESOURCE_SNAPSHOT/${pool}\%2F${snapshot}";
    $res = freenasRequest($resource, "DELETE", undef, undef, undef, \$msg);
    if (!$res) {
        warn("*** WARNING: freenasVolumeCopy: ".
             "delete of $snapshot failed:\n$msg");
    }
    $snapshot = "$nvolname\@$sname";
    $resource = "$FREENAS_API_RESOURCE_SNAPSHOT/${pool}\%2F${snapshot}";
    $res = freenasRequest($resource, "DELETE", undef, undef, undef, \$msg);
    if (!$res) {
        warn("*** WARNING: freenasVolumeCopy: ".
             "delete of $snapshot failed:\n$msg");
    }

    freenasUnlock()
        if ($dolock);

    return 0;
}
sub freenasVolumeDeclone($$;$)
{
my ($pool, $volname, $dolock) = @_;
......
#!/usr/bin/perl -wT
#
# Copyright (c) 2012-2018 University of Utah and the Flux Group.
# Copyright (c) 2012-2019 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -651,12 +651,20 @@ sub AllocResources($;$$) {
$fstype = "";
}
#
# If there is a "copyfrom" attribute then we are creating a copy
# of an existing blockstore.
#
my $srcbs = $self->GetAttribute("copyfrom");
#
# XXX hack, hack
# XXX this doesn't belong here
#
if ($fstype) {
print STDERR "NOTE: FS creation could take 5 minutes or longer, ";
if ($fstype || $srcbs) {
print STDERR "NOTE: " .
($srcbs ? "Dataset copy" : "FS creation") .
" could take 5 minutes or longer, ";
if ($interruptable) {
print STDERR "please be patient!\n";
} else {
......@@ -670,7 +678,10 @@ sub AllocResources($;$$) {
# on the storage servers).
#
my $idx = $self->lease_idx();
my $cmd = "$BSCONTROL -l $idx -s $size $fstype create lease-$idx";
my $cmd = defined($srcbs) ?
"$BSCONTROL copy $srcbs lease-$idx" :
"$BSCONTROL -l $idx -s $size $fstype create lease-$idx";
my $rv;
if (!$interruptable) {
local $SIG{INT} = "IGNORE";
......@@ -703,6 +714,11 @@ sub AllocResources($;$$) {
$self->SetAttribute("last_snapshot", $tstamp, "integer");
}
}
# Clear the "copyfrom" attribute as an indicator we are done
if (defined($srcbs)) {
$self->DeleteAttribute("copyfrom");
}
}
# It all worked!
......
This diff is collapsed.
#!/usr/bin/perl -w
#
# Copyright (c) 2013-2017 University of Utah and the Flux Group.
# Copyright (c) 2013-2019 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -27,6 +27,7 @@ use Getopt::Std;
use Date::Parse;
use File::Temp qw(tempfile);
use CGI;
use Data::Dumper;
#
# Create a new dataset.
......@@ -48,13 +49,18 @@ sub usage()
print STDERR " -b Allocate resources in the background\n";
print STDERR " name Name (in the form <pid>/<id> or <pid>/<gid>/<id>)\n";
print STDERR "\n";
print STDERR "Usage: createdataset [-F srcname] [-e expiration] name\n";
print STDERR "Copy a persistent dataset to a new one of the same type. Options:\n";
print STDERR " -F srcname Source dataset to copy\n";
print STDERR " name Name (in the form <pid>/<id> or <pid>/<gid>/<id>)\n";
print STDERR "\n";
print STDERR "Usage: createdataset -t imdataset [-I node,bsname] name\n";
print STDERR "Create a dataset image. Options:\n";
print STDERR " -I node,bsname Take an immediate snapshot of a local blockstore on a node to populate the image.\n";
print STDERR " name Name (in the form <pid>/<id> or <pid>/<gid>/<id>)\n";
exit(-1);
}
my $optlist = "dhUo:s:t:e:a:f:bCR:W:I:T:";
my $optlist = "dhUo:s:t:e:a:f:bCR:W:I:T:F:";
my $debug = 0;
my $background = 0;
my $pid;
......@@ -75,6 +81,8 @@ my $needapproval = 0;
my $unapproved_reason;
my $webtask_id;
my $webtask;
my $srcds;
my $srcdsname;
my $qprefix = "global_";
my $quota;
......@@ -89,6 +97,7 @@ my %descrip = (
# Protos
sub fatal($);
sub HandleIMDataset();
sub parseleasename($);
#
# Configure variables
......@@ -215,30 +224,24 @@ if (defined($options{"T"})) {
}
$webtask->AutoStore(1);
}
# -F names the source dataset of a copy. It cannot be combined with the
# type (-t), size (-s), fstype (-f) or attribute (-a) options.
if (defined($options{"F"})) {
    $srcdsname = $options{"F"};

    my @conflicting = grep { defined($options{$_}) } ("t", "s", "f", "a");
    if (@conflicting) {
        fatal("Cannot specify type/size/fstype/attributes when copying");
    }
}
if (@ARGV != 1) {
print STDERR "Must specify dataset name\n";
usage();
}
if ($dstype ne "imdataset" && !$size) {
if ($dstype ne "imdataset" && !$srcdsname && !$size) {
print STDERR "Must specify dataset size\n";
usage();
}
# name must include a project
$lname = $ARGV[0];
if ($lname =~ /^([-\w]+)\/([-\w]+)$/) {
$pid = $gid = $1;
$lname = $2;
}
elsif ($lname =~ /^([-\w]+)\/([-\w]+)\/([-\w]+)$/) {
$pid = $1;
$gid = $2;
$lname = $3;
}
else {
fatal("Dataset name $lname not in the form <pid>/<lname>.");
}
($pid,$gid,$lname) = parseleasename($ARGV[0]);
my $this_user = User->ThisUser();
if (! defined($this_user)) {
......@@ -278,6 +281,46 @@ if (!TBAdmin() &&
fatal("Must have local_root privileges in $pid");
}
#
# When copying a lease, make sure the source lease exists, that the caller
# has access to it, and that it is in a state we can copy from. The new
# lease then takes its type, size, fstype and most attributes from the
# source.
#
if ($srcdsname) {
    my ($spid, $sgid, $sname) = parseleasename($srcdsname);

    # XXX only allow admins to copy across projects right now
    if (!TBAdmin() && ($spid ne $pid || $sgid ne $gid)) {
        fatal("Can only copy leases within a project right now");
    }

    # Fail cleanly on a nonexistent source rather than dying on a method
    # call against an undefined value below.
    $srcds = Lease->Lookup($spid, $sgid, $sname);
    if (!$srcds) {
        fatal("$srcdsname: no such lease.");
    }
    if (!TBAdmin() && !$srcds->AccessCheck($this_user, LEASE_ACCESS_READ())) {
        fatal("$srcdsname: you are not allow to copy lease.");
    }

    # XXX only allow copy of valid leases right now .
    if ($srcds->state ne "valid") {
        fatal("$srcdsname: lease is not in 'valid' state.");
    }

    # keep most of the attributes from the source
    $dstype = $srcds->type();
    my $sattrs = $srcds->GetAttributes();
    foreach my $key (keys %$sattrs) {
        # XXX do not copy URNs as they will be invalid in the copy
        if ($key !~ /_urn$/) {
            $attrs{$key} = $sattrs->{$key}->{'value'};
        }
    }

    # size and fstype are tracked in their own variables, not in %attrs
    $size = $attrs{'size'};
    $fstype = $attrs{'fstype'};
    delete $attrs{'size'};
    delete $attrs{'fstype'};
    $attrstr = "";

    # we want the internal name later
    $srcdsname = "lease-" . $srcds->lease_idx();
}
#
# Check type: currently only two defined.
#
......@@ -399,6 +442,13 @@ if ($fstype) {
$attrs{'fstype'} = $fstype;
}
#
# For a copy, record the source dataset name in the "copyfrom" attribute
# of the new lease.
#
$attrs{'copyfrom'} = $srcdsname
    if ($srcdsname);
#
# Check name: lease with this name must not already exist.
#
......@@ -477,6 +527,7 @@ if ($vars->{"usequotas"}) {
$unapproved_reason =
"Allocation of $size would put $pid over quota ($qsize).";
if (!$alwayscreate) {
print STDERR "$unapproved_reason\n";
print STDERR
"Try again with '-U' to request special approval by testbed-ops.\n"
if (!$background);
......@@ -530,7 +581,8 @@ if (defined($write_access)) {
#
# Approve the lease unless otherwise told not to.
# This can take a long time so we lock it to avoid races with others.
# This can take a long time (due to resource allocation) so we lock it to
# avoid races with others.
#
if ($approveme) {
my $logname;
......@@ -545,7 +597,7 @@ if ($approveme) {
}
if ($background) {
print "Resource allocation proceeding the background ...\n";
print "Resource allocation proceeding in the background ...\n";
$logname = TBMakeLogname("createdataset");
if (my $childpid = TBBackGround($logname)) {
......@@ -622,6 +674,31 @@ if (!$approveme) {
exit(0);
#
# Split a lease name into its project, group and lease components.
#
# Accepts "<pid>/<name>" (the group is then the same as the project) or
# "<pid>/<gid>/<name>". Returns the list (pid,gid,name); any other form is
# reported through fatal().
#
sub parseleasename($)
{
    my ($n) = @_;
    my ($proj, $group);

    # Fully qualified form: project, group and name
    if ($n =~ /^([-\w]+)\/([-\w]+)\/([-\w]+)$/) {
        ($proj, $group, $n) = ($1, $2, $3);
    }
    # Short form: the project doubles as the group
    elsif ($n =~ /^([-\w]+)\/([-\w]+)$/) {
        ($proj, $group, $n) = ($1, $1, $2);
    }
    else {
        fatal("Dataset name $n not in the form <pid>/<name> or <pid>/<gid>/<name>.");
    }

    return ($proj, $group, $n);
}
#
# Image backed datasets. Basically create an image and optionally take
# a snapshot from the BSname.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment