Commit 7aefdaa1 authored by Mike Hibler's avatar Mike Hibler

Implement "plan 1" for dataset sharing: "ephemeral RO snapshots".

You can now simultaneously RW and RO map a dataset because all the RO
mappings use copies (clones) of a snapshot. Only a single RW mapping
of course.

When the RW mapping swaps out it automatically creates a new snapshot.
So there is currently no user control over when a version of the dataset
is "published"; it just happens every time you swap out an experiment with
a RW mapping.

A new RW mapping does not affect current RO mappings of course as they
continue to use whatever snapshot they were created with. New RO mappings
will get the most recent snapshot, which we currently track in the DB via
the per-lease attribute "last_snapshot".

You can also now declare a lease to be "exclusive use" by setting the
"exclusive_use" lease attribute (via modlease). This means that it follows
the old semantics of only one mapping at a time, whether it be RO or RW.
This is an alternative to the "simultaneous_ro_datasets" sitevar which
enforces the old behavior globally. Primarily, I put this attribute in to
prevent an unexpected failure in the snapshot/clone path from wreaking
havoc over time. I don't know if there is any value in exposing this to
the user.
parent 944ef906
......@@ -390,13 +390,26 @@ sub freenasVolumeDesnapshot($$;$)
if ($@ =~ /has dependent clones/) {
warn("*** WARNING: freenasVolumeDesnapshot: ".
"snapshot '$sname' in use");
#
# XXX only return an error for this case if we are
# removing a specific snapshot. Otherwise, it causes
# too much drama up the line for something that is
# "normal" (i.e., we are attempting to remove all
# snapshots and some of them are in use).
#
if ($tstamp) {
$rv = -1;
}
} else {
my $msg = " $@";
$msg =~ s/\\n/\n /g;
warn("*** ERROR: freenasVolumeDesnapshot: ".
"'del $pool/$snapshot' failed:\n$msg");
# if it isn't an "in use" error, we really do fail
$rv = -1;
}
$rv = -1;
}
}
}
......
......@@ -714,9 +714,17 @@ sub allocSlice($$$$) {
return -1;
}
# For possible later cloning, remember if it has snapshots
# For possible later cloning, find the highest numbered snapshot
if (exists($vref->{'snapshots'})) {
$priv->{'hassnapshot'} = 1;
my $lastsnap = 0;
foreach my $snap (split(',', $vref->{'snapshots'})) {
if ($snap =~ /@(\d+)$/ && $1 > $lastsnap) {
$lastsnap = $1;
}
}
if ($lastsnap) {
$priv->{'lastsnapshot'} = $lastsnap;
}
}
$priv->{'pool'} = $vref->{'pool'};
......@@ -814,13 +822,18 @@ sub exportSlice($$$$) {
# That does not matter right now, but something to watch out for.
#
my $tstamp;
if (!exists($priv->{'hassnapshot'})) {
if (!exists($priv->{'lastsnapshot'})) {
# XXX this will be an error
warn("*** WARNING: blockstore_exportSlice: $volname: ".
"no snapshot found; created one for now");
$tstamp = time();
if (freenasVolumeSnapshot($pool, $volume, $tstamp)) {
warn("*** ERROR: blockstore_exportSlice: $volname: ".
"Could not create snapshot for RO mapping");
return -1;
}
} else {
$tstamp = $priv->{'lastsnapshot'};
}
#
......@@ -1427,12 +1440,15 @@ sub deallocSlice($$$$) {
# Check for clone volumes. A clone will have our (vnode_id)
# name and be a "cloneof" a snapshot of this lease.
#
# N.B. we now call Destroy rather than Declone, leaving our
# caller responsible for cleaning up snapshots.
#
if (exists($volumes->{$vnode_id})) {
my $vref = $volumes->{$vnode_id};
my $pool = $vref->{'pool'};
if (exists($vref->{'cloneof'}) &&
$vref->{'cloneof'} =~ /^$bsid\@\d+/) {
return freenasVolumeDeclone($pool, $vnode_id);
return freenasVolumeDestroy($pool, $vnode_id);
}
warn("*** WARNING: blockstore_deallocSlice: $volname: ".
"Found stale ephemeral volume '$pool/$vnode_id'");
......
#!/usr/bin/perl -wT
#
# Copyright (c) 2012-2014 University of Utah and the Flux Group.
# Copyright (c) 2012-2015 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -841,6 +841,9 @@ sub Release($)
# last used time since this represents an unmapping (aka swapout)
# of the lease.
#
# XXX currently, we also create a new snapshot of the blockstore
# if the blockstore is marked as "multiuse".
#
$query_result =
DBQueryWarn("select lease_idx from blockstores ".
"where bsidx='$bsidx'");
......@@ -849,8 +852,16 @@ sub Release($)
my ($lidx) = $query_result->fetchrow_array();
my $lease = Lease->Lookup($lidx);
$lease->BumpLastUsed()
if ($lease);
if ($lease) {
$lease->BumpLastUsed();
if (!$lease->IsExclusiveUse() &&
$lease->CreateResourceSnapshot(1)) {
print STDERR "Blockstore->Release: ".
"Could not create snapshot for $bsidx ($lease); ".
"marking as exclusive-use\n";
$lease->SetExclusiveUse();
}
}
}
return 0;
......
......@@ -494,6 +494,32 @@ sub InUse($) {
return ((@$lref > 0) ? 1 : 0);
}
#
# Return 1 if this lease may only be mapped by one consumer at a time,
# 0 if it may be shared.
#
# Sharing requires two things: the "storage/simultaneous_ro_datasets"
# sitevar must exist and be non-zero, and the lease's "exclusive_use"
# attribute must be unset or false.
#
sub IsExclusiveUse($) {
    my $self = shift;
    my $sitevar = "storage/simultaneous_ro_datasets";

    # Without the sitevar (or with it set to zero) everything is exclusive.
    return 1
        if (!TBSiteVarExists($sitevar));
    return 1
        if (TBGetSiteVar($sitevar) == 0);

    # Sharing is enabled globally, so the per-lease attribute decides.
    my $attr = $self->GetAttribute("exclusive_use");
    if ($attr) {
        return 1;
    }
    return 0;
}
#
# Mark this lease as exclusive use by setting its "exclusive_use"
# attribute to 1. Returns whatever SetAttribute returns.
#
sub SetExclusiveUse($) {
    my $self = shift;

    return $self->SetAttribute(
        "exclusive_use", 1, "integer"
    );
}
#
# Clear the exclusive-use marking on this lease by setting its
# "exclusive_use" attribute to 0. Returns whatever SetAttribute returns.
#
sub ClearExclusiveUse($) {
    my $self = shift;

    return $self->SetAttribute(
        "exclusive_use", 0, "integer"
    );
}
#
# Returns a list of blockstore reservations that are currently using the
# resources associated with this lease. XXX: this is a blockstore-specific
......@@ -672,6 +698,131 @@ sub DeallocResources($) {
return 0;
}
#
# Create a new snapshot of the resources backing this lease and record
# its timestamp in the "last_snapshot" lease attribute.
#
# Arguments:
#   $exclusive - if true, first ask bscontrol to remove all existing
#                snapshots of the lease (best effort; snapshots that are
#                still in use may remain). If false, existing snapshots
#                are left alone.
#
# Returns 0 on success (or if the lease is unapproved, in which case
# nothing is done), LEASE_ERROR_FAILED() if called as a class method or
# if the snapshot could not be created.
#
sub CreateResourceSnapshot($$) {
    my ($self,$exclusive) = @_;

    return LEASE_ERROR_FAILED()
        if (!ref($self));

    #
    # If lease is in the unapproved state, assume there is nothing to do.
    #
    if ($self->state() eq "unapproved") {
        return 0;
    }

    # The snapshot is named by its creation time.
    my $tstamp = time();

    #
    # For a dataset lease, we call over to the server to take a snapshot
    # XXX this should be in Lease::Blockstore object!
    #
    if ($self->type() =~ /dataset$/) {
        my $idx = $self->lease_idx();
        my $sarg = "";

        #
        # For efficiency, lookup the server in the blockstores table.
        # Saves gathering info from every storage server.
        #
        my $bstore = Blockstore->LookupByLease($idx);
        if ($bstore) {
            $sarg = "-S " . $bstore->node_id();
        }

        #
        # If we want an "exclusive" snapshot, we remove all others.
        # We don't return any errors, this is just best effort.
        #
        # N.B. this used to run unconditionally, ignoring $exclusive.
        #
        if ($exclusive &&
            system("$BSCONTROL $sarg desnapshot lease-$idx")) {
            print STDERR "$self: CreateResourceSnapshot: ".
                "WARNING! Could not remove all old storage snapshots\n";
        }

        #
        # Call the blockstore control program to create the new snapshot
        # on the storage server.
        #
        if (system("$BSCONTROL $sarg snapshot lease-$idx $tstamp")) {
            print STDERR "$self: CreateResourceSnapshot: ".
                "Could not snapshot storage.\n";
            return LEASE_ERROR_FAILED();
        }
    }

    # Remember the most recent snapshot; the result is not checked here.
    $self->SetAttribute("last_snapshot", $tstamp, "integer");

    return 0;
}
#
# Remove one snapshot, or all snapshots, of the resources backing this
# lease.
#
# Arguments:
#   $tstamp - numeric timestamp naming the snapshot to remove. If undef
#             or empty, all snapshots are removed (best effort).
#
# Returns 0 on success (or if the lease is unapproved, in which case
# nothing is done), LEASE_ERROR_FAILED() if called as a class method, if
# $tstamp is not numeric, or if a specific snapshot could not be removed.
#
# The "last_snapshot" attribute is only forgotten when all snapshots were
# requested to be removed or when the removed snapshot is the recorded
# one; removing some older snapshot must not lose track of the current one.
#
sub DestroyResourceSnapshot($$) {
    my ($self,$tstamp) = @_;

    return LEASE_ERROR_FAILED()
        if (!ref($self));

    #
    # If lease is in the unapproved state, assume there is nothing to do.
    #
    if ($self->state() eq "unapproved") {
        return 0;
    }

    #
    # If tstamp is not set, then clear all snapshots. Otherwise it must
    # be purely numeric since it winds up on a shell command line.
    #
    if (!defined($tstamp) || $tstamp eq "") {
        $tstamp = "";
    }
    elsif ($tstamp =~ /^(\d+)$/) {
        $tstamp = $1;
    }
    else {
        print STDERR
            "$self: DestroySnapshot: invalid snapshot timestamp.\n";
        return LEASE_ERROR_FAILED();
    }

    #
    # For a dataset lease, we call over to the server to remove the
    # snapshot(s).
    # XXX this should be in Lease::Blockstore object!
    #
    if ($self->type() =~ /dataset$/) {
        my $idx = $self->lease_idx();
        my $sarg = "";

        #
        # For efficiency, lookup the server in the blockstores table.
        # Saves gathering info from every storage server.
        #
        my $bstore = Blockstore->LookupByLease($idx);
        if ($bstore) {
            $sarg = "-S " . $bstore->node_id();
        }

        #
        # Call the blockstore control program to remove the snapshot(s)
        # on the storage server.
        #
        # Note that we only fail if bscontrol fails to remove a specific
        # snapshot. Otherwise it may have failed due to snapshots that
        # were still in use.
        #
        if (system("$BSCONTROL $sarg desnapshot lease-$idx $tstamp") &&
            $tstamp ne "") {
            print STDERR
                "$self: DestroySnapshot: could not remove storage snapshot(s).\n";
            return LEASE_ERROR_FAILED();
        }
    }

    #
    # Forget the recorded snapshot only if it is (potentially) gone.
    #
    if ($tstamp eq "" || $tstamp == $self->LastResourceSnapshot()) {
        $self->DeleteAttribute("last_snapshot");
    }

    return 0;
}
#
# Return the timestamp of the most recent snapshot as recorded in the
# "last_snapshot" lease attribute, or 0 if the attribute is not defined.
#
sub LastResourceSnapshot($) {
    my $self = shift;

    # XXX we don't call over to the server to get a list--too expensive
    my $recorded = $self->GetAttribute("last_snapshot");

    return 0
        if (!defined($recorded));
    return int($recorded);
}
#
# True if LastResourceSnapshot() reports a non-zero snapshot timestamp
# for this lease.
#
sub HasResourceSnapshot($) {
    my $self = shift;

    my $latest = $self->LastResourceSnapshot();
    return ($latest != 0);
}
#
# Extend (renew) a lease by the indicated amount.
# Also increments the renewal count and transitions the lease back into
......@@ -812,7 +963,7 @@ sub AllLeases($;$)
if (_validLeaseType($type)) {
$tclause = "where type='$type'";
} else {
print STDERR "Lease->AllLeases(): Invalid lease type: $type\n";
print STDERR "Lease->AllLeases: Invalid lease type: $type\n";
return undef;
}
}
......@@ -858,7 +1009,7 @@ sub AllProjectLeases($$;$)
if (_validLeaseType($type)) {
$tclause = "and type='$type'";
} else {
print STDERR "Lease->AllProjectLeases(): Invalid lease type: $type\n";
print STDERR "Lease->AllProjectLeases: Invalid lease type: $type\n";
return undef;
}
}
......@@ -896,7 +1047,8 @@ sub AllGroupLeases($$;$)
if !defined($group);
if (ref($group) ne "Group") {
print STDERR "Input object must be of type \"Group\"";
print STDERR "Lease->AllGroupLeases: Input object must be of type \"Group\"";
return undef;
}
my $pid = $group->pid();
......@@ -907,7 +1059,7 @@ sub AllGroupLeases($$;$)
if (_validLeaseType($type)) {
$tclause = "and type='$type'";
} else {
print STDERR "Lease->AllGroupLeases(): Invalid lease type: $type\n";
print STDERR "Lease->AllGroupLeases: Invalid lease type: $type\n";
return undef;
}
}
......@@ -943,6 +1095,7 @@ sub AllUserLeases($$;$)
return undef
if !defined($uid);
# If uid is a User object extract the user name
if (ref($uid) eq "User") {
$uid = $uid->uid();
}
......@@ -952,7 +1105,7 @@ sub AllUserLeases($$;$)
if (_validLeaseType($type)) {
$tclause = "and type='$type'";
} else {
print STDERR "Lease->AllUserLeases(): Invalid lease type: $type\n";
print STDERR "Lease->AllUserLeases: Invalid lease type: $type\n";
return undef;
}
}
......
#!/usr/bin/perl -w
#
# Copyright (c) 2013-2014 University of Utah and the Flux Group.
# Copyright (c) 2013-2015 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -50,11 +50,23 @@ use Getopt::Std;
# If the server is not specified, we pick the "best" server,
# based on availability of space.
#
# bscontrol destroy [ -S server -P pool ] bsname
# Destroy the named blockstore freeing up the space.
# Here bsname needs to be unique across all servers
# bscontrol snapshot [ -S server -P pool ] bsname [ tstamp ]
# Create a snapshot of the named blockstore with the indicated
# timestamp. If timestamp is not provided, it will use the
# current time. bsname needs to be unique across all servers
# or the server and pool need to be explicitly specified.
#
# bscontrol desnapshot [ -S server -P pool ] bsname [ tstamp ]
# Delete the snapshot of the named blockstore with the indicated
# timestamp. If timestamp is not provided, delete all snapshots
# associated with the blockstore. bsname needs to be unique across
# all servers or the server and pool need to be explicitly specified.
#
# bscontrol destroy [ -S server -P pool ] bsname
# Destroy the named blockstore freeing up the space. Also destroys
# any associated snapshots. bsname needs to be unique across all
# servers or the server and pool need to be explicitly specified.
#
# bscontrol [ -S server [ -P pool ] ] copy from-bsname to-bsname
# Do an efficient copy of one blockstore to another. Use the
# server/pool arguments to force a specific placement of to-bsname.
......@@ -86,6 +98,8 @@ sub bs_list($$$@);
# Forward declarations (with prototypes) of the command handlers.
sub bs_avail($$$@);
sub bs_info($$$@);
sub bs_create($$$@);
sub bs_snapshot($$$@);
sub bs_desnapshot($$$@);
# N.B. this was misspelled "bs_destory", leaving bs_destroy undeclared.
sub bs_destroy($$$@);
#
......@@ -129,11 +143,13 @@ if ($EUID != 0) {
# Commands
my %cmds = (
"list" => \&bs_list,
"avail" => \&bs_avail,
"info" => \&bs_info,
"create" => \&bs_create,
"destroy" => \&bs_destroy,
"list" => \&bs_list,
"avail" => \&bs_avail,
"info" => \&bs_info,
"create" => \&bs_create,
"snapshot" => \&bs_snapshot,
"desnapshot" => \&bs_desnapshot,
"destroy" => \&bs_destroy,
);
#
......@@ -610,14 +626,37 @@ sub bs_info($$$@)
my $bsref = getblockstores($dsrv, $dpool);
if (keys(%{$bsref}) > 0) {
printf("%-32s %-24s %-10s %10s %-s\n",
"Unique ID", "Server/Pool/Volume", "Type", "Size", "Exported as");
printf("%-32s %-24s %-10s %10s %4s %-s\n",
"Unique ID", "Server/Pool/Volume", "Type", "Size", "Snap", "Exported as");
# create a snapshot to unique ID hash
my %snaps = ();
foreach my $bs (sort keys(%{$bsref})) {
my $attrs = $bsref->{$bs};
my $scount = 0;
if (exists($attrs->{'snapshots'})) {
foreach my $sname (split(',', $attrs->{'snapshots'})) {
$snaps{$sname} = $bs;
$scount++;
}
}
$bsref->{$bs}->{'snapcount'} = $scount;
}
foreach my $bs (sort keys(%{$bsref})) {
my $attrs = $bsref->{$bs};
printf("%-32s %-24s %-10s %10s %s\n", $bs,
my $tstr = $attrs->{'type'};
if (exists($attrs->{'cloneof'})) {
$tstr = "clone (" . $snaps{$attrs->{'cloneof'}} . ")";
}
my $scount = 0;
if (exists($attrs->{'snapcount'})) {
$scount = $attrs->{'snapcount'};
}
printf("%-32s %-24s %-10s %10s %4s %s\n", $bs,
$attrs->{'server'} . "/" . $attrs->{'pool'} . "/" . $attrs->{'volume'},
$attrs->{'type'}, $attrs->{'size'},
$tstr, $attrs->{'size'}, $scount,
($attrs->{'active'} ? $attrs->{'iname'} : ""));
}
}
......@@ -759,14 +798,123 @@ sub bs_create($$$@)
return 0;
}
#
# Common code for the "snapshot" and "desnapshot" commands.
#
# Arguments:
#   $create - true to create a snapshot, false to remove snapshot(s)
#   $srv    - storage server, or false to use the one the volume is on
#   $pool   - pool on the server, or false to use the one the volume is in
#   $name   - volume name; must match exactly one volume
#   $tstamp - numeric snapshot timestamp. If undef: the current time when
#             creating, empty (passed to the proxy as "no timestamp")
#             when removing.
#
# Returns 0 on success. Calls fatal() on bad arguments or lookup
# failures and exits with -1 if the server command fails.
#
sub dosnapshot($$$$$)
{
    my ($create, $srv, $pool, $name, $tstamp) = @_;

    my $cmd = "desnapshot";
    $cmd = "snapshot"
        if ($create);

    # Validate the volume name, keeping only the matched text.
    if (!defined($name) || $name !~ /^([-\w]+)$/) {
        fatal("$cmd: must specify a valid volume name");
    }
    else {
        $name = $1;
    }

    # Validate an explicit timestamp or pick the default for the command.
    if (!defined($tstamp)) {
        $tstamp = ($create ? time() : "");
    }
    elsif ($tstamp =~ /^(\d+)$/) {
        $tstamp = $1;
    }
    else {
        fatal("$cmd: must specify a numeric timestamp value");
    }

    #
    # Find the blockstore based on info from the server(s).
    # The name has to identify exactly one volume.
    #
    my $volattrs;
    my $volref = getvolumes($srv, $pool);
    foreach my $key (sort keys(%{$volref})) {
        my $candidate = $volref->{$key};

        next
            if ($name ne $candidate->{'volume'});

        if ($volattrs) {
            fatal("$cmd: " .
                  "multiple volumes match '$name', must specify a server and pool");
        }
        $volattrs = $candidate;
    }
    if (!$volattrs) {
        fatal("$cmd: no such volume '$name'");
    }

    # Fill in (or sanity check) the server and pool from what we found.
    if (!$srv) {
        $srv = $volattrs->{'server'};
    }
    elsif ($srv ne $volattrs->{'server'}) {
        fatal("$cmd: found server is not the specified server!?");
    }
    if (!$pool) {
        $pool = $volattrs->{'pool'};
    }
    elsif ($pool ne $volattrs->{'pool'}) {
        fatal("$cmd: found pool is not the specified pool!?");
    }

    #
    # See if the snapshot already exists
    #
    if ($tstamp && exists($volattrs->{'snapshots'})) {
        my $snapshot = "$name\@$tstamp";
        my $found = grep { $_ eq $snapshot }
            split(',', $volattrs->{'snapshots'});

        if ($create && $found) {
            fatal("$cmd: snapshot '$snapshot' already exists");
        }
        elsif (!$create && !$found) {
            fatal("$cmd: no such snapshot '$snapshot'");
        }
    }

    #
    # Call out to the server to create/destroy the snapshot.
    #
    my $outref;
    my $failed = bsserver_cmd($srv, "$PROXYCMD $cmd $pool $name $tstamp",
                              \$outref);
    if ($failed) {
        my $action = "destroy";
        $action = "create"
            if ($create);

        print STDERR "*** $cmd: ".
            "could not $action snapshot for '$name' on $srv/$pool:\n";
        print STDERR "proxycmd: '$cmd $pool $name $tstamp'\n";
        print STDERR "output:\n";
        foreach my $line (@$outref) {
            print STDERR " $line\n";
        }
        exit(-1);
    }

    return 0;
}
#
# "snapshot" command handler: create a snapshot of the named volume.
# The third (size) argument of the handler interface is ignored.
#
sub bs_snapshot($$$@)
{
    my ($srv, $pool) = @_[0, 1];
    my ($name, $tstamp) = @_[3, 4];

    return dosnapshot(1, $srv, $pool, $name, $tstamp);
}
#
# "desnapshot" command handler: remove one or all snapshots of the named
# volume. The third (size) argument of the handler interface is ignored.
#
sub bs_desnapshot($$$@)
{
    my ($srv, $pool) = @_[0, 1];
    my ($name, $tstamp) = @_[3, 4];

    return dosnapshot(0, $srv, $pool, $name, $tstamp);
}
sub bs_destroy($$$@)
{
my ($srv,$pool,$size,$name) = @_;
my ($srv,$pool,undef,$name) = @_;
if (defined($name) && $name =~ /^([-\w]+)$/) {
$name = $1;
} else {
fatal("create: must specify a valid volume name");
fatal("destroy: must specify a valid volume name");
}
#
......@@ -806,21 +954,42 @@ sub bs_destroy($$$@)
#
my $bstore = Blockstore->Lookup($srv, $name);
if (!$bstore) {
fatal("destroy: no blockstore '$srv/$name'");
fatal("destroy: no blockstore '$srv/$name' in DB");
}
if ($bstore->role() ne "partition") {
fatal("destroy: wrong type of blockstore '$srv/$name'");
}
if ($bstore->Delete()) {
fatal("destroy: could not delete blockstore '$srv/$name'");
fatal("destroy: could not delete blockstore '$srv/$name' from DB");
}
#
# Call out to the server to delete the storage.
#
my $outref;
#
# First destroy any snapshots
#
if (exists($volattrs->{'snapshots'}) &&
bsserver_cmd($srv, "$PROXYCMD desnapshot $pool $name", \$outref)) {
print STDERR "*** destroy: ".
"could not destroy snapshots for '$name' on $srv/$pool:";
foreach my $str (@$outref) {
print STDERR " $str\n";
}
exit(-1);
}
#
# Destroy the actual volume.
#
# N.B. if the volume is a clone, this call will not get rid of
# the snapshot the volume was associated with.
#
if (bsserver_cmd($srv, "$PROXYCMD destroy $pool $name", \$outref)) {
print STDERR "*** create: could not deallocate storage for '$name' on $srv/$pool:";
print STDERR "*** destroy: ".
"could not deallocate storage for '$name' on $srv/$pool:";
foreach my $str (@$outref) {
print STDERR " $str\n";
}
......
......@@ -1647,6 +1647,31 @@ sub LoadVirtNodes($)
#
# Note: we do not make these checks during a pre-assign pass.
#
# Exclusive use semantics:
#
# If the sitevar "simultaneous_ro_datasets" is zero (or unset)
# or the dataset has the "exclusive_use" attribute set, only one
# mapping at a time can be in effect for a dataset whether it
# is RO or RW. This condition is embodied in the IsExclusiveUse()
# Lease method.
#
# Shared use semantics:
#
# To be shared, the global "simultaneous_ro_datasets" sitevar
# must be non-zero and a dataset's "exclusive_use" attribute must
# either not exist or be set to zero (!IsExclusiveUse). Additionally,
# the dataset must have a snapshot. Currently a snapshot is created
# upon termination of any RW mapping (see Blockstore->Release).
# Snapshot creation may be explicit in the future.
#
# A RO mapping of a dataset always gets the most recent snapshot
# of the dataset. This is true whether the dataset is currently
# in use or not. If the dataset does not have a snapshot, it is
# an error.
#
# A RW mapping of a dataset always gets the dataset itself.
# If a RW mapping already exists, it is an error.
#
if ($attrkey eq "lease" && !$self->preassign()) {
my $lease = Lease->Lookup($attrval);
# Valid lease?
......@@ -1655,12 +1680,13 @@ sub LoadVirtNodes($)
return -1;
}
# If sitevar disables simultaneous use, catch it now.
if ((!TBSiteVarExists("storage/simultaneous_ro_datasets") ||
int(TBGetSiteVar("storage/simultaneous_ro_datasets")) != 1) &&
$lease->InUse()) {
tberror("Dataset $lease is currently in use, ".
"so cannot be accessed at this time.\n");
my $exclusive = $lease->IsExclusiveUse();
my $snapshot = $lease->HasResourceSnapshot();
# If sitevar or dataset disables simultaneous use, catch it now.
if ($exclusive && $lease->InUse()) {
tberror("Exclusive-use dataset $lease is currently in use ".
"and cannot be mapped at this time.\n");
return -1;
}
......@@ -1670,14 +1696,17 @@ sub LoadVirtNodes($)
if (!$lease->AccessCheck($self->realuser(),
LEASE_ACCESS_READ())) {
tberror("Not allowed to use dataset $lease in RO mode\n");
return -1
}
# Leases already in use RW cannot be accessed.
if ($lease->InUseReadWrite()) {
tberror("Dataset $lease is currently in use read-write, ".
"so cannot be accessed at this time.\n");
return -1;
}
# For shared use, a snapshot must exist
if (!$exclusive) {
if (!$snapshot) {
tberror("Dataset $lease has no snapshot ".
"and cannot be mapped RO at this time.\n");
return -1;
}
}
} else {
# Does user have RW rights?
if (!$lease->AccessCheck($self->realuser(),
......@@ -1685,18 +1714,32 @@ sub LoadVirtNodes($)
tberror("Not allowed to use dataset $lease in RW mode\n");
return -1;
}
# RW mode is exclusive (single-experiment).
if ($lease->InUse()) {
tberror("Dataset $lease is currently in use and cannot ".
"be requested read-write at this time.\n");
return -1;
}
# Deny RW access if lease is in grace period.
if ($lease->state() eq LEASE_STATE_GRACE()) {
tberror("Dataset $lease in grace period, must ".
"specify '\$$vname set-readonly 1' in NS file.\n");
return -1;
}
if (!$exclusive) {
# only one RW mapping
if ($lease->InUseReadWrite()) {
tberror("Dataset $lease is currently in use (RW) ".
"and cannot be mapped RW at this time.\n");
return -1;
}
# Make sure there is a snapshot in place
if (!$snapshot) {
if ($lease->InUse()) {
tberror("Dataset $lease is in use (RO) but ".
"has no snapshot ".
"and cannot be mapped RW at this time.\n");
return -1;
}
tbwarn("Dataset $lease has no snapshot ".
"and cannot be mapped RW at this time.\n");
}
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment