Commit 613d90dd authored by Leigh Stoller's avatar Leigh Stoller

New support for importing image backed datasets from other clusters.

This is just like importing images (by using a url instead of a urn), which
makes sense since image backed datasets are just images with a flag set.

Key differences:

1. You cannot snapshot a new version of the dataset on a cluster it has
   been imported to. The snapshot has to be done where the dataset was
   created initially. This is slightly inconvenient and will perhaps
   confuse users, but it is far less confusing than the datasets getting
   out of sync.

2. No image versioning of datasets. We can add that later if we want to.
parent 36573cad
......@@ -882,23 +882,49 @@ sub CheckDatasets($$$)
return -1;
}
foreach my $ref (GeniXML::FindNodes("n:node", $rspec)->get_nodelist()) {
my $manager_urn = GetManagerId($ref);
foreach my $blockref (GeniXML::FindNodesNS("n:blockstore",
$ref,
$GeniXML::EMULAB_NS)->get_nodelist()) {
my $leaseurn = GeniXML::GetText("persistent", $blockref);
my $dataset_id = GeniXML::GetText("dataset", $blockref);
my $class = GeniXML::GetText("class", $blockref);
#
# We only care about datasets here, we let the backend
# do the error checking on ephemeral blockstores.
#
next
if (!defined($leaseurn));
if (!GeniHRN::IsValid($leaseurn)) {
$$pmsg = "Persistent dataset name is not a valid URN";
if (!defined($dataset_id));
if (!defined($class)) {
$class = "remote";
}
elsif ($class ne "local" && $class ne "remote") {
$$pmsg = "class must be local or remote";
return 1;
}
#
# If the dataset is local and its a URL, then make sure its
# a valid URL.
#
if ($class eq "local" && $dataset_id =~ /^(http|https):/) {
if (!TBcheck_dbslot($dataset_id, "virt_nodes", "osname",
TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)){
$$pmsg = "Invalid url for dataset";
return 1;
}
next;
}
if (!GeniHRN::IsValid($dataset_id)) {
$$pmsg = "Persistent dataset is not a valid URN";
return 1;
}
my ($authority, $type, $id) = GeniHRN::Parse($leaseurn);
my $dataset_urn = $dataset_id;
my ($dataset_authority, $type, $id) = GeniHRN::Parse($dataset_urn);
my ($dataset_domain) = split(":", $dataset_authority);
#
# Separate project from name; this is how the rspec specifies
# the dataset they want, since it might be in another project
......@@ -907,36 +933,40 @@ sub CheckDatasets($$$)
$pid = $1;
$id = $2;
}
#
# The domain of the dataset has to match the domain of aggregate.
# We also use this when creating a profile, so rspec might not
# be bound.
#
if (defined($manager_urn)) {
my ($manager_authority) = GeniHRN::Parse($manager_urn);
my ($manager_domain) = split(":", $manager_authority);
if ($manager_domain ne $dataset_domain) {
$$pmsg = "Dataset $id is not located on $manager_authority";
return 1;
}
}
next
if ($class eq "local");
#
# Not all backends have blockstore support.
#
if (!APT_Dataset::ValidBlockstoreBackend($authority)) {
$$pmsg = "Persistent dataset is not on a valid aggregate";
if (!APT_Dataset::ValidBlockstoreBackend($dataset_authority)) {
$$pmsg = "Dataset $id is not on a valid aggregate";
return 1;
}
my $dataset = APT_Dataset->Lookup("$pid/$id");
if (!defined($dataset)) {
$dataset = APT_Dataset->LookupByRemoteURN($leaseurn);
$dataset = APT_Dataset->LookupByRemoteURN($dataset_urn);
if (!defined($dataset)) {
$$pmsg = "Persistent dataset '$pid/$id' does not exist";
return 1;
}
}
#
# Dataset must already exists on the aggregate. But we have to
# make these checks again at instantiation, since the dataset
# might be gone, or it might have different permissions
# settings.
#
my ($d_authority) = GeniHRN::Parse($dataset->aggregate_urn());
my ($domain,$subauth) = split(":", $authority);
if ($domain ne $d_authority) {
$$pmsg = "Persistent dataset '$pid/$id' in not on $authority";
return 1;
}
#
# XXX Need basic frontend permission checks?
#
......
......@@ -31,7 +31,7 @@ use strict;
use English;
use Getopt::Long;
use XML::Simple;
use File::Temp qw(tempfile tmpnam :POSIX);
use File::Temp qw(tempfile :mktemp tmpnam :POSIX);
use Data::Dumper;
use Cwd qw(realpath);
......@@ -297,27 +297,6 @@ if (exists($xmlparse->{'attribute'}->{"rspec"})) {
}
else {
$rspecstr = $profile->CheckFirewall(!$localuser);
#
# Look for datasets; need to verify that the datasets being referenced
# still exist and are still permissible to use, and we have to generate
# credentials for those datasets (if not a global dataset). The tricky
# aspect is that while a dataset and a profile have project permissions,
# the experiment has no project association, so if the profile/dataset
# perms are okay, then we send over a credential that tells the CM to
# allow this experiment to use that dataset in that project.
#
$errmsg = "Bad dataset";
if (APT_Profile::CheckDatasets($rspecstr, $profile->pid(), \$errmsg)) {
UserError($errmsg);
}
#
# A temporary hack to make sure that the user does not try to run
# an x386 image on the Cloudlab cluster (ARMs). This will eventually
# get replaced with Jon's constraint checking code.
#
if ($profile->CheckNodeConstraints($default_aggregate_urn, \$errmsg)) {
UserError($errmsg);
}
}
#
......@@ -345,6 +324,28 @@ else {
push(@aggregate_urns, $default_aggregate_urn);
}
#
# Look for datasets; need to verify that the datasets being referenced
# still exist and are still permissible to use, and we have to generate
# credentials for those datasets (if not a global dataset). The tricky
# aspect is that while a dataset and a profile have project permissions,
# the experiment has no project association, so if the profile/dataset
# perms are okay, then we send over a credential that tells the CM to
# allow this experiment to use that dataset in that project.
#
$errmsg = "Bad dataset";
if (APT_Profile::CheckDatasets($rspecstr, $profile->pid(), \$errmsg)) {
UserError($errmsg);
}
#
# A temporary hack to make sure that the user does not try to run
# an x386 image on the Cloudlab cluster (ARMs). This will eventually
# get replaced with Jon's constraint checking code.
#
if ($profile->CheckNodeConstraints($default_aggregate_urn, \$errmsg)) {
UserError($errmsg);
}
#
# Use ssh-keygen to see if the key is valid and convertable. We first
# try to get the fingerprint, which will tells us if its already in
......@@ -932,10 +933,10 @@ sub CreateDatasetCreds($$$$$)
foreach my $blockref (GeniXML::FindNodesNS("n:blockstore",
$ref,
$GeniXML::EMULAB_NS)->get_nodelist()) {
my $leaseurn = GeniXML::GetText("persistent", $blockref);
if (!defined($leaseurn)) {
my $dataset_id = GeniXML::GetText("persistent", $blockref);
if (!defined($dataset_id)) {
# persistent is deprecated.
$leaseurn = GeniXML::GetText("dataset", $blockref);
$dataset_id = GeniXML::GetText("dataset", $blockref);
}
#
......@@ -943,9 +944,18 @@ sub CreateDatasetCreds($$$$$)
# do the error checking on ephemeral blockstores.
#
next
if (!defined($leaseurn));
if (!defined($dataset_id));
my $class = GeniXML::GetText("class", $blockref);
if (!defined($class)) {
$class = "remote";
}
# Image backed. No checking since the image has to be global
# anyway. Needs more thought.
next
if ($class eq "local");
my ($authority, $type, $id) = GeniHRN::Parse($leaseurn);
my ($authority, $type, $id) = GeniHRN::Parse($dataset_id);
#
# Separate project from name; this is how the rspec specifies
# the dataset they want, since it might be in another project
......@@ -956,7 +966,7 @@ sub CreateDatasetCreds($$$$$)
}
my $dataset = APT_Dataset->Lookup("$pid/$id");
if (!defined($dataset)) {
$dataset = APT_Dataset->LookupByRemoteURN($leaseurn);
$dataset = APT_Dataset->LookupByRemoteURN($dataset_id);
if (!defined($dataset)) {
$$pmsg = "Persistent dataset '$pid/$id' does not exist";
return 1;
......@@ -1112,7 +1122,7 @@ sub CreateSlivers()
#
sub RunStitcher()
{
my $tmpdir = tmpnam();
my $tmpdir = mktemp("/tmp/stitcher.XXXXXX");
my $slicecredfile = "$tmpdir/slicecred.xml";
my $speaksforfile = "$tmpdir/speaksforcred.xml";
my $al2scredfile = "$tmpdir/al2scred.xml";
......@@ -1377,8 +1387,8 @@ sub RunStitcher()
goto bad;
}
}
system("/bin/rm -rf $tmpdir")
if (!$debug && defined($tmpdir) && -e $tmpdir);
# system("/bin/rm -rf $tmpdir")
# if (!$debug && defined($tmpdir) && -e $tmpdir);
return 0;
bad:
......@@ -1390,8 +1400,8 @@ sub RunStitcher()
system("/bin/cat $tmpdir/stitcher.log");
print "-----------------------------------------\n";
}
system("/bin/rm -rf $tmpdir")
if (!$debug && defined($tmpdir) && -e $tmpdir);
# system("/bin/rm -rf $tmpdir")
# if (!$debug && defined($tmpdir) && -e $tmpdir);
return -1;
}
......
......@@ -328,6 +328,9 @@ sub DoCreate()
$blob = $response->value();
$dataset->Update({"remote_uuid" => $blob->{"uuid"},
"remote_urn" => $blob->{"urn"}});
if (exists($blob->{'url'}) && $blob->{'url'} ne "") {
$dataset->Update({"remote_url" => $blob->{"url"}});
}
#
# Okay, this is silly; there is no distinct state for resource allocation.
......
......@@ -105,6 +105,7 @@ sub DoRefresh();
sub DoReboot();
sub DoReload();
sub DoLockdown();
sub DoManifests();
sub WriteCredentials();
sub StartMonitor();
......@@ -165,6 +166,9 @@ elsif ($action eq "lockdown") {
elsif ($action eq "writecreds") {
WriteCredentials()
}
elsif ($action eq "getmanifests") {
DoManifests()
}
else {
usage();
}
......@@ -1170,6 +1174,61 @@ sub DoRebootOrReload($)
sub DoReboot() { return DoRebootOrReload("reboot"); }
sub DoReload() { return DoRebootOrReload("reload"); }
#
# Handler for the "getmanifests" action: call GetManifest() on every
# entry of the instance's aggregate list. Does not return to the caller;
# exits 0 when every call produced a response, otherwise prints a message
# to STDERR and exits 1.
#
sub DoManifests()
{
    my $errmsg;
    my $slice = $instance->GetGeniSlice();
    if (!defined($slice)) {
        # Report through the common error exit below. This sub has no
        # "killit" label, so jumping to one would die instead of
        # exiting cleanly with the error message.
        $errmsg = "No slice for instance";
        goto bad;
    }
    #
    # Worker run once per aggregate; returns nonzero on failure.
    #
    my $coderef = sub {
        my ($sliver) = @_;
        # NOTE(review): $webtask is not used below; the call is kept in
        # case webtask() has side effects -- confirm and remove if not.
        my $webtask = $sliver->webtask();

        my $response = $sliver->GetManifest();
        return 1
            if (!defined($response));

        return 0;
    };
    my @return_codes = ();
    my @agglist      = $instance->AggregateList();

    # One child per aggregate; results are collected in @return_codes.
    if (ParRun({"maxwaittime" => 99999,
                "maxchildren" => scalar(@agglist)},
               \@return_codes, $coderef, @agglist)) {
        #
        # The parent caught a signal. Leave things intact so that we can
        # kill things cleanly later.
        #
        $errmsg = "Internal error calling GetManifest";
        goto bad;
    }

    #
    # Check the exit codes.
    #
    foreach my $code (@return_codes) {
        if ($code) {
            $errmsg = "Could not get manifest for some slivers";
            goto bad;
        }
    }
    exit(0);

  bad:
    print STDERR $errmsg . "\n";
    exit(1);
}
#
# Start up the monitor for an instance. Only one though.
#
......
......@@ -120,6 +120,7 @@ my $ADDAUTHORITY = "$TB/sbin/protogeni/addauthority";
my $EMULAB_PEMFILE = "@prefix@/etc/genicm.pem";
my $TARINSTALL = "/usr/local/bin/install-tarfile";
my $IMAGE_SETUP = "$TB/sbin/image_setup";
my $IMAGE_IMPORT = "$TB/sbin/image_import";
my $SHAREVLAN = "$TB/sbin/sharevlan";
my $FWNAME = "fw";
my $API_VERSION = 1;
......@@ -6983,34 +6984,78 @@ sub HandleBlockstore($$$$$$@)
# or we get it from the image if its an image backed blockstore.
#
if ($class eq "local") {
if (defined($dataset_id)) {
# Default project to lookup lease.
if (!defined($dataset_id)) {
$size = GeniXML::GetText("size", $blockref);
if (!defined($size)) {
$message = "Missing blockstore size for $bsname";
goto bad;
}
}
else {
# Default project to lookup.
my $pid = $experiment->pid();
my $gid = $pid;
my $vers;
my $image;
my ($domainsubauth,$dtype,$id) = GeniHRN::Parse($dataset_id);
if ($dtype ne "dataset") {
$message = "Illegal dataset urn for $dataset_id";
goto bad;
}
my ($domain,$subauth) = split(":", $domainsubauth);
if ($domain ne $OURDOMAIN) {
$message = "This is not the correct site for this dataset";
goto bad;
}
if (defined($subauth)) {
$pid = $subauth;
#
# Is it a url. We might not have the image locally, but
# will later when image_setup runs. But lets do a check to
# make sure it really exist by trying to import it. This will
# get the descriptor and the information we need.
#
if ($dataset_id =~ /^(ftp|http|https):/) {
if (!TBcheck_dbslot($dataset_id, "virt_nodes", "osname",
TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) {
$message = "Invalid url for dataset";
goto bad;
}
$image = Image->LookupByURL($dataset_id);
if (!defined($image)) {
my $safe_url = User::escapeshellarg($dataset_id);
print STDERR "$dataset_id is not here, trying to get it\n";
system("$IMAGE_IMPORT -p $pid $safe_url");
if ($?) {
$message = "Could not import $dataset_id";
goto bad;
}
$image = Image->LookupByURL($dataset_id);
if (!defined($image)) {
$message = "Could not lookup dataset after import";
goto bad;
}
}
}
if ($id =~ /^([^\/]+)\/\/([^\/]+)(\/\/(\d+))?$/) {
$id = $2;
if (defined($4)) {
$vers = $4;
elsif (GeniHRN::IsValid($dataset_id)) {
my ($domainsubauth,$dtype,$id) = GeniHRN::Parse($dataset_id);
if ($dtype ne "dataset") {
$message = "Illegal dataset urn for $dataset_id";
goto bad;
}
my ($domain,$subauth) = split(":", $domainsubauth);
if ($domain ne $OURDOMAIN) {
$message = "This is not the correct site for this dataset";
goto bad;
}
if (defined($subauth)) {
$pid = $subauth;
}
if ($id =~ /^([^\/]+)\/\/([^\/]+)(\/\/(\d+))?$/) {
$id = $2;
if (defined($4)) {
$vers = $4;
}
}
$image = Image->Lookup($pid, $id, $vers);
if (!defined($image)) {
$message = "Unknown dataset: $dataset_id";
goto bad;
}
}
my $image = Image->Lookup($pid, $id, $vers);
if (!defined($image)) {
$message = "Unknown dataset: $dataset_id";
else {
$message = "Invalid name/url for dataset";
goto bad;
}
if (!$image->isdataset()) {
......@@ -7040,11 +7085,10 @@ sub HandleBlockstore($$$$$$@)
goto bad;
}
}
# This needs to be name, not id.
push(@attributes, ["dataset", $image->versname(), 0]);
$btype = "imdataset";
if (!$image->size()) {
if (! ($image->lba_low() || $image->lba_high())) {
$message = "No local size info for $dataset_id";
goto bad;
}
......@@ -7052,14 +7096,6 @@ sub HandleBlockstore($$$$$$@)
$image->lba_low() + 1) /
(1024 / $image->lba_size())) . "KB";
}
else {
$size = GeniXML::GetText("size", $blockref);
if (!defined($size)) {
$message = "Missing blockstore size for $bsname";
goto bad;
}
}
$size = Blockstore::ConvertToMebi($size);
if ($size < 0) {
$message = "Illegal blockstore size for $bsname";
......
......@@ -3534,7 +3534,7 @@ sub CreateDataset($)
if ($PROTOGENI_LOCALUSER);
# Stuff to put in the return blob.
my ($state,$uuid,$busy,$msg,$urn);
my ($state,$uuid,$busy,$msg,$urn,$url);
#
# Grab the lease or image to see if its been created/approved, we want
......@@ -3552,6 +3552,7 @@ sub CreateDataset($)
$busy = 0;
$urn = GeniHRN::Generate($OURDOMAIN . ":" . $image->pid(), "dataset",
$image->gid() . "//" . $image->imagename());
$url = $image->LocalURL();
}
else {
my $lease = Lease->Lookup($group->pid(), $group->gid(), $dataset);
......@@ -3584,6 +3585,8 @@ sub CreateDataset($)
"urn" => $urn,
"busy" => $busy,
};
$blob->{'url'} = $url if (defined($url));
return GeniResponse->Create(GENIRESPONSE_SUCCESS, $blob, $msg);
}
sub DeleteDataset($)
......
#!/usr/bin/perl -w
#
# Copyright (c) 2003-2014 University of Utah and the Flux Group.
# Copyright (c) 2003-2015 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -24,6 +24,7 @@
use English;
use Getopt::Std;
use Socket;
use Data::Dumper;
#
# Fetch external image definitions and create local descriptors.
......@@ -121,6 +122,8 @@ if (!$experiment->AccessCheck($this_user, TB_EXPT_MODIFY())) {
tbdie("You are not allowed to modify experiment $eid in project $pid");
}
my @urllist = ();
#
# Look for any nodes that specify a url for the osname.
#
......@@ -129,16 +132,41 @@ my $result = $experiment->TableLookUp("virt_nodes", "vname,osname");
while (my ($vname, $osname) = $result->fetchrow()) {
my $url;
next
if (! ($osname =~ /^(ftp|http|https):/));
# Verify entire URL and taint check.
if ($osname =~ /^((http|https|ftp)\:\/\/[-\w\.\/\@\:\~\?\=\&]*)$/) {
$url = $1;
if ($osname =~ /^(ftp|http|https):/) {
# Verify entire URL and taint check.
if ($osname =~ /^((http|https|ftp)\:\/\/[-\w\.\/\@\:\~\?\=\&]*)$/) {
$url = $1;
}
else {
tbdie("Invalid URL $osname\n");
}
push(@urllist, $url);
}
else {
tbdie("Invalid URL $osname\n");
#
# Look for image backed datasets that refer to an external URL.
#
my @blockstores = $experiment->LookupBlockstoresForNode($vname);
foreach my $blockstore (@blockstores) {
next
if ($blockstore->type() ne "imdataset");
if (!exists($blockstore->{'attributes'}->{'dataset'})) {
tbdie("No dataset if for blockstore on $vname\n");
}
my $dataset = $blockstore->{'attributes'}->{'dataset'};
my $image = Image->Lookup($dataset);
if (!defined($image)) {
tbdie("Could not lookup image for $dataset");
}
next
if ($image->IsLocal());
push(@urllist, $image->metadata_url());
}
}
foreach my $url (@urllist) {
my $safe_url = User::escapeshellarg($url);
#
......
......@@ -86,7 +86,7 @@ my $doprovenance = 0;
# version of the software we are so it gives something we can handle.
# Be sure to update this if you change the version in dumpdescriptor.
#
my $METADATA_CLIENTVERSION = 1;
my $METADATA_CLIENTVERSION = 2;
#
# Untaint the path
......
......@@ -95,6 +95,7 @@ class Dataset
function gid() { return $this->pid(); }
function aggregate_urn() { return $this->field("aggregate_urn"); }
function remote_urn() { return $this->field("remote_urn"); }
function remote_url() { return $this->field("remote_url"); }
function type() { return $this->field("type"); }
function fstype() { return $this->field("fstype"); }
function created() { return NullDate($this->field("created")); }
......@@ -176,6 +177,12 @@ class Dataset
function URN() {
return $this->remote_urn();
}
function URL() {
    # Only datasets whose type is "imdataset" report a URL; every other
    # type yields null.
    return ($this->type() == "imdataset") ? $this->remote_url() : null;
}
function deleteCommand() {
return "webmanage_dataset delete " . $this->pid() . "/" . $this->id();
......
......@@ -38,7 +38,7 @@ $urn_mapping =
"urn:publicid:IDN+emulab.net+authority+cm" => "MS",
"urn:publicid:IDN+utahddc.geniracks.net+authority+cm" => "DDC",
"urn:publicid:IDN+stitch.geniracks.net+authority+cm" => "UStitch",
"urn:publicid:IDN+al2s.internet2.edu+authority+cm" => "AL2S");
"urn:publicid:IDN+al2s.internet2.edu+authority+am" => "AL2S");
$freenodes_mapping =
array("urn:publicid:IDN+utah.cloudlab.us+authority+cm" =>
......@@ -318,7 +318,9 @@ class Instance
'APT Utah' =>
"urn:publicid:IDN+apt.emulab.net+authority+cm",
'IG UtahDDC' =>
"urn:publicid:IDN+utahddc.geniracks.net+authority+cm"
"urn:publicid:IDN+utahddc.geniracks.net+authority+cm",
'Utah PG' =>
"urn:publicid:IDN+emulab.net+authority+cm"
);
} else {
$am_array = array(
......
......@@ -108,6 +108,9 @@ $fields["dataset_write"] = $dataset->write_access();
if (ISADMIN()) {
$fields["dataset_idx"] = $dataset->idx();
}
if ($dataset->type() == "imdataset") {
$fields["dataset_url"] = $dataset->URL();
}
#
# The state is a bit of a problem, since local leases do not have
......
......@@ -83,8 +83,24 @@
</tr>
<tr>
<td>URN</td>
<td><%- formfields.dataset_urn %></td>
<td>
<input onClick="this.select();"
class='form-control'
readonly
value='<%- formfields.dataset_urn %>'>
</td>
</tr>
<% if (_.has(formfields, 'dataset_url')) { %>
<tr>
<td>URL</td>
<td>
<input onClick="this.select();"
class='form-control'
readonly
value='<%- formfields.dataset_url %>'>
</td>
</tr>
<% } %>
<% if (_.has(formfields, 'dataset_idx')) { %>
<tr>
<td>Internal ID</td>
......
......@@ -1153,4 +1153,19 @@ class Image
function OSinfo() {
return OSinfo::Lookup($this->imageid(), $this->version());
}
function URL() {
    # Metadata URL keyed by this image's image_uuid().
    global $TBBASE;

    $image_uuid = $this->image_uuid();

    return "$TBBASE/image_metadata.php?uuid=$image_uuid";
}
function VersionURL() {
    # Metadata URL keyed by uuid() rather than image_uuid()
    # (presumably the uuid of this specific image version -- confirm).
    global $TBBASE;

    $uuid = $this->uuid();

    return "$TBBASE/image_metadata.php?uuid=$uuid";
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment