Commit 88e0ecb5 authored by Leigh Stoller's avatar Leigh Stoller

Checkpoint multisite support.

parent e11d79be
......@@ -46,6 +46,10 @@ my $MAINSITE = @TBMAINSITE@;
my $TBOPS = "@TBOPSEMAIL@";
my $SACERT = "$TB/etc/genisa.pem";
# Cache credentials so we do not keep regenerating down inside the
# libraries that make the XMLRPC calls.
my %credcache = ();
#
# Generate the credentials we need.
#
......@@ -66,6 +70,15 @@ sub GenCredentials($$;$)
$speaker_signer = "/usr/testbed/etc/utah-apt.sa";
}
#
# Check cache.
#
my $cachetag = $target->urn() . "::" . $geniuser->urn();
if (exists($credcache{$cachetag})) {
($credential,$speaksfor) = @{ $credcache{$cachetag} };
goto cached;
}
#
# If a local user account, but a nonlocal id, then we should
# have a speaksfor credential stored, as well as a certificate
......@@ -139,6 +152,10 @@ sub GenCredentials($$;$)
print STDERR "Could not sign $target credential\n";
goto bad;
}
if ($wantspeaksfor) {
$credcache{$cachetag} = [$credential, $speaksfor];
}
cached:
if (wantarray) {
return ($credential, $speaksfor);
}
......
......@@ -87,21 +87,22 @@ sub Lookup($$;$)
$self->{'INSTANCE'} = $query_result->fetchrow_hashref();
$self->{'BRAND'} = Brand->Create($self->{'INSTANCE'}->{'servername'});
$self->{'HASH'} = {};
$self->{'AGGREGATES'} = [];
$self->{'AGGREGATES'} = {};
bless($self, $class);
#
# Lookup existing aggregates.
#
my @aggregates = APT_Instance::Aggregate->LookupForInstance($self);
if (!@aggregates && defined($self->aggregate_urn())) {
my $aggregates = APT_Instance::Aggregate->LookupForInstance($self);
if (!keys(%{$aggregates}) && defined($self->aggregate_urn())) {
#
# Make up a fake one; eventually the old ones will die or
# I will create entries for them. Not worrying about it now.
#
@aggregates = (APT_Instance::Aggregate->GenTemp($self));
$aggregates = {$self->aggregate_urn() =>
APT_Instance::Aggregate->GenTemp($self)};
}
$self->{'AGGREGATES'} = \@aggregates;
$self->{'AGGREGATES'} = $aggregates;
# Add to cache.
$instances{"$uuid"} = $self;
......@@ -134,7 +135,8 @@ AUTOLOAD {
sub Brand($) { return $_[0]->{'BRAND'}; }
sub isAPT($) { return $_[0]->Brand()->isAPT() ? 1 : 0; }
sub isCloud($) { return $_[0]->Brand()->isCloud() ? 1 : 0; }
sub AggregateList($) { return $_[0]->{'AGGREGATES'}; }
sub AggregateList($) { return values(%{ $_[0]->{'AGGREGATES'} }); }
sub AggregateHash($) { return $_[0]->{'AGGREGATES'}; }
# Break circular reference someplace to avoid exit errors.
sub DESTROY {
......@@ -302,7 +304,7 @@ sub Delete($)
my ($self) = @_;
my $uuid = $self->uuid();
foreach my $agg (@{ $self->AggregateList() }) {
foreach my $agg ($self->AggregateList()) {
$agg->Delete() == 0
or return -1;
}
......@@ -540,7 +542,7 @@ sub ComputeNodeCounts($)
my ($self) = @_;
my $physnode_count = 0;
my $virtnode_count = 0;
my @slivers = @{ $self->AggregateList() };
my @slivers = $self->AggregateList();
if (!@slivers) {
print STDERR "No slivers for $self\n";
return -1;
......@@ -554,6 +556,12 @@ sub ComputeNodeCounts($)
foreach my $ref (GeniXML::FindNodes("n:node",
$manifest)->get_nodelist()) {
my $virtualization_type = GeniXML::GetVirtualizationSubtype($ref);
my $manager_urn = GetManagerId($ref);
# Combined rspec.
next
if (!defined($manager_urn) ||
$manager_urn ne $sliver->aggregate_urn());
if (defined($virtualization_type) &&
$virtualization_type eq "emulab-xen") {
......@@ -580,7 +588,7 @@ sub AddAggregate($$)
return undef
if (!defined($aggobj));
push(@{ $self->{'AGGREGATES'} }, $aggobj);
$self->{'AGGREGATES'}->{$aggregate_urn} = $aggobj;
return $aggobj;
}
......@@ -590,7 +598,7 @@ sub AddAggregate($$)
sub FindAggregateByNodeId($$)
{
my ($self, $node_id) = @_;
my @slivers = @{ $self->AggregateList() };
my @slivers = $self->AggregateList();
if (!@slivers) {
return undef;
}
......@@ -614,6 +622,43 @@ sub FindAggregateByNodeId($$)
return undef;
}
#
# Write the slice credential and the speaksfor credential for this
# instance into $directory, as slicecred.xml and speaksforcred.xml.
# An existing file of the same name is removed before it is rewritten.
#
# Returns 0 on success. Every failure is reported through fatal().
#
sub WriteCredentials($$)
{
    my ($self, $directory) = @_;
    my $geniuser = $self->GetGeniUser();
    my $slice    = $self->GetGeniSlice();

    # The result is not used here. The call is retained in case it has
    # setup side effects -- TODO confirm and drop if it does not.
    APT_Geni::GeniContext();

    if (! (defined($geniuser) && defined($slice))) {
        fatal("Could not lookup geniuser or slice");
    }
    my ($slice_credential, $speaksfor_credential) =
        APT_Geni::GenCredentials($slice, $geniuser);
    if (! (defined($speaksfor_credential) &&
           defined($slice_credential))) {
        fatal("Could not create credentials");
    }

    # Same write sequence for both files; order matches the original.
    my @outputs = (["slicecred.xml",     $slice_credential],
                   ["speaksforcred.xml", $speaksfor_credential]);

    foreach my $output (@outputs) {
        my ($name, $credential) = @{ $output };
        my $credfile = "$directory/$name";

        unlink($credfile)
            if (-e $credfile);

        # Three-arg open with a lexical handle, so the file name can
        # never be interpreted as part of the open mode.
        open(my $fh, ">", $credfile) or
            fatal("Could not create $credfile");
        print $fh $credential->asString();

        # Buffered write errors only surface at close time.
        close($fh) or
            fatal("Could not write $credfile: $!");
    }
    return 0;
}
###################################################################
package APT_Instance::Aggregate;
use emdb;
......@@ -714,7 +759,7 @@ sub GenTemp($$)
sub LookupForInstance($$)
{
my ($class, $instance) = @_;
my @result = ();
my $result = {};
my $uuid = $instance->uuid();
my $query_result =
......@@ -729,9 +774,9 @@ sub LookupForInstance($$)
print STDERR "No apt_instance_aggregate for $uuid/$aggregate_urn\n";
return ();
}
push(@result, $agg);
$result->{$aggregate_urn} = $agg;
}
return @result;
return $result;
}
# Break circular reference someplace to avoid exit errors.
......@@ -1099,6 +1144,131 @@ sub SliceStatus($)
return Genixmlrpc::CallMethod($cmurl, $context, "SliverStatus", $args);
}
#
# Ask the aggregate for the manifest, using the "Resolve" method on the
# slice URN.
#
# A reply of GENIRESPONSE_SERVER_UNAVAILABLE or GENIRESPONSE_BUSY is
# retried after a random 10-29 second sleep, for at most ten calls in
# total. Any other failure, or running out of attempts, is reported on
# STDERR.
#
# Returns the 'manifest' entry of the response value, or undef if the
# user/authority/slice/context or the credentials are unavailable, the
# call fails, or the response carries no manifest.
#
sub GetManifest($)
{
    my ($self) = @_;
    my $authority = $self->GetGeniAuthority();
    my $urn       = $self->aggregate_urn();
    my $geniuser  = $self->instance()->GetGeniUser();
    my $slice     = $self->instance()->GetGeniSlice();
    my $context   = APT_Geni::GeniContext();
    return undef
        if (! (defined($geniuser) && defined($authority) &&
               defined($slice) && defined($context)));

    my ($slice_credential, $speaksfor_credential) =
        APT_Geni::GenCredentials($slice, $geniuser);
    return undef
        if (! (defined($speaksfor_credential) &&
               defined($slice_credential)));

    my $args = {
        "urn"         => $slice->urn(),
        "credentials" => [$slice_credential->asString(),
                          $speaksfor_credential->asString()],
    };
    my $cmurl = $authority->url();
    # Debugging: redirect to the development tree.
    $cmurl =~ s/protogeni/protogeni\/stoller/ if ($usemydevtree);

    my $tries = 10;
    my $response;
    while (1) {
        $response =
            Genixmlrpc::CallMethod($cmurl, $context, "Resolve", $args);
        last
            if (defined($response) &&
                $response->code() == GENIRESPONSE_SUCCESS);

        my $busy = (defined($response) &&
                    ($response->code() == GENIRESPONSE_SERVER_UNAVAILABLE ||
                     $response->code() == GENIRESPONSE_BUSY));

        # Retry only while attempts remain. Once they run out we must
        # fail here rather than fall through with a failed response.
        if ($busy && --$tries > 0) {
            print STDERR "Server for $urn reports too busy or slice busy, ".
                "waiting a while ...\n";
            sleep(int(rand(20)) + 10);
            next;
        }
        print STDERR "Resolve failed on $urn: ".
            (defined($response) ? $response->output() : "") . "\n";
        return undef;
    }

    # Guard against a successful response that carries no value at all.
    my $value = $response->value();
    return undef
        if (!defined($value) || !exists($value->{'manifest'}));
    return $value->{'manifest'};
}
#
# Ask the aggregate to provision the slice, using the AM V3 API
# "Provision" method.
#
#   $perrmsg  reference to a scalar; set to the server output when the
#             call fails and a response was received.
#   $keys     passed through as the 'keys' entry of the geni_users
#             option.
#
# A reply of GENIRESPONSE_SERVER_UNAVAILABLE or GENIRESPONSE_BUSY is
# retried after a random 10-29 second sleep, for at most ten calls in
# total.
#
# Returns 0 on success and -1 on any failure, including running out of
# retry attempts.
#
sub Provision($$$)
{
    my ($self, $perrmsg, $keys) = @_;
    my $authority = $self->GetGeniAuthority();
    my $urn       = $self->aggregate_urn();
    my $geniuser  = $self->instance()->GetGeniUser();
    my $slice     = $self->instance()->GetGeniSlice();
    my $context   = APT_Geni::GeniContext();
    return -1
        if (! (defined($geniuser) && defined($authority) &&
               defined($slice) && defined($context)));

    my ($slice_credential, $speaksfor_credential) =
        APT_Geni::GenCredentials($slice, $geniuser);
    return -1
        if (! (defined($speaksfor_credential) &&
               defined($slice_credential)));

    #
    # AM V3 API.
    #
    my @params = ([$slice->urn()],
                  [{"geni_type"    => "geni_sfa",
                    "geni_version" => 3,
                    "geni_value"   => $speaksfor_credential->asString()},
                   {"geni_type"    => "geni_sfa",
                    "geni_version" => 3,
                    "geni_value"   => $slice_credential->asString()},
                  ],
                  # Options array.
                  {"speaking_for"      => $geniuser->urn(),
                   "geni_speaking_for" => $geniuser->urn(),
                   "geni_users"        => [{'urn'  => $geniuser->urn(),
                                            'keys' => $keys }],
                  });

    my $cmurl = $authority->url();
    # Convert URL.
    $cmurl =~ s/\/cm$/\/am/;
    # Debugging: redirect to the development tree.
    $cmurl =~ s/protogeni/protogeni\/stoller/ if ($usemydevtree);
    $cmurl .= "/3.0";

    my $tries = 10;
    while (1) {
        my $response =
            Genixmlrpc::CallMethod($cmurl, $context, "Provision", @params);
        last
            if (defined($response) &&
                $response->code() == GENIRESPONSE_SUCCESS);

        my $busy = (defined($response) &&
                    ($response->code() == GENIRESPONSE_SERVER_UNAVAILABLE ||
                     $response->code() == GENIRESPONSE_BUSY));

        # Retry only while attempts remain. Running out of attempts is
        # a failure and must not be reported to the caller as success.
        if ($busy && --$tries > 0) {
            print STDERR "Server for $urn reports too busy or slice busy, ".
                "waiting a while ...\n";
            sleep(int(rand(20)) + 10);
            next;
        }
        $$perrmsg = $response->output()
            if (defined($response));
        return -1;
    }
    return 0;
}
#
# Ask aggregate for the console URL for a node.
#
......@@ -1205,11 +1375,13 @@ sub CreateImage($$$$;$)
#
# Reboot some nodes
#
sub SliverAction($$@)
sub SliverAction($$$@)
{
my ($self, $which, @slivers) = @_;
my $method = ($which eq "reboot" ? "RestartSliver" : "ReloadSliver");
my ($self, $perrmsg, $which, @slivers) = @_;
my $method = ($which eq "reboot" ? "RestartSliver" :
($which eq "start" ? "StartSliver" : "ReloadSliver"));
my $authority = $self->GetGeniAuthority();
my $urn = $self->aggregate_urn();
my $geniuser = $self->instance()->GetGeniUser();
my $slice = $self->instance()->GetGeniSlice();
my $context = APT_Geni::GeniContext();
......@@ -1224,14 +1396,41 @@ sub SliverAction($$@)
defined($slice_credential)));
my $args = {
"sliver_urns" => \@slivers,
"credentials" => [$slice_credential->asString(),
$speaksfor_credential->asString()],
};
if (@slivers) {
$args->{"sliver_urns"} = \@slivers;
}
else {
$args->{"slice_urn"} = $slice->urn();
}
my $cmurl = $authority->url();
$cmurl =~ s/protogeni/protogeni\/stoller/ if ($usemydevtree);
return Genixmlrpc::CallMethod($cmurl, $context, $method, $args);
my $response;
my $tries = 5;
while ($tries) {
$response = Genixmlrpc::CallMethod($cmurl, $context, $method, $args);
if (!defined($response) || $response->code() != GENIRESPONSE_SUCCESS) {
if (defined($response) &&
($response->code() == GENIRESPONSE_SERVER_UNAVAILABLE ||
$response->code() == GENIRESPONSE_BUSY) &&
$tries >= 0) {
print STDERR "Server for $urn reports too busy or slice busy, ".
"waiting a while ...\n";
sleep(int(rand(20)) + 10);
$tries--;
next;
}
$$perrmsg = $response->output()
if (defined($response));
return $response;
}
last;
}
return $response;
}
#
......@@ -1291,7 +1490,7 @@ sub ImageInfo($$)
my $cmurl = $authority->url();
$cmurl =~ s/protogeni/protogeni\/stoller/ if ($usemydevtree);
return Genixmlrpc::CallMethod($cmurl, $context, "SliverStatus", $args);
return Genixmlrpc::CallMethod($cmurl, $context, "ImageInfo", $args);
}
# _Always_ make sure that this 1 is at the end of the file...
......
......@@ -947,9 +947,9 @@ sub CheckDatasets($$$)
#
# Set the component_manager_urn for the sites.
#
sub SetSites($$$)
sub SetSites($$$$)
{
my ($prspecstr, $sitemap, $perrmsg) = @_;
my ($prspecstr, $sitemap, $pneedstitcher, $perrmsg) = @_;
my $rspec = GeniXML::Parse($$prspecstr);
if (! defined($rspec)) {
......@@ -961,17 +961,60 @@ sub SetSites($$$)
my $site_id = GeniXML::GetJacksSiteId($ref);
if (!defined($site_id)) {
$$perrmsg = "No site ID for $client_id";
$$perrmsg = "No site ID for node $client_id";
return -1;
}
my $site_mid = "site:" . $site_id;
if (!exists($sitemap->{$site_mid})) {
$$perrmsg = "No site mapping for $client_id ($site_id)";
$$perrmsg = "No site mapping for node $client_id ($site_id)";
return -1;
}
GeniXML::SetManagerId($ref, $sitemap->{$site_mid});
GeniXML::SetJacksSiteManagerId($ref, $sitemap->{$site_mid});
}
foreach my $ref (GeniXML::FindNodes("n:link", $rspec)->get_nodelist()) {
my %linksites = ();
my $client_id = GetVirtualId($ref);
foreach my $siteref (GeniXML::FindNodesNS("n:site", $ref,
$GeniXML::JACKS_NS)->get_nodelist()) {
my $site_id = GeniXML::GetText("id", $siteref);
if (!defined($site_id)) {
$$perrmsg = "No site ID for link $client_id";
return -1;
}
my $site_mid = "site:" . $site_id;
if (!exists($sitemap->{$site_mid})) {
$$perrmsg = "No site mapping for link $client_id ($site_id)";
return -1;
}
GeniXML::AddManagerToLink($ref, $sitemap->{$site_mid});
$linksites{$sitemap->{$site_mid}} = 1;
}
# if more than one site for a link, must use the stitcher.
$$pneedstitcher = 1
if (keys(%linksites) > 1);
}
$$prspecstr = GeniXML::Serialize($rspec);
return 0;
}
#
# Bind every node in the rspec to a single aggregate.
#
#   $prspecstr      reference to the rspec string; replaced with the
#                   re-serialized rspec on success.
#   $aggregate_urn  manager id to set on each n:node element.
#   $perrmsg        reference to a scalar that receives the error text
#                   when the rspec cannot be parsed.
#
# Returns 0 on success, -1 if the rspec does not parse.
#
sub BindRspec($$$)
{
    my ($prspecstr, $aggregate_urn, $perrmsg) = @_;

    my $parsed = GeniXML::Parse($$prspecstr);
    unless (defined($parsed)) {
        $$perrmsg = "Could not parse rspec\n";
        return -1;
    }

    my @nodes = GeniXML::FindNodes("n:node", $parsed)->get_nodelist();
    foreach my $node (@nodes) {
        GeniXML::SetManagerId($node, $aggregate_urn);
    }

    # Hand the modified rspec back through the caller's reference.
    $$prspecstr = GeniXML::Serialize($parsed);
    return 0;
}
......
......@@ -31,7 +31,7 @@ use strict;
use English;
use Getopt::Long;
use XML::Simple;
use File::Temp qw(tempfile :POSIX );
use File::Temp qw(tempfile tmpnam :POSIX);
use Data::Dumper;
use Cwd qw(realpath);
......@@ -40,18 +40,18 @@ use Cwd qw(realpath);
#
sub usage()
{
print "Usage: quickvm [-u uuid] [--site site1=aggregate ...] <xmlfile>\n";
print "Usage: quickvm [-u uuid] [--site site:1=aggregate ...] <xmlfile>\n";
exit(1);
}
my @optlist = ('d', 'v', 'u=s', 't=s', 'a=s', 'f');
my @optlist = ('d', 'v', 'u=s', 't=s', 'a=s', 'S');
my $debug = 0;
my $verbose = 1;
my $DEFAULT_URN = "urn:publicid:IDN+apt.emulab.net+authority+cm";
my $xmlfile;
my $webtask;
my $webtask_id;
my $foreground = 0;
my $localuser = 0;
my $usestitcher= 0;
my $quickuuid;
my $default_aggregate_urn = $DEFAULT_URN;
my $this_user;
......@@ -70,6 +70,8 @@ sub UserError($);
sub SnapShot($$$);
sub GenCredentials($$$$);
sub CreateDatasetCreds($$$$$);
sub CreateSlivers();
sub RunStitcher();
#
# Configure variables
......@@ -86,6 +88,7 @@ my $SSHKEYGEN = "/usr/bin/ssh-keygen";
my $SSHSETUP = "$TB/sbin/aptssh-setup";
my $ADDPUBKEY = "$TB/sbin/addpubkey";
my $UPDATEGENIUSER= "$TB/sbin/protogeni/updategeniuser";
my $STITCHER = "/usr/testbed/gcf/src/stitcher.py";
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin';
......@@ -139,8 +142,8 @@ if (defined($options{"d"})) {
if (defined($options{"v"})) {
$verbose = 1;
}
if (defined($options{"f"})) {
$foreground = 1;
if (defined($options{"S"})) {
$usestitcher = 1;
}
if (defined($options{"t"})) {
$webtask_id = $options{"t"};
......@@ -319,10 +322,18 @@ else {
#
# Update rspec with site aggregate urns.
#
if (keys(%{$sitemap}) > 1) {
if (APT_Profile::SetSites(\$rspecstr, $sitemap, \$errmsg)) {
if (keys(%{$sitemap})) {
# SetSites will tell us if we must use stitcher.
my $needstitcher = 0;
if (APT_Profile::SetSites(\$rspecstr, $sitemap, \$needstitcher, \$errmsg)) {
fatal($errmsg);
}
# but do not override command line force.
$usestitcher = 1 if ($needstitcher);
}
elsif (APT_Profile::BindRspec(\$rspecstr, $default_aggregate_urn, \$errmsg)) {
fatal($errmsg);
}
if (keys(%{$sitemap})) {
foreach my $siteid (keys(%{$sitemap})) {
......@@ -720,126 +731,20 @@ if (!$debug) {
# Bind the process id.
$webtask->SetProcessID($PID);
#
# Create a sliver at a single aggregate. This is called from parrun
# so it needs to return success or failure, we lookup the results
# in the DB.
#
# Takes one argument, the aggregate object. The slice URN, rspec, user
# identity, ssh keys and credentials sent in the request are not
# parameters; they are read from variables declared outside this
# function ($slice_urn, $rspecstr, $user_urn, $user_uid, @sshkeys,
# $slice_credential, $speaksfor_credential, @dataset_credentials).
#
# Returns 0 once the aggregate is marked "provisioned" and its manifest
# is stored, -1 on failure (the aggregate is then marked "failed").
#
sub CreateSliver($)
{
my ($ref) = @_;
my $aggobj = $ref;
# Reload the aggregate object before using it.
$aggobj->Refresh();
my $webtask = $aggobj->webtask();
my $authority = $aggobj->_authority();
my $cmurl = $authority->url();
my $urn = $authority->urn();
$webtask->Refresh();
# Debugging
# When $usemydevtree is set, the URL is rewritten to protogeni/stoller.
$cmurl =~ s/protogeni/protogeni\/stoller/ if ($usemydevtree);
#
# This creates the sliver and starts it. We have to watch for the
# server being too busy.
#
# $tries starts at 15 and the retry test below is ">= 0", so a busy
# server is retried up to 16 times before giving up.
my $tries = 15;
my $response;
while (1) {
$response =
Genixmlrpc::CallMethod($cmurl, undef,
"CreateSliver",
{ "slice_urn" => $slice_urn,
"rspec" => $rspecstr,
"keys" =>
[{'urn' => $user_urn,
'login' => $user_uid,
'keys' => \@sshkeys }],
"credentials" =>
[$slice_credential->asString(),
$speaksfor_credential->asString(),
@dataset_credentials
]});
if (!defined($response) || $response->code() != GENIRESPONSE_SUCCESS) {
# Only "server unavailable" is retried; every other failure, and a
# missing response, is handled as a hard failure below.
if (defined($response) &&
$response->code() == GENIRESPONSE_SERVER_UNAVAILABLE &&
$tries >= 0) {
print STDERR "Server for $urn reports too busy, ".
"waiting a while ...\n";
# Random back-off of 10 to 29 seconds.
sleep(int(rand(20)) + 10);
$tries--;
next;
}
# Record the failure in the aggregate's webtask: the server output and
# code when there is a response, otherwise an exit value of 1.
if (defined($response)) {
$webtask->output($response->output());
$webtask->Exited($response->code());
}
else {
$webtask->Exited(1);
}
$aggobj->SetStatus("failed");
# Keep the log URL from the failed response, when one was returned.
if (defined($response) && defined($response->logurl())) {
$aggobj->SetPublicURL($response->logurl());
}
print STDERR "CreateSliver failed on $urn: ".
(defined($response) ? $response->output() : "") . "\n";
return -1;
}
last;
}
# This will get overwritten later.
if (defined($response) && defined($response->logurl())) {
$aggobj->SetPublicURL($response->logurl());
}
# The manifest is the second element of the response value.
my $manifest = $response->value()->[1];
if (!defined($manifest)) {
$webtask->Exited(1);
$aggobj->SetStatus("failed");
print STDERR "CreateSliver $urn: No manifest returned\n";
return -1;
}
$aggobj->SetStatus("provisioned");
$aggobj->SetManifest($manifest);
return 0;
}
#
# Finally, do it.
#
my @return_codes = ();
if (ParRun({"maxwaittime" => 99999, "maxchildren" => scalar(@aggregate_list)},
\@return_codes, \&CreateSliver, @aggregate_list)) {
#
# The parent caught a signal. Leave things intact so that we can
# kill things cleanly later.
#
$slice->UnLock();
$instance->SetStatus("failed");
$webtask->Exited(1);
exit(-1);
}
#
# Check the exit codes; any failure is a total failure (for now).
#
foreach my $aggobj (@aggregate_list) {
#
# Have to refresh the sliver objects since they were updated in a fork.
# Need the manifests for the call to ComputeNodeCounts below.
#
$aggobj->Refresh();
my $code = shift(@return_codes);
if ($code) {
if ($usestitcher) {
my $rval = RunStitcher();
if ($rval) {
$slice->UnLock();
$instance->SetStatus("failed");
$webtask->output($aggobj->webtask()->output())
if (defined($aggobj->webtask()->output()));
$webtask->Exited(1);
exit(1);
exit($rval);
}
}
else {
my $rval = CreateSlivers();
exit($rval)
if ($rval);
}
$instance->SetStatus("provisioned");
$instance->ComputeNodeCounts();
......@@ -954,7 +859,7 @@ sub WaitForSliver($)
#
# Okay, fire off the waits for each aggregate
#
@return_codes = ();
my @return_codes = ();
if (ParRun({"maxwaittime" => 99999, "maxchildren" => scalar(@aggregate_list)},
\@return_codes, \&WaitForSliver, @aggregate_list)) {
#
......@@ -989,7 +894,6 @@ foreach my $aggobj (@aggregate_list) {
$webtask->output($aggobj->webtask()->output())
if (defined($aggobj->webtask()->output()));
}
$count++;
}
$slice->UnLock();
......@@ -1021,6 +925,10 @@ sub CreateDatasetCreds($$$$$)
$ref,
$GeniXML::EMULAB_NS)->get_nodelist()) {
my $leaseurn = GeniXML::GetText("persistent", $blockref);
if (!defined($leaseurn)) {
# persistent is deprecated.
$leaseurn = GeniXML::GetText("dataset", $blockref);
}
#
# We only care about datasets here, we let the backend
......@@ -1065,6 +973,331 @@ sub CreateDatasetCreds($$$$$)
return 0;
}
#
# Create a sliver at a single aggregate. This is called from parrun
# so it needs to return success or failure, we lookup the results
# in the DB.
#
sub CreateSliver($)
{
my ($ref) = @_;
my $aggobj = $ref;
$aggobj->Refresh();
my $webtask = $aggobj->webtask();
my $authority = $aggobj->_authority();
my $cmurl = $authority->url();
my $urn = $authority->urn();
$webtask->Refresh();
# Debugging
$cmurl =~ s/protogeni/protogeni\/stoller/ if ($usemydevtree);
#
# This creates the sliver and starts it. We have to watch for the
# server being too busy.
#
my $tries = 15;