Commit 0c749af4 authored by Leigh B Stoller

Add a "monitor" process to start/restart sliver to watch nodes.

This is very similar to what Emulab does on the swapin path for
normal experiments: wait and watch the nodes to see which ones
fail or otherwise time out. Until now, we did not do this on the
PG path, so failed nodes were never signaled and the slice
was left in a changing state forever. This also allows us to capture
the node bootlogs and convert them to logfiles that we can associate
with the slice on the showslice web page.

Details: start/restart forks a child (WrapperFork()) and allows
the parent to return to the client. The slice is unlocked so that
the client can call SliverStatus(), etc. But the client cannot
do anything that actually changes the sliver (update, stop, etc.)
until the monitor finishes (or times out on its own). The lone
exception is DeleteSlice(), which will asynchronously kill the
monitor and then terminate the slice. Ditto the command line
script "cleanupslice".

We will probably need to add another way to allow the client to
terminate the monitor early, but have not decided where yet.
parent 5953f4fb
......@@ -914,6 +914,11 @@ sub Update
my ($slice, $aggregate) = GeniCMV2::Credential2SliceAggregate($cred);
if (defined($slice)) {
main::AddLogfileMetaDataFromSlice($slice);
# If a monitor process is running, we are "busy".
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
}
# The URN list must be either empty or contain the slice URN.
......
......@@ -52,6 +52,9 @@ use GeniComponent;
use GeniHRN;
use GeniXML;
use emutil;
use EmulabConstants;
use Logfile;
use libtestbed;
use Data::Dumper;
use English;
use overload ('""' => 'Stringify');
......@@ -850,6 +853,10 @@ sub Start($$$)
$msg .= "$node no longer belongs to $self";
goto bad;
}
$node->_reloaded(0);
# Backpointer used in WaitForNodes().
$node->_sliver($sliver);
if ($reservation->SameExperiment($experiment)) {
my $vnode;
......@@ -872,6 +879,7 @@ sub Start($$$)
}
# See below.
$vnode = $node;
$vnode->_parent(undef);
#
# We now allow the user to specify the OS for vnodes.
......@@ -897,6 +905,7 @@ sub Start($$$)
push(@{ $reloads{$image->imageid()} }, $vnode);
$imageinfo{$vnode->node_id()} = [$osinfo->osid(),
$image->imageid()];
$vnode->_reloaded(1);
}
# No more to do.
......@@ -913,15 +922,18 @@ sub Start($$$)
# But, have to make sure that the phys node gets setup.
#
my $physnodeid = $node->phys_nodeid();
next
if (exists($poweron{$physnodeid}) ||
exists($reboots{$physnodeid}) ||
exists($reloads{$physnodeid}));
$node = Node->Lookup($physnodeid);
if (!defined($node)) {
$msg .= "Could not lookup $physnodeid";
goto bad;
}
$node->_reloaded(0);
# Signal that vnode depends on parent.
$vnode->_parent($node);
next
if (exists($poweron{$physnodeid}) ||
exists($reboots{$physnodeid}) ||
exists($reloads{$physnodeid}));
}
#
# If the node is not imageable, then there is not much to
......@@ -978,6 +990,7 @@ sub Start($$$)
$reloads{$image->imageid()} = [ ];
}
push(@{ $reloads{$image->imageid()} }, $node);
$node->_reloaded(1);
# As with os_setup, we do not count images unless
# they are actually reloaded. I have no idea why.
......@@ -1045,6 +1058,14 @@ sub Start($$$)
# See "bad" label below.
$sliver = undef;
my @waitvnodes = values(%vnodes);
my @waitpnodes = (values(%poweron), values(%reboots));
# Want to make sure we see fresh logs (and do not store the same log).
foreach my $node (@waitpnodes, @waitpnodes) {
$node->ClearBootLog();
}
#
# Cull out vnodes that are going to get rebooted cause the
......@@ -1285,6 +1306,9 @@ sub Start($$$)
$sliver->SetState("started")
if (ref($sliver) eq "GeniSliver::Node");
}
$self->WaitForNodes(@waitpnodes, @waitvnodes);
return 0;
bad:
......@@ -1299,6 +1323,239 @@ sub Start($$$)
return -1;
}
#
# Wait for nodes.
#
# Watch the given nodes until each one either reports one of the
# terminal boot states (TBDB_NODESTATE_TBFAILED or TBDB_NODESTATE_ISUP)
# or exceeds its per-node wait limit, then record boot logs and send
# mail about any failures.
#
# The waiting is done in a child created with main::WrapperFork(); the
# parent returns 0 right after the fork so the caller can get back to
# the client. The child stores its own pid as the slice "monitor pid"
# and clears it again just before returning.
#
# Arguments: $self  - the aggregate being started.
#            @nodes - node objects to watch, in the order they should
#                     be polled.
#
# Returns:  0 if there is nothing to wait for, in the parent after the
#             fork, and in the child once all nodes have been handled.
#          -1 if the slice, experiment, group or creator cannot be
#             found (before forking), or in the child if the experiment
#             cancel flag was seen while waiting.
#
# NOTE(review): this code calls _sliver() on every node passed in and
# on every node returned by _parent(), so it assumes the caller set
# those backpointers beforehand - confirm against Start().
#
sub WaitForNodes($@)
{
    my ($self, @nodes) = @_;
    my %nodes = ();
    my @waitstates = (TBDB_NODESTATE_TBFAILED, TBDB_NODESTATE_ISUP);

    # Nothing to watch, so no reason to fork a monitor.
    return 0
        if (!@nodes);

    #
    # Look up everything we need before forking, so that lookup errors
    # are reported to the caller instead of being lost in the child.
    #
    my $slice = $self->GetSlice();
    if (!defined($slice)) {
        print STDERR "WaitForNodes: Could not map $self to its slice\n";
        return -1;
    }
    my $experiment = Experiment->Lookup($self->slice_uuid());
    if (!defined($experiment)) {
        print STDERR "Could not map $self to its experiment\n";
        return -1;
    }
    my $group = $experiment->GetGroup();
    if (!defined($group)) {
        print STDERR "Could not map $self to its experiment group\n";
        return -1;
    }
    my $creator = $self->GetCreator();
    if (!defined($creator)) {
        print STDERR "Could not map $self to its creator\n";
        return -1;
    }

    #
    # At this point we want to return and let the startsliver proceed
    # in the background.
    #
    my $mypid = main::WrapperFork();
    if ($mypid) {
        return 0;
    }
    # Child from here on; advertise ourselves as the slice monitor.
    $slice->SetMonitorPid($PID);

    #
    # This is essentially what libossetup (os_setup) does. I want to
    # eventually use that code directly, but that will require some
    # restructuring in that code.
    #
    my %childcounts = ();

    # Build the hash of nodes still being waited on, keyed by node_id.
    foreach my $node (@nodes) {
        $nodes{$node->node_id()} = $node;
        $node->_waitstart(time());
        $node->_waitend(undef);
        $node->Refresh();

        #
        # Count up number of virtnodes on each physnode.
        #
        if ($node->isvirtnode()) {
            if (!exists($childcounts{$node->phys_nodeid()})) {
                $childcounts{$node->phys_nodeid()} = 0;
            }
            $childcounts{$node->phys_nodeid()} += 1;
        }
    }

    #
    # Set the waitmax time (in seconds) for each node: 500 to start
    # with, plus 300 more if the node was marked as reloaded.
    #
    foreach my $node (@nodes) {
        $node->_maxwait(500 + ($node->_reloaded() ? 300 : 0));

        #
        # Bump waittime according to number of virtnodes on each
        # physnode; 60 seconds for each watched virtnode.
        #
        if ($node->isvirtnode()) {
            $node->_maxwait($node->_maxwait() +
                            ($childcounts{$node->phys_nodeid()} * 60));
        }
    }

    #
    # Start a counter going, relative to the time we rebooted the first
    # node.
    #
    my $waittime = 0;
    my $minutes  = 0;
    my $canceled = $experiment->canceled();

    #
    # Wait for the nodes to finish booting, as recorded in database.
    #
    while (keys(%nodes)) {
        #
        # Check for cancelation. We quit the monitor.
        #
        $canceled = $experiment->canceled();
        if ($canceled) {
            print STDERR "WaitForNodes canceled; terminating early!\n";

            # Reset before return; do not want it left.
            $slice->LockTables();
            $experiment->SetCancelFlag(0);
            $slice->ClearMonitorPid();
            $slice->UnLockTables();
            return -1;
        }

        #
        # We want to do this in the order the nodes were passed in, so
        # walk the original list instead of the keys of the hash.
        #
        foreach my $node (@nodes) {
            my $node_id = $node->node_id();

            # Already done?
            next
                if (!exists($nodes{$node_id}));

            #
            # If this is a virtnode, check to see if the parent node
            # failed to boot. No point in going on. Also reset the
            # start time to the time that the parent came ready.
            # No parent if its a shared node, and the phys node will
            # already be ready anyway.
            #
            if ($node->isvirtnode() && defined($node->_parent())) {
                my $parent = $node->_parent();

                # Skip if still waiting on the parent.
                next
                    if (!defined($parent->_waitend()));

                if ($parent->_sliver()->status() eq "failed") {
                    $node->_sliver()->SetStatus("failed");
                    $node->_waitend(time());
                    delete($nodes{$node_id});
                    next;
                }
                $node->_waitstart($parent->_waitend());
            }

            my $state;
            if ($node->GetEventState(\$state)) {
                print STDERR "*** Error getting event state for $node_id.\n";
                $node->_sliver()->SetStatus("failed");
                $node->_waitend(time());
                delete($nodes{$node_id});
                next;
            }

            # Node reached a terminal state; let the sliver work out
            # its own status.
            if (grep {$_ eq $state} @waitstates) {
                print "$node_id has reported state $state\n";
                $node->_sliver()->ComputeStatus();
                $node->_waitend(time());
                delete($nodes{$node_id});
                next;
            }

            # Still booting; give up once it exceeds its limit.
            $waittime = time() - $node->_waitstart();
            if ($waittime > $node->_maxwait()) {
                $minutes = int($waittime / 60);
                print STDERR "*** Giving up on $node_id ($state) - ".
                    "it's been $minutes minute(s).\n";
                $node->_sliver()->SetStatus("failed");
                $node->_waitend(time());
                delete($nodes{$node_id});
                next;
            }
            if (int($waittime / 60) > $minutes) {
                # Changing minutes is why we get this print for just
                # a single node each time.
                $minutes = int($waittime / 60);
                print STDERR "Still waiting for $node_id ($state) - ".
                    "it's been $minutes minute(s).\n";
            }
        }
        sleep(5);
    }

    #
    # Go through nodes and see what failed.
    #
    my @failed = ();
    foreach my $node (@nodes) {
        my $node_id = $node->node_id();

        push(@failed, $node)
            if ($node->_sliver()->status() eq "failed");

        #
        # Create a logfile from the boot log, but only for nodes whose
        # event state is one of the states we wait for.
        #
        if (grep {$_ eq $node->eventstate()} @waitstates) {
            my $bootlog;

            if ($node->GetBootLog(\$bootlog) == 0 && $bootlog ne "") {
                my $logfile = Logfile->CreateFromString($group, $bootlog);
                if (defined($logfile)) {
                    $logfile->SetMetadata([["bootlog"   , $node->node_id()],
                                           ["Method"    , "reboot $node_id"],
                                           ["slice_idx" , $slice->idx()],
                                           ["slice_urn" , $slice->urn()],
                                           ["slice_uuid", $slice->uuid()]], 1);
                }
            }
        }
    }

    #
    # Notify. List only the nodes that actually failed, so the body
    # agrees with the count in the subject line.
    #
    if (@failed) {
        my $name  = $creator->name();
        my $email = $creator->email();
        my $count = scalar(@failed);
        my $urn   = $slice->urn();

        SENDMAIL("$name <$email>", "$count nodes failed to boot",
                 "Nodes:\n".
                 " " . join(" ", @failed) . "\n".
                 "in $urn failed.\n\n",
                 $TBOPS, "Cc: $TBOPS");
    }

    # Too late, but reset before return; do not want it left set.
    $slice->LockTables();
    if ($experiment->canceled()) {
        $experiment->SetCancelFlag(0);
    }
    # Do this last.
    $slice->ClearMonitorPid();
    $slice->UnLockTables();
    return 0;
}
#
# Stop all the slivers in the aggregate. Stop is brutal, better to
# use restart!
......
......@@ -81,6 +81,8 @@ my $TB = "@prefix@";
my $TBOPS = "@TBOPSEMAIL@";
my $TBAPPROVAL = "@TBAPPROVALEMAIL@";
my $TBAUDIT = "@TBAUDITEMAIL@";
my $TBBASE = "@TBBASE@";
my $TBDOCBASE = "@TBDOCBASE@";
my $BOSSNODE = "@BOSSNODE@";
my $OURDOMAIN = "@OURDOMAIN@";
my $MAINSITE = @TBMAINSITE@;
......@@ -5458,6 +5460,19 @@ sub CleanupDeadSlice($;$)
return -1;
}
#
# If a monitor process is running, then cancel it so that
# we do not leave it behind on a slice/experiment that is
# now gone.
#
if ($slice->monitor_pid()) {
my $response = KillMonitor($slice);
if (GeniResponse::IsResponse($response)) {
print STDERR "CleanupDeadSlice: Could not kill monitor process\n";
return -1;
}
}
# print "Cleaning up dead slice $slice\n";
my $slice_uuid = $slice->uuid();
my $experiment = $slice->GetExperiment();
......@@ -6480,5 +6495,68 @@ sub findStitchPoint
return ($edgeiface, $network, $lasthop, undef);
}
#
# Kill the monitor process.
#
# Asks a running monitor for the slice to stop by setting the cancel
# flag on the slice experiment, then polls the slice monitor pid until
# it is cleared; six polls, five seconds apart.
#
# Returns 0 when no monitor is recorded for the slice or once the
# monitor pid has been cleared. Returns a GeniResponse error object
# when the slice has no experiment, when the cancel flag is already
# set, or when the monitor pid is still set after the last poll (mail
# is sent to $TBOPS in that case).
#
sub KillMonitor($)
{
    my ($slice) = @_;
    my $pid = $slice->monitor_pid();

    # No monitor recorded; nothing to do.
    if (!$pid) {
        return 0;
    }
    print STDERR "Monitor in progress: process id $pid ...\n";

    my $experiment = $slice->GetExperiment();
    if (!defined($experiment)) {
        print STDERR "No experiment for $slice\n";
        return GeniResponse->Create(GENIRESPONSE_ERROR, undef,
                                    "No slice experiment");
    }
    if ($experiment->canceled()) {
        print STDERR "Cancel flag already set for $experiment\n";
        return GeniResponse->Create(GENIRESPONSE_ERROR, undef,
                                    "Unable to stop monitor");
    }

    #
    # Okay, before we commit to waiting for something that might
    # not happen, or erroneously setting the cancel flag, lets lock
    # the tables and check again, to avoid race in start/restart.
    #
    $slice->LockTables();
    if (!$slice->GetMonitorPid()) {
        # Monitor went away on its own in the meantime.
        $slice->UnLockTables();
        return 0;
    }
    $experiment->SetCancelFlag(1);
    $slice->UnLockTables();

    #
    # Wait for the monitor to go away, but should not take long.
    #
    foreach my $attempt (1 .. 6) {
        sleep(5);

        # Go to the DB.
        last
            if (!$slice->GetMonitorPid());
    }

    if ($slice->GetMonitorPid()) {
        print STDERR "Monitor process $pid would not die!\n";
        SENDMAIL($TBOPS, "Monitor for slice would not die!",
                 "Monitor would not die: $slice\n\n" .
                 "$TBBASE/showslice.php?slice_idx=" . $slice->idx(),
                 $TBOPS);
        print STDERR "Could not shutdown $slice!\n";
        return GeniResponse->Create(GENIRESPONSE_ERROR, undef,
                                    "Monitor would not die");
    }
    return 0;
}
# _Always_ make sure that this 1 is at the end of the file...
1;
......@@ -76,6 +76,7 @@ my $OURDOMAIN = "@OURDOMAIN@";
my $PGENIDOMAIN = "@PROTOGENI_DOMAIN@";
my $ELABINELAB = "@ELABINELAB@";
my $TBBASE = "@TBBASE@";
my $TBDOCBASE = "@TBDOCBASE@";
my $CREATEEXPT = "$TB/bin/batchexp";
my $ENDEXPT = "$TB/bin/endexp";
my $NALLOC = "$TB/bin/nalloc";
......@@ -267,6 +268,9 @@ sub Resolve($)
$blob->{'users'} = $bindings;
}
}
$blob->{'public_url'} =
"$TBDOCBASE/showslicepub.php?publicid=" . $slice->publicid()
if (defined($slice->publicid()));
}
my $ticket = GeniTicket->SliceTicket($slice);
if (defined($ticket)) {
......@@ -522,12 +526,26 @@ sub CreateSliver($)
Experiment->FlushAll();
Node->FlushAll();
#
# The callee might also do a wrapper fork, so remember our PID
# to make sure we unlock properly in only the parent side of the
# fork. Child runs with slice unlocked for now.
#
$mypid = $PID;
if ($aggregate->Start($API_VERSION, 0) != 0) {
$slice->UnLock();
print STDERR "Could not start sliver\n";
if ($PID == $mypid) {
$slice->UnLock();
print STDERR "Could not start sliver.\n";
}
else {
print STDERR "Error waiting for nodes.\n";
}
return -1;
}
$slice->UnLock();
if ($PID == $mypid) {
$slice->UnLock();
}
return 0;
}
......@@ -551,8 +569,6 @@ sub DeleteSliver($)
return $credential
if (GeniResponse::IsResponse($credential));
main::AddLogfileMetaData("sliver_urn", $sliver_urn);
#
# In this implementation, the user must provide a slice or sliver
# credential
......@@ -565,13 +581,19 @@ sub DeleteSliver($)
return GeniResponse->Create(GENIRESPONSE_SEARCHFAILED, undef,
"Sliver does not exist");
}
main::AddLogfileMetaDataFromSlice($slice);
if ($sliver_urn ne $aggregate->urn()) {
return GeniResponse->Create(GENIRESPONSE_FORBIDDEN(), undef,
"Credential does not match the URN");
}
# If a monitor process is running, we are "busy".
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
main::AddLogfileMetaData("sliver_urn", $sliver_urn);
main::AddLogfileMetaDataFromSlice($slice);
#
# We need this below to sign the ticket.
#
......@@ -680,10 +702,25 @@ sub DeleteSlice($)
if ($slice->Lock() != 0) {
return GeniResponse->BusyResponse();
}
#
# If a monitor process is running, then cancel it so that
# we do not leave it behind on a slice/experiment that is
# now gone.
#
if ($slice->monitor_pid()) {
my $response = GeniCM::KillMonitor($slice);
if (GeniResponse::IsResponse($response)) {
$slice->UnLock();
return $response;
}
}
if (GeniCM::CleanupDeadSlice($slice, 1) != 0) {
return GeniResponse->Create(GENIRESPONSE_ERROR, undef,
"Could not cleanup slice");
}
done:
return GeniResponse->Create(GENIRESPONSE_SUCCESS);
}
......@@ -797,10 +834,6 @@ sub SliverAction($$$$$)
return $slice
if (defined($slice) && GeniResponse::IsResponse($slice));
if (defined($slice)) {
main::AddLogfileMetaDataFromSlice($slice);
}
if ( (!defined($slice)) &&
($credential->target_urn() =~ /\+authority\+cm$/)) {
# administrative credentials are presented.
......@@ -827,6 +860,13 @@ sub SliverAction($$$$$)
return GeniResponse->Create(GENIRESPONSE_BADARGS, undef,
"No slice or aggregate here");
}
main::AddLogfileMetaDataFromSlice($slice);
# If a monitor process is running, we are "busy".
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
if (defined($slice_urn)) {
if (! GeniHRN::IsValid($slice_urn)) {
return
......@@ -928,13 +968,16 @@ sub SliverAction($$$$$)
if ($mypid) {
return GeniResponse->Create(GENIRESPONSE_SUCCESS);
}
$isasync = 1;
# Remember our pid in case callee wrapper forks again.
$isasync = $PID;
}
$response = &$PerformAction($aggregate, $action);
goto bad
if (GeniResponse::IsResponse($response));
$slice->UnLock();
if (!$isasync || $isasync == $PID) {
$slice->UnLock();
}
return ($isasync ? GENIRESPONSE_SUCCESS :
GeniResponse->Create(GENIRESPONSE_SUCCESS));
}
......@@ -1151,6 +1194,18 @@ sub Shutdown($)
if ($slice->Lock() != 0) {
return GeniResponse->BusyResponse();
}
#
# If a monitor process is running, then cancel it so that
# we do not leave it behind on a slice/experiment that is
# now gone.
#
if ($slice->monitor_pid()) {
my $response = GeniCM::KillMonitor($slice);
if (GeniResponse::IsResponse($response)) {
$slice->UnLock();
return $response;
}
}
if (GeniCM::CleanupDeadSlice($slice, 0) != 0) {
libtestbed::SENDMAIL($TBOPS, "Emergency Shutdown failed",
"Emergency shutdown failed on $slice\n");
......@@ -1249,6 +1304,11 @@ sub GetTicket($)
"Credential does not match the URN");
}
# If a monitor process is running, we are "busy".
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
#
# Watch for a placeholder slice and update it.
#
......@@ -1277,7 +1337,7 @@ sub GetTicket($)
}
$slice->UnLock();
}
#
# GetTicket applies only to slices that are not active. Must
# use UpdateSliver() for an active sliver.
......@@ -1360,6 +1420,11 @@ sub UpdateTicket($)
}
main::AddLogfileMetaDataFromSlice($slice);
# If a monitor process is running, we are "busy".
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}