Commit 937d633f authored by Leigh Stoller

CM changes to termination and panic.

* For termination, added a flag to the slice that marks it for
  termination. DeleteSlice sets this if the slice is
  "busy" (start,restart,reload) and the "cancel" flag is provided.  We
  also mark the underlying experiment as canceled, to stop ossetup and
  osload early. We then wait for "busy" to clear, and then kill the
  slice. The new termination flag in the slice is used by the expire
  daemon: in case our wait loop dies early (say, a boss reboot), we can
  catch the termination there instead.

* Panic gets essentially the same changes as termination, except of
  course instead of terminating, we put the underlying experiment into
  panic mode.

Note that we still need to be able to lock the slice to do either
termination or panic, and so the caller has to be able to deal with
retrying if they get back a busy response. In general, we do not lock
slices for very long except during reload and restart; I am still
working on dropping the lock during those (like we already do for
Start). Disk imaging is another place where we currently need to wait,
and that needs to be worked on as well.
parent cf6dd622
......@@ -6066,6 +6066,12 @@ sub RenewSliverAux($$$$)
$message = "Slice has been shutdown";
goto bad;
}
# Ditto termination pending.
if ($slice->termination_pending()) {
$message = "Slice is marked for termination";
goto bad;
}
#
# We do not save renew logs, so add a metadata tag with the
# expiration so we can see what the user actually tried to do.
......
This diff is collapsed.
......@@ -336,6 +336,7 @@ sub async_code($) { return field($_[0], "async_code"); }
# Read-only accessors. The first four hand the object and a field name
# to field() and return whatever it yields.
sub async_output($)
{
    my ($self) = @_;
    return field($self, "async_output");
}
sub portal_tag($)
{
    my ($self) = @_;
    return field($self, "portal_tag");
}
sub portal_url($)
{
    my ($self) = @_;
    return field($self, "portal_url");
}
# Set by MarkForTermination(); presumably NULL/undef until then -- confirm.
sub termination_pending($)
{
    my ($self) = @_;
    return field($self, "termination_pending");
}
# Result of calling cert() on the object stored under the 'CERT' key.
sub cert($)
{
    my ($self) = @_;
    my $certificate = $self->{'CERT'};
    return $certificate->cert();
}
# The object stored under the 'CERT' key itself.
sub GetCertificate($)
{
    my ($self) = @_;
    return $self->{'CERT'};
}
# Value stored under the 'LOCKED' key.
sub LOCKED($)
{
    my ($self) = @_;
    return $self->{'LOCKED'};
}
......@@ -1218,6 +1219,21 @@ sub SetIdleIgnore($$)
}
}
#
# Mark slice for termination by stamping termination_pending with the
# current database time on this slice's geni_slices row.
#
# Returns 0 on success, -1 if the update query fails.
#
sub MarkForTermination($)
{
    my ($self) = @_;
    my $slice_uuid = $self->uuid();
    my $statement  = "update geni_slices set termination_pending=now() ".
        "where uuid='$slice_uuid'";

    # DBQueryWarn yields a false value when the query fails.
    if (!DBQueryWarn($statement)) {
        return -1;
    }
    return 0;
}
#
# Set the shutdown field.
#
......
......@@ -550,6 +550,51 @@ sub ExpireSlices()
}
}
#
# Terminate slices that are pending, see GeniCMV2::DeleteSlice().
#
# Selects every geni_slices row whose termination_pending is set and
# which is not shut down, locks the slice, and, if its aggregate is not
# busy, hands it to GeniCM::CleanupDeadSlice(). A busy aggregate is left
# alone; nothing here clears the flag, so the row matches again the next
# time this runs. Failures are reported on STDERR (and by mail when the
# cleanup itself fails); nothing is returned to the caller.
#
sub TerminateSlices()
{
    my $query_result =
        GeniDB::DBQueryWarn("select idx from geni_slices ".
                            "where termination_pending is not null and ".
                            " shutdown is null and isshutdown=0");

    # A failed query gives back a false value instead of a result
    # object. Calling fetchrow_array() on that would die and take the
    # caller's loop down with it, so just give up on this pass.
    return
        if (!$query_result);

    while (my ($idx) = $query_result->fetchrow_array()) {
        my $slice = GeniSlice->Lookup($idx);
        my $aggregate;

        if (!defined($slice)) {
            # Slice is gone, lets not worry.
            next;
        }
        if ($slice->Lock() != 0) {
            print STDERR "Could not lock slice $slice.\n";
            goto skip;
        }
        $aggregate = GeniAggregate->SliceAggregate($slice);
        # This would be unusual, so call it quits.
        if (!defined($aggregate)) {
            print STDERR "No aggregate for slice, skipping\n";
            $slice->UnLock();
            goto skip;
        }
        # Only tear down once the aggregate has gone idle.
        if (!$aggregate->Busy()) {
            if (GeniCM::CleanupDeadSlice($slice, 1) != 0) {
                print STDERR "Could not terminate $slice\n";
                SENDMAIL($TBOPS, "Could not terminate slice",
                         "Could not terminate slice $slice");
            }
        }
        $slice->UnLock();
      skip:
        # Flush whichever objects were looked up on this iteration,
        # on both the normal and the early-exit paths.
        $slice->Flush()
            if (defined($slice));
        $aggregate->Flush()
            if (defined($aggregate));
    }
    return;
}
#
# Look for slices that need to be shutdown
#
......@@ -1756,6 +1801,7 @@ while (1) {
ExpireTickets();
WarnSlices();
ExpireSlices();
TerminateSlices();
#
# Run the idle check periodically.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment