Commit 57646ea5 authored by Leigh B Stoller

Do not allow panicked experiments to be terminated.

Add a CheckMonitor() method to see if the monitor is running, and if not
clear the DB. This is one cause of slices not getting terminated when Utah
Cloudlab crashes.
parent b26f569f
......@@ -6026,6 +6026,16 @@ sub CleanupDeadSlice($;$)
my $experiment = $slice->GetExperiment();
if (defined($experiment)) {
#
# Do not allow a paniced slice to be terminated until the panic
# is cleared.
#
if ($experiment->state() eq EXPTSTATE_PANICED() ||
# Need this cause we are not locking. We need to lock.
$experiment->paniced()) {
print STDERR "Refusing to terminate a paniced experiment\n";
return -1;
}
my $pid = $experiment->pid();
my $eid = $experiment->eid();
......@@ -6712,6 +6722,27 @@ sub ReserveLocalVlanTag($$$)
return [$lanid, $tag];
}
#
# Check the monitor process, clear it if it died.
#
# Takes the slice whose monitor should be checked. The pid recorded for
# the slice is read with $slice->monitor_pid() and probed with signal 0,
# which tests for the existence of the process without delivering a
# signal to it. When the probe fails with ESRCH (no such process) the
# stale pid is cleared with $slice->ClearMonitorPid(). Any other failure
# of the probe leaves the recorded pid untouched.
#
# Always returns 0; callers decide whether the slice is busy by looking
# at $slice->monitor_pid() after this call.
#
sub CheckMonitor($)
{
    my ($slice) = @_;
    my $pid     = $slice->monitor_pid();

    #
    # No monitor recorded, so there is nothing to check. Without this
    # guard kill(0, 0) would probe our own process group instead of a
    # monitor, and an undefined pid would trigger a warning.
    #
    if (!$pid) {
	return 0;
    }

    #
    # See if the process still exists. Only ESRCH means it is gone.
    #
    if (kill(0, $pid) == 0 && $!{ESRCH}) {
	print STDERR "Monitor process $pid no longer exists.\n";
	$slice->ClearMonitorPid();
    }
    return 0;
}
#
# Kill the monitor process.
#
......
......@@ -613,13 +613,16 @@ sub DeleteSliver($)
"Credential does not match the URN");
}
if ($slice->Lock() != 0) {
return GeniResponse->BusyResponse();
}
# If a monitor process is running, we are "busy".
GeniCM::CheckMonitor($slice);
if ($slice->monitor_pid()) {
$slice->UnLock();
return GeniResponse->MonitorResponse();
}
if ($slice->Lock() != 0) {
return GeniResponse->BusyResponse();
}
# If any slivers are imaging, then we are busy as well.
if ($aggregate->CheckSliverStates("imaging")) {
$slice->UnLock();
......@@ -758,6 +761,7 @@ sub DeleteSlice($)
# directly, which *does* kill the monitor, so admin cleanup
# is not affected.
#
GeniCM::CheckMonitor($slice);
if ($slice->monitor_pid()) {
$slice->UnLock();
return GeniResponse->MonitorResponse();
......@@ -943,11 +947,6 @@ sub SliverAction($$$$$)
}
main::AddLogfileMetaDataFromSlice($slice);
# If a monitor process is running, we are "busy".
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
if (defined($slice_urn)) {
if (! GeniHRN::IsValid($slice_urn)) {
return
......@@ -961,6 +960,12 @@ sub SliverAction($$$$$)
if ($slice->Lock() != 0) {
return GeniResponse->BusyResponse();
}
# If a monitor process is running, we are "busy".
GeniCM::CheckMonitor($slice);
if ($slice->monitor_pid()) {
$slice->UnLock();
return GeniResponse->MonitorResponse();
}
# Shutdown slices get nothing.
if ($slice->shutdown()) {
$slice->UnLock();
......@@ -1392,6 +1397,7 @@ sub GetTicket($)
}
# If a monitor process is running, we are "busy".
GeniCM::CheckMonitor($slice);
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
......@@ -1508,6 +1514,7 @@ sub UpdateTicket($)
main::AddLogfileMetaDataFromSlice($slice);
# If a monitor process is running, we are "busy".
GeniCM::CheckMonitor($slice);
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
......@@ -1604,6 +1611,7 @@ sub UpdateSliver($)
main::AddLogfileMetaDataFromSlice($slice);
# If a monitor process is running, we are "busy".
GeniCM::CheckMonitor($slice);
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
......@@ -1695,6 +1703,7 @@ sub RedeemTicket($)
main::AddLogfileMetaDataFromSlice($slice);
# If a monitor process is running, we are "busy".
GeniCM::CheckMonitor($slice);
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
......@@ -2490,6 +2499,7 @@ sub CreateImage($)
main::AddLogfileMetaDataFromSlice($slice);
# If a monitor process is running, we are "busy".
GeniCM::CheckMonitor($slice);
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
......@@ -3123,11 +3133,6 @@ sub ShareLanAux($$)
}
main::AddLogfileMetaDataFromSlice($slice);
# If a monitor process is running, we are "busy".
if ($slice->monitor_pid()) {
return GeniResponse->MonitorResponse();
}
if ($slice_urn ne $slice->urn()) {
return GeniResponse->Create(GENIRESPONSE_FORBIDDEN(), undef,
"Credential does not match the URN");
......@@ -3138,6 +3143,12 @@ sub ShareLanAux($$)
if ($slice->Lock() != 0) {
return GeniResponse->BusyResponse();
}
# If a monitor process is running, we are "busy".
GeniCM::CheckMonitor($slice);
if ($slice->monitor_pid()) {
$slice->UnLock();
return GeniResponse->MonitorResponse();
}
my $experiment = $slice->GetExperiment();
if (!defined($experiment)) {
$slice->UnLock();
......@@ -3949,9 +3960,8 @@ sub Lockdown($)
# Only the SA for the slice can do this.
#
my $caller = GeniHRN->new($credential->owner_urn());
my ($tmp) = split(":", $slice->urnOBJ()->domain());
if (! ($caller->IsSA() &&
$caller->domain() eq $tmp)) {
$caller->domain() eq $slice->urnOBJ()->domain())) {
return GeniResponse->Create(GENIRESPONSE_FORBIDDEN, undef,
"Not enough permission to set/clr lockdown");
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment