Commit 0199f565 authored by Leigh B Stoller

Some tweaks to DeleteSlice() and to cancellation while the slice is still
setting up; hopefully this fixes the race.
parent b6f5a1c2
...@@ -1579,6 +1579,8 @@ sub ActionStart($$;$) ...@@ -1579,6 +1579,8 @@ sub ActionStart($$;$)
$self->ComputeState(); $self->ComputeState();
$experiment->SetState($expstate); $experiment->SetState($expstate);
$slice->ClearMonitorPid(); $slice->ClearMonitorPid();
# in case we were canceled by DeleteSlice()
$experiment->SetCancelFlag(0);
return 0; return 0;
bad: bad:
...@@ -1599,6 +1601,8 @@ sub ActionStart($$;$) ...@@ -1599,6 +1601,8 @@ sub ActionStart($$;$)
} }
$experiment->SetState($expstate); $experiment->SetState($expstate);
$slice->ClearMonitorPid(); $slice->ClearMonitorPid();
# in case we were canceled by DeleteSlice()
$experiment->SetCancelFlag(0);
return -1; return -1;
} }
......
...@@ -904,6 +904,7 @@ sub DeleteSlice($) ...@@ -904,6 +904,7 @@ sub DeleteSlice($)
if ($slice->Lock() != 0) { if ($slice->Lock() != 0) {
return GeniResponse->BusyResponse(); return GeniResponse->BusyResponse();
} }
my $slice_experiment = $slice->GetExperiment();
# #
# If a monitor process is running, then the slice is busy. # If a monitor process is running, then the slice is busy.
...@@ -928,7 +929,6 @@ sub DeleteSlice($) ...@@ -928,7 +929,6 @@ sub DeleteSlice($)
# rebooting timed out nodes, and quit earlier. The caller will not # rebooting timed out nodes, and quit earlier. The caller will not
# have to retry as long. # have to retry as long.
# #
my $slice_experiment = $slice->GetExperiment();
if (defined($slice_experiment)) { if (defined($slice_experiment)) {
$slice_experiment->SetCancelFlag(1); $slice_experiment->SetCancelFlag(1);
} }
...@@ -953,7 +953,8 @@ sub DeleteSlice($) ...@@ -953,7 +953,8 @@ sub DeleteSlice($)
} }
} }
# #
# If we were canceled, we wait for the monitor to stop, instead of # If we were canceled, we wait for the monitor to stop before
# we can kill it.
# #
if ($canceled) { if ($canceled) {
while ($slice->GetMonitorPid()) { while ($slice->GetMonitorPid()) {
...@@ -961,7 +962,11 @@ sub DeleteSlice($) ...@@ -961,7 +962,11 @@ sub DeleteSlice($)
GeniCM::CheckMonitor($slice); GeniCM::CheckMonitor($slice);
print STDERR "Checking to see if monitor has stopped ...\n"; print STDERR "Checking to see if monitor has stopped ...\n";
} }
#
# The monitor has stopped and we have the lock. Clear the
# cancel flag so we can actually terminate (checked in endexp).
# The flag was set to 1 above to ask the monitor to quit early;
# leaving it set would contradict the intent stated here. The
# experiment may be undefined (the earlier SetCancelFlag(1) call
# is guarded the same way), so check before dereferencing.
#
if (defined($slice_experiment)) {
    $slice_experiment->SetCancelFlag(0);
}
} }
my $retval = GeniCM::CleanupDeadSlice($slice, 1); my $retval = GeniCM::CleanupDeadSlice($slice, 1);
if ($retval) { if ($retval) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment