Commit e59fc714 authored by Leigh B Stoller

Fix a bug that was introduced when we shifted to using os_setup

directly (on the Cloudlab clusters); we were losing a lockout, which
allowed DeleteSliver() to run while in the middle of a CreateSliver().
This was resulting in a lot of email about node failures since the nodes
were getting yanked out from underneath the CreateSliver(). From the
user's perspective, this did not matter much, since they wanted the slice
gone, but it finally bothered me enough to look more closely.
parent adefb6f5
......@@ -1296,19 +1296,26 @@ sub ActionStart($$;$)
}
$sliver = undef;
#
# os_setup requires the expstate to be set appropriately, which we
# generally do not do on the geni path.
#
$experiment->SetState(EXPTSTATE_ACTIVATING());
#
# So we are going to fork and let os_setup proceed.
#
my $childpid = main::WrapperFork();
if ($childpid) {
#
# This indicates that while not locked, we are still busy.
# KillMonitor operates using the cancel flag, so we should
# be fine.
#
$slice->SetMonitorPid($childpid);
print STDERR "Monitor PID $childpid\n";
return 0;
}
#
# os_setup requires the expstate to be set appropriately, which we
# generally do not do on the geni path.
#
$experiment->SetState(EXPTSTATE_ACTIVATING());
#
# We want to let snmpit run in parallel with os_setup, like the
# classic path does.
......@@ -1379,7 +1386,6 @@ sub ActionStart($$;$)
$rval = system("$OSSETUP $pid $eid @nodes");
print STDERR "os_setup exited with status $rval\n";
}
$experiment->SetState($expstate);
#
# See what nodes succeeded or failed. We want to hold off setting
......@@ -1544,6 +1550,8 @@ sub ActionStart($$;$)
# ready or failed state until we are fully done here.
$self->SetStatus("mixed");
$self->ComputeState();
$experiment->SetState($expstate);
$slice->ClearMonitorPid();
return 0;
bad:
......@@ -1562,6 +1570,8 @@ sub ActionStart($$;$)
$self->SetStatus("mixed");
$self->ComputeState();
}
$experiment->SetState($expstate);
$slice->ClearMonitorPid();
return -1;
}
......
......@@ -7938,14 +7938,15 @@ sub KillMonitor($)
#
# Wait for the monitor to go away, but should not take long.
#
my $count = 6;
my $count = 10;
while ($count > 0) {
sleep(5);
sleep(10);
# Go to the DB.
my $monitor_pid = $slice->GetMonitorPid();
last
if (! $monitor_pid);
$count--;
print STDERR "Waiting for monitor to die ...\n";
}
if ($slice->GetMonitorPid()) {
print STDERR "Monitor process $pid would not die!\n";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment