Commit 850b5ab7 authored by Leigh Stoller

Latest attempt to improve vnode booting. See below.

1. Change hackwaitandexit on the client to return zero if the guest
   has not finished setting up. We used to treat 30 seconds of waiting
   as too long and assume the guest must have failed, but this is
   really not the case, especially on busy machines.

2. Fix up vnode_setup exit code handling; we were losing the non-zero
   status because we were not shifting it down, and so failures were
   never being reported.

   New: If the vnode setup does return failure, set its event state to
   TBFAILED to cut short the wait in os_setup and the IG monitor
   process. On the surface this seems like an obviously good idea, but
   I'm sure it will come and bite me when I least expect it.

3. Change GeniAggregate Start/Restart to ignore vnode_setup failures,
   and let the monitor watch for TBFAILED or timeout. There are just
   too many ways for it to fail, and we want to allow vnodes that did
   not fail to set up normally, and give the user the choice to
   restart the ones that failed.

4. Don't let frisbee run forever; protect it with a timeout. I need to
   use Mike's new -T option, but not until I actually get the new
   frisbee pushed out.
parent 24fed54c
#!/usr/bin/perl -wT
#
# Copyright (c) 2000-2013 University of Utah and the Flux Group.
# Copyright (c) 2000-2014 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -635,6 +635,9 @@ sub killvnode(;$)
# vnodesetup and Emulab would never hear back from it.
killvserver();
}
# We used to exit with non-zero since we considered 30 seconds of
# waiting to be failure, but that is not the case on a really busy
# host, so let the caller do the usual wait for failure or timeout.
return 0;
}
......@@ -925,8 +928,9 @@ sub removeconfdir($)
#
# Totally gross; wait for the watchdog to startup before exiting. This
# indicates the vnode is running. If nothing happens after a minute,
# assume the vnode had a problem and exit accordingly.
# indicates the vnode is running, but also gives us a chance to look
# for setup failure from the direct child, so we can tell the caller.
# Otherwise, need to use the normal wait path (timeout or TBFAILED).
#
sub hackwaitandexit($)
{
......@@ -966,7 +970,7 @@ sub hackwaitandexit($)
}
}
}
exit(1);
exit(0);
}
#
......
......@@ -473,6 +473,7 @@ sub downloadImage($$$$) {
my $addr = $reload_args_ref->{"ADDR"};
my $FRISBEE = "/usr/local/etc/emulab/frisbee";
my $IMAGEUNZIP = "/usr/local/bin/imageunzip";
my $command = "";
if (!defined($addr) || $addr eq "") {
# frisbee master server world
......@@ -493,10 +494,8 @@ sub downloadImage($$$$) {
$todiskopt = "-N";
}
if ($server && $imageid) {
mysystem2("$FRISBEE -f -M 64 $proxyopt $todiskopt ".
" -S $server -B 30 -F $imageid $imagepath");
return -1
if ($?);
$command = "$FRISBEE -f -M 64 $proxyopt $todiskopt ".
" -S $server -B 30 -F $imageid $imagepath";
}
else {
print STDERR "Could not parse frisbee loadinfo\n";
......@@ -507,20 +506,45 @@ sub downloadImage($$$$) {
my $mcastaddr = $1;
my $mcastport = $2;
mysystem2("$FRISBEE -f -M 64 -m $mcastaddr -p $mcastport $imagepath");
return -1
if ($?);
$command = "$FRISBEE -f -M 64 -m $mcastaddr -p $mcastport $imagepath";
}
elsif ($addr =~ /^http/) {
if ($todisk) {
mysystem("wget -nv -N -O - '$addr' | ".
"$IMAGEUNZIP -f -W 32 - $imagepath");
} else {
mysystem("wget -nv -N -O $imagepath '$addr'");
$command = "wget -nv -N -O - '$addr' | ".
"$IMAGEUNZIP -f -W 32 - $imagepath";
}
else {
$command = "wget -nv -N -O $imagepath '$addr'";
}
}
return 0;
print STDERR $command . "\n";
#
# Run the command protected by an alarm to avoid trying forever,
# as frisbee is prone to doing.
#
my $childpid = fork();
if ($childpid) {
local $SIG{ALRM} = sub { kill("TERM", $childpid); };
alarm 60 * 30;
waitpid($childpid, 0);
my $stat = $?;
alarm 0;
if ($stat) {
print STDERR " returned $stat ... \n";
return -1;
}
return 0;
}
else {
#
# We have blocked most signals in mkvnode, including TERM.
#
local $SIG{TERM} = 'DEFAULT';
exec($command);
exit(1);
}
}
#
......
......@@ -1461,10 +1461,13 @@ sub Action($$$)
my @node_ids = keys(%vnodes);
#
# Should waiting be an option?
# There are so many ways this can throw an error, let's
# not give up here, but go ahead and use the monitor
# to wait for nodes since some might actually boot. Unless
# the exit code indicates abject failure (-1).
#
system("$VNODESETUP -j -m $pid $eid @node_ids");
if ($?) {
if ($? && $? >> 8 == 256) {
$msg .= "Failed to set up vnodes @node_ids";
goto bad;
}
......
......@@ -24,6 +24,7 @@
#
use English;
use Getopt::Std;
use POSIX ":sys_wait_h";
#
# Set up the vnode state on a virtual (multiplexed) node.
......@@ -61,7 +62,7 @@ my $PGENISUPPORT= @PROTOGENI_SUPPORT@;
my $SAVEUID = $UID;
my $ssh = "$TB/bin/sshtb -n";
my $debug = 0;
my $debug = 1;
my $force = 0;
my $failed = 0;
my $killmode = 0;
......@@ -547,6 +548,19 @@ while (1) {
die("*** $0:\n".
" exec failed!\n");
}
#
# TBForkCmd() returns the full exit status, but we cannot
# pass that to exit directly. Watch for a TERM signal,
# so we can tell the parent we exited because of the timeout.
#
if ($exval) {
if (WIFSIGNALED($exval)) {
$exval = WTERMSIG($exval);
}
else {
$exval = $exval >> 8;
}
}
exit($exval);
}
} else {
......@@ -607,10 +621,13 @@ while (1) {
$childpid = wait();
alarm 0;
$exitstatus = $?;
print STDERR "Child return $exitstatus\n";
};
if ($@) {
die unless $@ =~ /alarm clock/;
next;
next
if ($@ =~ /alarm clock/);
die("bad exit from eval\n");
}
#
......@@ -625,8 +642,11 @@ while (1) {
# Look up to see what vnode, etc. this was associated with - if we
# don't know about this child, ignore it
#
if (! exists($child_vnodes{$childpid})) {
print STDERR "Unknown child $childpid returned from wait\n";
next;
}
my $aref = $child_vnodes{$childpid};
next unless @$aref;
my ($nodeobj, $mode, $birthtime) = @$aref;
my $vnode = $nodeobj->node_id();
my $pnode = $nodeobj->phys_nodeid();
......@@ -640,11 +660,8 @@ while (1) {
print STDERR "vnode $vnode $mode on $pnode returned $?.\n"
if $debug;
if ($exitstatus == 256) {
print STDERR "$vnode is not running sshd.\n" if $debug;
}
elsif ($exitstatus == 15) {
print STDERR "$vnode is wedged.\n" if $debug;
if ($exitstatus == 15) {
print STDERR "$pnode is wedged.\n" if $debug;
}
elsif ($exitstatus >> 8 == 99) {
print STDERR "$vnode did not allocate properly.\n" if $debug;
......@@ -654,18 +671,17 @@ while (1) {
warn("*** $0:\n".
" Virtual node $vnode $mode failure!\n");
}
if ($nodeobj->isplabdslice()) {
#
# If the node was in the setup process, then mark its allocstate
# as down so os_setup knows not to bother waiting for it. DEAD
# is a temp state, different than DOWN. It indicates the node
# was never instantiated (important distinction for plab).
# If the node was in the setup process, then set its state
# to TBFAILED so that anything waiting knows it's toast.
# We set it to SHUTDOWN above.
#
if ($exitstatus &&
(($mode eq "setup") || ($mode eq "reboot"))) {
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_DEAD());
if ($mode eq "setup" || $mode eq "reboot") {
$nodeobj->Refresh();
# Avoid duplicate state as it annoys stated.
$nodeobj->SetEventState(TBDB_NODESTATE_TBFAILED())
if (!$nodeobj->eventstate() ne TBDB_NODESTATE_TBFAILED());
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment