Commit cb5ab9f5 authored by David Johnson's avatar David Johnson

Make clientside startcmdstatus reporting more reliable.

(I had a disk image containing unmodifiable binary software that would
overwrite dhcpcd's sane copy of /etc/resolv.conf, at a nondeterministic
point in time, with something completely bogus.  That screwed up
startcmdstatus reports; this helps out with that case, in combination
with other custom scripting that returns /etc/resolv.conf to sanity.)

Note though that we only retry infinitely once runstartup has
successfully gone to the background; until then, we're limited to about
a minute's worth of retries.  Likewise, we don't retry forever if
runstartup itself experiences an error.  We only retry forever if we
actually have a status to send.
parent 7e44a003
#!/usr/bin/perl -w
#
# Copyright (c) 2000-2017 University of Utah and the Flux Group.
# Copyright (c) 2000-2018 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -3485,11 +3485,15 @@ sub ixpsetup($)
# Report startupcmd status back to TMCD. Called by the runstartup
# script.
#
sub startcmdstatus($)
sub startcmdstatus($;$)
{
my($status) = @_;
my($status,$timeout) = @_;
my %opthash;
if (defined($timeout)) {
$opthash{'timeout'} = $timeout;
}
return(tmcc(TMCCCMD_STARTSTAT, "$status"));
return(tmcc(TMCCCMD_STARTSTAT, "$status", undef, %opthash));
}
#
......
#!/usr/bin/perl -wT
#
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
# Copyright (c) 2000-2010, 2018 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -31,6 +31,7 @@ BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }
#
sub fatal($);
sub background();
sub safe_startcmdstatus($;$$);
#
# Turn off line buffering on output
......@@ -150,9 +151,36 @@ print STDOUT "$runcmd returned $stat\n";
#
# Use the TMCC to tell the TMCD what the exit status was.
#
startcmdstatus($stat);
safe_startcmdstatus($stat);
exit(0);
#
# Send the startup command status via startcmdstatus(), retrying until the
# call reports success (a first return value of 0).
#
#   $stat     - status value to report.
#   $retries  - optional maximum number of attempts; when omitted, zero or
#               negative, keep trying forever.
#   $interval - optional number of seconds to sleep after each failed
#               attempt; defaults to 4.
#
# Each attempt passes a timeout of 5 to startcmdstatus(). Returns 0 on
# success, otherwise the first return value of the last failed attempt.
#
sub safe_startcmdstatus($;$$)
{
    my ($stat,$retries,$interval) = @_;

    if (!defined($retries)) {
	$retries = -1;
    }
    if (!defined($interval)) {
	$interval = 4;
    }

    my $tries = 0;
    my $ret   = -1;

    #
    # Count every attempt in the loop body. Bumping the counter inside the
    # loop condition would be skipped by the short-circuiting "||" whenever
    # we are retrying forever, which would silence the messages below.
    #
    while ($retries <= 0 || $tries < $retries) {
	$tries++;
	print "*** retrying startcmdstatus($stat) (code $ret)\n"
	    if ($tries > 1);

	($ret,) = startcmdstatus($stat,5);
	if ($ret == 0) {
	    print "*** succeeded sending startcmdstatus after $tries tries\n"
		if ($tries > 1);
	    return 0;
	}
	sleep($interval);
    }

    # Only reachable when a positive retry limit was given and exhausted.
    print "*** failed to send startcmdstatus after $tries tries!\n";
    return $ret;
}
sub fatal($)
{
my($mesg) = $_[0];
......@@ -162,7 +190,12 @@ sub fatal($)
#
# Use the TMCC to tell the TMCD that we screwed the pooch.
#
startcmdstatus(666);
# We do use a retry strategy, but it is limited; we cannot block our
# caller indefinitely if we are not in the background. We will wait
# for approximately 8 * (5 + 2) seconds, because
# safe_startcmdstatus calls tmcc with timeout 5.
#
safe_startcmdstatus(666,8,2);
exit(-1);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment