Commit 4d037e7c authored by Mike Hibler's avatar Mike Hibler
Browse files

The new watchdog:

	- check intervals driven by sitevars delivered by TMCD command
	- handles rusage stats return on plab nodes
It is now a single process and executes any auxiliary scripts synchronously.
This may prove to be unwieldy in the face of long running scripts like
update.  If so, we'll have to add all that fork/exec/waitpid mucky-muck.
parent a7fe9871
......@@ -26,7 +26,7 @@ use Exporter;
TMCCCMD_PLABCONFIG TMCCCMD_SUBCONFIG TMCCCMD_LINKDELAYS
TMCCCMD_PROGRAMS TMCCCMD_SYNCSERVER TMCCCMD_KEYHASH TMCCCMD_NODEID
TMCCCMD_NTPINFO TMCCCMD_NTPDRIFT TMCCCMD_EVENTKEY TMCCCMD_ROUTELIST
TMCCCMD_ROLE TMCCCMD_RUSAGE
TMCCCMD_ROLE TMCCCMD_RUSAGE TMCCCMD_WATCHDOGINFO
);
# Must come after package declaration!
......@@ -147,6 +147,7 @@ my %commandset =
"routelist" => {TAG => "routelist"},
"role" => {TAG => "role"},
"rusage" => {TAG => "rusage"},
"watchdoginfo" => {TAG => "watchdoginfo"},
);
#
......@@ -188,6 +189,7 @@ sub TMCCCMD_EVENTKEY() { $commandset{"eventkey"}->{TAG}; }
sub TMCCCMD_ROUTELIST() { $commandset{"routelist"}->{TAG}; }
sub TMCCCMD_ROLE() { $commandset{"role"}->{TAG}; }
sub TMCCCMD_RUSAGE() { $commandset{"rusage"}->{TAG}; }
sub TMCCCMD_WATCHDOGINFO(){ $commandset{"watchdoginfo"}->{TAG}; }
#
# Caller uses this routine to set configuration of this library
......
......@@ -15,10 +15,10 @@ use POSIX qw(strftime);
#
sub usage()
{
print "Usage: watchdog [-d] [-t timeout] [start | stop]\n";
print "Usage: watchdog [-dv] [start | stop]\n";
exit(1);
}
my $optlist = "t:d";
my $optlist = "Fdv";
#
# Turn off line buffering on output
......@@ -35,21 +35,65 @@ BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }
use libsetup;
use libtmcc;
# XXX should be in libsetup
my $svcslice = "utah_elab_svc";
#
# Are we running inside the Emulab service slice on a plab node?
# True only when PLAB() is true and either the USER or the SUDO_USER
# environment variable names the service slice ($svcslice).
#
sub PLABSVC() {
    # Not a plab node: hand back PLAB()'s own false value.
    my $onplab = PLAB();
    return $onplab
        if (!$onplab);

    # Either the login user or the sudo-invoking user may identify the slice.
    foreach my $var ('USER', 'SUDO_USER') {
        my $who = $ENV{$var};
        return 1
            if (defined($who) && $who eq $svcslice);
    }
    return "";
}
# Locals
my $action = "start";
my $timeout = (60 * 60 * 12); # In seconds of course.
my $logname = "$LOGDIR/emulab-watchdog.debug";
my $logname = "$LOGDIR/emulab-watchdog.log";
my $pidfile = "/var/run/emulab-watchdog.pid";
my $rusagebin = "$BINDIR/plabrusage";
my $debug = 0;
my $isalivewait = ((REMOTE() == 1) ? (PLAB() ? 600 : 60) :
(JAILED() ? 600 : 180)); # Seconds to wait.
my $verbose = 0;
my $updatefailed= 0;
my $driftfile;
my $vnodeid;
my $lastdrift;
my $rusagestr;
my $curtime;
# tmcc retries
my $trytcp = 0;
my $maxretries = 3;
my %retry;
# XXX testing
my $fakeit;
my %faketimes;
#
# Default interval values in seconds.
# Compatible with old, static watchdog.
#
my %iv = (
check => 0,
isalive => ((REMOTE() == 1) ? (PLAB() ? 600 : 60) : (JAILED() ? 600 : 180)),
drift => (60 * 60 * 12),
cvsup => (60 * 60 * 12),
rusage => 0,
);
my %funcs = (
check => \&setintervals,
isalive => \&sendisalive,
drift => \&ntpdrift,
cvsup => \&runcvsup,
rusage => \&sendrusage,
);
#
# Forward declarations for prototype checking
#
sub startisalive();
sub setintervals($);
sub sendisalive($);
sub ntpdrift($);
sub runcvsup($);
sub sendrusage($);
sub logmsg($);
#
# Parse command arguments. Once we return from getopts, all that should be
......@@ -59,12 +103,15 @@ sub startisalive();
if (! getopts($optlist, \%options)) {
usage();
}
if (defined($options{"t"})) {
$timeout = $options{"t"};
}
if (defined($options{"d"})) {
$debug = 1;
}
if (defined($options{"v"})) {
$verbose = 1;
}
if (defined($options{"F"})) {
$fakeit = 1;
}
if (@ARGV) {
$action = $ARGV[0];
......@@ -133,127 +180,575 @@ $SIG{INT} = \&handler;
# If jailed, get our jailname.
#
if (JAILED() || PLAB()) {
$vnodeid = libsetup_getvnodeid();
my $vnodeid = libsetup_getvnodeid();
# Tell the tmcc library. Note that it's actually been done via libsetup
# but I duplicate it here to make it explicit.
configtmcc("subnode", $vnodeid);
}
#
# Start isalive daemon.
# XXX plab UDP calls sometimes fail with EINVAL when reading a reply,
# combat this by forcing the last retry of a failing call to use TCP
# in the plab service slice. Maybe we should do this for all plab
# slices...
#
startisalive();
$trytcp = 1
if (PLABSVC());
#
# For sending back ntpdrift.
#
if (-e "/etc/ntp.drift") {
$driftfile = "/etc/ntp.drift";
}
elsif (-e "/etc/ntp/drift") {
} elsif (-e "/etc/ntp/drift") {
$driftfile = "/etc/ntp/drift";
}
#
# Initial drift value, we only update if it changes
#
my $lastdrift = `cat $driftfile`;
chomp($lastdrift);
if (defined($driftfile)) {
$lastdrift = `cat $driftfile`;
chomp($lastdrift);
}
#
# Retry state for failed tmcc calls
#
$retry{check} = 0;
$retry{isalive} = 0;
$retry{drift} = 0;
$retry{rusage} = 0;
$curtime = time();
if ($fakeit) {
logmsg("Faking it\n");
} else {
logmsg("Dogging it\n");
}
#
# Loop!
# Set our initial interval values.
# This will queue the interval check.
#
my $firsttime = 1;
setintervals($curtime);
$firsttime = 0;
#
# Loop, sleeping and then processing events
#
while (1) {
sleep($timeout);
my $date = POSIX::strftime("20%y/%m/%d %H:%M:%S", localtime());
my ($nexttime, $event);
$curtime = time();
print "Dogging it at $date\n";
qhead($nexttime, $event) == 0 or
die("All timeouts disabled at $date!");
while ($curtime >= $nexttime) {
qpop($nexttime, $event);
&$event($curtime);
qhead($nexttime, $event) == 0 or
die("All timeouts disabled at $date!");
$curtime = time();
}
sleep($nexttime - $curtime);
}
exit(0);
sub sendisalive($)
{
my ($curtime) = @_;
#
# Run account update. Use immediate mode so that it exits right away
# if the lock is taken (another update already running).
#
print "Looking for new Emulab accounts ...\n";
system("update -i -l");
if ($fakeit) {
my $delta = $curtime - $faketimes{isalive};
$faketimes{isalive} = $curtime;
logmsg("sendisalive at +$delta\n");
qinsert($curtime + $iv{isalive}, \&sendisalive) if ($iv{isalive});
return;
}
#
# Do this only for "real" nodes (cluster and remote PCs)
#
next
if (JAILED() || PLAB() || MFS());
if ($verbose) {
if ($retry{isalive} == 0) {
logmsg("isalive: sending\n");
} else {
logmsg("isalive: resending, retry=$retry{isalive}\n");
}
}
my %tmccargs = ();
$tmccargs{timeout} = 3;
$tmccargs{useudp} = 1
if (!$trytcp || $retry{isalive} != $maxretries);
my @tmccresults;
if (tmcc(TMCCCMD_ISALIVE, undef, \@tmccresults, %tmccargs) != 0 ||
scalar(@tmccresults) == 0) {
#
# Failed, schedule a retry using a backoff.
#
if ($retry{isalive} < $maxretries) {
my $nexttime = time() + (1 << $retry{isalive});
qinsert($nexttime, \&sendisalive);
$retry{isalive}++;
logmsg("isalive: failed ($?), retry $retry{isalive}\n");
return;
}
#
# Failed miserably, just whine and reschedule at the normal time.
#
logmsg("isalive: failed ($?) after $maxretries attempts\n");
} else {
#
# Success. The format of the response is rather simple right now.
# Note: if the update failed last time, run it no matter what.
#
logmsg("isalive: succeeded after $retry{isalive} retries\n")
if ($retry{isalive});
if ($updatefailed ||
$tmccresults[0] =~ /^UPDATE=1$/) {
logmsg("isalive: running an account update\n");
system("$BINDIR/update -i -l");
$updatefailed = $?;
logmsg("isalive: update done\n");
}
}
#
# Send back ntpdrift info. Should move elsewhere.
# Set up for another interval.
# Since the tmcc call and update can take awhile, we update curtime
#
if (!REMOTE() && defined($driftfile)) {
my $drift = `cat $driftfile`;
chomp($drift);
if ($drift ne $lastdrift && $drift =~ /^([-\d\.]*)$/) {
print "Updating NTP drift from $lastdrift to $drift\n";
# Server also checks the value for sanity.
tmcc(TMCCCMD_NTPDRIFT, $1, undef, ("timeout" => 3));
$lastdrift = $drift;
$retry{isalive} = 0;
$curtime = time();
qinsert($curtime + $iv{isalive}, \&sendisalive)
if ($iv{isalive});
}
sub setintervals($)
{
my ($curtime) = @_;
my $report = 0;
if ($fakeit) {
$iv{check} = 7;
$iv{isalive} = 3;
$iv{drift} = 9;
$iv{cvsup} = 21;
$iv{rusage} = 15;
my $delta = $curtime - $faketimes{check};
$faketimes{check} = $curtime;
logmsg("setintervals at +$delta\n");
qinsert($curtime + $iv{check}, \&setintervals) if ($iv{check});
return;
}
if ($verbose) {
if ($retry{check} == 0) {
logmsg("setintervals: fetching intervals\n");
} else {
logmsg("setintervals: refetching intervals, retry=$retry{check}\n");
}
}
if (REMOTE()) {
# XXX fake an argument to force request to TMCD and avoid the cache
my $arg = "foo";
my %tmccargs = ();
$tmccargs{timeout} = 3;
$tmccargs{useudp} = 1
if (!$trytcp || $retry{check} != $maxretries);
my @tmccresults;
if (tmcc(TMCCCMD_WATCHDOGINFO, $arg, \@tmccresults, %tmccargs) != 0 ||
scalar(@tmccresults) == 0) {
#
# Failed, schedule a retry using a backoff.
#
if ($retry{check} < $maxretries) {
my $nexttime = time() + (1 << $retry{check});
qinsert($nexttime, \&setintervals);
$retry{check}++;
logmsg("setintervals: failed ($?), retry $retry{check}\n");
return;
}
#
# Failed miserably, just whine and reschedule at the normal time.
#
logmsg("setintervals: failed ($?) after $maxretries attempts, ".
"using current values\n");
$report = 1;
} else {
#
# Do a cvsup to get updated software.
# Success.
#
print "Looking for software updates ... \n";
system("runcvsup.sh");
logmsg("setintervals: succeeded after $retry{check} retries\n")
if ($retry{check});
my %oiv;
$oiv{check} = $iv{check};
$oiv{isalive} = $iv{isalive};
$oiv{drift} = $iv{drift};
$oiv{cvsup} = $iv{cvsup};
$oiv{rusage} = $iv{rusage};
if ($tmccresults[0] =~
/INTERVAL=(-?\d+) ISALIVE=(-?\d+) NTPDRIFT=(-?\d+) CVSUP=(-?\d+) RUSAGE=(-?\d+)/) {
$iv{check} = $1
if ($1 >= 0);
$iv{isalive} = $2
if ($2 >= 0);
$iv{drift} = $3
if ($3 >= 0);
$iv{cvsup} = $4
if ($4 >= 0);
$iv{rusage} = $5
if ($5 >= 0);
}
#
# Policy:
#
# - vnodes, plab nodes and MFS systems do not send NTP drift or cvsup
# - widearea nodes do not record drift
# - local nodes do not cvsup
# - only a plab node service slice reports rusage
#
if (JAILED() || PLAB() || MFS()) {
$iv{drift} = 0;
$iv{cvsup} = 0;
}
if (REMOTE()) {
$iv{drift} = 0;
} else {
$iv{cvsup} = 0;
}
if (!PLABSVC()) {
$iv{rusage} = 0;
}
foreach my $key (keys %iv) {
if ($firsttime || $iv{$key} != $oiv{$key}) {
$report = 1;
#
# Special handling of ourselves:
# warn if future checks are disabled.
#
if ($key eq "check") {
if ($iv{$key} == 0) {
logmsg("setintervals: ".
"WARNING interval checks disabled!\n");
} else {
logmsg("setintervals: scheduling $key\n");
}
next;
}
if ($iv{$key} == 0) {
logmsg("setintervals: descheduling $key\n");
qdelete($funcs{$key});
} elsif ($firsttime || $oiv{$key} == 0) {
logmsg("setintervals: scheduling $key\n");
qinsert($curtime + $iv{$key}, $funcs{$key});
} else {
#
# To reschedule an already existing event,
# we recompute when it was last scheduled and
# add the new interval to that. If the result
# is before the current time, we set it to the
# current time so it will trigger immediately.
#
my $ntime = qfind($funcs{$key});
if (defined($ntime)) {
$ntime -= $oiv{$key};
$ntime += $iv{$key};
$ntime = $curtime
if ($ntime < $curtime);
} else {
$ntime = $curtime;
}
logmsg("setintervals: rescheduling $key at $ntime ".
"(now=$curtime)\n");
qinsert($ntime, $funcs{$key});
}
}
}
}
if ($report) {
logmsg("setintervals: check=$iv{check}, isalive=$iv{isalive}, ".
"drift=$iv{drift}, cvsup=$iv{cvsup}, rusage=$iv{rusage}\n");
}
#
# Set up for another interval.
# Since the tmcc call can take awhile, we update curtime
#
$retry{check} = 0;
$curtime = time();
qinsert($curtime + $iv{check}, \&setintervals)
if ($iv{check});
}
#
# Periodic event: report the NTP drift value to the server if it changed.
#
# Argument: $curtime, the time (seconds since the epoch) at which the event
# loop dispatched this event; the next run is queued at $curtime + $iv{drift}
# as long as that interval is non-zero.
#
# Reads the file named by $driftfile and compares its contents with
# $lastdrift (the last value we remembered). When the value differs and looks
# like a number, it is sent via TMCCCMD_NTPDRIFT and remembered.
#
# $driftfile is only set at startup when a drift file exists, so it may be
# undefined here; in that case (or if the file cannot be read) nothing is
# reported, but the event is still rescheduled.
#
sub ntpdrift($)
{
    my ($curtime) = @_;

    if ($fakeit) {
        # Test mode: just log how long since the last fake call and requeue.
        my $delta = $curtime - $faketimes{drift};
        $faketimes{drift} = $curtime;
        logmsg("ntpdrift at +$delta\n");
        qinsert($curtime + $iv{drift}, \&ntpdrift) if ($iv{drift});
        return;
    }

    logmsg("ntpdrift: reporting NTP drift\n")
        if ($verbose);

    #
    # Slurp the drift file directly rather than via a shell "cat", which
    # with an undefined file name would end up reading stdin.
    #
    my $drift;
    if (defined($driftfile) && open(my $fh, '<', $driftfile)) {
        local $/;
        $drift = <$fh>;
        close($fh);
    }

    if (!defined($drift)) {
        logmsg("ntpdrift: no readable NTP drift file, skipping\n")
            if ($verbose);
    }
    else {
        chomp($drift);

        # $lastdrift is unset if there was no drift file at startup.
        my $prev = defined($lastdrift) ? $lastdrift : "";

        if ($drift ne $prev && $drift =~ /^([-\d\.]*)$/) {
            logmsg("ntpdrift: updating NTP drift from $prev to $drift\n");

            # Server also checks the value for sanity.
            tmcc(TMCCCMD_NTPDRIFT, $1, undef, ("timeout" => 3));
            $lastdrift = $drift;
        }
    }

    qinsert($curtime + $iv{drift}, \&ntpdrift)
        if ($iv{drift});
}
exit(0);
#
# Fire off a child that does nothing but tell the boss we are alive.
# Do a cvsup to get updated software.
# XXX fork this off?
#
sub startisalive()
sub runcvsup($)
{
my $mypid = fork();
if ($mypid) {
my ($curtime) = @_;
if ($fakeit) {
my $delta = $curtime - $faketimes{cvsup};
$faketimes{cvsup} = $curtime;
logmsg("runcvsup at +$delta\n");
qinsert($curtime + $iv{cvsup}, \&runcvsup) if ($iv{cvsup});
return;
}
my $failed = 0;
logmsg("runcvsup: checking for software updates\n");
system("$BINDIR/runcvsup.sh");
logmsg("runcvsup: software updates done\n");
# cvsup can take awhile so update curtime
$curtime = time();
qinsert($curtime + $iv{cvsup}, \&runcvsup)
if ($iv{cvsup});
}
sub sendrusage($)
{
my ($curtime) = @_;
my $date = POSIX::strftime("20%y/%m/%d %H:%M:%S", localtime());
if ($fakeit) {
my $delta = $curtime - $faketimes{rusage};
$faketimes{rusage} = $curtime;
logmsg("sendrusage at +$delta\n");
qinsert($curtime + $iv{rusage}, \&sendrusage) if ($iv{rusage});
return;
}
print "Keep alive starting up at $date\n";
if ($verbose) {
if ($retry{rusage} == 0) {
logmsg("rusage: sending\n");
} else {
logmsg("rusage: resending, retry=$retry{rusage}\n");
}
}
while (1) {
#
# Collect the stats
#
if ($retry{rusage} == 0) {
if (! -x $rusagebin) {
logmsg("rusage: no $rusagebin\n");
goto resched;
}
$rusagestr = `$rusagebin 2>>$LOGDIR/emulab-rusage.log`;
if ($?) {
logmsg("rusage: $rusagebin failed ($?)\n");
goto resched;
}
chomp $rusagestr;
if ($rusagestr !~ /LA1=[\d\.]+ LA5=[\d\.]+ LA15=[\d\.]+ DUSED=[\d\.]+/) {
logmsg("rusage: $rusagebin returns gobbledy-gook: $rusagestr\n");
goto resched;
}
}
logmsg("rusage: sending: $rusagestr\n")
if ($verbose);
#
# Run tmcc in UDP mode.
#
if (tmcc(TMCCCMD_RUSAGE, $rusagestr, undef,
("timeout" => 3, "useudp" => 1)) != 0) {
#
# Failed, schedule a retry using a backoff.
#
if ($retry{rusage} < $maxretries) {
my $nexttime = time() + (1 << $retry{rusage});
qinsert($nexttime, \&sendrusage);
$retry{rusage}++;
logmsg("rusage: failed ($?), retry $retry{rusage}\n");
return;
}
#
# Run tmcc in UDP mode.
# Since its UDP, we try it a couple of times if it fails.
# Failed miserably, just whine and reschedule at the normal time.
#
my $retries = 3;
logmsg("rusage failed ($?) after $maxretries attempts\n");
} else {
#
# Success.
#
logmsg("rusage succeeded after $retry{rusage} retries\n")
if ($retry{rusage});
}
while ($retries) {
my @tmccresults;