Commit 5cf6aad2 authored by Leigh Stoller's avatar Leigh Stoller

New approach to dealing with nodes that fail to boot in os_setup, and

land in hwdown.

Currently, if a node fails to boot in os_setup and the node is running
a system image, it is moved into hwdown. 99% of the time this is
wasted work; the node did not fail for hardware reasons, but for some
other reason that is transient.

The new approach is to move the node into another holding experiment,
emulab-ops/hwcheckup. A new daemon (checknodes_daemon) watches that experiment, and nodes
that land in it are freshly reloaded with the default image and
rebooted. If the node reboots okay after reload, it is released back
into the free pool. If it fails any part of the reload/reboot, it is
officially moved into hwdown.

Another possible use: if you have a suspect node and you go wiggle some
hardware, then instead of releasing it into the free pool you can move it
into hwcheckup to see if it reloads/reboots. If not, it lands in
hwdown again. Then you break out the hammer.

Most of the changes in Node.pm, libdb.pm, and os_setup are
organizational changes to make the code cleaner.
parent 46157519
......@@ -1259,6 +1259,29 @@ sub InsertNodeLogEntry($$$$)
return 0;
}
#
# Clear a bunch of stuff from the nodes table entry so boot is clean.
#
sub ClearBootAttributes($)
{
    my ($self) = @_;

    # Usable both as a method on a Node object and as a plain function
    # that is handed a node id string.
    my $id        = ref($self) ? $self->node_id() : $self;
    my $freedirty = TBDB_ALLOCSTATE_FREE_DIRTY();

    # A single update resets all of the per-boot columns in the node's
    # row and puts its allocstate back to TBDB_ALLOCSTATE_FREE_DIRTY.
    my $sql = join("",
        "update nodes set startupcmd='',rpms='',deltas='', ",
        "tarballs='',failureaction='fatal', routertype='none', ",
        "def_boot_cmd_line='',next_boot_cmd_line='', ",
        "temp_boot_osid=NULL,next_boot_osid=NULL, ",
        "update_accounts=0,ipport_next=ipport_low,rtabid=0, ",
        "sfshostid=NULL,allocstate='$freedirty',boot_errno=0, ",
        "destination_x=NULL,destination_y=NULL, ",
        "destination_orientation=NULL ",
        "where node_id='$id'");

    # Returns 0 on success, -1 when the query fails.
    return -1
        if (!DBQueryWarn($sql));
    return 0;
}
#
# Clear the experimental interfaces for a node.
#
......@@ -2012,6 +2035,82 @@ sub SetSchedReload($$;$)
return 0;
}
#
# Mark a node as down. We schedule a next reservation for it so that it
# remains in the user's experiment through the termination so that there
# are no permission errors (say, from snmpit).
#
sub MarkAsDown($)
{
    my ($self) = @_;

    return _ScheduleHoldingReservation($self,
                                       NODEDEAD_PID(), NODEDEAD_EID(),
                                       "down");
}

#
# Mark a node as ill. Same mechanism as MarkAsDown(), but the next
# reservation is scheduled in the NODEILL_PID/NODEILL_EID experiment
# instead of the NODEDEAD_PID/NODEDEAD_EID one.
#
# Accepts a Node object or a node id string. Returns 0 on success, -1 on
# failure.
#
sub MarkAsIll($)
{
    my ($self) = @_;

    return _ScheduleHoldingReservation($self,
                                       NODEILL_PID(), NODEILL_EID(),
                                       "ill");
}

#
# Shared implementation for MarkAsDown() and MarkAsIll(): clear the boot
# attributes of the node and schedule a next reservation for it in the
# holding experiment $pid/$eid. $what is the word used in the warning
# printed when the reservation cannot be recorded ("down" or "ill").
#
# Returns 0 on success, -1 if the experiment cannot be found or the
# next_reserve row cannot be written.
#
sub _ScheduleHoldingReservation
{
    my ($self, $pid, $eid, $what) = @_;
    my $nodeid = (ref($self) ? $self->node_id() : $self);

    # Best effort; a failure here is reported but does not stop us from
    # scheduling the reservation.
    if (ClearBootAttributes($nodeid)) {
        print STDERR "*** WARNING: Could not clear boot attributes: $self!\n";
    }

    my $experiment = Experiment->Lookup($pid, $eid);
    if (!defined($experiment)) {
        print STDERR "*** WARNING: No such experiment $pid/$eid!\n";
        return -1;
    }
    my $exptidx = $experiment->idx();

    # "replace" so that any next reservation already scheduled for this
    # node is overwritten.
    my $query_result =
        DBQueryWarn("replace into next_reserve " .
                    "(node_id, exptidx, pid, eid) " .
                    "values ('$nodeid', '$exptidx', '$pid', '$eid')");

    if (!$query_result || !$query_result->num_rows) {
        print STDERR "*** WARNING: Could not mark $self as $what\n";
        return -1;
    }
    return 0;
}
#
# Set the boot status for a node. We also update the fail stamp/count
# as appropriate.
#
sub SetBootStatus($$)
{
    my ($self, $bstat) = @_;

    # Works as a method on a Node object or as a function given a node id.
    my $id  = ref($self) ? $self->node_id() : $self;
    my $sql = "update nodes set bootstatus='$bstat' where node_id='$id'";

    # NOTE(review): the header comment mentions updating a fail
    # stamp/count, but only the bootstatus column is written here.
    DBQueryWarn($sql)
        or return -1;

    return 0;
}
#
# Do a normal wakeonlan after power cycle. This is for laptops that like
# to go to sleep (especially while in PXEWAIT).
......
......@@ -30,6 +30,7 @@ use vars qw(@ISA @EXPORT);
NODESTARTSTATUS_NOSTATUS PROJMEMBERTRUST_NONE PROJMEMBERTRUST_USER
PROJMEMBERTRUST_ROOT PROJMEMBERTRUST_GROUPROOT
PROJMEMBERTRUST_PROJROOT PROJMEMBERTRUST_LOCALROOT
NODEILL_PID NODEILL_EID
TBOPSPID EXPTLOGNAME
PLABMOND_PID PLABMOND_EID PLABHOLDING_PID PLABHOLDING_EID
......@@ -159,8 +160,7 @@ use vars qw(@ISA @EXPORT);
TBDB_PHYSICAL_NODE_TABLES
TBAdmin TBOpsGuy TBProjAccessCheck TBNodeAccessCheck
TBExptAccessCheck MarkNodeDown
SetNodeBootStatus NodeidToExp
TBExptAccessCheck NodeidToExp
ExpState
ExpNodes ExpNodeVnames ExpNodesOldReserved
DBDateTime DefaultImageID
......@@ -284,6 +284,8 @@ sub NODEREPOSITIONING_EID() { "repositioning"; }
sub NODEREPOSPENDING_EID() { "repositionpending"; }
sub NODEDEAD_PID() { $TBOPSPID; }
sub NODEDEAD_EID() { "hwdown"; }
# Holding experiment for nodes that failed to boot: they are reloaded and
# rebooted there before being either freed or moved on to hwdown.
sub NODEILL_PID() { $TBOPSPID; }
sub NODEILL_EID() { "hwcheckup"; }
sub PLABMOND_PID() { $TBOPSPID; }
sub PLABMOND_EID() { "plab-monitor"; }
sub PLABTESTING_PID() { $TBOPSPID; }
......@@ -1088,50 +1090,6 @@ sub ExpNodeVnames($$;$$)
return %nodes;
}
#
# Mark a node as down. We schedule a next reservation for it so that it
# remains in the users experiment through the termination so that there
# are no permission errors (say, from snmpit).
#
# usage: MarkNodeDown(char *nodeid)
#
sub MarkNodeDown($)
{
# $node is a node id string; it is interpolated directly into the query.
my($node) = $_[0];
my($pid, $eid);
# The holding experiment for dead nodes.
$pid = NODEDEAD_PID;
$eid = NODEDEAD_EID;
my $exptidx;
# Without the experiment index there is nothing to reserve into; warn and
# return -1.
if (!TBExptIDX($pid, $eid, \$exptidx)) {
print "*** WARNING: No such experiment $pid/$eid!\n";
return -1;
}
# "replace" overwrites any next reservation already scheduled for the node.
# DBQueryFatal is used, so a query error does not return here.
my $query_result =
DBQueryFatal("replace into next_reserve " .
"(node_id, exptidx, pid, eid) " .
"values ('$node', '$exptidx', '$pid', '$eid')");
# Only a warning is issued when no row was recorded, and there is no
# explicit return value on this path; the visible callers ignore the result.
if ($query_result->num_rows < 1) {
DBWarn("WARNING: Could not mark $node down");
}
}
#
# Set the boot status for a node.
#
# usage: SetNodeBootStatus(char *status)
#
sub SetNodeBootStatus($$)
{
    my ($nodeid, $status) = @_;

    # Record the given boot status in the node's row. DBQueryFatal is used,
    # so a query error does not return to the caller.
    my $sql = "update nodes set bootstatus='$status' where node_id='$nodeid'";

    DBQueryFatal($sql);
}
#
# Find out what osid a node will boot next time it comes up,
# Usually (but not always) the currently running OS as well.
......
......@@ -447,18 +447,8 @@ foreach my $node (@freed_nodes) {
"Moved from hwdown; nfree");
}
}
my $allocFreeState = TBDB_ALLOCSTATE_FREE_DIRTY();
DBQueryWarn("update nodes set startupcmd='',rpms='',deltas='', ".
"tarballs='',failureaction='fatal', routertype='none', ".
"def_boot_cmd_line='',next_boot_cmd_line='', ".
"temp_boot_osid=NULL,next_boot_osid=NULL, ".
"update_accounts=0,ipport_next=ipport_low,rtabid=0, ".
"sfshostid=NULL,allocstate='$allocFreeState',boot_errno=0, ".
"destination_x=NULL,destination_y=NULL, ".
"destination_orientation=NULL ".
"where node_id='$node_id'") || $error++;
$node->ClearBootAttributes() == 0
or $error++;
#
# If the node is a virtnode, force its state to SHUTDOWN. This is mostly
......
......@@ -38,7 +38,8 @@ SBIN_STUFF = resetvlans console_setup.proxy sched_reload named_setup \
newnode_reboot savelogs.proxy eventsys.proxy \
elabinelab snmpit.proxy panic node_attributes \
nfstrace plabinelab smbpasswd_setup smbpasswd_setup.proxy \
rmproj snmpit.proxynew snmpit.proxyv2 pool_daemon
rmproj snmpit.proxynew snmpit.proxyv2 pool_daemon \
checknodes_daemon
ifeq ($(ISMAINSITE),1)
SBIN_STUFF += repos_daemon
endif
......
#!/usr/bin/perl -w
#
# GENIPUBLIC-COPYRIGHT
# Copyright (c) 2009 University of Utah and the Flux Group.
# All rights reserved.
#
use strict;
use English;
use Getopt::Std;
#
# Attempt to determine if nodes are really messed up.
#
sub usage()
{
    # Print the one-line synopsis and exit with a non-zero status.
    my $synopsis = "Usage: checknodes_daemon [-d]\n";

    print $synopsis;
    exit(1);
}
my $optlist = "d";
my $debug   = 0;

#
# Configure variables
# (the @...@ placeholders are presumably substituted at configure time)
#
my $TB            = "@prefix@";
my $TBOPS         = "@TBOPSEMAIL@";
my $TBLOGS        = "@TBLOGSEMAIL@";
my $LOGFILE       = "$TB/log/checknodes.log";
my $SUDO          = "/usr/local/bin/sudo";
my $PROTOUSER     = "elabman";
my $WAP           = "$TB/sbin/withadminprivs";
my $BATCHEXP      = "$TB/bin/batchexp";
my $NAMED_SETUP   = "$TB/sbin/named_setup";
my $EXPORTS_SETUP = "$TB/sbin/exports_setup";
my $GENTOPOFILE   = "$TB/libexec/gentopofile";
my $NFREE         = "$TB/bin/nfree";

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Protos
sub fatal($);

#
# Turn off line buffering on output
#
$| = 1;

if ($UID != 0) {
    die("Must be root to run this script\n");
}

#
# Check args early so we get the right DB.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"d"})) {
    $debug = 1;
}

# Load the Testbed support stuff.
use lib "@prefix@/lib";
use libdb;
use libosload;
use libtestbed;
use Experiment;
use Node;
use User;

my $NODEILL_PID = NODEILL_PID();
my $NODEILL_EID = NODEILL_EID();

#
# We need this user for running below.
#
my $elabman = User->Lookup($PROTOUSER);
if (!defined($elabman)) {
    fatal("Could not lookup $PROTOUSER user. Exiting ...");
}

#
# Grab the experiment we use,
#
my $experiment = Experiment->Lookup($NODEILL_PID, $NODEILL_EID);
if (!defined($experiment)) {
    #
    # Create if it does not exist.
    #
    system("$SUDO -u $PROTOUSER $WAP $BATCHEXP ".
           " -q -i -k -j -w -f -n -S 'System Experiment' ".
           " -L 'System Experiment' ".
           " -E 'Check failed nodes before moving to hwdown - DO NOT DELETE' ".
           " -p $NODEILL_PID -e $NODEILL_EID");
    if ($?) {
        fatal("Could not create experiment for $NODEILL_PID/$NODEILL_EID\n");
    }
    $experiment = Experiment->Lookup($NODEILL_PID, $NODEILL_EID);

    #
    # Do not go on with an undefined object if the experiment still
    # cannot be found; the method calls below would die with an
    # unhelpful error and without telling anyone.
    #
    if (!defined($experiment)) {
        fatal("Could not lookup experiment $NODEILL_PID/$NODEILL_EID ".
              "after creating it");
    }
}

# A swapped experiment is flipped to active before the daemon uses it.
if ($experiment->state() eq EXPTSTATE_SWAPPED()) {
    $experiment->SetState(EXPTSTATE_ACTIVE());
}
my $pid = $experiment->pid();
my $eid = $experiment->eid();

# Go to ground.
if (! $debug) {
    # A true return from TBBackGround() ends this process here; the
    # daemon carries on in the process that gets a false return.
    if (TBBackGround($LOGFILE)) {
        exit(0);
    }
}
print "Check Nodes Daemon starting ... pid $$, at ".`date`;

# Switch to the elabman user, in the experiment's unix group.
if ($elabman->FlipTo($experiment->unix_gid())) {
    fatal("Could not flipto $elabman ($experiment)");
}
#
# Setup a signal handler for newsyslog.
#
sub handler()
{
# Reopen the daemon's log file on $LOGFILE.
ReOpenLog($LOGFILE);
}
$SIG{HUP} = \&handler
    if (!$debug);

# Make sure POSIX is loaded for the POSIX::strftime() call in the loop.
use POSIX ();

#
# Run gentopofile for the holding experiment, then exports_setup and
# named_setup. Stops at the first tool that fails.
#
# Returns 0 if all three ran okay, -1 otherwise (after printing which
# tool failed).
#
sub RunSetupTools
{
    print "Running $GENTOPOFILE ...\n";
    if (system("$GENTOPOFILE $pid $eid")) {
        print STDERR "$GENTOPOFILE failed\n";
        return -1;
    }
    print "Running $EXPORTS_SETUP ...\n";
    if (system("$EXPORTS_SETUP")) {
        print STDERR "$EXPORTS_SETUP failed\n";
        return -1;
    }
    # The nodes will not boot locally unless there is a DNS record.
    print "Running $NAMED_SETUP ...\n";
    if (system("$NAMED_SETUP")) {
        print STDERR "$NAMED_SETUP failed\n";
        return -1;
    }
    return 0;
}

#
# Main loop: every pass reloads whatever nodes are currently in the
# holding experiment, moves the ones that fail to hwdown and frees the
# ones that come back okay. The pause between passes lives in the
# continue block so that it also happens when a pass is cut short with
# "next"; otherwise a persistent tool failure makes the daemon spin.
#
while (1) {
    print "Running at ".
        POSIX::strftime("%Y-%m-%d %H:%M:%S", localtime()) . "\n";

    # Drop cached state so we see nodes that were moved in since last pass.
    $experiment->Flush();
    Node->FlushAll();

    my @nodelist = $experiment->NodeList();
    next
        if (!@nodelist);

    foreach my $node (@nodelist) {
        if ($node->ClearBootAttributes()) {
            print STDERR "$node: Could not clear boot attributes.\n";
        }
    }

    next
        if (RunSetupTools());

    my @nodenames      = map { $_->node_id() } @nodelist;
    my %reload_args    = ();
    my %reload_results = ();

    $reload_args{'debug'}    = $debug;
    $reload_args{'waitmode'} = 2; # XXX Wait till reboot after reload.
    $reload_args{'nodelist'} = [ @nodenames ];

    print "Running osload on @nodenames\n";
    my $failures = osload(\%reload_args, \%reload_results);
    if ($failures) {
        print STDERR "osload returned $failures failures\n";
    }

    my @informtbopsfatal = ();
    my @informtbopswarn  = ();

    # A true entry in %reload_results is treated as a failed reload/reboot.
    foreach my $node (@nodelist) {
        if ($reload_results{$node->node_id()}) {
            push(@informtbopsfatal, $node->node_id());
            print STDERR "$node is fatally ill; moving to hwdown.\n";
            # NOTE(review): nodes marked down here are not passed to
            # nfree below, so they appear to stay in $pid/$eid and would
            # be picked up again on the next pass. Confirm how they
            # actually leave this experiment.
            $node->MarkAsDown();
            $node->InsertNodeLogEntry($elabman, TB_DEFAULT_NODELOGTYPE(),
                                      "Moved to hwdown by checknodes daemon");
        }
        else {
            push(@informtbopswarn, $node->node_id());
            print STDERR "$node appears to be okay; releasing.\n";
            $node->InsertNodeLogEntry($elabman, TB_DEFAULT_NODELOGTYPE(),
                                      "Released by checknodes daemon");
        }
    }

    if (@informtbopsfatal) {
        my $count = scalar(@informtbopsfatal);

        SENDMAIL($TBOPS, "$count nodes are down",
                 "Nodes:\n".
                 " " . join(" ", @informtbopsfatal) . "\n".
                 "appear to be dead.\n\n".
                 "The nodes have been taken out of the pool.\n");
    }
    if (@informtbopswarn) {
        my $count = scalar(@informtbopswarn);

        # List form: the arguments go straight to nfree, no shell involved.
        system($NFREE, $pid, $eid, @informtbopswarn);
        if ($?) {
            fatal("Could not free nodes: @informtbopswarn");
        }
        else {
            SENDMAIL($TBOPS, "$count nodes appear to be okay",
                     "Nodes:\n".
                     " " . join(" ", @informtbopswarn) . "\n".
                     "have reloaded and rebooted okay.\n\n".
                     "The nodes have been freed.\n");
        }
    }

    # Run the tools again now that nodes have left the experiment. A
    # failure is reported by the helper; the pass is over either way.
    RunSetupTools();
}
continue {
    sleep(10);
}
exit(0);
sub fatal($)
{
    my ($msg) = @_;
    my $subject = "Check Nodes Daemon died";

    #
    # Mail the message to $TBOPS before going away. $TBOPS is also passed
    # as the fourth argument (presumably the sender; confirm against
    # SENDMAIL).
    #
    SENDMAIL($TBOPS, $subject, $msg, $TBOPS);

    # Then die with the same message, prefixed by the program name.
    my $diemsg = "*** $0:\n" . " $msg\n";
    die($diemsg);
}
......@@ -461,12 +461,12 @@ sub nodereboot($$)
TBDB_NODESTATE_ISUP))) {
if ($actual_state eq TBDB_NODESTATE_ISUP) {
print STDOUT "reboot ($node): alive and well.\n";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
next;
}
tberror "$node reported a TBFAILED event.";
}
SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
Node::SetBootStatus($node, NODEBOOTSTATUS_FAILED);
$result->{$node} = -1;
$failed++;
}
......
......@@ -612,7 +612,7 @@ if ($firewalled) {
# We assume that firewall node images are "standard" here,
# and whine to tbops.
#
MarkNodeDown($node);
Node::MarkAsDown($node);
TBSetNodeLogEntry($node, $user_uid, TB_DEFAULT_NODELOGTYPE(),
"'Moved to hwdown by os_setup; ".
"failed to boot image for osid " . $osmap{$node} .
......@@ -913,7 +913,7 @@ if ($plabinelab) {
foreach my $node (@plabnodes) {
if (exists($nodes{$node})) {
tbnotice "Not waiting for emulated plab node $node";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
TBSetNodeEventState($node, TBDB_NODESTATE_ISUP());
......@@ -977,7 +977,7 @@ while ( @nodelist ) {
goto tbfailed;
}
print "$node is alive and well\n";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_READY() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
next;
......@@ -1013,7 +1013,7 @@ while ( @nodelist ) {
tbwarn "$node may be down. This has been reported to testbed-ops.";
tbfailed:
SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
Node::SetBootStatus($node, NODEBOOTSTATUS_FAILED);
if ($canfail{$node} && !($canceled || $noretry)) {
push(@informuser, $node);
......@@ -1033,9 +1033,9 @@ while ( @nodelist ) {
if (!exists($geninodes{$node}) &&
(! TBOsidToPid($osids{$node}, \$pidofosid) ||
$pidofosid eq TBOPSPID())) {
MarkNodeDown($node);
Node::MarkAsIll($node);
TBSetNodeLogEntry($node, $user_uid, TB_DEFAULT_NODELOGTYPE(),
"'Moved to hwdown by os_setup; ".
"'Moved to hwcheckup by os_setup; ".
"failed to boot image for osid " . $osmap{$node} .
" in $pid/$eid'");
push(@informtbopsfatal, $node);
......@@ -1075,8 +1075,7 @@ if ($count > 0) {
"Nodes:\n".
" " . join(" ", @informtbopsfatal) . "\n".
"in pid/eid $pid/$eid appear to be dead.\n\n".
"The nodes have been taken out of the pool until this matter ".
"is resolved.\n",
"The nodes have been moved into hardware checkup.\n",
$user_email_to);
}
$count = scalar(@informtbopswarn);
......@@ -1241,13 +1240,13 @@ elsif (@vnodelist) {
# Might have already been set above.
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY);
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
next;
}
vtbfailed:
TBDebugTimeStamp("Virtual node $node setup FAILED");
SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
Node::SetBootStatus($node, NODEBOOTSTATUS_FAILED);
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_DOWN());
#
......@@ -1834,7 +1833,7 @@ sub os_setup_one($$$;$)
return 0;
}
print "$node is alive and well\n";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
} else {
......@@ -1870,7 +1869,7 @@ sub os_setup_one($$$;$)
return 0;
}
print "$node is alive and well\n";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment