Commit 2a1b2b81 authored by Leigh B. Stoller
Browse files

Minor cleanup and simplification to WaitTillAlive.

parent c9478035
......@@ -26,6 +26,7 @@ my $optlist = "";
# Site configuration. The "@...@" values look like build-time substitution
# placeholders that are filled in before the script runs -- TODO confirm.
my $TB = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS = "@TBOPSEMAIL@";
# Directory that every boot path checked below is required to live under.
my $TFTP = "/tftpboot";
# Helper program located under the install prefix.
my $nodereboot = "$TB/bin/node_reboot";
# Ping binary run by WaitTillAlive to probe a node.
my $ping = "/sbin/ping";
......@@ -135,21 +136,53 @@ for ($i = 0; $i < $db_result->numrows; $i++) {
$osid{$node} = $row{'def_boot_osid'};
#
# Make sure the files specified in the paths exist. We mount the
# user tftp directory on boss node, so we can ignore the IP address,
# and just check the path directly.
#
#
# Validate the default and next boot paths for this node. Each path may
# carry an optional "IP:" prefix, which is stripped because only the path
# part is checked. What remains must live under $TFTP and must exist.
# Undefined or empty paths are skipped. Dies on the first bad path.
#
foreach my $field ('def_boot_path', 'next_boot_path') {
    next
        if (!defined($row{$field}));

    my $path = $row{$field};
    next
        if ($path eq "");

    # Split out IP address if it exists; only the path is kept.
    if ($path =~ /^([0-9\.]+):(\/.*)$/) {
        $path = $2;
    }

    #
    # Path must begin with $TFTP. $TFTP already starts with a slash, so
    # anchor on it directly; prepending another "/" would demand a path
    # of the form "//tftpboot/..." and reject every valid one. \Q..\E
    # keeps any regex metacharacters in $TFTP literal.
    #
    if ($path !~ m{^\Q$TFTP\E/}) {
        die("File $path for node $node must reside in $TFTP\n");
    }
    if (! -e $path) {
        die("File $path for node $node does not exist!");
    }
}
#
......@@ -256,43 +289,58 @@ foreach my $node ( @nodes ) {
# Report success and stop here, so the sub definitions that follow are
# only ever reached through calls.
print STDOUT "OS Setup Done!\n";
exit 0;
#
# Wait for a node to come back alive.
#
# usage: WaitTillAlive($pc)
#
# Pings $pc in short bursts until it answers or the wait budget runs out.
# Elapsed time is measured from $waitstart, which is set outside this sub.
#
# Returns 0 as soon as the node answers a ping.
# Returns 1 if the node has not answered within the wait budget.
#
sub WaitTillAlive {
    my ($pc) = @_;

    print STDERR "Waiting for $pc to come alive\n" if $dbg;

    #
    # Eight minutes seems like a long time to wait, but it ain't!
    # That is three minutes normally, plus five more for a node that
    # is being reloaded.
    #
    my $maxwait = (60 * 3);
    if ($reload{$pc}) {
        $maxwait += (60 * 5);
    }

    #
    # Start a counter going, relative to the time we rebooted the first
    # node.
    #
    my $waittime = 0;
    my $minutes  = 0;

    #
    # Sigh, a long ping results in the script waiting until all the
    # packets are sent from all the pings, before it will exit. So,
    # loop doing a bunch of shorter pings.
    #
    while (1) {
        system("$ping -q -c 4 -t 4 $pc >/dev/null 2>&1");

        #
        # Returns 0 if any packets are returned. Returns 2 if pingable
        # but no packets are returned. Other non-zero error codes indicate
        # other problems. Any non-zero return indicates "not pingable" to us.
        #
        # Test the whole wait status rather than just the exit byte: a
        # ping that was killed by a signal, or could not be started at
        # all, has an exit byte of zero or garbage and must not be
        # mistaken for a live node.
        #
        my $ping_ok = ($? == 0);

        if ($ping_ok) {
            print STDERR "$pc alive and well\n" if $dbg;
            return 0;
        }

        $waittime = time - $waitstart;
        if ($waittime > $maxwait) {
            print "$pc appears dead; its been ",
                (int ($waittime / 60))," minutes since reload started.\n";
            return 1;
        }

        # Emit a progress message once per elapsed minute, not per ping.
        if (int($waittime / 60) > $minutes) {
            $minutes = int($waittime / 60);
            print "Still waiting for $pc - its been $minutes minute(s)\n";
        }
    }
}
sub OSFeatureSupported {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment