Commit 20d25733 authored by Mike Hibler

Optimize, getting rid of some excess fork/execs.

Tease out some of the hardwired constants and attempt to make sense of them.
parent b5dae9a1
#!/usr/bin/perl -wT #!/usr/bin/perl -wT
# #
# Copyright (c) 2000-2012 University of Utah and the Flux Group. # Copyright (c) 2000-2013 University of Utah and the Flux Group.
# #
# {{{EMULAB-LICENSE # {{{EMULAB-LICENSE
# #
...@@ -58,14 +58,53 @@ my $PGENISUPPORT= @PROTOGENI_SUPPORT@; ...@@ -58,14 +58,53 @@ my $PGENISUPPORT= @PROTOGENI_SUPPORT@;
my $BATCHCOUNT = 12; my $BATCHCOUNT = 12;
my $BATCHSLEEP = 5; my $BATCHSLEEP = 5;
#
# Various timeouts that really should come from the DB.
#
# These determine the max time per-node that we take to attempt to effect
# a reboot short of power cycling (i.e., the time spent in RebootNode):
#
# node is in PXEWAIT, returns after sending a PXEWAKEUP ("immediately").
# node does not ping, returns after 2 seconds.
# node is pingable and ssh is running, returns after between
# 2 and ($REBOOTTIMO + $PINGWAIT) seconds.
# ssh reboot fails but ipod works, returns after between
# ($REBOOTTIMO + $PINGWAIT) and ($REBOOTTIMO + 2 * $PINGWAIT) seconds.
# unresponsive to ipod, returns after ($REBOOTTIMO + 2 * $PINGWAIT) seconds.
#
# With current settings, this is between "immediately" and 60 seconds
# per node. In the common cases where nodes are in PXEWAIT, alive and
# well (ssh running), or completely dead it takes around 10 seconds max.
# Ironically, the slowest case is for the "alive and well" scenario where
# we have to wait for the node to stop pinging, which means after it has
# shutdown all services and reached the point where it shuts down network
# interfaces.
#
#
# SSH timeouts.
# Connection timeout value should be less than the reboot/reconfig values,
# since the latter two are for the entire operation. Set to zero to not
# have a connect timeout (the historic case).
#
# Seconds; appended to the ssh command as "-o ConnectTimeout=..." when non-zero.
my $CONNECTTIMO = 10;
# Seconds; alarm() limit on waiting for the ssh reboot child in RebootNode.
my $REBOOTTIMO = 20;
# Seconds; alarm() limit on waiting for the child process in RebootVNode.
my $REBOOTVNODETIMO = 30;
# Seconds; alarm() limit on another child wait in RebootNode (presumably the
# reconfig request, going by the name -- confirm against the full file).
my $RECONFIGTIMO = 30;
# #
# Default reboot waittime. # Default reboot waittime.
# If the user doesn't specify, we use this historic value. # If the user doesn't specify, we use this historic value.
# #
# XXX should come from DB.
#
my $MAXWAITTIME = (6 * 60); my $MAXWAITTIME = (6 * 60);
#
# Wait times for a node to stop pinging.
# Both regular case and when the prepare script has to be run.
#
# Seconds handed to WaitTillDead in the regular (non-prepare) case, and again
# after an ipod has been sent. WaitTillDead polls in roughly 2 second steps.
my $PINGWAIT = 20;
# Seconds handed to WaitTillDead instead of $PINGWAIT when $prepare is set.
my $PREPAREWAIT = 200;
# #
# Testbed Support libraries # Testbed Support libraries
# #
...@@ -82,8 +121,10 @@ if ($PGENISUPPORT) { ...@@ -82,8 +121,10 @@ if ($PGENISUPPORT) {
# External Programs # External Programs
my $ssh = "$TB/bin/sshtb -n"; my $ssh = "$TB/bin/sshtb -n";
if ($CONNECTTIMO) {
$ssh .= " -o ConnectTimeout=$CONNECTTIMO";
}
my $power = "$TB/bin/power"; my $power = "$TB/bin/power";
my $ipod = "$TB/sbin/apod";
my $vnodesetup = "$TB/sbin/vnode_setup"; my $vnodesetup = "$TB/sbin/vnode_setup";
my $bisend = "$TB/sbin/bootinfosend"; my $bisend = "$TB/sbin/bootinfosend";
my $logfile = "$TB/log/reboot.log"; my $logfile = "$TB/log/reboot.log";
...@@ -405,7 +446,7 @@ sub nodereboot($$) ...@@ -405,7 +446,7 @@ sub nodereboot($$)
# #
foreach my $node ( @batch ) { foreach my $node ( @batch ) {
$pids{$node} = RebootNode($nodeobjects{$node}, $reconfig, $pids{$node} = RebootNode($nodeobjects{$node}, $reconfig,
$killmode, $rebootmode, $prepare, $waittime); $killmode, $rebootmode, $prepare);
} }
} }
...@@ -513,7 +554,7 @@ sub nodereboot($$) ...@@ -513,7 +554,7 @@ sub nodereboot($$)
if ($waitmode) { if ($waitmode) {
my $waitstart = time; my $waitstart = time;
print STDOUT "reboot: Waiting ($waittime s) for nodes to come up.\n" print STDOUT "reboot: Waiting (${waittime}s) for nodes to come up.\n"
if (!$silent); if (!$silent);
# Wait for events to filter through stated! If we do not wait, then we # Wait for events to filter through stated! If we do not wait, then we
...@@ -681,7 +722,7 @@ sub RebootNode { ...@@ -681,7 +722,7 @@ sub RebootNode {
# See if the machine is pingable. If its not pingable, then we just # See if the machine is pingable. If its not pingable, then we just
# power cycle the machine rather than wait for ssh to time out. # power cycle the machine rather than wait for ssh to time out.
# #
if (! DoesPing($pc, 0)) { if (! DoesPing($pc, 0, 1)) {
if ($nodestate eq TBDB_NODESTATE_POWEROFF) { if ($nodestate eq TBDB_NODESTATE_POWEROFF) {
info("$pc powered off: will power on"); info("$pc powered off: will power on");
tbnotice "$pc powered off; will power on."; tbnotice "$pc powered off; will power on.";
...@@ -718,7 +759,7 @@ sub RebootNode { ...@@ -718,7 +759,7 @@ sub RebootNode {
if ($syspid) { if ($syspid) {
local $SIG{ALRM} = sub { kill("TERM", $syspid); }; local $SIG{ALRM} = sub { kill("TERM", $syspid); };
alarm 30; alarm $RECONFIGTIMO;
waitpid($syspid, 0); waitpid($syspid, 0);
alarm 0; alarm 0;
...@@ -786,7 +827,7 @@ sub RebootNode { ...@@ -786,7 +827,7 @@ sub RebootNode {
if ($syspid) { if ($syspid) {
my $timedout = 0; my $timedout = 0;
local $SIG{ALRM} = sub { kill("TERM", $syspid); $timedout = 1; }; local $SIG{ALRM} = sub { kill("TERM", $syspid); $timedout = 1; };
alarm 20;; alarm $REBOOTTIMO;
waitpid($syspid, 0); waitpid($syspid, 0);
alarm 0; alarm 0;
my $stat = $? >> 8; my $stat = $? >> 8;
...@@ -801,19 +842,20 @@ sub RebootNode { ...@@ -801,19 +842,20 @@ sub RebootNode {
# of seconds. # of seconds.
# #
if ($timedout) { if ($timedout) {
print STDERR "*** reboot ($pc): wedged, sending ipod.\n" if $debug;
info("$pc: ssh reboot failed (hung) ... sending ipod"); info("$pc: ssh reboot failed (hung) ... sending ipod");
print STDERR "*** reboot ($pc): wedged, sending ipod.\n" if $debug;
system("$ipod $pc"); if ($nodeobject->SendApod(1) == 0) {
$didipod = 1; $didipod = 1;
}
} }
# #
# The ssh can return non-zero exit status, but still have worked. # The ssh can return non-zero exit status, but still have worked.
# FreeBSD for example. # FreeBSD for example.
# #
else { else {
print STDERR "reboot ($pc): reboot returned $stat.\n" if $debug;
info("$pc: ssh reboot ($stat)"); info("$pc: ssh reboot ($stat)");
print STDERR "reboot ($pc): reboot returned $stat.\n" if $debug;
$didipod = 0; $didipod = 0;
} }
} }
...@@ -837,7 +879,9 @@ sub RebootNode { ...@@ -837,7 +879,9 @@ sub RebootNode {
# We wait a while for the node to stop responding to pings, and if it never # We wait a while for the node to stop responding to pings, and if it never
# goes silent, whack it with a bigger stick. # goes silent, whack it with a bigger stick.
# #
if (WaitTillDead($pc, ($prepare ? 200 : 30)) == 0) { my $wtime = ($prepare ? $PREPAREWAIT : $PINGWAIT);
print STDERR "reboot ($pc): waiting for $wtime for reboot.\n" if $debug;
if (WaitTillDead($pc, $wtime) == 0) {
my $state = TBDB_NODESTATE_SHUTDOWN; my $state = TBDB_NODESTATE_SHUTDOWN;
TBSetNodeEventState($pc,$state); TBSetNodeEventState($pc,$state);
exit(0); exit(0);
...@@ -852,18 +896,25 @@ sub RebootNode { ...@@ -852,18 +896,25 @@ sub RebootNode {
# #
if (! $didipod) { if (! $didipod) {
info("$pc: ssh reboot failed ... sending ipod"); info("$pc: ssh reboot failed ... sending ipod");
print STDERR "*** reboot ($pc): ssh reboot failed, sending ipod\n"
if ($debug);
$UID = 0; $UID = 0;
system("$ipod $pc"); my $rv = $nodeobject->SendApod(1);
$UID = $oldUID; $UID = $oldUID;
if (WaitTillDead($pc, 20) == 0) { if ($rv == 0) {
my $state = TBDB_NODESTATE_SHUTDOWN; print STDERR "reboot ($pc): waiting for $PINGWAIT for ipod.\n"
TBSetNodeEventState($pc,$state); if $debug;
exit(0); if (WaitTillDead($pc, $PINGWAIT) == 0) {
my $state = TBDB_NODESTATE_SHUTDOWN;
TBSetNodeEventState($pc,$state);
exit(0);
}
} }
} }
info("$pc: ipod failed ... power cycle"); info("$pc: ipod failed ... power cycle");
print STDERR "*** reboot ($pc): ipod failed, will power cycle.\n" if $debug; print STDERR "*** reboot ($pc): ipod failed, will power cycle.\n"
if $debug;
exit(2); exit(2);
} }
...@@ -896,7 +947,7 @@ sub RebootVNode($$) { ...@@ -896,7 +947,7 @@ sub RebootVNode($$) {
if ($syspid) { if ($syspid) {
local $SIG{ALRM} = sub { kill("TERM", $syspid); }; local $SIG{ALRM} = sub { kill("TERM", $syspid); };
alarm 30; alarm $REBOOTVNODETIMO;
waitpid($syspid, 0); waitpid($syspid, 0);
alarm 0; alarm 0;
my $exitstatus = $?; my $exitstatus = $?;
...@@ -985,7 +1036,7 @@ sub WaitTillDead { ...@@ -985,7 +1036,7 @@ sub WaitTillDead {
# #
my $iters = int(($waittime + 1) / 2); my $iters = int(($waittime + 1) / 2);
for (my $i = 0; $i < $iters; $i++) { for (my $i = 0; $i < $iters; $i++) {
if (! DoesPing($pc, $i)) { if (! DoesPing($pc, $i, 0)) {
print STDERR "reboot ($pc): Died off.\n" if $debug > 1; print STDERR "reboot ($pc): Died off.\n" if $debug > 1;
return 0; return 0;
} }
...@@ -997,17 +1048,38 @@ sub WaitTillDead { ...@@ -997,17 +1048,38 @@ sub WaitTillDead {
# #
# Returns 1 if host is responding to pings, 0 otherwise. # Returns 1 if host is responding to pings, 0 otherwise.
# Pings for roughly two seconds. # Pings for roughly two seconds.
# If $immediate is set, return after the first successful ping.
# This routine is NOT allowed to do any DB queries! # This routine is NOT allowed to do any DB queries!
# #
sub DoesPing { sub DoesPing {
my ($pc, $index) = @_; my ($pc, $index, $immediate) = @_;
my $status; my $status;
my $saveuid; my $saveuid;
$saveuid = $UID; #
$UID = 0; # We fork/exec rather than system() for two reasons.
system("$ping -q -i 0.25 -c 9 -t 2 $pc >/dev/null 2>&1"); # One, is so that we don't have to flip the UID back and forth in
$UID = $saveuid; # the parent, and two, so that we can throw away stdout/stderr without
# an extra level of "sh -c" to setup redirection (">/dev/null 2>&1").
#
# XXX I am not sure either of these is particularly compelling,
# but when we can have literally hundreds of pending nodereboots
# outstanding at any time, it might matter.
#
my $child = fork();
if ($child == 0) {
my $args = "-q -i 0.25 -c 9 -t 2";
$args .= " -o" if ($immediate);
# get rid of output that -q doesn't
open(STDOUT, ">/dev/null");
open(STDERR, ">&STDOUT");
$UID = 0;
exec("$ping $args $pc");
exit(1);
}
waitpid($child, 0);
$status = $? >> 8; $status = $? >> 8;
# #
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment