#!/usr/bin/perl -wT

#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2002 University of Utah and the Flux Group.
# All rights reserved.
#

use strict;
use English;
use Getopt::Std;

#
# This should run as root to make sure that it has permission to reboot nodes
# (since only root is allowed to power cycle nodes at any time - it's time-
# limited for anyone else)
#
if ($UID != 0) {
    die "This should only be run as root!\n";
}

#
# Look for nodes to reload.
#
#   usage: reload_daemon [-d]
#
# TODO: Use "logger" instead of writing a log file.
#

#
# Print the usage message and exit with a failure status.
#
sub usage()
{
    print STDOUT "Usage: reload_daemon [-d]\n" .
        "Use the -d option to prevent daemonization\n";
    exit(-1);
}
my $optlist = "d";

#
# Forward declarations. These subs carry prototypes and are defined at the
# bottom of the file, after their first call; declaring them here lets perl
# check the prototypes (and keeps -w quiet about "called too early").
#
sub fatal($);
sub notify($);
sub daemonize();

#
# Configure variables
#
my $TB     = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS  = "@TBOPSEMAIL@";

# Testbed Support library
use lib "@prefix@/lib";
use libdb;
use libtestbed;

#
# These come from the library.
#
my $RELOADPID  = NODERELOADING_PID;
my $RELOADEID  = NODERELOADING_EID;
my $PENDINGEID = NODERELOADPENDING_EID;

my $os_load      = "$TB/bin/os_load -s";
my $sched_reload = "$TB/sbin/sched_reload";
my $reboot       = "$TB/bin/node_reboot";
my $logfile      = "$TB/log/reloadlog";
my $debug        = 0;
my $retry_time   = 10;              # in minutes
my $warn_time    = 30;              # in minutes

#
# Turn off line buffering on output (dots ...).
#
$| = 1;

#
# Untaint the path. No trailing colon: an empty PATH element means "the
# current directory", which must never be searched by a root daemon.
#
$ENV{'PATH'} = "/bin:/usr/bin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV != 0) {
    usage();
}
if (defined($options{"d"})) {
    $debug = $options{"d"};
}

# Go to ground.
if (! $debug) {
    daemonize();
}

print "Reload Daemon starting... pid $$\n";

#
# Per-node bookkeeping that must survive across passes of the main loop.
# Each maps node_id => the time() stamp of the last pass in which the node
# was seen by the corresponding query. An entry is dropped as soon as a
# pass completes without seeing the node again.
#
my %retried = ();       # nodes we have already power cycled once
my %warned  = ();       # nodes we have already mailed the admins about

#
# Loop, looking for nodes to reload.
#
while (1) {
    my($count, $which, @row, %hrow, $imageid, $node, $retry, $stamp);
    my($pid, $eid);
    my $query_result;

    #
    # Partial delay between loops in case of an error.
    #
    sleep(1);

    #
    # We use this to figure out when to delete nodes from the retried and
    # warned hashes
    #
    my $time = time();

    #
    # First, look for nodes that have been in the reloading experiment for
    # longer than $retry_time, and try rebooting them
    #
    $query_result =
        DBQueryWarn("select node_id from reserved where pid='$RELOADPID' " .
                    "and eid='$RELOADEID' and " .
                    "(CURRENT_TIMESTAMP - INTERVAL $retry_time MINUTE) ".
                    " > rsrv_time");

    if (! $query_result) {
        print "DB Error. Waiting a bit.\n";
        next;
    }

    while (($node) = $query_result->fetchrow) {
        # Only power cycle a node the first time we notice it is overdue.
        if (!$retried{$node}) {
            print "\nReload appears wedged at ".`date`.
                "Power cycling and trying once more!\n";

            if (system("$reboot -f $node")) {
                fatal("$node was wedged, but could not be power cycled.\n");
            }
        }
        $retried{$node} = $time;
    }

    #
    # We can pull out all nodes that were not 'touched' (matched by the
    # select above) during this pass
    #
    foreach $node (keys %retried) {
        if ($retried{$node} != $time) {
            delete $retried{$node};
        }
    }

    #
    # Next, we do the same thing for nodes in the reloading experiment for
    # longer than $warn_time, and warn the admins.
    #
    $query_result =
        DBQueryWarn("select node_id from reserved where pid='$RELOADPID' " .
                    "and eid='$RELOADEID' and " .
                    "(CURRENT_TIMESTAMP - INTERVAL $warn_time MINUTE) > ".
                    " rsrv_time");

    if (! $query_result) {
        print "DB Error. Waiting a bit.\n";
        next;
    }

    while (($node) = $query_result->fetchrow) {
        # Only send mail the first time we notice the node is overdue.
        if (!$warned{$node}) {
            notify("Node $node has been in $RELOADPID/$RELOADEID for " .
                   "more than $warn_time minutes");
        }
        $warned{$node} = $time;
    }

    #
    # We can pull out all nodes that were not 'touched' (matched by the
    # select above) during this pass
    #
    foreach $node (keys %warned) {
        if ($warned{$node} != $time) {
            delete $warned{$node};
        }
    }

    #
    # Find all of the free nodes that have not been reloaded (no pid entry
    # in last_reservation, which is reset anytime a node is reloaded by
    # the system).
    #
    # XXX - This should not be hardwired in.
    #
    my $CLASSCLAUSE = "(n.class='pc' or n.class='pct')";

    $query_result =
        DBQueryWarn("select a.node_id,b.pid,b.eid from nodes as a ".
                    "left join reserved as b on a.node_id=b.node_id ".
                    "left join last_reservation as l on l.node_id=a.node_id ".
                    "left join node_types as n on n.type=a.type where ".
                    "(b.node_id is null and $CLASSCLAUSE and l.pid!='') ".
                    "or (b.pid='$RELOADPID' and b.eid='$PENDINGEID') ".
                    "order by a.node_id");

    if (! $query_result) {
        print "DB Error. Waiting a bit.\n";
        next;
    }
    $count = $query_result->numrows;

    if (! $count) {
        next;
    }

    #
    # RAND() does not work in our version of mysql, so generate a random
    # number with perl and pick out that node.
    #
    $which = int(rand($count));
    $query_result->dataseek($which);
    %hrow = $query_result->fetchhash();
    $node = $hrow{'node_id'};
    $pid  = $hrow{'pid'};
    $eid  = $hrow{'eid'};

    print "Trying to reload $node ... \n";

    #
    # What we do depends on whether its a free node or a node reserved
    # into the reload pending experiment.
    #
    if ((defined($pid) && $pid eq $RELOADPID) &&
        (defined($eid) && $eid eq $PENDINGEID)) {
        #
        # Query for the imageid from the reloads table.
        #
        $query_result =
            DBQueryWarn("select image_id from scheduled_reloads " .
                        "where node_id='$node'");

        if ((! $query_result) || (!$query_result->numrows())) {
            #
            # If this node didn't make it into the scheduled_reloads table
            # for some reason, then we load it with the default image and
            # type
            #
            $imageid = "";
        }
        else {
            @row     = $query_result->fetchrow_array();
            $imageid = $row[0];
        }

        #
        # The node is reserved into the special pid/eid, as the result
        # of a sched_reload while it was still allocated to an experiment.
        # We change the reservation EID over and fire up an os_load
        # directly.
        #
        if (! DBQueryWarn("update reserved set eid='$RELOADEID' where ".
                          "node_id='$node'")) {
            print "Could not update EID for $node. Waiting a bit.\n";
            next;
        }

        my $os_load_flags = "";

        #
        # We only add the -m flag to os_load if we found a specific image
        # above. Omitting it causes os_load to pick the default image for
        # the node's type
        #
        if ($imageid) {
            $os_load_flags .= " -m $imageid ";
        }

        if (system("$os_load $os_load_flags $node")) {
            #
            # This should not fail!
            #
            fatal("$os_load failed on $node. That's not supposed to happen.\n".
                  "Please check the reload daemon log before restarting!\n");
        }
    }
    else {
        #
        # Call sched_reload with the "force" option, which says that if
        # sched_reload cannot reserve the node (cause someone just got it)
        # then don't schedule a reload for later. Just fail outright.
        # We will try again in a bit.
        #
        # We do not need to specify an imageid, since we want the node
        # default, and sched_reload will pick that up from the database
        # in the absence of a -i option.
        #
        if (system("$sched_reload -f $node")) {
            #
            # Could not get it. Wait and go around again.
            #
            print "$sched_reload failed on $node. Waiting a bit.\n";
            next;
        }
    }

    $stamp = DBDateTime();

    print "Reload of $node has started at $stamp.\n";

    #
    # For Frisbee reloads, we don't wait for the node to finish reloading,
    # since the whole point is to let many nodes load at once.
    #
    #if ($reload_type eq TB_RELOADTYPE_FRISBEE) {
        print "Not waiting for frisbee reload of $node.\n";
        next;
    #}

    #
    # START OBSOLETE SECTION
    #
    # NOTE: everything from here to the end of the loop body is unreachable
    # because of the unconditional "next" above. It is retained only as a
    # record of the old wait-for-completion behaviour.
    #

    #
    # Reload was started. We want to wait until its finished.
    #
    $retry = 0;
  again:
    $count = 0;
    while ($count < 130) {
        $query_result =
            DBQueryWarn("select pid,eid from reserved where node_id='$node'");
        if (! $query_result) {
            print "DB Error getting reservation for $node. Waiting a bit\n";
            next;
        }
        $stamp = DBDateTime();

        if (! $query_result->numrows) {
            print "\nReload of $node appears to have finished at $stamp.\n";
            last;
        }

        #
        # Make sure its still in the "reloading" experiment. Its possible
        # (although unlikely) that the node will get freed up by the TMCD
        # when it reboots, and then reallocated to another experiment,
        # before we get back here to check.
        #
        @row = $query_result->fetchrow_array();
        if ($row[0] ne "$RELOADPID" || $row[1] ne "$RELOADEID") {
            print "\nReload of $node has finished at $stamp.\n";
            last;
        }
        print ".";
        if (($count % 70) == 0) {
            print "\n";
        }
        $count++;
        sleep(5);
    }
    if ($count == 130) {
        if ($retry) {
            fatal("$node appears to have wedged. Stopping reload daemon.\n");
        }
        if (system("$reboot -f $node")) {
            fatal("$node was wedged, but could not be power cycled.\n");
        }
        print "\nReload appears wedged. Power cycling and trying once more!\n";
        $retry = 1;
        goto again;
    }
    sleep(10);
    #
    # END OBSOLETE SECTION
    #
}

#
# Mail the given message to the testbed operators and then die with it.
# Does not return.
#
sub fatal($)
{
    my($msg) = $_[0];

    SENDMAIL($TBOPS, "Reload Daemon Died", $msg, $TBOPS);

    die($msg);
}

#
# Print the given message (followed by a newline) and also mail it to the
# testbed operators. Unlike fatal(), the daemon keeps running.
#
sub notify($)
{
    my($mesg) = $_[0];

    print "$mesg\n";

    SENDMAIL($TBOPS, "Reload Daemon Message", $mesg, $TBOPS);
}

#
# Become a daemon.
#
# Forks; the parent exits immediately with status 0 and the child carries
# on with STDIN reading from /dev/null and STDOUT/STDERR appended to
# $logfile. Dies if the fork or any of the redirections fail. Returns 0 in
# the child.
#
sub daemonize()
{
    my $mypid = fork();

    #
    # fork() returns undef on failure. Without this check the original
    # process would fall through and carry on as if it were the child.
    #
    if (!defined($mypid)) {
        die("Could not fork: $!\n");
    }
    if ($mypid) {
        exit(0);
    }

    #
    # We have to disconnect from the caller by redirecting both STDIN and
    # STDOUT away from the pipe. Otherwise the caller will continue to wait
    # even though the parent has exited.
    #
    open(STDIN, "< /dev/null") or
        die("opening /dev/null for STDIN: $!");

    #
    # Open the batch log and start writing to it.
    #
    open(STDERR, ">> $logfile") or die("opening $logfile for STDERR: $!");
    open(STDOUT, ">> $logfile") or die("opening $logfile for STDOUT: $!");

    return 0;
}