#!/usr/bin/perl -wT
use strict;
use English;
use Getopt::Std;

#
# Look for nodes to reload.
#
#       usage: reload_daemon [-d]
#
# TODO: Use "logger" instead of writing a log file.
#

#
# Print the usage message and exit with a non-zero status.
#
sub usage()
{
    print STDOUT "Usage: reload_daemon [-d]\n" .
        "Use the -d option to prevent daemonization\n";
    exit(-1);
}
my $optlist = "d";

#
# Configure variables
#
my $TB     = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS  = "@TBOPSEMAIL@";

# Testbed Support library
use lib "@prefix@/lib";
use libdb;
use libtestbed;

#
# This should not be hardwired in.
#
my $CLASS = "pc";       # XXX: Needs a better approach.

#
# These come from the library. They are called with explicit parens so
# that they resolve as function calls under "use strict".
#
my $RELOADPID  = NODERELOADING_PID();
my $RELOADEID  = NODERELOADING_EID();
my $PENDINGEID = NODERELOADPENDING_EID();

my $os_load      = "$TB/bin/os_load";
my $sched_reload = "$TB/sbin/sched_reload";
my $reboot       = "$TB/bin/node_reboot";
my $logfile      = "$TB/log/reloadlog";
my $debug        = 0;
my $warn_time    = 30;  # in minutes

#
# How long to wait for a single reload attempt: $wait_polls checks of the
# reserved table, $poll_interval seconds apart.
#
my $wait_polls    = 130;
my $poll_interval = 5;

# Nodes we have already sent a "stuck in reloading" notice about.
my %warned = ();

#
# Flush output after every write, so that the progress dots show up
# as they are printed.
#
$| = 1;

#
# Untaint the path
#
$ENV{'PATH'} = "/bin:/usr/bin:";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV != 0) {
    usage();
}
if (defined($options{"d"})) {
    $debug = $options{"d"};
}

# Go to ground.
if (! $debug) {
    daemonize();
}

#
# Loop, looking for nodes to reload.
#
while (1) {
    my ($count, $which, @row, %hrow, $imageid, $node, $stamp);
    my ($pid, $eid, $reload_type);
    my $query_result;

    #
    # Partial delay between loops in case of an error.
    #
    sleep(10);

    #
    # Check for nodes in the reloading experiment. Notify the admins if
    # any node stays in the reloading experiment for longer than $warn_time
    #
    $query_result =
        DBQueryWarn("select node_id from reserved where pid='$RELOADPID' " .
                    "and eid='$RELOADEID' and " .
                    "(CURRENT_TIMESTAMP - INTERVAL $warn_time MINUTE) > rsrv_time");

    # A failed query must not be dereferenced; that would kill the daemon.
    if (! $query_result) {
        print "DB Error getting nodes in $RELOADPID/$RELOADEID. " .
            "Waiting a bit.\n";
        next;
    }

    while (($node) = $query_result->fetchrow()) {
        if (!$warned{$node}) {
            notify("Node $node has been in $RELOADPID/$RELOADEID for " .
                   "more than $warn_time minutes");
            $warned{$node} = 1;
        }
    }

    #
    # Find all of the nodes that still need a reload: free nodes of class
    # $CLASS with a non-empty pid in last_reservation (that entry is reset
    # anytime a node is reloaded by the system), plus nodes reserved into
    # the reload pending experiment.
    #
    $query_result =
        DBQueryWarn("select a.node_id,b.pid,b.eid from nodes as a ".
                    "left join reserved as b on a.node_id=b.node_id ".
                    "left join last_reservation as l on l.node_id=a.node_id ".
                    "left join node_types as n on n.type=a.type where ".
                    "(b.node_id is null and n.class='$CLASS' and l.pid!='') ".
                    "or (b.pid='$RELOADPID' and b.eid='$PENDINGEID') ".
                    "order by a.node_id");

    if (! $query_result) {
        print "DB Error getting free nodes. Waiting a bit.\n";
        next;
    }
    $count = $query_result->numrows;

    if (! $count) {
        next;
    }

    #
    # RAND() does not work in our version of mysql, so generate a random
    # number with perl and pick out that node.
    #
    $which = int(rand($count));
    $query_result->dataseek($which);
    %hrow = $query_result->fetchhash();
    $node = $hrow{'node_id'};
    $pid  = $hrow{'pid'};
    $eid  = $hrow{'eid'};

    print "Trying to reload $node ... \n";

    #
    # What we do depends on whether its a free node or a node reserved
    # into the reload pending experiment.
    #
    if ((defined($pid) && $pid eq $RELOADPID) &&
        (defined($eid) && $eid eq $PENDINGEID)) {
        #
        # Query for the imageid from the reloads table.
        #
        $query_result =
            DBQueryWarn("select image_id, reload_type from scheduled_reloads " .
                        "where node_id='$node'");

        if ((! $query_result) || (!$query_result->numrows())) {
            #
            # If this node didn't make it into the scheduled_reloads table
            # for some reason, then we load it with the default image and
            # type
            #
            $imageid     = "";
            $reload_type = TB_DEFAULT_RELOADTYPE();
        }
        else {
            @row         = $query_result->fetchrow_array();
            $imageid     = $row[0];
            $reload_type = $row[1];
        }

        #
        # The node is reserved into the special pid/eid, as the result
        # of a sched_reload while it was still allocated to an experiment.
        # We change the reservation EID over and fire up an os_load
        # directly.
        #
        if (! DBQueryWarn("update reserved set eid='$RELOADEID' where ".
                          "node_id='$node'")) {
            print "Could not update EID for $node. Waiting a bit.\n";
            next;
        }

        #
        # We add flags to our os_load call depending on the type of reload
        # the user asked for. We basically need a case for each type.
        #
        my @os_load_args = ();
        if ($reload_type eq TB_RELOADTYPE_FRISBEE()) {
            push(@os_load_args, "-r");
        }
        elsif ($reload_type eq TB_RELOADTYPE_NETDISK()) {
            push(@os_load_args, "-n");
        }
        else {
            fatal("Found a reload type I don't know how to handle: $reload_type");
        }

        #
        # We only add the -i flag to os_load if we found a specific image
        # above. Omitting it causes os_load to pick the default image for
        # the node's type
        #
        if ($imageid) {
            push(@os_load_args, "-i", $imageid);
        }

        # List form: the database-derived arguments never pass through a shell.
        if (system($os_load, @os_load_args, $node)) {
            #
            # This should not fail!
            #
            fatal("$os_load failed on $node. Thats not supposed to happen.\n".
                  "Please check the reload daemon log before restarting!");
        }
    }
    else {
        #
        # Call sched_reload with the "force" option, which says that if
        # sched_reload cannot reserve the node (cause someone just got it)
        # then don't schedule a reload for later. Just fail outright.
        # We will try again in a bit.
        #
        # We do not need to specify an imageid, since we want the node
        # default, and sched_reload will pick that up from the database
        # in the absence of a -i option.
        #
        if (system($sched_reload, "-f", $node)) {
            #
            # Could not get it. Wait and go around again.
            #
            print "$sched_reload failed on $node. Waiting a bit.\n";
            next;
        }
        $reload_type = TB_DEFAULT_RELOADTYPE();
    }

    $stamp = DBDateTime();

    print "Reload of $node has started at $stamp.\n";

    #
    # For Frisbee reloads, we don't wait for the node to finish reloading,
    # since the whole point is to let many nodes load at once.
    #
    if ($reload_type eq TB_RELOADTYPE_FRISBEE()) {
        print "Not waiting for frisbee reload of $node.\n";
        next;
    }

    #
    # Reload was started. We want to wait until its finished. If the node
    # does not finish in time, power cycle it and wait exactly once more.
    #
    if (! wait_for_reload($node)) {
        if (system($reboot, "-f", $node)) {
            fatal("$node was wedged, but could not be power cycled.");
        }
        print "\nReload appears wedged. Power cycling and trying once more!\n";

        if (! wait_for_reload($node)) {
            fatal("$node appears to have wedged. Stopping reload daemon.");
        }
    }
    sleep(20);
}

#
# Poll the reserved table until the given node is no longer in the
# reloading experiment.
#
# Takes the node id. Returns 1 once the reload appears to have finished,
# or 0 if the node is still in the reloading experiment after $wait_polls
# checks.
#
sub wait_for_reload
{
    my ($node) = @_;
    my $count  = 0;

    while ($count < $wait_polls) {
        my $query_result =
            DBQueryWarn("select pid,eid from reserved where node_id='$node'");

        if (! $query_result) {
            print "DB Error getting reservation for $node. Waiting a bit\n";
            # Back off before retrying instead of spinning on the DB. This
            # is not counted as a poll, since a DB error says nothing about
            # whether the node is wedged.
            sleep($poll_interval);
            next;
        }
        my $stamp = DBDateTime();

        if (! $query_result->numrows) {
            print "\nReload of $node appears to have finished at $stamp.\n";
            return 1;
        }

        #
        # Make sure its still in the "reloading" experiment. Its possible
        # (although unlikely) that the node will get freed up by the TMCD
        # when it reboots, and then reallocated to another experiment,
        # before we get back here to check.
        #
        my @row = $query_result->fetchrow_array();
        if ($row[0] ne "$RELOADPID" || $row[1] ne "$RELOADEID") {
            print "\nReload of $node has finished at $stamp.\n";
            return 1;
        }
        print ".";
        if (($count % 70) == 0) {
            print "\n";
        }
        $count++;
        sleep($poll_interval);
    }
    return 0;
}

#
# Mail the message to the testbed operators and then die with it.
#
sub fatal($)
{
    my ($msg) = $_[0];

    SENDMAIL($TBOPS, "TESTBED: Reload Daemon Died", $msg);

    die($msg);
}

#
# Print the message to the log and mail it to the testbed operators.
#
sub notify($)
{
    my ($mesg) = $_[0];

    print "$mesg\n";
    SENDMAIL($TBOPS, "TESTBED: Reload Daemon Message", $mesg);
}

#
# Become a daemon.
#
# Fork, let the parent exit, and point the child's standard handles away
# from the caller: STDIN at /dev/null, STDOUT and STDERR appended to
# $logfile. Dies if the fork or any of the opens fail. Returns 0 in the
# child.
#
sub daemonize()
{
    my $mypid = fork();

    # fork() returns undef on failure; do not carry on as if we were the child.
    if (!defined($mypid)) {
        die("fork failed: $!");
    }
    if ($mypid) {
        exit(0);
    }

    #
    # We have to disconnect from the caller by redirecting both STDIN and
    # STDOUT away from the pipe. Otherwise the caller will continue to wait
    # even though the parent has exited.
    #
    open(STDIN, "<", "/dev/null") or
        die("opening /dev/null for STDIN: $!");

    #
    # Open the batch log and start writing to it.
    #
    open(STDERR, ">>", $logfile) or die("opening $logfile for STDERR: $!");
    open(STDOUT, ">>", $logfile) or die("opening $logfile for STDOUT: $!");

    return 0;
}