#!/usr/bin/perl -wT
use English;
use Getopt::Std;

#
# Look for nodes to reload.
#
#    usage: reload_daemon [-d]
#
# XXX - Hardwired to type "pc600".
#       Path to image and the partition are hardwired in.
#
# TODO: Use "logger" instead of writing a log file.
#

#
# Print the usage message and exit with a failure status.
#
sub usage()
{
    print STDOUT "Usage: reload_daemon [-d]\n" .
        "Use the -d option to prevent daemonization\n";
    exit(-1);
}

#
# Forward declarations, so that the prototyped subs defined at the bottom
# of the file can be called from the main loop without triggering
# "called too early to check prototype" warnings under -w.
#
sub fatal($);
sub notify($);
sub daemonize();

my $optlist = "d";

#
# Configure variables
#
my $TB     = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS  = "@TBOPSEMAIL@";

# Testbed Support library
use lib "@prefix@/lib";
use libdb;
use libtestbed;

#
# This should not be hardwired in.
#
my $CLASS = "pc";    # XXX: Needs a better approach.

#
# These come from the library.
#
my $RELOADPID  = NODERELOADING_PID;
my $RELOADEID  = NODERELOADING_EID;
my $PENDINGEID = NODERELOADPENDING_EID;

my $os_load      = "$TB/bin/os_load";
my $sched_reload = "$TB/sbin/sched_reload";
my $reboot       = "$TB/bin/node_reboot";
my $logfile      = "$TB/log/reloadlog";
my $debug        = 0;

#
# How long we wait for a single reload attempt: $MAXPOLLS polls of the
# reserved table, $POLLINTERVAL seconds apart.
#
my $MAXPOLLS     = 130;
my $POLLINTERVAL = 5;

#
# Turn off line buffering on output (dots ...).
#
$| = 1;

#
# Untaint the path
#
$ENV{'PATH'} = "/bin:/usr/bin:";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV != 0) {
    usage();
}
if (defined($options{"d"})) {
    $debug = $options{"d"};
}

# Go to ground.
if (! $debug) {
    daemonize();
}

#
# Loop, looking for nodes to reload.
#
while (1) {
    my ($imageid, $reload_type);

    #
    # Partial delay between loops in case of an error.
    #
    sleep(10);

    #
    # Find all of the free nodes that have not been reloaded (no pid entry
    # in last_reservation, which is reset anytime a node is reloaded by
    # the system), plus any nodes sitting in the reload pending experiment.
    #
    my $query_result =
        DBQueryWarn("select a.node_id,b.pid,b.eid,n.type from nodes as a ".
                    "left join reserved as b on a.node_id=b.node_id ".
                    "left join last_reservation as l on l.node_id=a.node_id ".
                    "left join node_types as n on n.type=a.type where ".
                    "(b.node_id is null and n.class='$CLASS' and l.pid!='') ".
                    "or (b.pid='$RELOADPID' and b.eid='$PENDINGEID') ".
                    "order by a.node_id");

    if (! $query_result) {
        print "DB Error getting free nodes. Waiting a bit.\n";
        next;
    }
    my $count = $query_result->numrows;

    if (! $count) {
        next;
    }

    #
    # RAND() does not work in our version of mysql, so generate a random
    # number with perl and pick out that node.
    #
    my $which = int(rand($count));
    $query_result->dataseek($which);
    my %hrow = $query_result->fetchhash();
    my $node = $hrow{'node_id'};
    my $pid  = $hrow{'pid'};
    my $eid  = $hrow{'eid'};

    print "Trying to reload $node ... \n";

    #
    # What we do depends on whether its a free node or a node reserved
    # into the reload pending experiment.
    #
    if ((defined($pid) && $pid eq $RELOADPID) &&
        (defined($eid) && $eid eq $PENDINGEID)) {
        #
        # Query for the imageid from the reloads table. Note that there
        # has to be one since the only way this node got into the pending
        # EID was when nfree noticed an entry in the table.
        #
        $query_result =
            DBQueryWarn("select image_id, reload_type from scheduled_reloads " .
                        "where node_id='$node'");

        if (! $query_result) {
            print "DB Error getting imageid from scheduled_reloads. ".
                "Waiting a bit.\n";
            next;
        }
        my @row      = $query_result->fetchrow_array();
        $imageid     = $row[0];
        $reload_type = $row[1];

        #
        # The node is reserved into the special pid/eid, as the result
        # of a sched_reload while it was still allocated to an experiment.
        # We change the reservation EID over and fire up an os_load
        # directly.
        #
        if (! DBQueryWarn("update reserved set eid='$RELOADEID' where ".
                          "node_id='$node'")) {
            print "Could not update EID for $node. Waiting a bit.\n";
            next;
        }

        #
        # We add flags to our os_load call depending on the type of reload
        # the user asked for. We basically need a case for each type.
        #
        my $os_load_flags = "";
        if ($reload_type eq 'frisbee') {
            $os_load_flags .= " -r";
        }
        elsif ($reload_type eq 'netdisk') {
            # Nothing special to do
        }
        else {
            fatal("Found a reload type I don't know how to handle: $reload_type");
        }

        if (system("$os_load $os_load_flags -i $imageid $node")) {
            #
            # This should not fail!
            #
            fatal("$os_load failed on $node. Thats not supposed to happen.\n".
                  "Please check the reload daemon log before restarting!");
        }
    }
    else {
        #
        # Call sched_reload with the "force" option, which says that if
        # sched_reload cannot reserve the node (cause someone just got it)
        # then don't schedule a reload for later. Just fail outright.
        # We will try again in a bit.
        #
        # We do not need to specify an imageid, since we want the node
        # default, and sched_reload will pick that up from the database
        # in the absence of a -i option.
        #
        if (system("$sched_reload -f $node")) {
            #
            # Could not get it. Wait and go around again.
            #
            print "$sched_reload failed on $node. Waiting a bit.\n";
            next;
        }
    }
    my $stamp = DBDateTime();

    print "Reload of $node has started at $stamp.\n";

    #
    # For Frisbee reloads, we don't wait for the node to finish reloading,
    # since the whole point is to let many nodes load at once.
    #
    # $reload_type is only set for nodes that came out of the pending
    # experiment; for free nodes it is undefined, so guard the comparison.
    #
    if (defined($reload_type) && $reload_type eq "frisbee") {
        print "Not waiting for frisbee reload of $node.\n";
        next;
    }

    #
    # Reload was started. We want to wait until its finished. If the wait
    # times out, power cycle the node and wait exactly one more time; a
    # second timeout stops the daemon.
    #
    my $retried = 0;
    until (_wait_for_reload($node)) {
        if ($retried) {
            fatal("$node appears to have wedged. Stopping reload daemon.");
        }

        if (system("$reboot -f $node")) {
            fatal("$node was wedged, but could not be power cycled.");
        }
        print "\nReload appears wedged. Power cycling and trying once more!\n";
        $retried = 1;
    }
    sleep(20);
}

#
# Poll the reserved table until $node is no longer in the reloading
# experiment, printing a progress dot per poll.
#
# Returns 1 once the node has left the reloading experiment (either it has
# no reservation at all, or it is reserved to some other pid/eid), and 0 if
# $MAXPOLLS polls went by without that happening.
#
sub _wait_for_reload
{
    my ($node) = @_;
    my $polls  = 0;

    while ($polls < $MAXPOLLS) {
        my $query_result =
            DBQueryWarn("select pid,eid from reserved where node_id='$node'");

        if (! $query_result) {
            print "DB Error getting reservation for $node. Waiting a bit\n";
            # Back off before retrying so a DB outage does not turn this
            # into a busy loop. Failed polls do not count toward the
            # timeout, since they tell us nothing about the node.
            sleep($POLLINTERVAL);
            next;
        }
        my $stamp = DBDateTime();

        if (! $query_result->numrows) {
            print "\nReload of $node appears to have finished at $stamp.\n";
            return 1;
        }

        #
        # Make sure its still in the "reloading" experiment. Its possible
        # (although unlikely) that the node will get freed up by the TMCD
        # when it reboots, and then reallocated to another experiment,
        # before we get back here to check.
        #
        my @row = $query_result->fetchrow_array();
        if ($row[0] ne "$RELOADPID" || $row[1] ne "$RELOADEID") {
            print "\nReload of $node has finished at $stamp.\n";
            return 1;
        }

        print ".";
        $polls++;
        # Wrap the row of progress dots every 70 polls.
        if (($polls % 70) == 0) {
            print "\n";
        }
        sleep($POLLINTERVAL);
    }
    return 0;
}

#
# Mail the message to the testbed operators and die with it.
#
sub fatal($)
{
    my ($msg) = $_[0];

    SENDMAIL($TBOPS, "TESTBED: Reload Daemon Died", $msg);
    die($msg);
}

#
# Print the message and also mail it to the testbed operators.
#
sub notify($)
{
    my ($mesg) = $_[0];

    print "$mesg\n";
    SENDMAIL($TBOPS, "TESTBED: Reload Daemon Message", $mesg);
}

#
# Become a daemon. The parent exits; the child returns 0 with STDIN
# redirected from /dev/null and STDOUT/STDERR appended to $logfile.
# Dies if the fork or any of the redirections fail.
#
sub daemonize()
{
    my $mypid = fork();
    if (! defined($mypid)) {
        # fork() returns undef on failure; without this check the parent
        # would carry on as the "daemon" while still attached to the caller.
        die("Could not fork: $!");
    }
    if ($mypid) {
        exit(0);
    }

    #
    # We have to disconnect from the caller by redirecting both STDIN and
    # STDOUT away from the pipe. Otherwise the caller will continue to wait
    # even though the parent has exited.
    #
    open(STDIN, "<", "/dev/null") or
        die("opening /dev/null for STDIN: $!");

    #
    # Open the batch log and start writing to it.
    #
    open(STDERR, ">>", $logfile) or die("opening $logfile for STDERR: $!");
    open(STDOUT, ">>", $logfile) or die("opening $logfile for STDOUT: $!");

    return 0;
}