#!/usr/bin/perl -wT
use English;
use Getopt::Std;

#
# Look for nodes to reload.
#
#	usage: reload_daemon [-d]
#
# XXX - Hardwired to type "pc600".
#       Path to image and the partition are hardwired in.
#
# TODO: Use "logger" instead of writing a log file.
#
#
# Show the command line synopsis on STDOUT and terminate the program
# with exit status -1. Does not return.
#
sub usage()
{
    my @help = ("Usage: reload_daemon [-d]\n",
		"Use the -d option to prevent daemonization\n");

    print STDOUT join("", @help);
    exit(-1);
}
# Option letters accepted by getopts; -d keeps the daemon in the foreground.
my $optlist = "d";

#
# Configure variables. The @...@ tokens are filled in when this template
# is configured.
#
my $TB       = "@prefix@";
my $DBNAME   = "@TBDBNAME@";
my $TBOPS    = "@TBOPSEMAIL@";

# Testbed Support library
use lib "@prefix@/lib";
use libdb;
use libtestbed;

#
# This should not be hardwired in.
# 
my $CLASS	= "pc";	 # XXX: Needs a better approach.

#
# These come from the library. They name the project/experiment pairs
# that hold nodes which are being reloaded or are waiting for a reload.
# 
my $RELOADPID	= NODERELOADING_PID;
my $RELOADEID	= NODERELOADING_EID;
my $PENDINGEID	= NODERELOADPENDING_EID;

#
# External commands run by the main loop, the log file that STDOUT and
# STDERR are appended to once daemonized, and the -d (debug) setting.
#
my $os_load	= "$TB/bin/os_load";
my $sched_reload= "$TB/sbin/sched_reload";
my $reboot	= "$TB/bin/node_reboot";
my $logfile	= "$TB/log/reloadlog";
my $debug	= 0;

#
# Make STDOUT unbuffered so that progress output (dots ...) shows up as
# soon as it is printed.
#
$| = 1;

#
# Untaint the path. There is deliberately no trailing colon: an empty
# PATH element stands for the current directory, and neither this script
# nor the commands it spawns should search there.
# 
$ENV{'PATH'} = "/bin:/usr/bin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments, and this program takes none, so
# anything left over is a usage error.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV != 0) {
    usage();
}
if (defined($options{"d"})) {
    $debug = $options{"d"};
}

# Go to ground, unless -d asked us to stay in the foreground.
if (! $debug) {
    daemonize();
}

#
# Loop, looking for nodes to reload.
# 
while (1) {
    my($count, $which, @row, %hrow, $imageid, $node, $retry, $stamp);
    my($pid, $eid, $type, $reload_type);
    my $query_result;

    #
    # Number of polls (5 seconds apart, so roughly 11 minutes) to wait
    # for a reload before deciding that the node is wedged.
    #
    my $maxpolls = 130;

    #
    # Partial delay between loops in case of an error.
    # 
    sleep(10);
    
    #
    # Find the candidate nodes. These are the free nodes (no row in
    # reserved) of class $CLASS whose last_reservation entry still has a
    # pid (that entry is reset anytime a node is reloaded by the system),
    # plus any node sitting in the reload pending experiment.
    #
    $query_result =
	DBQueryWarn("select a.node_id,b.pid,b.eid,n.type from nodes as a ".
		    "left join reserved as b on a.node_id=b.node_id ".
		    "left join last_reservation as l on l.node_id=a.node_id ".
		    "left join node_types as n on n.type=a.type where ".
		    "(b.node_id is null and n.class='$CLASS' and l.pid!='') ".
		    "or (b.pid='$RELOADPID' and b.eid='$PENDINGEID') ".
		    "order by a.node_id");

    if (! $query_result) {
	print "DB Error getting free nodes. Waiting a bit.\n";
	next;
    }
    $count = $query_result->numrows;

    if (! $count) {
	next;
    }

    #
    # RAND() does not work in our version of mysql, so generate a random
    # number with perl and pick out that node.
    #
    $which = int(rand($count));
    $query_result->dataseek($which);
    %hrow  = $query_result->fetchhash();
    $node  = $hrow{'node_id'};
    $pid   = $hrow{'pid'};
    $eid   = $hrow{'eid'};
    $type  = $hrow{'type'};

    print "Trying to reload $node ... \n";

    #
    # What we do depends on whether its a free node or a node reserved
    # into the reload pending experiment.
    #
    if ((defined($pid) && $pid eq $RELOADPID) &&
	(defined($eid) && $eid eq $PENDINGEID)) {
	#
	# Query for the imageid from the reloads table. Note that there
	# has to be one since the only way this node got into the pending
	# EID was when nfree noticed an entry in the table.
	#
	$query_result =
	    DBQueryWarn("select image_id, reload_type from scheduled_reloads " .
			"where node_id='$node'");
    
	if (! $query_result) {
	    print "DB Error getting imageid from scheduled_reloads. ".
		"Waiting a bit.\n";
	    next;
	}
	@row     = $query_result->fetchrow_array();
	$imageid = $row[0];
	$reload_type= $row[1];
	
	#
	# The node is reserved into the special pid/eid, as the result
	# of a sched_reload while it was still allocated to an experiment.
	# We change the reservation EID over and fire up an os_load
	# directly.
	#
	if (! DBQueryWarn("update reserved set eid='$RELOADEID' where ".
			  "node_id='$node'")) {
	    print "Could not update EID for $node. Waiting a bit.\n";
	    next;
	}
	
	#
	# We add flags to our os_load call depending on the type of reload
	# the user asked for. We basically need a case for each type.
	#
	my $os_load_flags = "";
	if ($reload_type eq 'frisbee') {
	    $os_load_flags .= " -r";
	} elsif ($reload_type eq 'netdisk') {
	    # Nothing special to do
	} else {
	    fatal("Found a reload type I don't know how to handle: $reload_type");
	}

	if (system("$os_load $os_load_flags -i $imageid $node")) {
	    #
	    # This should not fail! 
	    #
	    fatal("$os_load failed on $node. Thats not supposed to happen.\n".
		  "Please check the reload daemon log before restarting!");
	}
    }
    else {
	#
	# Call sched_reload with the "force" option, which says that if
	# sched_reload cannot reserve the node (cause someone just got it)
	# then don't schedule a reload for later. Just fail outright.
	# We will try again in a bit.
	#
	# We do not need to specify an imageid, since we want the node
	# default, and sched_reload will pick that up from the database
	# in the absence of a -i option. 
	#
	if (system("$sched_reload -f $node")) {
	    #
	    # Could not get it. Wait and go around again.
	    #
	    print "$sched_reload failed on $node. Waiting a bit.\n";
	    next;
	}
    }
    $stamp = DBDateTime();
	
    print "Reload of $node has started at $stamp.\n";

    #
    # For Frisbee reloads, we don't wait for the node to finish reloading,
    # since the whole point is to let many nodes load at once.
    # The reload type is only known for nodes that came out of the pending
    # experiment; for free nodes it is undefined, hence the defined() test.
    #
    if (defined($reload_type) && $reload_type eq "frisbee") {
	print "Not waiting for frisbee reload of $node.\n";
	next;
    }

    #
    # Reload was started. We want to wait until its finished. If it does
    # not finish within $maxpolls polls we power cycle the node and wait
    # one more time; a second timeout is fatal.
    #
    $retry = 0;
  ATTEMPT:
    while (1) {
	$count = 0;
      POLL:
	while ($count < $maxpolls) {
	    $query_result =
		DBQueryWarn("select pid,eid from reserved ".
			    "where node_id='$node'");
	    if (! $query_result) {
		print "DB Error getting reservation for $node. ".
		    "Waiting a bit\n";
		# Really wait, so a DB outage does not turn into a busy loop.
		sleep(5);
		next POLL;
	    }
	    $stamp = DBDateTime();

	    if (! $query_result->numrows) {
		print "\nReload of $node appears to have finished ".
		    "at $stamp.\n";
		last POLL;
	    }
	    #
	    # Make sure its still in the "reloading" experiment. Its possible
	    # (although unlikely) that the node will get freed up by the TMCD
	    # when it reboots, and then reallocated to another experiment,
	    # before we get back here to check.
	    #
	    @row  = $query_result->fetchrow_array();
	    if ($row[0] ne "$RELOADPID" || $row[1] ne "$RELOADEID") {
		print "\nReload of $node has finished at $stamp.\n";
		last POLL;
	    }
	    print ".";
	    $count++;
	    # Start a new line of dots after every 70 of them.
	    if (($count % 70) == 0) {
		print "\n";
	    }
	    sleep(5);
	}

	# The poll loop only runs to $maxpolls when the reload never finished.
	last ATTEMPT
	    if ($count < $maxpolls);

	if ($retry) {
	    fatal("$node appears to have wedged. Stopping reload daemon.");
	}
	if (system("$reboot -f $node")) {
	    fatal("$node was wedged, but could not be power cycled.");
	}
	print "\nReload appears wedged. Power cycling and trying once more!\n";
	$retry = 1;
    }
    sleep(20);
}

#
# Report an unrecoverable error: mail the message to $TBOPS with the
# subject "TESTBED: Reload Daemon Died" and then die with that same
# message. Does not return.
#
sub fatal($)
{
    my ($msg) = @_;

    SENDMAIL($TBOPS, "TESTBED: Reload Daemon Died", $msg);
    die($msg);
}

#
# Report a non-fatal condition: print the message, followed by a newline,
# on STDOUT and mail the same message to $TBOPS with the subject
# "TESTBED: Reload Daemon Message".
#
sub notify($)
{
    my ($mesg) = @_;
    
    print "$mesg\n";
    SENDMAIL($TBOPS, "TESTBED: Reload Daemon Message", $mesg);
}

#
# Become a daemon. Forks; the parent exits with status 0 and the child
# carries on with STDIN reading from /dev/null and STDOUT/STDERR
# appending to $logfile. Dies if the fork or any of the opens fails.
# Returns 0 (in the child).
# 
sub daemonize()
{
    my $mypid = fork();
    if (! defined($mypid)) {
	# fork failed; do not carry on in the foreground as if we were
	# the child.
	die("could not fork to become a daemon: $!");
    }
    if ($mypid) {
	exit(0);
    }

    #
    # We have to disconnect from the caller by redirecting STDIN, STDOUT
    # and STDERR away from the pipe. Otherwise the caller will continue
    # to wait even though the parent has exited. 
    #
    open(STDIN, "<", "/dev/null") or
	die("opening /dev/null for STDIN: $!");

    #
    # Open the batch log and start writing to it. 
    #
    open(STDERR, ">>", $logfile) or die("opening $logfile for STDERR: $!");
    open(STDOUT, ">>", $logfile) or die("opening $logfile for STDOUT: $!");

    return 0;
}