#!/usr/bin/perl -wT

#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2002 University of Utah and the Flux Group.
# All rights reserved.
#

use English;
use Getopt::Std;

#
# This should run as root to make sure that it has permission to reboot nodes
# (only root is allowed to power cycle nodes at any time; for anyone else it
# is time-limited).
#
# Bail out immediately for any real uid other than 0.
die "This should only be run as root!\n"
    unless ($UID == 0);

#
# Look for nodes to reload.
#
#	usage: reload_daemon [-d]
#
# TODO: Use "logger" instead of writing a log file.
#
#
# Print the command line synopsis on STDOUT and exit with status -1.
# Never returns.
#
sub usage()
{
    my @help = (
	"Usage: reload_daemon [-d]\n",
	"Use the -d option to prevent daemonization\n",
    );

    print STDOUT join("", @help);
    exit(-1);
}
# Option string handed to getopts() below: a single boolean flag, -d.
my  $optlist = "d";

#
# Configure variables
#
# The @...@ tokens are placeholders; presumably they are substituted when
# this .in template is configured (confirm against the build).
#   $TB     - prefix under which the helper programs and the log file live.
#   $DBNAME - not referenced elsewhere in the visible part of this script.
#   $TBOPS  - address passed to SENDMAIL() by fatal() and notify().
#
my $TB       = "@prefix@";
my $DBNAME   = "@TBDBNAME@";
my $TBOPS    = "@TBOPSEMAIL@";

# Testbed Support library
use lib "@prefix@/lib";
use libdb;
use libtestbed;

#
# Experiment identifiers, taken from constants supplied by the testbed
# library. Nodes being reloaded are looked up in $RELOADPID/$RELOADEID and
# nodes waiting for a reload in $RELOADPID/$PENDINGEID.
#
my $RELOADPID    = NODERELOADING_PID;
my $RELOADEID    = NODERELOADING_EID;
my $PENDINGEID   = NODERELOADPENDING_EID;

#
# External commands (run through system()) and the daemon's log file, all
# located under $TB.
#
my $os_load      = $TB . "/bin/os_load -s";
my $sched_reload = $TB . "/sbin/sched_reload";
my $reboot       = $TB . "/bin/node_reboot";
my $logfile      = $TB . "/log/reloadlog";

# Set from the -d option; when true the script does not daemonize.
my $debug        = 0;

# Minutes a node may sit in the reloading experiment before it is rebooted.
my $retry_time   = 10; # in minutes

# Minutes a node may sit in the reloading experiment before admins are mailed.
my $warn_time    = 30; # in minutes

#
# Make STDOUT unbuffered ($| = 1 forces a flush after every write) so that
# progress output (dots ...) shows up immediately.
#
$| = 1;

#
# Untaint the path.
#
# There is deliberately no trailing colon: an empty PATH element means "the
# current directory", which must not be searched for commands by a daemon
# running as root. The shell-affecting variables below are removed for the
# same reason.
#
$ENV{'PATH'} = "/bin:/usr/bin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments. This script takes none, so anything
# left over in @ARGV is a usage error.
#
# %options is a lexical rather than an undeclared package global; nothing
# outside this script needs to see it.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV != 0) {
    usage();
}

# -d: stay in the foreground instead of daemonizing.
if (defined($options{"d"})) {
    $debug = $options{"d"};
}

# Go to ground, unless -d asked us to stay attached to the caller.
daemonize()
    unless ($debug);

# Startup banner; $$ is this process's pid.
print "Reload Daemon starting... pid " . $$ . "\n";

#
# Loop, looking for nodes to reload.
#
# Each pass does three things:
#  1. Power cycles nodes that have been in the reloading experiment for
#     more than $retry_time minutes.
#  2. Mails the admins about nodes that have been there for more than
#     $warn_time minutes.
#  3. Picks one candidate node at random and starts a reload on it.
#
# %retried and %warned are package globals that persist across passes. They
# record which nodes have already been power cycled or reported, so each is
# handled once per stay in the reloading experiment.
#
while (1) {
    my($count, $which, @row, %hrow, $imageid, $node, $retry, $stamp);
    my($pid, $eid);
    my $query_result;

    #
    # Partial delay between loops in case of an error.
    #
    sleep(1);

    #
    # We use this to figure out when to delete nodes from the retried and
    # warned hashes
    #
    my $time = time();

    #
    # First, look for nodes that have been in the reloading experiment for
    # longer than $retry_time, and try rebooting them
    #
    $query_result =
	DBQueryWarn("select node_id from reserved where pid='$RELOADPID' " .
		    "and eid='$RELOADEID' and " .
		    "(CURRENT_TIMESTAMP - INTERVAL $retry_time MINUTE) ".
		    "  > rsrv_time");

    if (! $query_result) {
	print "DB Error. Waiting a bit.\n";
	next;
    }

    while (($node) = $query_result->fetchrow) {
	if (!$retried{$node}) {
	    #
	    # Name the node in the log, and take the timestamp from
	    # DBDateTime() like the other messages in this loop do,
	    # instead of forking date(1).
	    #
	    print "\nReload of $node appears wedged at " . DBDateTime() .
		".\nPower cycling and trying once more!\n";

	    if (system("$reboot -f $node")) {
		fatal("$node was wedged, but could not be power cycled.\n");
	    }
	}
	# Stamp with this pass's time so the sweep below keeps the entry.
	$retried{$node} = $time;
    }

    #
    # We can pull out all nodes that were not 'touched' (matched by the
    # select above) during this pass
    #
    foreach my $stale (keys %retried) {
	if ($retried{$stale} != $time) {
	    delete $retried{$stale};
	}
    }

    #
    # Next, we do the same thing for nodes in the reloading experiment for
    # longer than $warn_time, and warn the admins.
    #
    $query_result =
	DBQueryWarn("select node_id from reserved where pid='$RELOADPID' " .
		    "and eid='$RELOADEID' and " .
		    "(CURRENT_TIMESTAMP - INTERVAL $warn_time MINUTE) > ".
		    "   rsrv_time");

    if (! $query_result) {
	print "DB Error. Waiting a bit.\n";
	next;
    }

    while (($node) = $query_result->fetchrow) {
	if (!$warned{$node}) {
	    notify("Node $node has been in $RELOADPID/$RELOADEID for " .
		   "more than $warn_time minutes");
	}
	$warned{$node} = $time;
    }

    #
    # We can pull out all nodes that were not 'touched' (matched by the
    # select above) during this pass
    #
    foreach my $stale (keys %warned) {
	if ($warned{$stale} != $time) {
	    delete $warned{$stale};
	}
    }

    #
    # Find the candidates for a reload:
    #  - free nodes (no row in reserved) of the listed classes that have a
    #    non-empty pid in last_reservation. The original note here says
    #    that entry is reset whenever the system reloads a node; that
    #    cannot be confirmed from this file.
    #  - nodes reserved into the reload pending experiment.
    #
    # XXX - This should not be hardwired in.
    #
    my $CLASSCLAUSE = "(n.class='pc' or n.class='pct')";

    $query_result =
	DBQueryWarn("select a.node_id,b.pid,b.eid from nodes as a ".
		    "left join reserved as b on a.node_id=b.node_id ".
		    "left join last_reservation as l on l.node_id=a.node_id ".
		    "left join node_types as n on n.type=a.type where ".
		    "(b.node_id is null and $CLASSCLAUSE and l.pid!='') ".
		    "or (b.pid='$RELOADPID' and b.eid='$PENDINGEID') ".
		    "order by a.node_id");

    if (! $query_result) {
	print "DB Error. Waiting a bit.\n";
	next;
    }
    $count = $query_result->numrows;

    if (! $count) {
	next;
    }

    #
    # RAND() does not work in our version of mysql, so generate a random
    # number with perl and pick out that node.
    #
    $which = int(rand($count));
    $query_result->dataseek($which);
    %hrow  = $query_result->fetchhash();
    $node  = $hrow{'node_id'};
    $pid   = $hrow{'pid'};
    $eid   = $hrow{'eid'};

    print "Trying to reload $node ... \n";

    #
    # What we do depends on whether its a free node or a node reserved
    # into the reload pending experiment.
    #
    if ((defined($pid) && $pid eq $RELOADPID) &&
	(defined($eid) && $eid eq $PENDINGEID)) {
	#
	# Query for the imageid from the reloads table.
	#
	$query_result =
	    DBQueryWarn("select image_id from scheduled_reloads " .
			"where node_id='$node'");

	if ((! $query_result) || (!$query_result->numrows())) {
	    #
	    # If this node didn't make it into the scheduled_reloads table
	    # for some reason, then we load it with the default image and
	    # type
	    #
	    $imageid = "";
	}
	else {
	    @row     = $query_result->fetchrow_array();
	    $imageid = $row[0];
	}

	#
	# The node is reserved into the special pid/eid, as the result
	# of a sched_reload while it was still allocated to an experiment.
	# We change the reservation EID over and fire up an os_load
	# directly.
	#
	if (! DBQueryWarn("update reserved set eid='$RELOADEID' where ".
			  "node_id='$node'")) {
	    print "Could not update EID for $node. Waiting a bit.\n";
	    next;
	}

	my $os_load_flags = "";

	#
	# We only add the -m flag to os_load if we found a specific image
	# above. Omitting it causes os_load to pick the default image for
	# the node's type
	#
	if ($imageid) {
	    $os_load_flags .= " -m $imageid ";
	}

	if (system("$os_load $os_load_flags $node")) {
	    #
	    # This should not fail!
	    #
	    fatal("$os_load failed on $node. That's not supposed to happen.\n".
		  "Please check the reload daemon log before restarting!\n");
	}
    }
    else {
	#
	# Call sched_reload with the "force" option, which says that if
	# sched_reload cannot reserve the node (cause someone just got it)
	# then don't schedule a reload for later. Just fail outright.
	# We will try again in a bit.
	#
	# We do not need to specify an imageid, since we want the node
	# default, and sched_reload will pick that up from the database
	# in the absence of a -i option.
	#
	if (system("$sched_reload -f $node")) {
	    #
	    # Could not get it. Wait and go around again.
	    #
	    print "$sched_reload failed on $node. Waiting a bit.\n";
	    next;
	}
    }
    $stamp = DBDateTime();

    print "Reload of $node has started at $stamp.\n";

    #
    # For Frisbee reloads, we don't wait for the node to finish reloading,
    # since the whole point is to let many nodes load at once.
    #
    # The guard is commented out, so the "next" below is unconditional and
    # everything after it in this loop body is currently unreachable.
    #
    #if ($reload_type eq TB_RELOADTYPE_FRISBEE) {
	print "Not waiting for frisbee reload of $node.\n";
	next;
    #}

    #
    # START OBSOLETE SECTION
    #

    #
    # Reload was started. We want to wait until its finished.
    #
    $retry = 0;
  again:
    $count = 0;
    while ($count < 130) {
	$query_result =
	    DBQueryWarn("select pid,eid from reserved where node_id='$node'");
	if (! $query_result) {
	    print "DB Error getting reservation for $node. Waiting a bit\n";
	    # Pause before retrying so a DB outage cannot become a hot spin.
	    sleep(5);
	    next;
	}
	$stamp = DBDateTime();

	if (! $query_result->numrows) {
	    print "\nReload of $node appears to have finished at $stamp.\n";
	    last;
	}
	#
	# Make sure its still in the "reloading" experiment. Its possible
	# (although unlikely) that the node will get freed up by the TMCD
	# when it reboots, and then reallocated to another experiment,
	# before we get back here to check.
	#
	@row  = $query_result->fetchrow_array();
	if ($row[0] ne "$RELOADPID" || $row[1] ne "$RELOADEID") {
	    print "\nReload of $node has finished at $stamp.\n";
	    last;
	}
	print ".";
	if (($count % 70) == 0) {
	    print "\n";
	}
	$count++;
	sleep(5);
    }
    if ($count == 130) {
	if ($retry) {
	    fatal("$node appears to have wedged. Stopping reload daemon.\n");
	}
	if (system("$reboot -f $node")) {
	    fatal("$node was wedged, but could not be power cycled.\n");
	}
	print "\nReload appears wedged. Power cycling and trying once more!\n";
	$retry = 1;
	goto again;
    }
    sleep(10);
    #
    # END OBSOLETE SECTION
    #
}

#
# Report an unrecoverable error and terminate.
#
# Mails $msg to $TBOPS with the subject "Reload Daemon Died", then dies
# with the same message. Never returns.
#
sub fatal($)
{
    # A lexical, as in notify(): the message is not exposed to callees
    # through a dynamically scoped package variable.
    my($msg) = $_[0];

    SENDMAIL($TBOPS, "Reload Daemon Died", $msg, $TBOPS);
    die($msg);
}

#
# Report a non-fatal condition: print the message, followed by a newline,
# on STDOUT and mail it to $TBOPS with the subject "Reload Daemon Message".
#
sub notify($)
{
    my $text = $_[0];

    print $text . "\n";
    SENDMAIL($TBOPS, "Reload Daemon Message", $text, $TBOPS);
}

#
# Become a daemon.
#
# Forks; the parent exits with status 0 and the child continues with STDIN
# reading from /dev/null and both STDERR and STDOUT appended to $logfile.
# Dies if the fork fails or if any of the handles cannot be reopened.
# Returns 0 (in the child).
#
sub daemonize()
{
    my $mypid = fork();

    #
    # fork() returns undef on failure. Without this check a failed fork
    # would fall through and the original process would carry on as if it
    # were the child.
    #
    if (! defined($mypid)) {
	die("Could not fork: $!");
    }
    if ($mypid) {
	exit(0);
    }

    #
    # We have to disconnect from the caller by redirecting both STDIN and
    # STDOUT away from the pipe. Otherwise the caller will continue to wait
    # even though the parent has exited.
    #
    open(STDIN, "<", "/dev/null") or
	die("opening /dev/null for STDIN: $!");

    #
    # Open the batch log and start writing to it. The three-argument form
    # of open keeps the file name from being parsed for mode characters.
    #
    open(STDERR, ">>", $logfile) or die("opening $logfile for STDERR: $!");
    open(STDOUT, ">>", $logfile) or die("opening $logfile for STDOUT: $!");

    return 0;
}