reload_daemon.in 8.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
#!/usr/bin/perl -wT
use English;
use Getopt::Std;

#
# Look for nodes to reload.
#
#	usage: reload_daemon [-d]
#
# TODO: Use "logger" instead of writing a log file.
#
sub usage()
{
    # Two-line help text, written to STDOUT in one go; never returns.
    my @help = ("Usage: reload_daemon [-d]\n",
		"Use the -d option to prevent daemonization\n");

    print STDOUT join("", @help);
    exit(-1);
}
# Option letters accepted by getopts(); -d keeps the daemon in the foreground.
my  $optlist = "d";

#
# Configure variables. The @...@ tokens are placeholders; presumably they
# are substituted when this .in template is configured (TODO confirm).
#
my $TB       = "@prefix@";
my $DBNAME   = "@TBDBNAME@";
my $TBOPS    = "@TBOPSEMAIL@";

# Testbed Support library
use lib "@prefix@/lib";
use libdb;
use libtestbed;

#
# Node class considered for reloading by the free-node query.
# This should not be hardwired in.
#
my $CLASS	= "pc";	 # XXX: Needs a better approach.

#
# These come from the library: the pid/eid pairs a node is reserved under
# while it is being reloaded, and while a reload is pending.
#
my $RELOADPID	= NODERELOADING_PID;
my $RELOADEID	= NODERELOADING_EID;
my $PENDINGEID	= NODERELOADPENDING_EID;

# External programs run by the main loop, and the log used when daemonized.
my $os_load	= "$TB/bin/os_load";
my $sched_reload= "$TB/sbin/sched_reload";
my $reboot	= "$TB/bin/node_reboot";
my $logfile	= "$TB/log/reloadlog";

# Set from the -d option; when true the script does not daemonize.
my $debug	= 0;

# How long a node may sit in the reloading experiment before the admins
# are notified.
my $warn_time   = 30; # in minutes

#
# Autoflush STDOUT so that progress output (dots ...) shows up as it is
# printed rather than when a buffer fills.
#
$| = 1;

#
# Untaint the path. There is deliberately no trailing colon: an empty
# PATH component makes the shell search the current directory.
#
$ENV{'PATH'} = "/bin:/usr/bin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments (there are none for this script).
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV != 0) {
    usage();
}
if (defined($options{"d"})) {
    $debug = $options{"d"};
}

# Go to ground, unless -d asked us to stay in the foreground.
if (! $debug) {
    daemonize();
}

#
# Loop, looking for nodes to reload.
# 
#
# Nodes the admins have already been told about. An entry is dropped once
# the node no longer shows up as overdue, so a later incident on the same
# node is reported again.
#
my %warned = ();

#
# Polling parameters used while waiting for a reload to finish.
#
my $poll_interval = 5;		# seconds between reservation checks
my $max_polls     = 130;	# checks before a node is considered wedged

while (1) {
    #
    # Partial delay between loops in case of an error.
    #
    sleep(10);

    #
    # Check for nodes in the reloading experiment. Notify the admins if
    # any node stays in the reloading experiment for longer than $warn_time
    #
    my $query_result =
	DBQueryWarn("select node_id from reserved where pid='$RELOADPID' " .
		    "and eid='$RELOADEID' and " .
		    "(CURRENT_TIMESTAMP - INTERVAL $warn_time MINUTE) > rsrv_time");

    if (! $query_result) {
	# Do not call methods on a failed query; try again next pass.
	print "DB Error getting overdue nodes. Waiting a bit.\n";
	next;
    }

    my %overdue = ();
    while (my ($stuck) = $query_result->fetchrow) {
	$overdue{$stuck} = 1;
	next
	    if ($warned{$stuck});

	notify("Node $stuck has been in $RELOADPID/$RELOADEID for " .
	       "more than $warn_time minutes");
	$warned{$stuck} = 1;
    }
    # Forget nodes that are no longer overdue.
    foreach my $old (keys(%warned)) {
	delete($warned{$old})
	    if (! $overdue{$old});
    }

    #
    # Find all of the free node that have not been reloaded (no pid entry
    # in last_reservation, which is reset anytime a node is reloaded by
    # the system), plus nodes sitting in the reload pending experiment.
    #
    $query_result =
	DBQueryWarn("select a.node_id,b.pid,b.eid from nodes as a ".
		    "left join reserved as b on a.node_id=b.node_id ".
		    "left join last_reservation as l on l.node_id=a.node_id ".
		    "left join node_types as n on n.type=a.type where ".
		    "(b.node_id is null and n.class='$CLASS' and l.pid!='') ".
		    "or (b.pid='$RELOADPID' and b.eid='$PENDINGEID') ".
		    "order by a.node_id");

    if (! $query_result) {
	print "DB Error getting free nodes. Waiting a bit.\n";
	next;
    }
    my $count = $query_result->numrows;

    if (! $count) {
	next;
    }

    #
    # RAND() does not work in our version of mysql, so generate a random
    # number with perl and pick out that node.
    #
    my $which = int(rand($count));
    $query_result->dataseek($which);
    my %hrow  = $query_result->fetchhash();
    my $node  = $hrow{'node_id'};
    my $pid   = $hrow{'pid'};
    my $eid   = $hrow{'eid'};
    my $reload_type;

    print "Trying to reload $node ... \n";

    #
    # What we do depends on whether its a free node or a node reserved
    # into the reload pending experiment.
    #
    if ((defined($pid) && $pid eq $RELOADPID) &&
	(defined($eid) && $eid eq $PENDINGEID)) {
	my $imageid;

	#
	# Query for the imageid from the reloads table.
	#
	$query_result =
	    DBQueryWarn("select image_id, reload_type from scheduled_reloads " .
			"where node_id='$node'");

	if ((! $query_result) || (!$query_result->numrows())) {
	    #
	    # If this node didn't make it into the scheduled_reloads table
	    # for some reason, then we load it with the default image and
	    # type
	    #
	    $imageid = "";
	    $reload_type = TB_DEFAULT_RELOADTYPE;
	}
	else {
	    my @row      = $query_result->fetchrow_array();
	    $imageid     = $row[0];
	    $reload_type = $row[1];
	}

	#
	# The node is reserved into the special pid/eid, as the result
	# of a sched_reload while it was still allocated to an experiment.
	# We change the reservation EID over and fire up an os_load
	# directly.
	#
	if (! DBQueryWarn("update reserved set eid='$RELOADEID' where ".
			  "node_id='$node'")) {
	    print "Could not update EID for $node. Waiting a bit.\n";
	    next;
	}

	#
	# We add flags to our os_load call depending on the type of reload
	# the user asked for. We basically need a case for each type.
	# The arguments are kept as a list so that system() runs os_load
	# directly instead of handing a string to the shell.
	#
	my @os_load_args = ();
	if ($reload_type eq TB_RELOADTYPE_FRISBEE) {
	    push(@os_load_args, "-r");
	}
	elsif ($reload_type eq TB_RELOADTYPE_NETDISK) {
	    push(@os_load_args, "-n");
	}
	else {
	    fatal("Found a reload type I don't know how to handle: $reload_type");
	}

	#
	# We only add the -i flag to os_load if we found a specific image
	# above. Omitting it causes os_load to pick the default image for
	# the node's type
	#
	if ($imageid) {
	    push(@os_load_args, "-i", $imageid);
	}

	if (system($os_load, @os_load_args, $node)) {
	    #
	    # This should not fail!
	    #
	    fatal("$os_load failed on $node. Thats not supposed to happen.\n".
		  "Please check the reload daemon log before restarting!");
	}
    }
    else {
	#
	# Call sched_reload with the "force" option, which says that if
	# sched_reload cannot reserve the node (cause someone just got it)
	# then don't schedule a reload for later. Just fail outright.
	# We will try again in a bit.
	#
	# We do not need to specify an imageid, since we want the node
	# default, and sched_reload will pick that up from the database
	# in the absence of a -i option.
	#
	if (system($sched_reload, "-f", $node)) {
	    #
	    # Could not get it. Wait and go around again.
	    #
	    print "$sched_reload failed on $node. Waiting a bit.\n";
	    next;
	}

	$reload_type = TB_DEFAULT_RELOADTYPE;
    }
    my $stamp = DBDateTime();

    print "Reload of $node has started at $stamp.\n";

    #
    # For Frisbee reloads, we don't wait for the node to finish reloading,
    # since the whole point is to let many nodes load at once.
    #
    if ($reload_type eq TB_RELOADTYPE_FRISBEE) {
	print "Not waiting for frisbee reload of $node.\n";
	next;
    }

    #
    # Reload was started. We want to wait until its finished. If the node
    # does not leave the reloading experiment within $max_polls checks it
    # is power cycled and given exactly one more round of polling.
    #
    my $retried = 0;
  WAIT:
    while (1) {
	my $finished = 0;
	my $polls    = 0;

      POLL:
	while ($polls < $max_polls) {
	    $query_result =
		DBQueryWarn("select pid,eid from reserved where node_id='$node'");
	    if (! $query_result) {
		print "DB Error getting reservation for $node. Waiting a bit\n";
		# Pause before retrying so a DB outage does not turn this
		# into a busy loop. DB errors do not count towards wedging.
		sleep($poll_interval);
		next POLL;
	    }
	    $stamp = DBDateTime();

	    if (! $query_result->numrows) {
		print "\nReload of $node appears to have finished at $stamp.\n";
		$finished = 1;
		last POLL;
	    }
	    #
	    # Make sure its still in the "reloading" experiment. Its possible
	    # (although unlikely) that the node will get freed up by the TMCD
	    # when it reboots, and then reallocated to another experiment,
	    # before we get back here to check.
	    #
	    my @row = $query_result->fetchrow_array();
	    if ($row[0] ne "$RELOADPID" || $row[1] ne "$RELOADEID") {
		print "\nReload of $node has finished at $stamp.\n";
		$finished = 1;
		last POLL;
	    }
	    print ".";
	    $polls++;
	    # Wrap the line of progress dots after every 70 of them.
	    if (($polls % 70) == 0) {
		print "\n";
	    }
	    sleep($poll_interval);
	}
	last WAIT
	    if ($finished);

	if ($retried) {
	    fatal("$node appears to have wedged. Stopping reload daemon.");
	}
	if (system($reboot, "-f", $node)) {
	    fatal("$node was wedged, but could not be power cycled.");
	}
	print "\nReload appears wedged. Power cycling and trying once more!\n";
	$retried = 1;
    }
    sleep(20);
}

#
# Mail the message to $TBOPS under a "Reload Daemon Died" subject and then
# die with the same message. Does not return.
#
#	msg - the text handed to both SENDMAIL and die.
#
sub fatal($)
{
    # Lexical, not local(): the message must not leak into a package
    # global that SENDMAIL or anything it calls could see or change.
    my ($msg) = @_;

    SENDMAIL($TBOPS, "TESTBED: Reload Daemon Died", $msg);
    die($msg);
}

#
# Report a non-fatal condition: print the message on STDOUT and mail it to
# $TBOPS under a "Reload Daemon Message" subject.
#
sub notify($)
{
    my $text = $_[0];

    print $text . "\n";
    SENDMAIL($TBOPS, "TESTBED: Reload Daemon Message", $text);
}

#
# Become a daemon: fork, let the parent exit, and in the child point STDIN
# at /dev/null and append STDOUT/STDERR to $logfile. Dies if the fork or
# any of the opens fail. Returns 0 in the child.
#
sub daemonize()
{
    my $mypid = fork();
    if (!defined($mypid)) {
	# fork() returns undef on failure; without this check the caller
	# would carry on in the foreground as if it were the child.
	die("Could not fork: $!");
    }
    if ($mypid) {
	# Parent: the child carries on from here.
	exit(0);
    }

    #
    # We have to disconnect from the caller by redirecting both STDIN and
    # STDOUT away from the pipe. Otherwise the caller will continue to wait
    # even though the parent has exited.
    #
    open(STDIN, "<", "/dev/null") or
	die("opening /dev/null for STDIN: $!");

    #
    # Open the batch log and start writing to it.
    #
    open(STDERR, ">>", $logfile) or die("opening $logfile for STDERR: $!");
    open(STDOUT, ">>", $logfile) or die("opening $logfile for STDOUT: $!");

    return 0;
}