#!/usr/bin/perl -w

#
# Copyright (c) 2000-2014 University of Utah and the Flux Group.
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
#

use English;
use Getopt::Std;

#
# Look for nodes to reload.
#
#	usage: reload_daemon [-d] [-t tag]
#
# TODO: Use "logger" instead of writing a log file.
#
#
# Print the command line synopsis on STDOUT and exit with status -1.
#
# The attribute name quoted in the -t description is the key that the
# queries in the main loop actually select on ('reload_daemon_pool').
#
sub usage()
{
    print STDOUT "Usage: reload_daemon [-d] [-t tag]\n" .
        "    -d     Prevent daemonization\n" .
        "    -t tag Only manage reloads for nodes or node types\n" .
        "           that have the value of <tag> for a node_type_attribute\n" .
        "           or a node_attribute named 'reload_daemon_pool'.\n" .
        "           IF this tag is not set, the reload_daemon picks only\n" .
        "           those nodes that DO NOT have this type or node\n" .
        "           attribute set!\n";
    exit(-1);
}
# Option specification handed to getopts(): -d flag, -t takes a value.
my $optlist = 'dt:';

#
# Configure variables
#
my $TB       = "@prefix@";
my $DBNAME   = "@TBDBNAME@";
my $TBOPS    = "@TBOPSEMAIL@";

# Set this to turn off tblog in libraries.
$ENV{'TBLOG_OFF'} = 'yep';

# Testbed Support library
use lib "@prefix@/lib";
use libdb;
use libosload;
use libtestbed;
use Experiment;
use Node;
use NodeType;
use EmulabFeatures;
use User;

#
# Project/experiment names of the holding experiments; the values
# come from the library.
#
my $RELOADPID    = NODERELOADING_PID();
my $RELOADEID    = NODERELOADING_EID();
my $PENDINGEID   = NODERELOADPENDING_EID();
my $REPOSPID     = NODEREPOSITIONING_PID();
my $RPPENDINGEID = NODEREPOSPENDING_EID();
my $NODEDEAD_PID = NODEDEAD_PID();
my $NODEDEAD_EID = NODEDEAD_EID();

# Forward declarations for the prototyped subs defined further down.
sub myosload($$$$);
sub logit($);
sub fatal($);
sub notify($);
sub freefromreloading($);

# External helper programs and the log file.
my $sched_reload = "$TB/sbin/sched_reload";
my $reboot       = "$TB/bin/node_reboot";
my $tbrsync      = "$TB/bin/tbrsync";
my $logfile      = "$TB/log/reloadlog";

# Command line settings.
my $debug = 0;
my $tag;

# Timeouts, in minutes.
my $retry_time = 20;
my $warn_time  = 2 * $retry_time;

# Widearea nodes get (mult+1)x longer (possibly not quite true because
# the wait is also scaled by mustwipe).
my $widearea_multiplier = 2;

# Per-node bookkeeping, keyed by node id; values are time() stamps.
my %retried;
my %warned;
my %failed;

# [node_id, mustwipe] pairs queued for another osload attempt.
my @retry_list;

#
# Enable autoflush on the current output handle (dots ...).
#
$| = 1;

#
# Untaint the path
#
$ENV{'PATH'} = "/bin:/usr/bin:";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Parse command arguments. This script takes no positional arguments, so
# anything left over after getopts is a usage error.
#
%options = ();
usage()
    if (!getopts($optlist, \%options) || @ARGV != 0);

$debug = $options{"d"}
    if (defined($options{"d"}));

if (defined($options{"t"})) {
    $tag = $options{"t"};
    # A tagged daemon writes to its own log file.
    $logfile = "$logfile-$tag";
}

#
# This should run as root to make sure that it has permission to reboot nodes
# (since only root is allowed to power cycle nodes at any time - it's time-
# limited for anyone else). The check is only applied to the default
# (untagged) daemon.
#
die("*** $0:\n".
    "    Only root can run this script!\n")
    if ($UID != 0 && !defined($tag));

#
# Only one please (for the default reload_daemon).  If you specified
# a tag, it's your problem.
#
fatal("Not starting another reload daemon!")
    if (!defined($tag) && CheckDaemonRunning("reload_daemon"));

# Go to ground. The process in which TBBackGround() returns true exits
# here (presumably the parent - TODO confirm against libtestbed).
exit(0)
    if (!$debug && TBBackGround($logfile));

# Only the default daemon records itself as running.
fatal("Could not mark daemon as running!")
    if (!defined($tag) && MarkDaemonRunning("reload_daemon"));
#
# Signal handler for newsyslog: reopen the log file on SIGHUP.
#
sub handler()
{
    ReOpenLog($logfile);
}
# The handler is only installed when not running in debug mode.
if (!$debug) {
    $SIG{HUP} = \&handler;
}

logit("Reload Daemon starting... pid $$");

# We use this a lot. Without the reloading experiment object nothing below
# can work, so bail out through fatal(), which mails the admins and dies.
my $reloading_experiment = Experiment->Lookup($RELOADPID, $RELOADEID);
if (!defined($reloading_experiment)) {
    fatal("Could not locate experiment object for $RELOADEID");
}

#
# Loop, looking for nodes to reload.
#
# Each pass does three things:
#   1. Nodes sitting in $RELOADPID/$RELOADEID for longer than the retry
#      interval are power cycled, or queued for another osload if their
#      previous osload failed.
#   2. Nodes sitting there for longer than the warning interval are
#      reported to the admins.
#   3. Reloads are started: osload for nodes in $RELOADPID/$PENDINGEID
#      (plus any queued retries), sched_reload for the remaining nodes
#      returned by the query.
#
my $idle=0;
while (1) {
    my($count, %hrow, $imageid, $node, $pid, $eid);
    my($query_result, $mustwipe, $mustzero, $type, $imageable);

    # Partial delay between loops in case of an error.
    if ($idle) { sleep(10); } # Wait longer if we're not doing anything
    else { sleep(1); }

    $idle=1; # Assume we're going to be idle this iteration
    #
    # We use this to figure out when to delete nodes from the retried and
    # warned hashes
    #
    my $time = time();

    #
    # If we are the default reload daemon (i.e., have no tag for our
    # reload_pool), only look for nodes that have neither a reload_pool
    # node_type_attribute nor a node_attribute.
    #
    # If we have a reload_pool tag, only pick up nodes that
    #  * have our tag for the node_type_attribute, and our tag or NULL
    #    for the node_attribute, OR
    #  * have our tag for the node attribute.
    #
    my $tag_query = '';
    if (!defined($tag)) {
        $tag_query = 'and nta_reload_pool.attrvalue is NULL' .
            ' and na_reload_pool.attrvalue is NULL';
    }
    else {
        $tag_query = "" .
            " and ((nta_reload_pool.attrvalue='$tag' and" .
            "       (na_reload_pool.attrvalue='$tag'" .
            "        or na_reload_pool.attrvalue is NULL))" .
            "      or na_reload_pool.attrvalue='$tag')";
    }

    #
    # First, look for nodes that have been in the reloading experiment for
    # longer than $retry_time, and try rebooting them
    #
    # XXX we count on mustwipe having the value 0, 1, 2 to represent
    # ever slower forms of wipeage.  For retry_time of 20 minutes that
    # yields waits of 20, 40 and 60 minutes.
    #
    $query_result =
        DBQueryWarn("select r.node_id,r.mustwipe from reserved as r" .
                    " left join nodes as n on r.node_id=n.node_id" .
                    " left join node_types as nt on n.type=nt.type " .
                    " left outer join (select type,attrvalue from node_type_attributes" .
                    "   where attrkey='reload_daemon_pool') as nta_reload_pool" .
                    "   on n.type=nta_reload_pool.type" .
                    " left outer join (select node_id,attrvalue from node_attributes" .
                    "   where attrkey='reload_daemon_pool') as na_reload_pool" .
                    "   on r.node_id=na_reload_pool.node_id" .
                    " where r.pid='$RELOADPID' and r.eid='$RELOADEID' and" .
                    " (CURRENT_TIMESTAMP - INTERVAL ($retry_time * (r.mustwipe + 1) + (nt.isremotenode * $retry_time * $widearea_multiplier)) MINUTE)".
                    "  > rsrv_time" .
                    " $tag_query");

    if (! $query_result) {
        logit("DB Error. Waiting a bit.");
        next;
    }

    while (($node, $mustwipe) = $query_result->fetchrow) {
        $idle=0;
        #
        # If this was a node that failed osload, then instead of rebooting,
        # send it back through osload.
        #
        if ($failed{$node}) {
            logit("$node failed an earlier osload. Trying again.");
            push(@retry_list, [$node, $mustwipe]);
            delete $failed{$node};
            # Skip any reboots.
            $retried{$node} = $time;
            next;
        }
        # Only power cycle a node the first time the query reports it;
        # %retried remembers it for as long as it keeps matching.
        if (!$retried{$node}) {
            logit("\nReload appears wedged. ".
                  "Power cycling and trying once more!");

            if (system("$reboot -f $node")) {
                notify("$node was wedged, but could not be rebooted.\n".
                       "Moved to $NODEDEAD_PID/$NODEDEAD_EID\n");

                MarkPhysNodeDown($node);
                TBSetNodeLogEntry($node, "daemon",
                                  TB_DEFAULT_NODELOGTYPE(),
                                  "'Moved to hwdown; reload reboot failed'");
            }
        }
        $retried{$node} = $time;
    }

    #
    # We can pull out all nodes that were not 'touched' (matched by the
    # select above) during this pass
    #
    foreach $node (keys %retried) {
        if ($retried{$node} != $time) {
            delete $retried{$node};
        }
    }

    #
    # Next, we do the same thing for nodes in the reloading experiment for
    # longer than $warn_time, and warn the admins.
    #
    # XXX again, we scale by the value of mustwipe.
    #
    $query_result =
        DBQueryWarn("select r.node_id,r.mustwipe from reserved as r" .
                    " left join nodes as n on r.node_id=n.node_id" .
                    " left join node_types as nt on n.type=nt.type " .
                    " left outer join (select type,attrvalue from node_type_attributes" .
                    "   where attrkey='reload_daemon_pool') as nta_reload_pool" .
                    "   on n.type=nta_reload_pool.type" .
                    " left outer join (select node_id,attrvalue from node_attributes" .
                    "   where attrkey='reload_daemon_pool') as na_reload_pool" .
                    "   on r.node_id=na_reload_pool.node_id" .
                    " where r.pid='$RELOADPID' and r.eid='$RELOADEID' and " .
                    " (CURRENT_TIMESTAMP - INTERVAL ($warn_time * (mustwipe + 1) + (nt.isremotenode * $warn_time * $widearea_multiplier)) MINUTE)".
                    "  > rsrv_time" .
                    " $tag_query");

    if (! $query_result) {
        logit("DB Error. Waiting a bit.");
        next;
    }

    while (($node, $mustwipe) = $query_result->fetchrow) {
        $idle=0;
        # Warn once per node; %warned suppresses repeats while it matches.
        if (!$warned{$node}) {
            my $toolong = $warn_time * ($mustwipe + 1);
            notify("Node $node has been in $RELOADPID/$RELOADEID for " .
                   "more than $toolong minutes");
        }
        $warned{$node} = $time;
    }

    #
    # We can pull out all nodes that were not 'touched' (matched by the
    # select above) during this pass
    #
    foreach $node (keys %warned) {
        if ($warned{$node} != $time) {
            delete $warned{$node};
        }
    }

    #
    # Find all of the free nodes that have not been reloaded (no pid entry
    # in last_reservation, which is reset anytime a node is reloaded by
    # the system).
    #
    # XXX - This should not be hardwired in.
    #
    my $CLASSCLAUSE = "(n.class='pc' or n.class='pct')";

    $query_result =
        DBQueryWarn("select a.node_id,b.pid,b.eid,b.mustwipe,a.type ".
                    "from reserved as b ".
                    "left join nodes as a on a.node_id=b.node_id ".
                    "left join last_reservation as l on l.node_id=a.node_id ".
                    "left join node_types as n on n.type=a.type ".
                    " left outer join (select type,attrvalue from node_type_attributes" .
                    "   where attrkey='reload_daemon_pool') as nta_reload_pool" .
                    "   on n.type=nta_reload_pool.type" .
                    " left outer join (select node_id,attrvalue from node_attributes" .
                    "   where attrkey='reload_daemon_pool') as na_reload_pool" .
                    "   on b.node_id=na_reload_pool.node_id" .
                    " where ((b.node_id is null and $CLASSCLAUSE and l.pid!='') ".
                    "or (b.pid='$RELOADPID' and b.eid='$PENDINGEID')) ".
                    " $tag_query " .
                    "order by a.node_id");

    if (! $query_result) {
        logit("DB Error. Waiting a bit.");
        next;
    }
    $count = $query_result->numrows;

    if (!$count && !scalar(@retry_list)) {
        next;
    } else {
        $idle=0;
    }

    #
    # Grab all the nodes that match. Both lists hold [node_id, mustwipe]
    # pairs and are rebuilt from scratch on every pass; the pending list
    # starts out with any nodes queued for an osload retry.
    #
    my @pending_list = @retry_list;
    my @other_list   = ();
    while (%hrow = $query_result->fetchhash()) {
        $node = $hrow{'node_id'};
        $pid  = $hrow{'pid'};
        $eid  = $hrow{'eid'};
        $mustwipe = $hrow{'mustwipe'};
        $type = $hrow{'type'};
        $imageable = NodeType->LookupSync($type)->imageable();

        #
        # If any non-imageable nodes made it this far, just free them now
        #
        if (!$imageable) {
            logit("Skipping non-imageable node $node.");
            freefromreloading($node);
            next;
        }
        if ($pid eq $RELOADPID && $eid eq $PENDINGEID) {
            push(@pending_list, [$node,$mustwipe]);
        } else {
            push(@other_list, [$node,$mustwipe]);
        }
    }
    my $nodes = join(" ", map { $_->[0] } @pending_list, @other_list);
    if (!$nodes) {
        next;
    }

    logit("Trying to reload $nodes.");
    # From here on $nodes collects the nodes whose reload actually started.
    $nodes = "";

    #
    # What we do depends on whether its a free node or a node reserved
    # into the reload pending experiment.
    #
    if (@pending_list > 0) {
        #
        # Query for the imageid from the reloads table.
        #
        my %images = ();
        my %imagenodes = ();
        my %nodeobjs = ();
        foreach my $ref (@pending_list) {
            ($node, $mustwipe) = @{$ref};
            my $nodeobj = Node->Lookup($node);
            if (!defined($nodeobj)) {
                notify("Could not local node object for $node\n");
                next;
            }
            $nodeobjs{$node} = $nodeobj;

            #
            # If this node didn't make it into the scheduled_reloads table
            # for some reason, then we load it with the default image and
            # type (an empty imageid).
            #
            my ($image_id) = $nodeobj->GetSchedReload();
            $imageid = (defined($image_id) ? $image_id : "");

            #
            # We need to divide up nodes not only by the image they are
            # to load (imageid) but also by if and how the disk should be
            # zeroed (mustzero).  The imageid and mustwipe are combined
            # into a single hash key ('/' is illegal in both, so we use it
            # as the separator).
            #
            my $idid = "$imageid/$mustwipe";

            $images{$node} = $imageid;
            # push autovivifies the per-key array on first use.
            push(@{$imagenodes{$idid}}, $node);
            if ($debug) {
                print "$node ($mustwipe) => $images{$node} == $imageid (".
                  join(",",@{$imagenodes{$idid}}).")\n";
            }
        }

        #
        # The node is reserved into the special pid/eid, as the result
        # of a sched_reload while it was still allocated to an experiment.
        # We change the reservation EID over and fire up an osload
        # directly.
        #
        foreach my $ref (@pending_list) {
            ($node, $mustwipe) = @{$ref};
            my $nodeobj = $nodeobjs{$node};
            next
                if (!defined($nodeobj));

            if ($nodeobj->MoveReservation($reloading_experiment) == 0) {
                $nodeobj->SetNodeHistory(TB_NODEHISTORY_OP_MOVE, undef,
                                         $reloading_experiment);
            }
        }
        # It is now safe to clear this.
        @retry_list = ();

        #
        # Now run an OS load for each image.
        # We invoke libosload directly rather than calling os_load,
        # not so much for efficiency but because it gives us more
        # precise knowledge about failures.
        #
        foreach my $idid (keys %imagenodes) {

            my @nodelist = @{$imagenodes{$idid}};
            my $nodestr = join(' ', @nodelist);

            ($imageid, $mustzero) = split("/", $idid);

            logit("Invoking osload on $nodestr.");

            my @failedload = ();
            if (myosload($imageid, $mustzero, \@nodelist, \@failedload)) {
                $nodestr = join(' ', @failedload);
                notify("OS load failed on $nodestr. ".
                       "That is not supposed to happen.\n".
                       "Attempting to recover from this unfortunate ".
                       "situation!\n");

                # Record the failure list. When the retry query above
                # picks these nodes up again, osload is called again
                # instead of rebooting.
                foreach my $node (@failedload) {
                    $failed{$node} = $time;
                }
                foreach my $node (@nodelist) {
                    if (!$failed{$node}) {
                        $nodes .= "$node ";
                    }
                }
            }
            else {
                $nodes .= "$nodestr ";
                logit("osload done.");
            }
        }
    }

    if (@other_list > 0 ) {
        my $nodestr = join(" ", map { $_->[0] } @other_list);

        #
        # Call sched_reload with the "force" option, which says that if
        # sched_reload cannot reserve the node (cause someone just got it)
        # then don't schedule a reload for later. Just fail outright.
        # We will try again in a bit.
        #
        # We do not need to specify an imageid, since we want the node
        # default, and sched_reload will pick that up from the database
        # in the absence of a -i option.
        #
        logit("Invoking sched_reload on $nodestr.");
        if (system("$sched_reload -f $nodestr")) {
            #
            # Could not get it. Wait and go around again.
            #
            logit("$sched_reload failed on $nodestr. Waiting a bit.");
            next;
        }
        $nodes .= "$nodestr ";
    }

    if ($nodes) {
        logit("Reload of $nodes has started.");
        #
        # For Frisbee reloads, we don't wait for the node to finish reloading,
        # since the whole point is to let many nodes load at once.
        #
        logit("Not waiting for frisbee reload of $nodes.");
    } else {
        logit("No nodes eligible for reload.");
    }
}

#
# Start an OS load of a single image on a set of nodes through libosload.
#
#   $imageid  - image to load; a false value means load the default image.
#   $mustzero - passed through as the osload 'zerofree' argument.
#   $nlist    - reference to the array of node ids to load.
#   $failedp  - reference to an array; when the load reports failure it is
#               overwritten with the node ids that have a true entry in the
#               per-node status hash filled in by osload.
#
# Returns the value returned by osload; the caller treats true as failure.
#
sub myosload($$$$)
{
    my ($imageid, $mustzero, $nlist, $failedp) = @_;

    my %osloadargs = ();
    my %nodestatus = ();
    my $failed     = 0;

    $osloadargs{'waitmode'} = 0;
    $osloadargs{'zerofree'} = $mustzero;
    # XXX we don't set prepare?
    #$osloadargs{'prepare'}  = 1;
    $osloadargs{'nodelist'} = [ @{$nlist} ];
    # No imageid means to load the default image.
    $osloadargs{'imageids'} = [ $imageid ]
        if ($imageid);

    # XXX replicate what os_load does. local() puts the previous value back
    # when this sub is left, including when osload dies.
    local $libdb::DBQUERY_MAXTRIES = 30;

    my $user = User->ThisUser();
    my $experiment = $reloading_experiment;
    my $group = $experiment->GetGroup();
    if (EmulabFeatures->FeatureEnabled("NewOsload",$user,$group,$experiment)) {
        require libosload_new;

        my $loadobj = libosload_new->New();
        $loadobj->debug($debug);
        #
        # XXX basically, tell devices that might be reconfig'd via push
        # from us (like switches) that a reconfig should follow the reload!
        #
        $osloadargs{'reconfig'} = 1;

        # add a few more things for feature checks down the line:
        $osloadargs{'user'} = $user;
        $osloadargs{'experiment'} = $experiment;
        $osloadargs{'group'} = $group;
        $failed = $loadobj->osload(\%osloadargs, \%nodestatus);
    } else {
        $failed = osload(\%osloadargs, \%nodestatus);
    }

    # Hand the nodes with a true status entry back to the caller.
    if ($failed) {
        @{$failedp} = grep { $nodestatus{$_} } keys(%nodestatus);
    }

    return $failed;
}

#
# Free up the node and clear any associated reload DB state.
# (code stolen from stated).
#
#   $nodeid - id of the node to release.
#
# The reservation is only touched when the node is currently held in
# $RELOADPID/$RELOADEID or $RELOADPID/$PENDINGEID. A node that has a row in
# reposition_status is moved to $RELOADPID/$RPPENDINGEID instead of being
# freed. Returns nothing; problems are reported via notify() or logit().
#
sub freefromreloading($) {
    my $nodeid = shift;
    my $node = Node->Lookup($nodeid);
    if (!defined($node)) {
        notify("Could not get node object for $nodeid\n");
        return;
    }
    $node->FlushReserved();
    $node->ClearCurrentReload();
    my $experiment = $node->Reservation();
    if (defined($experiment) &&
        $experiment->pid() eq $RELOADPID &&
        ($experiment->eid() eq $RELOADEID ||
         $experiment->eid() eq $PENDINGEID)) {
        $node->ClearSchedReload();

        # Check if the robot is back in its pen, otherwise we have to throw it
        # back to repositionpending.
        my $loc_result =
            DBQueryWarn("SELECT * FROM reposition_status ".
                        "WHERE node_id='$nodeid'");

        # Same handling as the other queries in this file: a failed query
        # is logged and skipped rather than dereferencing the failed result.
        if (!$loc_result) {
            logit("DB Error. Not releasing $nodeid this time.");
            return;
        }

        if ($loc_result->numrows) {
            my $target_experiment =
                Experiment->Lookup($RELOADPID, $RPPENDINGEID);
            if (!defined($target_experiment)) {
                notify("Could not locate experiment object for $RPPENDINGEID\n");
                return;
            }
            if ($node->MoveReservation($target_experiment) == 0) {
                logit("Reposition pending nodes moved to $RPPENDINGEID.");

                $node->SetNodeHistory(TB_NODEHISTORY_OP_MOVE, undef,
                                      $target_experiment);
            }
        }
        else {
            $node->ClearReservation();
            $node->SetNodeHistory(TB_NODEHISTORY_OP_FREE, undef, $experiment);
        }
    }
}

#
# Write one line to the current output handle, prefixed with the local time.
#
sub logit($)
{
    my $text = shift;

    print scalar(localtime()) . ": $text\n";
}

#
# Report a fatal error and terminate: mail the message to $TBOPS, clear
# the daemon-running marker, and die with the message.
#
#   $msg - text for both the mail body and the die message.
#
sub fatal($)
{
    my ($msg) = @_;

    SENDMAIL($TBOPS, "Reload Daemon Died", $msg, $TBOPS);
    # Only the default (untagged) daemon calls MarkDaemonRunning(), so a
    # tagged daemon must not clear that marker.
    MarkDaemonStopped("reload_daemon")
        if (!defined($tag));
    die($msg);
}

#
# Log a timestamped message and mail the same text to $TBOPS.
#
sub notify($)
{
    my ($mesg) = @_;

    # logit() prints the same "<localtime>: <text>" line.
    logit($mesg);
    SENDMAIL($TBOPS, "Reload Daemon Message", $mesg, $TBOPS);
}