stated.in 33 KB
Newer Older
Robert Ricci's avatar
Robert Ricci committed
1
#!/usr/bin/perl -w
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
5 6 7
# All rights reserved.
#

Robert Ricci's avatar
Robert Ricci committed
8 9 10
#
# stated - A daemon to monitor the states of nodes in the testbed. Receives
# state change notification through the event system, and writes the new
11 12
# state into the database. Also watches for invalid transitions, timeouts, 
# and performs other state-related control functions.
Robert Ricci's avatar
Robert Ricci committed
13 14 15 16
#
# Send it a HUP signal to get it to reload the timeout and transition
# information. Periodically reloads this information regardless, though.
#
17
# Will restart when sent SIGUSR1, by exec'ing its executable again.
Robert Ricci's avatar
Robert Ricci committed
18
#
19

Robert Ricci's avatar
Robert Ricci committed
20 21
# Configure variables
use lib '@prefix@/lib';
22
my $TB = "@prefix@";
23
my $REALTB = "/usr/testbed"; # So we know if we're the "real" stated or not
Robert Ricci's avatar
Robert Ricci committed
24
my $BOSSNODE = "@BOSSNODE@";
25
my $TBOPS = "@TBSTATEDEMAIL@";
26
my $REALTBOPS = "@TBOPSEMAIL@";
27
my $TBDBNAME = "@TBDBNAME@";
28
my $REALTBDBNAME = "tbdb"; # So we know if we're using the "real" db
29
my $osselect = "$TB/bin/os_select";
Robert Ricci's avatar
Robert Ricci committed
30 31 32 33 34 35

$| = 1;

use event;
use libdb;
use libtestbed;
36
use TimeoutQueue;
Robert Ricci's avatar
Robert Ricci committed
37
use Getopt::Std;
38
#use strict;
Robert Ricci's avatar
Robert Ricci committed
39
use English;
Mac Newbold's avatar
Mac Newbold committed
40 41
use POSIX;			# for strftime, and sigprocmask and friends
use Fcntl;			# file constants for pidfile
Mac Newbold's avatar
Mac Newbold committed
42 43 44 45 46
use Sys::Syslog;
# Important note about syslog: It defaults to using an inet socket,
# but 'syslogd -s' (the default) doesn't listen for one. So either
# run syslogd without -s, or use setlogsock('unix') before openlog.
# (To get setlogsock: 'use Sys::Syslog qw(:DEFAULT setlogsock);' )
Robert Ricci's avatar
Robert Ricci committed
47

Mac Newbold's avatar
Mac Newbold committed
48 49 50
# Do lots of db retries before we fail and die
$libdb::DBQUERY_MAXTRIES = 5;

Robert Ricci's avatar
Robert Ricci committed
51 52 53
# Number of iterations (roughly, seconds) after which we'll reload 
# information from the database. This is so we don't end up with information
# that's _too_ out of sync.
54
my $reload_time = 600;
55
my $last_reload = time;
Robert Ricci's avatar
Robert Ricci committed
56 57 58 59

# Process command-line arguments

sub usage {
    # Assemble the help text line by line and emit it with a single print.
    # The value of the sub is print's return value, which the callers
    # hand straight to exit().
    my @help = (
        "Usage: $0 [-h] [-d] [-s server] [-p port]",
        "-h              This message",
        "-d              Turn on debugging output, and don't go into the background",
        "-s server       Use specified server, instead of this site's bossnode",
        "-p port\t        Use specified port",
        "Send SIGHUP to reload database state, or SIGUSR1 to restart completely.",
    );
    return print(join("\n", @help) . "\n");
}

Mac Newbold's avatar
Mac Newbold committed
70
# Only root should run this - it won't work when run as a user...
71
# (Or, let an admin run it if it isn't the real one in /usr/testbed/ )
72
if ($UID != 0) {
    # Non-root callers are only tolerated for a non-"real" install, and
    # then only if TBAdmin() accepts them.
    my $is_real_install = ($TB eq $REALTB);
    if ($is_real_install || !TBAdmin($UID)) {
        die("Only root can run this script!\n");
    }
}

76
# Save a copy of the arguments for restart before getopts() consumes @ARGV.
my @args = @ARGV;
my %opt = ();

# Show the usage text and exit when asked for help (-h), when getopts()
# rejects an option (it returns false in that case), or when stray
# non-option arguments are left over.
if (!getopts("ds:p:h", \%opt) || $opt{h} || @ARGV) {
    exit usage();
}

# -s overrides the event server (default: this site's bossnode),
# -p selects a port (left undefined when not given),
# -d turns debugging on.
my ($server, $port, $debug);
$server = $opt{s} ? $opt{s} : $BOSSNODE;
if ($opt{p}) {
    $port = $opt{p};
}
$debug = $opt{d} ? 1 : 0;
Robert Ricci's avatar
Robert Ricci committed
101

102
# Grab some constants into variables
103
my $TBANYMODE    = TBDB_NODEOPMODE_ANY;
104 105 106 107 108 109 110
my $TBRESET      = TBDB_TBCONTROL_RESET;
my $TBRELOADDONE = TBDB_TBCONTROL_RELOADDONE;
my $TBTIMEOUT    = TBDB_TBCONTROL_TIMEOUT;
my $TBNOTIMEOUT  = TBDB_NO_STATE_TIMEOUT;
my $TBNODESTATE  = TBDB_TBEVENT_NODESTATE;
my $TBNODEOPMODE = TBDB_TBEVENT_NODEOPMODE;
my $TBCONTROL    = TBDB_TBEVENT_TBCONTROL;
111
my $TB_OSID_MBKERNEL = TB_OSID_MBKERNEL;
112

113 114
# This only gets used here, so it isn't in a lib constant.
my $TBFREENODE = "FREENODE";
115
my $TBISUP = TBDB_NODESTATE_ISUP;
116

117
# Set up some notification throttling
Mac Newbold's avatar
Mac Newbold committed
118
my $mailgap = 15;		# in seconds
119 120 121
my $lastmail = time() - $mailgap + 2; # Send a digest of startup msgs after 2s.
my %msgs = ();

Mac Newbold's avatar
Mac Newbold committed
122
# Pick the pidfile location: the "real" install uses /var/run, any other
# install keeps its lock under its own tree.
my $pidfile;
if ($TB eq $REALTB) {
    $pidfile = "/var/run/stated.pid";
} else {
    $pidfile = "$TB/locks/stated.pid";
}
debug("Using pidfile $pidfile\n");

# If a pidfile is already there, find out whether its owner is still alive.
if (-e $pidfile) {
    # Read the pid directly instead of shelling out to cat, and accept
    # only a leading run of digits so nothing from the file's contents is
    # ever interpreted by a shell.
    my $otherpid = "";
    if (open(my $pidfh, "<", $pidfile)) {
        my $line = <$pidfh>;
        close($pidfh);
        if (defined($line) && $line =~ /^\s*(\d+)/) {
            $otherpid = $1;
        }
    }

    # kill with signal 0 delivers nothing; it only reports whether the
    # process could be signalled.  EPERM means the process exists but
    # belongs to somebody else, so it still counts as running.
    my $running = 0;
    if ($otherpid ne "" && $otherpid > 0) {
        if (kill(0, $otherpid) || $! == EPERM) {
            $running = 1;
        }
    }

    if ($running) {
        fatal("Lockfile $pidfile exists, and process $otherpid appears to be ".
              "running.\n");
    } else {
        notify("Lockfile exists, but process $otherpid appears to be dead.\n".
               "Removing lock file...\n");
    }
    unlink($pidfile) ||
      fatal("Couldn't remove $pidfile: $!\n");
}
Robert Ricci's avatar
Robert Ricci committed
142
# Background
143
if (!$debug) {
Mac Newbold's avatar
Mac Newbold committed
144 145 146 147
    # We use syslog, so redirect the output to nothing
    if (TBBackGround("/dev/null")) {
	exit(0);
    }
Robert Ricci's avatar
Robert Ricci committed
148
}
Mac Newbold's avatar
Mac Newbold committed
149 150
# set up syslog
openlog("stated","pid","user");
Mac Newbold's avatar
Mac Newbold committed
151 152 153 154 155 156
sysopen(PIDFILE, $pidfile, O_WRONLY | O_EXCL | O_CREAT) ||
  fatal("Couldn't create '$pidfile': $? $!\n");
print PIDFILE "$$";
close PIDFILE;
# If I make it to here, I'll need to clean up the lock file
my $lockfile=$pidfile;
Robert Ricci's avatar
Robert Ricci committed
157

158 159 160
# Change my $0 so that it is easier to see in a ps/top
$0 = "$0";

Robert Ricci's avatar
Robert Ricci committed
161
my $URL = "elvin://$server";
Mac Newbold's avatar
Mac Newbold committed
162 163 164
if ($port) {
    $URL .= ":$port";
}
Robert Ricci's avatar
Robert Ricci committed
165 166 167

# Connect to the event system, and subscribe to the events we want
my $handle = event_register($URL,0);
Mac Newbold's avatar
Mac Newbold committed
168 169 170
if (!$handle) {
    fatal("Unable to register with event system\n");
}
Robert Ricci's avatar
Robert Ricci committed
171 172

my $tuple = address_tuple_alloc();
Mac Newbold's avatar
Mac Newbold committed
173 174 175
if (!$tuple) {
    fatal("Could not allocate an address tuple\n");
}
Robert Ricci's avatar
Robert Ricci committed
176

177 178
%$tuple = ( objtype => join(",",$TBNODESTATE,$TBNODEOPMODE,$TBCONTROL) );

Robert Ricci's avatar
Robert Ricci committed
179
if (!event_subscribe($handle,\&handleEvent,$tuple)) {
Mac Newbold's avatar
Mac Newbold committed
180
    fatal("Could not subscribe to events\n");
Robert Ricci's avatar
Robert Ricci committed
181 182 183 184
}

# Read in the pre-existing node states, and timeout and valid transition
# information from the database
185 186 187 188
my %timeouts  = getTimeouts();
my %valid     = getValid();
my %modeTrans = getModeTrans();
my %triggers  = getTriggers();
189 190
my %nodes     = readStates();
if ($debug) { qshow(); }
Robert Ricci's avatar
Robert Ricci committed
191 192 193

# Gets set if a reload of state from the database should happen.
my $do_reload = 0;
194 195
my $sigrestart= 0;
my $sigcleanup= 0;
Robert Ricci's avatar
Robert Ricci committed
196 197 198 199

# Make the daemon reload database state on a sighup - but I'm worried
# about what would happen if we tried to do this mid-loop. So, we'll
# just set a flag and do it when we're done with our current pass.
200 201
$SIG{HUP}  = sub { info("SIGHUP - Reloading DB state\n"); $do_reload = 1; };

# Set up other signals: USR1 asks for a restart, the rest ask for cleanup.
# SIGKILL is deliberately not listed - it cannot be caught, so a handler
# for it would never run.
$SIG{USR1} = \&restart_wrap;
$SIG{USR2} = \&cleanup_wrap;
$SIG{INT}  = \&cleanup_wrap;
$SIG{QUIT} = \&cleanup_wrap;
$SIG{ABRT} = \&cleanup_wrap;
$SIG{TERM} = \&cleanup_wrap;
Robert Ricci's avatar
Robert Ricci committed
210

211 212 213
# Track if I handled an event or not
my $event_count = 0;

214 215 216 217 218
# Control how long I block while waiting for events
my $blockwait=0;
my $nextdeadline=time();
my $mailqueue=0;

219 220 221 222 223
notify("Stated starting up\n");

#
# Poll the event system, blocking for as long as it is safe to do so.
#
# Resets $event_count and then polls until a pass handles no new events
# and the wait time has run out. How long it may block is derived from
# $mailqueue, $blockwait, $nextdeadline, $lastmail and $mailgap.
# Takes no arguments; callers ignore the return value.
#
sub process_event_queue() {
    $event_count = 0;
    my $lastcount = -1;
    # Default to "don't block", so $wait is always defined, even when no
    # mail is queued, blocking isn't allowed and there is no deadline.
    my $wait = 0;
    my $now = time();
    debug("Polling - mq=$mailqueue bw=$blockwait\n");
    if ($mailqueue == 0) {
        # no messages waiting...
        if ($blockwait) {
            # we can wait a long time - nothing else will happen
            # until we get an event, or get woken up by a signal
            $wait = 600;
        } elsif ($nextdeadline > 0) {
            # only wait until the next deadline...
            $wait = $nextdeadline - $now;
        }
    } else {
        # mail is waiting. Only block until it is time to send it.
        $wait = $lastmail + $mailgap - $now;
        debug("Now $now, Mailgap $mailgap, lastmail $lastmail ==> wait $wait\n");
    }
    if ($wait < 0) { debug("Wait was $wait!\n"); $wait = 0; }
    my $finish = $now + $wait;

    # Keep polling while the previous pass handled something new, or
    # while there is still time left to wait.
    while ($event_count != $lastcount || $wait > 0) {
        $lastcount = $event_count;
        if ($wait <= 0) {
            event_poll($handle);
        } else {
            debug("Using blocking event poll - $wait seconds\n");
            # timeout param is in milliseconds, so multiply
            event_poll_blocking($handle, $wait*1000);
            $now = time();
            # subtract seconds elapsed from my wait time
            $wait = $finish - $now;
            debug("Finished blocking event poll - $wait seconds remian\n");
            # Something arrived and there is follow-up work (queued
            # timeouts, pending mail, or a requested reload): stop
            # waiting so the main loop can get to it.
            if ($event_count > 0 &&
                (qsize() > 0 || $mailqueue || $do_reload)) {
                $blockwait = 0;
                $wait = 0;
            }
        }
    }
    if ($event_count > 0) {
        debug("Handled $event_count event(s).\n");
    }
}
Robert Ricci's avatar
Robert Ricci committed
272

273
# Now, we just poll for events, and watch for timeouts
Robert Ricci's avatar
Robert Ricci committed
274
while (1) {
    process_event_queue();
    my $now = time();
    my ($deadline, $node);

    # Check for nodes that have passed their timeout.
    # NOTE(review): qhead()/qpop() appear to fill in their arguments and
    # return false on success - confirm against TimeoutQueue.
    if (!qhead($deadline, $node)) {
        while ($now >= $deadline && $node ne "") {
            qpop($deadline, $node);
            # Lexical copy; this used to leak into a package global.
            my $notified = $nodes{$node}{notified};
            if (!$notified) {
                handleCtrlEvent($node, $TBTIMEOUT);
                $nodes{$node}{notified} = 1;
            } else {
                notify("$node: Timed out at $now (d=$deadline), ".
                       "but notified already!\n");
            }
            if (0) { qshow(); }
            # Look at the next entry; stop the inner loop when there is none.
            if (qhead($deadline, $node)) {
                $deadline = 0; $node = "";
            }
        }
    } else {
        $deadline = 0;
    }
    # Tell process_event_queue() how long it may block next time round.
    $nextdeadline = $deadline;

    if (qsize() == 0) {
        $blockwait = 1;
        debug("---Blocking wait okay---\n");
    }

    # Reload on request (SIGHUP sets $do_reload), or when the last reload
    # is older than $reload_time seconds.
    if ($do_reload || ($now - $last_reload > $reload_time)) {
        reload();
        $do_reload = 0;
    }

    # Send any messages in the queue if it is time
    notify("", 1);

    # Restart/cleanup requests are flagged by the signal wrappers and
    # acted on here, between passes.
    if ($sigrestart) { restart(); }
    if ($sigcleanup) { cleanup(); }

    #sleep(1);
}

Mac Newbold's avatar
Mac Newbold committed
323 324
exit(0);

Robert Ricci's avatar
Robert Ricci committed
325
# Read the current states of nodes from the database
326
#
# Read the current state and op_mode of every node (except those whose
# node_id starts with 'sh') from the nodes table.
#
# Optional arguments: a flattened hash of previously cached node entries.
# A node whose state, mode and state timestamp are unchanged keeps its old
# entry, so per-node bookkeeping such as the 'notified' flag survives a
# reload. Every other node gets a fresh entry and has its timeout (re)set.
#
# Returns the new node hash as a flat list.
#
sub readStates(;@) {
    # With no arguments this is simply an empty hash. The former
    # defined(%oldnodes) guard was redundant, and defined() on a hash is
    # a compile-time error on newer perls.
    my %oldnodes = @_;

    my $result = DBQueryFatal("SELECT node_id, eventstate, " .
                              "state_timestamp, op_mode, " .
                              "op_mode_timestamp FROM nodes ".
                              "where node_id not like 'sh%'");

    my %nodes;
    while (my ($node_id, $state, $timestamp, $mode, $mode_timestamp)
           = $result->fetchrow()) {
        #
        # If there's an entry in oldnodes for this node, and it
        # hasn't changed state or time, use the old entry (so that
        # we don't lose information about which nodes we've already
        # notified the ops about, etc.)
        #
        my $old = $oldnodes{$node_id};
        if ($old && $state && $timestamp &&
            ($old->{state} eq $state) &&
            ($old->{mode} eq $mode) &&
            ($old->{timestamp} == $timestamp)) {
            $nodes{$node_id} = $old;
        } else {
            $nodes{$node_id}{state}          = $state;
            $nodes{$node_id}{timestamp}      = $timestamp;
            $nodes{$node_id}{mode}           = $mode;
            $nodes{$node_id}{mode_timestamp} = $mode_timestamp;
            # Is there a timeout? If so, set it up!
            setTimeout($mode, $state, $node_id, $timestamp);
        }
    }
    return %nodes;
}

#
# Read timeouts for various states from the database
#
sub getTimeouts() {
    # Result maps op_mode -> state -> [ timeout, action ].
    my %table;
    my $rows = DBQueryFatal("SELECT op_mode, state, timeout, action " .
                            "FROM state_timeouts");

    while (my @row = $rows->fetchrow()) {
        my ($op_mode, $state, $timeout, $action) = @row;
        $table{$op_mode}{$state} = [ $timeout, $action ];
    }
    return %table;
}

#
# Read the list of valid state transitions from the database
#
sub getValid() {
    # Result maps op_mode -> from-state -> to-state -> 1 for every
    # transition listed in state_transitions.
    my %allowed;
    my $rows = DBQueryFatal("SELECT op_mode, state1, state2 " .
                            "FROM state_transitions");

    while (my @row = $rows->fetchrow()) {
        my ($mode, $from, $to) = @row;
        $allowed{$mode}{$from}{$to} = 1;
    }
    return %allowed;
}

396 397 398 399
#
# Read the list of valid mode transitions from the database
#
sub getModeTrans() {
    # Result maps "mode1:state1" to a reference to the list of
    # "mode2:state2" targets reachable from it, in query order.
    my %targets_of;
    my $rows =
      DBQueryFatal("SELECT op_mode1, state1, op_mode2, state2 " .
                   "FROM mode_transitions order by op_mode1,state1");

    while (my @row = $rows->fetchrow()) {
        my ($mode1, $state1, $mode2, $state2) = @row;
        # The list is created on first use.
        push(@{ $targets_of{"$mode1:$state1"} }, "$mode2:$state2");
    }
    return %targets_of;
}

#
# Read the list of states which trigger an action
#
sub getTriggers() {
    debug("getTriggers called\n");

    debug("anymode ==> '$TBANYMODE'\n");

    my %table;

    # Global triggers: keyed "mode:state", value is the raw trigger string.
    my $global =
      DBQueryFatal("SELECT op_mode, state, trigger " .
                   "FROM state_triggers where node_id='$TBANYMODE' ".
                   "order by op_mode,state");
    while (my @row = $global->fetchrow()) {
        my ($mode, $state, $trig) = @row;
        $table{"$mode:$state"} = $trig;
        debug("trig($mode:$state)\t => $trig\n");
    }

    # Per-node triggers: keyed "node:mode:state", value is a reference to
    # the list obtained by splitting the trigger string on commas.
    my $pernode =
      DBQueryFatal("SELECT node_id, op_mode, state, trigger " .
                   "FROM state_triggers where node_id!='$TBANYMODE' ".
                   "order by op_mode,state");
    while (my @row = $pernode->fetchrow()) {
        my ($n, $mode, $state, $trig) = @row;
        my @trigs = split(/\s*,\s*/, $trig);
        $table{"$n:$mode:$state"} = \@trigs;
        debug("trig($n:$mode:$state)\t => ".join(',',@trigs)."\n");
    }

    return %table;
}

Robert Ricci's avatar
Robert Ricci committed
451 452 453 454
#
# Gets called for every event that we receive
#
sub handleEvent($$$) {
    my ($handle, $notification, $data) = @_;

    # Pull the three fields we dispatch on out of the notification.
    my $objtype   = event_notification_get_objtype($handle, $notification);
    my $objname   = event_notification_get_objname($handle, $notification);
    my $eventtype = event_notification_get_eventtype($handle, $notification);

    $event_count++;
    debug("Got an event: ($objtype,$objname,$eventtype)\n");

    #
    # Check to see if another instance is supposed to be handling this node
    #
    if (!checkDBRedirect($objname)) {
        info("Got an event for node $objname, which isn't mine\n");
        return;
    }

    # Dispatch on the object type; the first pattern that matches wins.
    if ($objtype =~ /$TBNODESTATE/) {
        stateTransition($objname, $eventtype);
    }
    elsif ($objtype =~ /$TBNODEOPMODE/) {
        opModeTransition($objname, $eventtype);
        notify("Use of deprecated event TBNODEOPMODE:\n".
               "$objname->$eventtype\n");
    }
    elsif ($objtype =~ /$TBCONTROL/) {
        handleCtrlEvent($objname, $eventtype);
    }

}

#
# Handle a node state event: record the new state for $node in the cache
# and in the nodes table, complain about transitions that aren't listed
# as valid, (re)arm the state timeout, and run whatever is attached to
# the new state (BOOTING checks, triggers, pending mode transitions).
#
#   $node     - node_id the event is about
#   $newstate - state the node reports it has entered
#
# Database results from DBQueryWarn() are checked before use.
# NOTE(review): this assumes DBQueryWarn() returns a false value when the
# query fails - confirm against libdb.
#
sub stateTransition($$) {
    my ($node, $newstate) = @_;

    # Find out what we currently believe about this node; try reloading
    # the cache once before we give up on it.
    my ($oldstate, $mode);
    if (!$nodes{$node}) {
        reload();
    }
    if ($nodes{$node}) {
        $oldstate = $nodes{$node}{state};
        $mode     = $nodes{$node}{mode};
    } else {
        notify("Got an event for a node ($node) I don't know about\n");
    }

    # Check for invalid transitions
    if ($oldstate && $mode && $valid{$mode} && $valid{$mode}{$oldstate} &&
        !$valid{$mode}{$oldstate}{$newstate}) {
        notify("Invalid transition for node $node from $mode/$oldstate " .
               "to $newstate\n");
    }

    my $now = time();
    $nodes{$node}{state}     = $newstate;
    $nodes{$node}{timestamp} = $now;
    $nodes{$node}{notified}  = 0;

    info("$node: $mode/$oldstate => $mode/$newstate\n");
    DBQueryFatal("UPDATE nodes SET eventstate='$newstate', " .
                 "state_timestamp='$now' WHERE node_id='$node'");

    # Check if this state has a timeout, and if so, put it in the queue
    setTimeout($mode, $newstate, $node, $now);

    # Check if this is TBDB_NODESTATE_BOOTING , which has actions
    if ($newstate eq TBDB_NODESTATE_BOOTING) {
        # If I skipped shutdown, and came to booting directly from isup,
        # check for a mode transition so I don't miss one...
        if (defined($oldstate) && $oldstate eq TBDB_NODESTATE_ISUP) {
            info("$node: Came from ISUP! Checking for mode transition\n");
            my $r = DBQueryWarn("select next_op_mode from nodes ".
                                "where node_id='$node'");
            my ($nextmode) = $r ? $r->fetchrow() : ();
            if ($nextmode) {
                # Force the transition even though it is illegal
                info("$node: Forcing mode transition!\n");
                opModeTransition($node, $nextmode, 1);
                $mode = $nextmode;
            } else {
                debug("No next mode.\n");
            }
        }

        # Check if I'm in the right mode
        my $osid = TBBootWhat($node, $debug);
        my $os_op_mode = os_opmode($osid);
        info("$node: Current OS is '$osid', OS mode is '$os_op_mode'\n");
        DBQueryFatal("UPDATE nodes SET osid='$osid' WHERE node_id='$node'");
        if ($os_op_mode ne $mode) {
            my $str = "Node $node is running OS '$osid' but in mode '$mode' ".
              "instead of mode '$os_op_mode'!\n";
            # For now, only force if we're going into reload mode, so we
            # don't get stuck looping in reloading.
            if ($os_op_mode eq "RELOAD") {
                DBQueryFatal("UPDATE nodes SET op_mode='$os_op_mode', ".
                             "op_mode_timestamp=unix_timestamp(now()) ".
                             "WHERE node_id='$node'");
                $nodes{$node}{mode} = $os_op_mode;
                $nodes{$node}{mode_timestamp} = $now;
                $str .= "Forced op_mode to '$os_op_mode'.\n";
            }
            notify($str);
        }
        checkGenISUP($node);
    }

    # Check if this state has any triggers
    my @nodetrigs   = GetNodeTriggerList($node, $mode, $newstate);
    my $globaltrigs = $triggers{"$mode:$newstate"};
    if (defined($globaltrigs) || (@nodetrigs > 0)) {
        # Global triggers are stored as one comma-separated string; there
        # may be none at all when only per-node triggers are pending.
        my @trigs = defined($globaltrigs)
            ? split(/\s*,\s*/, $globaltrigs) : ();
        # Run all the triggers
        debug("Running triggers. Global=".join("/",@trigs).
              "   node=".join("/",@nodetrigs)."\n");
        foreach my $trig (@trigs, @nodetrigs) {
            if ($trig =~ /^$TBRESET$/) {
                # Check if we really need to do a reset
                my $r = DBQueryWarn("select osid,def_boot_osid from nodes ".
                                    "where node_id='$node'");
                if ($r) {
                    my ($osid, $defosid) = $r->fetchrow();
                    if ($osid ne $defosid) {
                        handleCtrlEvent($node, $trig);
                    }
                }
            }
            elsif ($trig =~ /^$TBRELOADDONE$/) {
                handleCtrlEvent($node, $trig);
            }
            elsif ($trig =~ /^$TBFREENODE$/) {
                handleCtrlEvent($node, $trig);
            }
            elsif ($trig =~ /^$TBISUP$/) {
                info("$node: Triggered $TBISUP\n");
                EventSendWarn(host      => $BOSSNODE ,
                              objtype   => TBDB_TBEVENT_NODESTATE ,
                              eventtype => TBDB_NODESTATE_ISUP ,
                              objname   => $node);
            }
            else {
                notify("Unknown trigger '$trig' for $node in ".
                       "$mode/$newstate!\n");
            }
        }
        # Clear any of the node triggers that we ran
        debug("Clearing node triggers: ".join("/",@nodetrigs)."\n");
        ClearNodeTrigger($node, $mode, $newstate, @nodetrigs);
    }

    # Check if this state can trigger a mode transition
    if (defined($modeTrans{"$mode:$newstate"})) {
        info("$node: Checking for mode transition\n");
        my $r = DBQueryWarn("select next_op_mode from nodes ".
                            "where node_id='$node'");
        my ($nextmode) = $r ? $r->fetchrow() : ();
        if ($nextmode) {
            opModeTransition($node, $nextmode);
        } else {
            debug("No next mode.\n");
        }
    }
}
628

629
sub opModeTransition($$;$) {
    my ($node, $newmode, $force) = @_;
    $force = 0 unless defined($force);

    info("$node: Mode change to $newmode requested\n");

    # Fetch the cached state/mode, reloading the cache once if the node
    # is not in it yet.
    my ($oldstate, $mode, $nextstate);
    reload() unless $nodes{$node};
    if ($nodes{$node}) {
        $oldstate = $nodes{$node}{state};
        $mode     = $nodes{$node}{mode};
    } else {
        notify("Got an event for a node ($node) I don't know about\n");
    }

    # A forced transition skips validation entirely. Otherwise the
    # current mode/state must be a listed transition point, and the
    # requested mode must be one of its targets.
    my $from = "$mode:$oldstate";
    if (!$force) {
        if (defined($modeTrans{$from})) {
            debug("Mode Transition check:\n");
            # Flatten the "mode:state" targets into a mode => state map.
            my $translist = join(",", @{$modeTrans{$from}});
            my %trans = split(/[:,]/, $translist);
            debug("Valid transitions from $mode/$oldstate are:\n");
            for my $target (sort keys %trans) {
                debug("$target => $trans{$target}\n");
            }
            if (defined($trans{$newmode})) {
                $nextstate = $trans{$newmode};
            } else {
                notify("Invalid mode transition for $node from ".
                       "$mode/$oldstate to $newmode!\n");
            }
        } else {
            notify("Invalid mode transition for $node from $mode/$oldstate: ".
                   "Not a valid mode transition state!\n");
        }
    }

    # No target state found: stay in the state we were in.
    $nextstate = $oldstate unless $nextstate;

    # Record the change in the cache ...
    my $now = time();
    my $entry = ($nodes{$node} ||= {});
    $entry->{state}          = $nextstate;
    $entry->{timestamp}      = $now;
    $entry->{mode}           = $newmode;
    $entry->{mode_timestamp} = $now;
    $entry->{notified}       = 0;

    # ... and in the database, clearing the pending next_op_mode.
    info("$node: $mode/$oldstate => $newmode/$nextstate\n");
    DBQueryFatal("UPDATE nodes SET eventstate='$nextstate', ".
                 "next_op_mode='', op_mode='$newmode', ".
                 "state_timestamp='$now', ".
                 "op_mode_timestamp='$now' WHERE node_id='$node'");
}

#
# Carry out a testbed control event for a node.
#
#   $node  - node_id the event applies to
#   $event - one of the control events: $TBRESET, $TBRELOADDONE,
#            $TBFREENODE or $TBTIMEOUT; anything else is reported via
#            notify() as unknown.
#
sub handleCtrlEvent($$) {
    my ($node, $event) = @_;

    info("CtrlEvent: $node, $event\n");

    if ($event =~ /^$TBRESET$/) {
        my $result = DBQueryFatal("SELECT pxe_boot_path, def_boot_osid ".
                                  "FROM nodes where node_id='$node'");
        my ($pxepath, $osid) = $result->fetchrow();

        # Important note on ordering here:
        # Because setting a normal osid resets pxe path to PXEBOOT,
        # We need to read it out first, then set the osid, then set
        # the pxepath back to its original value at the end.

        # $cmd is declared before its first use, so it no longer leaks
        # into a package global.
        my $cmd = "$osselect $osid $node";
        system($cmd) and
          notify("$node/$event: Couldn't clear next_boot_*\n".
                 "\tcmd=$cmd\n\t*** $!\n");

        # An empty or NULL pxe path means "use the default" (PXEBOOT).
        if (defined($pxepath) && $pxepath ne "") {
            $pxepath = "-p ".$pxepath;
        } else {
            $pxepath = "PXEBOOT";
        }

        $cmd = "$osselect -m $pxepath $node";
        system($cmd) and
          notify("$node/$event: Couldn't clear next_pxe_boot_path\n".
                 "\tcmd=$cmd\n\t*** $!\n");

        info("Performed RESET for $node to $osid/$pxepath\n");
    }
    elsif ($event =~ /^$TBRELOADDONE$/) {
        info("Clearing reload info for $node\n");
        DBQueryFatal("delete from current_reloads where node_id='$node'");
        my ($pid, $eid);
        NodeidToExp($node, \$pid, \$eid);
        # Only nodes sitting in the reloading experiment get freed once
        # they come up; $pid/$eid may be left unset by NodeidToExp().
        if (defined($pid) && defined($eid) &&
            ($pid eq NODERELOADING_PID) && ($eid eq NODERELOADING_EID)) {
            DBQueryFatal("delete from scheduled_reloads ".
                         "where node_id='$node'");
            AddNodeTrigger($node, $TBANYMODE, TBDB_NODESTATE_ISUP,
                           $TBFREENODE)
              && notify("$node: Couldn't add trigger $TBFREENODE!\n");
            info("Set up freeing of $node from $pid/$eid\n");
        }
    }
    elsif ($event =~ /^$TBFREENODE$/) {
        # Don't need pid/eid, but we should put it in the log
        my ($pid, $eid);
        NodeidToExp($node, \$pid, \$eid);
        DBQueryFatal("delete from reserved where node_id='$node'");
        info("Released $node from $pid/$eid\n");
    }
    elsif ($event =~ /^$TBTIMEOUT$/) {
        my $state = $nodes{$node}{state};
        my $mode  = $nodes{$node}{mode};
        my ($timeout, $action);
        if ($mode && $state && $timeouts{$mode} &&
            $timeouts{$mode}{$state}) {
            ($timeout, $action) = @{$timeouts{$mode}{$state}};
        }
        # $action stays undefined when no timeout entry exists for this
        # mode/state, so test for that before comparing it.
        notify("Node $node has timed out in state $mode/$state".
               ((defined($action) && $action ne "")
                ? "\n\tRequested action $action." : "").
               "\n");
    }
    else {
        notify("$node: Unknown CtrlEvent: $event\n");
    }
}
Robert Ricci's avatar
Robert Ricci committed
763

764 765 766 767
#
# Check if we need to generate an ISUP
#
#
# Decide who is responsible for the ISUP event of a node that has just
# reached BOOTING, based on the osfeatures of the node's current OS:
#   - "isup": the OS sends ISUP itself; nothing to do (returns 0)
#   - "ping": start eventping in the background for the node (returns 0)
#   - neither: send the ISUP event right away (returns whatever
#     EventSendWarn() returns)
#
#   $node - node_id to check
#
sub checkGenISUP($) {
    my ($node) = @_;
    debug("$node: Checking ISUP Generation\n");
    my $r = DBQueryWarn("select osfeatures from nodes as n ".
                        "left join os_info as o on o.osid=n.osid ".
                        "where node_id='$node' and osfeatures is not null");
    my $osfeatures = "";
    # If we don't get anything back (no rows, or a failed query), assume
    # it has no features.
    # NOTE(review): assumes DBQueryWarn() returns a false value on
    # failure - confirm against libdb.
    if ($r && $r->num_rows() > 0) {
        ($osfeatures) = $r->fetchrow();
    }

    my @features = split(",", $osfeatures);
    # Make sure features I care about are defined
    my %can = ("ping" => 0, "isup" => 0);
    foreach my $f (@features) {
        $can{"\L$f"} = 1;	# make sure it's all lowercase
    }

    # If os will send ISUP on its own, do nothing here.
    if ($can{"isup"}) {
        debug("$node: Will send own ISUP\n");
        return 0;
    }

    # If os doesn't support isup but can ping, fork and ping it every
    # few seconds and send isup when it pings, or timeout after too long.
    if ($can{"ping"}) {
        debug("$node: Needs to be pinged - calling eventping\n");
        system("$TB/sbin/eventping $node &");
        return 0;
    }

    # If os doesn't support ping or isup, stated sends ISUP just after
    # the node gets to BOOTING (a bit early, but the best we can do)

    debug("$node: OS doesn't ping - sending ISUP\n");
    return EventSendWarn(host      => $BOSSNODE ,
                         objtype   => TBDB_TBEVENT_NODESTATE ,
                         eventtype => TBDB_NODESTATE_ISUP ,
                         objname   => $node);
}

810 811 812 813
# Decide whether a node belongs to this stated, i.e. whether the node's
# experiment uses the database we are running against.
# Returns 1 if it does, 0 if it does not (or if the node is unknown).
sub checkDBRedirect($) {
    my ($node) = @_;

    # XXX: I don't want to do this every time, for performance reasons,
    # but we need to make sure that we don't get into an inconsistent
    # state
    my $query =
        "SELECT testdb FROM nodes as n " .
        "LEFT JOIN reserved as r ON n.node_id=r.node_id ".
        "LEFT JOIN experiments as e ON r.pid = e.pid " .
        "AND r.eid = e.eid " .
        "WHERE n.node_id = '$node'";
    my $result = DBQueryFatal($query);

    if (!$result->num_rows()) {
        notify("Got an event for a node ($node) I don't know about\n");
        return 0;
    }

    my ($testdb) = $result->fetchrow();

    # XXX: It's hokey to hardcode tbdb here, but....
    #debug("checkDBRedirect: $node => $testdb (I'm $TBDBNAME)\n");

    # A node with a test database set is ours only if that database is
    # the one we are using.
    if ($testdb) {
        return ($testdb eq $TBDBNAME) ? 1 : 0;
    }

    # No test database set: the node belongs to whoever runs on the
    # "real" database.
    return ($TBDBNAME eq $REALTBDBNAME) ? 1 : 0;
}

843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864
# (Re)arm the timeout for a node that is now in $mode/$state.
# Any entry already queued for the node is removed first. A new entry is
# queued at $now + deadline only when %timeouts holds a deadline for this
# mode/state and that deadline is not $TBNOTIMEOUT.
sub setTimeout( $$$$ ) {
    my ($mode, $state, $node, $now) = @_;
    my $trace = 0;    # flip to 1 to dump the queue at each step

    if ($trace) { print "Original: ($mode,$state,$node,$now)\n"; qshow(); }
    qdelete($node) if (defined(qfind($node)));
    if ($trace) { print "Deleted:\n"; qshow(); }

    # Look the deadline up step by step so that nothing gets autovivified
    # in %timeouts for unknown modes.
    my $deadline;
    if (defined($mode) && defined($state) &&
        defined($timeouts{$mode}) &&
        defined($timeouts{$mode}{$state})) {
        $deadline = $timeouts{$mode}{$state}->[0];
    }

    if (defined($deadline) && $deadline != $TBNOTIMEOUT) {
        my $expires = $deadline + $now;
        debug("Setting timeout for ($node,$mode,$state) at ".
              "$deadline + $now ($expires)\n");
        qinsert($expires, $node);
        if ($trace) { qshow(); }
    }

    if ($trace) { print "Done:\n"; qshow(); }
}

Robert Ricci's avatar
Robert Ricci committed
865 866
# Reload state from the database: note the time of the reload, refresh
# the timeout, valid-transition, mode-transition and trigger tables, and
# then re-read the node states (passing in the current %nodes).
sub reload() {
    debug("Reloading state from database\n");
    $last_reload = time;

    %timeouts    = getTimeouts();
    %valid       = getValid();
    %modeTrans   = getModeTrans();
    %triggers    = getTriggers();

    # Done last, after all of the tables above have been refreshed.
    %nodes       = readStates(%nodes);
}

876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955
#
# Some functions for node triggers
#

# $rv   = AddNodeTrigger($node, $mode, $state, @triglist);
#
# Add triggers to the list kept in %triggers under "$node:$mode:$state"
# and write the resulting comma-separated list to the state_triggers
# table. A trigger is only added once: names already on the list, and
# repeats within @triglist itself, are skipped. Existing entries keep
# their order and new ones are appended in the order given.
#
# Returns 1 if @triglist is empty (nothing is done), 0 otherwise.
sub AddNodeTrigger( $$$@ ) {
    my ($node, $mode, $state, @trigs) = @_;
    if (@trigs == 0) { return 1; }

    my $key = "$node:$mode:$state";
    if (!defined($triggers{$key})) {
        $triggers{$key} = [];
    }

    # %seen starts out with what is already on the list and grows as new
    # names are accepted, so duplicates inside @trigs are caught too.
    my %seen = map { $_ => 1 } @{$triggers{$key}};
    push(@{$triggers{$key}}, grep { !$seen{$_}++ } @trigs);

    my $triglist = join(",", @{$triggers{$key}});
    DBQueryFatal("replace into state_triggers ".
                 "(node_id,op_mode,state,trigger) values ".
                 "('$node','$mode','$state','$triglist')");
    return 0;
}

# @list = GetNodeTriggerList($node, $mode, $state);
#
# Collect the triggers registered in %triggers for this node and state:
# first those for the given mode, then those registered under $TBANYMODE.
# Returns an empty list when neither entry exists.
sub GetNodeTriggerList( $$$ ) {
    my ($node, $mode, $state) = @_;
    my @found = ();

    foreach my $m ($mode, $TBANYMODE) {
        my $entry = $triggers{"$node:$m:$state"};
        push(@found, @{$entry}) if (defined($entry));
    }
    return @found;
}

# $rv   = ClearNodeTrigger($node, $mode, $state, @triglist);
#
# Remove triggers for a node in a given mode/state. With an empty
# @triglist, or when @triglist covers every trigger currently returned by
# GetNodeTriggerList, both the "$mode" and the $TBANYMODE entries are
# dropped from %triggers and deleted from the state_triggers table.
# Otherwise the triggers that remain are stored again through
# AddNodeTrigger; they keep their relative order and are stored once each.
#
# Always returns 0.
sub ClearNodeTrigger( $$$ ; @ ) {
    my ($node, $mode, $state, @trigs) = @_;
    # We have to keep any triggers that aren't on the list, but the
    # most common case will be that the list they give us is the whole
    # list anyway. So treat that case special.
    my @reallist = GetNodeTriggerList($node,$mode,$state);

    # Subtract @trigs from @reallist.
    my %drop = map { $_ => 1 } @trigs;
    my %kept = ();
    my @newtrigs = grep { !$drop{$_} && !$kept{$_}++ } @reallist;

    # Empty list means clear all. Nothing left over means the same thing,
    # even when @trigs is not an exact match of the current list (extra or
    # repeated names); in that case too the table row has to go, or the
    # triggers would still be in the table after being dropped from memory.
    if ((@trigs == 0) || (@newtrigs == 0)) {
        debug("Clearing all triggers for $node...\n");
        delete($triggers{"$node:$mode:$state"});
        delete($triggers{"$node:$TBANYMODE:$state"});
        DBQueryFatal("delete from state_triggers ".
                     "where node_id='$node' and state='$state' and ".
                     "(op_mode='$mode' or op_mode='$TBANYMODE')");
        return 0;
    }

    debug("Reallist = ".join("/",@reallist).", trigs=".
          join("/",@trigs).".\n");
    # Report each requested trigger that was actually on the list, once.
    my %onlist = map { $_ => 1 } @reallist;
    foreach my $t (@trigs) {
        if (delete($onlist{$t})) {
            debug("Clearing $t\n");
        }
    }

    # Note: This doesn't quite do the right thing with triggers
    # for a fixed mode vs TBANYMODE. So if you start using this
    # code, make sure and debug it first!
    debug("Newlist = ".join("/",@newtrigs).".\n");
    delete($triggers{"$node:$mode:$state"});
    AddNodeTrigger($node,$mode,$state,@newtrigs);

    return 0;
}

956 957
# Look up the op_mode that goes with an osid.
#
# Takes the osid as its (optional) argument. Returns "MINIMAL" for
# $TB_OSID_MBKERNEL without consulting the database, otherwise the op_mode
# column of the matching os_info row, or "" when there is no such row or
# its op_mode is NULL/empty.
#
# NOTE(review): the empty prototype declares no arguments even though an
# osid is read from @_. Perl only applies a prototype to calls compiled
# after the declaration, so this works for calls that appear earlier in
# the file or use the &os_opmode(...) form. It is left unchanged here.
sub os_opmode() {
    my $osid = shift || "";
    if ($osid eq $TB_OSID_MBKERNEL) {
        return "MINIMAL";
    }

    my $q = DBQueryFatal("select op_mode from os_info where osid='$osid';");
    if ($q->numrows() < 1) {
        return "";
    }

    my ($opmode) = $q->fetchrow_array();
    # A NULL op_mode comes back undefined; normalize it before it is
    # interpolated into the debug message below.
    $opmode = "" if (!defined($opmode));
    debug("OpMode for '$osid' is '$opmode'\n");
    return $opmode;
}
Mac Newbold's avatar
Mac Newbold committed
974

975 976 977 978
#
# Functions for controlling output/logging, and signal handling
#

Robert Ricci's avatar
Robert Ricci committed
979
# Print the arguments, but only when $debug is set.
sub debug(@) {
    print @_ if ($debug);
}

# Report a message through notify() and then die with the same message.
sub fatal($) {
    my ($msg) = @_;

    notify($msg);
    die($msg);
}

Mac Newbold's avatar
Mac Newbold committed
991
# Dump the queued mail messages (%msgs) through debug(). Does nothing
# unless $debug is at least 2.
sub showqueue() {
    return if ($debug < 2);

    debug("\nMAILQUEUE:\n") if (scalar(keys %msgs) > 0);

    # One entry per distinct message text: the number of timestamps queued
    # for it, followed by the timestamps themselves.
    foreach my $text (sort keys %msgs) {
        my @stamps = @{$msgs{$text}};
        debug("MSGS:\n$text==> (".scalar(@stamps).",'".
              join("','",@stamps)."')\n");
    }
}

# Log a message and queue it for the throttled mail digest.
#
#   $message   - the text to report
#   $checkonly - optional; when true nothing is logged or queued, and the
#                call only checks whether a queued digest is due
#
# Messages are queued in %msgs, keyed by message text, each with the list
# of timestamps at which it was reported. Once at least $mailgap seconds
# have passed since $lastmail and something is queued, one digest of all
# queued messages is built and the queue is reset. The digest is mailed,
# except in debug mode where it is printed through debug() instead.
sub notify($;$) {
    my $message = shift;
    my $checkonly = shift || 0;
    # Use a timestamp, now that we're throttling mail
    my $tstamp = strftime("%b %e %H:%M:%S", localtime);

    showqueue();
    unless ($checkonly) {
        info($message);
        $mailqueue++;
        # Queue up the message
        # (The queue is a hash of lists of timestamps, keyed by message;
        # the list is created on first use.)
        push(@{$msgs{$message}}, $tstamp);
        showqueue();
    }

    my $now = time;
    # Not time yet, or nothing queued: do nothing.
    if (($now - $lastmail >= $mailgap) && (scalar(keys %msgs) > 0)) {
        debug("SENDING MAILQUEUE\n"."(now $now, lastmail $lastmail, ".
              ($now-$lastmail).">=$mailgap)\n");

        # We're okay to send. Make a digest of all the queued messages.
        my $sep = '-' x 5;
        my $mailbody = "";
        foreach my $msg (sort keys %msgs) {
            my @stamps = @{$msgs{$msg}};
            my $count  = scalar(@stamps);
            $mailbody .= "\n$msg\n";
            $mailbody .= ($count > 1)
                ? "($count copies from $stamps[0] to $stamps[-1])\n"
                : "($count copy at $stamps[0])\n";
            $mailbody .= "$sep\n";
        }

        # Now reset the mail queue
        %msgs = ();
        $mailqueue = 0;
        showqueue();
        $lastmail = time;

        if ($debug) {
            debug("notify: Not sending mail in debug mode\n");
            debug("MAIL CONTAINS:\n".$mailbody."\n");
        } else {
            SENDMAIL("Stated List <".$TBOPS.">",
                     "Stated Messsage",$mailbody,
                     "Stated Daemon <".$TBOPS.">");
        }
    }
}

1060
# Send a message straight to $REALTBOPS, bypassing the throttled digest.
# The message is also passed through notify() (marked as an announcement)
# so that it is logged and shows up in the regular digest. In debug mode
# the mail is printed through debug() instead of being sent.
sub announce($) {
    my $message = shift;
    my $tstamp = strftime("%b %e %H:%M:%S",localtime);
    notify("ANNOUCEMENT: ".$message."\n\n(Sent to $REALTBOPS)\n");

    # Keep the mail body lexical instead of assigning to a package global.
    my $mailbody = "\n$message\n\n$tstamp\n";
    if (!$debug) {
        SENDMAIL($REALTBOPS,
                 "Stated Messsage",$mailbody,
                 "Stated Daemon <".$TBOPS.">");
    } else {
        debug("announce: Not sending mail in debug mode\n");
        debug("MAIL CONTAINS:\n".$mailbody."\n");
    }
}

1075
# Write a message to syslog.
#
#   $message - the text to log
#   $notice  - optional; when true, log at priority "notice" instead of
#              "info"
#
# In debug mode the message is also printed with a syslog-style prefix,
# and the copy sent to syslog is marked with "DEBUG: ".
sub info($;$) {
    my $message = shift;
    my $notice = shift || 0;
    # Use syslog
    my $prio = ($notice ? "notice" : "info");

    if ($debug) {
        # Print out log entries like this:
        # Sep 20 09:36:00 stated[238]: Reloading state from database
        print strftime("%b %e %H:%M:%S",localtime)." stated[$$]: $message";
        $message = "DEBUG: ".$message;
    }

    # syslog() treats its second argument as a sprintf-style format, so
    # pass the text as data behind a fixed "%s"; otherwise any '%' in the
    # message would be interpreted as a conversion.
    if (!syslog($prio, "%s", $message) && !$info_syslog_failed) {
        # notify() logs through info() again. The flag keeps a syslog that
        # keeps failing from recursing without end: the failure is
        # reported (and queued for mail) once per top-level call.
        local $info_syslog_failed = 1;
        notify("syslog failed: $? $!\n");
    }
}

1092 1093
# Record that a restart has been requested by setting $sigrestart; the
# restart itself is done elsewhere. (Presumably installed as the SIGUSR1
# handler - confirm where %SIG is set up.)
sub restart_wrap {
    $sigrestart = 1;
}

1094 1095
# This gets called if we catch a signal USR1.
#
# Does one last non-blocking poll of the event queue, unregisters from the
# event system, removes the lockfile, unblocks SIGUSR1/SIGHUP, announces
# the restart and then exec's stated again with the arguments in @args.
# Does not return: either the exec succeeds, or the failure is announced
# and we die.
sub restart {
    my $params = join(" ",@args);
    # If we're started from an absolute path, use that.
    my $prog = ($0 =~ /^\//) ? $0 : "$TB/sbin/stated";

    info("SIGUSER1 received: Performing final event poll before restarting\n");
    $blockwait=0;
    # Explicit call: a bare 'process_event_queue;' is only a call when the
    # sub has already been declared at this point in the file.
    process_event_queue();

    info("Restarting from '$prog".($params ne "" ? " $params" : "")."'\n");
    if ($handle && event_unregister($handle) == 0) {
        warn "Unable to unregister with event system\n";
    }
    if (defined($lockfile) && $lockfile ne "") {
        unlink $lockfile;
    }
    if (!defined(sigprocmask(SIG_UNBLOCK, POSIX::SigSet->new(SIGUSR1,SIGHUP)))) {
        notify("sigprocmask: sig unblock failed! $?, $!\n");
        die("\n");
    }
    announce("Stated restarted\n");

    # List form: the arguments are handed over exactly as they are in
    # @args, without being joined and re-split by the shell. exec only
    # returns if it failed.
    exec { $prog } $prog, @args or
      do {
          my $msg = "Couldn't restart stated! cmd='$prog $params'\n".
            "Error: ($?) $!\n";
          announce($msg);
          die($msg);
      };
}

1128 1129
# Record that a cleanup/exit has been requested by setting $sigcleanup;
# the cleanup itself is done elsewhere. (Presumably installed as the
# handler for TERM and similar signals - confirm where %SIG is set up.)
sub cleanup_wrap {
    $sigcleanup = 1;
}

1130 1131
# This gets called if we catch a signal (TERM, etc.)
# Reports the signal through notify() and exits with status 0; the rest
# of the shutdown work happens in the END block.
sub cleanup {
    notify("Signal received, exiting\n");

    # now do the normal exit stuff in END {}
    exit 0;
}

Robert Ricci's avatar
Robert Ricci committed
1137 1138
# This gets called if we die of 'natural causes' (exit, die, etc.)
# Removes the lockfile and announces the exit (a process without a
# lockfile is taken to be a child and only logs), closes syslog and
# unregisters from the event system, then restores the exit status.
END {
    debug("Ending stated...\n");
    # Save the exit status before calling anything that may change it.
    my $stat = $?;
    if (defined($lockfile) && $lockfile ne "") {
        unlink $lockfile;
        announce("Stated exiting, cleaning up\n");
    } else {
        # Must be a child
        info("Stated child exiting\n");
    }
    debug("Annouced. Cleaning up...\n");
    # clean up Syslog
    closelog();
    if ($handle) {
        debug("Unregistering w/event system...\n");
        # Only warn on failure (as restart() does): dying here would abort
        # the END block before the exit status below is restored.
        if (event_unregister($handle) == 0) {
            warn "Unable to unregister with event system\n";
        } else {
            debug("Unregistered.\n");
        }
    }
    debug("Cleaned up. Bye!\n");
    # Restore $? in case one of the things I called changed it
    $? = $stat;
}
1162