stated.in 32.8 KB
Newer Older
Robert Ricci's avatar
Robert Ricci committed
1
#!/usr/bin/perl -w
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
5 6 7
# All rights reserved.
#

Robert Ricci's avatar
Robert Ricci committed
8 9 10
#
# stated - A daemon to monitor the states of nodes in the testbed. Receives
# state change notification through the event system, and writes the new
11 12
# state into the database. Also watches for invalid transitions, timeouts, 
# and performs other state-related control functions.
Robert Ricci's avatar
Robert Ricci committed
13 14 15 16
#
# Send it a HUP signal to get it to reload the timeout and transition
# information. Periodically reloads this information regardless, though.
#
17
# Will restart when sent SIGUSR1, by exec'ing its executable again.
Robert Ricci's avatar
Robert Ricci committed
18
#
19

Robert Ricci's avatar
Robert Ricci committed
20 21
# Configure variables
use lib '@prefix@/lib';
22
my $TB = "@prefix@";
23
my $REALTB = "/usr/testbed"; # So we know if we're the "real" stated or not
Robert Ricci's avatar
Robert Ricci committed
24
my $BOSSNODE = "@BOSSNODE@";
25
my $TBOPS = "@TBSTATEDEMAIL@";
26
my $REALTBOPS = "@TBOPSEMAIL@";
27
my $TBDBNAME = "@TBDBNAME@";
28
my $REALTBDBNAME = "tbdb"; # So we know if we're using the "real" db
29
my $osselect = "$TB/bin/os_select";
Robert Ricci's avatar
Robert Ricci committed
30 31 32 33 34 35

$| = 1;

use event;
use libdb;
use libtestbed;
36
use TimeoutQueue;
Robert Ricci's avatar
Robert Ricci committed
37
use Getopt::Std;
38
#use strict;
Robert Ricci's avatar
Robert Ricci committed
39
use English;
Mac Newbold's avatar
Mac Newbold committed
40 41
use POSIX;			# for strftime, and sigprocmask and friends
use Fcntl;			# file constants for pidfile
Mac Newbold's avatar
Mac Newbold committed
42 43 44 45 46
use Sys::Syslog;
# Important note about syslog: It defaults to using an inet socket,
# but 'syslogd -s' (the default) doesn't listen for one. So either
# run syslogd without -s, or use setlogsock('unix') before openlog.
# (To get setlogsock: 'use Sys::Syslog qw(:DEFAULT setlogsock);' )
Robert Ricci's avatar
Robert Ricci committed
47

Mac Newbold's avatar
Mac Newbold committed
48 49 50
# Do lots of db retries before we fail and die
$libdb::DBQUERY_MAXTRIES = 5;

Robert Ricci's avatar
Robert Ricci committed
51 52 53
# Number of iterations (roughly, seconds) after which we'll reload 
# information from the database. This is so we don't end up with information
# that's _too_ out of sync.
54
my $reload_time = 600;
55
my $last_reload = time;
Robert Ricci's avatar
Robert Ricci committed
56 57 58 59

# Process command-line arguments

#
# usage - Print the command-line synopsis to the currently selected output
# handle. The result of print() is passed back to the caller, which hands
# it to exit().
#
sub usage {
    my $synopsis = <<"END";
Usage: $0 [-h] [-d] [-s server] [-p port]
-h              This message
-d              Turn on debugging output, and don't go into the background
-s server       Use specified server, instead of this site's bossnode
-p port	        Use specified port
Send SIGHUP to reload database state, or SIGUSR1 to restart completely.
END
    return print($synopsis);
}

Mac Newbold's avatar
Mac Newbold committed
70
# Only root should run this - it won't work when run as a user...
71
# (Or, let an admin run it if it isn't the real one in /usr/testbed/ )
72
if ($UID && ( $TB eq $REALTB || ! TBAdmin($UID) ) ) {
Mac Newbold's avatar
Mac Newbold committed
73 74 75
    die("Only root can run this script!\n");
}

76
# Keep an untouched copy of the command line for a later restart; getopts()
# consumes the switches it recognizes from @ARGV.
my @args = @ARGV;
my %opt  = ();
getopts("ds:p:h", \%opt);

# Asking for help, or leaving stray arguments behind, both end in the
# usage message.
if ($opt{h} || @ARGV) {
    exit(usage());
}

# -s overrides the configured boss node, -p is optional (left undefined when
# not given), and -d is normalized to a 0/1 flag.
my $server = $opt{s} ? $opt{s} : $BOSSNODE;
my $port   = $opt{p} ? $opt{p} : undef;
my $debug  = $opt{d} ? 1 : 0;
Robert Ricci's avatar
Robert Ricci committed
101

102
# Grab some constants into variables
103
my $TBANYMODE    = TBDB_NODEOPMODE_ANY;
104 105 106 107 108 109 110
my $TBRESET      = TBDB_TBCONTROL_RESET;
my $TBRELOADDONE = TBDB_TBCONTROL_RELOADDONE;
my $TBTIMEOUT    = TBDB_TBCONTROL_TIMEOUT;
my $TBNOTIMEOUT  = TBDB_NO_STATE_TIMEOUT;
my $TBNODESTATE  = TBDB_TBEVENT_NODESTATE;
my $TBNODEOPMODE = TBDB_TBEVENT_NODEOPMODE;
my $TBCONTROL    = TBDB_TBEVENT_TBCONTROL;
111
my $TB_OSID_MBKERNEL = TB_OSID_MBKERNEL;
112

113 114
# This only gets used here, so it isn't in a lib constant.
my $TBFREENODE = "FREENODE";
115
my $TBISUP = TBDB_NODESTATE_ISUP;
116

117
# Set up some notification throttling
Mac Newbold's avatar
Mac Newbold committed
118
my $mailgap = 15;		# in seconds
119 120 121
my $lastmail = time() - $mailgap + 2; # Send a digest of startup msgs after 2s.
my %msgs = ();

Mac Newbold's avatar
Mac Newbold committed
122
my $pidfile;
123
if ( $TB eq $REALTB ) {
Mac Newbold's avatar
Mac Newbold committed
124 125
    $pidfile = "/var/run/stated.pid";
} else {
126
    $pidfile = "$TB/locks/stated.pid";
Mac Newbold's avatar
Mac Newbold committed
127
}
Mac Newbold's avatar
Mac Newbold committed
128 129
debug("Using pidfile $pidfile\n");
# A pidfile already exists: find out whether the process that wrote it is
# still alive. If it is, refuse to start; if not, the file is stale and is
# removed so that a fresh one can be created below.
if (-e $pidfile) {
    # Read the recorded pid directly rather than shelling out to cat.
    my $otherpid = "";
    if (open(my $pidfh, "<", $pidfile)) {
        my $line = <$pidfh>;
        close($pidfh);
        if (defined($line)) {
            chomp($line);
            $otherpid = $line;
        }
    }

    # Probe the process with signal 0, which delivers nothing and only
    # reports whether the pid can be signalled. EPERM means the process
    # exists but belongs to someone else, so it still counts as running.
    # (Grepping ps output for the pid also matched unrelated lines that
    # merely contained the same digits.)
    my $running = 0;
    if ($otherpid =~ /^\s*(\d+)\s*$/ && $1 > 0) {
        my $pidnum = $1;
        $running = (kill(0, $pidnum) || $! == EPERM) ? 1 : 0;
    }

    if ($running) {
	fatal("Lockfile $pidfile exists, and process $otherpid appears to be ".
	      "running.\n");
    } else {
	notify("Lockfile exists, but process $otherpid appears to be dead.\n".
	       "Removing lock file...\n");
    }
    unlink($pidfile) ||
      fatal("Couldn't remove $pidfile: $!\n");
}
Robert Ricci's avatar
Robert Ricci committed
142
# Background
143
if (!$debug) {
Mac Newbold's avatar
Mac Newbold committed
144 145 146 147
    # We use syslog, so redirect the output to nothing
    if (TBBackGround("/dev/null")) {
	exit(0);
    }
Robert Ricci's avatar
Robert Ricci committed
148
}
Mac Newbold's avatar
Mac Newbold committed
149 150
# set up syslog
openlog("stated","pid","user");
Mac Newbold's avatar
Mac Newbold committed
151 152 153 154 155 156
sysopen(PIDFILE, $pidfile, O_WRONLY | O_EXCL | O_CREAT) ||
  fatal("Couldn't create '$pidfile': $? $!\n");
print PIDFILE "$$";
close PIDFILE;
# If I make it to here, I'll need to clean up the lock file
my $lockfile=$pidfile;
Robert Ricci's avatar
Robert Ricci committed
157

158 159 160
# Change my $0 so that it is easier to see in a ps/top
$0 = "$0";

Robert Ricci's avatar
Robert Ricci committed
161
my $URL = "elvin://$server";
Mac Newbold's avatar
Mac Newbold committed
162 163 164
if ($port) {
    $URL .= ":$port";
}
Robert Ricci's avatar
Robert Ricci committed
165 166 167

# Connect to the event system, and subscribe to the events we want
my $handle = event_register($URL,0);
Mac Newbold's avatar
Mac Newbold committed
168 169 170
if (!$handle) {
    fatal("Unable to register with event system\n");
}
Robert Ricci's avatar
Robert Ricci committed
171 172

my $tuple = address_tuple_alloc();
Mac Newbold's avatar
Mac Newbold committed
173 174 175
if (!$tuple) {
    fatal("Could not allocate an address tuple\n");
}
Robert Ricci's avatar
Robert Ricci committed
176

177 178
%$tuple = ( objtype => join(",",$TBNODESTATE,$TBNODEOPMODE,$TBCONTROL) );

Robert Ricci's avatar
Robert Ricci committed
179
if (!event_subscribe($handle,\&handleEvent,$tuple)) {
Mac Newbold's avatar
Mac Newbold committed
180
    fatal("Could not subscribe to events\n");
Robert Ricci's avatar
Robert Ricci committed
181 182 183 184
}

# Read in the pre-existing node states, and timeout and valid transition
# information from the database
185 186 187 188
my %timeouts  = getTimeouts();
my %valid     = getValid();
my %modeTrans = getModeTrans();
my %triggers  = getTriggers();
189 190
my %nodes     = readStates();
if ($debug) { qshow(); }
Robert Ricci's avatar
Robert Ricci committed
191 192 193 194 195 196 197

# Gets set if a reload of state from the database should happen.
my $do_reload = 0;

# Make the daemon reload database state on a sighup - but I'm worried
# about what would happen if we tried to do this mid-loop. So, we'll
# just set a flag and do it when we're done with our current pass.
198 199
$SIG{HUP}  = sub { info("SIGHUP - Reloading DB state\n"); $do_reload = 1; };

Mac Newbold's avatar
Mac Newbold committed
200
# Set up other signals.
201 202 203
# SIGUSR1 asks for a restart; the other signals listed here all run the
# cleanup handler. SIGKILL is intentionally absent: it cannot be caught,
# blocked or ignored, so installing a handler for it never had any effect.
$SIG{USR1} = \&restart;
foreach my $signame (qw(USR2 INT QUIT ABRT TERM)) {
    $SIG{$signame} = \&cleanup;
}
Robert Ricci's avatar
Robert Ricci committed
208

209 210 211
# Track if I handled an event or not
my $event_count = 0;

212 213 214 215 216
# Control how long I block while waiting for events
my $blockwait=0;
my $nextdeadline=time();
my $mailqueue=0;

217 218 219 220 221
notify("Stated starting up\n");

#
# process_event_queue - Poll the event system until no more events arrive.
#
# Resets the global $event_count (incremented by handleEvent) and then
# polls. How long the poll may block is derived from the globals:
#   - mail queued ($mailqueue):   until the next digest is due
#   - nothing pending ($blockwait): up to 600 seconds
#   - otherwise:                  until $nextdeadline, if one is set
# When none of these yields a wait time, the poll is non-blocking.
#
sub process_event_queue() {
    $event_count=0;
    my $lastcount=-1;
    # Start from "don't block"; previously this was left undefined when
    # there was no mail, no blockwait and no deadline, and was then used in
    # comparisons and arithmetic below.
    my $wait = 0;
    my $now = time();
    debug("Polling - mq=$mailqueue bw=$blockwait\n");
    if ( $mailqueue == 0) {
        # no messages waiting...
        if ($blockwait) {
            # we can wait a long time - nothing else will happen
            # until we get an event, or get woken up by a signal
            $wait = 600;
        } elsif ($nextdeadline > 0) {
            # only wait until the next deadline...
            $wait = $nextdeadline - $now;
        }
    } else {
        # mail is waiting. Only block until it is time to send it.
        $wait = $lastmail + $mailgap - $now;
        debug("Now $now, Mailgap $mailgap, lastmail $lastmail ==> wait $wait\n");
    }
    if ($wait < 0) { debug("Wait was $wait!\n"); $wait=0; }
    my $finish = $now + $wait;

    # Keep polling while the previous pass handled something new, or while
    # there is still blocking time left.
    while ($event_count != $lastcount || $wait > 0) {
        $lastcount = $event_count;
        if ($wait<=0) {
            event_poll($handle);
        } else {
            debug("Using blocking event poll - $wait seconds\n");
            # timeout param is in milliseconds, so multiply
            event_poll_blocking($handle, $wait*1000);
            $now = time();
            # subtract seconds elapsed from my wait time
            $wait = $finish - $now;
            debug("Finished blocking event poll - $wait seconds remian\n");
            # An event arrived and there is now other work to do
            # (timeouts queued, mail pending, or a reload requested):
            # stop blocking so the main loop can get to it.
            if ($event_count > 0 &&
                (qsize() > 0 || $mailqueue || $do_reload)) {
                $blockwait=0;
                $wait=0;
            }
        }
    }
    if ($event_count > 0) {
        debug("Handled $event_count event(s).\n");
    }
}
Robert Ricci's avatar
Robert Ricci committed
270

271
# Now, we just poll for events, and watch for timeouts
Robert Ricci's avatar
Robert Ricci committed
272
# Main loop: handle incoming events, fire timeouts that have come due,
# reload database state when asked or when it has become stale, and flush
# any queued notification mail.
while (1) {
    process_event_queue();
    my $now = time();
    my ($deadline,$node);

    # Check for nodes that have passed their timeout.
    # NOTE(review): qhead/qpop come from TimeoutQueue; from their use here
    # they appear to fill in $deadline/$node and to return false on success.
    if (!qhead($deadline,$node)) {
        while ($now >= $deadline && $node ne "") {
            qpop($deadline,$node);
            # Lexical now; this used to be an undeclared package global.
            my $notified = $nodes{$node}{notified};
            if (!$notified) {
                handleCtrlEvent($node,$TBTIMEOUT);
                $nodes{$node}{notified} = 1;
            } else {
                notify("$node: Timed out at $now (d=$deadline), ".
                       "but notified already!\n");
            }
            if (0) { qshow(); }
            # Look at the next entry; an empty queue ends the inner loop.
            if (qhead($deadline,$node)) {
                $deadline=0; $node="";
            }
        }
    } else {
        $deadline=0;
    }
    # Tell process_event_queue how long it may block next time around.
    $nextdeadline = $deadline;

    if (qsize()==0) {
        $blockwait=1;
        debug("---Blocking wait okay---\n");
    }

    if ($do_reload || ($now - $last_reload > $reload_time)) {
        reload();
        $do_reload = 0;
    }

    # Send any messages in the queue if it is time
    notify("",1);

    #sleep(1);
}

Mac Newbold's avatar
Mac Newbold committed
318 319
exit(0);

Robert Ricci's avatar
Robert Ricci committed
320
# Read the current states of nodes from the database
321
sub readStates(;@) {
    # Optional argument: the previous node table, flattened into a list.
    # With no arguments this is simply an empty hash, so no extra
    # "defined" guard is needed (defined(%hash) is a fatal error on
    # current perls).
    my %oldnodes = @_;

    #debug("readStates called\n");
    my $result = DBQueryFatal("SELECT node_id, eventstate, " .
			      "state_timestamp, op_mode, " .
			      "op_mode_timestamp FROM nodes ".
			      "where node_id not like 'sh%'");

    # Returns a hash keyed by node_id; each value is a hash ref holding
    # state, timestamp, mode and mode_timestamp.
    my %nodes;
    while (my ($node_id, $state, $timestamp, $mode, $mode_timestamp)
	   = $result->fetchrow()) {
	#
	# If there's an entry in oldnodes for this node, and it
	# hasn't changed state or time, use the old entry (so that
	# we don't lose information about which nodes we've already
	# notified the ops about, etc.)
	#
	if ($oldnodes{$node_id} && $state && $timestamp &&
	    ($oldnodes{$node_id}{state} eq $state) &&
	    ($oldnodes{$node_id}{mode} eq $mode) &&
	    ($oldnodes{$node_id}{timestamp} == $timestamp)) {
	    $nodes{$node_id} = $oldnodes{$node_id};
	} else {
	    $nodes{$node_id}{state}          = $state;
	    $nodes{$node_id}{timestamp}      = $timestamp;
	    $nodes{$node_id}{mode}           = $mode;
	    $nodes{$node_id}{mode_timestamp} = $mode_timestamp;
	    # Is there a timeout? If so, set it up!
	    setTimeout($mode,$state,$node_id,$timestamp);
	}
    }
    return %nodes;
}

#
# Read timeouts for various states from the database
#
sub getTimeouts() {
    # Result maps op_mode -> state -> [ timeout, action ].
    my %timeout_for;

    my $rows = DBQueryFatal("SELECT op_mode, state, timeout, action " .
			    "FROM state_timeouts");
    while (my @row = $rows->fetchrow()) {
	my ($op_mode, $state, $timeout, $action) = @row;
	$timeout_for{$op_mode}{$state} = [ $timeout, $action ];
    }

    return %timeout_for;
}

#
# Read the list of valid state transitions from the database
#
sub getValid() {
    # Result maps op_mode -> from-state -> to-state -> 1 for every
    # transition listed in the state_transitions table.
    my %is_valid;

    my $rows = DBQueryFatal("SELECT op_mode, state1, state2 " .
			    "FROM state_transitions");
    while (my @row = $rows->fetchrow()) {
	my ($mode, $from, $to) = @row;
	$is_valid{$mode}{$from}{$to} = 1;
    }

    return %is_valid;
}

391 392 393 394
#
# Read the list of valid mode transitions from the database
#
sub getModeTrans() {
    # Result maps "mode1:state1" to an array ref of "mode2:state2" strings,
    # in the order the rows come back from the query.
    #debug("getModeTrans called\n");
    my $result =
      DBQueryFatal("SELECT op_mode1, state1, op_mode2, state2 " .
		   "FROM mode_transitions order by op_mode1,state1");

    my %modeTrans;
    while (my ($mode1,$state1, $mode2, $state2) = $result->fetchrow()) {
	# Append in place; the array ref is created on first use, so there
	# is no need to copy the list out and store a new reference.
	push(@{ $modeTrans{"$mode1:$state1"} }, "$mode2:$state2");
    }
    return %modeTrans;
}

#
# Read the list of states which trigger an action
#
sub getTriggers() {
    # The returned hash holds two kinds of entries:
    #   "mode:state"      => the raw trigger string (rows whose node_id is
    #                        $TBANYMODE)
    #   "node:mode:state" => array ref of the comma-separated trigger names
    debug("getTriggers called\n");

    debug("anymode ==> '$TBANYMODE'\n");

    my %trigger_for;

    # Global triggers first
    my $rows =
      DBQueryFatal("SELECT op_mode, state, trigger " .
		   "FROM state_triggers where node_id='$TBANYMODE' ".
		   "order by op_mode,state");
    while (my @row = $rows->fetchrow()) {
	my ($mode, $state, $trig) = @row;
	$trigger_for{"$mode:$state"} = $trig;
	debug("trig($mode:$state)\t => $trig\n");
    }

    # Then the per-node ones
    $rows =
      DBQueryFatal("SELECT node_id, op_mode, state, trigger " .
		   "FROM state_triggers where node_id!='$TBANYMODE' ".
		   "order by op_mode,state");
    while (my @row = $rows->fetchrow()) {
	my ($n, $mode, $state, $trig) = @row;
	my $names = [ split(/\s*,\s*/,$trig) ];
	$trigger_for{"$n:$mode:$state"} = $names;
	debug("trig($n:$mode:$state)\t => ".join(',',@{$names})."\n");
    }

    return %trigger_for;
}

Robert Ricci's avatar
Robert Ricci committed
446 447 448 449
#
# Gets called for every event that we receive
#
sub handleEvent($$$) {
    # Event-system callback. $data is accepted but not used here.
    my ($handle,$notification,$data) = @_;

    my $objtype   = event_notification_get_objtype($handle,$notification);
    my $objname   = event_notification_get_objname($handle,$notification);
    my $eventtype = event_notification_get_eventtype($handle,$notification);

    # Counted even if the event turns out to belong to another instance.
    $event_count++;
    debug("Got an event: ($objtype,$objname,$eventtype)\n");

    #
    # Events for nodes that another instance is responsible for are
    # logged and dropped.
    #
    if (!checkDBRedirect($objname)) {
	info("Got an event for node $objname, which isn't mine\n");
	return;
    }

    # Dispatch on the object type; the first matching type wins and
    # anything else is silently ignored.
    if ($objtype =~ /$TBNODESTATE/) {
	stateTransition($objname,$eventtype);
    }
    elsif ($objtype =~ /$TBNODEOPMODE/) {
	opModeTransition($objname,$eventtype);
	notify("Use of deprecated event TBNODEOPMODE:\n".
	       "$objname->$eventtype\n");
    }
    elsif ($objtype =~ /$TBCONTROL/) {
	handleCtrlEvent($objname,$eventtype);
    }
}

#
# stateTransition - Record that $node has entered $newstate.
#
# Warns about transitions not listed in %valid, updates the in-memory node
# table and the nodes table in the database, (re)arms the state timeout,
# performs the extra checks attached to the BOOTING state, runs any global
# or per-node triggers registered for the new state, and finally checks
# whether the new state allows a pending op_mode change.
#
# Arguments: node id, name of the new state.
#
sub stateTransition($$) {

    my ($node,$newstate) = @_;

    # Check for invalid transitions
    my ($oldstate, $mode);
    if ($nodes{$node}) {
	$oldstate = $nodes{$node}{state};
	$mode = $nodes{$node}{mode};
    } else {
	# Try reloading the cache once before we give up on this node
	reload();
	if ($nodes{$node}) {
	    $oldstate = $nodes{$node}{state};
	    $mode = $nodes{$node}{mode};
	} else {
	    notify("Got an event for a node ($node) I don't know about\n");
	}
    }
    if ($oldstate && $mode && $valid{$mode} && $valid{$mode}{$oldstate} &&
	!$valid{$mode}{$oldstate}{$newstate}) {
	notify("Invalid transition for node $node from $mode/$oldstate " .
	       "to $newstate\n");
    }

    my $now = time();
    $nodes{$node}{state}     = $newstate;
    $nodes{$node}{timestamp} = $now;
    $nodes{$node}{notified}  = 0;

    info("$node: $mode/$oldstate => $mode/$newstate\n");
    DBQueryFatal("UPDATE nodes SET eventstate='$newstate', " .
		 "state_timestamp='$now' WHERE node_id='$node'");

    # Check if this state has a timeout, and if so, put it in the queue
    setTimeout($mode,$newstate,$node,$now);

    # Check if this is TBDB_NODESTATE_BOOTING , which has actions
    if ($newstate eq TBDB_NODESTATE_BOOTING) {
	# If I skipped shutdown, and came to booting directly from isup,
	# check for a mode transition so I don't miss one...
	if (defined($oldstate) && $oldstate eq TBDB_NODESTATE_ISUP) {
	    info("$node: Came from ISUP! Checking for mode transition\n");
	    my $r = DBQueryWarn("select next_op_mode from nodes ".
				"where node_id='$node'");
	    # DBQueryWarn presumably hands back a false value when the
	    # query fails; treat that like "no next mode" instead of
	    # calling a method on it.
	    my ($nextmode) = $r ? $r->fetchrow() : ();
	    if ($nextmode) {
		# Force the transition even though it is illegal
		info("$node: Forcing mode transition!\n");
		opModeTransition($node,$nextmode,1);
		$mode=$nextmode;
	    } else {
		debug("No next mode.\n");
	    }
	}

	# Check if I'm in the right mode
	my $osid = TBBootWhat($node,$debug);
	my $os_op_mode = os_opmode($osid);
	info("$node: Current OS is '$osid', OS mode is '$os_op_mode'\n");
	DBQueryFatal("UPDATE nodes SET osid='$osid' WHERE node_id='$node'");
	if ($os_op_mode ne $mode) {
	    my $str = "Node $node is running OS '$osid' but in mode '$mode' ".
	      "instead of mode '$os_op_mode'!\n";
	    # For now, only force if we're going into reload mode, so we
	    # don't get stuck looping in reloading.
	    if ($os_op_mode eq "RELOAD") {
		DBQueryFatal("UPDATE nodes SET op_mode='$os_op_mode', ".
			     "op_mode_timestamp=unix_timestamp(now()) ".
			     "WHERE node_id='$node'");
		$nodes{$node}{mode} = $os_op_mode;
		$nodes{$node}{mode_timestamp} = $now;
		$str .= "Forced op_mode to '$os_op_mode'.\n";
	    }
	    notify($str);
	}
	checkGenISUP($node);
    }

    # Check if this state has any triggers
    my @nodetrigs = GetNodeTriggerList($node,$mode,$newstate);
    my $globaltrigs = $triggers{"$mode:$newstate"};
    if (defined($globaltrigs) ||
        (@nodetrigs > 0) ) {
	# check for global triggers; there may be none when only the node
	# itself has triggers registered
	my @trigs = defined($globaltrigs)
	  ? split(/\s*,\s*/,$globaltrigs) : ();
	# Run all the triggers
	debug("Running triggers. Global=".join("/",@trigs).
	      "   node=".join("/",@nodetrigs)."\n");
	foreach ( @trigs , @nodetrigs) {
	    my $trig = $_;
	    /^$TBRESET$/ && do {
		# Check if we really need to do a reset
		my $r = DBQueryWarn("select osid,def_boot_osid from nodes ".
				    "where node_id='$node'");
		if (!$r) {
		    # Without the row we can't tell whether a reset is
		    # needed; report it and move on to the next trigger.
		    notify("$node: Couldn't look up osid for trigger ".
			   "'$trig'; not resetting\n");
		    next;
		}
		my ($osid,$defosid) = $r->fetchrow();
		if ($osid ne $defosid) {
		    handleCtrlEvent($node,$trig);
		}
		next;
	    };
	    /^$TBRELOADDONE$/ && do {
		handleCtrlEvent($node,$trig);
		next;
	    };
	    /^$TBFREENODE$/ && do {
		handleCtrlEvent($node,$trig);
		next;
	    };
	    /^$TBISUP$/ && do {
		info("$node: Triggered $TBISUP\n");
		EventSendWarn(host      => $BOSSNODE ,
			      objtype   => TBDB_TBEVENT_NODESTATE ,
			      eventtype => TBDB_NODESTATE_ISUP ,
			      objname   => $node);
		next;
	    };
	    notify("Unknown trigger '$trig' for $node in $mode/$newstate!\n");
	}
	# Clear any of the node triggers that we ran
	debug("Clearing node triggers: ".join("/",@nodetrigs)."\n");
	ClearNodeTrigger($node,$mode,$newstate,@nodetrigs);
    }

    # Check if this state can trigger a mode transition
    if (defined($modeTrans{"$mode:$newstate"})) {
	info("$node: Checking for mode transition\n");
	my $r = DBQueryWarn("select next_op_mode from nodes ".
			    "where node_id='$node'");
	# As above: a failed query is treated as "no next mode".
	my ($nextmode) = $r ? $r->fetchrow() : ();
	if ($nextmode) {
	    opModeTransition($node,$nextmode);
	} else {
	    debug("No next mode.\n");
	}
    }
}
623

624
#
# opModeTransition - Move $node into op_mode $newmode.
#
# Unless $force is true, the change is checked against %modeTrans for the
# node's current mode/state and a notification is sent when it is not
# listed. The change is applied either way. The node's new state is the
# one paired with $newmode in %modeTrans when there is one, otherwise the
# state is left as it was.
#
sub opModeTransition($$;$) {

    my ($node,$newmode,$force) = @_;
    $force = 0 unless defined($force);

    info("$node: Mode change to $newmode requested\n");

    # Look the node up, reloading the cache once if it is missing.
    my ($oldstate, $mode, $nextstate);
    reload() unless $nodes{$node};
    if ($nodes{$node}) {
	$oldstate = $nodes{$node}{state};
	$mode     = $nodes{$node}{mode};
    } else {
	notify("Got an event for a node ($node) I don't know about\n");
    }

    if (!defined($modeTrans{"$mode:$oldstate"}) && !$force) {
	# The current mode/state is not one a mode change may start from.
	notify("Invalid mode transition for $node from $mode/$oldstate: ".
	       "Not a valid mode transition state!\n");
    } elsif (!$force) {
	debug("Mode Transition check:\n");
	# Flatten the "mode:state" entries into a mode => state lookup.
	my %state_in =
	  split(/[:,]/, join(",",@{$modeTrans{"$mode:$oldstate"}}));
	debug("Valid transitions from $mode/$oldstate are:\n");
	foreach my $target (sort keys %state_in) {
	    debug("$target => $state_in{$target}\n");
	}
	if (defined($state_in{$newmode})) {
	    $nextstate = $state_in{$newmode};
	} else {
	    notify("Invalid mode transition for $node from ".
		   "$mode/$oldstate to $newmode!\n");
	}
    }

    # No state came out of the table: stay in the current state.
    $nextstate = $oldstate unless $nextstate;

    my $now = time();
    @{$nodes{$node}}{qw(state timestamp mode mode_timestamp notified)} =
      ($nextstate, $now, $newmode, $now, 0);

    info("$node: $mode/$oldstate => $newmode/$nextstate\n");
    DBQueryFatal("UPDATE nodes SET eventstate='$nextstate', ".
		 "next_op_mode='', op_mode='$newmode', ".
		 "state_timestamp='$now', ".
		 "op_mode_timestamp='$now' WHERE node_id='$node'");
}

#
# handleCtrlEvent - Carry out a testbed control event for $node.
#
# Recognized events (compared against the whole event name):
#   $TBRESET      - run os_select to restore the default boot OS and the
#                   pxe boot path
#   $TBRELOADDONE - clear reload bookkeeping; for nodes in the reloading
#                   experiment, also arrange for them to be freed at ISUP
#   $TBFREENODE   - delete the node's row from the reserved table
#   $TBTIMEOUT    - notify that the node timed out in its current state
# Anything else produces an "Unknown CtrlEvent" notification.
#
sub handleCtrlEvent($$) {
    my ($node,$event) = @_;

    info("CtrlEvent: $node, $event\n");

    foreach ($event) {
	/^$TBRESET$/ && do {
	    my $result = DBQueryFatal("SELECT pxe_boot_path, def_boot_osid ".
				      "FROM nodes where node_id='$node'");
	    my ($pxepath,$osid) = $result->fetchrow();

	    # Important note on ordering here:
	    # Because setting a normal osid resets pxe path to PXEBOOT,
	    # We need to read it out first, then set the osid, then set
	    # the pxepath back to its original value at the end.

	    # Declared once here; the first assignment used to go to an
	    # undeclared package global.
	    my $cmd = "$osselect $osid $node";
	    system($cmd) and
	      notify("$node/$event: Couldn't clear next_boot_*\n".
		     "\tcmd=$cmd\n\t*** $!\n");

	    # An unset or empty path means the default, PXEBOOT.
	    if (defined($pxepath) && $pxepath ne "") {
		$pxepath = "-p ".$pxepath;
	    } else {
		$pxepath = "PXEBOOT";
	    }
	    $cmd = "$osselect -m $pxepath $node";
	    system($cmd) and
	      notify("$node/$event: Couldn't clear next_pxe_boot_path\n".
		     "\tcmd=$cmd\n\t*** $!\n");

	    info("Performed RESET for $node to $osid/$pxepath\n");
	    next;
	};
	/^$TBRELOADDONE$/ && do {
	    info("Clearing reload info for $node\n");
	    DBQueryFatal("delete from current_reloads where node_id='$node'");
	    my ($pid,$eid);
	    NodeidToExp($node,\$pid,\$eid);
	    if (($pid eq NODERELOADING_PID) && ($eid eq NODERELOADING_EID)) {
		DBQueryFatal("delete from scheduled_reloads ".
			     "where node_id='$node'");
		# AddNodeTrigger is treated as returning true on failure.
		AddNodeTrigger($node, $TBANYMODE, TBDB_NODESTATE_ISUP,
			       $TBFREENODE)
		  && notify("$node: Couldn't add trigger $TBFREENODE!\n");
		info("Set up freeing of $node from $pid/$eid\n");
	    }
	    next;
	};
	/^$TBFREENODE$/ && do {
	    # Don't need pid/eid, but we should put it in the log
	    my ($pid,$eid);
	    NodeidToExp($node,\$pid,\$eid);
	    DBQueryFatal("delete from reserved where node_id='$node'");
	    info("Released $node from $pid/$eid\n");
	    next;
	};
	/^$TBTIMEOUT$/ && do {
	    my $state = $nodes{$node}{state};
	    my $mode = $nodes{$node}{mode};
	    my ($timeout,$action);
	    if ($mode && $state && $timeouts{$mode} &&
		$timeouts{$mode}{$state}) {
		($timeout, $action) = @{$timeouts{$mode}{$state}};
	    }
	    # $action stays undefined when no timeout entry exists for this
	    # mode/state, so test for that before comparing it.
	    notify("Node $node has timed out in state $mode/$state".
		   (defined($action) && $action ne ""
		    ? "\n\tRequested action $action." : "").
		   "\n");
	    next;
	};
	notify("$node: Unknown CtrlEvent: $event\n");
    }
}
Robert Ricci's avatar
Robert Ricci committed
758

759 760 761 762
#
# Check if we need to generate an ISUP
#
sub checkGenISUP($) {
    # Decide, from the osfeatures of the OS recorded for $node, how its
    # ISUP event will be produced:
    #   "isup" feature - the OS sends it itself; nothing to do (returns 0)
    #   "ping" feature - start eventping in the background (returns 0)
    #   neither        - send ISUP right away (returns what EventSendWarn
    #                    returns)
    my ($node) = @_;
    debug("$node: Checking ISUP Generation\n");
    my $r = DBQueryWarn("select osfeatures from nodes as n ".
			"left join os_info as o on o.osid=n.osid ".
			"where node_id='$node' and osfeatures is not null");
    my $osfeatures="";
    # If we don't get anything back, assume it has no features. A failed
    # query (presumably a false return from DBQueryWarn) is handled the
    # same way instead of calling a method on it.
    if ($r && $r->num_rows() > 0) {
	($osfeatures) = $r->fetchrow();
    }

    my @features = split(",",$osfeatures);
    # Make sure features I care about are defined
    my %can=("ping"=>0, "isup"=>0);
    foreach my $f (@features) {
	$can{"\L$f"}=1;	# make sure it's all lowercase
    }

    # If os will send ISUP on its own, do nothing here.
    if ($can{"isup"}) {
	debug("$node: Will send own ISUP\n");
	return 0;
    }

    # If os doesn't support isup but can ping, fork and ping it every
    # few seconds and send isup when it pings, or timeout after too long.
    if ($can{"ping"}) {
	debug("$node: Needs to be pinged - calling eventping\n");
	system("$TB/sbin/eventping $node &");
	return 0;
    }

    # If os doesn't support ping or isup, stated sends ISUP just after
    # the node gets to BOOTING (a bit early, but the best we can do)

    debug("$node: OS doesn't ping - sending ISUP\n");
    EventSendWarn(host      => $BOSSNODE ,
		  objtype   => TBDB_TBEVENT_NODESTATE ,
		  eventtype => TBDB_NODESTATE_ISUP ,
		  objname   => $node);
}

805 806 807 808
# Figure out if this node belongs to us (ie. if it's using our database.)
#
# The node's experiment (if any) may name a testdb. The node is ours when
# that name matches $TBDBNAME, or when no testdb is set and we are running
# against the "real" database ($REALTBDBNAME).
#
# Arguments: $node - node_id to look up
# Returns 1 if it does, 0 if not (including when the node is unknown)
sub checkDBRedirect($) {
    my ($node) = @_;

    # XXX: I don't want to do this every time, for performance reasons,
    # but we need to make sure that we don't get into an inconsistent
    # state
    my $result = DBQueryFatal("SELECT testdb FROM nodes as n " .
                              "LEFT JOIN reserved as r ON n.node_id=r.node_id ".
                              "LEFT JOIN experiments as e ON r.pid = e.pid " .
                              "AND r.eid = e.eid " .
                              "WHERE n.node_id = '$node'");

    if (!$result->num_rows()) {
        notify("Got an event for a node ($node) I don't know about\n");
        return 0;
    }

    my ($testdb) = $result->fetchrow();

    # XXX: It's hokey to hardcode tbdb here, but....

    #debug("checkDBRedirect: $node => $testdb (I'm $TBDBNAME)\n");
    if ((!$testdb && ($TBDBNAME eq $REALTBDBNAME)) ||
        ($testdb && ($testdb eq $TBDBNAME))) {
        return 1;
    }
    return 0;
}

838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859
# Replace any queued timeout for a node with the one (if any) that applies
# to its new mode/state.
#
# Arguments: $mode, $state - looked up in %timeouts (either may be undef)
#            $node         - node whose queue entry is replaced
#            $now          - time the deadline is added to
# Any existing queue entry for $node is always removed; a new one is only
# inserted when %timeouts has a deadline for ($mode,$state) that is not
# $TBNOTIMEOUT.
sub setTimeout( $$$$ ) {
    my ($mode,$state,$node,$now) = @_;

    # Flip to 1 to print the queue at each step.
    my $trace = 0;

    if ($trace) { print "Original: ($mode,$state,$node,$now)\n"; qshow(); }
    qdelete($node)
        if (defined(qfind($node)));
    if ($trace) { print "Deleted:\n"; qshow(); }

    # Tested one level at a time so the lookup does not create entries
    # in %timeouts.
    my $known = (defined($mode) && defined($state) &&
                 defined($timeouts{$mode}) &&
                 defined($timeouts{$mode}{$state}));
    if ($known) {
        my $deadline = $timeouts{$mode}{$state}->[0];
        if (defined($deadline) && $deadline != $TBNOTIMEOUT) {
            my $expires = $deadline + $now;
            debug("Setting timeout for ($node,$mode,$state) at ".
                  "$deadline + $now ($expires)\n");
            qinsert($expires,$node);
            if ($trace) { qshow(); }
        }
    }
    if ($trace) { print "Done:\n"; qshow(); }
}

Robert Ricci's avatar
Robert Ricci committed
860 861
# Reload state from the database
#
# Records the reload time in $last_reload and refreshes the %timeouts,
# %valid, %modeTrans, %triggers and %nodes tables from their respective
# loader functions. %nodes is passed to readStates() and replaced by
# what it returns.
sub reload() {
    debug("Reloading state from database\n");
    $last_reload = time();
    %timeouts  = getTimeouts();
    %valid     = getValid();
    %modeTrans = getModeTrans();
    %triggers  = getTriggers();
    %nodes     = readStates(%nodes);
}

871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950
#
# Some functions for node triggers
#

# $rv   = AddNodeTrigger($node, $mode, $state, @triglist);
#
# Adds the given triggers to the list kept in %triggers under
# "$node:$mode:$state", then writes the full comma-separated list to the
# state_triggers table. A trigger is only stored once, whether the
# duplicate is already in the stored list or repeated in @triglist.
#
# Returns 1 if @triglist is empty (nothing done), 0 otherwise.
sub AddNodeTrigger( $$$@ ) {
    my ($node, $mode, $state, @trigs) = @_;
    if (@trigs == 0) { return 1; }

    my $key = "$node:$mode:$state";

    # Start from whatever is already stored for this key.
    my %seen = ();
    if (defined($triggers{$key})) {
        foreach my $t (@{$triggers{$key}}) { $seen{$t} = 1; }
    } else {
        $triggers{$key} = [];
    }

    # Append the new ones in the order given, marking each as seen so a
    # trigger repeated in the argument list is not added twice.
    foreach my $t (@trigs) {
        next if (defined($seen{$t}));
        $seen{$t} = 1;
        push(@{$triggers{$key}}, $t);
    }

    my $triglist = join(",",@{$triggers{$key}});
    DBQueryFatal("replace into state_triggers ".
                 "(node_id,op_mode,state,trigger) values ".
                 "('$node','$mode','$state','$triglist')");
    return 0;
}

# @list = GetNodeTriggerList($node, $mode, $state);
#
# Collects the triggers stored in %triggers for this node and state:
# first those for the given mode, then those for $TBANYMODE. Returns an
# empty list when neither entry exists.
sub GetNodeTriggerList( $$$ ) {
    my ($node, $mode, $state) = @_;
    my @found = ();
    foreach my $m ($mode, $TBANYMODE) {
        my $key = "$node:$m:$state";
        next
            unless (defined($triggers{$key}));
        push(@found, @{$triggers{$key}});
    }
    return @found;
}

# $rv   = ClearNodeTrigger($node, $mode, $state, @triglist);
# Note: When not clearing all triggers, ordering is not preserved!
#
# With an empty @triglist, or one that matches the node's current list
# (as returned by GetNodeTriggerList), every trigger for the node/state
# is removed, both for $mode and for $TBANYMODE, in %triggers and in the
# state_triggers table. Otherwise only the listed triggers are removed
# and the remainder is stored again under $mode.
#
# Always returns 0.
sub ClearNodeTrigger( $$$ ; @ ) {
    my ($node, $mode, $state, @trigs) = @_;
    # We have to keep any triggers that aren't on the list, but the
    # most common case will be that the list they give us is the whole
    # list anyway. So treat that case special.
    my @reallist = GetNodeTriggerList($node,$mode,$state);
    # empty list means clear all...
    if ((@trigs==0) || join(",",sort @reallist) eq join(",",sort @trigs)) {
        # Same list... just nuke the entry
        debug("Clearing all triggers for $node...\n");
        delete($triggers{"$node:$mode:$state"});
        delete($triggers{"$node:$TBANYMODE:$state"});
        DBQueryFatal("delete from state_triggers ".
                     "where node_id='$node' and state='$state' and ".
                     "(op_mode='$mode' or op_mode='$TBANYMODE')");
    } else {
        # Subtract @trigs from @reallist
        my %temptrigs = ();
        foreach my $k (@reallist) { $temptrigs{$k} = 1; }
        debug("Reallist = ".join("/",@reallist).", trigs=".
              join("/",@trigs).".\n");
        foreach my $t (@trigs) {
            if (defined($temptrigs{$t})) {
                delete($temptrigs{$t});
                debug("Clearing $t\n");
            }
        }
        # Note: This doesn't quite do the right thing with triggers
        # for a fixed mode vs TBANYMODE. So if you start using this
        # code, make sure and debug it first!
        my @newtrigs = keys %temptrigs;
        debug("Newlist = ".join("/",@newtrigs).".\n");
        delete($triggers{"$node:$mode:$state"});
        if (@newtrigs > 0) {
            # AddNodeTrigger rewrites the table row with the new list.
            AddNodeTrigger($node,$mode,$state,@newtrigs);
        } else {
            # Nothing left for this mode: the in-memory entry is gone,
            # so remove the table row as well to keep the two in step.
            DBQueryFatal("delete from state_triggers ".
                         "where node_id='$node' and state='$state' and ".
                         "op_mode='$mode'");
        }
    }

    return 0;
}

951 952
# Look up the op_mode for an osid.
#
# Arguments: $osid - optional; a missing or false value is treated as "".
# Returns:   "MINIMAL" when $osid equals $TB_OSID_MBKERNEL; otherwise the
#            op_mode column from os_info for that osid, or "" when there
#            is no such row or the column is empty.
#
# The prototype allows the one optional argument that the body reads.
sub os_opmode(;$) {
    my $osid = shift || "";
    if ($osid eq $TB_OSID_MBKERNEL) {
        return "MINIMAL";
    }
    my $cmd = "select op_mode from os_info where osid='$osid';";
    my $q = DBQueryFatal($cmd);
    if ($q->numrows() < 1) {
        return "";
    }
    my ($opmode) = $q->fetchrow_array();
    # Normalize before logging so a NULL column is reported as ''.
    if (!defined($opmode)) {
        $opmode = "";
    }
    debug("OpMode for '$osid' is '$opmode'\n");
    return $opmode;
}
Mac Newbold's avatar
Mac Newbold committed
969

970 971 972 973
#
# Functions for controlling output/logging, and signal handling
#

Robert Ricci's avatar
Robert Ricci committed
974
# Print the arguments to the current output handle, but only when the
# global $debug is true.
sub debug(@) {
    if ($debug) {
        print @_;
    }
}

# Report a message through notify() and then die with the same message.
sub fatal($) {
    my $msg = shift;
    notify($msg);
    die($msg);
}

Mac Newbold's avatar
Mac Newbold committed
986
# Dump the pending mail queue (%msgs) through debug(). Does nothing
# unless $debug is at least 2. Each entry is printed as the message,
# the number of timestamps queued for it, and the timestamps themselves.
sub showqueue() {
    if ($debug < 2) {
        return;
    }
    if ((keys %msgs) > 0) {
        debug("\nMAILQUEUE:\n");
    }
    foreach my $k (sort keys %msgs) {
        my @l = @{$msgs{$k}};
        debug("MSGS:\n$k==> (".(@l+0).",'".join("','",@l)."')\n");
    }
}

# Log a message and queue it for the throttled mail digest.
#
# Arguments: $message   - text to log and queue
#            $checkonly - optional; when true nothing is logged or
#                         queued, and the call only checks whether the
#                         queue is due to be sent.
#
# Once at least $mailgap seconds have passed since $lastmail and the
# queue is not empty, all queued messages are combined into one mail,
# the queue is reset and $lastmail is updated. In debug mode the digest
# is printed through debug() instead of being mailed.
sub notify($;$) {
    my $message = shift;
    my $checkonly = shift || 0;
    # Use a timestamp, now that we're throttling mail
    my $tstamp = strftime("%b %e %H:%M:%S",localtime);
    showqueue();
    if (!$checkonly) {
        info($message);
        $mailqueue++;
        # Queue up the message
        # (The queue is a hash of lists of timestamps, keyed by message)
        if (defined($msgs{$message})) {
            push(@{$msgs{$message}},$tstamp);
        } else {
            $msgs{$message} = [$tstamp];
        }
        showqueue();
    }
    my $now = time;
    if ($now - $lastmail >= $mailgap) {
        if ((keys %msgs)>0) {
            debug("SENDING MAILQUEUE\n"."(now $now, lastmail $lastmail, ".
                  ($now-$lastmail).">=$mailgap)\n");
            my $mailbody = "";
            my $sep = '-'x5;
            # We're okay to send. Make a digest of all the queued messages.
            foreach my $msg (sort keys %msgs) {
                my @tlist = @{$msgs{$msg}};
                my $count = 0+@tlist;
                $mailbody .= "\n$msg\n";
                if ($count > 1) {
                    # Only the first and last timestamps are reported.
                    my $first = shift @tlist;
                    my $last = pop @tlist;
                    $mailbody .= "($count copies from $first to $last)\n";
                } else {
                    $mailbody .= "($count copy at $tlist[0])\n";
                }
                $mailbody .= "$sep\n";
            }
            # Now reset the mail queue
            %msgs = ();
            $mailqueue = 0;
            showqueue();
            $lastmail = time;
            if (!$debug) {
                SENDMAIL("Stated List <".$TBOPS.">",
                         "Stated Messsage",$mailbody,
                         "Stated Daemon <".$TBOPS.">");
            } else {
                debug("notify: Not sending mail in debug mode\n");
                debug("MAIL CONTAINS:\n".$mailbody."\n");
            }
        }
    } # else do nothing, not time yet
}

1055
# Send a message immediately to $REALTBOPS, bypassing the mail digest.
#
# The message is also passed to notify(), so it is logged and queued
# there as well. In debug mode the mail is printed through debug()
# instead of being sent.
sub announce($) {
    my $message = shift;
    my $tstamp = strftime("%b %e %H:%M:%S",localtime);
    notify("ANNOUCEMENT: ".$message."\n\n(Sent to $REALTBOPS)\n");
    # Lexical, so this does not overwrite a package-level $mailbody.
    my $mailbody = "\n$message\n\n$tstamp\n";
    if (!$debug) {
        SENDMAIL($REALTBOPS,
                 "Stated Messsage",$mailbody,
                 "Stated Daemon <".$TBOPS.">");
    } else {
        debug("announce: Not sending mail in debug mode\n");
        debug("MAIL CONTAINS:\n".$mailbody."\n");
    }
}

1070
# Write a message to syslog.
#
# Arguments: $message - text to log
#            $notice  - optional; when true the message is logged at
#                       priority "notice" instead of "info".
#
# In debug mode the message is also printed with a syslog-style prefix,
# and the copy sent to syslog is prefixed with "DEBUG: ".
sub info($;$) {
    my $message = shift;
    my $notice = shift || 0;
    # Use syslog
    my $prio = "info";
    if ($notice) {
        $prio = "notice";
    }
    if ($debug) {
        # Print out log entries like this:
        # Sep 20 09:36:00 stated[238]: Reloading state from database
        print strftime("%b %e %H:%M:%S",localtime)." stated[$$]: $message";
        $message = "DEBUG: ".$message;
    }
    # The message is passed as an argument to a fixed "%s" format, so
    # that any '%' it contains is logged literally rather than being
    # interpreted as a format directive.
    if (!syslog($prio, "%s", $message)) {
        # notify() calls back into info(). The flag stops a persistent
        # syslog failure from recursing: only the outermost failure is
        # reported.
        if (!$info_syslog_failing) {
            local $info_syslog_failing = 1;
            notify("syslog failed: $? $!\n");
        }
    }
}

# This gets called if we catch a signal USR1
#
# Polls the event queue one last time, unregisters from the event
# system, removes the lockfile, unblocks SIGUSR1 and SIGHUP, and then
# exec's the daemon again with the arguments saved in @args. The
# program run is $0 when that is an absolute path, otherwise
# $TB/sbin/stated. Dies if the signals cannot be unblocked or the exec
# fails.
sub restart {
    info("SIGUSER1 received: Performing final event poll before restarting\n");
    # Explicit call: a bare name is only a call if the sub has already
    # been declared at this point in the file.
    process_event_queue();
    my $params = join(" ",@args);
    my $prog = "";
    # If we're started from an absolute path, use that.
    if ($0 =~ /^\//) {
        $prog = $0;
    } else {
        $prog = "$TB/sbin/stated";
    }
    info("Restarting from '$prog".($params ne "" ? " $params" : "")."'\n");
    if ($handle && event_unregister($handle) == 0) {
        warn "Unable to unregister with event system\n";
    }
    if (defined($lockfile) && $lockfile ne "") {
        unlink $lockfile;
    }
    if (!defined(sigprocmask(SIG_UNBLOCK, POSIX::SigSet->new(SIGUSR1,SIGHUP)))) {
        notify("sigprocmask: sig unblock failed! $?, $!\n");
        die("\n");
    }
    announce("Stated restarted\n");
    # exec only returns on failure.
    exec("$prog $params") or
      do {
          my $msg = "Couldn't restart stated! cmd='$prog $params'\n".
            "Error: ($?) $!\n";
          announce($msg);
          die($msg);
      };
}

1120 1121
# This gets called if we catch a signal (TERM, etc.)
#
# Reports the signal through notify() and exits with status 0.
sub cleanup {
    notify("Signal received, exiting\n");
    # now do the normal exit stuff in END {}
    exit(0);
}

Robert Ricci's avatar
Robert Ricci committed
1127 1128
# This gets called if we die of 'natural causes' (exit, die, etc.)
#
# Removes the lockfile and announces the exit when $lockfile is set;
# otherwise just logs that a child is exiting. Then closes syslog and
# unregisters from the event system if $handle is set. The exit status
# seen on entry is saved and put back at the end.
END {
    debug("Ending stated...\n");
    my $stat = $?;
    if (defined($lockfile) && $lockfile ne "") {
        unlink $lockfile;
        announce("Stated exiting, cleaning up\n");
    } else {
        # Must be a child
        info("Stated child exiting\n");
    }
    debug("Annouced. Cleaning up...\n");
    # clean up Syslog
    closelog();
    if ($handle) {
        debug("Unregistering w/event system...\n");
        # Warn rather than die: we are already exiting, and dying here
        # would skip restoring $? below. restart() warns on the same
        # failure.
        if (event_unregister($handle) == 0) {
            warn "Unable to unregister with event system\n";
        } else {
            debug("Unregistered.\n");
        }
    }
    debug("Cleaned up. Bye!\n");
    # Restore $? in case one of the things I called changed it
    $? = $stat;
}
1152