stated.in 33.3 KB
Newer Older
Robert Ricci's avatar
Robert Ricci committed
1
#!/usr/bin/perl -w
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
5 6 7
# All rights reserved.
#

Robert Ricci's avatar
Robert Ricci committed
8 9 10
#
# stated - A daemon to monitor the states of nodes in the testbed. Receives
# state change notification through the event system, and writes the new
11 12
# state into the database. Also watches for invalid transitions, timeouts, 
# and performs other state-related control functions.
Robert Ricci's avatar
Robert Ricci committed
13 14 15 16
#
# Send it a HUP signal to get it to reload the timeout and transition
# information. Periodically reloads this information regardless, though.
#
17
# Will restart when sent SIGUSR1, by exec'ing its executable again.
Robert Ricci's avatar
Robert Ricci committed
18
#
19

Robert Ricci's avatar
Robert Ricci committed
20 21
# Configure variables.
# NOTE(review): the @...@ tokens are presumably filled in when this .in
# template is configured -- the double quotes are kept exactly as they were
# because the substituted values may depend on them.
use lib '@prefix@/lib';
my $TB           = "@prefix@";
# So we know if we're the "real" stated or not
my $REALTB       = "/usr/testbed";
my $BOSSNODE     = "@BOSSNODE@";
my $TBOPS        = "@TBSTATEDEMAIL@";
my $REALTBOPS    = "@TBOPSEMAIL@";
my $TBDBNAME     = "@TBDBNAME@";
# So we know if we're using the "real" db
my $REALTBDBNAME = "tbdb";
# Helper program run for RESET control events
my $osselect     = "$TB/bin/os_select";
Robert Ricci's avatar
Robert Ricci committed
30 31 32 33 34 35

$| = 1;

use event;
use libdb;
use libtestbed;
36
use TimeoutQueue;
Robert Ricci's avatar
Robert Ricci committed
37
use Getopt::Std;
38
#use strict;
Robert Ricci's avatar
Robert Ricci committed
39
use English;
Mac Newbold's avatar
Mac Newbold committed
40 41
use POSIX;			# for strftime, and sigprocmask and friends
use Fcntl;			# file constants for pidfile
Mac Newbold's avatar
Mac Newbold committed
42 43 44 45 46
use Sys::Syslog;
# Important note about syslog: It defaults to using an inet socket,
# but 'syslogd -s' (the default) doesn't listen for one. So either
# run syslogd without -s, or use setlogsock('unix') before openlog.
# (To get setlogsock: 'use Sys::Syslog qw(:DEFAULT setlogsock);' )
Robert Ricci's avatar
Robert Ricci committed
47

Mac Newbold's avatar
Mac Newbold committed
48 49 50
# Retry database queries several times before we fail and die.
$libdb::DBQUERY_MAXTRIES = 5;

# Number of iterations (roughly, seconds) after which we'll reload
# information from the database, so that our cached copy never gets
# _too_ far out of sync.
my $reload_time = 600;
# When the cached information was last (re)loaded; starts at startup time.
my $last_reload = time();
Robert Ricci's avatar
Robert Ricci committed
56 57 58 59

# Process command-line arguments

#
# Print the command-line help text to STDOUT.
#
# Returns whatever print() returns; the option-parsing code passes that
# value straight to exit(), so print must stay the last statement here.
#
sub usage {
    print <<"END";
Usage: $0 [-h] [-d] [-s server] [-p port]
-h              This message
-d              Turn on debugging output, and don't go into the background
-s server       Use specified server, instead of this site's bossnode
-p port         Use specified port
Send SIGHUP to reload database state, or SIGUSR1 to restart completely.
END
}

Mac Newbold's avatar
Mac Newbold committed
70
# Only root should run this - it won't work when run as a user...
# (Or, let an admin run it if it isn't the real one in /usr/testbed/ )
if ($UID && ( $TB eq $REALTB || ! TBAdmin($UID) ) ) {
    die("Only root can run this script!\n");
}

my @args = @ARGV;    # save a copy for restart before we mess with them.
my %opt = ();

# getopts() returns false when it meets an unknown switch, so check it
# instead of silently carrying on with a half-parsed command line.
# Leftover non-option arguments are an error too.  The exit status is
# usage()'s return value, as before.
if (!getopts("ds:p:h",\%opt) || $opt{h} || @ARGV) {
    exit usage();
}

# -s overrides the event server (default: this site's boss node),
# -p selects a port, -d turns on debugging and keeps us in the foreground.
my ($server,$port,$debug);
$server = $opt{s} ? $opt{s} : $BOSSNODE;
if ($opt{p}) {
    $port = $opt{p};
}
$debug = $opt{d} ? 1 : 0;
Robert Ricci's avatar
Robert Ricci committed
101

102
# Copy library constants into plain scalars so that they can be
# interpolated into strings and regular expressions below.
my $TBANYMODE        = TBDB_NODEOPMODE_ANY;
my $TBRESET          = TBDB_TBCONTROL_RESET;
my $TBRELOADDONE     = TBDB_TBCONTROL_RELOADDONE;
my $TBTIMEOUT        = TBDB_TBCONTROL_TIMEOUT;
my $TBNOTIMEOUT      = TBDB_NO_STATE_TIMEOUT;
my $TBNODESTATE      = TBDB_TBEVENT_NODESTATE;
my $TBNODEOPMODE     = TBDB_TBEVENT_NODEOPMODE;
my $TBCONTROL        = TBDB_TBEVENT_TBCONTROL;
my $TB_OSID_MBKERNEL = TB_OSID_MBKERNEL;

# This only gets used here, so it isn't in a lib constant.
my $TBFREENODE = "FREENODE";
my $TBISUP     = TBDB_NODESTATE_ISUP;

# Notification throttling: mail goes out at most once per $mailgap seconds.
my $mailgap  = 15;
# Backdate the "last mail" time so that a digest of the startup messages
# is sent after 2s.
my $lastmail = time() - $mailgap + 2;
my %msgs     = ();

Mac Newbold's avatar
Mac Newbold committed
122
# The "real" stated keeps its pidfile in /var/run; any other installation
# keeps it under its own tree.
my $pidfile;
if ( $TB eq $REALTB ) {
    $pidfile = "/var/run/stated.pid";
} else {
    $pidfile = "$TB/locks/stated.pid";
}
debug("Using pidfile $pidfile\n");
if (-e $pidfile) {
    # Read the recorded pid ourselves rather than shelling out to cat(1).
    my $otherpid = "";
    if (open(my $pidfh, "<", $pidfile)) {
        my $line = <$pidfh>;
        close($pidfh);
        $otherpid = $line if (defined($line));
    }
    $otherpid =~ s/^\s+//;
    $otherpid =~ s/\s+$//;

    # Probe the process with signal 0: nothing is delivered, but the call
    # tells us whether the pid exists.  EPERM means it exists but belongs
    # to somebody else.  Unlike grepping ps output this cannot match a pid
    # that merely contains ours as a substring, and an empty or garbled
    # pidfile is simply treated as stale.
    my $running = 0;
    if ($otherpid =~ /^\d+$/ && $otherpid > 0) {
        $running = 1 if (kill(0, $otherpid) || $!{EPERM});
    }

    if ($running) {
        fatal("Lockfile $pidfile exists, and process $otherpid appears to be ".
              "running.\n");
    } else {
        notify("Lockfile exists, but process $otherpid appears to be dead.\n".
               "Removing lock file...\n");
    }
    unlink($pidfile) ||
      fatal("Couldn't remove $pidfile: $!\n");
}
Robert Ricci's avatar
Robert Ricci committed
142
# Go into the background unless we are debugging.
unless ($debug) {
    # We use syslog, so redirect the output to nothing.
    # NOTE(review): a true return from TBBackGround() presumably means we
    # are the process that should go away -- confirm in libtestbed.
    exit(0)
        if (TBBackGround("/dev/null"));
}

# set up syslog
openlog("stated","pid","user");

# Write our pid into the pidfile.  O_EXCL makes the open fail if the file
# already exists.
sysopen(PIDFILE, $pidfile, O_WRONLY | O_EXCL | O_CREAT)
    or fatal("Couldn't create '$pidfile': $? $!\n");
print PIDFILE "$$";
close PIDFILE;

# If I make it to here, I'll need to clean up the lock file
my $lockfile=$pidfile;

# Change my $0 so that it is easier to see in a ps/top
$0 = "$0";

Robert Ricci's avatar
Robert Ricci committed
161
# Build the event server URL, with the port appended only when one was given.
my $URL = "elvin://$server";
$URL .= ":$port"
    if ($port);

# Connect to the event system, and subscribe to the events we want
my $handle = event_register($URL,0);
fatal("Unable to register with event system\n")
    unless ($handle);

my $tuple = address_tuple_alloc();
fatal("Could not allocate an address tuple\n")
    unless ($tuple);

# We want node state, node op-mode and testbed control events.
%$tuple = ( objtype => join(",",$TBNODESTATE,$TBNODEOPMODE,$TBCONTROL) );

event_subscribe($handle,\&handleEvent,$tuple)
    or fatal("Could not subscribe to events\n");

# Read in the timeout, valid transition, mode transition and trigger
# tables from the database, and then the pre-existing node states.
my %timeouts  = getTimeouts();
my %valid     = getValid();
my %modeTrans = getModeTrans();
my %triggers  = getTriggers();
my %nodes     = readStates();
qshow()
    if ($debug);
Robert Ricci's avatar
Robert Ricci committed
191 192 193

# Gets set if a reload of state from the database should happen.
my $do_reload = 0;
# Set when a restart / a cleanup has been requested; the main loop acts on
# them at the end of its current pass.
my $sigrestart= 0;
my $sigcleanup= 0;

# Make the daemon reload database state on a sighup - but I'm worried
# about what would happen if we tried to do this mid-loop. So, we'll
# just set a flag and do it when we're done with our current pass.
$SIG{HUP}  = sub { info("SIGHUP - Reloading DB state\n"); $do_reload = 1; };

# Set up other signals.
# (No handler is installed for KILL: SIGKILL cannot be caught, so one
# would never run.)
$SIG{USR1} = \&restart_wrap;
$SIG{USR2} = \&cleanup_wrap;
$SIG{INT}  = \&cleanup_wrap;
$SIG{QUIT} = \&cleanup_wrap;
$SIG{ABRT} = \&cleanup_wrap;
$SIG{TERM} = \&cleanup_wrap;

# Track if I handled an event or not
my $event_count = 0;

# Control how long I block while waiting for events
my $blockwait=0;
my $nextdeadline=time();
my $mailqueue=0;

notify("Stated starting up\n");

#
# Poll the event system once on behalf of the main loop.
#
# Resets $event_count (handleEvent increments it for every event), works
# out how long we may block, and then polls until no new events arrive and
# the wait has run out, or until a restart/cleanup/reload has been flagged.
#
# How long we may block:
#  - mail queued:       until the next mail is due ($lastmail + $mailgap)
#  - $blockwait set:    up to 600 seconds
#  - a known deadline:  until $nextdeadline
#  - otherwise:         not at all
#
sub process_event_queue() {
    $event_count=0;
    my $lastcount=-1;
    # Start from 0 so the comparisons and arithmetic below never see an
    # undefined value when none of the cases applies.
    my $wait = 0;
    my $now = time();
    debug("Polling - mq=$mailqueue bw=$blockwait\n");
    if ( $mailqueue == 0) {
        # no messages waiting...
        if ($blockwait) {
            # we can wait a long time - nothing else will happen
            # until we get an event, or get woken up by a signal
            $wait = 600;
        } elsif ($nextdeadline > 0) {
            # only wait until the next deadline...
            $wait = $nextdeadline - $now;
        }
    } else {
        # mail is waiting. Only block until it is time to send it.
        $wait = $lastmail + $mailgap - $now;
        debug("Now $now, mailgap $mailgap, last $lastmail ==> wait $wait\n");
    }
    if ($wait < 0) { debug("Wait was $wait!\n"); $wait=0; }
    my $finish = $now + $wait;
    while (($event_count != $lastcount || $wait > 0) &&
           !($sigrestart || $sigcleanup || $do_reload)) {
        $lastcount = $event_count;
        # Don't block if we got a signal!
        if ($wait<=0 || $sigrestart || $sigcleanup || $do_reload) {
            event_poll($handle);
        } else {
            debug("Using blocking event poll - $wait seconds\n");
            # timeout param is in milliseconds, so multiply
            event_poll_blocking($handle, $wait*1000);
            $now = time();
            # subtract seconds elapsed from my wait time
            $wait = $finish - $now;
            debug("Finished blocking event poll - $wait seconds remain\n");
            # Something arrived and there is follow-up work (timeouts
            # queued, mail queued, or a signal flag): stop waiting.
            if ($event_count > 0 &&
                (qsize() > 0 || $mailqueue ||
                 $sigrestart || $sigcleanup || $do_reload)) {
                $blockwait=0;
                $wait=0;
                #debug("Cancelling wait - timeouts/msgs waiting, or HUP'd\n");
                #debug("---End Blocking Wait ---\n");
            }
        }
        #debug("Wait is $wait\n");
    }
    if ($event_count > 0) {
        debug("Handled $event_count event(s).\n");
    }
}
Robert Ricci's avatar
Robert Ricci committed
275

276
# Now, we just poll for events, and watch for timeouts
while (1) {
    process_event_queue();
    my $now = time();
    my ($deadline,$node);

    # Check for nodes that have passed their timeout.
    # NOTE(review): qhead()/qpop() appear to fill in $deadline and $node
    # through their arguments and to return false on success -- confirm
    # against TimeoutQueue.
    if (!qhead($deadline,$node)) {
        info("HEAD: $node in ".($deadline-$now).", queue=".qsize()."\n");
        while ($now >= $deadline && $node ne "") {
            qpop($deadline,$node);
            info("POP: $node in ".($deadline-$now).", queue=".qsize()."\n");
            # Lexical, so it no longer leaks out as a package global.
            my $notified = $nodes{$node}{notified};
            if (!$notified) {
                handleCtrlEvent($node,$TBTIMEOUT);
                $nodes{$node}{notified} = 1;
            } else {
                notify("$node: Timed out at $now (d=$deadline), ".
                       "but notified already!\n");
            }
            #qshow();
            # Look at the next entry; stop when the queue is empty.
            if (qhead($deadline,$node)) {
                $deadline=0; $node="";
            }
        }
    } else {
        $deadline=0;
    }
    # Tell process_event_queue() how long it may block next time round.
    $nextdeadline = $deadline;

    if (qsize()==0) {
        $blockwait=1;
        debug("---Blocking wait okay---\n");
    }

    # Reload on request (SIGHUP), or when our information has become old.
    if ($do_reload || ($now - $last_reload > $reload_time)) {
        reload();
        $do_reload = 0;
    }

    # Send any messages in the queue if it is time
    notify("",1);

    if ($sigrestart) { restart(); }
    if ($sigcleanup) { cleanup(); }

    #sleep(1);
}

# Not reached: the loop above only ends through restart() or cleanup().
exit(0);

Robert Ricci's avatar
Robert Ricci committed
327
#
# Read the current states of nodes from the database.
#
# Takes an optional flattened hash of the previously cached node entries.
# For a node whose state, mode and state timestamp are unchanged the old
# entry is reused; otherwise a fresh entry is built and setTimeout() is
# called for it.  Nodes whose node_id starts with 'sh' are not read.
#
# Returns the new node hash (node_id => entry hashref), flattened.
#
sub readStates(;@) {
    # With no arguments this is simply an empty hash, so no defined()
    # guard is needed (defined(%hash) is a compile error in current Perls).
    my %oldnodes = @_;

    #debug("readStates called\n");
    my $result = DBQueryFatal("SELECT node_id, eventstate, " .
                              "state_timestamp, op_mode, " .
                              "op_mode_timestamp FROM nodes ".
                              "where node_id not like 'sh%'");

    my %nodes;
    while (my ($node_id, $state, $timestamp, $mode, $mode_timestamp)
           = $result->fetchrow()) {
        #
        # If there's an entry in oldnodes for this node, and it
        # hasn't changed state or time, use the old entry (so that
        # we don't lose information about which nodes we've already
        # notified the ops about, etc.)
        #
        if ($oldnodes{$node_id} && $state && $timestamp &&
            ($oldnodes{$node_id}{state} eq $state) &&
            ($oldnodes{$node_id}{mode} eq $mode) &&
            ($oldnodes{$node_id}{timestamp} == $timestamp)) {
            $nodes{$node_id} = $oldnodes{$node_id};
        } else {
            $nodes{$node_id}{state}          = $state;
            $nodes{$node_id}{timestamp}      = $timestamp;
            $nodes{$node_id}{mode}           = $mode;
            $nodes{$node_id}{mode_timestamp} = $mode_timestamp;
            # Is there a timeout? If so, set it up!
            setTimeout($mode,$state,$node_id,$timestamp);
        }
    }
    return %nodes;
}

#
# Load the state timeout table from the database.
# Returns a hash, $h{op_mode}{state} = [ timeout, action ].
#
sub getTimeouts() {
    #debug("getTimeouts called\n");
    my %table;
    my $rows = DBQueryFatal("SELECT op_mode, state, timeout, action " .
                            "FROM state_timeouts");

    while (my @row = $rows->fetchrow()) {
        my ($op_mode, $state, $timeout, $action) = @row;
        $table{$op_mode}{$state} = [ $timeout, $action ];
    }
    return %table;
}

#
# Load the table of valid state transitions from the database.
# Returns a hash, $h{op_mode}{from_state}{to_state} = 1.
#
sub getValid() {
    #debug("getValid called\n");
    my %allowed;
    my $rows = DBQueryFatal("SELECT op_mode, state1, state2 " .
                            "FROM state_transitions");

    while (my @row = $rows->fetchrow()) {
        my ($mode, $from, $to) = @row;
        $allowed{$mode}{$from}{$to} = 1;
    }
    return %allowed;
}

398 399 400 401
#
# Read the list of valid mode transitions from the database.
#
# Returns a hash keyed by "op_mode1:state1"; each value is a reference to
# the list of "op_mode2:state2" strings reachable from that mode/state.
#
sub getModeTrans() {
    #debug("getModeTrans called\n");
    my $result =
      DBQueryFatal("SELECT op_mode1, state1, op_mode2, state2 " .
                   "FROM mode_transitions order by op_mode1,state1");

    my %modeTrans;
    while (my ($mode1,$state1, $mode2, $state2) = $result->fetchrow()) {
        # push() creates the list on first use, so there is no need to
        # copy and re-store the whole list for every row.
        push(@{$modeTrans{"$mode1:$state1"}}, "$mode2:$state2");
    }
    return %modeTrans;
}

#
# Load the state triggers from the database.
#
# The returned hash holds two kinds of entries:
#   "op_mode:state"          => the trigger string as stored (rows whose
#                               node_id equals $TBANYMODE)
#   "node_id:op_mode:state"  => reference to the list of triggers obtained
#                               by splitting the stored string on commas
#
sub getTriggers() {
    debug("getTriggers called\n");
    
    debug("anymode ==> '$TBANYMODE'\n");

    my %table;

    # Grab global triggers
    my $global =
      DBQueryFatal("SELECT op_mode, state, trigger " .
                   "FROM state_triggers where node_id='$TBANYMODE' ".
                   "order by op_mode,state");
    while (my @row = $global->fetchrow()) {
        my ($mode, $state, $trig) = @row;
        $table{"$mode:$state"} = $trig;
        debug("trig($mode:$state)\t => $trig\n");
    }

    # Grab per-node triggers
    my $pernode =
      DBQueryFatal("SELECT node_id, op_mode, state, trigger " .
                   "FROM state_triggers where node_id!='$TBANYMODE' ".
                   "order by op_mode,state");
    while (my @row = $pernode->fetchrow()) {
        my ($n, $mode, $state, $trig) = @row;
        my @trigs = split(/\s*,\s*/,$trig);
        $table{"$n:$mode:$state"} = \@trigs;
        debug("trig($n:$mode:$state)\t => ".join(',',@trigs)."\n");
    }

    return %table;
}

Robert Ricci's avatar
Robert Ricci committed
453 454 455 456
#
# Gets called for every event that we receive.
#
# Arguments are the event handle, the notification, and the callback data
# (unused).  Counts the event in $event_count, drops events for nodes that
# checkDBRedirect() says are not ours, and dispatches on the object type:
#   $TBNODESTATE  -> stateTransition()
#   $TBNODEOPMODE -> opModeTransition() (deprecated; ops are notified)
#   $TBCONTROL    -> handleCtrlEvent()
# Any other object type is ignored.
#
sub handleEvent($$$) {
    my ($handle,$notification,$data) = @_;
    my $objtype = event_notification_get_objtype($handle,$notification);
    my $objname = event_notification_get_objname($handle,$notification);
    my $eventtype = event_notification_get_eventtype($handle,$notification);

    $event_count++;
    debug("Got an event: ($objtype,$objname,$eventtype)\n");

    #
    # Check to see if another instance is supposed to be handling this node
    #
    if (!checkDBRedirect($objname)) {
        info("Got an event for node $objname, which isn't mine\n");
        return;
    }

    # Compare the whole object type.  An unanchored pattern match would
    # also fire on any type that merely contains one of these names.
    if ($objtype eq $TBNODESTATE) {
        stateTransition($objname,$eventtype);
    } elsif ($objtype eq $TBNODEOPMODE) {
        opModeTransition($objname,$eventtype);
        notify("Use of deprecated event TBNODEOPMODE:\n".
               "$objname->$eventtype\n");
    } elsif ($objtype eq $TBCONTROL) {
        handleCtrlEvent($objname,$eventtype);
    }

}

#
# Handle a state change of $node to $newstate.
#
#  1. Look the node up in the cache (reloading once if it is missing) and
#     notify the ops if the transition is not listed in %valid.
#  2. Record the new state in the cache and in the database, and set up
#     the timeout for it.
#  3. On entering BOOTING: catch a missed mode transition when coming
#     straight from ISUP, record the OS being booted, complain (and, for
#     RELOAD only, correct the mode) if the OS's mode differs from the
#     node's, and see whether we must generate the ISUP ourselves.
#  4. Run the global and per-node triggers for this mode/state.
#  5. If this mode/state allows a mode transition and one is pending in
#     next_op_mode, perform it.
#
# NOTE(review): $node and $newstate come from event notifications and are
# interpolated into SQL unescaped; libdb's quoting helpers are not visible
# from here, so this is only flagged.
#
sub stateTransition($$) {

    my ($node,$newstate) = @_;

    my ($oldstate, $mode);
    if (!$nodes{$node}) {
        # Try reloading the cache once before we give up on this node
        reload();
    }
    if ($nodes{$node}) {
        $oldstate = $nodes{$node}{state};
        $mode = $nodes{$node}{mode};
    } else {
        notify("Got an event for a node ($node) I don't know about\n");
    }

    # Check for invalid transitions
    if ($oldstate && $mode && $valid{$mode} && $valid{$mode}{$oldstate} &&
        !$valid{$mode}{$oldstate}{$newstate}) {
        notify("Invalid transition for node $node from $mode/$oldstate " .
               "to $newstate\n");
    }

    my $now = time();
    $nodes{$node}{state}     = $newstate;
    $nodes{$node}{timestamp} = $now;
    $nodes{$node}{notified}  = 0;

    info("$node: $mode/$oldstate => $mode/$newstate\n");
    DBQueryFatal("UPDATE nodes SET eventstate='$newstate', " .
                 "state_timestamp='$now' WHERE node_id='$node'");

    # Check if this state has a timeout, and if so, put it in the queue
    setTimeout($mode,$newstate,$node,$now);

    # Check if this is TBDB_NODESTATE_BOOTING , which has actions
    if ($newstate eq TBDB_NODESTATE_BOOTING) {
        # If I skipped shutdown, and came to booting directly from isup,
        # check for a mode transition so I don't miss one...
        if (defined($oldstate) && $oldstate eq TBDB_NODESTATE_ISUP) {
            info("$node: Came from ISUP! Checking for mode transition\n");
            my $r = DBQueryWarn("select next_op_mode from nodes ".
                                "where node_id='$node'");
            # A failed query is treated like "no next mode" instead of
            # calling a method on a false value.
            my ($nextmode) = $r ? $r->fetchrow() : ();
            if ($nextmode) {
                # Force the transition even though it is illegal
                info("$node: Forcing mode transition!\n");
                opModeTransition($node,$nextmode,1);
                $mode=$nextmode;
            } else {
                debug("No next mode.\n");
            }
        }

        # Check if I'm in the right mode
        my $osid = TBBootWhat($node,$debug);
        my $os_op_mode = os_opmode($osid);
        info("$node: Current OS is '$osid', OS mode is '$os_op_mode'\n");
        DBQueryFatal("UPDATE nodes SET osid='$osid' WHERE node_id='$node'");
        if ($os_op_mode ne $mode) {
            my $str = "Node $node is running OS '$osid' but in mode '$mode' ".
              "instead of mode '$os_op_mode'!\n";
            # For now, only force if we're going into reload mode, so we
            # don't get stuck looping in reloading.
            if ($os_op_mode eq "RELOAD") {
                DBQueryFatal("UPDATE nodes SET op_mode='$os_op_mode', ".
                             "op_mode_timestamp=unix_timestamp(now()) ".
                             "WHERE node_id='$node'");
                $nodes{$node}{mode} = $os_op_mode;
                $nodes{$node}{mode_timestamp} = $now;
                $str .= "Forced op_mode to '$os_op_mode'.\n";
            }
            notify($str);
        }
        checkGenISUP($node);
    }

    # Check if this state has any triggers
    my @nodetrigs = GetNodeTriggerList($node,$mode,$newstate);
    my $globaltrigs = $triggers{"$mode:$newstate"};
    if (defined($globaltrigs) ||
        (@nodetrigs > 0) ) {
        # check for global triggers; there may be none when only
        # per-node triggers brought us here
        my @trigs = defined($globaltrigs)
            ? split(/\s*,\s*/,$globaltrigs) : ();
        # Run all the triggers
        debug("Running triggers. Global=".join("/",@trigs).
              "   node=".join("/",@nodetrigs)."\n");
        foreach my $trig ( @trigs , @nodetrigs) {
            if ($trig =~ /^$TBRESET$/) {
                # Check if we really need to do a reset
                my $r = DBQueryWarn("select osid,def_boot_osid from nodes ".
                                    "where node_id='$node'");
                # If the query failed we cannot tell, so no reset is done.
                if ($r) {
                    my ($osid,$defosid) = $r->fetchrow();
                    if ($osid ne $defosid) {
                        handleCtrlEvent($node,$trig);
                    }
                }
            } elsif ($trig =~ /^$TBRELOADDONE$/) {
                handleCtrlEvent($node,$trig);
            } elsif ($trig =~ /^$TBFREENODE$/) {
                handleCtrlEvent($node,$trig);
            } elsif ($trig =~ /^$TBISUP$/) {
                info("$node: Triggered $TBISUP\n");
                EventSendWarn(host      => $BOSSNODE ,
                              objtype   => TBDB_TBEVENT_NODESTATE ,
                              eventtype => TBDB_NODESTATE_ISUP ,
                              objname   => $node);
            } else {
                notify("Unknown trigger '$trig' for $node in ".
                       "$mode/$newstate!\n");
            }
        }
        # Clear any of the node triggers that we ran
        debug("Clearing node triggers: ".join("/",@nodetrigs)."\n");
        ClearNodeTrigger($node,$mode,$newstate,@nodetrigs);
    }

    # Check if this state can trigger a mode transition
    if (defined($modeTrans{"$mode:$newstate"})) {
        info("$node: Checking for mode transition\n");
        my $r = DBQueryWarn("select next_op_mode from nodes ".
                            "where node_id='$node'");
        my ($nextmode) = $r ? $r->fetchrow() : ();
        if ($nextmode) {
            opModeTransition($node,$nextmode);
        } else {
            debug("No next mode.\n");
        }
    }
}
630

631
#
# Move $node into operational mode $newmode.
#
# Unless $force is true the move is checked against %modeTrans for the
# node's current mode/state; a failed check only produces a notification,
# the change is made regardless.  The node's new state is the one the
# table lists for $newmode, or its current state if there is none.  Cache
# and database are updated (next_op_mode is cleared) and the timeout for
# the new mode/state is set up.
#
sub opModeTransition($$;$) {

    my ($node,$newmode,$force) = @_;
    $force = 0
        unless (defined($force));

    info("$node: Mode change to $newmode requested\n");

    my ($oldstate, $mode, $nextstate);
    # Try reloading the cache once before we give up on this node
    reload()
        unless ($nodes{$node});
    if ($nodes{$node}) {
        ($oldstate, $mode) = ($nodes{$node}{state}, $nodes{$node}{mode});
    } else {
        notify("Got an event for a node ($node) I don't know about\n");
    }

    # Check for invalid transitions
    my $targets = $modeTrans{"$mode:$oldstate"};
    if (!defined($targets) && !$force) {
        notify("Invalid mode transition for $node from $mode/$oldstate: ".
               "Not a valid mode transition state!\n");
    } elsif (!$force) {
        debug("Mode Transition check:\n");
        # The list holds "mode:state" strings; joining them with commas and
        # splitting on both separators gives a mode => state hash.
        my $translist = join(",",@{$targets});
        #debug("translist=$translist\n");
        #debug("splitlist=".join(", ",split(/[:,]/,$translist))."\n");
        my %trans = split(/[:,]/,$translist);
        debug("Valid transitions from $mode/$oldstate are:\n");
        debug("$_ => $trans{$_}\n")
            foreach (sort keys %trans);
        if (defined($trans{$newmode})) {
            $nextstate=$trans{$newmode};
        } else {
            notify("Invalid mode transition for $node from ".
                   "$mode/$oldstate to $newmode!\n");
        }
    }
    $nextstate = $oldstate
        unless ($nextstate);

    my $now = time();
    @{$nodes{$node}}{qw(state timestamp mode mode_timestamp notified)} =
        ($nextstate, $now, $newmode, $now, 0);

    info("$node: $mode/$oldstate => $newmode/$nextstate\n");
    DBQueryFatal("UPDATE nodes SET eventstate='$nextstate', ".
                 "next_op_mode='', op_mode='$newmode', ".
                 "state_timestamp='$now', ".
                 "op_mode_timestamp='$now' WHERE node_id='$node'");

    # Check if this state has a timeout, and if so, put it in the queue
    setTimeout($newmode,$nextstate,$node,$now);

}

#
# Carry out the control event $event for $node:
#
#   $TBRESET       point the node back at its default OS with os_select,
#                  then put its pxe boot path back
#   $TBRELOADDONE  delete the node's current_reloads row; if the node is
#                  in the reloading experiment, also delete its
#                  scheduled_reloads row and add a $TBFREENODE trigger
#   $TBFREENODE    delete the node's row from reserved
#   $TBTIMEOUT     notify the ops that the node timed out in its state
#
# Anything else is reported as an unknown control event.
#
sub handleCtrlEvent($$) {
    my ($node,$event) = @_;

    info("CtrlEvent: $node, $event\n");

    if ($event =~ /^$TBRESET$/) {
        my $result = DBQueryFatal("SELECT pxe_boot_path, def_boot_osid ".
                                  "FROM nodes where node_id='$node'");
        my ($pxepath,$osid) = $result->fetchrow();

        # Important note on ordering here:
        # Because setting a normal osid resets pxe path to PXEBOOT,
        # We need to read it out first, then set the osid, then set
        # the pxepath back to its original value at the end.

        # The commands are run from argument lists, so that no shell ever
        # gets to interpret the node name or the database values.  $cmd is
        # only used for the messages.  Empty arguments are left out, which
        # is what running the string form amounted to.
        my $cmd = "$osselect $osid $node";
        my @argv = grep { defined($_) && $_ ne "" } ($osselect,$osid,$node);
        my $rc = system { $argv[0] } @argv;
        if ($rc) {
            notify("$node/$event: Couldn't clear next_boot_*\n".
                   "\tcmd=$cmd\n\t*** $!\n");
        }

        my @pxeargs;
        if (defined($pxepath) && $pxepath ne "") {
            @pxeargs = ("-p", $pxepath);
            $pxepath = "-p ".$pxepath;
        } else {
            @pxeargs = ("PXEBOOT");
            $pxepath = "PXEBOOT";
        }
        $cmd = "$osselect -m $pxepath $node";
        @argv = ($osselect, "-m", @pxeargs, $node);
        $rc = system { $argv[0] } @argv;
        if ($rc) {
            notify("$node/$event: Couldn't clear next_pxe_boot_path\n".
                   "\tcmd=$cmd\n\t*** $!\n");
        }

        info("Performed RESET for $node to $osid/$pxepath\n");
    } elsif ($event =~ /^$TBRELOADDONE$/) {
        info("Clearing reload info for $node\n");
        DBQueryFatal("delete from current_reloads where node_id='$node'");
        my ($pid,$eid);
        NodeidToExp($node,\$pid,\$eid);
        if (($pid eq NODERELOADING_PID) && ($eid eq NODERELOADING_EID)) {
            DBQueryFatal("delete from scheduled_reloads ".
                         "where node_id='$node'");
            AddNodeTrigger($node, $TBANYMODE, TBDB_NODESTATE_ISUP,
                           $TBFREENODE)
              && notify("$node: Couldn't add trigger $TBFREENODE!\n");
            info("Set up freeing of $node from $pid/$eid\n");
        }
    } elsif ($event =~ /^$TBFREENODE$/) {
        # Don't need pid/eid, but we should put it in the log
        my ($pid,$eid);
        NodeidToExp($node,\$pid,\$eid);
        DBQueryFatal("delete from reserved where node_id='$node'");
        info("Released $node from $pid/$eid\n");
    } elsif ($event =~ /^$TBTIMEOUT$/) {
        my $state = $nodes{$node}{state};
        my $mode = $nodes{$node}{mode};
        my ($timeout,$action);
        if ($mode && $state && $timeouts{$mode} &&
            $timeouts{$mode}{$state}) {
            ($timeout, $action) = @{$timeouts{$mode}{$state}};
        }
        # $action stays undefined when no timeout entry was found.
        my $actionmsg = (defined($action) && $action ne "")
            ? "\n\tRequested action $action." : "";
        notify("Node $node has timed out in state $mode/$state".
               $actionmsg.
               "\n");
    } else {
        notify("$node: Unknown CtrlEvent: $event\n");
    }
}
Robert Ricci's avatar
Robert Ricci committed
769

770 771 772 773
#
# Decide who generates the ISUP event for $node, going by the osfeatures
# of the OS recorded for it:
#   "isup" - the OS sends ISUP on its own; nothing to do      (returns 0)
#   "ping" - start eventping for the node in the background   (returns 0)
#   else   - send the ISUP ourselves, right now
#
sub checkGenISUP($) {
    my ($node) = @_;
    debug("$node: Checking ISUP Generation\n");

    my $r = DBQueryWarn("select osfeatures from nodes as n ".
                        "left join os_info as o on o.osid=n.osid ".
                        "where node_id='$node' and osfeatures is not null");
    # If we don't get anything back, assume it has no features.
    my $osfeatures="";
    ($osfeatures) = $r->fetchrow()
        if ($r->num_rows() > 0);

    # Make sure features I care about are defined; feature names are
    # stored in lowercase.
    my %can=("ping"=>0, "isup"=>0);
    $can{lc($_)} = 1
        foreach (split(",",$osfeatures));

    # If os will send ISUP on its own, do nothing here.
    if ($can{"isup"}) {
        debug("$node: Will send own ISUP\n"); 
        return 0;
    }

    # If os doesn't support isup but can ping, hand the node to eventping
    # (started in the background), which is expected to send the isup
    # when the node pings, or time out after too long.
    if ($can{"ping"}) {
        debug("$node: Needs to be pinged - calling eventping\n");
        system("$TB/sbin/eventping $node &");
        return 0;
    }

    # If os doesn't support ping or isup, stated sends ISUP just after 
    # the node gets to BOOTING (a bit early, but the best we can do)
    debug("$node: OS doesn't ping - sending ISUP\n");
    EventSendWarn(host      => $BOSSNODE ,
                  objtype   => TBDB_TBEVENT_NODESTATE ,
                  eventtype => TBDB_NODESTATE_ISUP ,
                  objname   => $node);
}

816 817 818 819
# Figure out if this node belongs to us (ie. if it's using our database.)
# The node's experiment (found through its reservation) may name a testdb;
# that name is compared with the database this stated is running against.
# Returns 1 if the node is ours, 0 if not (or if we do not know the node).
sub checkDBRedirect($) {
    my $node = shift;

    # XXX: I don't want to do this every time, for performance reasons,
    # but we need to make sure that we don't get into an inconsistent
    # state
    my $query_result =
        DBQueryFatal("SELECT testdb FROM nodes as n " .
                     "LEFT JOIN reserved as r ON n.node_id=r.node_id ".
                     "LEFT JOIN experiments as e ON r.pid = e.pid " .
                     "AND r.eid = e.eid " .
                     "WHERE n.node_id = '$node'");

    unless ($query_result->num_rows()) {
        notify("Got an event for a node ($node) I don't know about\n");
        return 0;
    }

    my ($testdb) = $query_result->fetchrow();

    # XXX: It's hokey to hardcode tbdb here, but....
    #debug("checkDBRedirect: $node => $testdb (I'm $TBDBNAME)\n");

    # With no testdb set, the node is ours only if we are on the "real"
    # database; with one set, only if it names our database.
    my $is_ours = $testdb ? ($testdb eq $TBDBNAME)
                          : ($TBDBNAME eq $REALTBDBNAME);
    return ($is_ours ? 1 : 0);
}

849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870
# Check if this state has a timeout, and if so, put it in the queue.
# Whatever is already queued for the node is removed first, so a node has
# at most one entry. The new entry is keyed on $now plus the first element
# of $timeouts{$mode}{$state}; nothing is queued when that element is
# missing or equal to $TBNOTIMEOUT.
sub setTimeout( $$$$ ) {
    my ($mode,$state,$node,$now) = @_;
    # Flip to 1 to dump the queue at each step while debugging.
    my $trace = 0;

    if ($trace) { print "Original: ($mode,$state,$node,$now)\n"; qshow(); }
    qdelete($node) if (defined(qfind($node)));
    if ($trace) { print "Deleted:\n"; qshow(); }

    # Look the entry up one level at a time so that we never create
    # entries in %timeouts as a side effect.
    my $entry;
    if (defined($mode) && defined($state) && defined($timeouts{$mode})) {
        $entry = $timeouts{$mode}{$state};
    }

    if (defined($entry)) {
        my $deadline = $entry->[0];
        if (defined($deadline) && $deadline != $TBNOTIMEOUT) {
            my $expires = $deadline + $now;
            debug("Setting timeout for ($node,$mode,$state) at ".
                  "$deadline + $now ($expires)\n");
            qinsert($expires,$node);
            if ($trace) { qshow(); }
        }
    }
    if ($trace) { print "Done:\n"; qshow(); }
}

Robert Ricci's avatar
Robert Ricci committed
871 872
# Reload state from the database
# Records the time of the reload in $last_reload, then replaces each of
# the in-memory tables with what its loader returns.
sub reload() {
    debug("Reloading state from database\n");
    $last_reload = time;

    %timeouts  = getTimeouts();
    %valid     = getValid();
    %modeTrans = getModeTrans();
    %triggers  = getTriggers();

    # readStates is handed the current node table and returns the new one.
    %nodes     = readStates(%nodes);
}

882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
#
# Some functions for node triggers
#

# $rv   = AddNodeTrigger($node, $mode, $state, @triglist);
#
# Add triggers for a node in the given mode/state, both to the in-memory
# %triggers table and to the state_triggers table in the database.
# A trigger that is already set, or that appears more than once in
# @triglist, is stored only once. Triggers already set keep their order;
# new ones are appended in the order given.
#
# Returns 0 on success, or 1 (having done nothing) if no triggers were
# given.
sub AddNodeTrigger( $$$@ ) {
    my ($node, $mode, $state, @trigs) = @_;
    if (@trigs == 0) { return 1; }

    my $key = "$node:$mode:$state";

    # Keep only triggers we do not have yet; marking each one as seen while
    # filtering also drops repeats inside @trigs itself.
    my %seen = ();
    if (defined($triggers{$key})) {
        foreach my $t (@{$triggers{$key}}) { $seen{$t} = 1; }
    }
    my @newtrigs = grep { !$seen{$_}++ } @trigs;

    if (defined($triggers{$key})) {
        # Extend the existing list in place.
        push(@{$triggers{$key}}, @newtrigs);
    } else {
        $triggers{$key} = \@newtrigs;
    }

    # The database keeps the whole list as one comma separated string, so
    # the row is replaced with the full current list.
    my $triglist = join(",", @{$triggers{$key}});
    DBQueryFatal("replace into state_triggers ".
                 "(node_id,op_mode,state,trigger) values ".
                 "('$node','$mode','$state','$triglist')");
    return 0;
}

# @list = GetNodeTriggerList($node, $mode, $state);
#
# Returns the triggers set for the node in this state: first those set for
# the given mode, then those set for $TBANYMODE. The list is empty when
# neither entry exists.
sub GetNodeTriggerList( $$$ ) {
    my ($node, $mode, $state) = @_;
    my @found = ();
    foreach my $key ("$node:$mode:$state", "$node:$TBANYMODE:$state") {
        next unless (defined($triggers{$key}));
        push(@found, @{$triggers{$key}});
    }
    return @found;
}

# $rv   = ClearNodeTrigger($node, $mode, $state, @triglist);
#
# Remove triggers from a node in the given mode/state. With an empty
# @triglist, or when removing @triglist leaves nothing, every trigger for
# the state is cleared (the entry for $mode and the one for $TBANYMODE),
# in memory and in the database. Otherwise the triggers that remain are
# stored again, in their original order, under $mode.
#
# Always returns 0.
sub ClearNodeTrigger( $$$ ; @ ) {
    my ($node, $mode, $state, @trigs) = @_;
    # We have to keep any triggers that aren't on the list, but the
    # most common case will be that the list they give us is the whole
    # list anyway. So treat that case special.
    my @reallist = GetNodeTriggerList($node,$mode,$state);

    # Subtract @trigs from @reallist, keeping order and dropping repeats.
    my %doomed = map { $_ => 1 } @trigs;
    my %kept = ();
    my @newtrigs = grep { !$doomed{$_} && !$kept{$_}++ } @reallist;

    # empty list means clear all... and so does a list that leaves nothing
    # behind. The database row has to go as well in that case, or the
    # triggers would come back the next time they are loaded from there.
    if ((@trigs == 0) || (@newtrigs == 0)) {
        debug("Clearing all triggers for $node...\n");
        delete($triggers{"$node:$mode:$state"});
        delete($triggers{"$node:$TBANYMODE:$state"});
        DBQueryFatal("delete from state_triggers ".
                     "where node_id='$node' and state='$state' and ".
                     "(op_mode='$mode' or op_mode='$TBANYMODE')");
    } else {
        debug("Reallist = ".join("/",@reallist).", trigs=".
              join("/",@trigs).".\n");
        my %present = map { $_ => 1 } @reallist;
        foreach my $t (@trigs) {
            debug("Clearing $t\n") if ($present{$t});
        }
        # Note: This doesn't quite do the right thing with triggers
        # for a fixed mode vs TBANYMODE. So if you start using this
        # code, make sure and debug it first!
        debug("Newlist = ".join("/",@newtrigs).".\n");
        delete($triggers{"$node:$mode:$state"});
        AddNodeTrigger($node,$mode,$state,@newtrigs);
    }

    return 0;
}

962 963
# $opmode = os_opmode($osid);
#
# Return the op_mode recorded in os_info for the given osid. The osid
# $TB_OSID_MBKERNEL is special and always gives "MINIMAL". Returns "" when
# the osid is not in os_info or has no op_mode set.
#
# The prototype allows the one optional argument that the body reads; an
# empty prototype would reject any call with an argument that is compiled
# after this definition.
sub os_opmode(;$) {
    my $osid = shift || "";
    if ($osid eq $TB_OSID_MBKERNEL) {
        return "MINIMAL";
    }
    my $cmd = "select op_mode from os_info where osid='$osid';";
    my $q = DBQueryFatal($cmd);
    if ($q->numrows() < 1) {
        return "";
    }
    my ($opmode) = $q->fetchrow_array();
    # A missing op_mode is reported, and returned, as the empty string.
    $opmode = "" if (!defined($opmode));
    debug("OpMode for '$osid' is '$opmode'\n");
    return $opmode;
}
Mac Newbold's avatar
Mac Newbold committed
980

981 982 983 984
#
# Functions for controlling output/logging, and signal handling
#

Robert Ricci's avatar
Robert Ricci committed
985
# Print the arguments, but only when $debug is set.
sub debug(@) {
    print @_ if ($debug);
}

# Report a message through notify() and then die with it.
sub fatal($) {
    my ($msg) = @_;
    notify($msg);
    die($msg);
}

Mac Newbold's avatar
Mac Newbold committed
997
# Dump the queue of pending mail messages (%msgs) through debug().
# Only does anything when $debug is 2 or more.
sub showqueue() {
    if ($debug < 2) {
        return;
    }
    my @queued = sort keys %msgs;
    debug("\nMAILQUEUE:\n") if (@queued > 0);
    # One line per message: the message, how many timestamps are queued for
    # it, and the timestamps themselves.
    foreach my $msg (@queued) {
        my @stamps = @{$msgs{$msg}};
        debug("MSGS:\n$msg==> (".scalar(@stamps).",'".
              join("','",@stamps)."')\n");
    }
}

# notify($message [, $checkonly]);
#
# Log a message and queue it for mail. Mail is throttled: messages collect
# in %msgs (a list of timestamps per distinct message) and go out as one
# digest once at least $mailgap seconds have passed since $lastmail.
# With a true $checkonly nothing is logged or queued; the call only sends
# the digest if one is due.
sub notify($;$) {
    my ($message, $checkonly) = @_;
    $checkonly ||= 0;
    # Use a timestamp, now that we're throttling mail
    my $tstamp = strftime("%b %e %H:%M:%S",localtime);
    showqueue();
    unless ($checkonly) {
        info($message);
        $mailqueue++;
        # Queue up the message
        # (The queue is a hash of lists of timestamps, keyed by message.)
        # push creates the list the first time a message is seen.
        push(@{$msgs{$message}},$tstamp);
        showqueue();
    }

    my $now = time;
    # Send only when the gap has passed and something is queued; otherwise
    # do nothing, it is not time yet.
    if (($now - $lastmail >= $mailgap) && (keys(%msgs) > 0)) {
        debug("SENDING MAILQUEUE\n"."(now $now, lastmail $lastmail, ".
              ($now-$lastmail).">=$mailgap)\n");
        my $sep = '-'x5;
        my $mailbody = "";
        # We're okay to send. Make a digest of all the queued messages:
        # each message once, with a count and the first and last time seen.
        foreach my $msg (sort keys %msgs) {
            my @stamps = @{$msgs{$msg}};
            my $count = scalar(@stamps);
            my $when = ($count > 1)
                ? "($count copies from $stamps[0] to $stamps[-1])\n"
                : "($count copy at $stamps[0])\n";
            $mailbody .= "\n$msg\n".$when."$sep\n";
        }
        # Now reset the mail queue
        %msgs = ();
        $mailqueue = 0;
        showqueue();
        $lastmail = time;
        if ($debug) {
            debug("notify: Not sending mail in debug mode\n");
            debug("MAIL CONTAINS:\n".$mailbody."\n");
        } else {
            SENDMAIL("Stated List <".$TBOPS.">",
                     "Stated Messsage",$mailbody,
                     "Stated Daemon <".$TBOPS.">");
        }
    }
}

1066
# announce($message);
#
# Send a message to $REALTBOPS right away, without the throttling that
# notify() does. The message is also passed through notify(), marked as an
# announcement, so it is logged and shows up in the regular digest too.
# In debug mode no mail is sent; the mail body is printed instead.
sub announce($) {
    my $message = shift;
    my $tstamp=strftime("%b %e %H:%M:%S",localtime);
    notify("ANNOUCEMENT: ".$message."\n\n(Sent to $REALTBOPS)\n");
    # Lexical on purpose: without 'my' this would create (or overwrite) a
    # package-wide $mailbody every time an announcement is made.
    my $mailbody = "\n$message\n\n$tstamp\n";
    if (!$debug) {
        SENDMAIL($REALTBOPS,
                 "Stated Messsage",$mailbody,
                 "Stated Daemon <".$TBOPS.">");
    } else {
        debug("announce: Not sending mail in debug mode\n");
        debug("MAIL CONTAINS:\n".$mailbody."\n");
    }
}

1081
# info($message [, $notice]);
#
# Log a message through syslog, at priority "notice" if $notice is true
# and "info" otherwise. In debug mode the message is also printed, in
# syslog-like form, and is tagged "DEBUG: " in the log.
sub info($;$) {
    my $message = shift;
    my $notice = shift || 0;
    # Use syslog
    my $prio = ($notice ? "notice" : "info");
    if ($debug) {
        # Print out log entries like this:
        # Sep 20 09:36:00 stated[238]: Reloading state from database
        print strftime("%b %e %H:%M:%S",localtime)." stated[$$]: $message";
        $message = "DEBUG: ".$message;
    }
    # syslog() treats its second argument as a format, so the message goes
    # in as data for a fixed "%s"; a '%' in the message would otherwise be
    # taken as a conversion.
    if (!syslog($prio, "%s", $message)) {
        # notify() logs through info(), so when syslog keeps failing this
        # would go round in circles. Report only the outermost failure.
        if (!$main::info_syslog_failing) {
            local $main::info_syslog_failing = 1;
            notify("syslog failed: $? $!\n");
        }
    }
}

1098 1099
# Only flags that a restart was asked for (presumably from the USR1 signal
# handler -- TODO confirm where it is installed); the flag is acted on
# elsewhere.
sub restart_wrap {
    $sigrestart = 1;
}

1100 1101
# This gets called if we catch a signal USR1
#
# Restart stated by exec'ing it again with the arguments in @args. Before
# that it polls the event queue one last time, unregisters from the event
# system, removes the lock file, unblocks USR1 and HUP, and announces the
# restart. Does not return: either the exec succeeds or we die.
sub restart {
    my $params = join(" ",@args);
    my $prog = "";
    # If we're started from an absolute path, use that.
    if ($0 =~ /^\//) {
        $prog = $0;
    } else {
        $prog = "$TB/sbin/stated";
    }
    info("SIGUSER1 received: Performing final event poll before restarting\n");
    # Presumably tells the poll below not to block waiting for events --
    # TODO confirm against process_event_queue.
    $blockwait=0;
    # Called with parentheses so that this is a sub call no matter where
    # the sub is defined; as a bare word it would only be one if the sub
    # had already been declared at this point in the file.
    process_event_queue();
    info("Restarting from '$prog".($params ne "" ? " $params" : "")."'\n");
    if ($handle && event_unregister($handle) == 0) {
        warn "Unable to unregister with event system\n";
    }
    if (defined($lockfile) && $lockfile ne "") {
        unlink $lockfile;
    }
    if (!defined(sigprocmask(SIG_UNBLOCK, POSIX::SigSet->new(SIGUSR1,SIGHUP)))) {
        notify("sigprocmask: sig unblock failed! $?, $!\n");
        die("\n");
    }
    announce("Stated restarted\n");
    # exec only returns if it failed
    exec("$prog $params") or
      do {
          my $msg = "Couldn't restart stated! cmd='$prog $params'\n".
            "Error: ($?) $!\n";
          announce($msg);
          die($msg);
      };
}

1134 1135
# Only flags that a cleanup/exit was asked for (presumably from the signal
# handler for TERM and friends -- TODO confirm where it is installed); the
# flag is acted on elsewhere.
sub cleanup_wrap {
    $sigcleanup = 1;
}

1136 1137
# This gets called if we catch a signal (TERM, etc.)
# Reports the signal through notify() and exits with status 0.
sub cleanup {
    notify("Signal received, exiting\n");
    # The normal exit stuff is done in END {}, which runs as we exit.
    exit(0);
}

Robert Ricci's avatar
Robert Ricci committed
1143 1144
# This gets called if we die of 'natural causes' (exit, die, etc.)
# Removes the lock file and announces the exit (a process without a lock
# file is taken to be a child and only logs), closes syslog, and
# unregisters from the event system.
END {
    debug("Ending stated...\n");
    # Remember the exit status now; it is put back at the very end.
    my $exit_status = $?;
    my $have_lockfile = (defined($lockfile) && $lockfile ne "");
    if ($have_lockfile) {
        unlink $lockfile;
        announce("Stated exiting, cleaning up\n");
    } else {
        # Must be a child
        info("Stated child exiting\n");
    }
    debug("Annouced. Cleaning up...\n");
    # clean up Syslog
    closelog();
    if ($handle) {
        debug("Unregistering w/event system...\n");
        die "Unable to unregister with event system\n"
            if (event_unregister($handle) == 0);
        debug("Unregistered.\n");
    }
    debug("Cleaned up. Bye!\n");
    # Restore $? in case one of the things I called changed it
    $? = $exit_status;
}
1168