stated.in 45.8 KB
Newer Older
Robert Ricci's avatar
Robert Ricci committed
1
#!/usr/bin/perl -w
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2
3
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
5
6
7
# All rights reserved.
#

Robert Ricci's avatar
Robert Ricci committed
8
9
10
#
# stated - A daemon to monitor the states of nodes in the testbed. Receives
# state change notification through the event system, and writes the new
Mac Newbold's avatar
Mac Newbold committed
11
# state into the database. Also watches for invalid transitions, timeouts,
12
# and performs other state-related control functions.
Robert Ricci's avatar
Robert Ricci committed
13
14
15
16
#
# Send it a HUP signal to get it to reload the timeout and transition
# information. Periodically reloads this information regardless, though.
#
17
# Will restart when sent SIGUSR1, by exec'ing its executable again.
Robert Ricci's avatar
Robert Ricci committed
18
#
19

Robert Ricci's avatar
Robert Ricci committed
20
21
# Configure variables
use lib '@prefix@/lib';
22
my $TB = "@prefix@";
23
my $REALTB = "/usr/testbed"; # So we know if we're the "real" stated or not
Robert Ricci's avatar
Robert Ricci committed
24
my $BOSSNODE = "@BOSSNODE@";
25
my $TBOPS = "@TBSTATEDEMAIL@";
26
my $REALTBOPS = "@TBOPSEMAIL@";
27
my $TBDBNAME = "@TBDBNAME@";
28
my $REALTBDBNAME = "tbdb"; # So we know if we're using the "real" db
29
my $osselect = "$TB/bin/os_select";
30
my $nodereboot = "$TB/bin/node_reboot";
31
my $rebootlog  = "$TB/log/nodereboot.log";
32
my $power = "$TB/bin/power";
33
my $TBLOG = "@TBLOGFACIL@";
Robert Ricci's avatar
Robert Ricci committed
34
35
36
37
38
39

$| = 1;

use event;
use libdb;
use libtestbed;
40
use TimeoutQueue;
Robert Ricci's avatar
Robert Ricci committed
41
use Getopt::Std;
42
#use strict;
Robert Ricci's avatar
Robert Ricci committed
43
use English;
Mac Newbold's avatar
Mac Newbold committed
44
45
use POSIX;			# for strftime, and sigprocmask and friends
use Fcntl;			# file constants for pidfile
Mac Newbold's avatar
Mac Newbold committed
46
47
48
49
50
use Sys::Syslog;
# Important note about syslog: It defaults to using an inet socket,
# but 'syslogd -s' (the default) doesn't listen for one. So either
# run syslogd without -s, or use setlogsock('unix') before openlog.
# (To get setlogsock: 'use Sys::Syslog qw(:DEFAULT setlogsock);' )
Robert Ricci's avatar
Robert Ricci committed
51

Mac Newbold's avatar
Mac Newbold committed
52
# Do lots of db retries before we fail and die
53
$libdb::DBQUERY_MAXTRIES = 100;
Mac Newbold's avatar
Mac Newbold committed
54

55
56
57
58
59
# Set up some notification throttling
my $mailgap = 15;		# in seconds
my $lastmail = time() - $mailgap + 2; # Send a digest of startup msgs after 2s.
my %msgs = ();

Mac Newbold's avatar
Mac Newbold committed
60
# Number of iterations (roughly, seconds) after which we'll reload
Robert Ricci's avatar
Robert Ricci committed
61
62
# information from the database. This is so we don't end up with information
# that's _too_ out of sync.
63
my $reload_time = 600;
64
my $last_reload = time;
Robert Ricci's avatar
Robert Ricci committed
65

66
67
68
69
# Command line opts.
my $dbtag = "";
my $debug = 0;
my $nolog = 0;
70
71
my $server = "localhost";
my $port   = @BOSSEVENTPORT@;
72
73
74
my $lockfile;
my $pidfile;

Robert Ricci's avatar
Robert Ricci committed
75
76
77
# Process command-line arguments

#
# Print the command-line synopsis on stdout and terminate the program
# with exit status 1. Never returns to the caller.
#
sub usage {
    # The synopsis line lists every switch accepted by getopts(),
    # including -l, which is described in the option list below.
    print <<"END";
Usage: $0 [-h] [-d] [-l] [-s server] [-p port] [-t dbtag]
-h              This message
-d              Turn on debugging output, and do not go into the background
-l              Do not use syslog; send output to stderr. Use with -d only
-t tag          Use only those nodes with matching tag in nodes table
-s server       Use specified server, instead of this site's bossnode
-p port	        Use specified port
Send SIGHUP to reload database state, or SIGUSR1 to restart completely.
END
    exit(1);
}

Mac Newbold's avatar
Mac Newbold committed
91
# Only root should run this - it won't work when run as a user...
92
# (Or, let an admin run it if it isn't the real one in /usr/testbed/ )
93
if ($UID && ( $TB eq $REALTB || ! TBAdmin($UID) ) ) {
Mac Newbold's avatar
Mac Newbold committed
94
95
96
    die("Only root can run this script!\n");
}

97
my @args = @ARGV;    # save a copy for restart before we mess with them.
Robert Ricci's avatar
Robert Ricci committed
98
my %opt = ();
99
if (!getopts("ds:p:ht:l",\%opt)) { usage(); }
Robert Ricci's avatar
Robert Ricci committed
100

Mac Newbold's avatar
Mac Newbold committed
101
102
103
104
105
106
if ($opt{h}) {
    exit &usage;
}
if (@ARGV) {
    exit &usage;
}
Robert Ricci's avatar
Robert Ricci committed
107

Mac Newbold's avatar
Mac Newbold committed
108
109
110
111
112
113
if ($opt{s}) {
    $server = $opt{s};
}
if ($opt{p}) {
    $port = $opt{p};
}
114
115
116
117
118
119
120
121
if ($opt{l}) {
    usage()
	if (! $opt{d});
    $nolog = 1;
}
if ($opt{t}) {
    $dbtag = $opt{t};
}
Mac Newbold's avatar
Mac Newbold committed
122
123
124
if ($opt{d}) {
    $debug = 1;
}
Robert Ricci's avatar
Robert Ricci committed
125

126
# Grab some constants into variables
127
my $TBANYMODE    = TBDB_NODEOPMODE_ANY;
128
129
130
my $TBRESET      = TBDB_TBCONTROL_RESET;
my $TBRELOADDONE = TBDB_TBCONTROL_RELOADDONE;
my $TBTIMEOUT    = TBDB_TBCONTROL_TIMEOUT;
Mac Newbold's avatar
Mac Newbold committed
131
132
133
my $PXEBOOT      = TBDB_TBCONTROL_PXEBOOT;
my $BOOTING      = TBDB_TBCONTROL_BOOTING;
my $CHECKGENISUP = TBDB_TBCONTROL_CHECKGENISUP;
134
135
136
my $TBNOTIMEOUT  = TBDB_NO_STATE_TIMEOUT;
my $TBNODESTATE  = TBDB_TBEVENT_NODESTATE;
my $TBNODEOPMODE = TBDB_TBEVENT_NODEOPMODE;
137
138
139
140
141
142
my $TBCONTROL    = TBDB_TBEVENT_CONTROL;
my $TBCOMMAND    = TBDB_TBEVENT_COMMAND;
my $TBREBOOT     = TBDB_COMMAND_REBOOT;
my $TBPOWEROFF   = TBDB_COMMAND_POWEROFF;
my $TBPOWERON    = TBDB_COMMAND_POWERON;
my $TBPOWERCYCLE = TBDB_COMMAND_POWERCYCLE;
143
my $TBISUP       = TBDB_NODESTATE_ISUP;
144
145
146
my $PXEWAIT      = TBDB_NODESTATE_PXEWAIT;
my $PXEWAKEUP    = TBDB_NODESTATE_PXEWAKEUP;
my $PXEBOOTING   = TBDB_NODESTATE_PXEBOOTING;
147
148
149
150
my $TBTIMEOUTREBOOT   = TBDB_STATED_TIMEOUT_REBOOT;
my $TBTIMEOUTNOTIFY   = TBDB_STATED_TIMEOUT_NOTIFY;
my $TBTIMEOUTCMDRETRY = TBDB_STATED_TIMEOUT_CMDRETRY;
my $TB_OSID_MBKERNEL  = TB_OSID_MBKERNEL;
151

152
153
# Special PXEBOOT state machine that all local nodes use.
my $PXEKERNEL	 = "PXEKERNEL";
154

155
156
157
if (!$debug) {
    if ( $TB eq $REALTB ) {
	$pidfile = "/var/run/stated.pid";
Mac Newbold's avatar
Mac Newbold committed
158
    } else {
159
	$pidfile = "$TB/locks/stated.pid";
Mac Newbold's avatar
Mac Newbold committed
160
    }
161
    debug("Using pidfile $pidfile\n");
Mac Newbold's avatar
Mac Newbold committed
162

163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
    #
    # A pidfile left behind by a previous instance: refuse to start if
    # that process is still alive, otherwise remove the stale file.
    #
    if (-e $pidfile) {
	# Read the pid directly rather than shelling out to cat, and
	# strip whitespace so a trailing newline cannot leak into the
	# messages or the liveness test below.
	my $otherpid = "";
	if (open(my $pidfh, "<", $pidfile)) {
	    my $line = <$pidfh>;
	    close($pidfh);
	    $otherpid = $line
		if (defined($line));
	}
	$otherpid =~ s/^\s+|\s+$//g;

	# Signal 0 only tests for existence of the process. EPERM means
	# the process exists but belongs to someone else. An empty or
	# non-numeric pidfile is treated as stale.
	my $running = 0;
	if ($otherpid =~ /^\d+$/ && $otherpid > 0) {
	    $running = 1
		if (kill(0, $otherpid) || $!{EPERM});
	}

	if ($running) {
	    fatal("Lockfile $pidfile exists, and process $otherpid appears ".
		  "to be running.\n");
	} else {
	    notify("Lockfile exists, but process $otherpid appears to be dead".
		   "\n".
		   "Removing lock file...\n");
	}
	unlink($pidfile) ||
	    fatal("Couldn't remove $pidfile: $!\n");
    }
    # Background
Mac Newbold's avatar
Mac Newbold committed
178
179
180
181
    # We use syslog, so redirect the output to nothing
    if (TBBackGround("/dev/null")) {
	exit(0);
    }
Robert Ricci's avatar
Robert Ricci committed
182
}
Mac Newbold's avatar
Mac Newbold committed
183
# set up syslog
184
185
186
187
188
189
190
191
192
193
194
195
if (! $nolog) {
    openlog("stated","pid",$TBLOG);
}

if (defined($pidfile)) {
    sysopen(PIDFILE, $pidfile, O_WRONLY | O_EXCL | O_CREAT) ||
	fatal("Couldn't create '$pidfile': $? $!\n");
    print PIDFILE "$$";
    close PIDFILE;
    # If I make it to here, I'll need to clean up the lock file
    $lockfile = $pidfile;
}
Robert Ricci's avatar
Robert Ricci committed
196

197
198
199
# Change my $0 so that it is easier to see in a ps/top
$0 = "$0";

Robert Ricci's avatar
Robert Ricci committed
200
my $URL = "elvin://$server";
Mac Newbold's avatar
Mac Newbold committed
201
202
203
if ($port) {
    $URL .= ":$port";
}
Robert Ricci's avatar
Robert Ricci committed
204

Mac Newbold's avatar
Mac Newbold committed
205
# Connect to the event system, and subscribe to the events we want
Robert Ricci's avatar
Robert Ricci committed
206
my $handle = event_register($URL,0);
Mac Newbold's avatar
Mac Newbold committed
207
208
209
if (!$handle) {
    fatal("Unable to register with event system\n");
}
Robert Ricci's avatar
Robert Ricci committed
210
211

my $tuple = address_tuple_alloc();
Mac Newbold's avatar
Mac Newbold committed
212
213
214
if (!$tuple) {
    fatal("Could not allocate an address tuple\n");
}
Robert Ricci's avatar
Robert Ricci committed
215

216
217
218
%$tuple = ( objtype => join(",",
			    $TBNODESTATE, $TBNODEOPMODE,
			    $TBCONTROL, $TBCOMMAND) );
219

Robert Ricci's avatar
Robert Ricci committed
220
if (!event_subscribe($handle,\&handleEvent,$tuple)) {
Mac Newbold's avatar
Mac Newbold committed
221
    fatal("Could not subscribe to events\n");
Robert Ricci's avatar
Robert Ricci committed
222
223
224
225
}

# Read in the pre-existing node states, and timeout and valid transition
# information from the database
226
227
228
229
my %timeouts  = getTimeouts();
my %valid     = getValid();
my %modeTrans = getModeTrans();
my %triggers  = getTriggers();
230
my %nodes     = readStates();
231
my %timeouttag= ();
232
if ($debug) { qshow(); }
Robert Ricci's avatar
Robert Ricci committed
233
234
235

# Gets set if a reload of state from the database should happen.
my $do_reload = 0;
236
237
my $sigrestart= 0;
my $sigcleanup= 0;
Robert Ricci's avatar
Robert Ricci committed
238
239
240
241

# Make the daemon reload database state on a sighup - but I'm worried
# about what would happen if we tried to do this mid-loop. So, we'll
# just set a flag and do it when we're done with our current pass.
242
243
$SIG{HUP}  = sub { info("SIGHUP - Reloading DB state\n"); $do_reload = 1; };

Mac Newbold's avatar
Mac Newbold committed
244
# Set up other signals.
245
246
247
248
249
250
251
# USR1 is the "restart" signal (see the usage message); it is routed to
# restart_wrap. Presumably that handler only sets $sigrestart so the main
# loop can call restart() at a safe point - confirm against its definition.
$SIG{USR1} = \&restart_wrap;
# Every other signal listed here is routed to cleanup_wrap. Presumably that
# handler sets $sigcleanup so the main loop can call cleanup() - confirm.
$SIG{USR2} = \&cleanup_wrap;
$SIG{INT}  = \&cleanup_wrap;
$SIG{QUIT} = \&cleanup_wrap;
$SIG{ABRT} = \&cleanup_wrap;
$SIG{TERM} = \&cleanup_wrap;
# NOTE(review): SIGKILL cannot normally be caught by a process, so this
# assignment is presumably ineffective - confirm and consider removing.
$SIG{KILL} = \&cleanup_wrap;
Robert Ricci's avatar
Robert Ricci committed
252

253
254
255
# Track if I handled an event or not
my $event_count = 0;

256
257
# Control how long I block while waiting for events
my $blockwait=0;
258
my $nextdeadline=0;
259
260
my $mailqueue=0;

261
262
263
264
265
notify("Stated starting up\n");

#
# Drain the event system for one pass of the main loop.
#
# Resets $event_count, decides how long it is acceptable to block (based
# on queued mail, $blockwait and $nextdeadline), then polls until no new
# events show up and the wait has expired, or until one of the signal
# flags ($sigrestart, $sigcleanup, $do_reload) is raised.
#
sub process_event_queue() {
    $event_count = 0;
    my $seen    = -1;
    my $started = time();

    debug("Polling - mq=$mailqueue bw=$blockwait\n");

    # Pick the number of seconds we are allowed to block for.
    my $timeout;
    if ($mailqueue != 0) {
	# Mail is pending: sleep no longer than until it is due.
	$timeout = $lastmail + $mailgap - $started;
	debug("Now $started, mailgap $mailgap, last $lastmail ==> wait $timeout\n");
    }
    elsif ($blockwait) {
	# Nothing queued at all: only an event or a signal can wake us.
	$timeout = 600;
    }
    else {
	# Sleep until the next timeout deadline, if there is one.
	$timeout = ($nextdeadline > 0) ? $nextdeadline - $started : 0;
    }
    if ($timeout < 0) {
	debug("Wait was $timeout!\n");
	$timeout = 0;
    }
    my $wakeup = $started + $timeout;

    while (1) {
	# A pending signal always ends the polling pass.
	last
	    if ($sigrestart || $sigcleanup || $do_reload);
	# Stop once a poll produced nothing new and the wait has run out.
	last
	    unless ($event_count != $seen || $timeout > 0);

	$seen = $event_count;

	if ($timeout > 0 && !($sigrestart || $sigcleanup || $do_reload)) {
	    # The timeout parameter is in milliseconds.
	    event_poll_blocking($handle, $timeout*1000);
	    # Whatever is left of the original wait.
	    $timeout = $wakeup - time();

	    # If events were handled and there is now other work pending
	    # (timeouts, mail, or a signal), stop waiting.
	    if ($event_count > 0 &&
		(qsize() > 0 || $mailqueue ||
		 $sigrestart || $sigcleanup || $do_reload)) {
		$blockwait = 0;
		$timeout   = 0;
	    }
	}
	else {
	    # No time left (or a signal just arrived): non-blocking poll.
	    event_poll($handle);
	}
    }

    debug("Handled $event_count event(s).\n")
	if ($event_count > 0);
}
Robert Ricci's avatar
Robert Ricci committed
319

320
# Now, we just poll for events, and watch for timeouts
Robert Ricci's avatar
Robert Ricci committed
321
while (1) {
Mac Newbold's avatar
Mac Newbold committed
322
    my $now = time();
323
324
325
326
    my ($deadline,$node);

    # Check for nodes that have passed their timeout
    if (!qhead($deadline,$node)) {
327
	info("HEAD: $node in ".($deadline-$now).", queue=".qsize()."\n");
328
329
	while ($now >= $deadline && $node ne "") {
	    qpop($deadline,$node);
330
	    info("POP: $node in ".($deadline-$now).", queue=".qsize()."\n");
331
	    handleCtrlEvent($node,$TBTIMEOUT);
332
333
334
335
	    if (0) { qshow(); }
	    if (qhead($deadline,$node)) {
		$deadline=0; $node="";
	    }
336
	}
337
338
    } else {
	$deadline=0;
339
    }
340
    $nextdeadline = $deadline;
341

342
343
344
345
    if (qsize()==0) {
	$blockwait=1;
	debug("---Blocking wait okay---\n");
    }
Mac Newbold's avatar
Mac Newbold committed
346

Mac Newbold's avatar
Mac Newbold committed
347
348
349
350
    if ($do_reload || ($now - $last_reload > $reload_time)) {
	reload();
	$do_reload = 0;
    }
Mac Newbold's avatar
Mac Newbold committed
351

Mac Newbold's avatar
Mac Newbold committed
352
353
    # Send any messages in the queue if it is time
    notify("",1);
Mac Newbold's avatar
Mac Newbold committed
354

355
356
357
    if ($sigrestart) { restart(); }
    if ($sigcleanup) { cleanup(); }

358
    process_event_queue;
Robert Ricci's avatar
Robert Ricci committed
359
360
}

Mac Newbold's avatar
Mac Newbold committed
361
362
exit(0);

Robert Ricci's avatar
Robert Ricci committed
363
# Read the current states of nodes from the database
364
#
# Load the state of every node (except those whose node_id starts with
# "sh") from the nodes table.
#
# Arguments: optionally, the previous node table flattened into a
#            key/value list (node_id => hashref).
# Returns:   a hash mapping node_id to a hashref with the keys tag, state,
#            timestamp, mode, mode_timestamp, notified, timedout, noretry.
#
# Side effects: removes the timeout of any node whose tag does not match
# $dbtag, and sets up a timeout for each freshly loaded node that does.
#
sub readStates(;@) {
    # An empty argument list simply yields an empty hash, so no
    # "defined" guard is needed (defined(%hash) is rejected outright by
    # newer perls).
    my %oldnodes = @_;

    #debug("readStates called\n");
    my $result = DBQueryFatal("SELECT node_id, eventstate, " .
			      "state_timestamp, op_mode, " .
			      "op_mode_timestamp, stated_tag FROM nodes ".
			      "where node_id not like 'sh%'");

    my %nodes;
    while (my ($node_id, $state, $timestamp, $mode, $mode_timestamp, $tag)
	   = $result->fetchrow()) {
	my $nodetag = (defined($tag) ? $tag : "");
	$nodes{$node_id}{"tag"} = $nodetag;

	if ($dbtag ne "" && $dbtag eq $nodetag) {
	    info("This stated will work on $node_id\n");
	}
	if ($dbtag eq "" && $dbtag ne $nodetag) {
	    info("This stated will *NOT* work on $node_id\n");
	}
	if ($dbtag ne $nodetag) {
	    remTimeout($node_id);
	}

	#
	# If there's an entry in oldnodes for this node, and it
	# hasn't changed state or time, use the old entry (so that
	# we don't lose information about which nodes we've already
	# notified the ops about, etc.)
	#
	if ($oldnodes{$node_id} && $state && $timestamp &&
	    ($oldnodes{$node_id}{state} eq $state) &&
	    ($oldnodes{$node_id}{mode} eq $mode) &&
	    ($oldnodes{$node_id}{timestamp} == $timestamp)) {
	    $nodes{$node_id} = $oldnodes{$node_id};
	    # The old entry carries the tag from the previous load; keep
	    # the one just read so that a tag change in the database is
	    # not lost while the node sits in the same state.
	    $nodes{$node_id}{"tag"} = $nodetag;
	} else {
	    $nodes{$node_id}{state}          = $state;
	    $nodes{$node_id}{timestamp}      = $timestamp;
	    $nodes{$node_id}{mode}           = $mode;
	    $nodes{$node_id}{mode_timestamp} = $mode_timestamp;
	    $nodes{$node_id}{notified}       = 0;
	    $nodes{$node_id}{timedout}       = 0;
	    $nodes{$node_id}{noretry}        = 0;
	    # Is there a timeout? If so, set it up!
	    if ($dbtag eq $nodetag) {
		setTimeout($mode,$state,$node_id,$timestamp);
	    }
	}
    }
    return %nodes;
}

#
# Read timeouts for various states from the database
#
#
# Load the state_timeouts table.
#
# Returns a two-level hash: $timeouts{op_mode}{state} = [ timeout, action ].
#
sub getTimeouts() {
    #debug("getTimeouts called\n");
    my $query_result =
	DBQueryFatal("SELECT op_mode, state, timeout, action " .
		     "FROM state_timeouts");

    my %table;
    while (my @row = $query_result->fetchrow()) {
	my ($opmode, $st, $secs, $what) = @row;
	$table{$opmode}{$st} = [ $secs, $what ];
    }
    return %table;
}

#
# Read the list of valid state transitions from the database
#
#
# Load the state_transitions table.
#
# Returns a three-level hash in which $valid{op_mode}{state1}{state2} is 1
# for every transition listed in the table.
#
sub getValid() {
    #debug("getValid called\n");
    my $query_result =
	DBQueryFatal("SELECT op_mode, state1, state2 " .
		     "FROM state_transitions");

    my %table;
    while (my @row = $query_result->fetchrow()) {
	my ($opmode, $from, $to) = @row;
	$table{$opmode}{$from}{$to} = 1;
    }
    return %table;
}

451
452
453
454
#
# Read the list of valid mode transitions from the database
#
#
# Load the mode_transitions table.
#
# Returns a hash keyed by "op_mode1:state1" whose values are array refs
# holding the "op_mode2:state2" strings reachable from that key, in the
# order the rows were returned.
#
sub getModeTrans() {
    #debug("getModeTrans called\n");
    my $result =
      DBQueryFatal("SELECT op_mode1, state1, op_mode2, state2 " .
		   "FROM mode_transitions order by op_mode1,state1");

    my %modeTrans;
    while (my ($mode1,$state1, $mode2, $state2) = $result->fetchrow()) {
	# push autovivifies the array ref on first use, so there is no
	# need to copy the existing list for every additional row.
	push(@{$modeTrans{"$mode1:$state1"}}, "$mode2:$state2");
    }
    return %modeTrans;
}

#
# Read the list of states which trigger an action
#
#
# Load the state_triggers table.
#
# Returns a hash of array refs, each holding the comma-separated trigger
# names of one row:
#   "mode:state"       => global triggers (rows whose node_id is "*")
#   "node:mode:state"  => per-node triggers (all other rows)
#
# How the trigger kinds interact (our "*" is stored as $TBANYMODE):
#
#  - Per-node triggers have a specific node_id. They only affect that
#    node in a particular mode/state, and are run first of all.
#  - Global triggers have "*" as the node_id. They apply to all nodes
#    for a given mode/state, and run after any per-node triggers.
#  - Any-mode triggers have "*" as the mode, and can be global or
#    per-node. They are tied to a state and occur in that state in any
#    mode, but are over-ridden by global triggers.
#
# So if an any-mode trigger for state XYZ exists as well as a global
# trigger for mode FOOBAR state XYZ, then on arrival in XYZ any per-node
# triggers are run; after that, a node in mode FOOBAR runs only the
# global trigger, and a node in any other mode runs only the any-mode
# trigger.
#
sub getTriggers() {
    debug("getTriggers called\n");
    debug("anymode ==> '$TBANYMODE'\n");

    my %trigtab;

    # Global triggers (including any-mode).
    my $rows =
      DBQueryFatal("SELECT op_mode, state, trigger " .
		   "FROM state_triggers where node_id='$TBANYMODE' ".
		   "order by op_mode,state");
    while (my @row = $rows->fetchrow()) {
	my ($opmode, $st, $list) = @row;
	my $key = "$opmode:$st";
	$trigtab{$key} = [ split(/\s*,\s*/, $list) ];
	debug("trig($key)\t => ".join(',', @{$trigtab{$key}})."\n");
    }

    # Per-node triggers (including any-mode).
    $rows =
      DBQueryFatal("SELECT node_id, op_mode, state, trigger " .
		   "FROM state_triggers where node_id!='$TBANYMODE' ".
		   "order by op_mode,state");
    while (my @row = $rows->fetchrow()) {
	my ($nodeid, $opmode, $st, $list) = @row;
	my $key = "$nodeid:$opmode:$st";
	$trigtab{$key} = [ split(/\s*,\s*/, $list) ];
	debug("trig($key)\t => ".join(',', @{$trigtab{$key}})."\n");
    }

    debug(hash_recurse(%trigtab));

    return %trigtab;
}

Robert Ricci's avatar
Robert Ricci committed
528
529
530
531
#
# Gets called for every event that we receive
#
#
# Event-system callback, registered through event_subscribe().
#
# Arguments: the event handle, the notification, and the callback data
#            (unused here).
#
# Counts the event, drops it when the node it names is unknown or is not
# handled by this instance, and otherwise dispatches on the object type to
# stateTransition, opModeTransition, handleCtrlEvent or handleCommand.
#
sub handleEvent($$$) {
    my ($handle,$notification,$data) = @_;
    my $objtype   = event_notification_get_objtype($handle,$notification);
    my $objname   = event_notification_get_objname($handle,$notification);
    my $eventtype = event_notification_get_eventtype($handle,$notification);

    $event_count++;

    my $is_main = ($dbtag eq "");

    #
    # The main stated prints every event. Tagged (debugging) versions
    # print only the events that turn out to apply to them, further down.
    #
    debug("Got an event: ($objtype,$objname,$eventtype)\n")
	if ($is_main);

    #
    # Except for commands, the object name is a node; work out whether
    # another instance is supposed to be handling it.
    #
    unless ($objtype eq $TBCOMMAND) {
	#
	# Never seen this node before: reload from the database.
	#
	unless (defined($nodes{$objname})) {
	    reload();

	    # Still not defined, someone screwed up! This could end up
	    # churning via reload(). Bad.
	    unless (defined($nodes{$objname})) {
		notify("Got $objtype/$eventtype for nonexistent $objname!\n");
		return;
	    }
	}

	#
	# Ignore nodes whose stated_tag does not match the one given on
	# the command line.
	#
	#debug("dbtag='$dbtag', node $objname='".$nodes{$objname}{"tag"}."'\n");
	if ($dbtag ne $nodes{$objname}{"tag"}) {
	    # Record when main stated ignores a node.
	    if ($is_main) {
		info("Got $objtype/$eventtype for $objname, which is not mine\n");
	    }
	    return;
	}
	unless (checkDBRedirect($objname)) {
	    info("Got $objtype/$eventtype for $objname, which is not mine\n");
	    return;
	}
    }

    #
    # A debugging version only gets here for one of its own nodes, so the
    # event can be printed now. (Main version printed it earlier.)
    #
    debug("Got an event: ($objtype,$objname,$eventtype)\n")
	unless ($is_main);

    # Dispatch on the object type; the first matching pattern wins.
    if ($objtype =~ /$TBNODESTATE/) {
	stateTransition($objname,$eventtype);
    }
    elsif ($objtype =~ /$TBNODEOPMODE/) {
	opModeTransition($objname,$eventtype);
	notify("Use of deprecated event TBNODEOPMODE:\n".
	       "$objname->$eventtype\n");
    }
    elsif ($objtype =~ /$TBCONTROL/) {
	handleCtrlEvent($objname,$eventtype);
    }
    elsif ($objtype =~ /$TBCOMMAND/) {
	handleCommand($objname,$eventtype);
    }

}

sub stateTransition($$) {

620
    my ($node,$newstate) = @_;
Robert Ricci's avatar
Robert Ricci committed
621

622
623
    # Check for invalid transitions
    my ($oldstate, $mode);
624
625
626
    $oldstate = $nodes{$node}{state};
    $mode     = $nodes{$node}{mode};

627
628
629
    if ($oldstate && $mode && $valid{$mode} && $valid{$mode}{$oldstate} &&
	!$valid{$mode}{$oldstate}{$newstate}) {
	notify("Invalid transition for node $node from $mode/$oldstate " .
630
	       "to $newstate\n");
631
    }
Robert Ricci's avatar
Robert Ricci committed
632

633
634
635
636
    my $now = time();
    $nodes{$node}{state}     = $newstate;
    $nodes{$node}{timestamp} = $now;
    $nodes{$node}{notified}  = 0;
637

638
639
640
    info("$node: $mode/$oldstate => $mode/$newstate\n");
    DBQueryFatal("UPDATE nodes SET eventstate='$newstate', " .
		 "state_timestamp='$now' WHERE node_id='$node'");
641

642
643
644
645
    # Before we set the timeout (overwriting any current ones), we need
    # to check if we had a pending command
    if (qfind($node) &&
	$timeout_tag{$node} =~ /^$TBCOMMAND:/) {
646
        debug("TimeoutTag = '$timeout_tag{$node}'\n");
647
	my ($str,$cmd) = split(":",$timeout_tag{$node});
648
	debug("str=$str\tcmd=$cmd\tTBREBOOT=$TBREBOOT\tstate=$newstate\n");
649
	if ($cmd eq $TBREBOOT) {
650
	    if ($newstate eq TBDB_NODESTATE_SHUTDOWN ) {
651
652
653
		info("$node: $TBREBOOT success\n");
		# Timeout will get cleared below by setTimeout call
	    } else {
654
655
		notify("$node: $TBREBOOT in progress, but got state ".
		       "$newstate instead of ".TBDB_NODESTATE_SHUTDOWN."!\n");
656
657
658
659
660
	    }
	#} elsif ($cmd eq $FOO ) {
	    # Add more here...
	} else {
	    notify("$node: Unknown command timeout '$timeout_tag{$node}' ".
661
		   "found at $mode/$newstate\n");
662
663
664
	}
    }

665
666
667
668
669
    #
    # Check if this state has a timeout, and if so, put it in the queue.
    # Note that any opmode transition below will replace (or remove) this
    # timeout if appropriate.
    #
670
671
    setTimeout($mode,$newstate,$node,$now);

Mac Newbold's avatar
Mac Newbold committed
672
673
674
675
676
677
678
    # Check if this state has any triggers
    my @nodetrigs = GetNodeTriggerList($node,$mode,$newstate,1);
    my @trigs = GetNodeTriggerList($node,$mode,$newstate);
    if (@trigs > 0) {
	debug("Running triggers: ".join("/",@trigs)."\n");
	foreach ( @trigs) {
	    my $trig = $_;
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
	    /^SCRIPT:([-\w\/]+)$/ && do {
		#
		# Run a script. No arguments at the moment.
		#
		my $script = $1;

		#
		# If the path is absolute, run it. Otherwise it has to
		# come from the sbin directory.
		#
		if (! ($script =~ /^\//)) {
		    $script = "$TB/sbin/$script";
		}
		info("$node: Running $script.\n");
		system("$script &");
		next;
	    };
Mac Newbold's avatar
Mac Newbold committed
696
	    /^$PXEBOOT$/ && do {
697
		#
Mac Newbold's avatar
Mac Newbold committed
698
699
700
701
702
703
704
705
706
707
708
709
		# See if we jumped into the PXEBOOT kernel. Bootinfo
		# will send PXEBOOTING every time a node contacts it,
		# which is our indicator that the node is in the first
		# phase of booting. At this point we want to switch
		# state machines since the entire boot process is
		# governed by a single state machine that is
		# independent of the OS that the node will eventually
		# boot.  Rather then encode that in each state
		# machine, we use a special machine with a defined
		# entrypoint (PXEBOOTING) and a defined exitpoint
		# (BOOTING). See below for where we jump back out of
		# this state machine.
710
		#
Mac Newbold's avatar
Mac Newbold committed
711
712
713
714
		# Jumped in. We need to change the opmode so that
		# the state transitions are legal. We do not
		# bother to save the old opmode since we can
		# figure it out later when we leave.
715
		#
Mac Newbold's avatar
Mac Newbold committed
716
717
718
719
720
		debug("Running $PXEBOOT trigger\n");
		if ($mode ne $PXEKERNEL) {
		    info("$node: Forcing mode transition into $PXEKERNEL!\n");
		    opModeTransition($node, $PXEKERNEL, 1);
		    $mode=$PXEKERNEL;
721
		}
Mac Newbold's avatar
Mac Newbold committed
722
723
724
725
726
727
728
		next;
	    };
	    /^$BOOTING$/ && do {
		#
		# See if we are in the right mode/osid.
		#
		my ($bootosid,$bootopmode) = TBBootWhat($node, $debug);
729

Mac Newbold's avatar
Mac Newbold committed
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
		info("$node: BootWhat says $bootosid (mode $bootopmode).\n");
		DBQueryFatal("update nodes set osid='$bootosid' ".
			     "where node_id='$node'");

		if ($bootopmode ne $mode) {
		    if ($mode eq $PXEKERNEL) {
			#
			# If we came from PXE boot, then we have to
			# jump out of the PXEKERNEL state machine into
			# whatever state machine is current for the
			# node. Since we came through bootinfo, we
			# know that the node is doing what it is
			# supposed to, and that this change matches
			# what the node is booting.
			#
			info("$node: Forcing mode transition out of $PXEKERNEL!\n");
			opModeTransition($node, $bootopmode, 1);
			$mode=$bootopmode;
		    }
		    elsif ($oldstate eq TBDB_NODESTATE_ISUP) {
			#
			# Skipped SHUTDOWN, which could result in a
			# missed opmode transition. Can this really
			# happen anymore?
			#
			info("$node: Came from ISUP! ".
			     "Checking for mode transition\n");
			my $query_result =
			  DBQueryWarn("select next_op_mode from nodes ".
				      "where node_id='$node'");
			my ($nextmode) = $query_result->fetchrow();
			if ($nextmode) {
			    info("$node: Forcing mode transition!\n");
			    opModeTransition($node, $nextmode, 1);
			    $mode=$nextmode;
			}
		    }
		    else {
			my $str = "$node is running $bootosid, but in ".
			  "mode $mode\ninstead of mode $bootopmode!\n";
			
			if ($bootopmode eq "RELOAD") {
			    #
			    # For now, only force if we're going into
			    # reload mode, so we don't get stuck
			    # looping in reloading.  Can this happen
			    # anymore?
			    #
			    DBQueryFatal("UPDATE nodes SET ".
					 "op_mode='$bootopmode', ".
					 "op_mode_timestamp=".
					 "unix_timestamp(now()) ".
					 "WHERE node_id='$node'");
			    $nodes{$node}{mode} = $bootopmode;
			    $nodes{$node}{mode_timestamp} = $now;
			    $str .= "Forced op_mode to $bootopmode.\n";
			}
			notify($str);
		    }
		}
		next;
	    };
	    /^$CHECKGENISUP$/ && do {
		checkGenISUP($node);
		next;
	    };
796
	    /^$TBRESET$/ && do {
797
798
799
		# We successfully booted, so clear some flags
		$nodes{$node}{noretry}   = 0;
		$nodes{$node}{timedout}  = 0;
800
801
802
803
804
805
806
807
808
809
810
811
812
		# Check if we really need to do a reset
		my $r = DBQueryWarn("select osid,def_boot_osid from nodes ".
				    "where node_id='$node'");
		my ($osid,$defosid) = $r->fetchrow();
		if ($osid ne $defosid) {
		    handleCtrlEvent($node,$trig);
		}
		next;
	    };
	    /^$TBRELOADDONE$/ && do {
		handleCtrlEvent($node,$trig);
		next;
	    };
813
814
815
816
817
818
819
820
	    /^$TBISUP$/ && do {
		info("$node: Triggered $TBISUP\n");
		EventSendWarn(host      => $BOSSNODE ,
			      objtype   => TBDB_TBEVENT_NODESTATE ,
			      eventtype => TBDB_NODESTATE_ISUP ,
			      objname   => $node);
		next;
	    };
821
	    notify("Unknown trigger '$trig' for $node in $mode/$newstate!\n");
822
	}
Mac Newbold's avatar
Mac Newbold committed
823
824
825
826
827
828
829
	# Clear any of the node triggers that we ran.
	# (Don't clear all of them, because some of the triggers we ran
	# may have caused others to be set, and we don't want to nuke them.)
	if (@nodetrigs > 0) {
	    debug("Clearing node triggers: ".join("/",@nodetrigs)."\n");
	    ClearNodeTrigger($node,$mode,$newstate,@nodetrigs);
	}
830
    }
831

832
833
834
835
836
837
838
839
    # Check if this state can trigger a mode transition
    if (defined($modeTrans{"$mode:$newstate"})) {
	info("$node: Checking for mode transition\n");
	my $r = DBQueryWarn("select next_op_mode from nodes ".
			    "where node_id='$node'");
	my ($nextmode) = $r->fetchrow();
	if ($nextmode) {
	    opModeTransition($node,$nextmode);
Mac Newbold's avatar
Mac Newbold committed
840
841
842
	} else {
	    debug("No next mode.\n");
	}
843
844
    }
}
845

846
#
# opModeTransition(node, newmode [, force])
#
# Switch a node into operational mode $newmode, updating both the
# in-memory %nodes entry and the node's row in the nodes table, and
# then arm whatever timeout applies to the resulting mode/state.
#
#   node    - node_id of the node being moved.
#   newmode - the op_mode to switch the node into.
#   force   - optional, defaults to 0. When true, the transition table
#             is not consulted and the node keeps its current state.
#
# When not forced, %modeTrans{"mode:state"} is consulted for the node's
# current mode and state. Its entries are flattened into a
# newmode => state map (presumably each entry is a "newmode:state"
# string; verify against where %modeTrans is loaded). If $newmode is in
# that map, the mapped state becomes the node's state in the new mode.
#
# Note that an invalid transition is only reported via notify(); the
# mode change is still applied, with the node's state left as it was.
#
sub opModeTransition($$;$) {
    my ($node,$newmode,$force) = @_;
    $force = 0
	if (!defined($force));

    info("$node: Mode change to $newmode requested ($force)\n");

    my $oldstate = $nodes{$node}{state};
    my $mode     = $nodes{$node}{mode};
    my $transkey = "$mode:$oldstate";
    my $nextstate;

    if ($force) {
	# Forced; skip the validity check and keep the current state.
    }
    elsif (!defined($modeTrans{$transkey})) {
	# No mode transitions at all are allowed out of this mode/state.
	notify("Invalid mode transition for $node from $mode/$oldstate: ".
	       "Not a valid mode transition state!\n");
    }
    else {
	debug("Mode Transition check:\n");

	# Flatten the entries into alternating key/value fields and
	# load them into a hash keyed by the destination mode.
	my $flat    = join(",", @{ $modeTrans{$transkey} });
	my %allowed = split(/[:,]/, $flat);

	debug("Valid transitions from $mode/$oldstate are:\n");
	foreach my $destmode (sort(keys(%allowed))) {
	    debug("$destmode => $allowed{$destmode}\n");
	}

	if (!defined($allowed{$newmode})) {
	    notify("Invalid mode transition for $node from ".
		   "$mode/$oldstate to $newmode!\n");
	}
	else {
	    $nextstate = $allowed{$newmode};
	}
    }

    # No state picked above (forced or invalid); stay in the old state.
    $nextstate ||= $oldstate;

    # Update our cached copy of the node. Both timestamps get the same
    # value, and the notified counter starts over for the new state.
    my $now = time();
    @{ $nodes{$node} }{qw(state timestamp mode mode_timestamp notified)} =
	($nextstate, $now, $newmode, $now, 0);

    info("$node: $mode/$oldstate => $newmode/$nextstate\n");

    # Push the same change to the database, clearing next_op_mode.
    my $update = "UPDATE nodes SET eventstate='$nextstate', ";
    $update   .= "next_op_mode='', op_mode='$newmode', ";
    $update   .= "state_timestamp='$now', ";
    $update   .= "op_mode_timestamp='$now' WHERE node_id='$node'";
    DBQueryFatal($update);

    # Check if this state has a timeout, and if so, put it in the queue
    setTimeout($newmode,$nextstate,$node,$now);
}

sub handleCtrlEvent($$) {
    my ($node,$event) = @_;
904

905
    info("CtrlEvent: $node, $event\n");
906

907
908
    foreach ($event) {
	/^$TBRESET$/ && do {
909
910
	    #
	    # Clear next_boot_path with os_select.
Mac Newbold's avatar
Mac Newbold committed
911
	    #
912
	    $cmd = "$osselect -d -c -1 $node";
913
	    system($cmd) and
914
		notify("$node/$event: Could not clear next_boot_path!\n");
Mac Newbold's avatar
Mac Newbold committed
915
916

	    info("Performed $TBRESET for $node\n");
917
918
919
920
921
922
923
924
925
926
	    next;
	};
	/^$TBRELOADDONE$/ && do {
	    info("Clearing reload info for $node\n");
	    DBQueryFatal("delete from current_reloads where node_id='$node'");
	    my ($pid,$eid);
	    NodeidToExp($node,\$pid,\$eid);
	    if (($pid eq NODERELOADING_PID) && ($eid eq NODERELOADING_EID)) {
		DBQueryFatal("delete from scheduled_reloads ".
			     "where node_id='$node'");
927
928
		DBQueryFatal("delete from reserved where node_id='$node'");
		info("Released $node from $pid/$eid\n");
929
930
931
932
	    }
	    next;
	};
	/^$TBTIMEOUT$/ && do {
933
934
935
936
	    my ($mode,$state) = split(":",$timeout_tag{$node});
	    delete($timeout_tag{$node});
	    my $curstate = $nodes{$node}{state};
	    my $curmode = $nodes{$node}{mode};
937
	    my ($timeout,$action);
938
939
940
941
942
	    if (!defined($nodes{$node}{notified})) {
		$nodes{$node}{notified}=0;
	    }
	    $nodes{$node}{notified}++;
	    my $notified = $nodes{$node}{notified};
943
944
	    $nodes{$node}{timedout}++;
	    my $timedout = $nodes{$node}{timedout};
945
946
947
948
	    if ($mode && $state && $timeouts{$mode} &&
		$timeouts{$mode}{$state}) {
		($timeout, $action) = @{$timeouts{$mode}{$state}};
	    }
949
950
	    if ($mode eq $TBCOMMAND) {
		# It is a command, not a true state
951
		if ($action eq $TBTIMEOUTCMDRETRY) {
952
		    # Retry the command
953
		    notify("$node: Command $state, retry #$timedout\n");
954
		    # notify in case we get in a retry loop...
955
		    handleCommand($node,$state,$timedout,1);
956
957
958
959
960
961
		} else {
		    notify("$node: Unknown timeout action for ".
			   "$mode/$state: '$action'\n");
		}
		next;
	    }
962
963
964
965
966
967
968

	    #
	    # Trash. This stuff should not be encoded this way, but I have
	    # no idea how timeouts, TBCOMMAND, and actions interact.
	    #
	    if ($curstate eq $PXEWAKEUP) {
		my $optarg = ($debug ? "-d " : "");
Mac Newbold's avatar
Mac Newbold committed
969

970
971
		if ($timedout < 3) {
		    #
Mac Newbold's avatar
Mac Newbold committed
972
973
		    # Try again.
		    #
974
975
976
977
978
979
		    info("Node $node has timed out $timedout times in ".
			 "$PXEWAKEUP!\n".
			 "Sending it a another wakeup command\n");
		}
		else {
		    #
Mac Newbold's avatar
Mac Newbold committed
980
981
		    # Failed too many times, power cycle instead.
		    #
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
		    notify("Node $node has timed out $timedout times in ".
			   "$PXEWAKEUP!\n".
			   "Sending it a reboot command\n");
		    $optarg .= "-k";
		}
		my $cmd = "$nodereboot -r $optarg $node";
		debug("$cmd\n");
		system("date 2>&1 >> $rebootlog");
		system("$cmd 2>&1 >> $rebootlog &") and
		    notify("PXEWAKEUP retry: ".
			   "Command '$cmd' failed, error $?: $!\n");

		next;
	    }

997
998
999
1000
	    info("Node $node has timed out in state $mode/$state".
		 ($action ne "" ? "\n\tRequested action $action." : "").
		 "\n");

For faster browsing, not all history is shown. View entire blame