Commit 94ccc3f4 authored by Leigh Stoller's avatar Leigh Stoller

A bunch of boot changes. Read carefully.

* Add boot_errno to the nodes table so that nodes can report in a
  subcode to indicate what went wrong. At present, we do not report any
  real error codes; that is going to take some time to work out since it
  will require a bunch of changes to the boot scripts.

* Add new table node_bootlogs to store logs provided by the nodes. Not
  a full console log, but a log of the tmcd client side part. We can
  make it a full log if we want though; just means mucking about with
  the boot phase a bit.

* Add new state transition to NORMALv2 and PCVM state machines. "TBFAILED"
  is a new state that is sent (after TBSETUP) if a node fails somewhere in
  the tmcd client side.

* Change TBNodeStateWait() to take a list of states (instead of single
  state) and an optional pass by reference parameter to return the actual
  state that the node landed in. Change all calls to TBNodeStateWait() of
  course.

* Change os_setup (and libreboot in wait mode) to look for both TBFAILED
  and ISUP. If a TBFAILED event is seen, we can terminate the wait early
  and not retry os_setup on physical nodes (although still retry virtual
  nodes). The nice thing about this is that the wait should terminate much
  earlier (rather than waiting for timeout), especially for virtual nodes
  which can take a really long time when there are a couple of hundred.

* Add new routines dobooterrno() and dobootlog() to tmcd. Bump version
  number and increase the buffer size to allow for the larger packets that
  a console log will generate (added MAXTMCDPACKET variable, set to 0x4000).

* Add new -f option to tmcc to specify a datafile to send along as the last
  argument to tmcd. This is more pleasing than trying to send a console log
  in on the command line. For example: "tmcc -f /tmp/log BOOTLOG" will send
  a BOOTLOG command along with the contents of /tmp/log.

  Also close the write side of the pipe so that the server sees EOF on
  read. See aside comment below.

* Changes to rc.bootsetup:
     1. Use perl tricks to capture all output, duping to the console and to
        a log file in /var/emulab/logs.
     2. On any error, send a status code (boot_errno) and the bootlog to
        tmcd.
     3. Generate a TBFAILED state transition.

* Changes to rc.injail:
     1. Same as rc.bootsetup, but do not send log files; that would pummel
        boss. Leave them on the physical node.

* Change vnodesetup (which calls mkjail) to watch for any error and send a
  TBFAILED state transition. This should catch almost all errors, and
  dramatically reduce waiting when something fails.

* Changes to rc.cdboot are essentially the same as rc.bootsetup, although a
  bootlog is sent all the time (success or failure), and I do not generate
  a boot_errno yet. Also, instead of TBFAILED, generate a PXEFAILED state
  since the CDROM is actually operating within the PXEFBSD opmode. I have
  yet to work this into the rest of the system though; waiting to get a new
  CD built and actually experiment with it.

* Add new menu option and web page to display the node bootlog. We store
  only the latest bootlog, but maybe someday store more than one. Display
  boot_errno on node page.

Aside: I made a big mistake in the tmcd protocol; I did not envision
passing more than a small amount of data (one fragment) and so I do not
include a record terminator (ie: close of the write side on the client
sends EOF) or a size field at the beginning. No big deal since small
requests are sent in one fragment and the server sees the entire
thing. Well, with a large console log, that will end up as multiple
fragments, and the server will often not get the entire thing on the first
read, and there are no subsequent reads (with no EOF or known size, it
would block forever). Well, fixing this in a backwards compatible manner
(for old images) was way too much pain. Instead, tmcc now closes the write
side, and the server does subsequent reads *only* in the new dobootlog()
routine. Note that it *is* possible to fix this in a backwards compatible
manner, but I did not want to go down that path just yet.
parent 54eb1dea
......@@ -93,7 +93,7 @@ use vars qw(@ISA @EXPORT);
TBDB_NODESTATE_RELOADDONE TBDB_NODESTATE_UNKNOWN
TBDB_NODESTATE_PXEWAIT TBDB_NODESTATE_PXEWAKEUP
TBDB_NODESTATE_PXEBOOTING TBDB_NODESTATE_ALWAYSUP
TBDB_NODESTATE_MFSSETUP
TBDB_NODESTATE_MFSSETUP TBDB_NODESTATE_TBFAILED
TBDB_NODEOPMODE_NORMAL TBDB_NODEOPMODE_DELAYING
TBDB_NODEOPMODE_UNKNOWNOS TBDB_NODEOPMODE_RELOADING
......@@ -489,6 +489,7 @@ sub TBDB_NODESTATE_BOOTING() { "BOOTING"; }
sub TBDB_NODESTATE_TBSETUP() { "TBSETUP"; }
sub TBDB_NODESTATE_RELOADSETUP(){ "RELOADSETUP"; }
sub TBDB_NODESTATE_MFSSETUP() { "MFSSETUP"; }
sub TBDB_NODESTATE_TBFAILED() { "TBFAILED"; }
sub TBDB_NODESTATE_RELOADING() { "RELOADING"; }
sub TBDB_NODESTATE_RELOADDONE() { "RELOADDONE"; }
sub TBDB_NODESTATE_UNKNOWN() { "UNKNOWN"; };
......@@ -3575,8 +3576,8 @@ sub TBNodeBootReset($)
# Still, it avoids duplication in 4 scripts.
# Also, watch for events not filtering through stated in time.
#
sub TBNodeStateWait ($$$$) {
my ($pc, $waitstate, $waitstart, $maxwait) = @_;
sub TBNodeStateWait ($$$$@) {
my ($pc, $waitstart, $maxwait, $actual, @waitstates) = @_;
#
# Start a counter going, relative to the time we rebooted the first
......@@ -3595,7 +3596,9 @@ sub TBNodeStateWait ($$$$) {
return 1;
}
if ($state eq $waitstate) {
if (grep {$_ eq $state} @waitstates) {
$$actual = $state
if (defined($actual));
return 0;
}
......
......@@ -289,7 +289,7 @@ foreach my $n (@freed_nodes) {
"def_boot_cmd_line='',next_boot_cmd_line='', ".
"temp_boot_osid='',next_boot_osid='', ".
"update_accounts=0,ipport_next=ipport_low, ".
"sfshostid=NULL,allocstate='$allocFreeState' ".
"sfshostid=NULL,allocstate='$allocFreeState',boot_errno=0 ".
"where node_id='$n'") || $error++;
#
......
......@@ -873,6 +873,17 @@ CREATE TABLE node_auxtypes (
PRIMARY KEY (node_id,type)
) TYPE=MyISAM;
--
-- Table structure for table `node_bootlogs`
--
CREATE TABLE node_bootlogs (
node_id varchar(32) NOT NULL default '',
bootlog text,
bootlog_timestamp datetime default NULL,
PRIMARY KEY (node_id)
) TYPE=MyISAM;
--
-- Table structure for table `node_features`
--
......@@ -1078,6 +1089,7 @@ CREATE TABLE nodes (
battery_voltage float default NULL,
battery_percentage float default NULL,
battery_timestamp int(10) unsigned default NULL,
boot_errno int(11) NOT NULL default '0',
PRIMARY KEY (node_id),
KEY phys_nodeid (phys_nodeid),
KEY node_id (node_id,phys_nodeid),
......
......@@ -352,7 +352,9 @@ REPLACE INTO state_transitions VALUES ('PCVM','BOOTING','TBSETUP','BootOK');
REPLACE INTO state_transitions VALUES ('PCVM','ISUP','SHUTDOWN','Reboot');
REPLACE INTO state_transitions VALUES ('PCVM','SHUTDOWN','BOOTING','StartBoot');
REPLACE INTO state_transitions VALUES ('PCVM','TBSETUP','ISUP','BootDone');
REPLACE INTO state_transitions VALUES ('PCVM','TBSETUP','TBFAILED','BootError');
REPLACE INTO state_transitions VALUES ('PCVM','TBSETUP','SHUTDOWN','Error');
REPLACE INTO state_transitions VALUES ('PCVM','TBFAILED','SHUTDOWN','Reboot');
REPLACE INTO state_transitions VALUES ('PCVM','BOOTING','ISUP','BootDone');
REPLACE INTO state_transitions VALUES ('ALWAYSUP','ISUP','ISUP','Retry');
REPLACE INTO state_transitions VALUES ('PCVM','SHUTDOWN','SHUTDOWN','Retry');
......@@ -434,6 +436,8 @@ REPLACE INTO state_transitions VALUES ('NORMALv2','TBSETUP','SHUTDOWN','Error');
REPLACE INTO state_transitions VALUES ('NORMALv2','ISUP','RECONFIG','DoReConfig');
REPLACE INTO state_transitions VALUES ('NORMALv2','RECONFIG','TBSETUP','ReConfig');
REPLACE INTO state_transitions VALUES ('NORMALv2','TBSETUP','ISUP','BootDone');
REPLACE INTO state_transitions VALUES ('NORMALv2','TBSETUP','TBFAILED','BootFail');
REPLACE INTO state_transitions VALUES ('NORMALv2','TBFAILED','SHUTDOWN','RebootAfterFail');
REPLACE INTO state_transitions VALUES ('NORMALv1','TBSETUP','SHUTDOWN','Error');
REPLACE INTO state_transitions VALUES ('NORMAL','SHUTDOWN','SHUTDOWN','Retry');
REPLACE INTO state_transitions VALUES ('NETBOOT','PXEBOOTING','BOOTING','BootInfo');
......
......@@ -2249,3 +2249,15 @@ last_net_act,last_cpu_act,last_ext_act);
alter table nodes add battery_percentage float default NULL;
alter table nodes add battery_timestamp int(10) unsigned default NULL;
1.295: Add stuff for capturing boot errors.
alter table nodes add boot_errno int(11) NOT NULL default '0';
CREATE TABLE node_bootlogs (
node_id varchar(32) NOT NULL default '',
bootlog text,
bootlog_timestamp datetime default NULL,
PRIMARY KEY (node_id)
) TYPE=MyISAM;
\ No newline at end of file
......@@ -426,17 +426,23 @@ sub nodereboot($$)
sleep(2);
foreach my $node (sort(@nodes)) {
my $actual_state;
#
# Skip if something failed earlier.
#
next
if ($result->{node});
if (!TBNodeStateWait($node,
TBDB_NODESTATE_ISUP, $waitstart, (60*6))) {
print STDOUT "reboot ($node): alive and well.\n";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
next;
if (!TBNodeStateWait($node, $waitstart, (60*6), \$actual_state,
(TBDB_NODESTATE_TBFAILED,
TBDB_NODESTATE_ISUP))) {
if ($actual_state eq TBDB_NODESTATE_ISUP) {
print STDOUT "reboot ($node): alive and well.\n";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
next;
}
print STDOUT "*** reboot ($node): reported a TBFAILED event.\n";
}
SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
$result->{$node} = -1;
......
......@@ -648,8 +648,17 @@ TBDebugTimeStamp("Local node waiting started");
while ( @nodelist ) {
my $node = shift(@nodelist);
my $wstart = $waitstart{$node};
my $actual_state;
if (!TBNodeStateWait($node, TBDB_NODESTATE_ISUP, $wstart, (60*7))) {
if (!TBNodeStateWait($node, $wstart, (60*7), \$actual_state,
(TBDB_NODESTATE_TBFAILED, TBDB_NODESTATE_ISUP))) {
print "state is $actual_state\n";
if ($actual_state eq TBDB_NODESTATE_TBFAILED) {
print "*** WARNING: $node reported a TBFAILED event; not retrying\n";
$retries{$node} = 0;
goto tbfailed;
}
print "$node is alive and well\n";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_READY() );
......@@ -682,11 +691,12 @@ while ( @nodelist ) {
# Fall through on failure.
}
SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
print "*** WARNING: $node may be down.\n".
" This has been reported to testbed-ops.\n";
tbfailed:
SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
if ($canfail{$node} && !($canceled || $noretry)) {
push(@informuser, $node);
print "*** Continuing with experiment setup anyway ...\n";
......@@ -848,6 +858,7 @@ elsif (@vnodelist) {
my $wstart = $waitstart{$node};
my $maxwait = 90 + (40 * $pnodevcount{$pnode});
my $curallocstate;
my $actual_state;
TBGetNodeAllocState($node, \$curallocstate);
......@@ -857,8 +868,14 @@ elsif (@vnodelist) {
if ($curallocstate ne TBDB_ALLOCSTATE_DOWN() &&
$curallocstate ne TBDB_ALLOCSTATE_DEAD()) {
if (!TBNodeStateWait($node, TBDB_NODESTATE_ISUP,
$wstart, $maxwait)) {
if (!TBNodeStateWait($node, $wstart, $maxwait, \$actual_state,
(TBDB_NODESTATE_TBFAILED,
TBDB_NODESTATE_ISUP))) {
if ($actual_state eq TBDB_NODESTATE_TBFAILED) {
print "*** WARNING: $node reported a TBFAILED event.\n";
goto vtbfailed;
}
print "$node is alive and well\n";
TBDebugTimeStamp("Virtual node $node setup ISUP");
......@@ -868,6 +885,7 @@ elsif (@vnodelist) {
next;
}
vtbfailed:
TBDebugTimeStamp("Virtual node $node setup FAILED");
SetNodeBootStatus($node, NODEBOOTSTATUS_FAILED);
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_DOWN());
......
......@@ -2,7 +2,7 @@
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
# All rights reserved.
#
......@@ -149,7 +149,8 @@ foreach my $node ( @nodes ) {
print STDOUT "Waiting for nodes to come up ...\n";
foreach my $node ( sort(@nodes) ) {
if (! TBNodeStateWait($node, TBDB_NODESTATE_ISUP, $waitstart, (60*6))) {
if (! TBNodeStateWait($node, $waitstart, (60*6), undef,
(TBDB_NODESTATE_ISUP))) {
print STDOUT "$node is alive and well\n";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
next;
......
......@@ -45,7 +45,7 @@ use libtmcc;
#
# BE SURE TO BUMP THIS AS INCOMPATIBLE CHANGES TO TMCD ARE MADE!
#
sub TMCD_VERSION() { 21; };
sub TMCD_VERSION() { 22; };
libtmcc::configtmcc("version", TMCD_VERSION());
# Control tmcc timeout.
......
......@@ -29,6 +29,7 @@ use Exporter;
TMCCCMD_ROLE TMCCCMD_RUSAGE TMCCCMD_WATCHDOGINFO TMCCCMD_HOSTKEYS
TMCCCMD_FIREWALLINFO TMCCCMD_EMULABCONFIG
TMCCCMD_CREATOR TMCCCMD_HOSTINFO TMCCCMD_LOCALIZATION
TMCCCMD_BOOTERRNO TMCCCMD_BOOTLOG
);
# Must come after package declaration!
......@@ -71,6 +72,7 @@ my $beproxy = 0;
"version" => undef,
"subnode" => undef,
"keyfile" => undef,
"datafile" => undef,
"timeout" => undef,
"logfile" => undef,
"nocache" => 0,
......@@ -160,6 +162,8 @@ my %commandset =
"creator" => {TAG => "creator"},
"hostinfo" => {TAG => "hostinfo"},
"localization" => {TAG => "localization"},
"booterrno" => {TAG => "booterrno"},
"bootlog" => {TAG => "bootlog"},
);
#
......@@ -208,6 +212,8 @@ sub TMCCCMD_EMULABCONFIG(){ $commandset{"emulabconfig"}->{TAG}; }
sub TMCCCMD_CREATOR (){ $commandset{"creator"}->{TAG}; }
sub TMCCCMD_HOSTINFO (){ $commandset{"hostinfo"}->{TAG}; }
sub TMCCCMD_LOCALIZATION(){ $commandset{"localization"}->{TAG}; }
sub TMCCCMD_BOOTERRNO (){ $commandset{"booterrno"}->{TAG}; }
sub TMCCCMD_BOOTLOG (){ $commandset{"bootlog"}->{TAG}; }
#
# Caller uses this routine to set configuration of this library
......@@ -266,6 +272,9 @@ sub optionstring($%)
if (defined($opthash{"keyfile"})) {
$options .= " -k " . $opthash{"keyfile"};
}
if (defined($opthash{"datafile"})) {
$options .= " -f " . $opthash{"datafile"};
}
if (defined($opthash{"timeout"})) {
$options .= " -t " . $opthash{"timeout"};
}
......
......@@ -59,6 +59,7 @@ sub doboot();
sub doshutdown();
sub doreconfig();
sub docleanup();
sub BootFatal($$);
# Parse command line.
if (! getopts($optlist, \%options)) {
......@@ -72,10 +73,30 @@ if (@ARGV) {
$action = $ARGV[0];
}
# Execute in background and return immediately.
if ($background && TBBackGround($LOGFILE)) {
print "Node reconfiguration started ...\n";
exit(0);
if ($action eq "boot" || $action eq "reconfig") {
#
# We want to save all of the output off, but also dup it to the console.
#
open(LOG, "> $LOGFILE") or
BootFatal(-1, "Could not open $LOGFILE!");
#
# This open implicitly forks a child, which goes on to execute the rest
# of the script. The parent is going to sit in this loop and capture the
# output of the child, writing it to the logfile and to the console.
#
if (open(FOO, "-|")) {
while (<FOO>) {
print LOG $_;
print "$_";
}
close(FOO);
close(LOG);
if ($?) {
BootFatal($? >> 8, "Boot Failure!");
}
exit(0);
}
}
# Execute the action.
......@@ -100,6 +121,32 @@ SWITCH: for ($action) {
}
exit(0);
#
# Report a fatal boot failure to Emulab Control and exit.
#
# Arguments:
#   $code - value reported to tmcd as the boot errno; also the exit status.
#   $msg  - human readable reason for the failure; printed to stdout.
#
# The boot log is uploaded first (only when $LOGFILE exists and is not
# empty), then the boot errno is reported, then a TBFAILED state
# transition is sent. A failure in any of these steps is reported on
# stdout but does not prevent the remaining steps from being attempted.
# This routine does not return.
#
sub BootFatal($$)
{
    my ($code, $msg) = @_;

    # Show the reason for the failure instead of silently dropping it.
    print "$msg\n";

    #
    # Send the console log to the server.
    #
    if (-e $LOGFILE && -s $LOGFILE &&
        tmcc(TMCCCMD_BOOTLOG, "", undef, ("datafile" => $LOGFILE)) < 0) {
        print "Error sending boot log to Emulab Control!\n";
    }

    if (tmcc(TMCCCMD_BOOTERRNO, $code) < 0) {
        print "Error sending boot errno to Emulab Control!\n";
    }

    if (tmcc(TMCCCMD_STATE, "TBFAILED") < 0) {
        print "Error sending TBFAILED to Emulab Control!\n";
    }
    exit($code);
}
#
# Boot Action.
#
......
......@@ -21,6 +21,7 @@ sub usage()
print STDERR " -v versnum Specify a version number for tmcd\n";
print STDERR " -n subnode Specify the subnode id\n";
print STDERR " -k keyfile Specify the private keyfile\n";
print STDERR " -f datafile Extra stuff to send to tmcd (tcp mode only)\n";
print STDERR " -u Use UDP instead of TCP\n";
print STDERR " -l path Use named unix domain socket instead of TCP\n";
print STDERR " -t timeout Timeout waiting for the controller.\n";
......@@ -31,7 +32,7 @@ sub usage()
print STDERR " -D Force command to use a direct, UDP request\n";
exit(1);
}
my $optlist = "ds:p:v:n:k:ul:t:x:o:bcDi:";
my $optlist = "ds:p:v:n:k:ul:t:x:o:bcDi:f:";
my $debug = 0;
my $CMD;
my $ARGS;
......@@ -115,6 +116,9 @@ sub ParseOptions()
if (defined($options{"k"})) {
libtmcc::configtmcc("keyfile", $options{"k"});
}
if (defined($options{"f"})) {
libtmcc::configtmcc("datafile", $options{"f"});
}
if (defined($options{"u"})) {
libtmcc::configtmcc("useudp", $options{"u"});
}
......
......@@ -50,6 +50,7 @@ my $rebooting = 0;
my $leavejail = 0;
my $timestamps = 0;
my $jailpid;
my $cleanupstate = "SHUTDOWN";
#
# Turn off line buffering on output
......@@ -375,6 +376,7 @@ if ($dojail) {
cleanup();
exit(0);
}
$cleanupstate = "TBFAILED";
fatal("Jail exited unexpectedly!");
}
else {
......@@ -517,7 +519,7 @@ sub cleanup()
$cleaning = 1;
# Inform testbed that vnode going down.
tmcc(TMCCCMD_STATE, "SHUTDOWN", undef, ("timeout" => 2));
tmcc(TMCCCMD_STATE, $cleanupstate, undef, ("timeout" => 2));
#
# First force the jail to exit.
......
/*
* EMULAB-COPYRIGHT
* Copyright (c) 2000-2005 University of Utah and the Flux Group.
* Copyright (c) 2000-2005, 2004 University of Utah and the Flux Group.
* All rights reserved.
*/
......@@ -8,6 +8,7 @@
#define TBSERVER_PORT2 14447
#define MYBUFSIZE 2048
#define BOSSNODE_FILENAME "bossnode"
#define MAXTMCDPACKET 0x4000 /* Allow for console logs */
/*
* As the tmcd changes, incompatible changes with older version of
......
......@@ -39,6 +39,7 @@ if ($EUID != 0) {
# Script specific goo.
my $RCDIR = "$BINDIR/rc";
my $LOGFILE = "$LOGDIR/cdbootsetup.debug";
#
# Disk related parameters
......@@ -74,12 +75,38 @@ sub doboot();
sub doshutdown();
sub doreconfig();
sub docleanup();
sub BootNotify($$);
# Allow default above.
if (@ARGV) {
$action = $ARGV[0];
}
# For the "boot" and "reconfig" actions, run the rest of the script in a
# forked child and have the parent copy the child's output to both the
# logfile and the console, then report the outcome via BootNotify().
if ($action eq "boot" || $action eq "reconfig") {
#
# We want to save all of the output off, but also dup it to the console.
#
# NOTE(review): BootFatal() is neither declared nor defined in the parts
# of this script visible here (only BootNotify() is) -- confirm it exists
# in this script, or this error path should probably call BootNotify().
#
open(LOG, "> $LOGFILE") or
BootFatal(-1, "Could not open $LOGFILE!");
#
# This open implicitly forks a child, which goes on to execute the rest
# of the script. The parent is going to sit in this loop and capture the
# output of the child, writing it to the logfile and to the console.
#
if (open(FOO, "-|")) {
while (<FOO>) {
print LOG $_;
print "$_";
}
# Closing the pipe waits for the child and leaves its wait status in $?.
close(FOO);
close(LOG);
# The CDROM *always* reports back what it can.
# ($? >> 8 extracts the child's exit code from the wait status.)
BootNotify($? >> 8, "CD Boot");
exit(0);
}
}
# Execute the action.
SWITCH: for ($action) {
/^boot$/i && do {
......@@ -107,6 +134,35 @@ SWITCH: for ($action) {
}
exit(0);
#
# Notify boss of what happened.
#
# Arguments:
#   $code - zero on success, non-zero on failure.
#   $msg  - status message; printed to stdout.
#
# The boot log is always sent (success or failure), provided $LOGFILE
# exists and is not empty. When $code is zero the routine then returns 0
# so the caller can carry on (for example, go on to reboot the node).
# When $code is non-zero a PXEFAILED state transition is sent and the
# script exits with $code. No boot errno is reported from here.
#
sub BootNotify($$)
{
    my ($code, $msg) = @_;

    print "$msg\n";
    system("sync");

    #
    # Send the console log to the server.
    #
    if (-e $LOGFILE && -s $LOGFILE &&
        tmcc(TMCCCMD_BOOTLOG, "", undef,
             ("datafile" => $LOGFILE, "timeout" => 10)) < 0) {
        print "Error sending boot log to Emulab Control!\n";
    }

    #
    # On success hand control back to the caller. Exiting here would
    # prevent callers that notify just before rebooting from ever
    # reaching their reboot.
    #
    return 0
        if (!$code);

    if (tmcc(TMCCCMD_STATE, "PXEFAILED") < 0) {
        print "Error sending PXEFAILED to Emulab Control!\n";
    }
    exit($code);
}
#
# Boot Action.
#
......@@ -184,6 +240,7 @@ sub doboot()
if ($bootwhat eq "reboot") {
print("Bootinfo says to reboot ... so thats what we gonna do!\n");
system("sync");
BootNotify(0, "Bootinfo said to reboot");
system("reboot");
sleep(10000);
}
......@@ -227,6 +284,7 @@ sub doboot()
}
}
system("sync");
BootNotify(0, "Bootinfo said to boot partition $bpart");
system("reboot");
sleep(10000);
}
......
......@@ -8,8 +8,8 @@ use English;
use Getopt::Std;
#
# This script is the plab vserver equiv of ../common/rc.bootsetup. It runs
# inside the vserver and does a limited set of bootstrapping tasks.
# This script is the jail equiv of ../common/rc.bootsetup. It runs
# inside the jail and does a limited set of bootstrapping tasks.
#
sub usage()
{
......@@ -45,6 +45,7 @@ sub doshutdown();
sub doreconfig();
sub docleanup();
sub doplabconfig();
sub BootFatal($);
# Parse command line.
#if (! getopts($optlist, \%options)) {
......@@ -73,10 +74,28 @@ SWITCH: for ($action) {
docleanup();
last SWITCH;
};
fatal("Invalid action: $action\n");
BootFatal("Invalid action: $action\n");
}
exit(0);
#
# This version of fatal prints the message, sends boot status to tmcd,
# and then generates a TBFAILED state transition.
#
# Arguments:
#   $msg - reason for the failure; printed to stdout (a trailing newline
#          is added when the message does not already end with one).
#
# The boot errno reported is always -1. Failures to contact tmcd are
# reported on stdout but do not stop the remaining steps. This routine
# does not return; it exits with -1.
#
sub BootFatal($)
{
    my ($msg) = @_;

    # Do not lose the diagnostic text the callers pass in.
    print $msg;
    print "\n"
        if ($msg !~ /\n$/);

    if (tmcc(TMCCCMD_BOOTERRNO, "-1") < 0) {
        print "Error sending boot errno to Emulab Control!\n";
    }

    if (tmcc(TMCCCMD_STATE, "TBFAILED") < 0) {
        print "Error sending TBFAILED to Emulab Control!\n";
    }
    exit(-1);
}
#
# Boot Action.
#
......@@ -106,7 +125,7 @@ sub doboot()
print("Starting testbed watchdog daemon\n");
system("$BINDIR/watchdog start");
if ($?) {
fatal("Error running $BINDIR/watchdog");
BootFatal("Error running $BINDIR/watchdog");
}
}
......@@ -118,7 +137,7 @@ sub doboot()
print("Running config scripts\n");
system("$RCDIR/rc.config boot");
if ($?) {
fatal("Error running $RCDIR/rc.config");
BootFatal("Error running $RCDIR/rc.config");
}
TBDebugTimeStamp("rc.injail done running config scripts");
......@@ -127,13 +146,13 @@ sub doboot()
print("Starting linktest daemon\n");
system("$RCDIR/rc.linktest start");
if ($?) {
fatal("Error running $RCDIR/rc.linktest");
BootFatal("Error running $RCDIR/rc.linktest");
}
}
print("Informing the testbed that we are up and running\n");
if (tmcc(TMCCCMD_STATE, "ISUP") < 0) {
fatal("Error sneding ISUP to Emulab Control!");
BootFatal("Error sending ISUP to Emulab Control!");
}
}
......
......@@ -76,7 +76,7 @@ static char *clientcertdirs[] = {
static SSL *ssl;
static SSL_CTX *ctx;
static int client = 0;
static char nosslbuf[BUFSIZ];
static char nosslbuf[MAXTMCDPACKET];
static int nosslbuflen, nosslbufidx;
static void tmcd_sslerror();
static void tmcd_sslprint(const char *fmt, ...);
......
......@@ -104,6 +104,7 @@ char *usagestr =
" -t timeout Timeout waiting for the controller.\n"
" -x path Be a tmcc proxy, using the named unix domain socket\n"
" -o logfile Specify log file name for -x option\n"
" -f datafile Extra stuff to send to tmcd (tcp mode only)\n"
" -i Do not use SSL protocol\n"
"\n";
......@@ -145,7 +146,7 @@ main(int argc, char **argv)
int n, ch;
struct hostent *he;
struct in_addr serverip;
char buf[MYBUFSIZE], *bp;
char buf[MAXTMCDPACKET], *bp;
FILE *fp;
volatile int useudp = 0;
char * volatile unixpath = NULL;
......@@ -155,11 +156,12 @@ main(int argc, char **argv)
char *keyfile = NULL;
char *privkey = NULL;
char *proxypath= NULL;
char *datafile = NULL;
#ifdef _WIN32
WSADATA wsaData;
#endif
while ((ch = getopt(argc, argv, "v:s:p:un:t:k:x:l:do:i")) != -1)
while ((ch = getopt(argc, argv, "v:s:p:un:t:k:x:l:do:if:")) != -1)
switch(ch) {
case 'd':
debug++;
......@@ -195,6 +197,9 @@ main(int argc, char **argv)
case 'o':
logfile = optarg;
break;
case 'f':
datafile = optarg;
break;
case 'i':
#ifdef WITHSSL
nousessl = 1;
......@@ -212,6 +217,9 @@ main(int argc, char **argv)
if (unixpath && proxypath)
usage();
if (useudp && datafile)
usage();
if (unixpath && (keyfile || bossnode)) {
fprintf(stderr,
"You may not use the -k or -s with the -l option\n");
......@@ -331,6 +339,21 @@ main(int argc, char **argv)
fprintf(stderr, "Command too large!\n");
exit(-1);
}
if (!useudp && datafile) {
int len;
if ((fp = fopen(datafile, "r")) == NULL) {
perror("accessing datafile");
exit(-1);
}
len = fread(&buf[n], sizeof(char), sizeof(buf) - (n + 1), fp);
if (!len) {
perror("reading datafile");
exit(-1);
}
n += len;
fclose(fp);
}
buf[n] = '\0';
/*
......@@ -510,6 +533,15 @@ dotcp(char *data, int outfd, struct in_addr serverip)
bp += cc;
n -= cc;
}
if (! isssl) {
/*
* Send EOF to server so it knows it has all the data.
* SSL mode takes care of this for us by making sure the
* server gets all the data when it reads. Still, it's
* a terrible way to do this.
*/
shutdown(sock, SHUT_WR);
}
while (1) {
if ((cc = READ(sock, buf, sizeof(buf) - 1)) <= 0) {
......@@ -687,7 +719,7 @@ beproxy(char *localpath, struct in_addr serverip, char *partial)
#else
int sock, newsock, cc, length;
struct sockaddr_un sunaddr, client;
char command[MYBUFSIZE], buf[MYBUFSIZE];
char command[MAXTMCDPACKET], buf[MAXTMCDPACKET];
char *bp, *cp;
/* don't let a client kill us */
......
......@@ -231,6 +231,8 @@ COMMAND_PROTOTYPE(dofwinfo);
COMMAND_PROTOTYPE(dohostinfo);
COMMAND_PROTOTYPE(doemulabconfig);
COMMAND_PROTOTYPE(dolocalize);
COMMAND_PROTOTYPE(dobooterrno);
COMMAND_PROTOTYPE(dobootlog);
/*
* The fullconfig slot determines what routines get called when pushing
......@@ -308,6 +310,8 @@ struct command {
{ "hostinfo", FULLCONFIG_NONE, 0, dohostinfo},
{ "emulabconfig", FULLCONFIG_NONE, F_ALLOCATED, doemulabconfig},
{ "localization", FULLCONFIG_PHYS, 0, dolocalize},
{ "booterrno", FULLCONFIG_NONE, 0, dobooterrno},
{ "bootlog", FULLCONFIG_NONE, 0, dobootlog},
};
static int numcommands = sizeof(command_array)/sizeof(struct command);
......@@ -765,7 +769,7 @@ udpserver(int sock, int portnum)
static void
tcpserver(int sock, int portnum)
{
char buf[MYBUFSIZE];
char buf[MAXTMCDPACKET];
struct sockaddr_in client;
int length, cc, newsock;
unsigned int nreq = 0;
......@@ -5563,3 +5567,99 @@ COMMAND_PROTOTYPE(dolocalize)
return 0;
}
/*
* Upload boot log to DB for the node.
*/
COMMAND_PROTOTYPE(dobootlog)
{
char *cp = (char *) NULL;
int len;
/*
* Dig out the log message text.
*/
while (*rdata && isspace(*rdata))
rdata++;
/*