Commit cdcbedc7 authored by Leigh Stoller's avatar Leigh Stoller

Various fixes for ualloc switches:

* Stop using the ALWAYSUP state machine for switches, this causes ISUP
  to always get sent, which in certain cases, results in stated
  rebooting the switch!

  Added a new ONIE state machine, which handles the way switches actually
  boot into ONIE first and then do the bootinfo/grub dance, or do a
  reload, or go into admin mode.

* Do not send PXEBOOTING from ONIE; this was a mistake, it throws us
  into the PXEKERNEL state machine, which sometimes results in stated
  rebooting the switch!

  We still use PXEWAIT (it is sent by bootinfod), since that is the
  "waiting" state that is wired into a lot of Emulab, it just happens to
  now be a state in the ONIE state machine, so it's legal.

* Fix a bug in libossetup, that was fooling libossetup_switch into
  thinking the wrong thing.

* Add some timeouts to the libosload_mlnx code; sshd sometimes refuses to
  answer after a failed login. Strange.

* Fix a fork() problem in the switch reload code; gotta call exit, not
  return! This was wreaking subtle (okay not so subtle) havoc in
  libossetup.
parent 18a98f7e
......@@ -88,10 +88,6 @@ waitmode()
{
echo "Waiting for something to do"
# This tells stated we are in the PXEKERNEL state machine.
$TMCC state PXEBOOTING
sleep 1
#
# We use bootinfoclient. If we get a reboot command do that,
# otherwise we just return to query bootwhat again.
......
......@@ -359,6 +359,9 @@ REPLACE INTO mode_transitions VALUES ('RELOAD-UE','SHUTDOWN','ALWAYSUP','ISUP','
REPLACE INTO mode_transitions VALUES ('ALWAYSUP','SHUTDOWN','RELOAD','SHUTDOWN','');
REPLACE INTO mode_transitions VALUES ('RELOAD','SHUTDOWN','ALWAYSUP','SHUTDOWN','');
REPLACE INTO mode_transitions VALUES ('RELOAD','RELOADDONE','ALWAYSUP','SHUTDOWN','');
REPLACE INTO mode_transitions VALUES ('ONIE','SHUTDOWN','RELOAD','SHUTDOWN','');
REPLACE INTO mode_transitions VALUES ('RELOAD','SHUTDOWN','ONIE','SHUTDOWN','');
REPLACE INTO mode_transitions VALUES ('RELOAD','RELOADDONE','ONIE','SHUTDOWN','');
--
-- Dumping data for table `priorities`
......@@ -448,7 +451,15 @@ REPLACE INTO state_timeouts VALUES ('PXEKERNEL','PXEBOOTING',240,'REBOOT');
-- Dumping data for table `state_transitions`
--
REPLACE INTO state_transitions VALUES ('ONIE','ISUP','SHUTDOWN','');
REPLACE INTO state_transitions VALUES ('ONIE','SHUTDOWN','BOOTING','');
REPLACE INTO state_transitions VALUES ('ONIE','SHUTDOWN','PXEWAIT','');
REPLACE INTO state_transitions VALUES ('ONIE','BOOTING','ISUP','');
REPLACE INTO state_transitions VALUES ('ONIE','BOOTING','BOOTING','');
REPLACE INTO state_transitions VALUES ('ONIE','PXEWAIT','PXEWAIT','bootinfoclient');
REPLACE INTO state_transitions VALUES ('ONIE','PXEWAIT','PXEWAKEUP','');
REPLACE INTO state_transitions VALUES ('ONIE','PXEWAKEUP','BOOTING','');
REPLACE INTO state_transitions VALUES ('ONIE','ISUP','ISUP','');
REPLACE INTO state_transitions VALUES ('ALWAYSUP','ISUP','SHUTDOWN','Reboot');
REPLACE INTO state_transitions VALUES ('ALWAYSUP','SHUTDOWN','ISUP','BootDone');
REPLACE INTO state_transitions VALUES ('PCVM','ISUP','BOOTING','Crash');
......
use strict;
use libdb;
#
# Database update hook: install the ONIE state machine.
#
# Takes the database handle, database name and version; none of the
# three is consulted here, since every statement goes through
# DBQueryFatal(). Each row is written with REPLACE INTO. Returns 0
# after the last statement has been issued.
#
sub DoUpdate($$$)
{
    my ($dbhandle, $dbname, $version) = @_;

    # [ table, column values ] pairs, listed in the order they are issued:
    # first the transitions inside the ONIE state machine, then the mode
    # transitions between ONIE and RELOAD.
    my @rows = (
        [ 'state_transitions', "'ONIE','ISUP','SHUTDOWN',''" ],
        [ 'state_transitions', "'ONIE','SHUTDOWN','BOOTING',''" ],
        [ 'state_transitions', "'ONIE','SHUTDOWN','PXEWAIT',''" ],
        [ 'state_transitions', "'ONIE','BOOTING','ISUP',''" ],
        [ 'state_transitions', "'ONIE','BOOTING','BOOTING',''" ],
        [ 'state_transitions', "'ONIE','PXEWAIT','PXEWAIT','bootinfoclient'" ],
        [ 'state_transitions', "'ONIE','PXEWAIT','PXEWAKEUP',''" ],
        [ 'state_transitions', "'ONIE','PXEWAKEUP','BOOTING',''" ],
        [ 'state_transitions', "'ONIE','ISUP','ISUP',''" ],
        [ 'mode_transitions',  "'ONIE','SHUTDOWN','RELOAD','SHUTDOWN',''" ],
        [ 'mode_transitions',  "'RELOAD','SHUTDOWN','ONIE','SHUTDOWN',''" ],
        [ 'mode_transitions',  "'RELOAD','RELOADDONE','ONIE','SHUTDOWN',''" ],
    );

    foreach my $row (@rows) {
        my ($table, $values) = @{$row};

        # The two-piece concatenation keeps the statement text identical
        # to the hand-written form (note the space on each side of the join).
        DBQueryFatal("REPLACE INTO $table ".
                     " VALUES ($values)");
    }
    return 0;
}
1;
# Local Variables:
# mode:perl
# End:
......@@ -75,7 +75,10 @@ sub createExpectObject($$)
# default password.
return 0
if ($self->createExpectObjectAux($nodeobject, 0) == 0);
sleep(5);
# Sleep for a while, sometimes sshd locks out the next login,
# not really sure what is going on yet.
$self->dprint(0,"$self createExpectObject($node_id): sleeping for a bit\n");
sleep(20);
return -1
if ($self->createExpectObjectAux($nodeobject, 1));
......@@ -376,6 +379,10 @@ sub DeviceReconfigure($$)
tberror "$self: Reconfigure($node_id): could not generate config!\n";
return -1;
}
# Sleep for a few seconds to let switch settle down after booting.
sleep(10);
if ($self->createExpectObject($nodeobject)) {
return -1;
}
......
......@@ -221,7 +221,6 @@ sub Reload($$)
$self->nodeinfo($nodeobject,'reloadchildpid',$childpid);
return 0;
}
# child continues:
#
# So we are setup to boot the diskload MFS (onie).
......@@ -395,6 +394,7 @@ sub Reconfigure($$$)
my $node_id = $nodeobject->node_id();
my $reconfig_only = 0;
my $running_onie = 0;
my $forked = 0;
my $retval;
$self->dprint(0, "$self: Reconfigure($node_id): starting, dowait:$dowait");
......@@ -425,9 +425,14 @@ sub Reconfigure($$$)
$self->dprint(2,"$self: Reconfigure($node_id): ".
"setting state to SHUTDOWN");
#
# Need to do this before we return to ossetup, since it is looking
# for a transition to ISUP to know when the Reconfigure is done.
TBSetNodeEventState($node_id, TBDB_NODESTATE_SHUTDOWN);
#
$nodeobject->Refresh();
if ($nodeobject->eventstate() eq TBDB_NODESTATE_ISUP()) {
TBSetNodeEventState($node_id, TBDB_NODESTATE_SHUTDOWN);
}
#
# Reload can't block, so fork and make a note of ourself!
......@@ -438,6 +443,8 @@ sub Reconfigure($$$)
$self->nodeinfo($nodeobject, 'reloadchildpid', $childpid);
return 0;
}
# child continues; note that we have exit instead of return.
$forked = 1;
}
#
# Again, $dowait is our indicator we are called from os_setup.
......@@ -518,10 +525,16 @@ sub Reconfigure($$$)
if (!$dowait) {
TBSetNodeEventState($node_id, TBDB_NODESTATE_ISUP());
}
if ($forked) {
exit(0);
}
return 0;
failed:
TBSetNodeEventState($node_id, TBDB_NODESTATE_TBFAILED());
if ($forked) {
exit(-1);
}
return -1;
}
......
......@@ -924,7 +924,12 @@ sub WaitForNodes($@)
}
if (grep {$_ eq $state} @waitstates) {
print "$node_id has reported state $state\n";
$node->_setupstatus($SETUP_OKAY);
if ($state eq TBDB_NODESTATE_ISUP()) {
$node->_setupstatus($SETUP_OKAY);
}
else {
$node->_setupstatus($SETUP_FAILED);
}
delete($nodes{$node_id});
$typehandler->WaitDone($node);
next;
......@@ -943,7 +948,7 @@ sub WaitForNodes($@)
}
else {
if ($typehandler->Retry($node) != 0) {
$node->_setupstatus($SETUP_OKAY);
$node->_setupstatus($SETUP_FAILED);
delete($nodes{$node_id});
$typehandler->WaitDone($node);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment