Commit 7f90bc5c authored by Leigh Stoller

Stated headaches.

parent 8c991ba5
......@@ -74,6 +74,10 @@ waitmode()
{
echo "Waiting for something to do"
# This tells stated we are in the PXEKERNEL state machine.
$TMCC state PXEBOOTING
sleep 1
#
# We use bootinfoclient. If we get a reboot command do that,
# otherwise we just return to query bootwhat again.
......@@ -104,18 +108,16 @@ reload_nos()
{
echo "Setting up to reload the NOS"
# Tell boss we are booting.
# Tell boss we are booting into reload MFS.
$TMCC state BOOTING
$BINDIR/rc.reload
rc=$?
if [ $rc -ne 0 ]; then
echo "Dropping into admin shell"
return 1
echo "Dropping into the shell after failed reload"
exit 1
fi
boot_nos
# Should not return
return 1;
return 0
}
#
......@@ -139,6 +141,10 @@ boot_nos()
echo "Failed to update grub env with bootnos=yes"
return 1
fi
# Tell boss we are booting into the NOS.
$TMCC state BOOTING
sleep 5
echo "Rebooting into the NOS"
/sbin/reboot
exit 0;
......@@ -176,8 +182,8 @@ while : ; do
;;
*onie_reload)
reload_nos
# Does not return
exit 1
# We loop again to see what we do next. If we got here by the
# reload daemon, we are probably going into waitmode.
;;
*onie_admin)
echo "Dropping into admin mode"
......
......@@ -394,6 +394,7 @@ sub Reboot($$)
$self->dprint(0, "$self RebootNOS($node_id): error: '$error'\n");
goto reboot;
}
TBSetNodeEventState($node_id, TBDB_NODESTATE_SHUTDOWN);
return 0;
reboot:
......
......@@ -602,6 +602,13 @@ sub Reconfigure($$;$)
return -1;
}
$self->dprint(2,"$self: Reconfigure($node_id): ".
"setting state to SHUTDOWN");
# Need to do this before we return to ossetup, since it is looking
# for a transition to ISUP to know when the Reconfigure is done.
TBSetNodeEventState($node_id, TBDB_NODESTATE_SHUTDOWN);
#
# Reload can't block, so fork and make a note of ourself!
#
......
......@@ -2013,19 +2013,11 @@ sub SetBootOS($$)
# last image
#
my $image = $images[-1];
my $imageid = $image->imageid();
my $defosid = $image->default_osid();
my $osimage = OSImage->Lookup($defosid);
if (!defined($osimage)) {
tberror("$self SetBootOS($node_id): could not map OSID $defosid to its object!");
return -1;
}
print "$self SetBootOS($node_id): changing default OS to $osimage\n";
print "$self SetBootOS($node_id): changing default OS to $image\n";
if (!$TESTMODE) {
if ($nodeobject->OSSelect($image,"def_boot_osid",$self->debug())) {
tberror "$self SetBootOS($node_id): os_select $defosid failed!";
tberror "$self SetBootOS($node_id): os_select $image failed!";
return -1;
}
}
......@@ -2039,7 +2031,7 @@ sub SetBootOS($$)
$ocmdline = $nodeobject->def_boot_cmd_line();
$ocmdline = ""
if (!defined($ocmdline));
$osimage->OSBootCmd("delay", \$ncmdline);
$image->OSBootCmd("delay", \$ncmdline);
$ncmdline = ""
if (!defined($ncmdline));
if ($ocmdline ne $ncmdline) {
......
......@@ -36,6 +36,7 @@ my $TBOPS = "@TBOPSEMAIL@";
my $OURDOMAIN = "@OURDOMAIN@";
my $PING = "/sbin/ping";
my $NETCAT = "/usr/local/bin/netcat";
my $BISEND = "$TB/sbin/bootinfosend";
my $EXPECT_CONN_TIMEOUT = 10;
......@@ -375,10 +376,25 @@ sub Reconfigure($$$)
{
my ($self, $nodeobject, $dowait) = @_;
my $node_id = $nodeobject->node_id();
my $reconfig_only = 0;
my $running_onie = 0;
my $retval;
$self->dprint(0, "$self: Reconfigure($node_id): starting, dowait:$dowait");
# os_setup is doing only a reconfig.
if (!$dowait && !$self->nodeflag($nodeobject, 'reconfig_will_follow')) {
$reconfig_only = 1;
#
# See if the switch is in PXEWAIT, which says we can reboot it
# with node_reboot directly, which will tell ONIE to query.
#
$nodeobject->Refresh();
if ($nodeobject->eventstate() eq TBDB_NODESTATE_PXEWAIT()) {
$running_onie = 1;
}
}
#
# For now, we allow Reconfigure to block -- it will be called from Reload
# or from ossetup, ossetup says do not wait.
......@@ -406,6 +422,73 @@ sub Reconfigure($$$)
return 0;
}
}
#
# Again, $dowait is our indicator we are called from os_setup.
# There is a bit of a mismatch between what libossetup_switch
# does and what we expect; the switch should be in the ONIE
# MFS at this point, so we need to reboot it and wait for it
# to come back online before we can actually reconfig.
#
# If this is a reconfig after reload from os_setup, then the
# switch is already in the NOS and ready for us.
#
# Need to clean this up, but going to wait until MLNX support.
#
if ($reconfig_only) {
if ($running_onie) {
system("$BISEND -q $node_id");
if ($?) {
goto failed;
}
}
elsif ($self->Reboot($nodeobject)) {
goto failed;
}
#
# Now we are waiting for reboot to complete and the switch to come back
# online. Should be very quick, although there will be some delay before
# DHCP finishes and we can ssh over.
#
my $seconds = 180;
$self->dprint(0,"Reconfigure($node_id): ".
"waiting $seconds seconds for ping");
# Need time for node to actually reboot;
sleep(30);
$seconds -= 30;
while ($seconds >= 0) {
sleep(15);
$seconds -= 15;
last
if ($self->Pingable($nodeobject));
}
if ($seconds < 0) {
tbwarn "$self Reconfigure($node_id): timed out waiting for ping\n";
goto failed;
}
#
# Now we wait for sshd to come online.
#
$seconds = 180;
$self->dprint(0,"Reconfigure($node_id): ".
"waiting $seconds seconds for sshd");
while ($seconds >= 0) {
sleep(15);
$seconds -= 15;
system("$NETCAT -z -w 3 $node_id 22 > /dev/null ");
if ($? == 0) {
last
}
}
if ($seconds < 0) {
tbwarn "$self Reconfigure($node_id): timed out waiting for sshd\n";
goto failed;
}
}
#
# Hand this off to the device dependent library. Which is actually
......@@ -415,7 +498,7 @@ sub Reconfigure($$$)
goto failed;
}
# Signal ossetup that we are done with reconfig.
if ($nodeobject->eventstate() eq TBDB_NODESTATE_SHUTDOWN()) {
if (!$dowait) {
TBSetNodeEventState($node_id, TBDB_NODESTATE_ISUP());
}
return 0;
......
......@@ -104,13 +104,6 @@ sub AddNode($$)
|| $node->allocstate() eq TBDB_ALLOCSTATE_RES_INIT_DIRTY()) {
$self->{OPLIST}->{$node_id} = [ $libossetup::RECONFIG ];
$node->_setupoperation($libossetup::RECONFIG);
#
# XXX hack -- our node is already ISUP, and we need to force it
# out of that state so that WaitForNodes doesn't beat our type
# handler object to forcing it to SHUTDOWN
#
print STDERR "$self AddNode($node_id): forcing to SHUTDOWN before RECONFIG\n";
TBSetNodeEventState($node_id,TBDB_NODESTATE_SHUTDOWN);
}
elsif ($node->allocstate() ne TBDB_ALLOCSTATE_RES_READY()) {
# only reboot node if assign_wrapper just pulled it into expt.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment