Commit fbc26201 authored by Leigh B Stoller

Bug Fix: my previous attempt to catch failed XEN guests via a state machine change had some problems.
parent 088ae82c
...@@ -2298,16 +2298,6 @@ sub vnodeBoot($$$$) ...@@ -2298,16 +2298,6 @@ sub vnodeBoot($$$$)
# stated to do its thing, this state name is treated specially. # stated to do its thing, this state name is treated specially.
libutil::setState("BOOTING"); libutil::setState("BOOTING");
#
# But, we find ourselves stuck in BOOTING quite often if the VM
# fails to boot far enough to send in a state transition. We want
# to catch this specific hangup, so we will send an intermediate
# state that the server side can notice, and watch for how long it
# stays in the state.
#
sleep(1);
libutil::setState("VNODEBOOTSTART");
# #
# We are going to watch for a busted control network interface, which # We are going to watch for a busted control network interface, which
# happens a lot. There is a problem with the control vif not working, # happens a lot. There is a problem with the control vif not working,
...@@ -2350,6 +2340,15 @@ sub vnodeBoot($$$$) ...@@ -2350,6 +2340,15 @@ sub vnodeBoot($$$$)
# Ping returns zero if any packets received. # Ping returns zero if any packets received.
if (! $?) { if (! $?) {
TBDebugTimeStamp("Created virtual machine $vnode_id"); TBDebugTimeStamp("Created virtual machine $vnode_id");
#
# But, we still find ourselves stuck in BOOTING quite
# often if the VM fails to boot far enough to send
# in a state transition. We want to catch this
# specific hangup, so we will send an intermediate
# state that the server side can notice, and watch for
# how long it stays in the state.
#
libutil::setState("VNODEBOOTSTART");
return 0; return 0;
} }
$countdown--; $countdown--;
......
...@@ -141,7 +141,7 @@ use vars qw(@ISA @EXPORT); ...@@ -141,7 +141,7 @@ use vars qw(@ISA @EXPORT);
TBDB_NODEOPMODE_DELAY TBDB_NODEOPMODE_DELAY
TBDB_NODEOPMODE_BOOTWHAT TBDB_NODEOPMODE_BOOTWHAT
TBDB_NODEOPMODE_ANY TBDB_NODEOPMODE_ANY
TBDB_NODEOPMODE_UNKNOWN TBDB_NODEOPMODE_UNKNOWN TBDB_NODEOPMODE_NORMALv2
TBDB_COMMAND_REBOOT TBDB_COMMAND_REBOOT
TBDB_COMMAND_POWEROFF TBDB_COMMAND_POWERON TBDB_COMMAND_POWERCYCLE TBDB_COMMAND_POWEROFF TBDB_COMMAND_POWERON TBDB_COMMAND_POWERCYCLE
...@@ -511,6 +511,7 @@ sub TBDB_NODEOPMODE_DELAYING { "DELAYING"; } ...@@ -511,6 +511,7 @@ sub TBDB_NODEOPMODE_DELAYING { "DELAYING"; }
sub TBDB_NODEOPMODE_UNKNOWNOS { "UNKNOWNOS"; } sub TBDB_NODEOPMODE_UNKNOWNOS { "UNKNOWNOS"; }
sub TBDB_NODEOPMODE_RELOADING { "RELOADING"; } sub TBDB_NODEOPMODE_RELOADING { "RELOADING"; }
sub TBDB_NODEOPMODE_NORMALv1 { "NORMALv1"; } sub TBDB_NODEOPMODE_NORMALv1 { "NORMALv1"; }
sub TBDB_NODEOPMODE_NORMALv2 { "NORMALv2"; }
sub TBDB_NODEOPMODE_MINIMAL { "MINIMAL"; } sub TBDB_NODEOPMODE_MINIMAL { "MINIMAL"; }
sub TBDB_NODEOPMODE_PCVM { "PCVM"; } sub TBDB_NODEOPMODE_PCVM { "PCVM"; }
sub TBDB_NODEOPMODE_RELOAD { "RELOAD"; } sub TBDB_NODEOPMODE_RELOAD { "RELOAD"; }
......
...@@ -928,23 +928,28 @@ sub GetAllocState($$) ...@@ -928,23 +928,28 @@ sub GetAllocState($$)
# #
# We do this cause we always want to go to the DB. # We do this cause we always want to go to the DB.
# #
sub GetEventState($$) sub GetEventState($$;$)
{ {
my ($self, $pstate) = @_; my ($self, $pstate, $popmode) = @_;
my $node_id = $self->node_id(); my $node_id = $self->node_id();
my $query_result = my $query_result =
DBQueryWarn("select eventstate from nodes where node_id='$node_id'"); DBQueryWarn("select eventstate,op_mode from nodes ".
"where node_id='$node_id'");
return -1 return -1
if (!$query_result || !$query_result->numrows); if (!$query_result || !$query_result->numrows);
my ($state) = $query_result->fetchrow_array(); my ($state,$op_mode) = $query_result->fetchrow_array();
$state = TBDB_NODESTATE_UNKNOWN $state = TBDB_NODESTATE_UNKNOWN
if (!defined($state)); if (!defined($state));
$op_mode = TBDB_NODEOPMODE_UNKNOWN
if (!defined($op_mode));
$self->{'DBROW'}->{'eventstate'} = $state $self->{'DBROW'}->{'eventstate'} = $state
if (defined($self->{'DBROW'})); if (defined($self->{'DBROW'}));
$$pstate = $state; $$pstate = $state;
$$popmode= $op_mode
if (defined($popmode));
return 0; return 0;
} }
......
...@@ -1771,9 +1771,9 @@ sub WaitForNodes($$@) ...@@ -1771,9 +1771,9 @@ sub WaitForNodes($$@)
} }
$node->_waitstart($parent->_waitend()); $node->_waitstart($parent->_waitend());
} }
my $state; my ($state,$op_mode);
if ($node->GetEventState(\$state)) { if ($node->GetEventState(\$state, \$op_mode)) {
print STDERR "*** Error getting event state for $node_id.\n"; print STDERR "*** Error getting event state for $node_id.\n";
$node->_sliver()->SetStatus("failed") $node->_sliver()->SetStatus("failed")
if (defined($node->_sliver())); if (defined($node->_sliver()));
...@@ -1805,6 +1805,7 @@ sub WaitForNodes($$@) ...@@ -1805,6 +1805,7 @@ sub WaitForNodes($$@)
# XEN VMs, a reboot typically solves it. # XEN VMs, a reboot typically solves it.
# #
if ($node->isvirtnode() && if ($node->isvirtnode() &&
$op_mode eq TBDB_NODEOPMODE_NORMALv2 &&
$state eq TBDB_NODESTATE_VNODEBOOTSTART && $state eq TBDB_NODESTATE_VNODEBOOTSTART &&
$state eq $node->_laststate() && $state eq $node->_laststate() &&
time() - $node->_laststatestamp() > 180) { time() - $node->_laststatestamp() > 180) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment