Commit af9b08f3 authored by Leigh B Stoller

Add new state for XEN guest boot; send a VNODEBOOTSTART just before we do
the xl create, so that we can watch for VMs that do not get to TBSETUP in a
reasonable amount of time (which means the VM hung and we need to restart it).
parent b5695436
...@@ -2294,9 +2294,20 @@ sub vnodeBoot($$$$) ...@@ -2294,9 +2294,20 @@ sub vnodeBoot($$$$)
captureStart($vnode_id); captureStart($vnode_id);
} }
# notify stated that we are about to boot # notify stated that we are about to boot. We need this transition for
# stated to do its thing, this state name is treated specially.
libutil::setState("BOOTING"); libutil::setState("BOOTING");
#
# But, we find ourselves stuck in BOOTING quite often if the VM
# fails to boot far enough to send in a state transition. We want
# to catch this specific hangup, so we will send an intermediate
# state that the server side can notice, and watch for how long it
# stays in the state.
#
sleep(1);
libutil::setState("VNODEBOOTSTART");
# #
# We are going to watch for a busted control network interface, which # We are going to watch for a busted control network interface, which
# happens a lot. There is a problem with the control vif not working, # happens a lot. There is a problem with the control vif not working,
......
...@@ -130,6 +130,7 @@ use vars qw(@ISA @EXPORT); ...@@ -130,6 +130,7 @@ use vars qw(@ISA @EXPORT);
TBDB_NODESTATE_MFSSETUP TBDB_NODESTATE_TBFAILED TBDB_NODESTATE_MFSSETUP TBDB_NODESTATE_TBFAILED
TBDB_NODESTATE_POWEROFF TBDB_NODESTATE_SECVIOLATION TBDB_NODESTATE_POWEROFF TBDB_NODESTATE_SECVIOLATION
TBDB_NODESTATE_GPXEBOOTING TBDB_NODESTATE_TPMSIGNOFF TBDB_NODESTATE_GPXEBOOTING TBDB_NODESTATE_TPMSIGNOFF
TBDB_NODESTATE_VNODEBOOTSTART
TBDB_NODEOPMODE_NORMAL TBDB_NODEOPMODE_DELAYING TBDB_NODEOPMODE_NORMAL TBDB_NODEOPMODE_DELAYING
TBDB_NODEOPMODE_UNKNOWNOS TBDB_NODEOPMODE_RELOADING TBDB_NODEOPMODE_UNKNOWNOS TBDB_NODEOPMODE_RELOADING
...@@ -502,6 +503,7 @@ sub TBDB_NODESTATE_GPXEBOOTING(){ "GPXEBOOTING"; } ...@@ -502,6 +503,7 @@ sub TBDB_NODESTATE_GPXEBOOTING(){ "GPXEBOOTING"; }
sub TBDB_NODESTATE_TPMSIGNOFF() { "TPMSIGNOFF"; } sub TBDB_NODESTATE_TPMSIGNOFF() { "TPMSIGNOFF"; }
sub TBDB_NODESTATE_SECVIOLATION(){ "SECVIOLATION"; } sub TBDB_NODESTATE_SECVIOLATION(){ "SECVIOLATION"; }
sub TBDB_NODESTATE_MFSBOOTING() { "MFSBOOTING"; } sub TBDB_NODESTATE_MFSBOOTING() { "MFSBOOTING"; }
sub TBDB_NODESTATE_VNODEBOOTSTART() { "VNODEBOOTSTART"; }
sub TBDB_NODEOPMODE_ANY { "*"; } # A wildcard opmode sub TBDB_NODEOPMODE_ANY { "*"; } # A wildcard opmode
sub TBDB_NODEOPMODE_NORMAL { "NORMAL"; } sub TBDB_NODEOPMODE_NORMAL { "NORMAL"; }
......
...@@ -1801,11 +1801,11 @@ sub WaitForNodes($$@) ...@@ -1801,11 +1801,11 @@ sub WaitForNodes($$@)
next; next;
} }
# #
# Watch for a node stuck in BOOTING; this happens a lot with # Watch for a node stuck in VNODEBOOTSTART; this happens a lot with
# XEN VMs, a reboot typically solves it. # XEN VMs, a reboot typically solves it.
# #
if ($node->isvirtnode() && if ($node->isvirtnode() &&
$state eq TBDB_NODESTATE_BOOTING && $state eq TBDB_NODESTATE_VNODEBOOTSTART &&
$state eq $node->_laststate() && $state eq $node->_laststate() &&
time() - $node->_laststatestamp() > 180) { time() - $node->_laststatestamp() > 180) {
my $giveup = $node->_retried(); my $giveup = $node->_retried();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment