Commit e96ba6fa authored by Leigh B Stoller

Clean up the code that determines whether an OS is loaded, and the next-OS
resolution. Less confusing now.

Ongoing changes to make better use of the node objects and methods.
parent dacad593
#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
......@@ -55,8 +54,10 @@ use libArchive;
use Template;
use NodeType;
use Experiment;
use Image;
use OSinfo;
use User;
use Node;
if ($PGENISUPPORT) {
require libGeni;
}
......@@ -64,7 +65,6 @@ if ($PGENISUPPORT) {
TBDebugTimeStampsOn();
my $vnode_setup = "$TB/sbin/vnode_setup";
my $osselect = "$TB/bin/os_select";
my $nodereboot = "$TB/bin/node_reboot";
my $elab_setup = "$TB/sbin/elabinelab";
my $dbg = 0;
......@@ -74,6 +74,7 @@ my $failedvnodes= 0;
my $failedplab = 0;
my $failedgeni = 0;
my $canceled = 0;
my %nodeobjs = ();
my %nodes = ();
my %vnodes = ();
my %sharednodes = ();
......@@ -256,6 +257,12 @@ while (my %row = $db_result->fetchhash()) {
my $bootpath = 0;
my $osinfo = undef;
my $nodeobj = Node->Lookup($node);
if (!defined($nodeobj)) {
die_noretry("Cannot lookup object for $node!");
}
$nodeobjs{$node} = $nodeobj;
if ($isgeninode) {
#
# Geni nodes are currently a lot like plab nodes, but that will
......@@ -287,7 +294,7 @@ while (my %row = $db_result->fetchhash()) {
}
else {
my $nodeAllocState;
TBGetNodeAllocState( $node, \$nodeAllocState );
$nodeobj->GetAllocState(\$nodeAllocState);
$nodes{$node} = $node;
$nodeAllocStates{$node} = $nodeAllocState;
if ($nodeAllocState eq TBDB_ALLOCSTATE_RES_RECONFIG()) {
......@@ -412,103 +419,62 @@ while (my %row = $db_result->fetchhash()) {
if (! $osinfo) {
die_noretry("$node has no bootpath and no def_boot_osid set!");
}
#
# If there is an actual path, it's an OSKit kernel, not an image.
#
if (! defined($osinfo->path()) || $osinfo->path() eq "") {
my $nextosinfo;
#
# Not an OSKit kernel.
# Make sure this OSID is actually loaded on the machine.
#
my $p_result =
DBQueryFatal("select * from partitions ".
"where node_id='$node' and osid='$osid'".
"order by partition");
if ($osinfo->IsGeneric()) {
#
# Map generic OSID to the specific one.
#
$nextosinfo = $osinfo->ResolveNextOSID($experiment);
if (!defined($nextosinfo)) {
die_noretry("No next mapping for $osinfo on $node!\n");
}
print "Mapping $osinfo on $node to $nextosinfo\n";
$osinfo = $nextosinfo;
}
#
# If not loaded, then see if the user was looking for the generic
# name of the OS that is loaded.
# Make sure this OSID is actually loaded on the machine.
#
if ($p_result->numrows == 0) {
my $isloaded = $nodeobj->IsOSLoaded($osinfo);
if ($isloaded < 0) {
die_noretry("Error determining if $osinfo ".
"is loaded on $node\n");
}
if ($isloaded) {
#
# Check to see if a non specific version specified.
# OSID is loaded, but might need to be cleaned.
#
if (!defined($osinfo->version()) || $osinfo->version() eq "") {
#
# A non-specific version. There needs to be a way to
# map it to another osid.
#
if (! defined($osinfo->nextosid())) {
die_noretry("No mapping for $osinfo ($node)!");
}
my $nextosid = TBResolveNextOSID($osid, $pid, $eid);
if (!defined($nextosid)) {
die_noretry("No mapping for $osinfo ($node)!");
}
$nextosinfo = OSinfo->Lookup($nextosid);
die_noretry("Could not map $osid to its object!")
if (!defined($nextosinfo));
#
# See if the nextosid is already on the disk. If not,
# it needs to be loaded.
#
my $o_result =
DBQueryFatal("select osid from partitions as p ".
"where p.node_id='$node' and ".
" p.osid='$nextosid'");
if (! $o_result->numrows) {
#
# User wants a specific version of an OS, but its not
# loaded on the machine.
#
print "Mapping $osinfo on $node to $nextosinfo ".
"and setting up a reload.\n";
SetupReload($node, $nextosinfo, $type);
$osids{$node} = $nextosid;
$osmap{$node} = $nextosinfo;
}
else {
#
# Already loaded.
#
print "Mapping $osinfo on $node to $nextosinfo.\n";
if ($dolastload &&
defined($row{'pid'}) && $row{'pid'} ne $pid) {
SetupReload($node, $nextosinfo, $type);
}
else {
system("$osselect $nextosid $node") and
die_noretry("Could not set boot OS to ".
"$nextosid for $node");
}
$osids{$node} = $nextosid;
$osmap{$node} = $nextosinfo;
}
if ($dolastload &&
defined($row{'pid'}) && $row{'pid'} ne $pid) {
SetupReload($node, $osinfo, $type);
}
else {
elsif ($nextosinfo) {
#
# User wants a specific version of an OS, but its not
# loaded on the machine.
# Seems like a bad place for this; if the OS was
# mapped to something else that is already on the
# disk, need to reset def_boot_osid.
#
SetupReload($node, $osinfo, $type);
if ($nodeobj->OSSelect($osinfo, "def_boot_osid", 0)) {
die_noretry("Could not set boot OS to ".
"$osinfo for $node");
}
}
}
else {
#
# OSID is loaded, but might need to be cleaned.
# OS not loaded.
#
if ($dolastload &&
defined($row{'pid'}) && $row{'pid'} ne $pid) {
SetupReload($node, $osinfo, $type);
}
SetupReload($node, $osinfo, $type);
}
$osmap{$node} = $osinfo;
$osids{$node} = $osinfo->osid();
}
}
......@@ -523,7 +489,7 @@ while (my %row = $db_result->fetchhash()) {
#
$osid = $osids{$node};
if (!exists($reboot_waittime{$osid})) {
$reboot_waittime{$osid} = TBOSIDRebootWaittime($osid);
$reboot_waittime{$osid} = $osmap{$node}->reboot_waittime();
}
print STDERR "$node - $osmap{$node} - $canfail{$node}\n"
......@@ -545,18 +511,15 @@ while (my %row = $db_result->fetchhash()) {
# Collect some info about vnodes.
#
foreach my $vnode (keys(%vnodes)) {
my $jailed = $vnodes{$vnode};
my $pnode;
my $nodeobj = $nodeobjs{$vnode};
my $jailed = $vnodes{$vnode};
# print "$vnode, $jailed\n";
if (! $jailed) {
next;
}
if (! TBPhysNodeID($vnode, \$pnode)) {
die_noretry("Cannot determine phys_nodeid for $vnode!");
}
my $pnode = $nodeobj->phys_nodeid();
# print "$vnode, $jailed, $pnode\n";
......@@ -569,16 +532,11 @@ foreach my $vnode (keys(%vnodes)) {
if (!defined($pnodevcount{$pnode}));
$pnodevcount{$pnode}++;
$vnode2pnode{$vnode} = $pnode;
if (!exists($nodes{$pnode})) {
#
# Typical on remote nodes; we do not allocate the underlying
# phys node to the experiment.
#
next;
my $pnodeobj = Node->Lookup($pnode);
if (!defined($pnodeobj)) {
die_noretry("Cannot lookup object for $pnode!");
}
# Nothing else to do for local jail nodes at this time ...
$nodeobjs{$pnode} = $pnodeobj;
}
#
......@@ -600,6 +558,7 @@ foreach my $vnode (keys(%vnodes)) {
#
if ($firewalled) {
my $node = $firewall;
my $nodeobj = $nodeobjs{$node};
TBDebugTimeStamp("rebooting/reloading firewall");
if (!FirewallSetup($node)) {
......@@ -612,8 +571,8 @@ if ($firewalled) {
# We assume that firewall node images are "standard" here,
# and whine to tbops.
#
Node::MarkAsDown($node);
TBSetNodeLogEntry($node, $user_uid, TB_DEFAULT_NODELOGTYPE(),
$nodeobj->MarkAsDown();
$nodeobj->InsertNodeLogEntry($this_user, TB_DEFAULT_NODELOGTYPE(),
"'Moved to hwdown by os_setup; ".
"failed to boot image for osid " . $osmap{$node} .
" in $pid/$eid'");
......@@ -634,7 +593,7 @@ if ($firewalled) {
# Check for cancelation. Firewall setup may have taken awhile.
#
if (!$canceled) {
TBGetCancelFlag($pid, $eid, \$canceled);
$canceled = $experiment->canceled();
if ($canceled) {
tbnotice({cause => 'canceled', severity => SEV_IMMEDIATE,
error => ['cancel_flag']},
......@@ -676,7 +635,7 @@ if ($plabinelab) {
# Check for cancelation. PLC setup may have taken awhile.
#
if (!$canceled) {
TBGetCancelFlag($pid, $eid, \$canceled);
$canceled = $experiment->canceled();
if ($canceled) {
tbnotice({cause => 'canceled', severity => SEV_IMMEDIATE,
error => ['cancel_flag']},
......@@ -744,6 +703,8 @@ if (!$TESTMODE) {
my %nodeflags = ();
foreach my $node (@nodelist) {
my $nodeobj = $nodeobjs{$node};
#
# vnodes only get rebooted if this is a modify and we need to
# reload them (otherwise they will get rebooted because of presence
......@@ -751,7 +712,7 @@ if (!$TESTMODE) {
#
if (defined($vnodes{$node})) {
my $vstate;
TBGetNodeAllocState($node,\$vstate);
$nodeobj->GetAllocState(\$vstate);
if ($vstate eq TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
$nodeflags{$node}{'noreboot'} = 1;
}
......@@ -767,7 +728,7 @@ if (!$TESTMODE) {
$nodeflags{$node}{'nowait'} = 1;
}
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_RELOAD() );
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_RELOAD());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_RELOAD();
# No point in reboot/reconfig obviously, since node will reboot!
delete $reboots{$node};
......@@ -795,11 +756,13 @@ if (!$TESTMODE) {
#
if (keys(%reboots)) {
foreach my $node (keys(%reboots)) {
my $nodeobj = $nodeobjs{$node};
if ($nodeAllocStates{$node} eq TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_REBOOT_CLEAN());
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_REBOOT_CLEAN());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_REBOOT_CLEAN();
} else {
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_REBOOT_DIRTY());
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_REBOOT_DIRTY());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_REBOOT_DIRTY();
}
# See below, needed for vnode_setup.
......@@ -888,7 +851,7 @@ if (!$TESTMODE) {
add_failed_node_reload($node);
delete($nodes{$node});
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_DOWN());
$nodeobjs{$node}->SetAllocState(TBDB_ALLOCSTATE_DOWN());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_DOWN();
}
}
......@@ -905,10 +868,10 @@ if ($plabinelab) {
foreach my $node (@plabnodes) {
if (exists($nodes{$node})) {
tbnotice "Not waiting for emulated plab node $node";
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY());
$nodeobjs{$node}->SetBootStatus(NODEBOOTSTATUS_OKAY);
$nodeobjs{$node}->SetAllocState(TBDB_ALLOCSTATE_RES_READY());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
TBSetNodeEventState($node, TBDB_NODESTATE_ISUP());
$nodeobjs{$node}->SetEventState(TBDB_NODESTATE_ISUP());
delete($nodes{$node});
}
}
......@@ -949,8 +912,9 @@ my @informtbopsfatal = ();
TBDebugTimeStamp("Local node waiting started");
while ( @nodelist ) {
my $node = shift(@nodelist);
my $wstart = $waitstart{$node};
my $node = shift(@nodelist);
my $nodeobj = $nodeobjs{$node};
my $wstart = $waitstart{$node};
my $actual_state;
my $waittime = (60 * 7); # The default.
......@@ -969,8 +933,8 @@ while ( @nodelist ) {
goto tbfailed;
}
print "$node is alive and well\n";
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_READY() );
$nodeobj->SetBootStatus(NODEBOOTSTATUS_OKAY);
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_READY());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
next;
}
......@@ -980,8 +944,7 @@ while ( @nodelist ) {
# swap was canceled.
#
if (!$canceled) {
TBGetCancelFlag($pid, $eid, \$canceled);
$canceled = $experiment->canceled();
if ($canceled) {
tbnotice({cause => 'canceled', severity => SEV_IMMEDIATE,
error => ['cancel_flag']},
......@@ -1005,7 +968,7 @@ while ( @nodelist ) {
tbwarn "$node may be down. This has been reported to testbed-ops.";
tbfailed:
Node::SetBootStatus($node, NODEBOOTSTATUS_FAILED);
$nodeobj->SetBootStatus(NODEBOOTSTATUS_FAILED);
if ($canfail{$node} && !($canceled || $noretry)) {
push(@informuser, $node);
......@@ -1021,12 +984,11 @@ while ( @nodelist ) {
# and allow it to be returned to the pool (caller, tbswap will end
# doing the nfree on nodes with a DOWN allocstate).
#
my $pidofosid;
my $pidofosid = $osmap{$node}->pid();
if (!exists($geninodes{$node}) &&
(! TBOsidToPid($osids{$node}, \$pidofosid) ||
$pidofosid eq TBOPSPID())) {
Node::MarkAsIll($node);
TBSetNodeLogEntry($node, $user_uid, TB_DEFAULT_NODELOGTYPE(),
(! defined($pidofosid) || $pidofosid eq TBOPSPID())) {
$nodeobj->MarkAsIll();
$nodeobj->InsertNodeLogEntry($this_user, TB_DEFAULT_NODELOGTYPE(),
"'Moved to hwcheckup by os_setup; ".
"failed to boot image for osid " . $osmap{$node} .
" in $pid/$eid'");
......@@ -1034,7 +996,7 @@ while ( @nodelist ) {
} else {
push(@informtbopswarn, $node);
}
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_DOWN() );
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_DOWN());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_DOWN();
$failed++;
......@@ -1119,7 +1081,7 @@ foreach my $vnode (@vnodelist) {
if (!exists($rebooted{$pnode}) && !exists($reconfigs{$pnode}));
if ($nodeAllocStates{$pnode} eq TBDB_ALLOCSTATE_RES_READY()) {
TBSetNodeAllocState($vnode, TBDB_ALLOCSTATE_RES_READY());
$nodeobjs{$vnode}->SetAllocState(TBDB_ALLOCSTATE_RES_READY());
$nodeAllocStates{$vnode} = TBDB_ALLOCSTATE_RES_READY();
}
}
......@@ -1179,10 +1141,12 @@ elsif (@vnodelist) {
@vnodelist = sort(@vnodelist);
while ( @vnodelist ) {
my $node = shift(@vnodelist);
my $pnode = $vnode2pnode{$node};
my $islocal= exists($nodes{$pnode});
my $wstart = $waitstart{$node};
my $node = shift(@vnodelist);
my $nodeobj = $nodeobjs{$node};
my $pnode = $vnode2pnode{$node};
my $pnodeobj= $nodeobjs{$pnode};
my $islocal = exists($nodes{$pnode});
my $wstart = $waitstart{$node};
my $curallocstate;
my $actual_state;
my $maxwait;
......@@ -1211,7 +1175,7 @@ elsif (@vnodelist) {
$maxwait = $reboot_time + 60 * $pnodevcount{$pnode};
}
TBGetNodeAllocState($node, \$curallocstate);
$nodeobj->GetAllocState(\$curallocstate);
#
# See if vnode_setup already determined the node was dead.
......@@ -1231,15 +1195,15 @@ elsif (@vnodelist) {
TBDebugTimeStamp("Virtual node $node setup ISUP");
# Might have already been set above.
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY);
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_READY);
$nodeobj->SetBootStatus(NODEBOOTSTATUS_OKAY);
next;
}
vtbfailed:
TBDebugTimeStamp("Virtual node $node setup FAILED");
Node::SetBootStatus($node, NODEBOOTSTATUS_FAILED);
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_DOWN());
$nodeobj->SetBootStatus(NODEBOOTSTATUS_FAILED);
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_DOWN());
#
# If a local node, lets retry since jail setup appears to be
......@@ -1266,8 +1230,8 @@ elsif (@vnodelist) {
#
# XXX Need to deal with the same pnode being used twice.
#
MarkPhysNodeDown($pnode);
TBSetNodeLogEntry($pnode, $user_uid, TB_DEFAULT_NODELOGTYPE(),
$pnodeobj->MarkAsDown();
$pnodeobj->InsertNodeLogEntry($this_user, TB_DEFAULT_NODELOGTYPE(),
"'Moved to hwdown; ".
"$node ($pid/$eid) failed to setup'");
}
......@@ -1298,7 +1262,7 @@ elsif (@vnodelist) {
if (@retry_list) {
# Check cancel first.
if (!$canceled) {
TBGetCancelFlag($pid, $eid, \$canceled);
$canceled = $experiment->canceled();
if ($canceled) {
tbnotice({cause => 'canceled', severity => SEV_IMMEDIATE,
......@@ -1308,7 +1272,7 @@ elsif (@vnodelist) {
else {
# Mark each node so that vnode_setup will retry.
foreach my $node (@retry_list) {
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_INIT_DIRTY());
$nodeobjs{$node}->SetAllocState(TBDB_ALLOCSTATE_RES_INIT_DIRTY());
}
@vnodelist = @retry_list;
@retry_list = ();
......@@ -1657,23 +1621,6 @@ TBDebugTimeStamp("os_setup finished");
exit($exit_code);
#
# Map an OSID to an image for a node type.
#
sub TBMapOSIDtoImageID($$)
{
# Look up the image mapped to an OSID for a given node type.
#   $osid - the OSID to map.
#   $type - the node type to map it for.
# Returns 0 when the osidtoimageid table has no row for this (type, osid)
# pair; otherwise returns whatever Image->Lookup() yields for the
# selected imageid.
my ($osid, $type) = @_;
# NOTE(review): $type and $osid are interpolated directly into the SQL
# text; presumably callers only pass already-validated values -- confirm.
my $query_result =
DBQueryFatal("select imageid from osidtoimageid ".
"where type='$type' and osid='$osid'");
# No mapping exists for this node type / OSID combination.
if ($query_result->numrows == 0) {
return 0;
}
# fetchrow_array() is called in list context here, so the fetched row
# (the selected imageid column) becomes the argument list of Lookup().
return Image->Lookup($query_result->fetchrow_array());
}
#
# Setup a reload of a node if we can find an image.
# This goo constructs a hashed array of lists.
......@@ -1694,7 +1641,7 @@ sub SetupReload($$$)
$type = "pcvm";
}
if ((my $image = TBMapOSIDtoImageID($osinfo->osid(), $type))) {
if ((my $image = $osinfo->MapToImage($type))) {
# XXX firewall is treated special
if ($firewalled && ($node eq $firewall)) {
$firewallimage = $image;
......@@ -1774,6 +1721,7 @@ sub FirewallSetup($)
sub os_setup_one($$$;$)
{
my ($node,$image,$msgstr,$reboot_waittime) = @_;
my $nodeobj = $nodeobjs{$node};
#
# XXX this is probably not entirely right.
......@@ -1789,7 +1737,7 @@ sub os_setup_one($$$;$)
delete $reboots{$node};
delete $reconfigs{$node};
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_RELOAD());
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_RELOAD());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_RELOAD();
my @nodelist = ($node);
......@@ -1825,8 +1773,8 @@ sub os_setup_one($$$;$)
return 0;
}
print "$node is alive and well\n";
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY());
$nodeobj->SetBootStatus(NODEBOOTSTATUS_OKAY);
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_READY());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
} else {
tbwarn "$msgstr $node reload timed-out";
......@@ -1841,10 +1789,10 @@ sub os_setup_one($$$;$)
delete $reboots{$node};
if ($nodeAllocStates{$node} eq TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_REBOOT_CLEAN());
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_REBOOT_CLEAN());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_REBOOT_CLEAN();
} else {
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_REBOOT_DIRTY());
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_REBOOT_DIRTY());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_REBOOT_DIRTY();
}
......@@ -1861,8 +1809,8 @@ sub os_setup_one($$$;$)
return 0;
}
print "$node is alive and well\n";
Node::SetBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY());
$nodeobj->SetBootStatus(NODEBOOTSTATUS_OKAY);
$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_READY());
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment