Commit 92ff875a authored by Leigh Stoller

A set of changes to make swapmod work on jailed nodes (note, swapmod
does not yet work with remote virtual nodes; that will take even more
work).

Added a new allocstate called RES_TEARDOWN. assign_wrapper no longer
deallocates unused nodes, but rather moves them into the new state for
the wrapper (tbswap) to deal with. That is because deleted vnodes need to
be torn down, since it is possible that the node on which they were
living will not be deallocated (say, if there are other vnodes on
it). We do not want to be doing that from assign_wrapper, so tbswap
looks for those nodes.

Made vnode_setup allocstate aware in the same way that os_setup is;
do not reboot vnodes or try to set up vnodes when they are already in
the RES_READY state, as they will be when doing a swapmod. In
addition, if os_setup is going to reboot the underlying physnode, move
the vnodes on that node into RES_READY too, since they will be set up
automatically when the physnode reboots. Might need an interim state
here, for correctness.
parent ffcdfeca
......@@ -103,6 +103,7 @@ use Exporter;
TBDB_ALLOCSTATE_RES_INIT_DIRTY TBDB_ALLOCSTATE_RES_INIT_CLEAN
TBDB_ALLOCSTATE_RES_REBOOT_DIRTY TBDB_ALLOCSTATE_RES_REBOOT_CLEAN
TBDB_ALLOCSTATE_RES_READY TBDB_ALLOCSTATE_UNKNOWN
TBDB_ALLOCSTATE_RES_TEARDOWN
TBDB_STATS_PRELOAD TBDB_STATS_START TBDB_STATS_TERMINATE
TBDB_STATS_SWAPIN TBDB_STATS_SWAPOUT TBDB_STATS_SWAPMODIFY
......@@ -442,6 +443,7 @@ sub TBDB_ALLOCSTATE_RES_REBOOT_CLEAN() { "RES_REBOOT_CLEAN"; }
sub TBDB_ALLOCSTATE_RES_INIT_DIRTY() { "RES_INIT_DIRTY"; }
sub TBDB_ALLOCSTATE_RES_INIT_CLEAN() { "RES_INIT_CLEAN"; }
sub TBDB_ALLOCSTATE_RES_READY() { "RES_READY"; }
sub TBDB_ALLOCSTATE_RES_TEARDOWN() { "RES_TEARDOWN"; }
sub TBDB_ALLOCSTATE_UNKNOWN() { "UNKNOWN"; };
sub TBDB_TBCONTROL_RESET { "RESET"; }
......
This diff is collapsed.
......@@ -82,6 +82,7 @@ my @row;
#
my %reloads = ();
my %reboots = ();
my %willreboot = ();
my $doautoload = 1;
my $dolastload = 1;
......@@ -426,14 +427,26 @@ foreach my $vnode (keys(%vnodes)) {
if (!defined($pnodevcount{$pnode}));
$pnodevcount{$pnode}++;
$vnode2pnode{$vnode} = $pnode;
if (!defined($nodes{$pnode})) {
if (!exists($nodes{$pnode})) {
#
# Typical on remote nodes; we do not allocate the underlying
# phys node to the experiment.
#
next;
}
#
# Set the allocstate for the local vnode. Used by vnode_setup to
# determine if a reboot is required. If the underlying physnode is
# going to be rebooted, move the node into the RES_READY state, since
# by definition, when the node reboots the vnode is going to get
# set up, and so vnode_setup should not do anything. Might need an
# intermediate state here, but not sure yet.
#
if (exists($reboots{$pnode})) {
TBSetNodeAllocState($vnode, TBDB_ALLOCSTATE_RES_READY());
}
# Nothing else to do for local jail nodes at this time ...
}
......@@ -446,6 +459,21 @@ if (!$TESTMODE) {
my $count = 0;
my $cmd;
foreach my $imageid ( keys(%reloads) ) {
my @list = @{ $reloads{$imageid} };
foreach my $node (@list) {
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_RELOAD() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_RELOAD();
# No point in rebooting, obviously.
delete $reboots{$node};
}
sleep(5);
$pids{"$os_load -m $imageid @list"} =
ForkCmd("$os_load -m $imageid @list");
}
if (keys(%reboots)) {
foreach my $node (keys(%reboots)) {
if ($nodeAllocStates{$node} eq TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
......@@ -461,19 +489,6 @@ if (!$TESTMODE) {
$pids{$cmd} = ForkCmd($cmd);
}
foreach my $imageid ( keys(%reloads) ) {
my @list = @{ $reloads{$imageid} };
foreach my $node (@list) {
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_RELOAD() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_RELOAD();
}
sleep(5);
$pids{"$os_load -m $imageid @list"} =
ForkCmd("$os_load -m $imageid @list");
}
foreach $cmd ( keys(%pids) ) {
my $pid = $pids{$cmd};
......@@ -637,10 +652,11 @@ elsif (@vnodelist) {
my $node = shift(@vnodelist);
my $pnode = $vnode2pnode{$node};
my $wstart = $waitstart{$node};
my $maxwait = 90 + (30 * $pnodevcount{$pnode});
my $maxwait = 90 + (3000 * $pnodevcount{$pnode});
if (!TBNodeStateWait($node, TBDB_NODESTATE_ISUP, $wstart, $maxwait)) {
print "$node is alive and well\n";
# Might have been set above.
TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_READY);
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
next;
......@@ -722,7 +738,6 @@ sub SetupReload($$$)
else {
push(@{ $reloads{$imageid} }, $node);
}
delete $reboots{$node};
}
else {
die_noretry("*** $0:\n".
......
......@@ -5,7 +5,6 @@
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
# Returns 0 on success.
......@@ -518,7 +517,7 @@ sub doSwapout($) {
"n.node_id = rv.node_id ".
"where rv.pid='$pid' and rv.eid='$eid'");
while (($node,$allocstate) = $db_result->fetchrow_array) {
while (my ($node,$allocstate) = $db_result->fetchrow_array) {
if ($allocstate ne TBDB_ALLOCSTATE_RES_READY()) {
push(@failedNodes, $node);
}
......@@ -642,6 +641,64 @@ sub doSwapin($) {
print "Mapped to physical reality!\n";
}
#
# Look for any nodes in RES_TEARDOWN. These need to be released,
# and if a virtnode, they need to be torn down. We cannot wait for
# the virtnodes to go down with the physnode they are hosted on,
# so teardown and release the virtnodes first, and then do the
# physnodes.
#
# Errors are fatal; no recovery or retry.
#
if ($type == UPDATE) {
my $allocstate = TBDB_ALLOCSTATE_RES_TEARDOWN();
$db_result =
DBQueryFatal("select r.node_id,nt.isvirtnode,nt.isremotenode ".
" from reserved as r ".
"left join nodes as n on n.node_id=r.node_id ".
"left join node_types as nt on nt.type=n.type ".
"where r.pid='$pid' and r.eid='$eid' and ".
" n.allocstate='$allocstate'");
if ($db_result->numrows) {
my @virtnodes = ();
my @physnodes = ();
print "Tearing down and releasing unused nodes\n";
# First teardown/release virtnodes.
while (my ($node,$isvirt,$isrem) = $db_result->fetchrow_array()) {
if ($isvirt) {
push(@virtnodes, $node);
}
elsif (!$isrem) {
push(@physnodes, $node);
}
}
if (@virtnodes) {
TBDebugTimeStamp("vnode_setup started");
if (system("vnode_setup -f -k $pid $eid @virtnodes")) {
print "Failed to tear down unused virtnodes!\n";
return 1;
}
TBDebugTimeStamp("vnode_setup finished");
if (system("nfree $pid $eid @virtnodes")) {
print "Failed to nfree unused virtnodes!\n";
return 1;
}
}
if (@physnodes) {
if (system("nfree $pid $eid @physnodes")) {
print "Failed to nfree unused physnodes!\n";
return 1;
}
}
}
}
# Exit here if we are testing.
if ($TESTMODE) {
print "Testing run - Stopping here.\n";
......@@ -688,7 +745,7 @@ sub doSwapin($) {
"n.node_id = rv.node_id ".
"where rv.pid='$pid' and rv.eid='$eid'");
while (($node) = $db_result->fetchrow_array) {
while (my ($node) = $db_result->fetchrow_array) {
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_INIT_DIRTY() );
}
}
......@@ -707,10 +764,11 @@ sub doSwapin($) {
}
#
# XXX: Don't add any steps between here and the waitpid() call below without
# verifying that 1) It's OK for nodes to come up before the step has
# completed and 2) It's OK for the command to run in parallel with os_setup
# (no DB dependencies, etc.)
# XXX
# Don't add any steps between here and the waitpid() call below
# without verifying that 1) It's OK for nodes to come up before
# the step has completed and 2) It's OK for the command to run in
# parallel with os_setup (no DB dependencies, etc.)
#
print "Setting up VLANs.\n";
......
......@@ -5,7 +5,6 @@
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
use Getopt::Std;
......@@ -19,11 +18,20 @@ use Getopt::Std;
#
sub usage()
{
print STDOUT "Usage: vnode_setup [-f] [-k] <pid> <eid>\n";
print STDOUT "Usage: vnode_setup [-f] [-k] <pid> <eid> [node ...]\n";
exit(-1);
}
my $optlist = "fdk";
#
# We don't want to run this script unless it's the real version.
# That is, it must be setuid root.
#
if ($EUID != 0) {
die("*** $0:\n".
" Must be root! Maybe its a development version?\n");
}
#
# Configure variables
#
......@@ -40,7 +48,6 @@ my $failed = 0;
my $killmode = 0;
my $mode = "setup";
my $dbuid;
my $exptstate;
#
# Load the Testbed support stuff.
......@@ -64,7 +71,7 @@ $| = 1;
if (! getopts($optlist, \%options)) {
usage();
}
if (@ARGV != 2) {
if (@ARGV < 2) {
usage();
}
if (defined($options{"f"})) {
......@@ -77,8 +84,8 @@ if (defined($options{"k"})) {
$killmode = 1;
$mode = "teardown";
}
my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $pid = shift(@ARGV);
my $eid = shift(@ARGV);
#
# Untaint the arguments.
......@@ -94,23 +101,6 @@ if ($eid =~ /^([-\@\w]+)$/) {
}
else {
die("*** Bad data in eid: $eid\n");
}
#
# We don't want to run this script unless its the real version.
# That is, it must be setuid root.
#
if ($EUID != 0) {
die("*** $0:\n".
" Must be root! Maybe its a development version?\n");
}
#
# Verify actual user and get his DB uid.
#
if (! UNIX2DBUID($UID, \$dbuid)) {
die("*** $0:\n".
" You do not exist in the Emulab Database.\n");
}
#
......@@ -123,19 +113,46 @@ if (!TBAdmin($UID) &&
}
#
# Get the list of nodes.
# Get the list of nodes in this experiment.
#
my @nodes = ExpNodes($pid, $eid);
if (! @nodes) {
warn("*** $0:\n".
" No allocated nodes in experiment $pid/$eid!\n");
exit(0);
die("*** $0:\n".
" No allocated nodes in experiment $pid/$eid!\n");
}
# Nodes on the command line. Operate only on this set.
if (@ARGV) {
my %fulllist = ();
# Temporary hash list for searching.
foreach my $node ( @nodes ) {
$fulllist{$node} = 1;
}
@nodes = ();
foreach my $node ( @ARGV ) {
if ($node =~ /^([-\@\w]+)$/) {
$node = $1;
if (!defined($fulllist{$node})) {
die("*** $0:\n".
" Node $node is not allocated to $pid/$eid!\n");
}
}
else {
die("Bad node name: $node.");
}
push(@nodes, $node);
}
}
my $exptstate = ExpState($pid, $eid);
# Just the vnodes mam.
foreach my $node (@nodes) {
my $pnode;
my $jailed;
my $allocstate;
if (! TBIsNodeVirtual($node, \$jailed)) {
next;
......@@ -144,20 +161,34 @@ foreach my $node (@nodes) {
die("*** $0:\n".
" No physical node for $node!\n");
}
if (!TBIsNodeRemote($node) && !$force) {
#
# A local vnode. If the experiment is activating, then the
# underlying physnode will setup the vnode as it boots, so
# no need to do anything. Ditto for swapping out; the vnode
# will get torn down when the node goes down, obviously.
#
my $exptstate = ExpState($pid, $eid);
if ($exptstate eq EXPTSTATE_SWAPPING ||
$exptstate eq EXPTSTATE_ACTIVATING) {
print "$node will automatically $mode with local node $pnode\n";
#
# On remote nodes, or when forcemode is on, always do the deed.
# Otherwise, look at experiment state.
#
if (!TBIsNodeRemote($node) && !$force) {
if ($exptstate eq EXPTSTATE_SWAPPING) {
#
# When swapping, local vnodes go down with the physnode.
#
print "$node will $mode with local node $pnode\n";
next;
}
elsif ($exptstate eq EXPTSTATE_ACTIVATING) {
#
# The allocstate determines if the vnode actually needs to be
# setup. See os_setup and assign_wrapper; the idea is to
# avoid setting up vnodes on machines that are rebooting
# anyway. Complicated by modify which could add a vnode to an
# existing machine, but not reboot the machine.
#
TBGetNodeAllocState($node, \$allocstate);
if ($allocstate eq TBDB_ALLOCSTATE_RES_READY()) {
print "$node will $mode with local node $pnode\n";
next;
}
}
}
#
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment