Commit e928fbe9 authored by Chad Barb

Here it is; reswap.

nfree
   - modified to put node in FREE_DIRTY when it is freed

assign_wrapper
   - '-u' update switch added.

os_setup
   - doesn't reboot a node that is already in RES_READY

tbswap
   - calls all this stuff appropriately
parent 069b7dd2
......@@ -275,10 +275,13 @@ foreach my $n (@freed_nodes) {
#}
#print "\n";
#}
my $allocFreeState = TBDB_ALLOCSTATE_FREE_DIRTY();
DBQueryWarn("update nodes set startupcmd='',rpms='',deltas='', ".
"tarballs='',failureaction='fatal', routertype='none', ".
"def_boot_cmd_line='',next_boot_cmd_line='', ".
"update_accounts=0,ipport_next=ipport_low ".
"update_accounts=0,ipport_next=ipport_low, ".
"allocstate='$allocFreeState' ".
"where node_id='$n'") || $error++;
# Clean out the current_reloads table (a just in case measure).
......
......@@ -70,21 +70,28 @@ $| = 1;
use Getopt::Std;
getopts('v',\%opt);
getopts('vu',\%opt);
#
# Print a usage summary listing every supported switch, then exit
# with status -1. Takes no arguments and never returns.
#
sub usage {
    # The synopsis must list each switch accepted by getopts(); -u was
    # described below but missing from the synopsis line.
    print "Usage: $0 [-v] [-u] pid eid\n";
    print " -v enables verbose output\n";
    print " -u enables update functionality\n";
    exit(-1);
}
my $verbose = 0;
my $updating = 0;
if ($opt{v}) {
$verbose = 1;
$verbose = 1;
}
if ($opt{u}) {
$updating = 1;
}
if (@ARGV != 2) {
usage();
usage();
}
($pid,$eid) = @ARGV;
......@@ -242,19 +249,30 @@ my $remotecount = 0;
my $virtcount = 0;
my $virtnode_id = 0;
my %alreadyAllocated = ();
printdb "Loading virt_nodes.\n";
$result =
DBQueryFatal("select distinct vname,ips,vn.type,fixed, ".
" nt.isremotenode,nt.isvirtnode ".
DBQueryFatal("select distinct vn.vname,vn.ips,vn.type,vn.fixed, ".
" nt.isremotenode,nt.isvirtnode,rv.node_id ".
" from virt_nodes as vn ".
"left join node_types as nt on ".
" nt.type=vn.type or nt.class=vn.type ".
"where pid='$pid' and eid='$eid'");
while (($vname,$ips,$type,$fixed,$isremote,$isvirt) = $result->fetchrow_array){
"left join reserved as rv on ".
" rv.pid = vn.pid and ".
" rv.eid = vn.eid and ".
" rv.vname = vn.vname ".
"where vn.pid='$pid' and vn.eid='$eid'");
while (($vname,$ips,$type,$fixed,$isremote,$isvirt,$reserved) =
$result->fetchrow_array){
if (defined($fixed) && $fixed eq "") {
undef($fixed);
}
if (defined($reserved) && $reserved eq "") {
undef($reserved);
}
# REMOTENODE HACK
#
......@@ -299,6 +317,12 @@ while (($vname,$ips,$type,$fixed,$isremote,$isvirt) = $result->fetchrow_array){
$isvirtnode{$vname} = $isvirt;
$virtcount++
if ($isvirt);
if ($updating && $reserved) {
$fixed = $reserved;
$alreadyAllocated{$reserved} = 1;
}
if (defined($fixed)) {
$fixed_nodes{$vname} = $fixed;
}
......@@ -787,7 +811,16 @@ while (1) {
TBDebugTimeStamp("ptopgen started");
# Snapshot
system("ptopgen > $ptopfile");
#
# if updating (-u), include any resources that may already be
# allocated to experiment in the PTOP results.
#
if ($updating) {
system("ptopgen -e $pid/$eid > $ptopfile");
} else {
system("ptopgen > $ptopfile");
}
TBDebugTimeStamp("ptopgen finished");
# Get number of nodes
......@@ -852,7 +885,10 @@ while (1) {
if (defined($lannodes{$virtual})) {
next;
}
$toreserve{$physical} = 1;
if (!$alreadyAllocated{$physical}) {
$toreserve{$physical} = 1;
}
if ($isvirtnode{$virtual}) {
#
......@@ -939,6 +975,12 @@ while (1) {
print "Failed to reserve nodes. Trying again.\n";
} else {
print "Successfully reserved physical nodes\n";
foreach $node (keys(%toreserve)) {
# in future, this will be a fully enforced state machine.
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_INIT_DIRTY() );
}
TBDebugTimeStamp("reserving finished");
last;
}
......
......@@ -151,8 +151,17 @@ while (my %row = $db_result->fetchhash()) {
$jailnode = 1;
}
else {
my $nodeAllocState;
TBGetNodeAllocState( $node, \$nodeAllocState );
$nodes{$node} = $node;
$reboots{$node} = 1;
$nodeAllocStates{$node} = $nodeAllocState;
# only reboot node if assign_wrapper just pulled it into expt.
# (i.e. it isn't ALLOCSTATE_RES_READY)
#if (($nodeAllocState eq TBDB_ALLOCSTATE_RES_INIT_DIRTY()) ||
# ($nodeAllocState eq TBDB_ALLOCSTATE_RES_INIT_CLEAN())) {
if ($nodeAllocState ne TBDB_ALLOCSTATE_RES_READY()) {
$reboots{$node} = 1;
}
}
$osids{$node} = $osid;
......@@ -409,6 +418,16 @@ if (!$TESTMODE) {
my $cmd;
if (keys(%reboots)) {
foreach my $node (keys(%reboots)) {
if ($nodeAllocStates{$node} eq TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_REBOOT_CLEAN() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_REBOOT_CLEAN();
} else {
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_REBOOT_DIRTY() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_REBOOT_DIRTY();
}
}
$cmd = "$nodereboot " . join(" ", keys(%reboots));
$pids{$cmd} = ForkCmd($cmd);
}
......@@ -416,6 +435,11 @@ if (!$TESTMODE) {
foreach my $imageid ( keys(%reloads) ) {
my @list = @{ $reloads{$imageid} };
foreach my $node (@list) {
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_RELOAD() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_RELOAD();
}
sleep(5);
$pids{"$os_load -m $imageid @list"} =
ForkCmd("$os_load -m $imageid @list");
......@@ -490,6 +514,8 @@ while ( @nodelist ) {
if (!TBNodeStateWait($node, TBDB_NODESTATE_ISUP, $wstart, (60*7))) {
print "$node is alive and well\n";
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_READY() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
next;
}
......@@ -535,6 +561,8 @@ while ( @nodelist ) {
# Reserve it to down experiment.
MarkNodeDown($node);
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_DOWN() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_DOWN();
# Send mail to testbed-ops about it
SENDMAIL($TBOPS, "Node $node is down",
......@@ -542,7 +570,7 @@ while ( @nodelist ) {
"$node has been taken out of the pool until this matter ".
"is resolved.\n");
print "*** Experiment will be terminated automatically.\n";
# print "*** Experiment will be terminated automatically.\n";
$failed++;
}
TBDebugTimeStamp("Local node waiting finished");
......
......@@ -357,6 +357,7 @@ sub doSwapout {
#
# We're not attempting a retry;
# remove all nodes from the experiment.
# (nfree will send them to FREE_DIRTY)
#
print STDERR "Freeing nodes.\n";
TBDebugTimeStamp("nfree started");
......@@ -370,7 +371,7 @@ sub doSwapout {
# Since $retry == 1, we are preparing for an experiment retry.
# Therefore, don't deallocate nodes which have been successfully
# incorporated into the experiment (i.e., are RES_READY).
# Set these deallocated nodes to RES_FREE_DIRTY.
# (nfree will send deallocated nodes to FREE_DIRTY)
#
my @failedNodes = ();
......@@ -390,14 +391,14 @@ sub doSwapout {
if (@failedNodes > 0) {
TBDebugTimeStamp("nfree started");
if (system("nfree $pid $eid " . join(" ", @failedNodes))) {
#
# Specify -x switch so when a physical node gets freed,
# any virtual nodes (owned by this experiment)
# sitting on top of it are freed as well.
#
if (system("nfree -x $pid $eid " . join(" ", @failedNodes))) {
print STDERR "*** Could not free nodes.\n";
$swapout_errors = 1;
} else {
# set nodes as free in ALLOC state machine.
foreach my $i (@failedNodes) {
TBSetNodeAllocState( $i, TBDB_ALLOCSTATE_FREE_DIRTY() );
}
}
TBDebugTimeStamp("nfree finished");
}
......@@ -473,8 +474,13 @@ sub doSwapin {
print "Mapping to physical reality ...\n";
TBDebugTimeStamp("assign_wrapper started");
#
# Pass the -u (update) switch into assign_wrapper,
# so that any nodes already in the experiment stay
# fixed to the physical nodes they are reserved on.
#
my $exitcode;
if ($exitcode = system("assign_wrapper $pid $eid")) {
if ($exitcode = system("assign_wrapper -u $pid $eid")) {
print STDERR "*** Failed to map to reality.\n";
# Pass exit code through
return $exitcode >> 8;
......@@ -579,11 +585,8 @@ sub doSwapin {
# that it may be beneficial to attempt
# a doSwapin() again.
#
# Disabled for now, until appropriate changes
# are made in assign_wrapper and os_setup.
#
# $retry = 1;
$retry = 1;
return 1;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment