Commit 0197f41d authored by Leigh B. Stoller

Some batch mode changes. In the early days we did not have such fancy
tb tools! I've changed the batch system to "preload" the experiment in
foreground mode (results of the parse are spit back to the user directly).
The batch daemon now uses swapexp instead of startexp. Upon failure, the
experiment goes back to the "swapped" state; previously its virt state
was blasted and re-entered on the next try. This is nice because you can
actually look at the batch experiment (vis, virt tables, etc.) while it
is posted and not running.

Not sure if all the Ts are crossed. Will find out ...
parent 3179793f
......@@ -53,8 +53,7 @@ use libtestbed;
$libdb::DBQUERY_MAXTRIES = 10;
my $tbbindir = "$TB/bin/";
my $batchdir = "$TB/batch";
my $startexp = "$TB/bin/startexp";
my $swapexp = "$TB/bin/swapexp";
my $endexp = "$TB/bin/endexp";
my $savelogs = "$TB/bin/savelogs";
my $avail = "$TB/sbin/avail";
......@@ -349,9 +348,9 @@ sub startexp($)
my $attempts = $exphash{'attempts'};
#
# Try to start the experiment.
# Try to swap the experiment in.
#
system("$startexp -b -g $gid $pid $eid $nsfile");
system("$swapexp -b -s in $pid $eid");
$exit_status = $? >> 8;
$running = 1;
if ($exit_status) {
......@@ -429,7 +428,7 @@ sub startexp($)
# There is some state that needs to be reset so that another
# attempt can be made.
#
SetExpState($pid, $eid, EXPTSTATE_NEW);
SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
TBSetBatchState($pid, $eid, $BSTATE_POSTED);
exit($exit_status);
......@@ -491,17 +490,12 @@ sub cancelexp($$)
TBSetBatchState($pid, $eid, $BSTATE_TERMINATING);
if ($running) {
system("$endexp -b $pid $eid");
if ($?) {
#
# TB admin is going to have to clean up.
#
fatal("Terminating Batch Mode experiment $pid/$eid");
}
}
else {
TBExptDestroy($pid, $eid);
system("$endexp -b $pid $eid");
if ($?) {
#
# TB admin is going to have to clean up.
#
fatal("Terminating Batch Mode experiment $pid/$eid");
}
donotify("Your Batch Mode experiment has been canceled!", "Canceled", 0);
......
......@@ -224,11 +224,17 @@ if ($immediate) {
if (system("$startexp $farg -g $gid $pid $eid $nsfile")) {
fatal("Failed to start experiment $pid/$eid!");
}
exit(0);
}
if (! TBSetBatchState($pid, $eid, BATCHSTATE_POSTED)) {
fatal("DB Error in batch system insertion!");
else {
#
# Preload the experiment in the foreground. User sees parse errors
# right away, and the experiment is now in the system so we can look
# at it.
#
if (system("$startexp -f -b -g $gid $pid $eid $nsfile")) {
fatal("Failed to preload batch experiment $pid/$eid!");
}
TBSetBatchState($pid, $eid, BATCHSTATE_POSTED);
}
exit(0);
......
......@@ -183,7 +183,7 @@ if ($isbatchexpt && $ebatchstate ne BATCHSTATE_TERMINATING) {
if ($ebatchstate ne BATCHSTATE_POSTED) {
#
# Daemon does the rest ... Exit with non zero status so that caller
# knows (web server) that the batch experiement cannot be ended
# knows (web server) that the batch experiment cannot be ended
# at this time.
#
print "Batch Experiment $eid in project $pid is currently running.\n".
......@@ -191,14 +191,9 @@ if ($isbatchexpt && $ebatchstate ne BATCHSTATE_TERMINATING) {
"torn down and you can reuse the experiment name\n";
exit(0);
}
DBQueryFatal("unlock tables");
#
# Cleanup Experiment state.
# Let termination proceed normally.
#
TBExptDestroy($pid, $eid);
exit(1);
}
#
......
......@@ -21,7 +21,7 @@ use Getopt::Std;
sub usage()
{
print STDOUT
"Usage: startexp [-b | -f] [-g gid] <pid> <eid> <nsfile>\n";
"Usage: startexp [-f [-b]] [-g gid] <pid> <eid> <nsfile>\n";
exit(-1);
}
my $optlist = "bg:f";
......@@ -94,10 +94,9 @@ if (defined($options{"b"})) {
if (defined($options{"f"})) {
$frontend = 1;
}
if ($batch && $frontend) {
if ($batch && !$frontend) {
usage();
}
if (defined($options{"g"})) {
$gid = $options{"g"};
}
......@@ -281,14 +280,6 @@ else {
GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_START, 0);
}
#
# In batchmode, send the report to stdout for the batch daemon.
#
if ($batch) {
system("$tbdir/tbreport -b $pid $eid");
print STDOUT "\n\n";
}
# Yippie!
print STDOUT "Setup Success\n";
......@@ -379,6 +370,13 @@ sub fatal()
GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_START, $errorstat);
}
#
# Must clean state.
#
if ($estate ne EXPTSTATE_NEW) {
tbendit();
}
#
# In batch mode, exit. Must unlock the experiment since the record
# is kept by the batch system until it is finished or canceled.
......@@ -389,13 +387,6 @@ sub fatal()
exit($errorstat);
}
#
# If we got far enough to allocate nodes, must run tbend.
#
if ($estate ne EXPTSTATE_NEW) {
tbendit();
}
#
# Okay, we *are* going to terminate the experiment.
#
......
......@@ -16,11 +16,11 @@ use Getopt::Std;
sub usage()
{
print STDOUT "Usage: swapexp [-i] [-r] <-s in | out | restart | modify> " .
"<pid> <eid> [<nsfile>]\n";
print STDOUT "Usage: swapexp [-b] [-i] [-r] ".
"<-s in | out | restart | modify> <pid> <eid> [<nsfile>]\n";
exit(-1);
}
my $optlist = "s:ir";
my $optlist = "s:irb";
#
# Configure variables
......@@ -82,6 +82,9 @@ if (! getopts($optlist, \%options)) {
if (defined($options{"i"})) {
$idleswap = 1;
}
if (defined($options{"b"})) {
$batch = 1;
}
if (defined($options{"r"})) {
$reboot = 1;
}
......@@ -94,6 +97,10 @@ if (defined($options{"s"})) {
$inout ne "modify") {
usage();
}
if ($batch &&
($inout ne "out" && $inout ne "in")) {
usage();
}
}
else {
usage();
......@@ -203,9 +210,10 @@ if (defined($hashrow{'expt_locked'})) {
}
#
# Disallow batch experiment swaps for now.
# Do not allow the user to swap a batch experiment. It has to come via
# the batch daemon.
#
if ($isbatchexpt) {
if ($isbatchexpt && !$batch) {
die("*** $0:\n".
" Batch experiments cannot be swapped or modified yet!");
}
......@@ -557,7 +565,7 @@ sub fatal($)
#
if ($batch) {
TBUnLockExp($pid, $eid);
exit(-1);
exit($errorstat);
}
#
......@@ -591,7 +599,7 @@ sub fatal($)
TBExptDestroy($pid, $eid);
}
exit(-1);
exit($errorstat);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment