Commit da97ba35 authored by Leigh Stoller's avatar Leigh Stoller

Reorg the batch system slightly as per Eric's request that batch mode

experiments look more like regular experiments. Batch mode experiments
can now be preloaded and swapped. When preloaded, they go into a
"Pause" state. Swapping a batch mode experiment in puts them into the
"posted" state so the batch daemon will see them. Swapping out a
batch mode experiment does the expected; it puts them back into the
Pause state. Terminating a batch mode experiment does the expected;
it's gone. When a batch mode experiment finishes normally, it goes back
into the pause state, which allows batches to be reinjected as many
times as Eric likes.
parent 821dcf4a
......@@ -63,7 +63,8 @@ use Exporter;
EXPTSTATE_TERMINATING EXPTSTATE_TERMINATED EXPTSTATE_UPDATING
BATCHSTATE_POSTED BATCHSTATE_RUNNING BATCHSTATE_TERMINATING
BATCHSTATE_ACTIVATING
BATCHSTATE_ACTIVATING BATCHSTATE_PAUSED
BATCHMODE_CANCELTERM BATCHMODE_CANCELSWAP
TBBatchState TBSetBatchState
TB_NODELOGTYPE_MISC TB_NODELOGTYPES TB_DEFAULT_NODELOGTYPE
......@@ -261,7 +262,11 @@ sub EXPTSTATE_UPDATING() { "updating"; }
sub BATCHSTATE_POSTED() { "posted"; }
sub BATCHSTATE_ACTIVATING() { "activating"; }
sub BATCHSTATE_RUNNING() { "active"; }
sub BATCHSTATE_PAUSED() { "paused"; }
sub BATCHSTATE_TERMINATING() { "terminating"; }
# Cancel flags. One of these is written to the "canceled" column of the
# experiments table; the batch daemon compares the column against them to
# decide whether a running batch is being terminated or just swapped out.
#
# Declared with an empty prototype, like the other constants here, so Perl
# parses them as terms rather than list operators: BATCHMODE_CANCELSWAP + 1
# is 3, not a call with +1 as its argument.
sub BATCHMODE_CANCELTERM() { 1; }	# Cancel by terminating the experiment.
sub BATCHMODE_CANCELSWAP() { 2; }	# Cancel by swapping the experiment out.
sub USERSTATUS_ACTIVE() { "active"; }
sub USERSTATUS_FROZEN() { "frozen"; }
......
......@@ -65,6 +65,7 @@ my $BSTATE_POSTED = BATCHSTATE_POSTED;
my $BSTATE_ACTIVATING = BATCHSTATE_ACTIVATING;
my $BSTATE_RUNNING = BATCHSTATE_RUNNING;
my $BSTATE_TERMINATING = BATCHSTATE_TERMINATING;
my $BSTATE_PAUSED = BATCHSTATE_PAUSED;
#
# These are valid in the children, not the parent. I suppose I could use
......@@ -220,11 +221,17 @@ while (1) {
while (%row = $running_result->fetchhash()) {
my $canceled = $row{'canceled'};
if ($canceled) {
dosomething("cancel", %row);
# Look at the cancel flag.
if ($canceled == BATCHMODE_CANCELTERM) {
dosomething("cancel", %row);
}
elsif ($canceled == BATCHMODE_CANCELSWAP) {
dosomething("swap", %row);
}
next;
}
if (isexpdone(%row)) {
dosomething("end", %row);
dosomething("swap", %row);
next;
}
}
......@@ -328,8 +335,8 @@ sub dosomething($$)
if ($dowhat eq "start") {
startexp(%exphash);
}
elsif ($dowhat eq "end") {
endexp(%exphash);
elsif ($dowhat eq "swap") {
swapexp(%exphash);
}
elsif ($dowhat eq "cancel") {
cancelexp(1, %exphash);
......@@ -449,30 +456,37 @@ sub startexp($)
}
#
# End an experiment. Never returns.
# A batch has completed. Swap it out.
#
sub endexp($)
sub swapexp($)
{
my(%exphash) = @_;
my $canceled = $exphash{'canceled'};
#
# Save tiplogs
#
system("$savelogs $pid $eid");
#
# Have to set the state to terminating or else endexp will not accept it.
# Have to set the state to terminating or else swapexp will not accept it.
#
TBSetBatchState($pid, $eid, $BSTATE_TERMINATING);
system("$endexp -b $pid $eid");
system("$swapexp -b -s out $pid $eid");
if ($?) {
#
# TB admin is going to have to clean up.
#
fatal("Terminating Batch Mode experiment $pid/$eid");
fatal("Swapping out Batch Mode experiment $pid/$eid");
}
#
# Set the state to paused to ensure that it is not run again until
# the user wants to.
#
TBSetBatchState($pid, $eid, $BSTATE_PAUSED);
if ($canceled) {
email_status("Batch Mode experiment $pid/$eid has been stopped!");
}
else {
email_status("Batch Mode experiment $pid/$eid has finished!");
}
email_status("Batch Mode experiment $pid/$eid has finished!");
#
# Child must exit!
......
......@@ -82,10 +82,6 @@ if (! UNIX2DBUID($UID, \$dbuid)) {
# Parse command arguments.
#
ParseArgs();
if (!$immediate) {
die("*** $0:\n".
" Batch system is disabled while we do some testing! Sorry.\n");
}
#
# Sanity check them.
......@@ -248,7 +244,10 @@ else {
if (system("$startexp -f -b -g $gid $pid $eid $nsfile")) {
fatal("Failed to preload batch experiment $pid/$eid!");
}
TBSetBatchState($pid, $eid, BATCHSTATE_POSTED);
# And drop the batch into the queue unless the user was preloading
# a batch experiment.
TBSetBatchState($pid, $eid,
($frontend ? BATCHSTATE_PAUSED : BATCHSTATE_POSTED));
}
exit(0);
......@@ -311,9 +310,6 @@ sub ParseArgs()
if (defined($options{"f"})) {
$frontend = 1;
}
if ($frontend && !$immediate) {
usage();
}
if (defined($options{"p"})) {
$pid = $options{"p"};
......
......@@ -171,25 +171,44 @@ if (defined($hashrow{'expt_locked'})) {
# to determine when this was invoked from the batch daemon for a valid
# teardown.
#
if ($isbatchexpt && $ebatchstate ne BATCHSTATE_TERMINATING) {
if ($isbatchexpt) {
#
# Set the canceled state. This will prevent the batch_daemon from trying
# to run it (once the table is unlocked). It might already be running,
# but we deal with that by looking at the batch state.
# Sanity Check. If called from the daemon, must be in the proper state.
#
DBQueryFatal("UPDATE experiments set canceled=1 ".
"WHERE eid='$eid' and pid='$pid'");
if ($ebatchstate ne BATCHSTATE_POSTED) {
if ($batch) {
die("*** $0:\n".
" Batch experiment $pid/$eid is not in the correct state!\n".
" Currently $ebatchstate, but should be TERMINATING\n")
if ($ebatchstate ne BATCHSTATE_TERMINATING);
}
else {
#
# Daemon does the rest ... Exit with non zero status so that caller
# knows (web server) that the batch experiment cannot be ended
# at this time.
# Set the canceled flag. This will prevent the batch_daemon
# from trying to run it (once the table is unlocked). It might
# already be running, but we deal with that by looking at the
# batch state.
#
print "Batch Experiment $eid in project $pid is currently running.\n".
my $flag = BATCHMODE_CANCELTERM;
DBQueryFatal("UPDATE experiments set canceled=$flag ".
"WHERE eid='$eid' and pid='$pid'");
#
# If the state is POSTED or PAUSED, we can do it right away.
# Otherwise, have to let the batch daemon deal with it.
#
if ($ebatchstate ne BATCHSTATE_POSTED &&
$ebatchstate ne BATCHSTATE_PAUSED) {
#
# Exit with non zero status so that caller knows (web
# server) that the batch experiment cannot be ended at
# this time.
#
print "Batch experiment $pid/$eid is currently running.\n".
"You will receive email notification when the experiment is\n".
"torn down and you can reuse the experiment name\n";
exit(0);
exit(0);
}
}
#
# Let termination proceed normally.
......
......@@ -97,10 +97,6 @@ if (defined($options{"s"})) {
$inout ne "modify") {
usage();
}
if ($batch &&
($inout ne "out" && $inout ne "in")) {
usage();
}
}
else {
usage();
......@@ -210,12 +206,60 @@ if (defined($hashrow{'expt_locked'})) {
}
#
# Do not allow the user to swap a batch experiment. It has to come via
# the batch daemon.
# Batchmode.
#
if ($isbatchexpt && !$batch) {
die("*** $0:\n".
" Batch experiments cannot be swapped or modified yet!");
if ($isbatchexpt) {
#
# When coming from the daemon, sanity check the batch state.
#
if ($batch) {
if ($inout eq "in") {
die("*** $0:\n".
" Batch experiment $pid/$eid is not in the proper state!\n".
" Currently $ebatchstate, but should be ACTIVATING\n")
if ($ebatchstate ne BATCHSTATE_ACTIVATING);
}
elsif ($inout eq "out") {
die("*** $0:\n".
" Batch experiment $pid/$eid is not in the proper state!\n".
" Currently $ebatchstate, but should be TERMINATING\n")
if ($ebatchstate ne BATCHSTATE_TERMINATING);
}
else {
die("*** $0:\n".
" Improper request from batch daemon for $pid/$eid!\n");
}
}
else {
#
# User is requesting that a batch either be injected or paused.
# Sanity check the state, but otherwise let the batch daemon
# handle it.
#
if ($inout eq "in") {
die("*** $0:\n".
" Batch experiment $pid/$eid is not in the proper state!\n".
" Currently $ebatchstate. Must be PAUSED to swap in.\n")
if ($ebatchstate ne BATCHSTATE_PAUSED);
TBSetBatchState($pid, $eid, BATCHSTATE_POSTED);
}
elsif ($inout eq "out") {
die("*** $0:\n".
" Batch experiment $pid/$eid is not in the proper state!\n".
" Currently $ebatchstate. Must be RUNNING to swap out.\n")
if ($ebatchstate ne BATCHSTATE_RUNNING);
my $flag = BATCHMODE_CANCELSWAP;
DBQueryFatal("UPDATE experiments set canceled=$flag ".
"WHERE eid='$eid' and pid='$pid'");
}
else {
die("*** $0:\n",
" Batch experiments can only be swapped in or out!\n");
}
exit(0);
}
}
#
......
......@@ -342,7 +342,7 @@ function SPITFORM($formfields, $errors)
<li>Check this if you want to load the experiment, but not
configure it (assign physical resources). You may swap in the
experiment later, or terminate it without ever swapping
it. This option is not compatible with batch mode.\n";
it.\n";
echo "</ol>
</blockquote></blockquote></blockquote>\n";
......@@ -486,16 +486,6 @@ if (isset($formfields[exp_priority]) &&
$errors["Priority"] = "Bad Value";
}
#
# Preload and Batch are mutually exclusive.
#
if (isset($formfields[exp_batched]) &&
!strcmp($formfields[exp_batched], "Yep") &&
isset($formfields[exp_preload]) &&
!strcmp($formfields[exp_preload], "Yep")) {
$errors["Preload"] = "Cannot use with Batch Mode";
}
#
# If any errors, respit the form with the current values and the
# error messages displayed. Iterate until happy.
......@@ -665,12 +655,11 @@ if (isset($formfields[exp_batched]) &&
else {
$exp_batched = 0;
$batcharg = "-i";
if (isset($formfields[exp_preload]) &&
strcmp($formfields[exp_preload], "Yep") == 0) {
$exp_preload = 1;
$batcharg .= " -f";
}
}
if (isset($formfields[exp_preload]) &&
strcmp($formfields[exp_preload], "Yep") == 0) {
$exp_preload = 1;
$batcharg .= " -f";
}
#
......@@ -810,14 +799,8 @@ echo "<font size=+1>
in project <A href='showproject.php3?pid=$exp_pid'>$exp_pid</A>
is configuring!<br><br>\n";
if ($exp_batched) {
echo "Batch Mode experiments will be run when enough resources become
available. This might happen immediately, or it may take hours
or days. You will be notified via email when the experiment has
been run. If you do not receive email notification within a
reasonable amount of time, please contact $TBMAILADDR.\n";
}
elseif ($exp_preload) {
if ($exp_preload) {
echo "Since you are only pre-loading the experiment, this will typically
take less than one minute. If you do not receive email notification
within a reasonable amount of time, please contact $TBMAILADDR.<br>
......@@ -826,6 +809,13 @@ elseif ($exp_preload) {
in <a target=_blank href=spewlogfile.php3?pid=$exp_pid&eid=$exp_id>
realtime</a>.\n";
}
elseif ($exp_batched) {
echo "Batch Mode experiments will be run when enough resources become
available. This might happen immediately, or it may take hours
or days. You will be notified via email when the experiment has
been run. If you do not receive email notification within a
reasonable amount of time, please contact $TBMAILADDR.\n";
}
else {
echo "You will be notified via email when the experiment has been fully
configured and you are able to proceed. This typically takes less
......
......@@ -120,9 +120,13 @@ if (!$confirmed) {
echo "<center><h2><br>
Are you sure you want to ";
if ($force) {
echo "<font color=red><br>forcibly</br></font> ";
echo "<font color=red><br>forcibly</br></font> ";
}
echo "$action experiment '$exp_eid?'
echo "$action ";
if ($batch) {
echo "batch mode ";
}
echo "experiment '$exp_eid?'
</h2>\n";
echo "<form action='swapexp.php3?inout=$inout&pid=$exp_pid&eid=$exp_eid'
......@@ -210,14 +214,24 @@ if ($retval) {
#
# Exit status 0 means the experiment is swapping, or will be.
#
echo "<br><br><h3>\n";
echo "<br><h3>\n";
if ($retval == 0) {
if (strcmp($inout, "in") == 0)
$howlong = "two to ten";
else
$howlong = "less than two";
if ($batch &&
strcmp($inout, "in") == 0) {
echo "Batch Mode experiments will be run when enough resources
become available. This might happen immediately, or it
may take hours or days. You will be notified via email
when the experiment has been run. If you do not receive
email notification within a reasonable amount of time,
please contact $TBMAILADDR.\n";
}
else {
if (strcmp($inout, "in") == 0)
$howlong = "two to ten";
else
$howlong = "less than two";
echo "Experiment
echo "Experiment
<a href='showexp.php3?pid=$exp_pid&eid=$exp_eid'>$exp_eid</a>
in project <A href='showproject.php3?pid=$exp_pid'>$exp_pid</A>
has started its $action.
......@@ -231,6 +245,7 @@ if ($retval == 0) {
While you are waiting, you can watch the log
in <a target=_blank href=spewlogfile.php3?pid=$exp_pid&eid=$exp_eid>
realtime</a>.\n";
}
}
echo "</h3>\n";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment