Commit 29b820b1 authored by Leigh Stoller's avatar Leigh Stoller

Some cleanup on the batch mode stuff. Make it more explicit in the

showexp page that it's a batch experiment, by the menu options. Same
deal in the swapexp output, plus some other minor cleanup. The only
bug I found while trying to figure out the batchmode problem reported
this morning by the FileMover people, is that the cancelflag is not
cleared after swapping a running batch experiment out, so even after
reinjecting it into the queue, it will not run. Still, that does seem
to be what the FileMover people reported.
parent 6a26e2e2
......@@ -64,8 +64,8 @@ use Exporter;
BATCHSTATE_POSTED BATCHSTATE_RUNNING BATCHSTATE_TERMINATING
BATCHSTATE_ACTIVATING BATCHSTATE_PAUSED
BATCHMODE_CANCELTERM BATCHMODE_CANCELSWAP
TBBatchState TBSetBatchState
BATCHMODE_CANCELTERM BATCHMODE_CANCELSWAP BATCHMODE_CANCELCLEAR
TBBatchState TBSetBatchState TBSetBatchCancelFlag
TB_NODELOGTYPE_MISC TB_NODELOGTYPES TB_DEFAULT_NODELOGTYPE
......@@ -284,6 +284,7 @@ sub BATCHSTATE_RUNNING() { "active"; }
sub BATCHSTATE_PAUSED() { "paused"; }
sub BATCHSTATE_TERMINATING() { "terminating"; }
# Cancel flags
sub BATCHMODE_CANCELCLEAR { 0 ;}
sub BATCHMODE_CANCELTERM { 1 ;}
sub BATCHMODE_CANCELSWAP { 2 ;}
......@@ -1337,7 +1338,7 @@ sub TBBatchState($$)
}
#
# Set BatctMode state.
# Set BatchMode state.
#
# usage: SetBatchState(char *pid, char *eid, char *state)
# returns 1 if okay.
......@@ -1358,6 +1359,28 @@ sub TBSetBatchState($$$)
return 1;
}
#
# Set the BatchMode cancel flag for an experiment.
#
# usage: TBSetBatchCancelFlag(char *pid, char *eid, char *flag)
#        returns 1 if okay.
#        returns 0 if an invalid pid/eid or if an error.
#
# The flag is written to the "canceled" column of the experiments row
# selected by pid/eid.
#
# NOTE(review): pid, eid and flag are interpolated straight into the SQL
# text; presumably callers only pass validated ids and one of the
# BATCHMODE_CANCEL* values -- confirm.
#
sub TBSetBatchCancelFlag($$$)
{
    my ($pid, $eid, $flag) = @_;

    my $update_result =
        DBQueryWarn("update experiments set canceled='$flag' ".
                    "where eid='$eid' and pid='$pid'");

    # The query itself failed.
    return 0
        unless ($update_result);

    # The query ran, but reported no rows for this pid/eid.
    return 0
        if ($update_result->numrows == 0);

    return 1;
}
#
# Return a list of all the nodes in an experiment.
#
......
......@@ -482,8 +482,9 @@ sub swapexp($)
# Set the state to paused to ensure that it is not run again until
# the user wants to.
#
TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELCLEAR);
TBSetBatchState($pid, $eid, $BSTATE_PAUSED);
if ($canceled) {
email_status("Batch Mode experiment $pid/$eid has been stopped!");
}
......
......@@ -188,10 +188,7 @@ if ($isbatchexpt) {
# already be running, but we deal with that by looking at the
# batch state.
#
my $flag = BATCHMODE_CANCELTERM;
DBQueryFatal("UPDATE experiments set canceled=$flag ".
"WHERE eid='$eid' and pid='$pid'");
TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELTERM);
#
# If the state is POSTED or PAUSED, we can do it right away.
......
......@@ -342,7 +342,7 @@ my $message;
if ($frontend) {
$message =
"Your experiment `$eid' in project `$pid' is now preloaded.\n" .
"Your experiment `$eid' in project `$pid' has been created.\n" .
"You can check the web interface to see if it looks the way\n" .
"you expected it to. If so, you may swap the experiment in,\n" .
"or terminate it, at any time.\n" .
......@@ -350,7 +350,7 @@ if ($frontend) {
}
else {
$message =
"Your experiment `$eid' in project `$pid' is now configured.\n" .
"Your experiment `$eid' in project `$pid' has been started.\n" .
"Here is the experiment summary detailing the nodes that were\n" .
"allocated to you. You may use the `Qualified Name' to log on\n" .
"to your nodes. See /etc/hosts on your nodes (when running\n" .
......@@ -372,7 +372,7 @@ $message .=
"in your message to $TBOPS";
SENDMAIL("$user_name <$user_email>",
"New Experiment " . (($frontend == 0) ? "Created" : "Preloaded") .
"New Experiment " . (($frontend == 0) ? "Started" : "Created") .
": $pid/$eid",
$message,
"$user_name <$user_email>",
......
......@@ -17,7 +17,7 @@ use Getopt::Std;
sub usage()
{
print STDOUT "Usage: swapexp [-b] [-i | -a | -f] [-r] ".
"<-s in | out | restart | modify> <pid> <eid> [<nsfile>]\n";
"<-s in | out | restart | modify | pause> <pid> <eid> [<nsfile>]\n";
exit(-1);
}
my $optlist = "biafrs:";
......@@ -103,6 +103,7 @@ if (defined($options{"s"})) {
if ($inout ne "out" &&
$inout ne "in" &&
$inout ne "restart" &&
$inout ne "pause" &&
$inout ne "modify") {
usage();
}
......@@ -287,10 +288,14 @@ if ($isbatchexpt) {
" Currently $ebatchstate. Must be RUNNING to swap out.\n")
if ($ebatchstate ne BATCHSTATE_RUNNING);
my $flag = BATCHMODE_CANCELSWAP;
DBQueryFatal("UPDATE experiments set canceled=$flag ".
"WHERE eid='$eid' and pid='$pid'");
TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELSWAP);
}
elsif ($inout eq "pause") {
die("*** $0:\n".
" Batch experiment $pid/$eid is not in the proper state!\n".
" Currently $ebatchstate. Must be POSTED to pause.\n")
if ($ebatchstate ne BATCHSTATE_POSTED);
TBSetBatchState($pid, $eid, BATCHSTATE_PAUSED);
}
elsif ($inout eq "modify") {
die("*** $0:\n".
......
......@@ -34,6 +34,15 @@ define("TBDB_USERSTATUS_UNAPPROVED", "unapproved");
define("TBDB_USERSTATUS_UNVERIFIED", "unverified");
define("TBDB_USERSTATUS_FROZEN", "frozen");
#
# Batch experiment strings.
#
define("TBDB_BATCHSTATE_POSTED", "posted");
define("TBDB_BATCHSTATE_ACTIVATING", "activating");
define("TBDB_BATCHSTATE_RUNNING", "active");
define("TBDB_BATCHSTATE_PAUSED", "paused");
define("TBDB_BATCHSTATE_TERMINATING", "terminating");
#
# Trust. Define the trust level as an increasing value. Then define a
# function to return whether the given trust is high enough.
......@@ -1252,6 +1261,25 @@ function TBExptState($pid, $eid)
return $state;
}
#
# Look up the batch mode and batch state of an experiment.
#
# usage: TBExptBatchState($pid, $eid, &$bstate)
#        returns 1 if the experiment exists and its batchmode column is set.
#        returns 0 if there is no such experiment, or batchmode is not set.
#
# $bstate is assigned the experiment's batchstate column whenever the row
# is found (even when 0 is returned because batchmode is not set). It is
# left untouched when no row matches pid/eid.
#
# NOTE(review): $pid and $eid are interpolated directly into the SQL text;
# presumably callers have validated them beforehand -- confirm.
#
function TBExptBatchState($pid, $eid, &$bstate)
{
    $query_result =
	DBQueryFatal("select batchmode,batchstate from experiments ".
		     "where eid='$eid' and pid='$pid'");

    if (mysql_num_rows($query_result) == 0) {
	return 0;
    }
    $row = mysql_fetch_array($query_result);

    # Quote the column names: a bare word used as an array key is looked up
    # as a constant first, and only falls back to the string on failure.
    $mode   = $row["batchmode"];
    $bstate = $row["batchstate"];

    if (!$mode) {
	return 0;
    }
    return 1;
}
function TBExptIndex($pid, $eid)
{
$query_result =
......
......@@ -149,12 +149,12 @@ if ($retval < 0) {
#
echo "<br><br><h3>\n";
if ($retval) {
echo "Experiment `$exp_eid' in project `$exp_pid' has been terminated!
echo "Your experiment has been terminated!
<br><br>
You may now reuse the experiment name.\n";
}
else {
echo "Experiment `$exp_eid' in project `$exp_pid' is terminating!<br><br>
echo "Your experiment is terminating!<br><br>
You will be notified via email when the experiment has been torn
down, and you can reuse the experiment name.
This typically takes less than two minutes, depending on the
......
......@@ -50,6 +50,7 @@ if (! TBExptAccessCheck($uid, $exp_pid, $exp_eid, $TB_EXPT_READINFO)) {
$expindex = TBExptIndex($exp_pid, $exp_eid);
$expstate = TBExptState($exp_pid, $exp_eid);
$isbatch = TBExptBatchState($exp_pid, $exp_eid, $batchstate);
echo "<font size=+2>Experiment <b>".
"<a href='showproject.php3?pid=$pid'>$pid</a>/".
......@@ -61,7 +62,7 @@ SUBMENUSTART("Experiment Options");
if ($expstate) {
if (TBExptLogFile($exp_pid, $exp_eid)) {
WRITESUBMENUBUTTON("View Activation Logfile",
WRITESUBMENUBUTTON("View Activity Logfile",
"spewlogfile.php3?pid=$exp_pid&eid=$exp_eid");
}
......@@ -69,10 +70,6 @@ if ($expstate) {
WRITESUBMENUBUTTON("Visualization, NS File, Mapping",
"shownsfile.php3?pid=$exp_pid&eid=$exp_eid");
}
elseif (strcmp($expstate, $TB_EXPTSTATE_SWAPPED) == 0) {
WRITESUBMENUBUTTON("Visualization and NS File",
"shownsfile.php3?pid=$exp_pid&eid=$exp_eid");
}
else {
WRITESUBMENUBUTTON("Visualization and NS File",
"shownsfile.php3?pid=$exp_pid&eid=$exp_eid");
......@@ -81,22 +78,39 @@ if ($expstate) {
"spitnsdata.php3?pid=$exp_pid&eid=$exp_eid");
# Swap option.
if (strcmp($expstate, $TB_EXPTSTATE_SWAPPED) == 0) {
WRITESUBMENUBUTTON("Swap this Experiment In",
"swapexp.php3?inout=in&pid=$exp_pid&eid=$exp_eid");
if ($isbatch) {
if (strcmp($batchstate, TBDB_BATCHSTATE_PAUSED) == 0) {
WRITESUBMENUBUTTON("Queue Batch Experiment",
"swapexp.php3?inout=in&pid=$exp_pid&eid=$exp_eid");
}
elseif (strcmp($batchstate, TBDB_BATCHSTATE_RUNNING) == 0) {
WRITESUBMENUBUTTON("Stop Batch Experiment",
"swapexp.php3?inout=out&pid=$exp_pid&eid=$exp_eid");
}
elseif (strcmp($batchstate, TBDB_BATCHSTATE_POSTED) == 0) {
WRITESUBMENUBUTTON("Pause Batch Experiment",
"swapexp.php3?inout=pause&pid=$exp_pid&eid=$exp_eid");
}
}
elseif (strcmp($expstate, $TB_EXPTSTATE_ACTIVE) == 0) {
WRITESUBMENUBUTTON("Swap this Experiment Out",
"swapexp.php3?inout=out&pid=$exp_pid&eid=$exp_eid");
else {
if (strcmp($expstate, $TB_EXPTSTATE_SWAPPED) == 0) {
WRITESUBMENUBUTTON("Swap Experiment In",
"swapexp.php3?inout=in&pid=$exp_pid&eid=$exp_eid");
}
elseif (strcmp($expstate, $TB_EXPTSTATE_ACTIVE) == 0) {
WRITESUBMENUBUTTON("Swap Experiment Out",
"swapexp.php3?inout=out&pid=$exp_pid&eid=$exp_eid");
}
}
WRITESUBMENUBUTTON("Terminate Experiment",
"endexp.php3?pid=$exp_pid&eid=$exp_eid");
if (strcmp($expstate, $TB_EXPTSTATE_ACTIVE) == 0) {
WRITESUBMENUBUTTON("Modify Traffic Shaping",
"delaycontrol.php3?pid=$exp_pid&eid=$exp_eid");
}
}
WRITESUBMENUBUTTON("Terminate this Experiment",
"endexp.php3?pid=$exp_pid&eid=$exp_eid");
$editflip = ($edit ? 0 : 1);
WRITESUBMENUBUTTON("Edit Experiment Metadata",
"showexp.php3?pid=$exp_pid&eid=$exp_eid&edit=$editflip");
......@@ -113,9 +127,11 @@ if (TBExptAccessCheck($uid, $exp_pid, $exp_eid, $TB_EXPT_UPDATEACCOUNTS)) {
if (TBExptAccessCheck($uid, $exp_pid, $exp_eid, $TB_EXPT_MODIFY)) {
WRITESUBMENUBUTTON("Reboot All Nodes",
"boot.php3?pid=$exp_pid&eid=$exp_eid");
WRITESUBMENUBUTTON("Modify this Experiment",
"modifyexp.php3?pid=$exp_pid&eid=$exp_eid");
# Batch experiments can be modified only when paused.
if (! ($isbatch && strcmp($batchstate, TBDB_BATCHSTATE_PAUSED))) {
WRITESUBMENUBUTTON("Modify Experiment",
"modifyexp.php3?pid=$exp_pid&eid=$exp_eid");
}
}
# History
......@@ -125,7 +141,7 @@ WRITESUBMENUBUTTON("Show History",
if (ISADMIN($uid)) {
if (strcmp($expstate, $TB_EXPTSTATE_ACTIVE) == 0) {
SUBMENUSECTION("Beta-Test Options");
WRITESUBMENUBUTTON("Restart this Experiment",
WRITESUBMENUBUTTON("Restart Experiment",
"swapexp.php3?inout=restart&pid=$exp_pid".
"&eid=$exp_eid");
......
......@@ -6,11 +6,6 @@
#
include("defs.php3");
#
# Standard Testbed Header
#
PAGEHEADER("Swap/Restart an Experiment");
#
# Only known and logged in users can end experiments.
#
......@@ -32,10 +27,21 @@ if (!isset($eid) ||
if (!isset($inout) ||
(strcmp($inout, "in") && strcmp($inout, "out") &&
strcmp($inout, "restart"))) {
strcmp($inout, "pause") && strcmp($inout, "restart"))) {
USERERROR("The argument must be either in, out, or restart!", 1);
}
# Canceled operation redirects back to showexp page. See below.
if ($canceled) {
header("Location: showexp.php3?pid=$pid&eid=$eid");
return;
}
#
# Standard Testbed Header, after cancel above.
#
PAGEHEADER("Swap Control");
#
# Only admins can issue a force swapout
#
......@@ -59,16 +65,6 @@ else {
$exp_eid = $eid;
$exp_pid = $pid;
if (!strcmp($inout, "in")) {
$action = "swapin";
}
elseif (!strcmp($inout, "out")) {
$action = "swapout";
}
elseif (!strcmp($inout, "restart")) {
$action = "restart";
}
#
# Check to make sure that this is a valid PID/EID tuple.
#
......@@ -79,14 +75,13 @@ if (mysql_num_rows($query_result) == 0) {
USERERROR("The experiment $exp_eid is not a valid experiment ".
"in project $exp_pid.", 1);
}
$row = mysql_fetch_array($query_result);
$exp_gid = $row[gid];
$batch = $row[batchmode];
$swappable=$row[swappable];
$idleswap_bit=$row[idleswap];
$idleswap_time=$row[idleswap_timeout];
$idlethresh=min($idleswap_time/60.0,TBGetSiteVar("idle/threshold"));
$row = mysql_fetch_array($query_result);
$exp_gid = $row[gid];
$isbatch = $row[batchmode];
$swappable = $row[swappable];
$idleswap_bit = $row[idleswap];
$idleswap_time = $row[idleswap_timeout];
$idlethresh = min($idleswap_time/60.0,TBGetSiteVar("idle/threshold"));
#
# Look for transition in progress and exit with error.
......@@ -108,23 +103,34 @@ if (! TBExptAccessCheck($uid, $exp_pid, $exp_eid, $TB_EXPT_MODIFY)) {
USERERROR("You do not have permission for $exp_eid!", 1);
}
# Convert inout to informative text.
if (!strcmp($inout, "in")) {
if ($isbatch)
$action = "queue";
else
$action = "swapin";
}
elseif (!strcmp($inout, "out")) {
if ($isbatch)
$action = "swapout";
else
$action = "swapout";
}
elseif (!strcmp($inout, "pause")) {
if (!$isbatch)
USERERROR("Only batch experiments can be 'paused!'", 1);
$action = "pause";
}
elseif (!strcmp($inout, "restart")) {
$action = "restart";
}
#
# We run this twice. The first time we are checking for a confirmation
# by putting up a form. The next time through the confirmation will be
# set. Or, the user can hit the cancel button, in which case we should
# probably redirect the browser back up a level.
# set. Or, the user can hit the cancel button, in which case we
# redirect the browser back to the experiment page (see above).
#
if ($canceled) {
echo "<center><h2><br>
Experiment $action canceled <br> for experiment
<A href='showproject.php3?pid=$exp_pid'>$exp_pid</A>/<a
href='showexp.php3?pid=$exp_pid&eid=$exp_eid'>$exp_eid</a>!
</h2></center>\n";
PAGEFOOTER();
return;
}
if (!$confirmed) {
echo "<center><h2><br>
Are you sure you want to ";
......@@ -132,7 +138,7 @@ if (!$confirmed) {
echo "<font color=red><br>forcibly</br></font> ";
}
echo "$action ";
if ($batch) {
if ($isbatch) {
echo "batch mode ";
}
echo "experiment '$exp_eid?'
......@@ -191,14 +197,19 @@ if (!$confirmed) {
#
TBGroupUnixInfo($exp_pid, $exp_gid, $unix_gid, $unix_name);
echo "<font size=+2>Experiment <b>".
"<a href='showproject.php3?pid=$pid'>$pid</a>/".
"<a href='showexp.php3?pid=$pid&eid=$eid'>$eid</a></b></font>\n";
echo "<br><br>\n";
#
# We run a wrapper script that does all the work of terminating the
# experiment.
#
# tbstopit <pid> <eid>
#
echo "<center><br>";
echo "<h2>Starting experiment $action. Please wait a moment ...
echo "<center>";
echo "<h2>Starting experiment state change. Please wait a moment ...
</h2></center>";
flush();
......@@ -228,7 +239,7 @@ $result = exec("$TBSUEXEC_PATH $uid $unix_gid ".
if ($retval) {
echo "<br><br><h2>
$action failure($retval): Output as follows:
State change failure($retval): Output as follows:
</h2>
<br>
<XMP>\n";
......@@ -246,14 +257,29 @@ if ($retval) {
#
echo "<br><h3>\n";
if ($retval == 0) {
if ($batch &&
strcmp($inout, "in") == 0) {
echo "Batch Mode experiments will be run when enough resources
become available. This might happen immediately, or it
may take hours or days. You will be notified via email
when the experiment has been run. If you do not receive
email notification within a reasonable amount of time,
please contact $TBMAILADDR.\n";
if ($isbatch) {
if (strcmp($inout, "in") == 0) {
echo "Batch Mode experiments will be run when enough resources
become available. This might happen immediately, or it
may take hours or days. You will be notified via email
when the experiment has been run. In the meantime, you can
check the web page to see how many attempts have been made,
and when the last attempt was.\n";
}
elseif (strcmp($inout, "out") == 0) {
echo "Batch mode experiments take a few moments to stop. Once
it does, the experiment will enter the 'paused' state.
You can requeue the batch experiment at that time.\n";
echo "<br><br>
If you do not receive
email notification within a reasonable amount of time,
please contact $TBMAILADDR.\n";
}
elseif (strcmp($inout, "pause") == 0) {
echo "Your experiment has been paused. You may requeue your
experiment at any time.\n";
}
}
else {
if (strcmp($inout, "in") == 0)
......@@ -261,20 +287,17 @@ if ($retval == 0) {
else
$howlong = "less than two";
echo "Experiment
<a href='showexp.php3?pid=$exp_pid&eid=$exp_eid'>$exp_eid</a>
in project <A href='showproject.php3?pid=$exp_pid'>$exp_pid</A>
has started its $action.
<br><br>
You will be notified via email when the operation is complete.
This typically takes $howlong minutes, depending on the
number of nodes in the experiment.
If you do not receive email notification within a reasonable amount
of time, please contact $TBMAILADDR.
<br><br>
While you are waiting, you can watch the log
in <a target=_blank href=spewlogfile.php3?pid=$exp_pid&eid=$exp_eid>
realtime</a>.\n";
echo "Your experiment has started its $action.
You will be notified via email when the operation is complete.
This typically takes $howlong minutes, depending on the
number of nodes in the experiment.
<br><br>
If you do not receive email notification within a reasonable
amount of time, please contact $TBMAILADDR.
<br><br>
While you are waiting, you can watch the log in
<a target=_blank href=spewlogfile.php3?pid=$exp_pid&eid=$exp_eid>
realtime</a>.\n";
}
}
echo "</h3>\n";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment