From da97ba3509bc824df80a823cf63680ef9b3e6861 Mon Sep 17 00:00:00 2001 From: "Leigh B. Stoller" Date: Thu, 22 May 2003 21:35:51 +0000 Subject: [PATCH] Reorg the batch system slightly as per Eric's request that batch mode experiments look more like regular experiments. Batch mode experiments can now be preloaded and swapped. When preloaded, they go into a "Pause" state. Swapping a batch mode experiment in puts them into the "posted" state so the batch daemon will see them. Swapping out a batchmode experiment does the expected; it puts them back into the Pause state. Terminating a batch mode experiment does the expected; it's gone. When a batch mode experiment finishes normally, it goes back into the pause state, which allows batches to be reinjected as many times as Eric likes. --- db/libdb.pm.in | 7 ++++- tbsetup/batch_daemon.in | 44 +++++++++++++++++++---------- tbsetup/batchexp.in | 12 +++----- tbsetup/endexp.in | 45 +++++++++++++++++++++--------- tbsetup/swapexp.in | 62 +++++++++++++++++++++++++++++++++++------ www/beginexp.php3 | 40 ++++++++++---------------- www/swapexp.php3 | 31 +++++++++++++++------ 7 files changed, 162 insertions(+), 79 deletions(-) diff --git a/db/libdb.pm.in b/db/libdb.pm.in index 6408297b3..1ce176213 100644 --- a/db/libdb.pm.in +++ b/db/libdb.pm.in @@ -63,7 +63,8 @@ use Exporter; EXPTSTATE_TERMINATING EXPTSTATE_TERMINATED EXPTSTATE_UPDATING BATCHSTATE_POSTED BATCHSTATE_RUNNING BATCHSTATE_TERMINATING - BATCHSTATE_ACTIVATING + BATCHSTATE_ACTIVATING BATCHSTATE_PAUSED + BATCHMODE_CANCELTERM BATCHMODE_CANCELSWAP TBBatchState TBSetBatchState TB_NODELOGTYPE_MISC TB_NODELOGTYPES TB_DEFAULT_NODELOGTYPE @@ -261,7 +262,11 @@ sub EXPTSTATE_UPDATING() { "updating"; } sub BATCHSTATE_POSTED() { "posted"; } sub BATCHSTATE_ACTIVATING() { "activating"; } sub BATCHSTATE_RUNNING() { "active"; } +sub BATCHSTATE_PAUSED() { "paused"; } sub BATCHSTATE_TERMINATING() { "terminating"; } +# Cancel flags +sub BATCHMODE_CANCELTERM { 1 ;} +sub BATCHMODE_CANCELSWAP { 2 
;} sub USERSTATUS_ACTIVE() { "active"; } sub USERSTATUS_FROZEN() { "frozen"; } diff --git a/tbsetup/batch_daemon.in b/tbsetup/batch_daemon.in index b65925419..6f91439f4 100644 --- a/tbsetup/batch_daemon.in +++ b/tbsetup/batch_daemon.in @@ -65,6 +65,7 @@ my $BSTATE_POSTED = BATCHSTATE_POSTED; my $BSTATE_ACTIVATING = BATCHSTATE_ACTIVATING; my $BSTATE_RUNNING = BATCHSTATE_RUNNING; my $BSTATE_TERMINATING = BATCHSTATE_TERMINATING; +my $BSTATE_PAUSED = BATCHSTATE_PAUSED; # # These are valid in the children, not the parent. I suppose I could use @@ -220,11 +221,17 @@ while (1) { while (%row = $running_result->fetchhash()) { my $canceled = $row{'canceled'}; if ($canceled) { - dosomething("cancel", %row); + # Look at the cancel flag. + if ($canceled == BATCHMODE_CANCELTERM) { + dosomething("cancel", %row); + } + elsif ($canceled == BATCHMODE_CANCELSWAP) { + dosomething("swap", %row); + } next; } if (isexpdone(%row)) { - dosomething("end", %row); + dosomething("swap", %row); next; } } @@ -328,8 +335,8 @@ sub dosomething($$) if ($dowhat eq "start") { startexp(%exphash); } - elsif ($dowhat eq "end") { - endexp(%exphash); + elsif ($dowhat eq "swap") { + swapexp(%exphash); } elsif ($dowhat eq "cancel") { cancelexp(1, %exphash); @@ -449,30 +456,37 @@ sub startexp($) } # -# End an experiment. Never returns. +# A batch has completed. Swap it out. # -sub endexp($) +sub swapexp($) { my(%exphash) = @_; + my $canceled = $exphash{'canceled'}; # - # Save tiplogs - # - system("$savelogs $pid $eid"); - - # - # Have to set the state to terminating or else endexp will not accept it. + # Have to set the state to terminating or else swapexp will not accept it. # TBSetBatchState($pid, $eid, $BSTATE_TERMINATING); - system("$endexp -b $pid $eid"); + system("$swapexp -b -s out $pid $eid"); if ($?) { # # TB admin is going to have to clean up. 
# - fatal("Terminating Batch Mode experiment $pid/$eid"); + fatal("Swapping out Batch Mode experiment $pid/$eid"); + } + # + # Set the state to paused to ensure that it is not run again until + # the user wants to. + # + TBSetBatchState($pid, $eid, $BSTATE_PAUSED); + + if ($canceled) { + email_status("Batch Mode experiment $pid/$eid has been stopped!"); + } + else { + email_status("Batch Mode experiment $pid/$eid has finished!"); } - email_status("Batch Mode experiment $pid/$eid has finished!"); # # Child must exit! diff --git a/tbsetup/batchexp.in b/tbsetup/batchexp.in index 9bfc1511f..683ef5820 100755 --- a/tbsetup/batchexp.in +++ b/tbsetup/batchexp.in @@ -82,10 +82,6 @@ if (! UNIX2DBUID($UID, \$dbuid)) { # Parse command arguments. # ParseArgs(); -if (!$immediate) { - die("*** $0:\n". - " Batch system is disabled while we do some testing! Sorry.\n"); -} # # Sanity check them. @@ -248,7 +244,10 @@ else { if (system("$startexp -f -b -g $gid $pid $eid $nsfile")) { fatal("Failed to preload batch experiment $pid/$eid!"); } - TBSetBatchState($pid, $eid, BATCHSTATE_POSTED); + # And drop the batch into the queue unless the user was preloading + # a batch experiment. + TBSetBatchState($pid, $eid, + ($frontend ? BATCHSTATE_PAUSED : BATCHSTATE_POSTED)); } exit(0); @@ -311,9 +310,6 @@ sub ParseArgs() if (defined($options{"f"})) { $frontend = 1; } - if ($frontend && !$immediate) { - usage(); - } if (defined($options{"p"})) { $pid = $options{"p"}; diff --git a/tbsetup/endexp.in b/tbsetup/endexp.in index b6f5d4768..fb61d0159 100755 --- a/tbsetup/endexp.in +++ b/tbsetup/endexp.in @@ -171,25 +171,44 @@ if (defined($hashrow{'expt_locked'})) { # to determine when this was invoked from the batch daemon for a valid # teardown. # -if ($isbatchexpt && $ebatchstate ne BATCHSTATE_TERMINATING) { +if ($isbatchexpt) { # - # Set the canceled state. This will prevent the batch_daemon from trying - # to run it (once the table is unlocked). 
It might already be running, - # but we deal with that by looking at the batch state. + # Sanity Check. If called from the daemon, must be in the proper state. # - DBQueryFatal("UPDATE experiments set canceled=1 ". - "WHERE eid='$eid' and pid='$pid'"); - - if ($ebatchstate ne BATCHSTATE_POSTED) { + if ($batch) { + die("*** $0:\n". + " Batch experiment $pid/$eid is not in the correct state!\n". + " Currently $ebatchstate, but should be TERMINATING\n") + if ($ebatchstate ne BATCHSTATE_TERMINATING); + } + else { # - # Daemon does the rest ... Exit with non zero status so that caller - # knows (web server) that the batch experiment cannot be ended - # at this time. + # Set the canceled flag. This will prevent the batch_daemon + # from trying to run it (once the table is unlocked). It might + # already be running, but we deal with that by looking at the + # batch state. # - print "Batch Experiment $eid in project $pid is currently running.\n". + my $flag = BATCHMODE_CANCELTERM; + + DBQueryFatal("UPDATE experiments set canceled=$flag ". + "WHERE eid='$eid' and pid='$pid'"); + + # + # If the state is POSTED or PAUSED, we can do it right away. + # Otherwise, have to let the batch daemon deal with it. + # + if ($ebatchstate ne BATCHSTATE_POSTED && + $ebatchstate ne BATCHSTATE_PAUSED) { + # + # Exit with non zero status so that caller knows (web + # server) that the batch experiment cannot be ended at + # this time. + # + print "Batch experiment $pid/$eid is currently running.\n". "You will receive email notification when the experiment is\n". "torn down and you can reuse the experiment name\n"; - exit(0); + exit(0); + } } # # Let termination proceed normally. 
diff --git a/tbsetup/swapexp.in b/tbsetup/swapexp.in index 9363d4e35..dc5507078 100644 --- a/tbsetup/swapexp.in +++ b/tbsetup/swapexp.in @@ -97,10 +97,6 @@ if (defined($options{"s"})) { $inout ne "modify") { usage(); } - if ($batch && - ($inout ne "out" && $inout ne "in")) { - usage(); - } } else { usage(); @@ -210,12 +206,60 @@ if (defined($hashrow{'expt_locked'})) { } # -# Do not allow the user to swap a batch experiment. It has to come via -# the batch daemon. +# Batchmode. # -if ($isbatchexpt && !$batch) { - die("*** $0:\n". - " Batch experiments cannot be swapped or modified yet!"); +if ($isbatchexpt) { + # + # When coming from the daemon, sanity check the batch state. + # + if ($batch) { + if ($inout eq "in") { + die("*** $0:\n". + " Batch experiment $pid/$eid is not in the proper state!\n". + " Currently $ebatchstate, but should be ACTIVATING\n") + if ($ebatchstate ne BATCHSTATE_ACTIVATING); + } + elsif ($inout eq "out") { + die("*** $0:\n". + " Batch experiment $pid/$eid is not in the proper state!\n". + " Currently $ebatchstate, but should be TERMINATING\n") + if ($ebatchstate ne BATCHSTATE_TERMINATING); + } + else { + die("*** $0:\n". + " Improper request from batch daemon for $pid/$eid!\n"); + } + } + else { + # + # User is requesting that a batch either be injected or paused. + # Sanity check the state, but otherwise let the batch daemon + # handle it. + # + if ($inout eq "in") { + die("*** $0:\n". + " Batch experiment $pid/$eid is not in the proper state!\n". + " Currently $ebatchstate. Must be PAUSED to swap in.\n") + if ($ebatchstate ne BATCHSTATE_PAUSED); + TBSetBatchState($pid, $eid, BATCHSTATE_POSTED); + } + elsif ($inout eq "out") { + die("*** $0:\n". + " Batch experiment $pid/$eid is not in the proper state!\n". + " Currently $ebatchstate. Must be RUNNING to swap out.\n") + if ($ebatchstate ne BATCHSTATE_RUNNING); + + my $flag = BATCHMODE_CANCELSWAP; + + DBQueryFatal("UPDATE experiments set canceled=$flag ". 
+ "WHERE eid='$eid' and pid='$pid'"); + } + else { + die("*** $0:\n", + " Batch experiments can only be swapped in or out!\n"); + } + exit(0); + } } # diff --git a/www/beginexp.php3 b/www/beginexp.php3 index 3ce34280d..d44adc93b 100644 --- a/www/beginexp.php3 +++ b/www/beginexp.php3 @@ -342,7 +342,7 @@ function SPITFORM($formfields, $errors)
  • Check this if you want to load the experiment, but not configure it (assign physical resources). You may swap in the experiment later, or terminate it without ever swapping - it. This option is not compatible with batch mode.\n"; + it.\n"; echo " \n"; @@ -486,16 +486,6 @@ if (isset($formfields[exp_priority]) && $errors["Priority"] = "Bad Value"; } -# -# Preload and Batch are mutually exclusive. -# -if (isset($formfields[exp_batched]) && - !strcmp($formfields[exp_batched], "Yep") && - isset($formfields[exp_preload]) && - !strcmp($formfields[exp_preload], "Yep")) { - $errors["Preload"] = "Cannot use with Batch Mode"; -} - # # If any errors, respit the form with the current values and the # error messages displayed. Iterate until happy. @@ -665,12 +655,11 @@ if (isset($formfields[exp_batched]) && else { $exp_batched = 0; $batcharg = "-i"; - - if (isset($formfields[exp_preload]) && - strcmp($formfields[exp_preload], "Yep") == 0) { - $exp_preload = 1; - $batcharg .= " -f"; - } +} +if (isset($formfields[exp_preload]) && + strcmp($formfields[exp_preload], "Yep") == 0) { + $exp_preload = 1; + $batcharg .= " -f"; } # @@ -810,14 +799,8 @@ echo " in project $exp_pid is configuring!

    \n"; -if ($exp_batched) { - echo "Batch Mode experiments will be run when enough resources become - available. This might happen immediately, or it may take hours - or days. You will be notified via email when the experiment has - been run. If you do not receive email notification within a - reasonable amount of time, please contact $TBMAILADDR.\n"; -} -elseif ($exp_preload) { + +if ($exp_preload) { echo "Since you are only pre-loading the experiment, this will typically take less than one minute. If you do not receive email notification within a reasonable amount of time, please contact $TBMAILADDR.
    @@ -826,6 +809,13 @@ elseif ($exp_preload) { in realtime.\n"; } +elseif ($exp_batched) { + echo "Batch Mode experiments will be run when enough resources become + available. This might happen immediately, or it may take hours + or days. You will be notified via email when the experiment has + been run. If you do not receive email notification within a + reasonable amount of time, please contact $TBMAILADDR.\n"; +} else { echo "You will be notified via email when the experiment has been fully configured and you are able to proceed. This typically takes less diff --git a/www/swapexp.php3 b/www/swapexp.php3 index f664e3abe..c3cbd51c1 100644 --- a/www/swapexp.php3 +++ b/www/swapexp.php3 @@ -120,9 +120,13 @@ if (!$confirmed) { echo "


    Are you sure you want to "; if ($force) { - echo "
    forcibly
    "; + echo "
    forcibly
    "; } - echo "$action experiment '$exp_eid?' + echo "$action "; + if ($batch) { + echo "batch mode "; + } + echo "experiment '$exp_eid?'

    \n"; echo "

    \n"; +echo "

    \n"; if ($retval == 0) { - if (strcmp($inout, "in") == 0) - $howlong = "two to ten"; - else - $howlong = "less than two"; + if ($batch && + strcmp($inout, "in") == 0) { + echo "Batch Mode experiments will be run when enough resources + become available. This might happen immediately, or it + may take hours or days. You will be notified via email + when the experiment has been run. If you do not receive + email notification within a reasonable amount of time, + please contact $TBMAILADDR.\n"; + } + else { + if (strcmp($inout, "in") == 0) + $howlong = "two to ten"; + else + $howlong = "less than two"; - echo "Experiment + echo "Experiment $exp_eid in project $exp_pid has started its $action. @@ -231,6 +245,7 @@ if ($retval == 0) { While you are waiting, you can watch the log in realtime.\n"; + } } echo "

    \n"; -- GitLab