#!/usr/bin/perl -wT use English; use Getopt::Std; # # Create a batch experiment. # # usage: batch_daemon # # TODO: Use "logger" instead of writing a log file. # sub usage() { print STDOUT "Usage: batch_daemon [-d]\n" . "Use the -d option to prevent daemonization\n"; exit(-1); } my $optlist = "d"; # # Configure variables # my $TB = "@prefix@"; my $DBNAME = "@TBDBNAME@"; my $TBOPS = "@TBOPSEMAIL@"; my $TBLOGS = "@TBLOGSEMAIL@"; # # Testbed Support libraries # use lib "@prefix@/lib"; use libdb; use libtestbed; # # Ug, exit value from startexp when not enough nodes. # my $TOOFEWNODES = 2; my $tbbindir = "$TB/bin/"; my $batchdir = "$TB/batch"; my $startexp = "$TB/bin/startexp"; my $endexp = "$TB/bin/endexp"; my $savelogs = "$TB/bin/savelogs"; my $avail = "$TB/sbin/avail"; my $batchlog = "$TB/log/batchlog"; my $projroot = "/proj"; my $debug = 0; my $dirname; my $BSTATE_POSTED = BATCHSTATE_POSTED; my $BSTATE_ACTIVATING = BATCHSTATE_ACTIVATING; my $BSTATE_RUNNING = BATCHSTATE_RUNNING; my $BSTATE_TERMINATING = BATCHSTATE_TERMINATING; # # These are valid in the children, not the parent. I suppose I could use # dynamically scoped variables, but hardly worth it. # my $eid; my $pid; my $gid; my $logname; my $nsfile; my $user_name = "Testbed Operations"; my $user_email = "$TBOPS"; # # Turn off line buffering on output # $| = 1; # # Untaint the path # $ENV{'PATH'} = "/bin:/usr/bin:"; delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; # # Parse command arguments. Once we return from getopts, all that should be # left are the required arguments. # %options = (); if (! getopts($optlist, \%options)) { usage(); } if (@ARGV != 0) { usage(); } if (defined($options{"d"})) { $debug = $options{"d"}; } # Go to ground. if (! $debug) { if (TBBackGround($batchlog)) { exit(0); } } # # Loop, looking for batch experiments that want to run. # while (1) { my($count, $i, $query_result, $pending_result, $running_result); my(%row, %pending_row); # # Need to lock the table here because of cancelation in endexp. # See the comments in there. We need to atomically grab the next # batch experiment we want to try, and then change its state from # new to configuring. We want to grab just one experiment, since # it takes a while to configure an experiment, and grabbing a bunch and # locking them up might result in having to wait a really long time # to cancel a batch experiment that hasn't really tried to start yet! # Thats would ne annoying to users. # # So, now you're wondering what my selection criteria is? Well, its # damn simplistic. I set the "started" datetime field each attempt, # and I pick the batch_experiment with the oldest time, thereby cycling # through in a "least recently attempted" manner. # $query_result = DBQuery("lock tables experiments write"); if (! $query_result) { print "DB Error locking tables. Waiting a bit ...\n"; sleep(10); next; } $pending_result = DBQueryWarn("SELECT * FROM experiments ". "WHERE batchmode=1 and canceled=0 and ". " batchstate='$BSTATE_POSTED' and ". " (attempts=0 or ". " ((UNIX_TIMESTAMP() - ". " UNIX_TIMESTAMP(expt_start) > (60 * 15)))) ". "ORDER BY expt_start LIMIT 1"); $running_result = DBQuery("select * from experiments ". "where batchmode=1 and batchstate='$BSTATE_RUNNING' ". "ORDER BY expt_start"); if (!$pending_result || !$running_result) { print "DB Error getting batch info. Waiting a bit ...\n"; DBQuery("unlock tables"); sleep(10); next; } if (!$pending_result->numrows && !$running_result->numrows) { DBQuery("unlock tables"); sleep(10); next; } # # If we have a pending experiment to run, set its state to configuring # right away, while we have the tables locked. This prevents endexp # from seeing it as something it can cancel. # if ($pending_result->numrows) { %pending_row = $pending_result->fetchhash(); # Local vars! my $eid = $pending_row{'eid'}; my $pid = $pending_row{'pid'}; $query_result = DBQuery("update experiments set expt_start=now(), ". "batchstate='$BSTATE_ACTIVATING' ". "where eid='$eid' and pid='$pid'"); if (! $query_result) { print "DB error setting batch $pid/$eid to configuring.\n"; DBQuery("unlock tables"); sleep(10); next; } } DBQueryWarn("unlock tables"); # # Okay, first we check the status of running batch mode experiments # since we want to end those before trying to start any new ones, cause # it would be nice to have as many nodes available as possible before # trying to add a new one. This can potentially delay startup, but thats # okay. Its a batch system. # # If you are wondering why I check for finished experiments in the main # loop instead of in the child that started the experiment, its so that # we fire up again and look for them in the event that paper goes down. # while (%row = $running_result->fetchhash()) { my $canceled = $row{'canceled'}; if ($canceled) { dosomething("cancel", %row); next; } if (isexpdone(%row)) { dosomething("end", %row); next; } } # # Finally start an actual experiment! # if ($pending_result->numrows) { dosomething("start", %pending_row); } sleep(15); } # # Do something as the user. Either, start, end, or cancel an experiment. # sub dosomething($$) { my($dowhat) = shift; my(%exphash) = @_; my($unix_uid, $unix_gid, $row, $query_result); # Global vars $eid = $exphash{'eid'}; $pid = $exphash{'pid'}; $gid = $exphash{'gid'}; $dirname = $exphash{'path'}; $nsfile = "$eid.ns"; # Locals my $creator = $exphash{'expt_head_uid'}; print "Doing a '$dowhat' to batch experiment $pid/$eid\n"; # # Create a temporary name for a log file. We do this in the parent so # we can remove it when the child ends. The child could remove it, but # since it is open in the child, it has the tendency to stick around. # $logname = `mktemp /tmp/$dowhat-batch-$pid-$eid.XXXXXX`; # Note different taint check (allow /). if ($logname =~ /^([-\@\w.\/]+)$/) { $logname = $1; } else { die "Bad data in $logname"; } # # Start up a child to run the guts. The parent waits. If the # experiment configures okay, the parent can return to try something # else. # $childpid = fork(); if ($childpid) { print "Child PID $childpid started to $dowhat $pid/$eid\n"; waitpid($childpid, 0); my $status = $? >> 8; print "Child PID $childpid exited with exit status $status\n"; sleep(5); unlink($logname); return $status; } openlog($logname); # # Get some user information. # if (!UserDBInfo($creator, \$user_name, \$user_email)) { fatal("DB Error getting user information for uid $creator"); } chdir("$dirname/tbdata") or fatal("Could not cd into $dirname/tbdata!"); # # Figure out the unix uid/gid that the experiment configuration is # going to run as. # (undef,undef,$unix_uid) = getpwnam($creator) or fatal("No such user $creator"); (undef,undef,$unix_gid) = getgrnam($gid) or fatal("No such group $gid"); # # Change the ownership of the log file before we flip. # chown($unix_uid, $unix_gid, $logname) or fatal("Could not chown $logname to $unix_uid/$unix_gid!"); # Flip to the user. We never flip back. $EGID = $GID = $unix_gid; $EUID = $UID = $unix_uid; $ENV{'USER'} = $creator; if ($dowhat eq "start") { startexp(%exphash); } elsif ($dowhat eq "end") { endexp(%exphash); } elsif ($dowhat eq "cancel") { cancelexp(1, %exphash); } exit(0); } # # Try to start an experiment. Never returns. # sub startexp($) { my(%exphash) = @_; my($exit_status, $running, $query_result); my $attempts = $exphash{'attempts'}; # # Try to start the experiment. # system("$startexp -b $logname -g $gid $pid $eid $nsfile"); $exit_status = $? >> 8; $running = 1; if ($exit_status) { $running = 0; } # # Look for cancelation. If we get a DB error on this, just continue cause # we can pick up the cancelation later. # $query_result = DBQueryWarn("select canceled from experiments ". "where eid='$eid' and pid='$pid'"); if ($query_result) { @row = $query_result->fetchrow_array(); if ($row[0]) { cancelexp($running); # # Never returns, but just to be safe ... # exit(0); } } # # If the configuration failed for lack of nodes, then don't send # email unless the number of attempts starts to get big. # # If the configuration failed for some other reason, then send email. # We have to reset the state to "new" so that it will be retried again # later. # if (! $running) { # # XXX - What if this update fails? # $query_result = DBQueryWarn("update experiments set attempts=attempts+1 ". "where eid='$eid' and pid='$pid'"); if ($exit_status == $TOOFEWNODES) { if (($attempts % 5) == 0) { $attempts++; my $msg = "Could not configure Batch Mode experiment $pid/$eid.\n". "\n". "There are not enough free nodes at this time.\n". "Another attempt will be made in a little while.\n". "\n". "There have been $attempts attempts to start this batch."; email_status($msg); } # # There is some state that needs to be reset so that another # attempt can be made. # SetExpState($pid, $eid, EXPTSTATE_NEW); TBSetBatchState($pid, $eid, $BSTATE_POSTED); exit($exit_status); } email_status("Experiment startup exited with error code $exit_status.". "\n". "Batch has been removed from the system."); ExptCleanup(); exit($exit_status); } # # Well, it configured! Lets set it state to running. # TBSetBatchState($pid, $eid, $BSTATE_RUNNING); email_status("Batch Mode experiment $pid/$eid is now running!\n". "Please consult the Web interface to see how it is doing."); # # Done with this phase. Must exit. # exit(0); } # # End an experiment. Never returns. # sub endexp($) { my(%exphash) = @_; # # Save tiplogs # system("$savelogs $pid $eid"); # # Have to set the state to terminating or else endexp will not accept it. # TBSetBatchState($pid, $eid, $BSTATE_TERMINATING); system("$endexp -b $pid $eid"); my $exit_status = $? >> 8; if ($exit_status) { # # TB admin is going to have to clean up. # fatal("Terminating Batch Mode experiment $pid/$eid"); } ExptCleanup(); email_status("Batch Mode experiment $pid/$eid has finished!"); # # Child must exit! # exit(0); } # # Cancel an experiment. Never returns. # sub cancelexp($$) { my($running) = shift; my(%exphash) = @_; TBSetBatchState($pid, $eid, $BSTATE_TERMINATING); if ($running) { system("$endexp -b $pid $eid"); } ExptCleanup(); donotify("Your Batch Mode experiment has been canceled!", "Canceled", 0); # # Child must exit! # exit(0); } # # Check experiment status. Looks to see if all of the nodes in an # experiment have reported in. # sub isexpdone($) { my(%exphash) = @_; my($query_result, @row); # Global vars $eid = $exphash{'eid'}; $pid = $exphash{'pid'}; print "Checking to see if $pid/$eid has finished up yet\n"; # # Look to see if any nodes yet to report status. If so, spin again. # $query_result = DBQueryWarn("SELECT startstatus,bootstatus FROM nodes ". "LEFT JOIN reserved ON nodes.node_id=reserved.node_id ". "WHERE reserved.eid='$eid' and reserved.pid='$pid'"); if (! $query_result) { return 0; } # # Well, right now a node is considered finished up only if its # boot did not fail, and it has reported start command status. # The idea being that if the boot failed, then its status will # never be reported anyway, and we might as well consider the node # done (else the experiment would never end). # while (@row = $query_result->fetchrow_array()) { if ($row[1] eq NODEBOOTSTATUS_FAILED) { next; } if ($row[0] eq NODESTARTSTATUS_NOSTATUS) { return 0; } } return 1; } # # Remove all trace. # sub ExptCleanup() { if (system("rm -rf $dirname")) { print "*** WARNING: Not able to remove experiment directory.\n"; print " Someone will need to do this by hand.\n"; } # # Remove all trace from the DB. # DBQueryWarn("DELETE from nsfiles ". "WHERE eid='$eid' and pid='$pid'"); DBQueryWarn("DELETE from exppid_access ". "WHERE exp_eid='$eid' and exp_pid='$pid'"); DBQueryWarn("DELETE from experiments ". "WHERE eid='$eid' and pid='$pid'"); } # # Start up a child, and set its descriptors talking to a log file. # The log file already exists, created with mktemp above. # sub openlog($) { my($logname) = $_[0]; # # We have to disconnect from the caller by redirecting both STDIN and # STDOUT away from the pipe. Otherwise the caller will continue to wait # even though the parent has exited. # open(STDIN, "< /dev/null") or fatal("opening /dev/null for STDIN: $!"); open(STDERR, ">> $logname") or fatal("opening $logname for STDERR: $!"); open(STDOUT, ">> $logname") or fatal("opening $logname for STDOUT: $!"); return 0; } # # A fatal error is something that the user does not need to know about. # Caused by a breakdown in the TB system. Generally speaking, once the # experiment is running, this should not be used. # sub fatal($) { my($mesg) = $_[0]; donotify($mesg, "Failure", 1); exit(-1); } # # Something the user cares about. # sub email_status($) { my($mesg) = $_[0]; donotify($mesg, "Status", 0); } sub donotify($$$) { my($mesg, $subtext, $iserr) = @_; my($subject, $from, $to, $hdrs); my $MAIL; $mesg = "$mesg\n"; print STDOUT "$mesg"; $subject = "TESTBED: Batch Mode Experiment $subtext $pid/$eid"; $from = $TBOPS; $hdrs = "Reply-To: $TBOPS"; # # An error goes just to Testbed Operations. Normal status messages go # to the user and to the Testbed Logs address. # if ($iserr) { $to = "$TBOPS"; } else { $to = "$user_name <$user_email>"; $hdrs = "Bcc: $TBLOGS\n". "$hdrs"; } SENDMAIL($to, $subject, $mesg, $from, $hdrs, ($logname, "assign.log", $nsfile)); }