#!/usr/bin/perl -wT # # EMULAB-COPYRIGHT # Copyright (c) 2000-2003 University of Utah and the Flux Group. # All rights reserved. # use English; use Getopt::Std; # # This gets invoked from the Web interface. # Swap an experiment in, swap it out, restart or modify. # sub usage() { print STDOUT "Usage: swapexp [-b] [-i | -a | -f] [-r] ". "<-s in | out | restart | modify> []\n"; exit(-1); } my $optlist = "biafrs:"; # # Configure variables # my $TB = "@prefix@"; my $TBOPS = "@TBOPSEMAIL@"; my $TBLOGS = "@TBLOGSEMAIL@"; my $TBINFO = "$TB/expinfo"; my $TBDOCBASE = "@TBDOCBASE@"; # # Testbed Support libraries # use lib "@prefix@/lib"; use libdb; use libtestbed; my $tbdir = "$TB/bin/"; my $tbdata = "tbdata"; my $batch = 0; my $idleswap = 0; my $autoswap = 0; my $force = 0; my $reboot = 0; my $errorstat= -1; my $inout; my $logname; my $dbuid; my $user_name; my $user_email; my @allnodes; my @row; my $action; # # Untaint the path # $ENV{'PATH'} = '/bin:/usr/bin'; delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; # # Turn off line buffering on output # $| = 1; # # Set umask for start/swap. We want other members in the project to be # able to swap/end experiments, so the log and intermediate files need # to be 664 since some are opened for append. # umask(0002); # # Parse command arguments. Once we return from getopts, all that should # left are the required arguments. # %options = (); if (! getopts($optlist, \%options)) { usage(); } if (defined($options{"i"})) { $idleswap = 1; } if (defined($options{"a"})) { $autoswap = 1; } if (defined($options{"f"})) { $force = 1; } if (defined($options{"b"})) { $batch = 1; } if (defined($options{"r"})) { $reboot = 1; } if (defined($options{"s"})) { $inout = $options{"s"}; if ($inout ne "out" && $inout ne "in" && $inout ne "restart" && $inout ne "modify") { usage(); } } else { usage(); } if (@ARGV != (($inout eq "modify") ? 3 : 2)) { usage(); } my $pid = $ARGV[0]; my $eid = $ARGV[1]; my $nsfile; if ($inout eq "modify") { $nsfile = $ARGV[2]; # # Untaint nsfile argument; Allow slash. # if ($nsfile =~ /^([-\w.\/]+)$/) { $nsfile = $1; } else { die("Tainted nsfile name: $nsfile"); } } # # Untaint the arguments. # if ($pid =~ /^([-\@\w.]+)$/) { $pid = $1; } else { die("Tainted argument $pid!\n"); } if ($eid =~ /^([-\@\w.]+)$/) { $eid = $1; } else { die("Tainted argument $eid!\n"); } my $repfile = "$eid.report"; my $workdir = TBExptWorkDir($pid, $eid); my $userdir = TBExptUserDir($pid, $eid); # # Verify user and get his DB uid. # if (! UNIX2DBUID($UID, \$dbuid)) { die("*** $0:\n". " You do not exist in the Emulab Database.\n"); } # # Get email info for user. # if (! UserDBInfo($dbuid, \$user_name, \$user_email)) { die("*** $0:\n". " Cannot determine your name and email address.\n"); } # # Verify that this person can muck with the experiment. # Note that any script down the line has to do an admin check also. # if ($UID && !TBAdmin($UID) && !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) { die("*** $0:\n". " You do not have permission to swap or modify this experiment!\n"); } # Must do this before lock tables! # idleswap is in minutes, threshold is in hours $idleswap_time = 60 * TBGetSiteVar("idle/threshold"); # # We have to protect against trying to end an experiment that is currently # in the process of being terminated. We use a "wrapper" state (actually # a timestamp so we can say when termination was requested) since # terminating consists of a couple of different experiment states down inside # the tb scripts. # DBQueryFatal("lock tables experiments write"); $query_result = DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'"); if (! $query_result->numrows) { die("*** $0:\n". " No such experiment $pid/$eid exists!\n"); } my %hashrow = $query_result->fetchhash(); my $expt_head_login = $hashrow{'expt_head_uid'}; my $estate = $hashrow{'state'}; my $expt_path = $hashrow{'path'}; my $isbatchexpt = $hashrow{'batchmode'}; my $ebatchstate = $hashrow{'batchstate'}; my $swappablebit= $hashrow{'swappable'}; my $idleswapbit = $hashrow{'idleswap'}; my $autoswapbit = $hashrow{'autoswap'}; my $swappablestr= ( $swappablebit ? "Yes" : "No" ); my $idleswapstr = ( $idleswapbit ? "Yes" : "No" ); my $autoswapstr = ( $autoswapbit ? "Yes" : "No" ); my $noswap = $hashrow{'noswap_reason'}; my $noidleswap = $hashrow{'noidleswap_reason'}; my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0; my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0; if ($inout ne "out") { # I'm going to update this below, so fix the value before I use it. $idleswaptime = $idleswap_time / 60.0; } my $swapsettings = #"Name: $expt_name\n" . "Swappable: $swappablestr". ($swappablebit ? "\n" : " (Reason: $noswap)\n"). "Idle-Swap: $idleswapstr". ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n"). "Auto-Swap: $autoswapstr". ($autoswapbit ? ", at $autoswaptime hours\n" : "\n"); if (! chdir($workdir)) { die("*** $0:\n". " Could not chdir to $workdir: $!\n"); } if (defined($hashrow{'expt_locked'})) { $val = $hashrow{'expt_locked'}; die("*** $0:\n". " It appears that $pid/$eid went into transition at $val.\n". " You will be notified via email when experiment transition ". " is complete.\n"); } # # Batchmode. # if ($isbatchexpt) { # # When coming from the daemon, sanity check the batch state. # if ($batch) { if ($inout eq "in") { die("*** $0:\n". " Batch experiment $pid/$eid is not in the proper state!\n". " Currently $ebatchstate, but should be ACTIVATING\n") if ($ebatchstate ne BATCHSTATE_ACTIVATING); } elsif ($inout eq "out") { die("*** $0:\n". " Batch experiment $pid/$eid is not in the proper state!\n". " Currently $ebatchstate, but should be TERMINATING\n") if ($ebatchstate ne BATCHSTATE_TERMINATING); } else { die("*** $0:\n". " Improper request from batch daemon for $pid/$eid!\n"); } } else { # # User is requesting that a batch either be injected or paused. # Sanity check the state, but otherwise let the batch daemon # handle it. # if ($inout eq "in") { die("*** $0:\n". " Batch experiment $pid/$eid is not in the proper state!\n". " Currently $ebatchstate. Must be PAUSED to swap in.\n") if ($ebatchstate ne BATCHSTATE_PAUSED); TBSetBatchState($pid, $eid, BATCHSTATE_POSTED); } elsif ($inout eq "out") { die("*** $0:\n". " Batch experiment $pid/$eid is not in the proper state!\n". " Currently $ebatchstate. Must be RUNNING to swap out.\n") if ($ebatchstate ne BATCHSTATE_RUNNING); my $flag = BATCHMODE_CANCELSWAP; DBQueryFatal("UPDATE experiments set canceled=$flag ". "WHERE eid='$eid' and pid='$pid'"); } elsif ($inout eq "modify") { die("*** $0:\n". " Batch experiment $pid/$eid is not in the proper state!\n". " Currently $ebatchstate. Must be PAUSED to modify.\n") if ($ebatchstate ne BATCHSTATE_PAUSED); # Otherwise, proceed with the modify. The experiment will be # locked below, and so it cannot be injected (posted) until # that is finished cause of the expt_locked test above. goto doit; } else { die("*** $0:\n", " Batch experiments can only be swapped in or out!\n"); } exit(0); } doit: } # # Okay, check state. We do not allow modification or swapping to start when the # experiment is in transition. A future task would be to allow this, # but for now the experiment must be in one of a few states to proceed # # Seems like too many states! # if ($estate ne EXPTSTATE_ACTIVE && $estate ne EXPTSTATE_SWAPPED) { die("*** $0:\n". " It appears that experiment $pid/$eid is in transition.\n". " The user that created the experiment will be notified via\n". " email when the experiment is no longer in transition.\n"); } if ($inout eq "in" && $estate eq EXPTSTATE_ACTIVE) { die("*** $0:\n". " It appears that experiment $pid/$eid is already swapped in!"); } if ($inout eq "out" && $estate eq EXPTSTATE_SWAPPED) { die("*** $0:\n". " It appears that experiment $pid/$eid is already swapped out!"); } if ($inout eq "restart" && $estate ne EXPTSTATE_ACTIVE) { die("*** $0:\n". " It appears that experiment $pid/$eid is not active!"); } # if $inout eq "modify", either EXPTSTATE_ACTIVE -or- EXPTSTATE_SWAPPED is ok. # Set the timestamp now, and unlock the experiments table. # Also update idleswap_timeout to whatever the current value is if ($inout ne "out") { $idlestr = ", idleswap_timeout='$idleswap_time'"; } else { $idlestr = ""; } DBQueryFatal("UPDATE experiments SET expt_locked=now() $idlestr ". "WHERE eid='$eid' and pid='$pid'"); DBQueryFatal("unlock tables"); # # XXX - At this point a failure is going to leave things in an # inconsistent state. # if ($inout eq "in") { $action = "swapped in"; } if ($inout eq "out") { $action = "swapped out"; } if ($inout eq "restart") { $action = "restarted"; } if ($inout eq "modify") { $action = "modified"; } # # Get email address of the experiment head, which may be different than # the person who is actually terminating the experiment, since its polite # to let the original creator know whats going on. # my $expt_head_name; my $expt_head_email; if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) { print STDERR "*** WARNING: ". "Could not determine name/email for $expt_head_login.\n"; $expt_head_name = "TBOPS"; $expt_head_email = $TBOPS; } # # If not in batch mode, go into the background. Parent exits. # if (! $batch) { $logname = TBExptCreateLogFile($pid, $eid, "swapexp"); TBExptSetLogFile($pid, $eid, $logname); TBExptOpenLogFile($pid, $eid); if (TBBackGround($logname)) { # # Parent exits normally # print "Experiment $pid/$eid is now being $action.\n". "You will be notified via email when the this is done.\n"; exit(0); } } # # Remove old report file since its contents are going to be invalid. # if ($inout ne "restart" && -e $repfile) { unlink("$repfile"); } # # Sanity check states in case someone changes something. # if ($inout eq "out") { print STDOUT "Running 'tbswap out' with arguments: $pid $eid\n"; if (system("$tbdir/tbswap out $pid $eid") != 0) { $errorstat = $? >> 8; fatal("tbswap out failed!\n"); } $estate = ExpState($pid,$eid); if ($estate ne EXPTSTATE_SWAPPED) { fatal("Experiment is in the wrong state: $estate\n"); } } elsif ($inout eq "in") { print STDOUT "Running 'tbswap in' with arguments: $pid $eid\n"; if (system("$tbdir/tbswap in $pid $eid") != 0) { $errorstat = $? >> 8; fatal("tbswap in failed!\n"); } $estate = ExpState($pid,$eid); if ($estate ne EXPTSTATE_ACTIVE) { fatal("Experiment is in the wrong state: $estate\n"); } system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile"); } elsif ($inout eq "modify") { my $modifyError = ""; GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY); print "Backing up old experiment state ... " . TBTimeStamp() . "\n"; if (TBExptBackupVirtualState($pid, $eid, $$)) { fatal("*** $0:\n". " Could not backup experiment state; cannot safely continue!\n"); } TBExptRemoveVirtualState($pid, $eid); # # Rerun tbprerun if modifying. # if (system("$tbdir/tbprerun -m $pid $eid $nsfile") != 0) { $modifyError = "tbprerun failed!"; } # # If experiment is currently swapped out, no need to do an update # after modifying it. # if (! $modifyError && $estate eq EXPTSTATE_ACTIVE) { print STDOUT "Running 'tbswap update' with arguments: $pid $eid\n"; my $rebootSwitch = ""; if ($reboot) { $rebootSwitch = "-reboot"; } if (system("$tbdir/tbswap update $rebootSwitch $pid $eid") != 0) { $errorstat = $? >> 8; $modifyError = "tbswap update failed!"; } if (! $modifyError) { $estate = ExpState($pid, $eid); if ($estate ne EXPTSTATE_ACTIVE) { fatal("Experiment is in the wrong state: $estate!"); } } if (! $modifyError) { system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile"); } } if ($modifyError) { print STDERR "*** $0:\n". " $modifyError\n"; print STDOUT "Recovering experiment state...\n"; TBExptRemoveVirtualState($pid, $eid ); if (0 == TBExptRestoreVirtualState($pid, $eid, $$)) { fatal("*** Update aborted; old state restored.\n"); } else { # Set state to NEW so experiment will get wiped. SetExpState($pid, $eid, EXPTSTATE_NEW); fatal("*** Experiment state could not be restored!\n"); } } } else { # $inout eq "restart" assumed. print STDOUT "Running tbrestart with arguments: $pid $eid\n"; if (system("$tbdir/tbrestart $pid $eid") != 0) { fatal("tbrestart failed!\n"); } } # # Try to copy off the files for testbed information gathering. # TBSaveExpLogFiles($pid, $eid); # # Make a copy of the work dir in the user visible space so the user # can see the log files. This overwrites existing files of course, # but thats okay. # system("cp -Rfp $workdir/ $userdir/tbdata/"); # # Gather stats. # if ($inout eq "in") { GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0); } elsif ($inout eq "out") { GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0, ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0)); } elsif ($inout eq "modify") { GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0); } # # Set the swapper uid on success only, and *after* gathering swap stats! # TBExptSetSwapUID($pid, $eid, $dbuid); # # Must unlock before exit. # TBUnLockExp($pid, $eid); print "Done!\n"; # # In batch mode, just exit without sending email. # if ($batch) { exit(0); } # # HACK! if successful, put new NS file in DB. # if ($inout eq "modify") { $nsdata_string = `cat $nsfile`; if (defined($nsdata_string)) { $nsdata_string = DBQuoteSpecial($nsdata_string); DBQueryWarn("delete from nsfiles WHERE eid='$eid' and pid='$pid'"); DBQueryWarn("insert into nsfiles (pid, eid, nsfile) ". "VALUES('$pid', '$eid', $nsdata_string)"); } else { print "Warning!! Couldn't read nsfile '$nsfile'!\n"; } } # # Clear the log file so the web page stops spewing. # if (defined($logname)) { TBExptCloseLogFile($pid, $eid); } # # Send email notification to user. # my $message = "Experiment $eid in project $pid has been "; if ($inout eq "out" && ($idleswap || $autoswap || $force) ) { $message .= "forcibly swapped out by\nTestbed Operations"; if ($idleswap) { $message .= " because it was idle for too long (Idle-Swap).\n". "(See also the Idle-Swap info in \n". "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n"; } elsif ($autoswap) { $message .= " because it was swapped in too long (Auto-Swap).\n". "(See also the Auto-Swap info in \n". "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n"; } elsif ($force) { $message .= ". (See also our Node Usage Policies in \n". "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n"; } } else { $message .= "$action.\n"; } if ($inout eq "in") { # Add the swap settings... $message .="\nCurrent swap settings:\n$swapsettings"; } $message .= "\n". "Appended below is the output. If you have any questions or comments,\n" . "please include the output in your message to $TBOPS\n"; SENDMAIL("$user_name <$user_email>", "Experiment $pid/$eid \u$action", $message, ($idleswap ? $TBOPS : "$user_name <$user_email>"), "Cc: $expt_head_name <$expt_head_email>\n". "Bcc: $TBLOGS", (($inout eq "restart") ? ($logname) : ($repfile, $logname))); exit 0; sub fatal($) { my($mesg) = $_[0]; print STDOUT $mesg; # # Gather stats. # if ($inout eq "in") { GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, $errorstat); } elsif ($inout eq "out") { GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $errorstat); } elsif ($inout eq "modify") { GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, $errorstat); } # # if $hosed == 1, we entirely terminate the experiment. # my $hosed = 0; # If we're doing a modify, # and tbprerun sent the experiment to "NEW", # we're hosed. if ($inout eq "modify" && ExpState($pid,$eid) eq EXPTSTATE_NEW) { $hosed = 1; } if ($hosed) { # # Note: $estate is still set to the state which the experiment was in # when we began. # if ($estate eq EXPTSTATE_ACTIVE) { print "Running 'tbswap out' with arguments: $pid $eid\n"; if (system("$tbdir/tbswap out -force $pid $eid") != 0) { print "tbswap out failed!\n"; } } print "Running tbend with arguments: -force $pid $eid\n"; if (system("$tbdir/tbend -force $pid $eid") != 0) { print "tbend failed!\n"; } } # # Kill this for convenience later. # DBQueryWarn("update experiments set expt_locked=NULL ". "WHERE eid='$eid' and pid='$pid'"); # Copy over the log files so the user can see them. system("/bin/cp -Rfp $workdir/ $userdir/tbdata"); # # In batch mode, exit without sending the email. # if ($batch) { TBUnLockExp($pid, $eid); exit($errorstat); } # # Clear the log file so the web page stops spewing. # if (defined($logname)) { TBExptCloseLogFile($pid, $eid); } # # Send a message to the testbed list. Append the logfile. # SENDMAIL("$user_name <$user_email>", "Swap ${inout} Failure: $pid/$eid", $mesg, ($idleswap ? $TBOPS : "$user_name <$user_email>"), "Cc: $expt_head_name <$expt_head_email>\n". "Cc: $TBOPS", ($logname)); if ($hosed) { # # Copy off the workdir to the user directory, Then back up both of # them for post-mortem debugging. # system("/bin/cp -Rfp $workdir/ $userdir/tbdata"); system("/bin/rm -rf ${workdir}-failed"); system("/bin/mv -f $workdir ${workdir}-failed"); system("/bin/rm -rf ${userdir}-failed"); system("/bin/mv -f $userdir ${userdir}-failed"); TBExptDestroy($pid, $eid); } exit($errorstat); }