Commit 94b8349c authored by Leigh B. Stoller

Major changes to experiment setup. Like the previous changes to
termination, setup now exits immediately and sends email to the user
when the experiment is fully configured.
parent 8b697bd3
#!/usr/bin/perl -wT
use English;
#
# This gets invoked from the Web interface. CD into the proper directory
# and do the tb stuff.
#
# usage: tbdoit <pid> <eid> <temp_nsfile>
#
#
# Configure variables
#
my $TB = "@prefix@";
my $TB = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS = "@TBOPSEMAIL@";
my $tbdir = "$TB/bin/";
my $projroot = "/proj";
my $tbdata = "tbdata";
my $cleanme = 0;
#
# For debugging all this goo. Leaves the experiment directory intact,
......@@ -17,26 +31,6 @@ my $debug = 1;
#
$| = 1;
#
# We need to protect this from signals. 1st argument is signal name
#
# Signal handler: announce the caught signal, tear the experiment down with
# tbendit(), then bail out through fatal().
#   $sig - first argument; printed after a literal "SIG", so presumably the
#          bare signal name (e.g. "INT") -- confirm against the %SIG setup.
sub sighandler {
my($sig) = @_;
print "Caught a SIG$sig--shutting down\n";
# Release whatever tbrun set up before giving up.
tbendit();
# Called with no message here.
fatal();
}
#
# This gets invoked from the Web interface. CD into the proper directory
# and do the tb stuff.
#
# usage: tbdoit <pid> <eid> <temp_nsfile>
#
my $tbdir = "$TB/bin/";
my $projroot = "/proj";
my $tbdata = "tbdata";
#
# Untaint the path
#
......@@ -50,127 +44,296 @@ if (@ARGV != 3) {
print STDOUT "Usage: tbdoit <pid> <eid> <temp_nsfile>\n";
exit(-1);
}
my $project = $ARGV[0];
my $eid = $ARGV[1];
my $tempfile= $ARGV[2];
my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $tempfile = $ARGV[2];
#
# Untaint the arguments.
#
if ($project =~ /^([-\@\w.]+)$/) {
$project = $1;
if ($pid =~ /^([-\@\w.]+)$/) {
$pid = $1;
}
if ($eid =~ /^([-\@\w.]+)$/) {
$eid = $1;
}
if ($tempfile =~ /^([-\@\w.]+)$/) {
# Note different taint check (allow /).
if ($tempfile =~ /^([\/-\@\w.]+)$/) {
$tempfile = $1;
}
my $piddir = "$projroot/$project";
my $piddir = "$projroot/$pid";
my $expdir = "$piddir/exp";
my $eiddir = "$expdir/$eid";
my $nsfile = "$eid.ns";
my $irfile = "$eid.ir";
my $repfile = "$eid.report";
my $tempcopy= "$tempfile.$$";
#
# Create a directory structure for the experiment in the project directory.
# Set up for querying the database.
#
use Mysql;
my $DB = Mysql->connect("localhost", $DBNAME, "script", "none");
#
# Check to make sure the experiment record exists. To prevent mishap,
# also check to make sure the experiment is in the "not ready" state
# to prevent this record from being used to configure two sets of nodes!
# This could happen if the user invokes this script directly, and that
# is eventually what I want anyway (instead of users using the tb scripts).
#
$query_result =
$DB->query("SELECT expt_ready FROM experiments ".
"WHERE eid='$eid' and pid='$pid'");
if (! $query_result) {
fatal("DB Error getting experiment record $pid/$eid\n");
}
if ($query_result->numrows < 1) {
print STDOUT "No experiment record for $pid/$eid exists!\n";
exit(1);
}
@row = $query_result->fetchrow_array();
if ($row[0]) {
fatal("Experiment $pid/$eid is already configured!\n".
"You are not allowed to reconfigure experiments unless you\n".
"first terminate the existing experiment via the web interface.\n");
}
#
if (system("$TB/libexec/mkexpdir $project $eid") != 0) {
print STDOUT "$tbdir/mkexpdir failed\n";
# Figure out who is going to get the email!
#
$query_result =
$DB->query("SELECT usr_name,usr_email from users ".
"WHERE unix_uid='$EUID'");
if (! $query_result) {
fatal("DB Error getting user information for uid $EUID\n");
}
if ($query_result->numrows < 1) {
print STDOUT "Go Away! You do not exist in the Emulab Database.\n";
exit(1);
}
@row = $query_result->fetchrow_array();
$user_name = $row[0];
$user_email = $row[1];
#
# Copy the nsfile from wherever the web server stuffed it into a temporary
# file. The web server is going to delete it once this script returns.
#
if (system("/bin/cp", "$tempfile", "$tempcopy") != 0) {
fatal("Could not copy $tempfile to $tempcopy: $!\n");
}
#
# The rest of this goes into the background so that the user sees an
# immediate response. We will send email later when the experiment
# is fully configured.
#
$mypid = fork();
if ($mypid) {
#
# Parent exits normally
#
print STDOUT
"Experiment $pid/$eid is now configuring\n".
"You will be notified via email when the experiment is ready to use\n";
exit(0);
}
#
# We have to disconnect from the caller by redirecting STDIN, STDOUT, and
# STDERR away from the pipe. Otherwise the caller (the web server) will
# continue to wait even though the parent has exited.
#
open(STDIN, "< /dev/null") or
die("opening /dev/null for STDIN: $!");
#
# Create a temporary name for a log file and untaint it.
#
$logname = `mktemp /tmp/$pid-$eid.XXXXXX`;
# Note different taint check (allow /).
if ($logname =~ /^([-\@\w.\/]+)$/) {
$logname = $1;
} else {
die "Bad data in $logname";
}
open(STDERR, ">> $logname") or die("opening $logname for STDERR: $!");
open(STDOUT, ">> $logname") or die("opening $logname for STDOUT: $!");
#
# Create a directory structure for the experiment in the project directory.
#
if (system("$TB/libexec/mkexpdir $pid $eid") != 0) {
fatal("$tbdir/mkexpdir failed\n");
}
#
# Copy the nsfile from wherever the web server stuffed it, into the
# experiment directory.
#
if (! chdir("$eiddir/$tbdata")) {
print STDOUT "Could not chdir to $tbdata in $eiddir: $!\n";
fatal();
fatal("Could not chdir to $tbdata in $eiddir: $!\n");
}
if (system("/bin/cp", "$tempfile", "$nsfile") != 0) {
print STDOUT
"Could not copy $tempfile to $eiddir/$tbdata/$nsfile: $!\n";
fatal();
if (system("/bin/cp", "$tempcopy", "$nsfile") != 0) {
fatal("Could not copy $tempcopy to $eiddir/$tbdata/$nsfile: $!\n");
}
unlink("$tempcopy");
#
# Run the various scripts.
#
#print STDOUT "Running $tbdir/tbprerun with arguments: ".
# "$project $eid $nsfile\n";
if (system("$tbdir/tbprerun $project $eid $nsfile") != 0) {
print STDOUT "tbprerun failed!\n";
dumplog();
fatal();
if (system("$tbdir/tbprerun -nologfile $pid $eid $nsfile") != 0) {
fatal("tbprerun failed!\n");
}
# So fatal errors run tbend.
$cleanme = 1;
#print STDOUT "Running $tbdir/tbrun with arguments: $project $eid $irfile\n";
if (system("$tbdir/tbrun $project $eid $irfile") != 0) {
print STDOUT "tbrun failed!\n";
dumplog();
tbendit();
fatal();
if (system("$tbdir/tbrun -nologfile $pid $eid $irfile") != 0) {
fatal("tbrun failed!\n");
}
#print STDOUT "Running tbreport with arguments: ".
# "-v $project $eid $irfile 2>&1 > $repfile\n";
if (system("$tbdir/tbreport -v $project $eid $irfile 2>&1 > $repfile") != 0) {
print STDOUT "tbreport failed!\n";
dumplog();
tbendit();
fatal();
if (system("$tbdir/tbreport -v $pid $eid $irfile 2>&1 > $repfile") != 0) {
fatal("tbreport failed!\n");
}
#
# The web server will not be able to access the report file, so just
# dump it STDOUT and let the php script do something with it.
# Done! Set the ready bit in the experiment record.
#
$query_result = $DB->query("UPDATE experiments SET expt_ready=1 ".
"WHERE eid='$eid' and pid='$pid'");
if (! $query_result) {
fatal("DB Error setting ready bit in experiment record for $pid/$eid\n");
}
#
dumpreport();
# Grab the entire experiment record for the mail message below.
#
$query_result =
$DB->query("SELECT expt_name,expt_created,expt_expires from experiments ".
"WHERE eid='$eid' and pid='$pid'");
if (! $query_result) {
fatal("DB Error getting experiment record for $pid/$eid\n");
}
@row = $query_result->fetchrow_array();
$expt_name = $row[0];
$expt_created = $row[1];
$expt_expires = $row[2];
print STDOUT "Setup Success\n";
#
# Dump the report file and the log file to the user via email.
#
open(MAIL, "| /usr/bin/mail ".
"-s \"TESTBED: New Experiment Created: $pid/$eid\" ".
"-c $TBOPS \"$user_name <$user_email>\" >/dev/null 2>&1")
or die "Cannot start mail program: $!";
print MAIL "Your experiment `$eid' in project `$pid' is now configured.\n";
print MAIL "Here is the experiment summary detailing the nodes that were\n";
print MAIL "allocated to you. You may use the `Qualified Name' to log on\n";
print MAIL "to your nodes. See /etc/hosts on your nodes (when running\n";
print MAIL "FreeBSD, Linux, or NetBSD) for the IP name mapping on each node\n";
print MAIL "\n";
print MAIL "User: $user_name\n";
print MAIL "EID: $eid\n";
print MAIL "PID: $pid\n";
print MAIL "Name: $expt_name\n";
print MAIL "Created: $expt_created\n";
print MAIL "Expires: $expt_expires\n";
print MAIL "Directory: $eiddir\n\n";
if (open(IN, "$repfile")) {
while (<IN>) {
print MAIL "$_";
}
close(IN);
}
print MAIL "\n\n---------\n\n";
print MAIL "Here is the log of the configuration process.\n";
print MAIL "If you have any questions or problems, please include the\n";
print MAIL "output below in your message to $TBOPS\n\n";
if (open(IN, "$logname")) {
while (<IN>) {
print MAIL "$_";
}
close(IN);
}
close(MAIL);
unlink("$logname");
exit 0;
sub fatal()
{
if (! chdir($expdir)) {
print STDOUT "In Fatal: Could not chdir to $expdir!\n";
exit(-1);
}
if ($debug) {
$save = "$eid-$PID";
system("mv $eid $save");
my($mesg) = $_[0];
print STDOUT "$mesg\n";
print STDOUT "Cleaning up ...\n";
#
# If we got far enough to allocate nodes, must run tbend.
#
if ($cleanme) {
tbendit();
}
else {
system("rm -r $eid");
#
# Now we can remove all trace from the DB since it failed.
#
$query_result =
$DB->query("DELETE from experiments WHERE eid='$eid' and pid='$pid'");
if (! $query_result) {
print STDOUT "DB Error deleting experiment record for $pid/$eid\n";
}
exit(-1);
}
sub dumplog()
{
if (open(IN, "$eid.log")) {
print STDOUT "Dumping $eid.log\n";
while (<IN>) {
print STDOUT "$_";
}
close(IN);
#
# For debugging failures?
#
if (chdir($expdir)) {
$save = "$eid-$PID";
# Taint-mode workaround: use /bin/mv rather than rename("$eid", "$save").
system("/bin/mv", "-f", "$eid", "$save");
}
}
sub dumpreport()
{
if (open(IN, "$eid.report")) {
print STDOUT "Dumping $eid.report\n";
#
# Send a message to the testbed list. Append the logfile if it got
# that far.
#
open(MAIL, "| /usr/bin/mail ".
"-s \"TESTBED: Experiment Configure Failure $pid/$eid\" ".
"$TBOPS >/dev/null 2>&1")
or die "Cannot start mail program: $!";
print MAIL $mesg;
if (open(IN, "$logname")) {
print MAIL "\n\n---------\n\n";
while (<IN>) {
print STDOUT "$_";
print MAIL "$_";
}
close(IN);
}
close(MAIL);
unlink("$logname");
exit(-1);
}
#
......@@ -179,11 +342,8 @@ sub dumpreport()
#
# Run the tbend backend script from $tbdir for the current experiment.
# A non-zero exit status from tbend is reported on STDOUT and otherwise
# ignored; nothing is returned to the caller.
# NOTE(review): this span shows both the $project and the $pid variant of
# each statement -- confirm which pair is live before relying on it.
sub tbendit()
{
print STDOUT "Running tbend with arguments: $project $eid\n";
if (system("$tbdir/tbend $project $eid") != 0) {
print STDOUT "Running tbend with arguments: -nologfile $pid $eid\n";
if (system("$tbdir/tbend -nologfile $pid $eid") != 0) {
print STDOUT "tbend failed!\n";
}
}
......@@ -3,6 +3,14 @@ use English;
#
# This gets invoked from the Web interface. Terminate an experiment.
# Most of the STDOUT prints are never seen since the web interface
# repeats only errors. My plan is to make this script the front end to
# experiment termination and make tbend a backend program that no one
# uses.
#
# TODO: Perhaps setup a watchdog timer to guard against hangs. Not sure
# why this would happen, but it would be bad for a termination to
# hang up.
#
# usage: tbstopit <pid> <eid>
#
......@@ -64,10 +72,15 @@ my $DB = Mysql->connect("localhost", $DBNAME, "script", "none");
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a timestamp for this purpose.
# If the timestamp is ever non-null, then something is wrong and we should
# never proceed.
# not proceed.
#
$query_result = $DB->query("SELECT expt_terminating FROM experiments ".
"WHERE eid='$eid' and pid='$pid'");
# We also have to guard against trying to terminate an experiment that
# is still in the process of configuring. It's easiest to force the user
# to wait!
#
$query_result =
$DB->query("SELECT expt_terminating,expt_ready FROM experiments ".
"WHERE eid='$eid' and pid='$pid'");
if (! $query_result) {
fatal("DB Error getting experiment termination date for $pid/$eid\n");
......@@ -85,6 +98,13 @@ if (defined($row[0])) {
"torn down\n";
exit(1);
}
if (! $row[1]) {
print STDOUT
"It appears that experiment $pid/$eid is still configuring.\n".
"The user that created the experiment will be notified via email\n".
"when it has been fully configured and is ready for use\n";
exit(1);
}
#
# Figure out who is going to get the email!
......@@ -93,7 +113,7 @@ $query_result = $DB->query("SELECT usr_name,usr_email from users ".
"WHERE unix_uid='$EUID'");
if (! $query_result) {
fatal("DB Error getting user informarion for uid $EUID\n");
fatal("DB Error getting user information for uid $EUID\n");
}
if ($query_result->numrows < 1) {
print STDOUT "Go Away! You do not exist in the Emulab Database.\n";
......@@ -145,6 +165,7 @@ open(STDIN, "< /dev/null") or
#
$logname = `mktemp /tmp/tbend-$pid-$eid.XXXXXX`;
# Note different taint check (allow /).
if ($logname =~ /^([-\@\w.\/]+)$/) {
$logname = $1;
} else {
......@@ -180,9 +201,9 @@ if (! $query_result) {
print STDOUT "Termination Success\n";
open(MAIL, "| /usr/bin/mail -s \"Experiment $pid/$eid Terminated\" ".
"-c $TBOPS ".
"\"$user_name <$user_email>\" >/dev/null 2>&1")
open(MAIL, "| /usr/bin/mail ".
"-s \"TESTBED: Experiment $pid/$eid Terminated\" ".
"-c $TBOPS \"$user_name <$user_email>\" >/dev/null 2>&1")
or die "Cannot start mail program: $!";
print MAIL "Your experiment `$eid' in project `$pid' has been terminated.\n";
......@@ -200,7 +221,7 @@ if (open(IN, "$logname")) {
}
close(MAIL);
system("rm -f $logname");
unlink("$logname");
exit 0;
sub fatal()
......@@ -209,7 +230,8 @@ sub fatal()
if (chdir($expdir)) {
$save = "$eid-$PID";
system("mv $eid $save");
# Taint-mode workaround: use /bin/mv rather than rename("$eid", "$save").
system("/bin/mv", "-f", "$eid", "$save");
}
print STDOUT $mesg;
......@@ -218,7 +240,7 @@ sub fatal()
# that far.
#
open(MAIL, "| /usr/bin/mail ".
"-s \"Experiment Termination Failure $pid/$eid\" ".
"-s \"TESTBED: Termination Failure $pid/$eid\" ".
"$TBOPS >/dev/null 2>&1")
or die "Cannot start mail program: $!";
......@@ -234,7 +256,7 @@ sub fatal()
}
close(MAIL);
system("rm -f $logname");
unlink("$logname");
exit(-1);
}
......@@ -121,6 +121,16 @@ if (($row = mysql_fetch_row($query_result)) == 0) {
$user_name = $row[0];
$user_email = $row[1];
#
# Set the experiment ready bit to 1 if it's a shell experiment.
#
if ($nonsfile) {
$expt_ready = 1;
}
else {
$expt_ready = 0;
}
#
# At this point enter the exp_id into the database so that it shows up as
# valid when the tb scripts run. We need to remove the entry if any of
......@@ -129,9 +139,9 @@ $user_email = $row[1];
$query_result = mysql_db_query($TBDBNAME,
"INSERT INTO experiments ".
"(eid, pid, expt_created, expt_expires, expt_name, ".
"expt_head_uid, expt_start, expt_end) ".
"expt_head_uid, expt_start, expt_end, expt_ready) ".
"VALUES ('$exp_id', '$exp_pid', '$exp_created', '$exp_expires', ".
"'$exp_name', '$uid', '$exp_start', '$exp_end')");
"'$exp_name', '$uid', '$exp_start', '$exp_end', '$expt_ready')");
if (! $query_result) {
$err = mysql_error();
TBERROR("Database Error adding new experiment $exp_id: $err\n", 1);
......@@ -154,7 +164,8 @@ if ($nonsfile) {
echo "<center><br>
<h2>Experiment Configured!
</center><br><br>
The ID for your experiment in project $exp_pid is $exp_id.<br><br>
The ID for your experiment in project `$exp_pid' is `$exp_id.'
<br><br>
Since you did not provide an NS script, no nodes have been
allocated. You must log in and run the tbsetup scripts
yourself. For your convenience, we have created a directory
......@@ -181,25 +192,9 @@ if ($nonsfile) {
die("");
}
#
# We run a wrapper script that does all the work of creating the directory
# and running the tb scripts. If it fails, we get back all the output and
# give that to the user. If it succeeds, we go and find the report file
# and give that to the user.
#
# tbdoit <pid> <eid> <temp_nsfile>
#
# Later, when the experiment is ended, the directory will be deleted.
#
# There is similar path stuff in endexp.php3. Be sure to sync that up
# if you change things here.
#
echo "<center><br>";
echo "<h3>Setting up experiment. This may take a few minutes ...
echo "<h3>Starting experiment configuration. Please wait a moment ...
</center><br><br>
Please do <em>not</em> click the 'Stop' button. This will cause
the experiment creation to terminate prematurely, which can cause
problems for future (other) experiments.
</h3>";
flush();
......@@ -215,42 +210,30 @@ chmod($exp_nsfile, 0666);
#
# Run the scripts. We use a script wrapper to deal with changing
# to the proper directory and to keep some of these details out
# of this. We just want to know if the experiment setup worked.
# The wrapper is going to go the extra step of running tbreport
# so that we can give the user warm fuzzies.
# to the proper directory and to keep most of these details out
# of this. The user will be notified later. It's possible that the
# script will return non-zero status, but there are just a couple
# of conditions. Generally, the script exits and the user is told
# of errors later.
#
$output = array();
$retval = 0;
$last = time();
if (($pipe = popen("$TBSUEXEC_PATH $uid $gid ".
"tbdoit $exp_pid $exp_id $exp_nsfile", "r")) == 0) {
$query_result = mysql_db_query($TBDBNAME,
"DELETE FROM experiments WHERE eid='$exp_id' and pid=\"$exp_pid\"");
TBERROR("Opening pipe to tbdoit", 1);
}
$count = 0;
echo "<XMP>\n";
while (!feof($pipe)) {
$line = fgets($pipe, 1024);
$output[$count] = $line;
$count++;
echo "$line";
flush();
}
echo "</XMP>\n";
$retval = pclose($pipe);
$result = exec("$TBSUEXEC_PATH $uid $gid tbdoit $exp_pid $exp_id $exp_nsfile",
$output, $retval);
if ($retval) {
echo "<center><br><br><h2>
Setup Failure($retval)
</h2><br></center>\n";
echo "<br><br><h2>
Setup Failure($retval): Output as follows:
</h2>
<br>
<XMP>\n";
for ($i = 0; $i < count($output); $i++) {
echo "$output[$i]\n";
}
echo "</XMP>\n";
$query_result = mysql_db_query($TBDBNAME,