Commit 94b8349c authored by Leigh B. Stoller

Major changes to experiment setup. Like the previous changes to
termination, setup now exits immediately and sends email to the user
when the experiment is fully configured.
parent 8b697bd3
#!/usr/bin/perl -wT #!/usr/bin/perl -wT
use English; use English;
#
# This gets invoked from the Web interface. CD into the proper directory
# and do the tb stuff.
#
# usage: tbdoit <pid> <eid> <temp_nsfile>
#
# #
# Configure variables # Configure variables
# #
my $TB = "@prefix@"; my $TB = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS = "@TBOPSEMAIL@";
my $tbdir = "$TB/bin/";
my $projroot = "/proj";
my $tbdata = "tbdata";
my $cleanme = 0;
# #
# For debugging all this goo. Leaves the experiment directory intact, # For debugging all this goo. Leaves the experiment directory intact,
...@@ -17,26 +31,6 @@ my $debug = 1; ...@@ -17,26 +31,6 @@ my $debug = 1;
# #
$| = 1; $| = 1;
#
# We need to protect this from signals. 1st argument is signal name
#
sub sighandler {
my($sig) = @_;
print "Caught a SIG$sig--shutting down\n";
tbendit();
fatal();
}
#
# This gets invoked from the Web interface. CD into the proper directory
# and do the tb stuff.
#
# usage: tbdoit <pid> <eid> <temp_nsfile>
#
my $tbdir = "$TB/bin/";
my $projroot = "/proj";
my $tbdata = "tbdata";
# #
# Untaint the path # Untaint the path
# #
...@@ -50,127 +44,296 @@ if (@ARGV != 3) { ...@@ -50,127 +44,296 @@ if (@ARGV != 3) {
print STDOUT "Usage: tbdoit <pid> <eid> <temp_nsfile>\n"; print STDOUT "Usage: tbdoit <pid> <eid> <temp_nsfile>\n";
exit(-1); exit(-1);
} }
my $project = $ARGV[0]; my $pid = $ARGV[0];
my $eid = $ARGV[1]; my $eid = $ARGV[1];
my $tempfile= $ARGV[2]; my $tempfile = $ARGV[2];
# #
# Untaint the arguments. # Untaint the arguments.
# #
if ($project =~ /^([-\@\w.]+)$/) { if ($pid =~ /^([-\@\w.]+)$/) {
$project = $1; $pid = $1;
} }
if ($eid =~ /^([-\@\w.]+)$/) { if ($eid =~ /^([-\@\w.]+)$/) {
$eid = $1; $eid = $1;
} }
if ($tempfile =~ /^([-\@\w.]+)$/) { # Note different taint check (allow /).
# Keep '-' first in the class so it is a literal. Placed between '/' and '@'
# it would form a range (admitting : ; < = > ?) and reject a plain '-'.
if ($tempfile =~ /^([-\@\w.\/]+)$/) {
$tempfile = $1; $tempfile = $1;
} }
my $piddir = "$projroot/$project"; my $piddir = "$projroot/$pid";
my $expdir = "$piddir/exp"; my $expdir = "$piddir/exp";
my $eiddir = "$expdir/$eid"; my $eiddir = "$expdir/$eid";
my $nsfile = "$eid.ns"; my $nsfile = "$eid.ns";
my $irfile = "$eid.ir"; my $irfile = "$eid.ir";
my $repfile = "$eid.report"; my $repfile = "$eid.report";
my $tempcopy= "$tempfile.$$";
# #
# Create a directory structure for the experiment in the project directory. # Set up for querying the database.
#
use Mysql;

#
# Connect to the testbed database. A failed connect must be caught here;
# otherwise the first query below dies with an unhelpful "can't call method
# on an undefined value" message.
#
my $DB = Mysql->connect("localhost", $DBNAME, "script", "none");
if (! $DB) {
    # fatal() is not usable yet: it issues a DELETE through $DB.
    print STDOUT "Could not connect to database $DBNAME\n";
    exit(1);
}

#
# Check to make sure the experiment record exists. To prevent mishap,
# also check to make sure the experiment is in the "not ready" state
# to prevent this record from being used to configure two sets of nodes!
# This could happen if the user invokes this script directly, and that
# is eventually what I want anyway (instead of users using the tb scripts).
#
$query_result =
    $DB->query("SELECT expt_ready FROM experiments ".
               "WHERE eid='$eid' and pid='$pid'");
if (! $query_result) {
    fatal("DB Error getting experiment record $pid/$eid\n");
}
if ($query_result->numrows < 1) {
    # No record at all: nothing to clean up, so exit without calling fatal().
    print STDOUT "No experiment record for $pid/$eid exists!\n";
    exit(1);
}
@row = $query_result->fetchrow_array();
# A true expt_ready means the experiment has already been configured.
if ($row[0]) {
    fatal("Experiment $pid/$eid is already configured!\n".
          "You are not allowed to reconfigure experiments unless you\n".
          "first terminate the existing experiment via the web interface.\n");
}
# #
if (system("$TB/libexec/mkexpdir $project $eid") != 0) { # Figure out who is going to get the email!
print STDOUT "$tbdir/mkexpdir failed\n"; #
$query_result =
$DB->query("SELECT usr_name,usr_email from users ".
"WHERE unix_uid='$EUID'");
if (! $query_result) {
fatal("DB Error getting user information for uid $EUID\n");
}
if ($query_result->numrows < 1) {
print STDOUT "Go Away! You do not exist in the Emulab Database.\n";
exit(1); exit(1);
} }
@row = $query_result->fetchrow_array();
$user_name = $row[0];
$user_email = $row[1];
#
# Copy the nsfile from wherever the web server stuffed it into a temporary
# file. The web server is going to delete it once this script returns.
#
if (system("/bin/cp", "$tempfile", "$tempcopy") != 0) {
fatal("Could not copy $tempfile to $tempcopy: $!\n");
}
#
# The rest of this goes into the background so that the user sees
# immediate response. We will send email later when the experiment
# is fully configured.
#
$mypid = fork();
if (! defined($mypid)) {
    #
    # fork returns undef on failure. Without this check the parent would
    # fall through into the child-only code below and the caller would
    # never be told what happened.
    #
    unlink("$tempcopy");
    fatal("Could not fork to configure experiment $pid/$eid: $!\n");
}
if ($mypid) {
    #
    # Parent exits normally
    #
    print STDOUT
        "Experiment $pid/$eid is now configuring\n".
        "You will be notified via email when the experiment is ready to use\n";
    exit(0);
}
#
# We have to disconnect from the caller by redirecting both STDIN and
# STDOUT away from the pipe. Otherwise the caller (the web server) will
# continue to wait even though the parent has exited.
#
open(STDIN, "< /dev/null") or
die("opening /dev/null for STDIN: $!");
#
# Create a temporary name for a log file and untaint it.
#
$logname = `mktemp /tmp/$pid-$eid.XXXXXX`;
# Note different taint check (allow /).
if ($logname =~ /^([-\@\w.\/]+)$/) {
$logname = $1;
} else {
die "Bad data in $logname";
}
open(STDERR, ">> $logname") or die("opening $logname for STDERR: $!");
open(STDOUT, ">> $logname") or die("opening $logname for STDOUT: $!");
#
# Create a directory structure for the experiment in the project directory.
#
if (system("$TB/libexec/mkexpdir $pid $eid") != 0) {
fatal("$tbdir/mkexpdir failed\n");
}
# #
# Copy the nsfile from wherever the web server stuffed it, into the # Copy the nsfile from wherever the web server stuffed it, into the
# experiment directory. # experiment directory.
# #
if (! chdir("$eiddir/$tbdata")) { if (! chdir("$eiddir/$tbdata")) {
print STDOUT "Could not chdir to $tbdata in $eiddir: $!\n"; fatal("Could not chdir to $tbdata in $eiddir: $!\n");
fatal();
} }
if (system("/bin/cp", "$tempfile", "$nsfile") != 0) { if (system("/bin/cp", "$tempcopy", "$nsfile") != 0) {
print STDOUT fatal("Could not copy $tempcopy to $eiddir/$tbdata/$nsfile: $!\n");
"Could not copy $tempfile to $eiddir/$tbdata/$nsfile: $!\n";
fatal();
} }
unlink("$tempcopy");
# #
# Run the various scripts. # Run the various scripts.
# #
#print STDOUT "Running $tbdir/tbprerun with arguments: ". if (system("$tbdir/tbprerun -nologfile $pid $eid $nsfile") != 0) {
# "$project $eid $nsfile\n"; fatal("tbprerun failed!\n");
if (system("$tbdir/tbprerun $project $eid $nsfile") != 0) {
print STDOUT "tbprerun failed!\n";
dumplog();
fatal();
} }
# So fatal errors run tbend.
$cleanme = 1;
#print STDOUT "Running $tbdir/tbrun with arguments: $project $eid $irfile\n"; if (system("$tbdir/tbrun -nologfile $pid $eid $irfile") != 0) {
if (system("$tbdir/tbrun $project $eid $irfile") != 0) { fatal("tbrun failed!\n");
print STDOUT "tbrun failed!\n";
dumplog();
tbendit();
fatal();
} }
#print STDOUT "Running tbreport with arguments: ". if (system("$tbdir/tbreport -v $pid $eid $irfile 2>&1 > $repfile") != 0) {
# "-v $project $eid $irfile 2>&1 > $repfile\n"; fatal("tbreport failed!\n");
if (system("$tbdir/tbreport -v $project $eid $irfile 2>&1 > $repfile") != 0) {
print STDOUT "tbreport failed!\n";
dumplog();
tbendit();
fatal();
} }
# #
# The web server will not be able to access the report file, so just # Done! Set the ready bit in the experiment record.
# dump it STDOUT and let the php script do something with it. #
$query_result = $DB->query("UPDATE experiments SET expt_ready=1 ".
"WHERE eid='$eid' and pid='$pid'");
if (! $query_result) {
fatal("DB Error setting ready bit in experiment record for $pid/$eid\n");
}
# #
dumpreport(); # Grab the entire experiment record for the mail message below.
#
$query_result =
$DB->query("SELECT expt_name,expt_created,expt_expires from experiments ".
"WHERE eid='$eid' and pid='$pid'");
if (! $query_result) {
fatal("DB Error getting experiment record for $pid/$eid\n");
}
@row = $query_result->fetchrow_array();
$expt_name = $row[0];
$expt_created = $row[1];
$expt_expires = $row[2];
print STDOUT "Setup Success\n"; print STDOUT "Setup Success\n";
#
# Dump the report file and the log file to the user via email.
#
open(MAIL, "| /usr/bin/mail ".
"-s \"TESTBED: New Experiment Created: $pid/$eid\" ".
"-c $TBOPS \"$user_name <$user_email>\" >/dev/null 2>&1")
or die "Cannot start mail program: $!";
print MAIL "Your experiment `$eid' in project `$pid' is now configured.\n";
print MAIL "Here is the experiment summary detailing the nodes that were\n";
print MAIL "allocated to you. You may use the `Qualified Name' to log on\n";
print MAIL "to your nodes. See /etc/hosts on your nodes (when running\n";
print MAIL "FreeBSD, Linux, or NetBSD) for the IP name mapping on each node\n";
print MAIL "\n";
print MAIL "User: $user_name\n";
print MAIL "EID: $eid\n";
print MAIL "PID: $pid\n";
print MAIL "Name: $expt_name\n";
print MAIL "Created: $expt_created\n";
print MAIL "Expires: $expt_expires\n";
print MAIL "Directory: $eiddir\n\n";
if (open(IN, "$repfile")) {
while (<IN>) {
print MAIL "$_";
}
close(IN);
}
print MAIL "\n\n---------\n\n";
print MAIL "Here is the log of the configuration process.\n";
print MAIL "If you have any questions or problems, please include the\n";
print MAIL "output below in your message to $TBOPS\n\n";
if (open(IN, "$logname")) {
while (<IN>) {
print MAIL "$_";
}
close(IN);
}
close(MAIL);
unlink("$logname");
exit 0; exit 0;
sub fatal() sub fatal()
{ {
if (! chdir($expdir)) { my($mesg) = $_[0];
print STDOUT "In Fatal: Could not chdir to $expdir!\n";
exit(-1); print STDOUT "$mesg\n";
} print STDOUT "Cleaning up ...\n";
if ($debug) {
$save = "$eid-$PID"; #
system("mv $eid $save"); # If we got far enough to allocate nodes, must run tbend.
#
if ($cleanme) {
tbendit();
} }
else {
system("rm -r $eid"); #
# Now we can remove all trace from the DB since it failed.
#
$query_result =
$DB->query("DELETE from experiments WHERE eid='$eid' and pid='$pid'");
if (! $query_result) {
print STDOUT "DB Error deleting experiment record for $pid/$eid\n";
} }
exit(-1);
}
sub dumplog() #
{ # For debugging failures?
if (open(IN, "$eid.log")) { #
print STDOUT "Dumping $eid.log\n"; if (chdir($expdir)) {
while (<IN>) { $save = "$eid-$PID";
print STDOUT "$_"; # Fucking taint checks. Whats wrong with rename("$eid", "$save");
} system("/bin/mv", "-f", "$eid", "$save");
close(IN);
} }
}
sub dumpreport() #
{ # Send a message to the testbed list. Append the logfile if it got
if (open(IN, "$eid.report")) { # that far.
print STDOUT "Dumping $eid.report\n"; #
open(MAIL, "| /usr/bin/mail ".
"-s \"TESTBED: Experiment Configure Failure $pid/$eid\" ".
"$TBOPS >/dev/null 2>&1")
or die "Cannot start mail program: $!";
print MAIL $mesg;
if (open(IN, "$logname")) {
print MAIL "\n\n---------\n\n";
while (<IN>) { while (<IN>) {
print STDOUT "$_"; print MAIL "$_";
} }
close(IN); close(IN);
} }
close(MAIL);
unlink("$logname");
exit(-1);
} }
# #
...@@ -179,11 +342,8 @@ sub dumpreport() ...@@ -179,11 +342,8 @@ sub dumpreport()
# #
sub tbendit() sub tbendit()
{ {
print STDOUT "Running tbend with arguments: $project $eid\n"; print STDOUT "Running tbend with arguments: -nologfile $pid $eid\n";
if (system("$tbdir/tbend $project $eid") != 0) { if (system("$tbdir/tbend -nologfile $pid $eid") != 0) {
print STDOUT "tbend failed!\n"; print STDOUT "tbend failed!\n";
} }
} }
...@@ -3,6 +3,14 @@ use English; ...@@ -3,6 +3,14 @@ use English;
# #
# This gets invoked from the Web interface. Terminate an experiment. # This gets invoked from the Web interface. Terminate an experiment.
# Most of the STDOUT prints are never seen since the web interface
# repeats only errors. My plan is make this script the front end to
# experiment termination and make tbend a backend program that no one
# uses.
#
# TODO: Perhaps setup a watchdog timer to guard against hangs. Not sure
# why this would happen, but it would be bad for a termination to
# hang up.
# #
# usage: tbstopit <pid> <eid> # usage: tbstopit <pid> <eid>
# #
...@@ -64,10 +72,15 @@ my $DB = Mysql->connect("localhost", $DBNAME, "script", "none"); ...@@ -64,10 +72,15 @@ my $DB = Mysql->connect("localhost", $DBNAME, "script", "none");
# We have to protect against trying to end an experiment that is currently # We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a timestamp for this purpose. # in the process of being terminated. We use a timestamp for this purpose.
# If the timestamp is ever non-null, then something is wrong and we should # If the timestamp is ever non-null, then something is wrong and we should
# never proceed. # not proceed.
# #
$query_result = $DB->query("SELECT expt_terminating FROM experiments ". # We also have to guard against trying to terminate an experiment that
"WHERE eid='$eid' and pid='$pid'"); # is still in the process of configuring. Its easiest to force the user
# to wait!
#
$query_result =
$DB->query("SELECT expt_terminating,expt_ready FROM experiments ".
"WHERE eid='$eid' and pid='$pid'");
if (! $query_result) { if (! $query_result) {
fatal("DB Error getting experiment termination date for $pid/$eid\n"); fatal("DB Error getting experiment termination date for $pid/$eid\n");
...@@ -85,6 +98,13 @@ if (defined($row[0])) { ...@@ -85,6 +98,13 @@ if (defined($row[0])) {
"torn down\n"; "torn down\n";
exit(1); exit(1);
} }
if (! $row[1]) {
print STDOUT
"It appears that experiment $pid/$eid is still configuring.\n".
"The user that created the experiment will be notified via email\n".
"when it has been fully configured and is ready for use\n";
exit(1);
}
# #
# Figure out who is going to get the email! # Figure out who is going to get the email!
...@@ -93,7 +113,7 @@ $query_result = $DB->query("SELECT usr_name,usr_email from users ". ...@@ -93,7 +113,7 @@ $query_result = $DB->query("SELECT usr_name,usr_email from users ".
"WHERE unix_uid='$EUID'"); "WHERE unix_uid='$EUID'");
if (! $query_result) { if (! $query_result) {
fatal("DB Error getting user informarion for uid $EUID\n"); fatal("DB Error getting user information for uid $EUID\n");
} }
if ($query_result->numrows < 1) { if ($query_result->numrows < 1) {
print STDOUT "Go Away! You do not exist in the Emulab Database.\n"; print STDOUT "Go Away! You do not exist in the Emulab Database.\n";
...@@ -145,6 +165,7 @@ open(STDIN, "< /dev/null") or ...@@ -145,6 +165,7 @@ open(STDIN, "< /dev/null") or
# #
$logname = `mktemp /tmp/tbend-$pid-$eid.XXXXXX`; $logname = `mktemp /tmp/tbend-$pid-$eid.XXXXXX`;
# Note different taint check (allow /).
if ($logname =~ /^([-\@\w.\/]+)$/) { if ($logname =~ /^([-\@\w.\/]+)$/) {
$logname = $1; $logname = $1;
} else { } else {
...@@ -180,9 +201,9 @@ if (! $query_result) { ...@@ -180,9 +201,9 @@ if (! $query_result) {
print STDOUT "Termination Success\n"; print STDOUT "Termination Success\n";
open(MAIL, "| /usr/bin/mail -s \"Experiment $pid/$eid Terminated\" ". open(MAIL, "| /usr/bin/mail ".
"-c $TBOPS ". "-s \"TESTBED: Experiment $pid/$eid Terminated\" ".
"\"$user_name <$user_email>\" >/dev/null 2>&1") "-c $TBOPS \"$user_name <$user_email>\" >/dev/null 2>&1")
or die "Cannot start mail program: $!"; or die "Cannot start mail program: $!";
print MAIL "Your experiment `$eid' in project `$pid' has been terminated.\n"; print MAIL "Your experiment `$eid' in project `$pid' has been terminated.\n";
...@@ -200,7 +221,7 @@ if (open(IN, "$logname")) { ...@@ -200,7 +221,7 @@ if (open(IN, "$logname")) {
} }
close(MAIL); close(MAIL);
system("rm -f $logname"); unlink("$logname");
exit 0; exit 0;
sub fatal() sub fatal()
...@@ -209,7 +230,8 @@ sub fatal() ...@@ -209,7 +230,8 @@ sub fatal()
if (chdir($expdir)) { if (chdir($expdir)) {
$save = "$eid-$PID"; $save = "$eid-$PID";
system("mv $eid $save"); # Fucking taint checks. Whats wrong with rename("$eid", "$save");
system("/bin/mv", "-f", "$eid", "$save");
} }
print STDOUT $mesg; print STDOUT $mesg;
...@@ -218,7 +240,7 @@ sub fatal() ...@@ -218,7 +240,7 @@ sub fatal()
# that far. # that far.
# #
open(MAIL, "| /usr/bin/mail ". open(MAIL, "| /usr/bin/mail ".
"-s \"Experiment Termination Failure $pid/$eid\" ". "-s \"TESTBED: Termination Failure $pid/$eid\" ".
"$TBOPS >/dev/null 2>&1") "$TBOPS >/dev/null 2>&1")
or die "Cannot start mail program: $!"; or die "Cannot start mail program: $!";
...@@ -234,7 +256,7 @@ sub fatal() ...@@ -234,7 +256,7 @@ sub fatal()
} }
close(MAIL); close(MAIL);
system("rm -f $logname"); unlink("$logname");
exit(-1); exit(-1);
} }