Commit 54f289a4 authored by Leigh Stoller's avatar Leigh Stoller

First cut at batch mode experiments.

parent e6a5db44
......@@ -43,6 +43,8 @@ install-mkdirs:
-mkdir -p $(INSTALL_TOPDIR)/log
-mkdir -p $(INSTALL_TOPDIR)/lists
-mkdir -p $(INSTALL_TOPDIR)/backup
-mkdir -p $(INSTALL_TOPDIR)/batch
-chmod 777 $(INSTALL_TOPDIR)/batch
clean: clean-subdirs
distclean: distclean-subdirs
......
......@@ -957,6 +957,8 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/os_load tbsetup/os_setup tbsetup/mkprojdir tbsetup/power \
tbsetup/resetvlans tbsetup/rmacct-ctrl tbsetup/rmproj \
tbsetup/sched_reload \
tbsetup/batchexp tbsetup/killbatchexp tbsetup/batch_daemon \
tbsetup/webbatchexp tbsetup/webkillbatchexp \
tbsetup/startexp tbsetup/endexp tbsetup/webstartexp tbsetup/webendexp \
tbsetup/ir/GNUmakefile tbsetup/ir/postassign tbsetup/snmpit \
tbsetup/ir/assign_wrapper tbsetup/ns2ir/GNUmakefile \
......
......@@ -116,6 +116,8 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/os_load tbsetup/os_setup tbsetup/mkprojdir tbsetup/power \
tbsetup/resetvlans tbsetup/rmacct-ctrl tbsetup/rmproj \
tbsetup/sched_reload \
tbsetup/batchexp tbsetup/killbatchexp tbsetup/batch_daemon \
tbsetup/webbatchexp tbsetup/webkillbatchexp \
tbsetup/startexp tbsetup/endexp tbsetup/webstartexp tbsetup/webendexp \
tbsetup/ir/GNUmakefile tbsetup/ir/postassign tbsetup/snmpit \
tbsetup/ir/assign_wrapper tbsetup/ns2ir/GNUmakefile \
......
......@@ -11,13 +11,14 @@ include $(OBJDIR)/Makeconf
SUBDIRS = checkpass ir ns2ir
BIN_STUFF = power snmpit tbend tbrun tbprerun tbreport \
os_load savevlans startexp endexp
os_load savevlans startexp endexp batchexp killbatchexp
SBIN_STUFF = resetvlans console_setup.proxy sched_reload named_setup
SBIN_STUFF = resetvlans console_setup.proxy sched_reload named_setup \
batch_daemon
LIBEXEC_STUFF = mkprojdir rmproj mkacct-ctrl rmacct-ctrl \
os_setup mkexpdir console_setup \
webstartexp webendexp
webstartexp webendexp webbatchexp webkillbatchexp
LIB_STUFF = libtbsetup.pm
......@@ -76,6 +77,8 @@ post-install:
chmod u+s $(INSTALL_BINDIR)/savevlans
chown root $(INSTALL_LIBEXECDIR)/console_setup
chmod u+s $(INSTALL_LIBEXECDIR)/console_setup
chown root $(INSTALL_SBINDIR)/batch_daemon
chmod u+s $(INSTALL_SBINDIR)/batch_daemon
#
# Control node installation (okay, plastic)
......
This diff is collapsed.
#!/usr/bin/perl -wT
use English;
use Getopt::Std;
#
# Create a batch experiment.
#
# usage: batchexp <batchfile>
#
sub usage()
{
    # Show the expected command line on STDOUT, then terminate with a
    # failure status so the caller can tell the invocation was rejected.
    my $synopsis = "Usage: batchexp <batchfile>\n";

    print STDOUT $synopsis;
    exit(-1);
}
# Option letters handed to getopts(); empty, so no switches are accepted.
my $optlist = "";
#
# Configure variables
#
# The @...@ tokens are placeholders; presumably configure substitutes them
# when it generates this script (batchexp is listed among its output files).
#
my $TB = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS = "@TBOPSEMAIL@";
my $tbbindir = "$TB/bin/";
# Each batch experiment gets a work directory under here.
my $batchdir = "$TB/batch";
my $projroot = "/proj";
# Work directory for this batch; assigned once pid and eid are known.
my $dirname;
#
# Turn off line buffering on output
# (a non-zero $| makes the selected output handle flush after every write).
#
$| = 1;
#
# Untaint the path
#
# The script runs with -T, so PATH has to be set to a fixed value before any
# external command is run. The testbed directories are included because the
# parser helpers below are invoked by bare name.
#
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec:$TB/libexec/ir".
":$TB/libexec/ns2ir:$TB/sbin:$TB/bin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
# Make the IR library loadable; presumably libir supplies the ir_read and
# ir_get routines used further down -- confirm against libir itself.
$TBIRLIB = "$TB/lib/ir";
push(@INC,$TBIRLIB);
require libir;
#
# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments.
#
%options = ();
usage()
    unless (getopts($optlist, \%options));
usage()
    unless (@ARGV == 1);
my $tempfile = $ARGV[0];
#
# Untaint the single argument. It is a path, so unlike the pid/eid checks
# used elsewhere this pattern also accepts "/". Anything else is rejected.
#
fatal("Tainted argument $tempfile")
    unless ($tempfile =~ /^([-\@\w.\/]+)$/);
$tempfile = $1;
#
# Set up for querying the database.
#
use Mysql;
my $DB = Mysql->connect("localhost", $DBNAME, "script", "none");
#
# Without this check a failed connect only shows up later as a method call
# on an undefined value inside DBquery(), with no useful message.
# NOTE(review): relies on Mysql->connect returning a false value on failure;
# confirm against the installed Mysql.pm.
#
if (! $DB) {
    fatal("Could not connect to database $DBNAME");
}
#
# Parse the batchfile.
#
# parse_batchfile() fills in these file-scope variables as it recognizes the
# corresponding lines; any it does not see stay undefined.
#
my $eid;
my $pid;
my $longname;
my $expires;
my $webnsfile;
parse_batchfile($tempfile) or
    fatal("Could not parse batchfile $tempfile");
#
# Sanity check a few things.
#
if (!defined($eid) || !defined($pid) || !defined($longname) ||
    !defined($expires) || !defined($webnsfile)) {
    fatal("Batchfile is incomplete!");
}
# Names of the NS file and the IR file inside the batch work directory.
$nsfile = "$eid.ns";
$irfile = "$eid.ir";
#
# Create a subdir in the batch directory to work in.
#
$dirname = "$batchdir/$pid-$eid";
if (! mkdir($dirname, 0775)) {
    fatal("Could not mkdir $dirname");
}
if (! chdir($dirname)) {
    fatal("Could not chdir to $dirname");
}
#
# Keep a copy of the batch file in the work directory. The original is
# left alone; the web script is responsible for removing it.
#
system("/bin/cp", "$tempfile", "batchfile") == 0 or
    fatal("Could not copy $tempfile to $dirname");
#
# Now a bunch of DB checks.
#
# First off, get some user information.
#
$query_result =
    DBquery("SELECT uid,usr_name,usr_email from users ".
            "WHERE unix_uid='$EUID'");
fatal("Go Away! You do not exist in the Emulab Database.")
    if ($query_result->numrows < 1);
@row = $query_result->fetchrow_array();
($uid, $user_name, $user_email) = @row[0, 1, 2];
#
# The invoking user must be a member of the project, and trusted enough
# (local_root or group_root) to create experiments in it.
#
$query_result =
    DBquery("SELECT trust from proj_memb WHERE uid='$uid' and pid='$pid'");
fatal("Go Away! You are not a member of project $pid!")
    if ($query_result->numrows == 0);
@row = $query_result->fetchrow_array();
$trust = $row[0];
unless ($trust eq "local_root" ||
        $trust eq "group_root") {
    fatal("Go Away! You are not a trusted member of project $pid!");
}
#
# The pid/eid pair must not already be in use, either as a regular
# experiment or as a batch experiment. LOCKING!
#
$query_result =
    DBquery("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");
fatal("Experiment $eid in project $pid already exists!")
    if ($query_result->numrows);
$query_result =
    DBquery("SELECT * FROM batch_experiments WHERE eid='$eid' and pid='$pid'");
fatal("Batch experiment $eid in project $pid already exists!")
    if ($query_result->numrows);
#
# Now we can get the NS file!
#
if (system("/bin/cp", "$webnsfile", "$nsfile")) {
    fatal("Could not copy $webnsfile to $dirname/$nsfile");
}
#
# Do a firstcut parse on the NS file, converting it to IR format. This
# operates as a syntax check on the NS file, so we can kick back bad NS
# files now instead of later. It also means we don't need the NS file after
# this.
#
# XXX This is copied from tbprerun.
#
# The helpers are found through PATH, which was set to a fixed list above.
# They are run with list-form system(), like the cp calls, so the arguments
# are passed directly to the program and never interpreted by a shell.
#
$tbcmdfile = "tbcmds";
$id = "$pid-$eid";
if (system("parse.tcl", $id, $nsfile, $irfile) != 0) {
    fatal("NS Parse failed!");
}
if (system("extract_tb", $nsfile, $tbcmdfile) != 0) {
    fatal("NS extract_tb pass failed!");
}
if (system("postparse", $tbcmdfile, $irfile) != 0) {
    fatal("NS postparse pass failed!");
}
#
# Figure out what resources are needed so the batch daemon can make an
# informed decision about whether to even try.
#
$pcs = 0;
$sharks = 0;
&ir_read($irfile);
# Each line of the /topology/nodes section is split on whitespace into a
# node field and a type field; only types "pc" and "sh" are counted.
foreach my $nodeline (split("\n", &ir_get("/topology/nodes"))) {
    ($node,$type) = split(' ', $nodeline);
    $pcs++
        if ($type eq "pc");
    $sharks++
        if ($type eq "sh");
}
#
# Gen up the creation time.
#
$created = `date '+%Y:%m:%d %H:%M:%S'`;
# The command output ends with a newline; strip it so the timestamp stored
# in the database does not carry a stray trailing newline.
chomp($created);
#
# Insert the record. We leave this to very last cause the batch daemon
# is looking for batch experiments to run. Easy race avoidance.
#
# The creation time is used for both the created and started columns.
#
DBquery("INSERT INTO batch_experiments ".
        "(eid, pid, created, started, expires, ".
        " name, creator_uid, numpcs, numsharks, status) ".
        "VALUES ('$eid', '$pid', '$created', '$created', '$expires', ".
        "'$longname', '$uid', $pcs, $sharks, 'new')");
exit 0;
#
# Report a fatal error on STDOUT and exit with a failure status.
#
#   $mesg - text to print.
#
sub fatal($)
{
    my ($mesg) = @_;

    print STDOUT "$mesg\n", "Cleaning up ...\n";
    # Removal of the work directory is currently disabled:
    # system("/bin/rm", "-rf", "$dirname");
    exit(-1);
}
#
# Open up the batch file and parse it.
#
#
# Read the batch file and record the fields it contains.
#
#   $batchfile - path of the file to read.
#
# Recognized lines start with "EID:", "PID:", "name:", "expires:" or
# "nsfile:"; the value is stored in $eid, $pid, $longname, $expires and
# $webnsfile respectively. Unrecognized lines are ignored, and a variable
# whose line is missing or carries no acceptable value is left untouched.
#
# Returns 1 once the file has been read, 0 if it could not be opened (a
# message is printed on STDERR in that case).
#
sub parse_batchfile($)
{
    my ($batchfile) = @_;
    my $fh;

    # Explicit read mode and a lexical handle, so the path can never be
    # taken as an open mode and the handle is private to this call.
    if (! open($fh, "<", $batchfile)) {
        print STDERR "Could not open $batchfile\n";
        return 0;
    }
    while (my $line = <$fh>) {
        # EID, PID and nsfile need at least one acceptable character. An
        # empty capture would otherwise look like a defined value to the
        # caller's completeness check.
        if ($line =~ /^EID:\s+([-\@\w.]+)/) {
            $eid = $1;
            next;
        }
        if ($line =~ /^PID:\s+([-\@\w.]+)/) {
            $pid = $1;
            next;
        }
        # NOTE(review): the capture ends at the first character outside the
        # class, so a long name containing spaces is cut at the first
        # space -- confirm whether that is intended.
        if ($line =~ /^name:\s+([-\@\w.]*)/) {
            $longname = $1;
            next;
        }
        # May be empty; the whole rest of the line has to match.
        if ($line =~ /^expires:\s+([-\@\w.: ]*)$/) {
            $expires = $1;
            next;
        }
        if ($line =~ /^nsfile:\s+([-\@\w.\/]+)/) {
            $webnsfile = $1;
            next;
        }
    }
    close($fh);
    return 1;
}
#
# Run one SQL statement on the script's database handle ($DB).
#
#   $query - the SQL text.
#
# Returns whatever $DB->query() returned. A false result is treated as a
# database error: fatal() is called with the failing statement and does not
# return.
#
# The result is kept in a lexical so that a call never overwrites a result
# the caller is still holding in $query_result.
#
sub DBquery($)
{
    my ($query) = @_;

    my $result = $DB->query($query);
    if (! $result) {
        fatal("DB Error: $query");
    }
    return $result;
}
#!/usr/bin/perl -wT
use English;
use Getopt::Std;
#
# Cancel (kill) a batch experiment.
#
# usage: killbatchexp <pid> <eid>
#
sub usage()
{
    # The argument names are literal placeholders. They must not be written
    # as $pid/$eid here, because those would be interpolated (as empty
    # values at this point) instead of being shown to the user.
    print STDOUT "Usage: killbatchexp <pid> <eid>\n";
    exit(-1);
}
# Option letters handed to getopts(); empty, so no switches are accepted.
my $optlist = "";
#
# Configure variables
#
# The @...@ tokens are placeholders; presumably configure substitutes them
# when it generates this script (killbatchexp is listed among its output
# files).
#
my $TB = "@prefix@";
my $DBNAME = "@TBDBNAME@";
# Address that fatal() mails failure reports to.
my $TBOPS = "@TBOPSEMAIL@";
my $tbbindir = "$TB/bin/";
# Batch work directories live under here, one per pid-eid pair.
my $batchdir = "$TB/batch";
my $projroot = "/proj";
#
# Turn off line buffering on output
# (a non-zero $| makes the selected output handle flush after every write).
#
$| = 1;
#
# Untaint the path
#
# The script runs with -T, so PATH has to be set to a fixed value before any
# external command is run.
#
$ENV{'PATH'} = "/bin:/usr/bin";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
#
# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV != 2) {
    usage();
}
my $pid = $ARGV[0];
my $eid = $ARGV[1];
#
# Untaint the arguments.
#
# Both values are placed inside SQL statements and a directory name below,
# so anything outside the accepted character set is rejected here rather
# than allowed to continue unchecked. The error is printed directly instead
# of going through fatal(), because fatal() puts $pid and $eid on a shell
# command line.
#
if ($pid =~ /^([-\@\w.]+)$/) {
    $pid = $1;
}
else {
    print STDOUT "Tainted argument $pid\n";
    exit(-1);
}
if ($eid =~ /^([-\@\w.]+)$/) {
    $eid = $1;
}
else {
    print STDOUT "Tainted argument $eid\n";
    exit(-1);
}
#
# Set up for querying the database.
#
use Mysql;
my $DB = Mysql->connect("localhost", $DBNAME, "script", "none");
#
# Without this check a failed connect only shows up later as a method call
# on an undefined value inside DBquery(), with no useful message.
# NOTE(review): relies on Mysql->connect returning a false value on failure;
# confirm against the installed Mysql.pm.
#
# fatal() is defined further down with a prototype; the & form is used so
# that this early call does not trigger a "called too early to check
# prototype" warning under -w.
#
if (! $DB) {
    &fatal("Could not connect to database $DBNAME");
}
#
# This is where we currently hold the batch goo.
#
$dirname = "$batchdir/$pid-$eid";
#
# Need to lock the table for this. We could avoid the locking if we just
# set the canceled bit and let the batch_daemon clean things up, but that
# introduces needless (and annoying) delay when killing a batch experiment
# that is not even running. See corresponding lock in the batch_daemon.
#
DBquery("lock tables batch_experiments write");
#
# Set the canceled state right away. This will prevent the batch_daemon
# from trying to run it. It might already be running, but that's okay.
#
DBquery("UPDATE batch_experiments set canceled=1 ".
        "WHERE eid='$eid' and pid='$pid'");
#
# Now it's safe to look at the state. If it's in the "new" state, then
# all we need to do is kill the record and the directory, since the
# batch daemon will not bother with it once the canceled bit is set.
#
# The status result is held in its own lexical and the row is read before
# any further statement is issued. A later DBquery() call may reuse
# $query_result, which would replace the status result with the result of
# the "unlock tables" statement and leave the state undefined.
#
my $status_result =
    DBquery("SELECT status from batch_experiments ".
            "WHERE eid='$eid' and pid='$pid'");
my $found = $status_result->numrows;
@row = $status_result->fetchrow_array();
DBquery("unlock tables");
#
# No matching record: there is nothing to cancel, and it must not be
# reported as a running experiment.
#
if (! $found) {
    print STDOUT "No such Batch Experiment $eid in project $pid!\n";
    exit(-1);
}
$state = $row[0];
if ($state ne "new") {
    #
    # Daemon does the rest ...
    #
    print STDOUT
        "Batch Experiment $eid in project $pid is running on the testbed\n".
        "You will receive email notification when the experiment is torn\n".
        "down and you can reuse the experiment name\n";
    #
    # exit status is special. Tells the caller that cancelation is pending.
    # The web script will say something useful.
    #
    exit(1);
}
#
# Delete the DB record. LOCKING!
#
DBquery("DELETE from batch_experiments WHERE eid='$eid' and pid='$pid'");
#
# And kill the directory.
#
# rm is run by absolute path with list-form system(), so the directory name
# is passed as a single argument and never interpreted by a shell. Removal
# stays best-effort: a failure is noted on STDERR but does not change the
# outcome, since the DB record is already gone.
#
if (system("/bin/rm", "-rf", $dirname) != 0) {
    print STDERR "killbatchexp: could not remove $dirname\n";
}
#
# Let's not bother with an email message. Just print out something nice
# and tell the caller (the php script) to say something nice too.
#
print STDOUT
    "Batch Experiment $eid in project $pid has been canceled!\n";
exit(0);
#
# Report a fatal error and exit with a failure status.
#
#   $mesg - text to report.
#
# The message is printed on STDOUT and also piped to /usr/bin/mail addressed
# to $TBOPS, with the pid/eid in the subject line. Dies if the mail program
# cannot be started.
#
sub fatal($)
{
    my ($mesg) = @_;

    print STDOUT "$mesg\n";
    #
    # Send a message to the testbed list
    #
    unless (open(MAIL, "| /usr/bin/mail ".
                 "-s \"TESTBED: Batch Mode Cancelation Failure $pid/$eid\" ".
                 "$TBOPS >/dev/null 2>&1")) {
        die "Cannot start mail program: $!";
    }
    print MAIL $mesg;
    close(MAIL);
    exit(-1);
}
#
# Run one SQL statement on the script's database handle ($DB).
#
#   $query - the SQL text.
#
# Returns whatever $DB->query() returned. A false result is treated as a
# database error: fatal() is called with the failing statement and does not
# return.
#
# The result is kept in a lexical so that a call never overwrites a result
# the caller is still holding in $query_result. This matters to callers that
# issue another statement, such as "unlock tables", before reading the rows
# of an earlier SELECT.
#
sub DBquery($)
{
    my ($query) = @_;

    my $result = $DB->query($query);
    if (! $result) {
        fatal("DB Error: $query");
    }
    return $result;
}
......@@ -51,18 +51,18 @@ if (! chdir($expdir)) {
}
if (! mkdir($eid, 0770)) {
print STDOUT "Could not mkdir $eid in $piddir: $!\n";
print STDOUT "Could not mkdir $eid in $expdir: $!\n";
exit(-1);
}
if (! chmod(0770, "$eid")) {
print STDOUT "Could not chmod $eid to 0770 in $piddir: $!\n";
print STDOUT "Could not chmod $eid to 0770 in $expdir: $!\n";
rmdir($eid);
exit(-1);
}
if (! chdir($eid)) {
print STDOUT "Could not chdir to $eid in $piddir: $!\n";
print STDOUT "Could not chdir to $eid in $expdir: $!\n";
rmdir($eid);
exit(-1);
}
......
......@@ -78,9 +78,12 @@ if ($eid =~ /^([-\@\w.]+)$/) {
$eid = $1;
}
# Note different taint check (allow /).
if ($tempfile =~ /^([\/-\@\w.]+)$/) {
if ($tempfile =~ /^([-\w.\/]+)$/) {
$tempfile = $1;
}
else {
die("Tainted tempfile name: $tempfile");
}
my $piddir = "$projroot/$pid";
my $expdir = "$piddir/exp";
......@@ -360,7 +363,7 @@ sub fatal()
#
open(MAIL, "| /usr/bin/mail ".
"-s \"TESTBED: Experiment Configure Failure $pid/$eid\" ".
"$TBOPS \"$user_name <$user_email>\" >/dev/null 2>&1")
"-c $TBOPS \"$user_name <$user_email>\" >/dev/null 2>&1")
or die "Cannot start mail program: $!";
print MAIL $mesg;
......
#!/usr/bin/perl -w
use English;
#
# This gets invoked from the Web interface. Simply a wrapper.
#
# usage: webbatchexp arguments ...
#
#
# Configure variables
#
my $TB = "@prefix@";
#
# Hand control to the real script, passing our arguments through untouched.
# exec only comes back if it could not start the program, and in that case
# we die with the reason.
#
exec("$TB/bin/batchexp", @ARGV)
    or die("webbatchexp: Could not exec batchexp: $!");
#!/usr/bin/perl -w
use English;
#
# This gets invoked from the Web interface. Simply a wrapper.
#
# usage: webkillbatchexp arguments ...
#
#
# Configure variables
#
my $TB = "@prefix@";
#
# Hand control to the real script, passing our arguments through untouched.
# exec only comes back if it could not start the program, and in that case
# we die with the reason.
#
exec("$TB/bin/killbatchexp", @ARGV)
    or die("webkillbatchexp: Could not exec killbatchexp: $!");
<?php
include("defs.php3");
#
# Standard Testbed Header
#
PAGEHEADER("Create a Batch Mode Experiment");
$mydebug = 0;
#
# First off, sanity check the form to make sure all the required fields
# were provided. I do this on a per field basis so that we can be
# informative. Be sure to correlate these checks with any changes made to
# the project form.
#
if (!isset($uid) ||
strcmp($uid, "") == 0) {
FORMERROR("Username");
}
if (!isset($exp_pid) ||
strcmp($exp_pid, "") == 0) {
FORMERROR("Select Project");
}
if (!isset($exp_id) ||
strcmp($exp_id, "") == 0) {
FORMERROR("Experiment Name (short)");
}
if (!isset($exp_name) ||
strcmp($exp_name, "") == 0) {
FORMERROR("Experiment Name (long)");
}
#
# Only known and logged in users can begin experiments. Name came in as
# a POST var.
#
LOGGEDINORDIE($uid);
#
# Database limits
#
if (strlen($exp_id) > $TBDB_EIDLEN) {
USERERROR("The experiment name \"$exp_id\" is too long! ".
"Please select another.", 1);
}
#
# Certain of these values must be escaped or otherwise sanitized.
#
$exp_name = addslashes($exp_name);
#
# Must provide an NS file!
#
$nonsfile = 0;
if (!isset($exp_nsfile) ||
strcmp($exp_nsfile, "") == 0 ||
strcmp($exp_nsfile, "none") == 0) {
USERERROR("The NS file '$exp_nsfile_name' does not appear to be a ".
"valid filename. Please go back and try again.", 1);
}
#
# Make sure the PID/EID tuple does not already exist in the database.
# It may not exist in either the current experiments list, or the
# batch experiments list.
#
$query_result = mysql_db_query($TBDBNAME,
"SELECT eid FROM experiments ".
"WHERE eid=\"$exp_id\" and pid=\"$exp_pid\"");
if ($row = mysql_fetch_row($query_result)) {
USERERROR("The experiment name \"$exp_id\" you have chosen is already ".
"in use in project $exp_pid. Please select another.", 1);
}
$query_result = mysql_db_query($TBDBNAME,
"SELECT eid FROM batch_experiments ".
"WHERE eid=\"$exp_id\" and pid=\"$exp_pid\"");
if ($row = mysql_fetch_row($query_result)) {
USERERROR("The experiment name \"$exp_id\" you have chosen is already ".
"in use in project $exp_pid. Please select another.", 1);
}
#
# Next, is this person a member of the project specified, and is the trust
# equal to group or local root?
#
$query_result = mysql_db_query($TBDBNAME,
"SELECT * FROM proj_memb WHERE pid=\"$exp_pid\" and uid=\"$uid\"");
if (($row = mysql_fetch_array($query_result)) == 0) {
USERERROR("You are not a member of Project $exp_pid, so you cannot begin ".
"an experiment in that project.", 1);
}
$trust = $row[trust];
if (strcmp($trust, "group_root") && strcmp($trust, "local_root")) {
USERERROR("You are not group or local root in Project $exp_pid, so you ".
"cannot begin an experiment in that project.", 1);
}
#
# We need the unix gid for the project for running the scripts below.
#
$query_result = mysql_db_query($TBDBNAME,
"SELECT unix_gid from projects where pid=\"$exp_pid\"");
if (($row = mysql_fetch_row($query_result)) == 0) {
TBERROR("Database Error: Getting GID for project $exp_pid.", 1);
}
$gid = $row[0];
#
# Create a temporary file with the goo in it.
#
$tmpfname = tempnam( "/tmp", "batch-$pid-$eid" );
$fp = fopen($tmpfname, "w");
if (! $fp) {
TBERROR("Opening temporary file $tmpfname.", 1);
}
#
# XXX The batchexp script parses this file, so if you change something
# here, go change it there too!
#
fputs($fp, "EID: $exp_id\n");
fputs($fp, "PID: $exp_pid\n");
fputs($fp, "name: $exp_name\n");
fputs($fp, "expires: $exp_expires\n");
fputs($fp, "nsfile: $exp_nsfile\n");
fclose($fp);
#
# XXX
# Set the permissions on the files so that the scripts can get to them.
# It is owned by nobody, and most likely protected. This leaves the
# script open for a short time. A potential security hazard we should
# deal with at some point, but since the files are on paper:/tmp, its
# a minor problem.
#
chmod($tmpfname, 0666);
chmod($exp_nsfile, 0666);