Commit 533dc18f authored by Leigh Stoller's avatar Leigh Stoller

Reorg of working directory and log file stuff for start/swap/end

experiment. Here is mail to tbops:

* Moved the working directory for experiment setup/swap/end to a new
  directory located on boss instead of over NFS to /proj/$pid/$eid. This
  new location is /usr/testbed/expwork/$pid/$eid.

* Changed the name of the directories we create in /usr/testbed/expinfo to
  $pid-$eid.$index where $index is a new autoincrement field in the DB
  table. I really hated the names that were created before.

* Changed where logs are written from /tmp to the new location in
  /usr/testbed/expwork/$pid/$eid.

Okay, why.

* We no longer operate on NFS mounted directories that might hang. It's
  easier to catch the situation where copying the log file over at the
  end of experiment creation fails because of an NFS problem.

* We no longer have user writable files that are inputs to other parts of
  the system (like top and ptop files).  Not that a user would be bad, but
  it closes a hole.

* We no longer copy user writable files from /proj to boss where we might
  fill up an important filesystem because the user put a .ndz file in the
  working directory. Not that a user would be bad, but it closes a hole.

* It's easier to save all the log files this way, for each swap in and
  out.

* Removing a directory over NFS is a royal irritant when someone is CD'ed
  into that directory or looking at a file on the other side (the astute
  observer will peg this as the reason I went down this idiotic path in the
  first place!).

* About 6 other reasons that I can no longer remember. Seriously, I really
  had more reasons I can no longer remember! :-)
parent 1d14a4dc
......@@ -57,6 +57,7 @@ install-mkdirs:
-mkdir -p $(INSTALL_TOPDIR)/lists
-mkdir -p $(INSTALL_TOPDIR)/backup
-mkdir -p $(INSTALL_TOPDIR)/etc
-mkdir -p $(INSTALL_TOPDIR)/expwork
-mkdir -p $(INSTALL_TOPDIR)/batch
-chmod 777 $(INSTALL_TOPDIR)/batch
-mkdir -p $(INSTALL_TOPDIR)/expinfo
......
......@@ -82,6 +82,7 @@ use Exporter;
TBDB_NODEOPMODE_NORMAL TBDB_NODEOPMODE_DELAYING
TBDB_NODEOPMODE_UNKNOWNOS TBDB_NODEOPMODE_RELOADING
TBDB_NODEOPMODE_UNKNOWN
TBDB_EXPT_WORKDIR
TBSetNodeEventState TBGetNodeEventState
TBSetNodeOpMode TBGetNodeOpMode
......@@ -98,6 +99,7 @@ use Exporter;
TBIsNodeVirtual TBControlNetIP TBPhysNodeID
TBExptOpenLogFile TBExptCloseLogFile TBExptCreateLogFile
TBNodeUpdateAccountsByPid TBNodeUpdateAccountsByType
TBSaveExpLogFiles TBExptWorkDir TBExptUserDir TBExptLogDir
TBDB_WIDEAREA_LOCALNODE
TBWideareaNodeID
......@@ -178,6 +180,11 @@ sub TBdbfork()
#
my $DBErrorString = "";
#
# Root directory under which the per-experiment working directories
# are kept.
# XXX Hard-wired path; needs to be config'ed.
#
sub TBDB_EXPT_WORKDIR()
{
    return "/usr/testbed/expwork";
}
#
# Define exported "constants". Basically, these are just perl subroutines
# that look like constants cause you do not need to call a perl subroutine
......@@ -1717,17 +1724,66 @@ sub TBExptGetLogFile($$$$)
return 0;
}
#
# Return the log directory name for an experiment. This is where we
# keep copies of the experiment's files for later inspection. The name
# is built as $TB/expinfo/<pid>-<eid>.<idx>, where <idx> is the value of
# the idx column of the experiment's row in the experiments table.
#
# NOTE(review): $pid and $eid are interpolated directly into the query;
# presumably callers have already validated them - confirm.
#
sub TBExptLogDir($$)
{
    my ($pid, $eid) = @_;

    # Keep the result handle lexical so this lookup cannot clobber a
    # like-named variable used elsewhere in the file.
    my $query_result =
        DBQueryFatal("select idx from experiments ".
                     "where pid='$pid' and eid='$eid'");
    my ($idx) = $query_result->fetchrow_array;

    return "$TB/expinfo/${pid}-${eid}.${idx}";
}
#
# Return the working directory name for an experiment. This is where
# the scripts do their work. The path is <pid>/<eid> underneath the
# directory named by TBDB_EXPT_WORKDIR().
#
sub TBExptWorkDir($$)
{
    my ($pid, $eid) = @_;
    my $workroot = TBDB_EXPT_WORKDIR();

    return join("/", $workroot, $pid, $eid);
}
#
# Return the user's experiment directory name. This is a path in the /proj
# tree. We keep these separate to avoid NFS issues, and users generally
# messing with things they should not (by accident or otherwise).
#
# The value is read from the path column of the experiment's row in the
# experiments table.
#
# NOTE(review): $pid and $eid are interpolated directly into the query;
# presumably callers have already validated them - confirm.
#
sub TBExptUserDir($$)
{
    my ($pid, $eid) = @_;

    # Keep the result handle lexical so this lookup cannot clobber a
    # like-named variable used elsewhere in the file.
    my $query_result =
        DBQueryFatal("select path from experiments ".
                     "where pid='$pid' and eid='$eid'");
    my ($path) = $query_result->fetchrow_array;

    return $path;
}
#
# Create a temp logfile name for an experiment, create it, and untaint it!
# Note that we create the file in /tmp initially, mostly as a convenience,
# and then move it into the experiment directory later.
# The file is created in the experiment working directory and moved later
# to the user visible directory.
#
sub TBExptCreateLogFile($$$)
{
my($pid, $eid, $prefix) = @_;
my $logdir;
my $logname;
$logname = `mktemp /tmp/${prefix}-${pid}-${eid}.XXXXXX`;
$logdir = TBExptWorkDir($pid, $eid);
$logname = `mktemp $logdir/${prefix}.XXXXXX`;
if ($logname =~ /^([-\@\w\.\/]+)$/) {
$logname = $1;
......@@ -2098,5 +2154,20 @@ sub MapNodeOSID($$)
return $n_osid;
}
#
# Save off the log files for an experiment by recursively copying its
# working directory (TBExptWorkDir) into its log directory (TBExptLogDir).
# This is best effort: a failed copy is reported on STDERR but is not
# fatal. Always returns 1.
#
sub TBSaveExpLogFiles($$)
{
    my ($pid, $eid) = @_;
    my $workdir = TBExptWorkDir($pid, $eid);
    my $logdir  = TBExptLogDir($pid, $eid);

    # The list form of system() bypasses the shell, so the two paths are
    # handed to cp verbatim no matter what characters they contain.
    if (system("/bin/cp", "-Rpf", "$workdir/", $logdir) != 0) {
        print STDERR "*** WARNING: Could not copy $workdir to $logdir\n";
    }
    return 1;
}
1;
......@@ -39,7 +39,6 @@ my $startexp = "$TB/bin/startexp";
my $tbdata = "tbdata";
my $immediate= 0;
my $frontend = 0;
my $dirname;
my $dbuid;
my @row;
......@@ -156,26 +155,19 @@ if (system("$mkexpdir $pid $gid $eid") != 0) {
}
#
# Grab that path from the DB (set by mkexpdir).
# Grab the working directory path, and thats where we work.
# The user's experiment directory is off in /proj space.
#
$query_result =
DBQueryWarn("select path from experiments ".
"where pid='$pid' and eid='$eid'");
if (! $query_result ||
! $query_result->numrows) {
fatal("Unexpected DB Error! Experiment $pid/$eid does not exist!");
}
@row = $query_result->fetchrow_array();
$dirname = $row[0];
my $workdir = TBExptWorkDir($pid, $eid);
chdir("$dirname/$tbdata") or
fatal("Could not chdir to $dirname/$tbdata: $!");
chdir("$workdir") or
fatal("Could not chdir to $workdir: $!");
#
# Now we can get the NS file!
#
if (system("/bin/cp", "$tempnsfile", "$nsfile")) {
fatal("Could not copy $tempnsfile to $dirname/$nsfile");
fatal("Could not copy $tempnsfile to $workdir/$nsfile");
}
chmod(0770, "$nsfile");
......@@ -219,9 +211,8 @@ if ($immediate) {
#
# Save a copy of the failed experiment directory for debugging.
#
system("/bin/rm", "-rf", "${dirname}-TBfailed");
system("/bin/mv", "-f", "${dirname}", "${dirname}-TBfailed");
system("/bin/rm", "-rf", "${workdir}-TBfailed");
system("/bin/mv", "-f", "${workdir}", "${workdir}-TBfailed");
fatal("Failed to start experiment $pid/$eid!");
}
......@@ -255,8 +246,8 @@ sub fatal($)
DBQueryWarn("DELETE from experiments ".
"WHERE eid='$eid' and pid='$pid'");
if (defined($dirname)) {
system("/bin/rm", "-rf", "$dirname");
if (defined($workdir)) {
system("/bin/rm", "-rf", "$workdir");
}
exit(-1);
}
......
......@@ -100,6 +100,8 @@ if ($eid =~ /^([-\@\w.]+)$/) {
else {
die("Tainted argument $eid!\n");
}
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
#
# Verify user and get his DB uid.
......@@ -248,7 +250,7 @@ if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
$logname = TBExptCreateLogFile($pid, $eid, "end");
$logname = TBExptCreateLogFile($pid, $eid, "endexp");
TBExptSetLogFile($pid, $eid, $logname);
TBExptOpenLogFile($pid, $eid);
......@@ -294,6 +296,14 @@ if ($estate ne EXPTSTATE_TERMINATED && $estate ne EXPTSTATE_NEW) {
fatal("Experiment is in the wrong state: $estate\n");
}
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);
# Terminate the log so the web page stops spewing.
TBExptClearLogFile($pid, $eid);
#
# Cleanup DB state and remove directory.
#
......@@ -313,22 +323,15 @@ if ($batch) {
#
my $message =
"Experiment `$eid' in project `$pid' has been terminated.\n" .
"You may now reuse `$eid' as an experiment name.\n\n" .
"Appended below is the output of the experiment teardown. If you\n" .
"have any questions or comments, please include the output below\n" .
"in your message to $TBOPS\n";
"You may now reuse `$eid' as an experiment name.\n";
SENDMAIL("$user_name <$user_email>",
"Experiment $pid/$eid Terminated",
$message,
"$user_name <$user_email>",
"Cc: $expt_head_name <$expt_head_email>\n".
"Bcc: $TBLOGS",
($logname));
"Bcc: $TBLOGS");
if (defined($logname)) {
unlink("$logname");
}
exit 0;
sub fatal($)
......@@ -343,6 +346,9 @@ sub fatal($)
DBQueryWarn("update experiments set expt_locked=NULL ".
"WHERE eid='$eid' and pid='$pid'");
# Copy over the log files so the user can see them.
system("/bin/cp -Rfp $workdir/ $userdir/tbdata");
#
# In batch mode, exit without sending the email.
#
......@@ -350,6 +356,13 @@ sub fatal($)
exit(-1);
}
#
# Clear the log file so the web page stops spewing.
#
if (defined($logname)) {
TBExptClearLogFile($pid, $eid);
}
#
# Send a message to the testbed list. Append the logfile.
#
......@@ -361,20 +374,6 @@ sub fatal($)
"Bcc: $TBOPS",
($logname));
#
# Move the temporary log file into the experiment directory so its kept.
#
if (defined($logname)) {
#
# Move the temporary log file into the experiment directory and
# change the name in the DB. This makes it available to the web
# interface later on if desired.
#
system("cp -pf $logname $expt_path/tbdata/endexp.log");
TBExptCloseLogFile($pid, $eid);
TBExptSetLogFile($pid, $eid, "$expt_path/tbdata/endexp.log");
unlink("$logname");
}
exit(-1);
}
......@@ -387,9 +386,13 @@ sub ExptCleanup()
# in which case it won't be allowed cause of directory permissions. Thats
# okay since admin types should rarely end experiments in other projects.
#
print STDOUT "Removing experiment directory: $expt_path ... \n";
if (system("rm -rf $expt_path")) {
print "*** WARNING: Not able to remove experiment directory.\n";
print STDOUT "Removing experiment directories ... \n";
if (system("/bin/rm -rf $userdir")) {
print "*** WARNING: Not able to remove $userdir\n";
print " Someone will need to do this by hand.\n";
}
if (system("/bin/rm -rf $workdir")) {
print "*** WARNING: Not able to remove $workdir\n";
print " Someone will need to do this by hand.\n";
}
......
......@@ -86,6 +86,7 @@ else {
my $piddir = "$projroot/$pid";
my $expdir = "$piddir/exp";
my $eiddir = "$expdir/$eid";
my $workdir = TBExptWorkDir($pid, $eid);
#
# Unix info for the group
......@@ -150,16 +151,24 @@ foreach $dir (@dirlist) {
DBQueryFatal("update experiments set path='$eiddir' ".
"where pid='$pid' and eid='$eid'");
#
# Create the working directory.
#
if (-e $workdir) {
fatal("*** $0:\n".
" $workdir already exists!\n");
}
if (! mkdir($workdir, 0775)) {
fatal("*** $0:\n".
" Could not create $workdir: $!\n");
}
exit(0);
sub fatal()
{
if (! chdir($expdir)) {
print STDOUT "In Fatal: Could not chdir to $expdir!\n";
exit(-1);
}
system("rm -r $eid");
system("rm -r $eiddir");
system("rm -r $workdir");
exit(-1);
}
......@@ -197,6 +197,21 @@ if (! chown($uid, $gid, "$GRPROOT/$pid")) {
fatal("Could not chown $GRPROOT/$pid to $uid/$gid: $!");
}
#
# Create experiment working directory.
#
my $workdir = TBExptWorkDir($pid, $eid);
if (! mkdir("$workdir", 0770)) {
fatal("Could not make directory $workdir: $!");
}
if (! chmod(0770, "$workdir")) {
fatal("Could not chmod directory $workdir: $!");
}
if (! chown($uid, $gid, "$workdir")) {
fatal("Could not chown $workdir to $uid/$gid: $!");
}
exit(0);
sub fatal {
......
......@@ -112,6 +112,30 @@ if (! chown(0, 0, $newname)) {
die("*** Could not chown directory $newname to 0/0: $!\n");
}
#
# Ditto for the experiment working directory.
#
my $workdir = TBDB_EXPT_WORKDIR();
if (! chdir($workdir)) {
die("*** Could not chdir to $workdir: $!\n");
}
if (! -e $pid) {
die("*** Project directory '$pid' does not exist in $workdir!\n");
}
if (! rename($pid, $newname)) {
die("*** Could not rename $pid to $newname in $workdir: $!\n");
}
if (! chmod(0700, $newname)) {
die("*** Could not chmod directory $newname to 0700: $!\n");
}
if (! chown(0, 0, $newname)) {
die("*** Could not chown directory $newname to 0/0: $!\n");
}
#
# Now remove the group from the group file on both plastic and paper.
#
......
......@@ -147,9 +147,11 @@ else {
}
if (!defined($logname)) {
$logname = TBExptCreateLogFile($pid, $eid, "start");
$logname = TBExptCreateLogFile($pid, $eid, "startexp");
}
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
my $repfile = "$eid.report";
my $user_name;
my $user_email;
......@@ -204,9 +206,9 @@ if ($estate ne EXPTSTATE_NEW) {
" Experiment $pid/$eid is already configured (or configuring)!\n");
}
if (! chdir("$expt_path/$tbdata")) {
if (! chdir($workdir)) {
die("*** $0:\n".
" Could not chdir to $expt_path/$tbdata: $!\n");
" Could not chdir to $workdir: $!\n");
}
#
......@@ -305,24 +307,19 @@ if ($batch) {
print STDOUT "\n\n";
}
# Yippie!
print STDOUT "Setup Success\n";
#
# Try to copy off the files for testbed information gathering.
#
my $infodir = "$pid-$eid-" . TBDateTimeFSSafe();
if (mkdir("$TBINFO/$infodir", 0770)) {
system("cp $nsfile $TBINFO/$infodir");
system("cp $logname $TBINFO/$infodir/$eid.log");
if (-e "assign.log") {
system("cp *.ptop *.top $TBINFO/$infodir");
system("cp assign.log $TBINFO/$infodir");
}
if (-e "wanassign.log") {
system("cp wanassign.log $TBINFO/$infodir");
}
}
TBSaveExpLogFiles($pid, $eid);
print STDOUT "Setup Success\n";
#
# Make a copy of the work dir in the user visible space so the user
# can see the log files.
#
system("cp -Rfp $workdir/ $userdir/tbdata");
#
# In batch mode, just exit without sending email.
......@@ -331,6 +328,11 @@ if ($batch) {
exit(0);
}
#
# Close up the log file so the webpage stops.
#
TBExptCloseLogFile($pid, $eid);
#
# Dump the report file and the log file to the user via email.
#
......@@ -363,17 +365,8 @@ SENDMAIL("$user_name <$user_email>",
"Bcc: $TBLOGS",
($repfile, $logname, $nsfile));
#
# Move the temporary log file into the experiment directory and change the
# name in the DB. This makes it available to the web interface later on
# if desired.
#
system("cp -pf $logname $expt_path/tbdata/startexp.log");
TBExptCloseLogFile($pid, $eid);
TBExptSetLogFile($pid, $eid, "$expt_path/tbdata/startexp.log");
unlink($logname);
exit 0;
# Done!
exit(0);
sub fatal()
{
......@@ -398,6 +391,9 @@ sub fatal()
exit($errorstat);
}
# Clear the logfile so the webpage stops.
TBExptClearLogFile($pid, $eid);
#
# Send a message to the testbed list.
#
......@@ -422,22 +418,15 @@ sub fatal()
"WHERE eid='$eid' and pid='$pid'");
#
# Move the temporary log file into the experiment directory so its kept.
#
system("cp -pf $logname $expt_path/tbdata/startexp.log");
TBExptClearLogFile($pid, $eid);
#
# Save a copy of the failed experiment directory for debugging.
# This must be done *after* sending email so that the nsfile and
# log files are where the mail function expects them to be.
# Copy off the workdir to the user directory, Then back up both of
# them for post-mortem debugging.
#
if (defined($expt_path)) {
system("/bin/rm", "-rf", "${expt_path}-TBfailed");
system("/bin/mv", "-f", "${expt_path}", "${expt_path}-TBfailed");
}
system("/bin/cp -Rfp $workdir/ $userdir/tbdata");
system("/bin/rm -rf ${workdir}-failed");
system("/bin/mv -f $workdir ${workdir}-failed");
system("/bin/rm -rf ${userdir}-failed");
system("/bin/mv -f $userdir ${userdir}-failed");
unlink($logname);
exit($errorstat);
}
......
......@@ -18,10 +18,10 @@ use Getopt::Std;
#
sub usage()
{
print STDOUT "Usage: swapexp [-b] <-s in | out> <pid> <eid>\n";
print STDOUT "Usage: swapexp <-s in | out> <pid> <eid>\n";
exit(-1);
}
my $optlist = "bs:";
my $optlist = "s:";
#
# Configure variables
......@@ -110,6 +110,8 @@ else {
die("Tainted argument $eid!\n");
}
my $repfile = "$eid.report";
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
#
# Verify user and get his DB uid.
......@@ -160,9 +162,9 @@ my $expt_path = $hashrow{'path'};
my $isbatchexpt = $hashrow{'batchmode'};
my $ebatchstate = $hashrow{'batchstate'};
if (! chdir("$expt_path/$tbdata")) {
if (! chdir($workdir)) {
die("*** $0:\n".
" Could not chdir to $expt_path/$tbdata: $!\n");
" Could not chdir to $workdir: $!\n");
}
if (defined($hashrow{'expt_locked'})) {
......@@ -237,7 +239,7 @@ if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
$logname = TBExptCreateLogFile($pid, $eid, "end");
$logname = TBExptCreateLogFile($pid, $eid, "swapexp");
TBExptSetLogFile($pid, $eid, $logname);
TBExptOpenLogFile($pid, $eid);
......@@ -286,24 +288,19 @@ else {
}
system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
}
#
# Try to copy off the files for testbed information gathering.
#
my $infodir = "$pid-$eid-" . TBDateTimeFSSafe();
if (mkdir("$TBINFO/$infodir", 0770)) {
system("cp $logname $TBINFO/$infodir/$eid.log");
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);
if (-e "assign.log") {
system("cp *.ptop *.top $TBINFO/$infodir");
system("cp assign.log $TBINFO/$infodir");
}
if (-e "wanassign.log") {
system("cp wanassign.log $TBINFO/$infodir");
}
}
}
#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
system("cp -Rfp $workdir/ $userdir/tbdata");
#
# Must unlock before exit.
......@@ -319,6 +316,13 @@ if ($batch) {
exit(0);
}
#
# Clear the log file so the web page stops spewing.
#
if (defined($logname)) {
TBExptCloseLogFile($pid, $eid);
}
#
# Send email notification to user.
#
......@@ -336,17 +340,6 @@ SENDMAIL("$user_name <$user_email>",
"Bcc: $TBLOGS",
($repfile, $logname));
if (defined($logname)) {
#
# Move the temporary log file into the experiment directory and
# change the name in the DB. This makes it available to the web
# interface later on if desired.
#
system("cp -pf $logname $expt_path/tbdata/swapexp.log");
TBExptCloseLogFile($pid, $eid);
TBExptSetLogFile($pid, $eid, "$expt_path/tbdata/swapexp.log");
unlink("$logname");
}
exit 0;
sub fatal($)
......@@ -361,6 +354,9 @@ sub fatal($)
DBQueryWarn("update experiments set expt_locked=NULL ".
"WHERE eid='$eid' and pid='$pid'");
# Copy over the log files so the user can see them.
system("/bin/cp -Rfp $workdir/ $userdir/tbdata");
#
# In batch mode, exit without sending the email.
#
......@@ -369,6 +365,13 @@ sub fatal($)
exit(-1);
}
#
# Clear the log file so the web page stops spewing.
#
if (defined($logname)) {
TBExptCloseLogFile($pid, $eid);
}
#
# Send a message to the testbed list. Append the logfile.
#
......@@ -380,17 +383,6 @@ sub fatal($)
"Cc: $TBOPS",
($logname));
if (defined($logname)) {
#
# Move the temporary log file into the experiment directory and
# change the name in the DB. This makes it available to the web
# interface later on if desired.
#
system("cp -pf $logname $expt_path/tbdata/swapexp.log");
TBExptCloseLogFile($pid, $eid);
TBExptSetLogFile($pid, $eid, "$expt_path/tbdata/swapexp.log");
unlink("$logname");
}
exit(-1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment