Commit b4e203a5 authored by Leigh Stoller

A bunch of changes for experiment runs ... the central change is to
use loghole to capture the log files and store them in the experiment
archive before the commit point between runs (and at swapout).

Some other small unrelated changes as well.
parent 359118f0
......@@ -27,6 +27,7 @@ use overload ('""' => 'Stringify');
my $TB = "@prefix@";
my $MD5 = "/sbin/md5";
my $makegraph = "$TB/bin/template_graph";
my $TEVC = "$TB/bin/tevc";
# Cache of template instances to avoid regenerating them.
my %templates = ();
......@@ -1144,6 +1145,7 @@ use libdb;
use libtestbed;
use libtblog;
use English;
use libArchive;
use overload ('""' => 'Stringify');
#
......@@ -1696,6 +1698,36 @@ sub NewRunBinding($$$)
return 0;
}
#
# Use tevc to tell loghole to sync. We have to send a bunch of extra args
# (packed into a single LOGHOLE_ARGS=... event argument) to get loghole to
# do what we want.
#
# Must be invoked as an instance method; the pid/eid are taken from the
# object. The logs are directed at the "logs" subdirectory of the
# experiment's user file archive directory, which is created here if it
# does not exist yet.
#
# Returns 0 on success and -1 on any failure: not called on a reference,
# no usable archive directory, the logs directory could not be created,
# or tevc could not be run or exited non-zero.
#
sub LogHole($)
{
    my ($self) = @_;

    # Must be a real reference.
    return -1
        if (! ref($self));

    my $pid = $self->pid();
    my $eid = $self->eid();

    my $archivedir = libArchive::TBUserFileArchiveDirectory($pid, $eid);

    # Without this guard an undefined/empty result would turn the log
    # directory into "/logs".
    # NOTE(review): presumably the library returns undef on failure; confirm.
    return -1
        if (!defined($archivedir) || $archivedir eq "");

    my $logdir = "$archivedir/logs";

    # Test for a directory, not mere existence: a plain file with this
    # name must not be mistaken for the log directory.
    if (! -d $logdir) {
        if (! mkdir($logdir)) {
            # Someone else may have created it between the test and the
            # mkdir; only fail if there is still no directory.
            return -1
                if (! -d $logdir);
        }
    }

    #
    # List form of system() bypasses the shell, so nothing in the pid,
    # eid or directory name can be reinterpreted as shell syntax. The
    # argument vector is the same one the shell would have produced from
    # the quoted command string: LOGHOLE_ARGS and its value travel as a
    # single argument.
    #
    my @command = ($TEVC, "-w", "-t", "60", "-e", "$pid/$eid",
                   "now", "ns", "SNAPSHOT",
                   "LOGHOLE_ARGS=-l $logdir -P -s");

    system(@command) == 0
        or return -1;

    return 0;
}
# _Always_ make sure that this 1 is at the end of the file...
1;
......@@ -13,12 +13,12 @@ use Getopt::Std;
sub usage()
{
print STDERR
"Usage: archive_control [-f] [-t tag] [-m file] commit <pid> <eid>\n".
"Usage: archive_control [-f] [-t tag [-u]] [-m file] commit <pid> <eid>\n".
" archive_control [-f] [-a] addfile <pid> <eid> [files ...]\n".
" archive_control checktag <pid> <eid> <tag>\n";
exit(-1);
}
my $optlist = "dfat:m:";
my $optlist = "dfat:m:u";
my $debug = 0;
my $force = 0;
my $dbuid;
......@@ -150,18 +150,10 @@ if ($expstate ne EXPTSTATE_ACTIVE &&
# Allow the user to force a commit of the archive.
#
if ($action eq "commit") {
my $tag = "user_commit";
my $tag = "commit";
my $usertag = 0;
my $mfile = undef;
#
# Audit this operation for now.
#
if (AuditStart(0)) {
#
# Parent exits normally
#
exit(0);
}
if (defined($options{"t"})) {
$tag = $options{"t"};
......@@ -172,7 +164,12 @@ if ($action eq "commit") {
# Force a taint check; the library will escape it for the shell.
$tag =~ /(.*)/;
$tag = $1;
if (defined($options{"u"})) {
$usertag = 1;
}
}
if (defined($options{"m"})) {
#
# Argument is a pathname to a tempfile.
......@@ -221,7 +218,7 @@ if ($action eq "commit") {
# And commit the archive.
print "Doing a commit on the experiment archive ...\n";
if (libArchive::TBCommitExperimentArchive($pid, $eid, $tag,
1, $mfile) < 0) {
$usertag, $mfile) < 0) {
fatal("Failed to commit experiment archive!");
}
}
......@@ -300,6 +297,14 @@ elsif ($action eq "list" ||
}
if ($action eq "missing") {
#
# This program asks the nodes for the trace results.
#
if ($expstate ne EXPTSTATE_ACTIVE) {
print "Getting files accessed via NFS.\n";
system("$NFSTRACE transfer $pid $eid");
}
#
# Figure out what files were accessed by NFS, but are not in
# archive directory.
......
......@@ -495,7 +495,7 @@ sub ArchiveSavePoint($;$$$)
}
mysystem("$IMPORTER -no_user_input file://$repodir ".
" $view/savepoint . " .
($debug < 2 ? "> /dev/null" : ""))
($debug < 2 ? "> /dev/null 2>&1" : ""))
== 0 or goto bad;
#
......
......@@ -596,7 +596,7 @@ sub RebootNode {
if ($syspid) {
local $SIG{ALRM} = sub { kill("TERM", $syspid); };
alarm 20;
alarm 30;
waitpid($syspid, 0);
alarm 0;
......
......@@ -32,7 +32,7 @@ sub usage()
"-a <action> -e <eid> <guid/vers>\n".
"switches and arguments:\n".
"-a <action> - start or stop\n".
"-w - wait for template to be instantiated\n".
"-w - wait for run to start\n".
"-q - be less chatty\n".
"-E <str> - A pithy sentence describing the run\n".
"-r <runid> - A token ... we will make on up for you\n".
......@@ -41,11 +41,11 @@ sub usage()
"<guid/vers> - GUID and version to swapin\n");
exit(-1);
}
my $optlist = "qwp:E:a:r:e:";
my $optlist = "qwp:E:a:r:e:d";
my %options = ();
my $quiet = 0;
my $waitmode = 0;
my $foreground = 0;
my $debug = 0;
my $paramfile;
my %parameters = ();
my $action;
......@@ -71,6 +71,7 @@ my $CONTROL = "@USERNODE@";
# Locals
my $user_name;
my $user_email;
my $logname;
my $dbuid;
my $pid;
my $exptidx;
......@@ -97,6 +98,7 @@ use lib "@prefix@/lib";
use libdb;
use libtestbed;
use libtblog;
use libArchive;
use Template;
# Be careful not to exit on transient error
......@@ -234,10 +236,81 @@ if (defined($paramfile)) {
if ($donebad);
}
#
# Catch this so we can clean up.
#
$SIG{TERM} = \&sighandler;
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $debug) {
$logname = TBExptCreateLogFile($pid, $eid, "newrun");
TBExptSetLogFile($pid, $eid, $logname);
TBExptOpenLogFile($pid, $eid);
if (my $childpid = TBBackGround($logname)) {
#
# Parent exits normally, unless in waitmode. We have to set
# justexit to make sure the END block below does not run.
#
$justexit = 1;
if (!$waitmode) {
print("A new run is being started for $pid/$eid.\n")
if (! $quiet);
exit(0);
}
print("Waiting for new run to start.\n")
if (! $quiet);
if (-t STDIN && !$quiet) {
print("You may type ^C at anytime.".
"\n".
"You will not actually interrupt the experiment itself.\n");
}
# Give child a chance to run.
select(undef, undef, undef, 0.25);
#
# Reset signal handlers. User can now kill this process, without
# stopping the child.
#
$SIG{TERM} = 'DEFAULT';
$SIG{INT} = 'DEFAULT';
$SIG{QUIT} = 'DEFAULT';
#
# Wait until child exits or until user gets bored and types ^C.
#
waitpid($childpid, 0);
print("Done. Exited with status: $?\n")
if (! $quiet);
exit($? >> 8);
}
TBdbfork();
}
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
POSIX::setsid();
}
# This sets the stop time.
$instance->StopCurrentRun() == 0
or fatal(-1, "Could not stop experiment run for $instance!");
# This runs loghole.
print "Asking loghole to sync the logfiles ... this will take a minute.\n";
$instance->LogHole() == 0
or fatal(-1, "Loghole failed");
#
# Commit the archive.
#
......@@ -278,10 +351,16 @@ if ($paramfile) {
#
# Restart the event stream from the beginning.
#
print "Asking the event system to replay events ...\n";
system("$eventcontrol replay $pid $eid") == 0
or fatal(-1, "Could not restart the event system!");
done:
# Stop the web interface from spewing.
TBExptCloseLogFile($pid, $eid)
if (defined($logname));
exit(0);
#
......@@ -364,6 +443,9 @@ sub ParseArgs()
if (defined($options{"w"})) {
$waitmode = 1;
}
if (defined($options{"d"})) {
$debug = 1;
}
if (defined($options{"E"})) {
if (! TBcheck_dbslot($options{"E"},
"experiment_templates", "description",
......@@ -433,6 +515,10 @@ sub fatal($$)
tberror $msg;
tbinfo "Cleaning up and exiting with status $errorstat ...";
# Stop the web interface from spewing.
TBExptCloseLogFile($pid, $eid)
if (defined($logname));
#
# This exit will drop into the END block below.
#
......@@ -446,7 +532,7 @@ sub sighandler ($) {
my $pgrp = getpgrp(0);
kill('TERM', -$pgrp);
sleep(1);
fatal(-1, "Caught SIG${signame}! Killing experiment setup ...");
fatal(-1, "Caught SIG${signame}!");
}
END {
......
......@@ -238,6 +238,11 @@ if ($waitmode) {
$instance->Stop() == 0
or fatal(-1, "Could not stop experiment instance!");
# This runs loghole.
print "Asking loghole to sync the logfiles ... this will take a minute.\n";
$instance->LogHole() == 0
or fatal(-1, "Loghole failed");
#
# Now do the swapout.
#
......
......@@ -231,7 +231,7 @@ if ($retval) {
}
SUEXEC($uid, "$pid,$gid",
"webarchive_control $tagarg $message commit $pid $eid",
"webarchive_control $tagarg $message -u commit $pid $eid",
SUEXEC_ACTION_DIE);
header("Location: archive_view.php3/$exptidx/?exptidx=$exptidx");
......
......@@ -80,9 +80,11 @@ function STARTWATCHER($pid, $eid)
echo "<script type='text/javascript' language='javascript'
src='showexp.js'></script>\n";
$currentstate = TBExptState($pid, $eid);
echo "<script type='text/javascript' language='javascript'>\n";
sajax_show_javascript();
echo "StartStateChangeWatch('$pid', '$eid', 'unimportant');\n";
echo "StartStateChangeWatch('$pid', '$eid', '$currentstate');\n";
echo "</script>\n";
}
......
......@@ -469,7 +469,7 @@ echo "</script>\n";
echo "<center>\n";
echo "<b>Starting template creation!</b> ...<br>\n";
echo "This will take a few moments; please be patient.<br>\n";
echo "<br><br>\n";
echo "<br>\n";
echo "<img id='busy' src='busy.gif'><span id='loading'> Working ...</span>";
echo "<br><br>\n";
echo "</center>\n";
......
......@@ -259,14 +259,13 @@ else {
#
# Description:
#
if (!isset($formfields[description]) || $formfields[description] == "") {
$errors["Description"] = "Missing Field";
}
elseif (!TBvalid_template_description($formfields[description])) {
if (isset($formfields[description]) && $formfields[description] != "") {
if (!TBvalid_template_description($formfields[description])) {
$errors["Description"] = TBFieldErrorString();
}
else {
}
else {
$command_options .= " -E " . escapeshellarg($formfields[description]);
}
}
#
......@@ -358,9 +357,9 @@ echo "</script>\n";
echo "<center>\n";
echo "<b>Starting experiment run!</b> ...<br>\n";
echo "This will take a few moments; please be patient.<br>\n";
echo "<br><br>\n";
echo "<br>\n";
echo "<img id='busy' src='busy.gif'><span id='loading'> Working ...</span>";
echo "<br><br>\n";
echo "<br>\n";
echo "</center>\n";
flush();
......@@ -394,15 +393,7 @@ if ($retval) {
return;
}
echo "<script type='text/javascript' language='javascript'>\n";
echo "PageReplace('template_show.php?guid=$guid&version=$version');\n";
echo "</script>\n";
#
# In case the above fails.
#
echo "<center><b>Done!</b></center>";
echo "<br><br>\n";
STARTLOG($pid, $eid);
#
# Standard Testbed Footer
......
......@@ -242,7 +242,7 @@ TBGroupUnixInfo($pid, $gid, $unix_gid, $unix_name);
echo "<center>\n";
echo "<b>Starting template modification!</b> ...<br>\n";
echo "This will take a few moments; please be patient.<br>\n";
echo "<br><br>\n";
echo "<br>\n";
echo "<img id='busy' src='busy.gif'><span id='loading'> Working ...</span>";
echo "<br><br>\n";
echo "</center>\n";
......
......@@ -521,14 +521,13 @@ if (count($parameter_masterlist)) {
#
# Description:
#
if (!isset($formfields[description]) || $formfields[description] == "") {
$errors["Description"] = "Missing Field";
}
elseif (!TBvalid_template_description($formfields[description])) {
if (isset($formfields[description]) && $formfields[description] != "") {
if (!TBvalid_template_description($formfields[description])) {
$errors["Description"] = TBFieldErrorString();
}
else {
}
else {
$command_options .= " -E " . escapeshellarg($formfields[description]);
}
}
#
......@@ -571,9 +570,9 @@ echo "</script>\n";
echo "<center>\n";
echo "<b>Starting template instantiation!</b> ...<br>\n";
echo "This will take several minutes; please be patient.<br>\n";
echo "<br><br>\n";
echo "<br>\n";
echo "<img id='busy' src='busy.gif'><span id='loading'> Working ...</span>";
echo "<br><br>\n";
echo "<br>\n";
echo "</center>\n";
flush();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment