Commit 087dbfff authored by Leigh Stoller's avatar Leigh Stoller

A bunch of template changes resulting from meetings last week.

* Add XMLRPC interface for template swapin,stoprun,startrun,swapout and
  add the appropriate wrappers to the script_wrapper on ops.

* Allow parameter descriptions in NS files. This is probably not in its
  final form since its a bit confusing as to what has priority; something
  in the NS file or a metadata item. Anyway, you can do this in your NS
  file:

	$ns define-template-parameter GUID "0/0" "The GUID to be analyzed"

  The rules are currently that the NS file description has priority and
  is copied to child templates, unless the user has modified a description
  via the web interface, in which case the NS file description is ignored.
  I know, sounds awful, but for the most part people are going to use the
  NS file anyway.

* Add "clear" option when starting a new experiment run; the per
  experiment DB at the logholes are cleared. Note that this is *not* the
  default behaviour; you have to either check the checkbox on the web form
  or use the -c option to the script wrapper, or clear=yes if talking
  directly to the XMLRPC server.

* Fix up how email is generated for template_swapin and template_create,
  so that Kevin can debug tblog/tbreport stuff, but also so that we maintain
  mail logs as before. I have made some improvements to libaudit so as to
  centralize the mail goo, and avoid duplicating all that stuff.

* Minor fixes to the program agent so that the new environment strings are
  sent before the program agent exits and reloads them!

* Other minor little things.
parent ceee0c13
......@@ -1037,6 +1037,10 @@ startrun_callback(event_handle_t handle,
return;
}
/*
* XXX Both of these need to send completion events
*/
if (strcmp(event, TBDB_EVENTTYPE_STOP) == 0) {
info("startrun_callback: Got a stop event.\n");
......@@ -1049,6 +1053,11 @@ startrun_callback(event_handle_t handle,
stop_program(pinfo, NULL);
}
}
return;
}
if (strcmp(event, TBDB_EVENTTYPE_RELOAD) == 0) {
info("startrun_callback: Got a reload event.\n");
/*
* Wrapper will restart us.
*/
......
......@@ -2895,6 +2895,7 @@ CREATE TABLE virt_parameters (
eid varchar(32) NOT NULL default '',
name varchar(64) NOT NULL default '',
value tinytext,
description text,
PRIMARY KEY (pid,eid,name)
) TYPE=MyISAM;
......
......@@ -792,6 +792,7 @@ REPLACE INTO table_regex VALUES ('virt_parameters','pid','text','redirect','proj
REPLACE INTO table_regex VALUES ('virt_parameters','eid','text','redirect','experiments:eid',0,0,NULL);
REPLACE INTO table_regex VALUES ('virt_parameters','name','text','regex','^\\w[-\\w]+$',1,64,NULL);
REPLACE INTO table_regex VALUES ('virt_parameters','value','text','redirect','default:tinytext',0,256,NULL);
REPLACE INTO table_regex VALUES ('virt_parameters','description','text','redirect','default:text',0,1024,NULL);
REPLACE INTO table_regex VALUES ('experiment_template_instance_bindings','name','text','regex','^\\w[-\\w]+$',1,64,NULL);
REPLACE INTO table_regex VALUES ('experiment_template_instance_bindings','value','text','redirect','default:tinytext',0,256,NULL);
REPLACE INTO table_regex VALUES ('experiment_runs','runid','text','redirect','experiments:eid',0,0,NULL);
......
......@@ -3674,3 +3674,10 @@ last_net_act,last_cpu_act,last_ext_act);
change latitude latitude float default NULL,
change longitude longitude float default NULL;
4.87: Add description field to virt_parameters so that we can pass
through a default description from the NS file for parameters.
**** Skip this stuff below if you just did 4.41 above.
alter table virt_parameters add description text;
......@@ -858,9 +858,9 @@ sub ModifyMetadata($$$$)
#
# Add a formal parameter to a template.
#
sub NewFormalParameter($$$)
sub NewFormalParameter($$$$$)
{
my ($self, $name, $value) = @_;
my ($self, $name, $value, $description, $dbuid) = @_;
# Must be a real reference.
return -1
......@@ -885,6 +885,12 @@ sub NewFormalParameter($$$)
" name='$name', value=$value")
or return -1;
if (defined($description) && $description ne "") {
$self->NewMetadata($name,
$description, $dbuid, "parameter_description")
== 0 or return -1;
}
return 0;
}
......@@ -1072,7 +1078,7 @@ sub CopyMetadata($$$)
# Copy the toplevel items.
#
my $query_result =
DBQueryWarn("select name,value,metadata_type,internal ".
DBQueryWarn("select name,value,metadata_type,internal,i.parent_guid ".
" from experiment_template_metadata as m ".
"left join experiment_template_metadata_items as i on ".
" i.guid=m.metadata_guid and ".
......@@ -1081,12 +1087,14 @@ sub CopyMetadata($$$)
" m.parent_vers='$from_vers'")
or return -1;
while (my ($name,$value,$type,$internal) =
while (my ($name,$value,$type,$internal,$hasparent) =
$query_result->fetchrow_array()) {
my $guid;
my $version = 1;
my $metadata_type = "NULL";
my $parent_guid;
my $parent_vers;
$name = DBQuoteSpecial($name);
$value = DBQuoteSpecial($value);
......@@ -1099,7 +1107,10 @@ sub CopyMetadata($$$)
next
if ($type eq "tid" or $type eq "template_description");
# XXX Skip parameters that were deleted.
#
# XXX Skip parameters that were deleted or whose description
# was modified after being created.
#
if ($type eq "parameter_description") {
my $param_result =
DBQueryWarn("select * ".
......@@ -1113,12 +1124,36 @@ sub CopyMetadata($$$)
next
if (!$param_result->numrows);
#
# If the current parameter has a metadata description, it
# came from the NS file parse. If the version we are
# copying from has no parent, it has not been modified and
# so we do not want to take that one; use the current one
# from the NS file instead. If on the other hand the user
# modified that description after its NS file parse, we take
# that since we assume its a better description. Hmm, this
# sounds awful when described.
#
my $param_row = $param_result->fetchrow_hashref();
if (defined($param_row->{"metadata_guid"})) {
next
if (! $hasparent);
$parent_guid = $param_row->{"metadata_guid"};
$parent_vers = $param_row->{"metadata_vers"};
# and of course we need to use this guid/vers.
$guid = $parent_guid;
$version = $parent_vers + 1;
}
}
$metadata_type = "'$type'";
}
return -1
if (NewGUID(\$guid) < 0);
if (!defined($guid) && NewGUID(\$guid) < 0);
DBQueryWarn("insert into experiment_template_metadata set ".
" parent_guid='$from_guid', ".
......@@ -1132,6 +1167,9 @@ sub CopyMetadata($$$)
DBQueryWarn("insert into experiment_template_metadata_items set ".
" guid='$guid', vers='$version', uid='$copier', ".
" template_guid='$from_guid', ".
(! defined($parent_guid) ? "" :
" parent_guid='$parent_guid',".
" parent_vers='$parent_vers',") .
" name=$name, value=$value, created=now()")
or return -1;
......@@ -1554,7 +1592,7 @@ sub CreateDirectory($)
print "*** Could not create directory $versdir: $!\n";
return $rval;
}
foreach my $token ("tbdata", "archive", "datastore") {
foreach my $token ("tbdata", "archive", "datastore", "logs") {
my $dir = "$versdir/$token";
if (! mkdir($dir, 0770) ||
......@@ -2172,6 +2210,33 @@ sub LastRun($)
return $query_result->fetchrow_hashref();
}
#
# Return current run.
#
sub CurrentRun($)
{
my ($self) = @_;
# Must be a real reference.
return undef
if (! ref($self));
my $exptidx = $self->exptidx();
my $runidx = $self->runidx();
return undef
if (!defined($runidx));
my $query_result =
DBQueryWarn("select * from experiment_runs ".
"where exptidx='$exptidx' and idx='$runidx'");
return undef
if (!$query_result);
return $query_result->fetchrow_hashref();
}
#
# Binding records for each Run.
#
......@@ -2234,7 +2299,7 @@ sub LogHole($)
#
# Do the loghole thing.
#
Template::mysystem("$TEVC -w -t 180 -e $pid/$eid now ns SNAPSHOT ".
Template::mysystem("$TEVC -w -t 300 -e $pid/$eid now ns SNAPSHOT ".
" LOGHOLE_ARGS='-s'") == 0
or return -1;
......@@ -2248,6 +2313,29 @@ sub LogHole($)
return 0;
}
#
# Use tevc to tell loghole to clean.
#
sub LogClean($)
{
my ($self) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $pid = $self->pid();
my $eid = $self->eid();
#
# Do the loghole thing.
#
Template::mysystem("$TEVC -w -t 300 -e $pid/$eid now ns RESET") == 0
or return -1;
return 0;
}
#
# Dump the instance DB into the archive directory.
#
......@@ -2278,6 +2366,26 @@ sub DumpDB($)
return 0;
}
#
# Clean the instance DB.
#
sub CleanDB($)
{
my ($self) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $pid = $self->pid();
my $eid = $self->eid();
Template::mysystem("$DBCONTROL cleanexpdb $pid $eid") == 0
or return -1;
return 0;
}
#
# Grab the current archive tag. I think this info belongs in the instance
# record.
......@@ -2537,6 +2645,35 @@ sub CopyTemplateEvents($)
return 0;
}
#
# Create a log file for an instance, in the template directory.
#
sub CreateLogFile($$$)
{
my($self, $token, $ppath) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $idx = $self->idx();
my $path = $self->template()->path();
my $logdir = "$path/logs";
my $logname = "$logdir/instance${idx}.${token}";
return -1
if (-e $logname);
return -1
if (! -d $logdir && !mkdir($logdir, 0775));
Template::mysystem("touch $logname") == 0
or return -1;
$$ppath = $logname;
return 0;
}
# _Always_ make sure that this 1 is at the end of the file...
1;
......@@ -259,8 +259,9 @@ Node instproc unknown {m args} {
Simulator instproc connect {src dst} {
}
Simulator instproc define-template-parameter {name value} {
Simulator instproc define-template-parameter {name args} {
# install the name/value in the outer environment.
set value [lindex $args 0]
uplevel 1 set \{$name\} \{$value\}
}
......
......@@ -89,6 +89,8 @@ Simulator instproc init {args} {
$self instvar parameter_list;
array set parameter_list {}
$self instvar parameter_descriptions;
array set parameter_descriptions {}
var_import ::GLOBALS::last_class
set last_class $self
......@@ -356,6 +358,7 @@ Simulator instproc run {} {
$self instvar tiptunnel_list
$self instvar topography_list
$self instvar parameter_list
$self instvar parameter_descriptions
$self instvar simulated
$self instvar nseconfig
var_import ::GLOBALS::pid
......@@ -679,8 +682,17 @@ Simulator instproc run {} {
foreach name [array names parameter_list] {
set default_value $parameter_list($name)
set description $parameter_descriptions($name)
$self spitxml_data "virt_parameters" [list "name" "value"] [list $name $default_value ]
set p_fields [list "name" "value"]
set p_values [list $name $default_value]
if {$description != {}} {
lappend p_fields "description"
lappend p_values $description
}
$self spitxml_data "virt_parameters" $p_fields $p_values
}
foreach sourcefile $sourcefile_list {
......@@ -1108,14 +1120,31 @@ Simulator instproc add_topography {tg} {
return 0
}
Simulator instproc define-template-parameter {name value} {
Simulator instproc define-template-parameter {name args} {
$self instvar parameter_list
$self instvar parameter_descriptions
var_import ::TBCOMPAT::parameter_list_defaults
if {$args == {}} {
perror "\[define-template-parameter] not enough arguments!"
return
}
if {[llength $args] > 2} {
perror "\[define-template-parameter] too many arguments!"
return
}
set value [lindex $args 0]
set description {}
if {[llength $args] == 2} {
set description [lindex $args 1]
}
if {[info exists parameter_list_defaults($name)]} {
set value $parameter_list_defaults($name)
}
set parameter_list($name) $value
set parameter_descriptions($name) $description
# And install the name/value in the outer environment.
uplevel 1 real_set \{$name\} \{$value\}
......
......@@ -221,7 +221,7 @@ $eid = $template->eid();
#
# Use the logonly option to audit so that we get a record mailed.
#
if (my $childpid = AuditStart(1, undef, 1)) {
if (my $childpid = AuditStart(LIBAUDIT_DAEMON, undef, LIBAUDIT_LOGONLY)) {
#
# Parent exits normally, unless in waitmode. We have to set
# justexit to make sure the END block below does not run.
......@@ -377,14 +377,14 @@ libArchive::TBCommitExperimentArchive($pid, $eid, "TemplateCreate")
# info forever (after the underlying experiment is terminated).
#
my $query_result =
DBQueryWarn("select name,value from virt_parameters ".
DBQueryWarn("select name,value,description from virt_parameters ".
"where pid='$pid' and eid='$eid'");
fatal(-1, "Could not get virt_parameters for $pid/$eid")
if (! $query_result);
while (my ($name, $value) = $query_result->fetchrow_array()) {
$template->NewFormalParameter($name, $value) == 0
while (my ($name, $value, $description) = $query_result->fetchrow_array()) {
$template->NewFormalParameter($name, $value, $description, $dbuid) == 0
or fatal(-1, "Could not set formal parameter for $pid/$eid")
}
......
......@@ -33,6 +33,7 @@ sub usage()
"switches and arguments:\n".
"-a <action> - start or stop\n".
"-w - wait for run to start\n".
"-s - save DB contents at end of run; default is clean\n".
"-q - be less chatty\n".
"-E <str> - A pithy sentence describing the run\n".
"-r <runid> - A token ... we will make on up for you\n".
......@@ -41,11 +42,12 @@ sub usage()
"<guid/vers> - GUID and version to swapin\n");
exit(-1);
}
my $optlist = "qwp:E:a:r:e:d";
my $optlist = "qwp:E:a:r:e:dsc";
my %options = ();
my $quiet = 0;
my $waitmode = 0;
my $debug = 0;
my $clean = 0;
my $paramfile;
my %parameters = ();
my $action;
......@@ -152,6 +154,10 @@ if (system("$checkquota $dbuid") != 0) {
# Now parse arguments.
ParseArgs();
if ($action eq "start" && !defined($runid)) {
tbdie("Must provide a run ID (-r argument) when starting a new run!");
}
#
# In wait mode, block SIGINT until we spin off the background process.
#
......@@ -313,7 +319,8 @@ if (defined($instance->runidx())) {
# Ug. I need to figure out how to hook into the event sequence
# mechanism so I can use a completion event.
sleep(2);
print "Asking program agents to stop ... this will take a moment.\n";
sleep(5);
# This sets the stop time.
$instance->StopCurrentRun() == 0
......@@ -331,16 +338,36 @@ if (defined($instance->runidx())) {
#
# Commit the archive.
#
system("$archcontrol -t $runid commit $pid $eid");
my $this_run = $instance->CurrentRun();
my $this_runid = $this_run->{"runid"};
system("$archcontrol -t ${action}run_${this_runid} commit $pid $eid");
# This has to be done after the archive commit, so we can find the tag.
$instance->FinalizeCurrentRun() == 0
or fatal(-1, "Could not finalize experiment run for $instance!");
print "Experiment run '$this_runid' has been stopped.\n";
}
goto done
if ($action eq "stop");
#
# Clean/Clear if requested before generating the new run, in case there
# is a problem.
#
if ($clean) {
# This runs loghole.
print "Asking loghole to clean the logs ... this will take a moment.\n";
$instance->LogClean() == 0
or fatal(-1, "Loghole failed");
print "Cleaning the instance database ... this will take a moment.\n";
$instance->CleanDB() == 0
or fatal(-1, "Dump Database failed");
}
#
# Generate a new run.
#
......@@ -380,6 +407,8 @@ if ($paramfile) {
" pid='$pid', eid='$eid'");
}
SignalProgAgents("RELOAD");
# XXX Need to use a completion event!
sleep(5);
}
#
......@@ -389,6 +418,7 @@ print "Asking the event system to replay events ...\n";
system("$eventcontrol replay $pid $eid") == 0
or fatal(-1, "Could not restart the event system!");
print "Experiment run '$runid' has been started.\n";
done:
# Stop the web interface from spewing.
......@@ -456,9 +486,6 @@ sub ParseArgs()
tbdie("Improper experiment name (id)!");
}
}
else {
tbdie("Must provide a run ID (-r argument)!");
}
if (defined($options{"a"})) {
$action = $options{"a"};
......@@ -466,6 +493,10 @@ sub ParseArgs()
if ($action ne "start" and $action ne "stop") {
tbdie("Improper -a argument: $action.");
}
# Need the equiv of a taint check.
if ($action =~ /^([\w]+)$/) {
$action = $1;
}
}
else {
tbdie("Must provide an action (-a argument)!");
......@@ -480,6 +511,9 @@ sub ParseArgs()
if (defined($options{"d"})) {
$debug = 1;
}
if (defined($options{"c"})) {
$clean = 1;
}
if (defined($options{"E"})) {
if (! TBcheck_dbslot($options{"E"},
"experiment_templates", "description",
......
......@@ -113,6 +113,7 @@ use libtblog;
use Template;
use Experiment;
use event;
use libaudit;
# Be careful not to exit on transient error
$libdb::DBQUERY_MAXTRIES = 0;
......@@ -265,6 +266,85 @@ if ($paramfile) {
my $pid = $template->pid();
my $copyid = $template->pid() . "," . $template->eid();
#
# Go to the background now so we have a proper log of what happened.
#
#
$SIG{TERM} = \&sighandler;
#
# Use the logonly option to audit so that we get a record mailed.
#
if (! ($foreground || $batchmode)) {
if ($instance->CreateLogFile("swapin", \$logname) < 0) {
fatal(-1, "Could not create logfile!");
}
if (my $childpid = AuditStart(LIBAUDIT_DAEMON, $logname,
LIBAUDIT_LOGONLY|LIBAUDIT_NODELETE)) {
#
# Parent exits normally, unless in waitmode. We have to set
# justexit to make sure the END block below does not run.
#
$justexit = 1;
if (!$waitmode) {
#
# Before we can actually exit, we need to wait. Totally ick;
# the logfile stuff needs work.
#
while (1) {
my ($tmp1, $tmp2);
last
if (TBExptGetLogFile($pid, $eid, \$tmp1, \$tmp2));
sleep(2);
}
if ($batchmode) {
print("Experiment $pid/$eid has entered the batch system.\n".
"You will be notified when it is fully instantiated.\n")
if (! $quiet);
}
else {
print("Experiment $pid/$eid is now being instantiated.\n".
"You will be notified via email when this is done.\n")
if (! $quiet);
}
exit(0);
}
print("Waiting for experiment $eid to fully instantiate.\n")
if (! $quiet);
if (-t STDIN && !$quiet) {
print("You may type ^C at anytime; you will be notified via ".
"email.\n".
"You will not actually interrupt the instantiation.\n");
}
# Give child a chance to run.
select(undef, undef, undef, 0.25);
#
# Reset signal handlers. User can now kill this process, without
# stopping the child.
#
$SIG{TERM} = 'DEFAULT';
$SIG{INT} = 'DEFAULT';
$SIG{QUIT} = 'DEFAULT';
#
# Wait until child exits or until user gets bored and types ^C.
#
waitpid($childpid, 0);
print("Done. Exited with status: $?\n")
if (! $quiet);
exit($? >> 8);
}
TBdbfork();
}
#
# Build up arguments to batchexp. I do not want to bother with shell
# escapes, hence the list argument to system instead of a long string.
......@@ -305,6 +385,14 @@ fatal($? >> 8, "Could not pre-instantiate the experiment")
# Need to kill the experiment if we fail after this point.
$exptcreated = 1;
#
# Now we can do this ...
#
if (defined($logname) && ! ($foreground || $batchmode)) {
TBExptSetLogFile($pid, $eid, $logname);
TBExptOpenLogFile($pid, $eid);
}
# Grab the experiment record for below.
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
......@@ -362,82 +450,6 @@ if ($paramfile) {
or tbdie("Could not copy out $paramfile");
}
#
# Catch this so we can clean up.
#
$SIG{TERM} = \&sighandler;
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $foreground) {
$logname = TBExptCreateLogFile($pid, $eid, "instantiate");
if (! $batchmode) {
TBExptSetLogFile($pid, $eid, $logname);
TBExptOpenLogFile($pid, $eid);
}
if (my $childpid = TBBackGround($logname)) {
#
# Parent exits normally, unless in waitmode. We have to set
# justexit to make sure the END block below does not run.
#
$justexit = 1;
if (!$waitmode) {
if ($batchmode) {
print("Experiment $pid/$eid has entered the batch system.\n".
"You will be notified when it is fully instantiated.\n")
if (! $quiet);
}
else {
print("Experiment $pid/$eid is now being instantiated.\n".
"You will be notified via email when this is done.\n")
if (! $quiet);
}
exit(0);
}
print("Waiting for experiment $eid to fully instantiate.\n")
if (! $quiet);
if (-t STDIN && !$quiet) {
print("You may type ^C at anytime; you will be notified via email.".
"\n".
"You will not actually interrupt the experiment itself.\n");
}
# Give child a chance to run.
select(undef, undef, undef, 0.25);
#
# Reset signal handlers. User can now kill this process, without
# stopping the child.
#
$SIG{TERM} = 'DEFAULT';
$SIG{INT} = 'DEFAULT';
$SIG{QUIT} = 'DEFAULT';
#
# Wait until child exits or until user gets bored and types ^C.
#
waitpid($childpid, 0);
print("Done. Exited with status: $?\n")
if (! $quiet);
exit($? >> 8);
}
TBdbfork();
}
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
POSIX::setsid();
}
#
# Hmm. Before we actually swapin, copy the parameters to the environment
# table so that they are available to the program agent when it starts up
......@@ -610,6 +622,7 @@ $instance->Start() == 0
TBExptCloseLogFile($pid, $eid)
if (defined($logname) && !$batchmode);
# Email is sent from libaudit at exit ...
exit(0);
#
......
......@@ -113,6 +113,7 @@ use libtblog;
use Template;
use Experiment;
use event;
use libaudit;
# Be careful not to exit on transient error
$libdb::DBQUERY_MAXTRIES = 0;
......@@ -265,6 +266,85 @@ if ($paramfile) {
my $pid = $template->pid();
my $copyid = $template->pid() . "," . $template->eid();
#
# Go to the background now so we have a proper log of what happened.
#
#
$SIG{TERM} = \&sighandler;
#
# Use the logonly option to audit so that we get a record mailed.