Commit cbdc4178 authored by Leigh Stoller's avatar Leigh Stoller

This started out as a simple little hack to add a StopRun "ns" event, but

it got more complicated as it progressed.

The bulk of the change was changing template_exprun so that it can take a
pid/eid as an alternative to eid/guid. This is a big convenience since it's
easy to find the template from a running experiment, and it makes it
possible to invoke from the event scheduler, which has never heard of a
template before (and it's not something I wanted to teach it about).  It's
also easier on users.

Anyway, back to the stoprun event. You can now do this:

	$ns at 100 "$ns stoprun"
or
	tevc -e pid/eid now ns stoprun

You can add the -w option to wait for the completion event that is sent,
but this brings me to the glaring problems with this whole thing.

* First, the scheduler has to fire off the stoprun in the background,
  since if it waits, we get deadlock. Why? Cause the implementation of
  stoprun uses the event system (SNAPSHOT event, other things), and if
  the scheduler is sitting and waiting, nothing happens.

  Okay, the solution to this was to generate a COMPLETION event from
  template_exprun once the stop operation is complete. This brings me
  to the second problem ...

* Worse, is that the "ns" events that are sent to implement stoprun (like
  snapshot) send their own completion events, and that confuses anyone
  waiting on the original stoprun event (it returns early).

  So what to do about this? There is a "token" field in the completion
  event structure, which I presume is to allow you to match things up.  But
  there is no way to set this token using tevc (and then wait for it), and
  besides, the event scheduler makes them up anyway and sticks them into
  the event. So, the seeds of a fix are already germinating in my mind, but
  I wanted to get this commit in so that Mike would have fun reading this
  commit log.
parent 84cf1d12
......@@ -401,6 +401,21 @@ static int do_snapshot(simulator_agent_t sa, char *args)
return retval;
}
/*
 * Handle a STOPRUN event: invoke the template_stoprun command for the
 * experiment named by pid/eid (both defined outside this function).
 *
 * sa   - the simulator agent; must not be NULL.
 * args - event arguments; must not be NULL, otherwise unused here.
 *
 * Returns 0 when the command reports success, -1 (after logging an
 * error) when systemf() returns non-zero.
 */
static int do_stoprun(simulator_agent_t sa, char *args)
{
	assert(sa != NULL);
	assert(args != NULL);

	/* Success path first; anything else is reported and mapped to -1. */
	if (systemf("template_stoprun -p %s -e %s", pid, eid) == 0)
		return 0;

	error("failed to stop current run\n");
	return -1;
}
static int strreltime(char *buf, size_t buflen, time_t secs)
{
int hours, mins, retval = 0;
......@@ -526,6 +541,9 @@ static void *simulator_agent_looper(void *arg)
else if (strcmp(evtype, TBDB_EVENTTYPE_SNAPSHOT) == 0){
do_snapshot(sa, argsbuf);
}
else if (strcmp(evtype, TBDB_EVENTTYPE_STOPRUN) == 0){
do_stoprun(sa, argsbuf);
}
else {
error("cannot handle SIMULATOR event %s.",
evtype);
......
......@@ -67,6 +67,7 @@ char *tbdb_eventtypes[] = {
TBDB_EVENTTYPE_MESSAGE,
TBDB_EVENTTYPE_RUN,
TBDB_EVENTTYPE_CREATE,
TBDB_EVENTTYPE_STOPRUN,
TBDB_NODESTATE_ISUP,
TBDB_NODESTATE_REBOOTED,
TBDB_NODESTATE_REBOOTING,
......
......@@ -78,6 +78,7 @@
#define TBDB_EVENTTYPE_MESSAGE "MESSAGE"
#define TBDB_EVENTTYPE_RUN "RUN"
#define TBDB_EVENTTYPE_CREATE "CREATE"
#define TBDB_EVENTTYPE_STOPRUN "STOPRUN"
/*
* Global event passthru sentinel - does _NOT_ go in either event array
......
......@@ -102,6 +102,7 @@ REPLACE INTO event_eventtypes VALUES (21,'SNAPSHOT');
REPLACE INTO event_eventtypes VALUES (22,'RELOAD');
REPLACE INTO event_eventtypes VALUES (23,'CLEAR');
REPLACE INTO event_eventtypes VALUES (24,'CREATE');
REPLACE INTO event_eventtypes VALUES (25,'STOPRUN');
--
-- Dumping data for table `event_objecttypes`
......
......@@ -2273,6 +2273,55 @@ sub NewRunBinding($$$)
return 0;
}
#
# Invent a name for the next run, derived from the name of the previous one.
#
# Arguments: $prval - reference to a scalar that receives the new run name.
# Returns:   0 on success (name stored through $prval);
#            -1 if not invoked on an object, or if the max-index query
#            fails or returns no rows.
#
sub NewRunID($$)
{
my ($self, $prval) = @_;

# Only meaningful as an instance method.
ref($self)
    or return -1;

my $exptidx = $self->exptidx();

# Highest run index recorded so far for this experiment.
my $result =
    DBQueryWarn("select MAX(idx) from experiment_runs ".
		"where exptidx='$exptidx'");
return -1
    unless ($result && $result->numrows);
my ($maxidx) = $result->fetchrow_array();

# Take the name from the current run if one is set, else from the last run.
my $run      = (defined($self->runidx())
		? $self->CurrentRun() : $self->LastRun());
my $lastname = $run->{"runid"};

#
# When the previous name has the form <word chars>-R<digits>, drop that
# suffix and keep only the leading part. In either case the new name ends
# in "-R" followed by the max run index plus one.
#
my $stem = ($lastname =~ /^([\w]*)\-R(\d*)$/) ? $1 : $lastname;

$$prval = $stem . "-R" . ($maxidx + 1);
return 0;
}
#
# Use tevc to tell loghole to sync. We have to send a bunch of extra args
# to get loghole to do what we want.
......
......@@ -172,7 +172,7 @@ Node instproc updatedb {DB} {
var_import ::TBCOMPAT::hwtype_class
#
# Rserved name; conflicts with kludgy manner in which a program
# Reserved name; conflicts with kludgy manner in which a program
# agent can be used on ops.
#
if {"$self" == "ops"} {
......
......@@ -1621,6 +1621,10 @@ Simulator instproc make_event {outer event} {
set otype SIMULATOR
set etype SWAPOUT
}
"stoprun" {
set otype SIMULATOR
set etype STOPRUN
}
"trace-for" {
set vname "slothd"
set otype SLOTHD
......
......@@ -29,7 +29,7 @@ sub usage()
{
print(STDERR
"Usage: template_exprun [-q] [-w] [-r <runid>] ".
"-a <action> -e <eid> <guid/vers>\n".
"-a <action> -e <eid> [-p <pid> | <guid/vers>]\n".
"switches and arguments:\n".
"-a <action> - start or stop\n".
"-w - wait for run to start\n".
......@@ -37,12 +37,13 @@ sub usage()
"-q - be less chatty\n".
"-E <str> - A pithy sentence describing the run\n".
"-r <runid> - A token ... we will make on up for you\n".
"-p <file> - XML file of parameter bindings\n".
"-x <file> - XML file of parameter bindings\n".
"-e <eid> - The instance name\n".
"-p <pid> - Use the pid/eid to find the template GUID\n".
"<guid/vers> - GUID and version to swapin\n");
exit(-1);
}
my $optlist = "qwp:E:a:r:e:dscf";
my $optlist = "qwx:p:E:a:r:e:dscf";
my %options = ();
my $quiet = 0;
my $waitmode = 0;
......@@ -54,6 +55,7 @@ my %parameters = ();
my $action;
my $description;
my $runid;
my $pid;
my $eid;
my $guid;
my $version;
......@@ -77,7 +79,6 @@ my $user_name;
my $user_email;
my $logname;
my $dbuid;
my $pid;
my $exptidx;
my $template;
my $instance;
......@@ -94,7 +95,8 @@ my $eventcontrol= "$TB/bin/eventsys_control";
sub ParseArgs();
sub fatal($$);
sub sighandler($);
sub SignalProgAgents($);
sub SignalProgAgents($);
sub SendCompletionEvent();
#
# Testbed Support libraries
......@@ -105,6 +107,7 @@ use libtestbed;
use libtblog;
use libArchive;
use Template;
use Experiment;
use event;
# Be careful not to exit on transient error
......@@ -155,10 +158,6 @@ if (system("$checkquota $dbuid") != 0) {
# Now parse arguments.
ParseArgs();
if ($action eq "start" && !defined($runid)) {
tbdie("Must provide a run ID (-r argument) when starting a new run!");
}
#
# In wait mode, block SIGINT until we spin off the background process.
#
......@@ -169,40 +168,71 @@ if ($waitmode) {
}
#
# Grab template and do access check.
# This script allows pid/eid to be used to find the template.
#
$template = Template->Lookup($guid, $version);
if (defined($guid)) {
$template = Template->Lookup($guid, $version);
if (!defined($template)) {
tbdie("Experiment template $guid/$version does not exist!");
}
if (! TBProjAccessCheck($dbuid,
$template->pid(), $template->gid(),
TB_PROJECT_CREATEEXPT)) {
tberror("You do not have permission to instantiate template ".
"$guid/$version");
exit(1);
if (!defined($template)) {
tbdie("Experiment template $guid/$version does not exist!");
}
$pid = $template->pid();
}
$pid = $template->pid();
#
# Grab Instance.
# Find the experiment ...
#
if (! TBExptIDX($pid, $eid, \$exptidx)) {
tbdie("Could not get experiment index for $pid,$eid!");
my $experiment = Experiment->Lookup($pid, $eid);
if (! defined($experiment)) {
tbdie("Experiment $pid/$eid does not exist!");
}
$instance = Template::Instance->LookupByExptidx($exptidx);
#
# And then the template instance from that ...
#
$instance = Template::Instance->LookupByExptidx($experiment->idx());
if (!defined($instance)) {
tbdie("Experiment instance $eid in $guid/$version does not exist!");
tbdie("Experiment instance $eid in project $pid does not exist!");
}
# In case we got here by the pid instead of guid.
if (!defined($template)) {
$template = $instance->template();
if (!defined($template)) {
tbdie("Cannot find template for $instance!");
}
$guid = $template->guid();
$version = $template->vers();
}
if (ExpState($pid, $eid) ne EXPTSTATE_ACTIVE()) {
#
# Check project permission.
#
if (! TBProjAccessCheck($dbuid,
$template->pid(), $template->gid(),
TB_PROJECT_CREATEEXPT)) {
tberror("You do not have permission to instantiate template ".
"$guid/$version");
exit(1);
}
if ($experiment->state() ne EXPTSTATE_ACTIVE()) {
tberror("Template instance experiment $pid/$eid in not active!");
exit(1);
}
if ($action eq "start" && !defined($runid)) {
if ($instance->NewRunID(\$runid) < 0) {
tbdie("Could not determine a new runid; please use the -r option!");
}
else {
print "Using new run ID '$runid' ...\n";
}
}
#
# If we have a parameter file, we need to copyin the values and store
# them in the DB for this experiment. Note that these override existing
......@@ -353,8 +383,12 @@ if (defined($instance->runidx())) {
print "Experiment run '$this_runid' has been stopped.\n";
}
goto done
if ($action eq "stop");
if ($action eq "stop") {
# Send completion event only on "stop"; otherwise it gets lost.
SendCompletionEvent();
goto done;
}
#
# Clean/Clear if requested before generating the new run, in case there
......@@ -443,20 +477,40 @@ sub ParseArgs()
usage();
}
if (@ARGV != 1) {
usage();
}
#
# Pick up guid/version first and untaint.
# Allow pid to be used instead of GUID.
#
my $tmp = shift(@ARGV);
if (@ARGV == 1) {
#
# Pick up guid/version first and untaint.
#
my $tmp = shift(@ARGV);
if ($tmp =~ /^([\w]*)\/([\d]*)$/) {
$guid = $1;
$version = $2;
}
else {
tbdie("Bad data in argument: $tmp");
}
}
elsif (defined($options{"p"})) {
$pid = $options{"p"};
if ($tmp =~ /^([\w]*)\/([\d]*)$/) {
$guid = $1;
$version = $2;
if ($pid =~ /^([-\w]+)$/) {
$pid = $1;
}
else {
tbdie("Bad data in argument: $pid.");
}
if (! TBcheck_dbslot($pid, "projects", "pid",
TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) {
tbdie("Improper project name (pid)!");
}
}
else {
tbdie("Bad data in argument: $tmp");
tberror("Must provide GUID or -p option!");
exit(1);
}
if (defined($options{"e"})) {
......@@ -532,8 +586,8 @@ sub ParseArgs()
$description = $options{"E"};
}
if (defined($options{"p"})) {
my $inputfile = $options{"p"};
if (defined($options{"x"})) {
my $inputfile = $options{"x"};
# Note different taint check (allow /).
if ($inputfile =~ /^([-\w\.\/]+)$/) {
......@@ -667,6 +721,29 @@ sub SignalProgAgents($)
}
}
#
# Send a COMPLETE event for the SIMULATOR object "ns" in experiment
# $pid/$eid, using the file-level event system $handle. Any failure along
# the way (no handle, allocation failure, send failure) is reported
# through fatal().
#
sub SendCompletionEvent()
{
if (!$handle) {
    fatal(-1, "Could not connect to event system!");
}

# Address the event to the experiment's simulator ("ns") object.
my $addr = address_tuple_alloc();
if (!$addr) {
    fatal(-1, "Could not allocate an address tuple\n");
}
%$addr = (objtype => "SIMULATOR",
	  objname => "ns",
	  eventtype => "COMPLETE",
	  expt => "$pid/$eid");

my $event = event_notification_alloc($handle, $addr);
if (!$event) {
    fatal(-1, "Could not allocate a notification\n");
}

event_notify($handle, $event)
    or fatal(-1, "could not send environment event notification!");
}
END {
# Normal exit, nothing to do.
if (!$? || $justexit) {
......
......@@ -36,7 +36,7 @@ sub usage()
"-w - wait for template to be instantiated\n".
"-q - be less chatty\n".
"-E <str> - A pithy sentence describing the instance\n".
"-p <file>- XML file of parameter bindings\n".
"-x <file>- XML file of parameter bindings\n".
"-S <str> - Instance cannot be swapped; must provide reason\n".
"-L <str> - Instance cannot be IDLE swapped; must provide reason\n".
"-n - Do not send idle email (internal option only)\n".
......@@ -47,7 +47,7 @@ sub usage()
"<guid/vers> - GUID and version to swapin\n");
exit(-1);
}
my $optlist = "qwe:S:L:na:l:se:p:bE:t:";
my $optlist = "qwe:S:L:na:l:se:x:bE:t:";
my %options = ();
my $quiet = 0;
my $waitmode = 0;
......@@ -711,8 +711,8 @@ sub ParseArgs()
}
}
if (defined($options{"p"})) {
my $inputfile = $options{"p"};
if (defined($options{"x"})) {
my $inputfile = $options{"x"};
# Note different taint check (allow /).
if ($inputfile =~ /^([-\w\.\/]+)$/) {
......
......@@ -36,7 +36,7 @@ sub usage()
"-w - wait for template to be instantiated\n".
"-q - be less chatty\n".
"-E <str> - A pithy sentence describing the instance\n".
"-p <file>- XML file of parameter bindings\n".
"-x <file>- XML file of parameter bindings\n".
"-S <str> - Instance cannot be swapped; must provide reason\n".
"-L <str> - Instance cannot be IDLE swapped; must provide reason\n".
"-n - Do not send idle email (internal option only)\n".
......@@ -47,7 +47,7 @@ sub usage()
"<guid/vers> - GUID and version to swapin\n");
exit(-1);
}
my $optlist = "qwe:S:L:na:l:se:p:bE:t:";
my $optlist = "qwe:S:L:na:l:se:x:bE:t:";
my %options = ();
my $quiet = 0;
my $waitmode = 0;
......@@ -711,8 +711,8 @@ sub ParseArgs()
}
}
if (defined($options{"p"})) {
my $inputfile = $options{"p"};
if (defined($options{"x"})) {
my $inputfile = $options{"x"};
# Note different taint check (allow /).
if ($inputfile =~ /^([-\w\.\/]+)$/) {
......
......@@ -435,7 +435,7 @@ if (count($parameter_masterlist)) {
fclose($fp);
chmod($parameter_xmlfile, 0666);
}
$command_options .= " -p $parameter_xmlfile";
$command_options .= " -x $parameter_xmlfile";
}
if (count($errors)) {
......
......@@ -542,7 +542,7 @@ if (count($parameter_masterlist)) {
fclose($fp);
chmod($parameter_xmlfile, 0666);
}
$command_options .= " -p $parameter_xmlfile";
$command_options .= " -x $parameter_xmlfile";
}
#
......
......@@ -33,7 +33,7 @@ SYMLINKS = node_admin node_reboot os_load create_image node_list \
readycount nscheck startexp batchexp startexp swapexp endexp \
modexp expinfo node_avail tbuisp expwait template_commit \
template_export template_swapin template_swapout \
template_startrun template_stoprun
template_stoprun
#
# Force dependencies on the scripts so that they will be rerun through
......
......@@ -4692,7 +4692,7 @@ class template:
pass
if xmlfilename:
argstr += " -p " + xmlfilename
argstr += " -x " + xmlfilename
pass
argstr += " -e " + escapeshellarg(argdict["exp"])
......@@ -4723,10 +4723,11 @@ class template:
except NoLoginsError, e:
return EmulabResponse(RESPONSE_REFUSED, output=str(e))
argerror = CheckRequiredArgs(argdict, ("guid", "exp", "runid"))
argerror = CheckRequiredArgs(argdict, ("exp", ))
if (argerror):
return argerror
pid = None
xmlfilename = None
argstr = "-q"
......@@ -4738,6 +4739,13 @@ class template:
elif opt == "clear":
argstr += " -c "
pass
elif opt == "runid":
argstr += " -r "
argstr += escapeshellarg(val)
pass
elif opt == "pid":
pid = escapeshellarg(val)
pass
elif opt == "wait":
if xbool(val):
argstr += " -w "
......@@ -4761,14 +4769,23 @@ class template:
pass
pass
if pid == None and not argdict.has_key("guid"):
return EmulabResponse(RESPONSE_BADARGS,
output="Must supply pid or guid")
if xmlfilename:
argstr += " -p " + xmlfilename
argstr += " -x " + xmlfilename
pass
argstr += " -e " + escapeshellarg(argdict["exp"])
argstr += " -r " + escapeshellarg(argdict["runid"])
argstr += " -a start"
argstr += " " + escapeshellarg(argdict["guid"])
if pid == None:
argstr += " " + escapeshellarg(argdict["guid"])
pass
else:
argstr += " -p " + pid
pass
(exitval, output) = runcommand(TBDIR +
"/bin/template_exprun " + argstr)
......@@ -4795,12 +4812,12 @@ class template:
except NoLoginsError, e:
return EmulabResponse(RESPONSE_REFUSED, output=str(e))
argerror = CheckRequiredArgs(argdict, ("guid", "exp"))
argerror = CheckRequiredArgs(argdict, ("exp",))
if (argerror):
return argerror
xmlfilename = None
argstr = "-q"
pid = None
argstr = "-q"
for opt, val in argdict.items():
if opt == "wait":
......@@ -4808,12 +4825,30 @@ class template:
argstr += " -w "
pass
pass
if opt == "quiet":
if xbool(val):
argstr += " -q "
pass
pass
elif opt == "pid":
pid = escapeshellarg(val)
pass
pass
if pid == None and not argdict.has_key("guid"):
return EmulabResponse(RESPONSE_BADARGS,
output="Must supply pid or guid")
argstr += " -e " + escapeshellarg(argdict["exp"])
argstr += " -a stop"
argstr += " " + escapeshellarg(argdict["guid"])
if pid == None:
argstr += " " + escapeshellarg(argdict["guid"])
pass
else:
argstr += " -p " + pid
pass
(exitval, output) = runcommand(TBDIR +
"/bin/template_exprun " + argstr)
if exitval:
......
......@@ -1901,7 +1901,7 @@ class template_swapin:
def apply(self):
try:
opts, req_args = getopt.getopt(self.argv,
"bwqS:L:a:l:E:p:e:",
"bwqS:L:a:l:E:e:x:",
[ "help" ]);
pass
except getopt.error, e:
......@@ -1926,7 +1926,7 @@ class template_swapin:
elif opt == "-e":
params["exp"] = val;
pass
elif opt == "-p":
elif opt == "-x":
xmlfilename = val;
pass
elif opt == "-S":
......@@ -1992,7 +1992,7 @@ class template_swapin:
print " -a - Auto swapout NN minutes after instance is swapped in";
print " -l - Auto swapout NN minutes after instance goes idle";
print " -E - A pithy sentence describing your experiment";
print " -p - XML file of parameter bindings";
print " -x - XML file of parameter bindings";
print " -e - The instance name (unique, alphanumeric, no blanks)";
print " guid - Template GUID";
print ""
......@@ -2079,7 +2079,7 @@ class template_startrun:
def apply(self):
try:
opts, req_args = getopt.getopt(self.argv, "we:E:r:p:c",
opts, req_args = getopt.getopt(self.argv, "we:E:r:p:cx:",
[ "help" ]);
pass
except getopt.error, e:
......@@ -2087,6 +2087,7 @@ class template_startrun:
self.usage();
return -1;
pid = None
guid = None
xmlfilename = None
params = {}
......@@ -2108,6 +2109,9 @@ class template_startrun:
params["runid"] = val
pass
elif opt == "-p":
pid = val
pass
elif opt == "-x":
xmlfilename = val;
pass
elif opt == "-e":
......@@ -2116,21 +2120,26 @@ class template_startrun:
pass
# Try to infer the template guid/vers from the current path.
if len(req_args) == 0:
guid = infer_guid(os.getcwd())
if guid == None:
if pid == None:
if len(req_args) == 0:
guid = infer_guid(os.getcwd())
if guid == None:
self.usage();
return -1
pass
elif len(req_args) == 1:
guid = req_args[0]
pass
else:
self.usage();
return -1
pass
elif len(req_args) == 1:
guid = req_args[0]
params["guid"] = guid
pass
else:
self.usage();
return -1
params["guid"] = guid
params["pid"] = pid
pass
if xmlfilename:
try:
params["xmlfilestr"] = open(xmlfilename).read();
......@@ -2144,10 +2153,10 @@ class template_startrun:
return rval;
def usage(self):
print "template_startrun [-r <id>] [-E <descr>] -e id [<guid/vers>]";
print "template_startrun [-r <id>] [-E <descr>] -e id [-p <pid> | <guid/vers>]";
print "where:";
print " -E - A pithy sentence describing your run";
print " -p - XML file of parameter bindings";
print " -x - XML file of parameter bindings";
print " -r - A token (id) for the run";
print " -e - Instance ID (aka eid)";
print " guid - Template GUID";
......@@ -2170,15 +2179,15 @@ class template_stoprun:
def apply(self):
try:
opts, req_args = getopt.getopt(self.argv, "we:", [ "help" ]);
opts, req_args = getopt.getopt(self.argv, "we:qp:", [ "help" ]);
pass
except getopt.error, e:
print e.args[0]
self.usage();
return -1;
pid = None
guid = None
xmlfilename = None
params = {}
for opt, val in opts:
......@@ -2188,32 +2197,42 @@ class template_stoprun:
elif opt == "-w":
params["wait"] = "yes"
pass
elif opt == "-q":
params["quiet"] = "yes"
pass
elif opt == "-p":
pid = val
pass
elif opt == "-e":
params["exp"] = val;
pass
pass
# Try to infer the template guid/vers from the current path.
if len(req_args) == 0:
guid = infer_guid(os.getcwd())
if guid == None:
if pid == None:
if len(req_args) == 0:
guid = infer_guid(os.getcwd())
if guid == None:
self.usage();
return -1
pass
elif len(req_args) == 1:
guid = req_args[0]