Commit 898cf9a2 authored by Timothy Stack

Checkin some changes related to experiment automation and vnode feedback:

	* configure, configure.in: Add sensors/canaryd/feedbacklogs
	template.

	* db/libdb.pm.in, db/xmlconvert.in: Add "virt_user_environment"
	table that holds environment variable names and values.

	* event/lib/event.c: Allocate memory of the right size for
	event_notifications.

	* event/program-agent/GNUmakefile.in: Add version.c file and
	add install targets for the man page.

	* event/program-agent/program-agent.8: Man page describing the
	program-agent daemon.

	* event/program-agent/program-agent.c: Add a bunch of convenience
	features: let the user specify the working directory for commands;
	save output to separate files on every invocation of an agent; let
	the user specify a timeout for a command; make the set of
	environment variables sane and add vars given in the NS file in
	the opt array; a "status" file containing process information is
	written out when children are collected.  Internal changes: child
	processes are collected immediately, instead of waiting for the
	next START event, so we can send back COMPLETE events; the daemon
	now runs with a real-time priority, to increase the chances of
	receiving events.

	* event/proxy/evproxy.c: Made it bidirectional so the
	program-agent's COMPLETE events make it back to the scheduler.

	* event/sched/error-record.c: Change the default log directory.

	* event/sched/event-sched.h, event/sched/event-sched.c: Setup an
	environment similar to a program-agent to run the user's log
	digester.

	* event/sched/node-agent.cc: Add a handler for the SNAPSHOT event
	that runs create_image for the node.

	* event/sched/simulator-agent.h, event/sched/simulator-agent.cc:
	Let the user specify a "DIGESTER" script that digests the log
	files into a summary of the results.  Add event handler for
	remapping a vnode experiment.

	* event/sched/timeline-agent.c: Accept the RUN event as well as
	the START event.

	* os/GNUmakefile.in: Install the install-tarfile.1 man page.

	* os/install-tarfile: Automatically chown/chgrp any files that do
	not have valid user or group IDs, the new owner will be the user
	that swapped in the experiment.  Include the install directory in
	the DB file.  Add a "list" mode that just dumps what files have
	been installed and where.  Add a "force" option so the user can
	forcefully install the file, even though the DB says it's already
	there.

	* os/install-tarfile.1: Man page describing the install-tarfile
	tool.

	* os/syncd/GNUmakefile.in: Install man pages on ops.

	* sensors/canaryd/GNUmakefile.in: Link canaryd statically and
	install "feedbacklogs" tool.

	* sensors/canaryd/canaryd.c: Dump dummynet pipe data.

	* sensors/canaryd/canarydEvents.c: Log errors.

	* sensors/canaryd/feedbacklogs.in: Tool used to generate feedback
	data from canaryd log files.

	* sensors/slothd/GNUmakefile.in: Install digest-slothd on ops.

	* sensors/slothd/digest-slothd: Fix some bugs and write out an
	"alert" file with all the nodes/links that were overloaded.

	* tbsetup/os_load.in, tbsetup/libosload.pm.in: Add "waitmode"
	argument that lets you specify that you want to wait for the disk
	to finish loading and/or wait for the node to come back up in the
	new OS.

	* tbsetup/power.in: Remove debugging printf.

	* tbsetup/ns2ir/node.tcl, tbsetup/ns2ir/program.tcl,
	tbsetup/ns2ir/sequence.tcl, tbsetup/ns2ir/sim.tcl.in: Fix some
	quoting problems with event-sequences.  Add -expected-exit-code
	and -tag options to the "$program run" event.  Add -digester to
	the "$ns report" event that lets the user specify a program to run
	to digest the log files.

	* tbsetup/ns2ir/tb_compat.tcl.in: Change the initial scaling
	factor for feedback nodes to 1%, instead of 100%.

	* tmcd/tmcd.c, tmcd/common/libtmcc.pm: Add "userenv" command that
	returns the values in "virt_user_environment".  Return new program
	agent fields: dir, timeout, and expected_exit_code.

	* tmcd/common/GNUmakefile.in: Install rc.canaryd.

	* tmcd/common/bootvnodes: Add hack to boost the program-agents to
	a real-time priority, they can't do it from inside the jail.

	* tmcd/common/rc.canaryd: Rc script for canaryd.

	* tmcd/common/watchdog: Don't fail outright if there is a bad line
	in the battery.log

	* tmcd/common/rc.progagent: Append "userenv" data to the
	program-agent config file.

	* utils/GNUmakefile.in: Install loghole and its man page on ops.

	* utils/loghole.1: Document "clean" command and the change in
	loghole directories.

	* utils/loghole.in: Add "clean" command and parallelization.

	* xmlrpc/emulabserver.py.in: Add "virt_user_environment" table.
	Order the eventlist by "idx" and time, needed for sequences.  And
	removed unnecessary nologin checks.
parent 00965f77
......@@ -2149,8 +2149,8 @@ outfiles="$outfiles Makeconf GNUmakefile \
security/GNUmakefile security/lastlog_daemon \
sensors/GNUmakefile sensors/slothd/GNUmakefile \
sensors/slothd/sdisrunning sensors/slothd/sddeploy \
sensors/canaryd/GNUmakefile sensors/and/GNUmakefile \
sensors/and/and-emulab.conf \
sensors/canaryd/GNUmakefile sensors/canaryd/feedbacklogs \
sensors/and/GNUmakefile sensors/and/and-emulab.conf \
account/GNUmakefile account/tbacct account/webtbacct \
account/addpubkey account/webaddpubkey \
account/addsfskey account/webaddsfskey \
......
......@@ -644,8 +644,8 @@ outfiles="$outfiles Makeconf GNUmakefile \
security/GNUmakefile security/lastlog_daemon \
sensors/GNUmakefile sensors/slothd/GNUmakefile \
sensors/slothd/sdisrunning sensors/slothd/sddeploy \
sensors/canaryd/GNUmakefile sensors/and/GNUmakefile \
sensors/and/and-emulab.conf \
sensors/canaryd/GNUmakefile sensors/canaryd/feedbacklogs \
sensors/and/GNUmakefile sensors/and/and-emulab.conf \
account/GNUmakefile account/tbacct account/webtbacct \
account/addpubkey account/webaddpubkey \
account/addsfskey account/webaddsfskey \
......
......@@ -3132,6 +3132,7 @@ sub TBRobotLabExpt($$)
"virt_node_desires",
"virt_node_startloc",
"virt_simnode_attributes",
"virt_user_environment",
# vis_nodes is locked during update in prerender, so we
# will get a consistent dataset when we backup.
"vis_nodes",
......
......@@ -107,6 +107,10 @@ my %virtual_tables =
tag => "programs",
row => "program",
attrs => [ "vname", "vnode" ]},
"virt_user_environment" => { rows => undef,
tag => "user_environments",
row => "user_environment",
attrs => [ "name", "value" ]},
"nseconfigs" => { rows => undef,
tag => "nseconfigs",
row => "nseconfig",
......
......@@ -531,7 +531,7 @@ event_notification_alloc(event_handle_t handle, address_tuple_t tuple)
TRACE("allocating notification (tuple=%p)\n", tuple);
notification = xmalloc(sizeof(event_notification_t));
notification = xmalloc(sizeof(struct event_notification));
elvin_notification = elvin_notification_alloc(handle->status);
if (elvin_notification == NULL) {
ERROR("elvin_notification_alloc failed: ");
......@@ -620,7 +620,7 @@ event_notification_clone(event_handle_t handle,
TRACE("cloning notification %p\n", notification);
clone = xmalloc(sizeof(event_notification_t));
clone = xmalloc(sizeof(struct event_notification));
if (! (clone->elvin_notification =
elvin_notification_clone(notification->elvin_notification,
handle->status))) {
......
......@@ -22,8 +22,9 @@ CFLAGS += -DDEBUG
CFLAGS += -O -g -Wall
CFLAGS += -I. -I${OBJDIR} -I$(SRCDIR)/../lib -I$(TESTBED_SRCDIR)/lib/libtb
CFLAGS += `$(ELVIN_CONFIG) --cflags vin4c`
CFLAGS += -DCLIENT_BINDIR='"$(CLIENT_BINDIR)"'
LDFLAGS += -L../lib -L${OBJDIR}/lib/libtb
LDFLAGS += -g -L../lib -L${OBJDIR}/lib/libtb
LIBS += -levent -ltb -lcrypto
LIBS += `$(ELVIN_CONFIG) --libs vin4c`
......@@ -40,16 +41,24 @@ LIBS += -ldl
endif
endif
program-agent-debug: program-agent.o
$(CC) $(LDFLAGS) -o $@ program-agent.o $(LIBS)
version.c: program-agent.c
echo >$@ "char build_info[] = \"Built on `date +%d-%b-%Y` by `id -nu`@`hostname | sed 's/\..*//'`:`pwd`\";"
program-agent-debug: program-agent.o version.o
$(CC) $(LDFLAGS) -o $@ program-agent.o version.o $(LIBS)
$(PROGRAMS): ../lib/libevent.a ../lib/event.h
install:
-mkdir -p $(INSTALL_DIR)/opsdir/man/man8
$(INSTALL) -m 0644 $(SRCDIR)/program-agent.8 \
$(INSTALL_DIR)/opsdir/man/man8/program-agent.8
client: $(PROGRAMS)
client-install: client
$(INSTALL_PROGRAM) program-agent$(EXE) $(DESTDIR)$(CLIENT_BINDIR)/program-agent$(EXE)
chmod u+s-w $(DESTDIR)$(CLIENT_BINDIR)/program-agent
$(INSTALL) -m 644 $(SRCDIR)/program-agent.8 $(DESTDIR)$(CLIENT_MANDIR)/man8/program-agent.8
clean:
/bin/rm -f *.o $(PROGRAMS)
.\"
.\" EMULAB-COPYRIGHT
.\" Copyright (c) 2004, 2005 University of Utah and the Flux Group.
.\" All rights reserved.
.\"
.TH PROGRAM-AGENT 8 "November 6, 2004" "Emulab" "Emulab Commands Manual"
.OS
.SH NAME
program-agent \- Event-based program executor/manager.
.SH SYNOPSIS
.BI program-agent
[\fB-hVd\fR]
[\fB-s \fIserver\fR]
[\fB-p \fIport\fR]
[\fB-l \fIlogfile\fR]
[\fB-k \fIkeyfile\fR]
[\fB-u \fIuser-name\fR]
[\fB-i \fIpidfile\fR]
[\fB-e \fIpid/eid\fR]
[\fB-c \fIconfigfile\fR]
.SH DESCRIPTION
The
.B program-agent
daemon runs on the Emulab experimental nodes and executes programs as directed
by the event system. The daemon is automatically started when the node boots
or can be started manually by running:
.P
.RS
$ sudo /usr/local/etc/emulab/rc/rc.progagent boot
.RE
.P
The programs managed by the daemon are specified in the NS file. For example,
the following NS fragment creates an agent named "webserver" for the "server"
node:
.P
.RS
.PD 0
set server [$ns node]
.P
set webserver [$server program-agent]
.RE
.PD
.P
The set of agents for a particular node is then aggregated within a
.B program-agent
daemon on a node. Events sent to the daemon are then dispatched internally to
the appropriate agent instance.
.P
Available options:
.P
.TP
.B -h
Print out a usage message.
.TP
.B -V
Print out version information and exit.
.TP
.B -d
Turn on debugging mode. The server will not daemonize and output will go to
standard out or to a log file, if one is specified.
.TP
.B -s \fIserver\fR
The host name where the elvin event server is located. (Default: localhost)
.TP
.B -p \fIport\fR
The port where the elvin event server is listening.
.TP
.B -l \fIlogfile\fR
The log file name, defaults to syslog(3), or standard out if in debug mode.
(Default: /var/emulab/logs/program-agent.debug)
.TP
.B -k \fIkeyfile\fR
The private key file used to sign/verify events. (Default:
/proj/\fIpid\fR/exp/\fIeid\fR/tbdata/eventkey)
.TP
.B -u \fIuser-name\fR
The name of the user the program should switch to after execution.
.TP
.B -i \fIpidfile\fR
The name of the file to write the process ID to. (Default:
/var/run/progagent.pid)
.TP
.B -e \fIpid\fR/\fIeid\fR
The project and experiment identifiers that determine what events the agent
should listen for.
.TP
.B -c \fIconfigfile\fR
The configuration file that specifies the programs to manage and their initial
commands and directory settings. See the
.B CONFIGURATION FILE FORMAT
section for more information about the file format. (Default:
/var/emulab/boot/progagents)
.SH FILES
.TP
/local/logs/\fIagent\fR.\fItoken\fR.(out|err)
The standard out and standard error for a particular invocation of a program.
The
.I token
value is a unique ID attached to every event sent by the
.B event-sched
scheduler so you can distinguish multiple invocations of the program.
.TP
/local/logs/\fIagent\fR.(out|err)
Symbolic links to the standard out and error of the last invocation of a
particular program agent. These links make it easy to monitor the output of a
particular program, even through multiple invocations (using "tail -F").
.TP
/local/logs/\fIagent\fR.\fItoken\fR.status
Exit status information for a particular invocation of a program. The file
contains the following details about the invocation.
.RS
.TP
.B NAME
The name of the program agent.
.TP
.B DIR
The directory where the program was started.
.TP
.B TIMEOUT
The timeout, in seconds, for this invocation. A value of zero means there was
no timeout.
.TP
.B TIMEOUT_FIRED
An indicator of whether or not the timeout fired and caused the program to
exit.
.TP
.B COMMAND
The command line that was executed.
.TP
.B TOKEN
The unique identifier for this invocation.
.TP
.B START_TIME_SECS
The time that the program was started, in seconds.
.TP
.B START_TIME
The time that the program was started, in human-readable form as returned by
.B ctime(3)\fR.
.TP
.B END_TIME_SECS
The time that the program finished, in seconds.
.TP
.B END_TIME
The time that the program finished, in human-readable form as returned by
.B ctime(3)\fR.
.TP
.B EXIT_CODE
The command's exit code.
.TP
.B EXPECTED_EXIT_CODE
The expected exit code, as specified by the user. If this value is the same as
EXIT_CODE, then the invocation was considered a success.
.TP
.B CPU_TIME
The amount of CPU time consumed by the command, in
.I secs\fR.\fIusecs
form.
.TP
.B MAXRSS
The maximum resident set size, in kilobytes.
.RE
.TP
/local/logs/\fIagent\fR.status
Symbolic link to the status information for the last invocation of a program.
.TP
/var/emulab/logs/program-agent.debug
The program-agent daemon's log file.
.TP
/var/run/progagent.pid
The default location for the
.B program-agent\fR's
process ID file.
.SH COMMAND ENVIRONMENT
The command will be executed with the following environment variables set:
.TP
.B PATH
The default path for binaries is set to the standard path (e.g. /usr/bin, /bin,
/usr/sbin, /sbin), the binary directories in /usr/local, and the directory
containing Emulab specific binaries.
.TP
.B EXPDIR
The experiment's directory in NFS space (e.g. /proj/foo/exp/bar).
.TP
.B LOGDIR
The preferred directory for log files.
.TP
.B USER
The name of the user that swapped in this experiment.
.TP
.B HOME
The path to the user's home directory.
.TP
.B GROUP
The name of the unix group for the user that swapped in this experiment.
.TP
.B PID
The project ID for the experiment this agent is running within.
.TP
.B EID
The experiment ID for the experiment this agent is running within.
.TP
.B NODE
The name of the node this program agent is running on. Note that this is the
short name and
.I not
the fully qualified host name that would refer to the control interface.
.TP
.B NODEIP
The IP address of the experiment network interface that the node name maps to.
For nodes with no experimental interfaces, this variable will not be set.
.TP
set opt(\f(BIVAR\fR) \f(BIvalues\fR
Any entries in the "opt" array of the NS file will automatically be added to
the environment. For example, to set a variable named "DURATION" with a value
of "100", you would add "set opt(DURATION) 100" to the top of your NS file.
.SH CONFIGURATION FILE FORMAT
The configuration file format is line-oriented with each line being a series of
key-value pairs.
.TP
.B UID
The user name to switch to before the daemon begins to process events. The
.B -u
option will override this value. There are no other attributes on this line.
.TP
.B AGENT
An individual program agent, possible attributes include COMMAND, TIMEOUT,
EXPECTED_EXIT_CODE, and DIR.
.SH EVENTS
.TP
.B TIME START
This event is sent by
.B event-sched
shortly after it finishes starting to indicate the start of virtual time for
the experiment. The event will cause any running commands to be stopped, the
state for the agents will be reverted to their initial state as specified by
the configuration file, and the log files in "/local/logs" will be deleted.
.TP
.B PROGRAM START
Starts the program by running the command-line in the specified directory and
capturing its standard output and error. The agent will then switch into
"management" mode and only accept
.I STOP
and
.I KILL
events until the command terminates. The arguments to this event are:
.RS
.TP
.I COMMAND
Specifies the command-line to run. Defaults to the last command that was run
or the command specified in the NS file.
.TP
.I DIR
Specifies the directory to run the command within. Defaults to the last
directory that was specified, the directory in the NS file, or "/tmp".
.TP
.I TIMEOUT
Specifies the timeout, in seconds, for the command or zero for no timeout. If
the command does not complete before the timeout, it will be stopped
forcefully. Defaults to the last timeout used for this agent or no timeout.
.TP
.I EXPECTED_EXIT_CODE
The expected exit code for the command, this value is compared against the
actual exit code to determine whether or not it completed successfully.
Defaults to the last value used or zero.
.RE
.TP
.B PROGRAM STOP
Stops the program, if it is currently running, by sending a SIGTERM to the
process group.
.TP
.B PROGRAM KILL
Signals the program with the signal name given as the "SIGNAL" argument. For
example, to send a SIGHUP to the process you would use "SIGNAL=SIGHUP".
.TP
.B PROGRAM COMPLETE
This event is sent
.I from
the agent when a command has terminated and includes an indicator of success or
failure based on the EXPECTED_EXIT_CODE and the real exit code. The event will
eventually be received by the event-sched(8) daemon which will determine what
event to send next.
.SH SIGNALS
.TP
SIGINFO
Dumps the list of programs managed by the daemon and some of their attributes.
.SH PROCESS PRIORITY
The
.B program-agent
runs at the maximum real-time priority so it has a good chance of receiving and
processing events in case one of the processes it manages is hogging the CPU.
.SH SEE ALSO
tevc(1), csh(1), event-sched(8)
.SH AUTHOR
The Emulab project at the University of Utah.
.SH NOTES
The Emulab project can be found on the web at
.IR http://www.emulab.net
This diff is collapsed.
......@@ -25,6 +25,7 @@
static int debug = 0;
static event_handle_t localhandle;
static event_handle_t bosshandle;
void
usage(char *progname)
......@@ -37,10 +38,13 @@ static void
callback(event_handle_t handle,
event_notification_t notification, void *data);
static void
sched_callback(event_handle_t handle,
event_notification_t notification, void *data);
int
main(int argc, char **argv)
{
event_handle_t bosshandle;
address_tuple_t tuple;
char *progname;
char *server = NULL;
......@@ -156,6 +160,13 @@ main(int argc, char **argv)
fatal("could not subscribe to events on remote server");
}
tuple->host = ADDRESSTUPLE_ALL;
tuple->scheduler = 1;
if (! event_subscribe(localhandle, sched_callback, tuple, NULL)) {
fatal("could not subscribe to events on remote server");
}
/*
* Stash the pid away.
*/
......@@ -214,3 +225,12 @@ callback(event_handle_t handle, event_notification_t notification, void *data)
if (! event_notify(localhandle, notification))
error("Failed to deliver notification!");
}
/*
 * Event callback that relays a notification to the boss-side event server.
 *
 * main() registers this on localhandle for tuples with the scheduler flag
 * set; each notification received is re-sent unchanged through the
 * file-scope bosshandle.  A delivery failure is logged and otherwise ignored.
 *
 * handle and data are part of the callback signature but are not used here.
 */
static void
sched_callback(event_handle_t handle,
	       event_notification_t notification,
	       void *data)
{
	if (event_notify(bosshandle, notification)) {
		/* Forwarded successfully, nothing more to do. */
		return;
	}

	error("Failed to deliver scheduled notification!");
}
/*
* EMULAB-COPYRIGHT
* Copyright (c) 2004 University of Utah and the Flux Group.
* Copyright (c) 2004, 2005 University of Utah and the Flux Group.
* All rights reserved.
*/
......@@ -105,9 +105,9 @@ static int dump_agent_status(error_record_t er, FILE *out)
{
/*
* The format of the path for the status file:
* logs/<node>/usr/testbed/logs/<agent>.<token>.status
* logs/<node>/<LOGDIR>/<agent>.<token>.status
*/
static char *file_format = "logs/%s/usr/testbed/logs/%s.%lu.status";
static char *file_format = "logs/%s" LOGDIR "/%s.%lu.status";
/*
* A map of status file 'keys' that people may be interested in and
......@@ -252,8 +252,8 @@ int dump_error_record(error_record_t er, FILE *out)
* be sent back to the user.
*/
static char *filename_formats[] = {
"logs/%s/usr/testbed/logs/%s.%lu.out",
"logs/%s/usr/testbed/logs/%s.%lu.err",
"logs/%s" LOGDIR "/%s.%lu.out",
"logs/%s" LOGDIR "/%s.%lu.err",
NULL
};
......
/*
* EMULAB-COPYRIGHT
* Copyright (c) 2004 University of Utah and the Flux Group.
* Copyright (c) 2004, 2005 University of Utah and the Flux Group.
* All rights reserved.
*/
......@@ -14,6 +14,8 @@
using namespace emulab;
#define REBOOT_TIMEOUT (6 * 60)
/**
* A "looper" function for node agents that dequeues and processes events for
* a particular node. This function will be passed to pthread_create when a
......@@ -240,6 +242,65 @@ static int do_reboot(node_agent_t na, char *nodeids)
return retval;
}
/**
 * Handler for the SNAPSHOT event: saves the node's disk to a named image.
 *
 * The steps are:
 *   1. Run "loghole sync" for the node(s) so their logs are collected first;
 *      a failure here only produces a warning.
 *   2. Look up the "IMAGE" event argument.  If it is missing or empty the
 *      snapshot cannot be taken and an error is returned.
 *   3. Invoke the "node.create_image" RPC with "wait" and "bootwait" set.
 *   4. Invoke "node.statewait" to wait up to REBOOT_TIMEOUT seconds for the
 *      node to reach the ISUP state.
 *
 * @param na The node agent the event was sent to (only sanity-checked here).
 * @param nodeids The non-empty node ID string handed to loghole and the RPCs.
 * @param args The event argument string; the IMAGE value is NUL-terminated
 * in place, so this buffer is modified.
 * @return Zero if both RPCs succeeded, -1 if no image name was given,
 * otherwise the non-zero value returned by the failing RPC_invoke call.
 */
static int do_snapshot(node_agent_t na, char *nodeids, char *args)
{
	EmulabResponse er;
	char *image_name;
	/*
	 * Start out as a failure so the "no image" path returns a defined
	 * value; previously retval was returned uninitialized in that case.
	 */
	int rc, retval = -1;

	assert(na != NULL);
	assert(node_agent_invariant(na));
	assert(nodeids != NULL);
	assert(strlen(nodeids) > 0);
	assert(args != NULL);

	/* Get any logs off the node(s) before the image is taken, then */
	if (systemf("loghole --port=%d --quiet sync %s",
		    DEFAULT_RPC_PORT,
		    nodeids) != 0) {
		warning("failed to sync log hole for node %s\n", nodeids);
	}

	/*
	 * ... find out which image to create.  An empty value is treated the
	 * same as a missing one, matching the other event_arg_get() callers.
	 */
	if ((rc = event_arg_get(args, "IMAGE", &image_name)) <= 0) {
		warning("no image name given: %s\n", nodeids);
		return retval;
	}

	/* The value is not terminated inside args, so cut it off here. */
	image_name[rc] = '\0';

	/* ... create the user-specified image and */
	retval = RPC_invoke("node.create_image",
			    &er,
			    SPA_String, "node", nodeids,
			    SPA_String, "imageproj", pid,
			    SPA_String, "imagename", image_name,
			    SPA_Boolean, "wait", true,
			    SPA_Boolean, "bootwait", true,
			    SPA_TAG_DONE);
	if (retval != 0) {
		warning("could not snapshot: %s\n", nodeids);
		return retval;
	}

	/* XXX Kinda hacky way to wait for the node to come up. */
	retval = RPC_invoke("node.statewait",
			    &er,
			    SPA_String, "node", nodeids,
			    SPA_Integer, "timeout", REBOOT_TIMEOUT,
			    SPA_String, "state", "ISUP",
			    SPA_TAG_DONE);
	if (retval != 0) {
		warning("timeout waiting for node: %s\n", nodeids);
	}

	/* XXX dump output to a file. */

	return retval;
}
static void *node_agent_looper(void *arg)
{
node_agent_t na = (node_agent_t)arg;
......@@ -288,6 +349,10 @@ static void *node_agent_looper(void *arg)
else if (strcmp(evtype, TBDB_EVENTTYPE_RELOAD) == 0) {
rc = reload_with(na, nodeids, argsbuf);
}
else if (strcmp(evtype,
TBDB_EVENTTYPE_SNAPSHOT) == 0) {
rc = do_snapshot(na, nodeids, argsbuf);
}
else if (strcmp(evtype, TBDB_EVENTTYPE_SETDEST) == 0) {
event_notify(handle, en);
se.length = 0;
......
/*
* EMULAB-COPYRIGHT
* Copyright (c) 2004 University of Utah and the Flux Group.
* Copyright (c) 2004, 2005 University of Utah and the Flux Group.
* All rights reserved.
*/
......@@ -19,6 +19,8 @@
#include "rpc.h"
#include "simulator-agent.h"
using namespace emulab;
/**
* A "looper" function for the simulator agent that dequeues and processes
* events destined for the Simulator object. This function will be passed to
......@@ -149,13 +151,106 @@ static int add_report_data(simulator_agent_t sa,
return retval;
}
static int send_report(simulator_agent_t sa)
/**
 * Ask the testbed to modify (remap) the current experiment.
 *
 * The current "tbdata/feedback_data.tcl" is first renamed to
 * "tbdata/feedback_data_old.tcl" (paths are relative to the working
 * directory).  The NS file handed to the "experiment.modify" RPC is
 * "<eid>-modify.ns" in the experiment's tbdata directory when that file is
 * readable, otherwise "<eid>.ns".  The RPC is issued between RPC_grab() and
 * RPC_drop().
 *
 * If the RPC fails, whatever is at "tbdata/feedback_data.tcl" is kept as
 * "tbdata/feedback_data_failed.tcl" and the "_old" file is moved back into
 * place.  The results of the rename() calls are not checked.
 *
 * @param sa The simulator agent (unused here).
 * @param token The event token (unused here).
 * @return The value returned by RPC_invoke; zero is treated as success.
 */
static int remap_experiment(simulator_agent_t sa, int token)
{
	char ns_path[BUFSIZ];
	EmulabResponse response;
	int status;

	rename("tbdata/feedback_data.tcl",
	       "tbdata/feedback_data_old.tcl");

	/* Prefer the "-modify" variant of the NS file, if there is one. */
	snprintf(ns_path, sizeof(ns_path),
		 "/proj/%s/exp/%s/tbdata/%s-modify.ns",
		 pid, eid, eid);
	if (access(ns_path, R_OK) != 0) {
		snprintf(ns_path, sizeof(ns_path),
			 "/proj/%s/exp/%s/tbdata/%s.ns",
			 pid, eid, eid);
	}

	RPC_grab();
	status = RPC_invoke("experiment.modify",
			    &response,
			    SPA_String, "proj", pid,
			    SPA_String, "exp", eid,
			    SPA_Boolean, "wait", true,
			    SPA_Boolean, "reboot", true,
			    SPA_Boolean, "restart_eventsys", true,
			    SPA_String, "nsfilepath", ns_path,
			    SPA_TAG_DONE);
	RPC_drop();

	if (status == 0) {
		return status;
	}

	/* The modify failed: keep the bad data around and restore the old. */
	rename("tbdata/feedback_data.tcl",
	       "tbdata/feedback_data_failed.tcl");
	rename("tbdata/feedback_data_old.tcl",
	       "tbdata/feedback_data.tcl");

	return status;
}
/**
 * Handle a modify request for the experiment, as selected by the "MODE"
 * event argument.
 *
 * The only mode recognized is "stabilize" (compared case-insensitively over
 * the length of the supplied value):
 *   - "loghole sync" is run; if it fails an error is logged and nothing else
 *     happens.
 *   - "feedbacklogs <pid> <eid>" is run.  A zero exit status logs
 *     "stabilized" and sets SAF_STABLE in the agent's flags.  A non-zero
 *     status triggers remap_experiment(), unless SAF_STABLE was already set,
 *     in which case only a warning is logged.
 *
 * A missing/empty mode or an unknown mode is logged and otherwise ignored.
 *
 * @param sa The simulator agent whose flags track the stable state.
 * @param token The event token, passed through to remap_experiment().
 * @param args The event argument string containing MODE.
 * @return The result of remap_experiment() when a remap was attempted,
 * otherwise zero.
 */
static int do_modify(simulator_agent_t sa, int token, char *args)
{
	int rc, retval = 0;
	char *mode;

	assert(sa != NULL);
	assert(args != NULL);

	if ((rc = event_arg_get(args, "MODE", &mode)) <= 0) {
		error("no mode specified\n");
	}
	else if (strncasecmp("stabilize", mode, rc) == 0) {
		if (systemf("loghole --port=%d --quiet sync",
			    DEFAULT_RPC_PORT) != 0) {
			error("failed to sync log holes\n");
		}
		else if (systemf("feedbacklogs %s %s", pid, eid) != 0) {
			if (sa->sa_flags & SAF_STABLE) {
				/* XXX log error */
				warning("unstabilized!\n");
			}
			else {
				retval = remap_experiment(sa, token);
			}
		}
		else {
			info("stabilized\n");
			sa->sa_flags |= SAF_STABLE;
		}
	}
	else {
		/*
		 * The value points into args and is only rc characters long
		 * (it is not NUL-terminated there), so bound the print rather
		 * than dumping the rest of the argument string.
		 */
		warning("unknown mode %.*s\n", rc, mode);
	}

	return retval;
}
/**
 * Write one kind of accumulated report data to a file and discard it.
 *
 * If the agent holds a non-empty string for the given kind, it is written to
 * the file surrounded by newlines, freed, and the slot is reset to NULL.  A
 * NULL or empty slot is left untouched and nothing is written.
 *
 * @param file The open file to write the data to.
 * @param sa The simulator agent holding the report data.
 * @param srdk The kind of report data to dump; must be below SA_RDK_MAX.
 */
static void dump_report_data(FILE *file,
			     simulator_agent_t sa,
			     sa_report_data_kind_t srdk)
{
	char *data;

	assert(file != NULL);
	assert(sa != NULL);
	assert(srdk >= 0);
	assert(srdk < SA_RDK_MAX);

	data = sa->sa_report_data[srdk];
	if ((data == NULL) || (data[0] == '\0')) {
		/* Nothing was recorded for this kind. */
		return;
	}

	fprintf(file, "\n%s\n", data);
	free(data);
	sa->sa_report_data[srdk] = NULL;
}
static int send_report(simulator_agent_t sa, char *args)
{
struct lnList error_records;
int retval;
FILE *file;
assert(sa != NULL);
assert(args != NULL);
/*
* Atomically move the error records from the agent object onto our
......@@ -186,21 +281,59 @@ static int send_report(simulator_agent_t sa)
retval = -1;
}
else {
int lpc;
char *digester;
int rc, lpc;
FILE *dfile;
retval = 0;
/* Dump user supplied stuff first then */
for (lpc = 0; lpc < SA_RDK_MAX; lpc++) {
if ((sa->sa_report_data[lpc] != NULL) &&
(strlen(sa->sa_report_data[lpc]) > 0)) {
/* Dump user supplied stuff first, */
dump_report_data(file, sa, SA_RDK_MESSAGE);
/* ... run the user-specified log digester, then */
if ((rc = event_arg_get(args, "DIGESTER", &digester)) > 0) {
digester[rc] = '\0';
if ((dfile = popenf("%s | tee logs/digest.out",
"r",
digester)) == NULL) {
fprintf(file,
"\n%s\n",
sa->sa_report_data[lpc]);
free(sa->sa_report_data[lpc]);
sa->sa_report_data[lpc] = NULL;
"[failed to run digester %s]\n",
digester);
}
else {
char buf[BUFSIZ];
while ((rc = fread(buf,
1,
sizeof(buf),
dfile)) > 0) {
fwrite(buf, 1, rc, file);
}
pclose(dfile);
dfile =