Commit 41d4c6d4 authored by Timothy Stack's avatar Timothy Stack

Event system documentation and tweaks:

	* event/sched/event-sched.c: The snapshot event got left out of
	the list of events that send back a complete event.  If a top
	level sequence stops with an error, automatically send a report.

	* event/sched/group-agent.c, event/sched/timeline-agent.c: When
	handling a complete event, return true if the event was consumed.

	* event/sched/node-agent.cc: Clean the "/local/logs" loghole on
	the node before taking a snapshot of the disk image.

	* event/sched/simulator-agent.cc, event/sched/simulator-agent.h:
	Made send_report non-static so it can be called when a sequence
	errors out.  Actually check the clear flag when dumping report
	data.

	* lib/libtb/tbdefs.c: Add a comment about updating the event
	scheduler when a new object type is added.

	* tbsetup/ns2ir/sim.tcl.in: Check the OSIDs passed to the "reload"
	and "snapshot-to" events.

	* utils/loghole.1: Mention early and often the directories that
	get sync'd.  Document the 'change' action and new options for
	'clean'.

	* utils/loghole.in: Use rsync's '--include-from' option so we
	don't have to run rsync separately for each directory.  Add a
	'change' action that lets people mess with the metadata of an
	existing archive.  Make the 'clean' action also remove log files
	on the nodes.  Only rsync nodes whose OSes support ssh.  Grab
	logs from delay nodes.

	* www/tutorial/advanced.html: Update the program agent examples to
	use the new form.

	* www/tutorial/eventsystem.html: First cut at an event system
	reference manual that includes sequences, timelines, and other
	new features.

	* www/tutorial/loghole.html: Just a pointer to the man page on ops
	for now.

	* www/tutorial/nscommands.html: Add a section about capturing
	parameters from the 'opt' array.

	* xmlrpc/emulabserver.py.in: Add an osid.info() method that
	returns information about a single OSID.
parent 88f6efeb
......@@ -526,9 +526,14 @@ int sends_complete(struct agent *agent, const char *evtype)
TBDB_EVENTTYPE_REBOOT,
TBDB_EVENTTYPE_RELOAD,
TBDB_EVENTTYPE_SETDEST,
TBDB_EVENTTYPE_SNAPSHOT,
NULL
};
/*
* Map of object types to events that send back COMPLETEs. If no
* events send a COMPLETE, just use a NULL for the evtypes slot.
*/
static struct {
char *objtype;
char **evtypes;
......@@ -547,6 +552,7 @@ int sends_complete(struct agent *agent, const char *evtype)
{ TBDB_OBJECTTYPE_SEQUENCE, run_completes },
{ TBDB_OBJECTTYPE_CONSOLE, NULL },
{ TBDB_OBJECTTYPE_TOPOGRAPHY, NULL },
{ TBDB_OBJECTTYPE_LINKTRACE, NULL },
{ NULL, NULL }
};
......@@ -1353,7 +1359,7 @@ handle_completeevent(event_handle_t handle, sched_event_t *eventp)
char *value, argsbuf[BUFSIZ] = "";
char objname[TBDB_FLEN_EVOBJNAME];
char objtype[TBDB_FLEN_EVOBJTYPE];
int rc, ctoken = ~0, agerror = 0;
int rc, ctoken = ~0, agerror = 0, handled = 0;
event_notification_get_objname(handle, eventp->notification,
objname, sizeof(objname));
......@@ -1397,23 +1403,32 @@ handle_completeevent(event_handle_t handle, sched_event_t *eventp)
}
}
sequence_agent_handle_complete(handle,
&sequences,
eventp->agent.s,
ctoken,
agerror);
handled = sequence_agent_handle_complete(handle,
&sequences,
eventp->agent.s,
ctoken,
agerror);
group_agent_handle_complete(handle,
&groups,
eventp->agent.s,
ctoken,
agerror);
handled += group_agent_handle_complete(handle,
&groups,
eventp->agent.s,
ctoken,
agerror);
if ((strcmp(objtype, TBDB_OBJECTTYPE_TIMELINE) == 0) ||
(strcmp(objtype, TBDB_OBJECTTYPE_SEQUENCE) == 0)) {
RPC_grab();
RPC_notifystart(pid, eid, objname, 0);
RPC_drop();
}
if ((primary_simulator_agent != NULL) &&
(agerror != 0) &&
((strcmp(objtype, TBDB_OBJECTTYPE_SEQUENCE) == 0) ||
(strcmp(objtype, TBDB_OBJECTTYPE_GROUP) == 0)) &&
!handled) {
send_report(primary_simulator_agent, "");
}
return 1;
......
......@@ -183,6 +183,8 @@ int group_agent_handle_complete(event_handle_t handle,
ga->ga_token = ~0;
ga->ga_remaining = -1;
retval += 1;
}
else {
#if 0
......
......@@ -263,6 +263,11 @@ static int do_snapshot(node_agent_t na, char *nodeids, char *args)
nodeids) != 0) {
warning("failed to sync log hole for node %s\n", nodeids);
}
else if (systemf("loghole --port=%d --quiet clean -fn %s",
DEFAULT_RPC_PORT,
nodeids) != 0) {
warning("failed to clean log hole on node(s): %s\n", nodeids);
}
if ((rc = event_arg_get(args, "IMAGE", &image_name)) < 0) {
warning("no image name given: %s\n", nodeids);
......
......@@ -36,30 +36,6 @@ using namespace emulab;
*/
static void *simulator_agent_looper(void *arg);
/**
* Sends a summary report to the user via e-mail and also sort of marks the end
* of a run of the experiment. The content of the report is partially
* generated by the user with the rest being automatically generated by the
* testbed. First, the function will sync the logholes so any data needed to
* automatically generate parts of the report are readily available. Then, the
* body of the mail is constructed by appending any user provided messages and
* log data with the digested error records. Ideally, the user provided
* messages should provide a human readable summary of the success/failure of
* their experiment. Any log data from the user should report any performance
* metrics, warnings, or any other salient data. Finally, the function will
* iterate through the list of error records and append any available log files
* or messages paired with those records.
*
* After sending the mail, the simulator object will be reset to a pristine
* state so another experimental run can begin with a clean slate.
*
* @param sa The simulator agent object to summarize.
* @return Zero on success, -1 otherwise.
*
* @see dump_error_records
*/
static int send_report(simulator_agent_t sa);
simulator_agent_t create_simulator_agent(void)
{
simulator_agent_t sa, retval;
......@@ -226,12 +202,14 @@ static void dump_report_data(FILE *file,
if ((sa->sa_report_data[srdk] != NULL) &&
(strlen(sa->sa_report_data[srdk]) > 0)) {
fprintf(file, "\n%s\n", sa->sa_report_data[srdk]);
free(sa->sa_report_data[srdk]);
sa->sa_report_data[srdk] = NULL;
if (clear) {
free(sa->sa_report_data[srdk]);
sa->sa_report_data[srdk] = NULL;
}
}
}
static int send_report(simulator_agent_t sa, char *args)
int send_report(simulator_agent_t sa, char *args)
{
struct lnList error_records;
char loghole_name[BUFSIZ];
......@@ -336,7 +314,7 @@ static int send_report(simulator_agent_t sa, char *args)
if (loghole_name[len - 1] == '\n')
loghole_name[len - 1] = '\0';
}
if (systemf("mail -s \"%s: %s/%s experiment report\" %s "
"< logs/report.mail",
OURDOMAIN,
......
......@@ -107,6 +107,31 @@ int add_report_data(simulator_agent_t sa,
char *data,
unsigned long flags);
/**
* Sends a summary report to the user via e-mail and also sort of marks the end
* of a run of the experiment. The content of the report is partially
* generated by the user with the rest being automatically generated by the
* testbed. First, the function will sync the logholes so any data needed to
* automatically generate parts of the report are readily available. Then, the
* body of the mail is constructed by appending any user provided messages and
* log data with the digested error records. Ideally, the user provided
* messages should provide a human readable summary of the success/failure of
* their experiment. Any log data from the user should report any performance
* metrics, warnings, or any other salient data. Finally, the function will
* iterate through the list of error records and append any available log files
* or messages paired with those records.
*
* After sending the mail, the simulator object will be reset to a pristine
* state so another experimental run can begin with a clean slate.
*
* @param sa The simulator agent object to summarize.
* @param args The event arguments.
* @return Zero on success, -1 otherwise.
*
* @see dump_error_records
*/
int send_report(simulator_agent_t sa, char *args);
#ifdef __cplusplus
}
#endif
......
......@@ -252,6 +252,8 @@ int sequence_agent_handle_complete(event_handle_t handle,
#endif
sa->ta_current_event += 1;
sequence_agent_enqueue_next(sa);
retval += 1;
}
else {
event_do(handle,
......@@ -263,6 +265,8 @@ int sequence_agent_handle_complete(event_handle_t handle,
EA_ArgInteger, "CTOKEN", sa->ta_token,
EA_TAG_DONE);
sa->ta_current_event = -1;
retval += 1;
}
sa = (sequence_agent_t)sa->ta_local_agent.la_link.ln_Succ;
}
......
......@@ -30,6 +30,10 @@ char *tbdb_objecttypes[] = {
TBDB_OBJECTTYPE_CONSOLE,
TBDB_OBJECTTYPE_TOPOGRAPHY,
TBDB_OBJECTTYPE_LINKTRACE,
/*
* NOTE: Add the object type and any events that send back COMPLETEs to
* the objtype2complete array in event-sched.c:sends_complete().
*/
0,
};
......
......@@ -1035,6 +1035,7 @@ Simulator instproc add_topography {tg} {
Simulator instproc make_event {outer event} {
var_import ::GLOBALS::simulated
var_import ::TBCOMPAT::osids
var_import ::TBCOMPAT::hwtype_class
set obj [lindex $event 0]
......@@ -1047,7 +1048,7 @@ Simulator instproc make_event {outer event} {
set args {}
set atstring ""
if {$obj == "#"} {
if {[string index $obj 0] == "#"} {
return {}
}
......@@ -1225,6 +1226,12 @@ Simulator instproc make_event {outer event} {
return
}
set image [lindex $evargs 0]
if {! ${GLOBALS::anonymous} && ! ${GLOBALS::passmode}} {
if {![info exists osids($image)]} {
perror "Unknown image in snapshot-to event: $image"
return
}
}
set args "IMAGE=${image}"
}
"reload" {
......@@ -1233,6 +1240,12 @@ Simulator instproc make_event {outer event} {
-image {}
}
if {$(-image) != {}} {
# Reject a "reload" event that names an image missing from the osids
# array, unless GLOBALS::anonymous or GLOBALS::passmode is set.  The
# image name is taken from the parsed "-image" option, which is the value
# the error message and the IMAGE= argument in this branch also use.
if {! ${GLOBALS::anonymous} &&
! ${GLOBALS::passmode} &&
! [info exists osids($(-image))]} {
perror "Unknown image in reload event: $(-image)"
return
}
set args "IMAGE=$(-image)"
}
}
......@@ -1331,7 +1344,8 @@ Simulator instproc make_event {outer event} {
switch -- $cmd {
"set" -
"run" {
"run" -
"start" {
switch -- $cmd {
"set" {
set etype MODIFY
......@@ -1339,6 +1353,9 @@ Simulator instproc make_event {outer event} {
"run" {
set etype RUN
}
"start" {
set etype START
}
}
if {[$obj info class] == "EventGroup"} {
set default_command {}
......@@ -1374,12 +1391,6 @@ Simulator instproc make_event {outer event} {
set args "${args}COMMAND=$(-command)"
}
}
"start" {
set etype START
if {[$obj info class] != "EventGroup"} {
set args "COMMAND=[$obj set command]"
}
}
"stop" {
set etype STOP
}
......
......@@ -3,7 +3,7 @@
.\" Copyright (c) 2004, 2005 University of Utah and the Flux Group.
.\" All rights reserved.
.\"
.TH LOGHOLE 1 "November 11, 2004" "Emulab" "Emulab Commands Manual"
.TH LOGHOLE 1 "June 16, 2005" "Emulab" "Emulab Commands Manual"
.OS
.SH NAME
loghole \- Log management tool for experiments.
......@@ -29,6 +29,14 @@ loghole \- Log management tool for experiments.
[\fIarchive-name\fR]
.P
.BI loghole
.BI change
[\fB-k \fR(\fBi-delete\fR|\fBspace-is-needed\fR)]
[\fB-a \fIdays\fR]
[\fB-c \fIcomment\fR]
.I archive-name1
[\fIarchive-name2 ...\fR]
.P
.BI loghole
.BI list
[\fB-O1!Xo\fR]
[\fB-m \fIatmost\fR]
......@@ -40,6 +48,8 @@ loghole \- Log management tool for experiments.
.P
.BI loghole
.BI clean
[\fB-fne\fR]
[\fInode1 node2 ...\fR]
.P
.BI loghole
.BI gc
......@@ -49,25 +59,26 @@ loghole \- Log management tool for experiments.
.SH DESCRIPTION
The
.B loghole
utility is used to download log files from the experimental nodes to the Emulab
users machine and assist in managing archives of these logs. Using this
utility downloads log files from certain directories on the experimental nodes
(e.g. "/local/logs") to the Emulab users machine. After downloading, it can
also be used to produce and manage archives of the log files. Using this
utility to manage an experiment's log files is encouraged because it will
transfer the logs in a network-friendly manner and is already integrated with
the rest of the system. For example, any programs executed using the Emulab
event-system will have their standard output/error automatically gathered up by
this tool. The tool can also be used to preserve multiple runs of an
experiment by producing and managing zip archives of the logs.
the rest of Emulab. For example, any programs executed using the Emulab
event-system will have their standard output/error automatically placed in the
"/local/logs" directory. The tool can also be used to preserve multiple trials
of an experiment by producing and managing zip archives of the logs.
.P
The set of logs that are actually downloaded by the tool are those located in
.I logholes
on the nodes, where a loghole is simply a well-known directory that acts like a
blackhole for log files. Any files found in these directories are downloaded
to the experiment's log directory and placed under separate directories for
each node and loghole. The referent of symbolic links are also downloaded, so
if you do not want an entire directory downloaded, you can create links in a
loghole to those files of interest.
to the experiment's log directory (i.e. "/proj/<pid>/exp/<eid>/logs") and
placed under separate directories for each node and loghole. The referents of
symbolic links are also downloaded, so if you do not want an entire directory
downloaded, you can create links in a loghole to those files of interest.
.P
To perform all of these tasks, the
To perform its various tasks, the
.B loghole
utility is broken up into several sub-actions that you can apply to an
experiment's log holes or log archives. As a quick example, to synchronize the
......@@ -92,6 +103,9 @@ directory.
.B archive
Archive the contents of the experiment's log directory into a zip file.
.TP
.B change
Change the metadata of an existing archive.
.TP
.B list
Print a brief listing of the archives in the experiment's log directory.
.TP
......@@ -99,7 +113,8 @@ Print a brief listing of the archives in the experiment's log directory.
Print a detailed listing of the archives in the experiment's log directory.
.TP
.B clean
Clean out the experiment log directory by removing any subdirectories.
Clean out the experiment log directory by removing any subdirectories and/or
clean the log directories on the nodes.
.TP
.B gc
Garbage collect old archives to free up disk space.
......@@ -223,6 +238,15 @@ dash (\fB-\fR) the comment will be read from standard in.
\fB-d\fR, \fB--delete
Delete the files in the log directory after producing the archive. The default
action is to leave the files in place until the user is ready to delete them.
.SH CHANGE
The
.B change
action is used to change the metadata of an existing archive. For example, if
after analyzing the log files, you decide that they represent "good" data, you
can add a comment stating that fact and mark the archive as not garbage
collectable. The action takes the same set of options as the
.B archive
action.
.SH LIST
The
.B list
......@@ -275,6 +299,25 @@ Optional arguments:
The full or partial name of the archive to display. If a partial name is
given, any archive names that start with the argument are displayed. The
default behavior is to display all of the archives in an experiment.
.SH CLEAN
The
.B clean
action is used to clean out log files from the experiment's log directory and
the log directories on the nodes. The default action is to ask for confirmation
and then clean out all of the log files.
.P
Available
.B clean
options:
.TP
\fB-f\fR, \fB--force
Do not prompt for confirmation.
.TP
\fB-n\fR, \fB--nodes
Only remove log files on the nodes.
.TP
\fB-e\fR, \fB--experiment
Only remove log directories in the experiment's log directory.
.SH GC
The
.B gc
......@@ -315,12 +358,15 @@ experiment. (Default: 3.0 MB)
.SH ENVIRONMENT
By default, the project and experiment ID will be inferred from the current
working directory, if it is inside the experiment's directory
(e.g. /proj/\fIpid\fR/exp/\fIeid\fR). This behavior can be overridden using
(i.e. /proj/\fIpid\fR/exp/\fIeid\fR). This behavior can be overridden using
the
.B -e
option.
.SH RETURN VALUES
.TP
3
If rsync reports an error.
.TP
2
If there was an error processing the command line arguments.
.TP
......@@ -345,6 +391,15 @@ file:
.TP
/proj/\fIpid\fR/exp/\fIeid\fR/logs
The log directory for an experiment.
.TP
/local/logs
One of the log directories on experimental nodes that is automatically sync'd.
Users should place any logs/data they want transferred back in this directory.
.TP
/var/emulab/logs
Another log directory on experimental nodes that is automatically sync'd. This
directory usually holds logs generated by the Emulab software running on the
node.
.SH SEE ALSO
event-sched(8), tevc(1), zip(1), rsync(1)
.SH AUTHOR
......
This diff is collapsed.
<!--
EMULAB-COPYRIGHT
Copyright (c) 2000-2004 University of Utah and the Flux Group.
Copyright (c) 2000-2005 University of Utah and the Flux Group.
All rights reserved.
-->
<center>
......@@ -505,13 +505,9 @@ to be started and stopped with NS <tt>at</tt> statements. To define a
program object:
<code><pre>
set prog0 [new Program $ns]
$prog0 set node $nodeA
$prog0 set command "/bin/ls -lt >& /users/joe/logs/prog0"
set prog0 [$nodeA program-agent -command "/bin/ls -lt"]
set prog1 [new Program $ns]
$prog1 set node $nodeB
$prog1 set command "/bin/sleep 60 >& /tmp/sleep.debug"</pre></code>
set prog1 [$nodeB program-agent -command "/bin/sleep 60"]</pre></code>
Then, in your NS file, add a set of static events to run these commands:
......@@ -525,13 +521,13 @@ If you want to schedule starts and stops using dynamic events:
<code><pre>
tevc -e testbed/myexp now prog0 start
tevc -e testbed/myexp now prog1 start
tevc -e testbed/myexp +20 prog1 stop</code></pre>
tevc -e testbed/myexp +20 prog1 stop</pre></code>
If you want to change the command that is run (override the command
you specified in your NS file), then:
<code><pre>
tevc -e testbed/myexp now prog0 start COMMAND='ls >/tmp/foo'</code></pre>
tevc -e testbed/myexp now prog0 start COMMAND='ls'</pre></code>
Some points worth mentioning:
......
This diff is collapsed.
<!--
EMULAB-COPYRIGHT
Copyright (c) 2000-2005 University of Utah and the Flux Group.
All rights reserved.
-->
<center>
<h1>Loghole - Emulab Log Management Utility</h1>
</center>
The <b>loghole</b> utility downloads log files from certain directories on the
experimental nodes (e.g. "/local/logs") to the Emulab users machine. After
downloading, it can also be used to produce and manage archives of the log
files. Using this utility to manage an experiment's log files is encouraged
because it will transfer the logs in a network-friendly manner and is already
integrated with the rest of Emulab. For example, any programs executed using
the Emulab event-system will have their standard output/error automatically
placed in the "/local/logs" directory. The tool can also be used to preserve
multiple trials of an experiment by producing and managing zip archives of the
logs.
You can learn more about the <b>loghole</b> utility by reading its man page on
<i>users.emulab.net</i>.
<!--
EMULAB-COPYRIGHT
Copyright (c) 2000-2004 University of Utah and the Flux Group.
Copyright (c) 2000-2005 University of Utah and the Flux Group.
All rights reserved.
-->
<center>
......@@ -12,6 +12,7 @@
<ul>
<li><a href="#INTRO">Introduction</a>
<li><a href="#TCL">TCL, NS, and node names</a>
<li><a href="#PARAMETERS">Captured NS file parameters</a>
<li><a href="#ORDER">Ordering issues</a>
<li><a href="#HARD">Hardware Commands</a>
<li><a href="#IP">IP Address Commands</a>
......@@ -118,6 +119,57 @@ tb-set-ip-link $node0 $link1 10.1.0.128
<hr>
<a name="PARAMETERS"></a><h3>Captured NS file parameters</h3>
<p>A common convention when writing NS files is to place any parameters in an
array named "opt" at the beginning of the file. For example:
<code><pre>
set opt(CLIENT_COUNT) 5
set opt(BW) 10mb; # Link bandwidth
set opt(LAT) 10ms; # Link latency
...
$ns duplex-link $server $router $opt(BW) $opt(LAT) DropTail
for {set i 0} {$i < $opt(CLIENT_COUNT)} {incr i} {
set nodes($i) [$ns node]
...
}
set serverprog [$server program-agent -command "starter.sh"]
</pre></code>
Normally, this convention is only used to help organize the parameters. In
Emulab, however, the contents of the "opt" array are captured and made
available to the emulated environment. For instance, the parameters are added
as environment variables to any commands run by <a
href="docwrapper.php3?docname=eventsystem.html#PROGRAM">program-agents</a>
(only available on recent FBSD410-STD and RHL90-STD images). So, in the above
example of NS code, the "starter.sh" script will be able to reference
parameters by name, like so:
<code><pre>
#! /bin/sh
echo "Testing with $CLIENT_COUNT clients."
...
</pre></code>
Note that the contents of the "opt" array are not ordered, so you should not
reference other parameters and expect the shell to expand them appropriately:
<code><pre>
set opt(prefix) "/foo/bar"
set opt(BINDIR) '$prefix/bin'; # BAD
set opt(prefix) "/foo/bar"
set opt(BINDIR) "$opt(prefix)/bin"; # Good
</pre></code>
<hr>
<a name="ORDER"></a><h3>Ordering Issues</h3>
<p>tb- commands have the same status as all other Tcl and NS commands.
......
......@@ -814,6 +814,42 @@ class osid:
return EmulabResponse(RESPONSE_SUCCESS,
value=result,
output=str(result))
def info(self, version, argdict):
    #
    # Return the os_info record for a single OS.  The required "osid"
    # argument is matched against both the osname and the osid columns.
    #
    # Check for valid arguments.
    argerror = CheckRequiredArgs(argdict, ("osid",))
    if (argerror):
        return argerror

    # Only dashes and word characters are accepted in the name.
    if not re.match(r"^[-\w]*$", argdict["osid"]):
        return EmulabResponse(RESPONSE_BADARGS,
                              output="Improperly formed osid!")

    # Get the listing that is accessible to this user and that matches
    # the requested OS.  The access test must be parenthesized: AND binds
    # tighter than OR in SQL, so without the parentheses any row joined to
    # one of the user's group memberships would match regardless of the
    # name that was asked for.
    res = DBQueryFatal("SELECT distinct "
                       "o.* FROM "
                       "os_info as o "
                       "left join group_membership as g on g.pid=o.pid "
                       "where (g.uid=%s or o.shared=1) and "
                       "(o.osname=%s or o.osid=%s)",
                       (self.uid, argdict["osid"], argdict["osid"]),
                       True)

    if len(res) == 0:
        return EmulabResponse(RESPONSE_ERROR,
                              output="Unknown OS ID?")

    # Report the OS name under "osid" and keep the original osid column
    # value available as "fullosid".
    osid = res[0]
    osid["fullosid"] = osid["osid"]
    osid["osid"] = osid["osname"]
    if not osid["OS"] or len(osid["OS"]) == 0:
        osid["OS"] = "(None)"
        pass

    return EmulabResponse(RESPONSE_SUCCESS,
                          value=scrubdict(osid),
                          output=str(osid))
    pass
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment