Commit dbe06350 authored by Timothy Stack

Some more error recording/reporting for node events in the scheduler.

	* error-record.c: Include node event related errors in the dump.

	* node-agent.h, node-agent.cc: Dump error output from node events
	into a file so it can be included in the automatically generated
	report.

	* simulator-agent.cc: Put the number of errors at the top of the
	generated report.
parent 71901cf5
...@@ -271,6 +271,27 @@ int dump_error_record(error_record_t er, FILE *out) ...@@ -271,6 +271,27 @@ int dump_error_record(error_record_t er, FILE *out)
retval = tail_file(path, out); retval = tail_file(path, out);
} }
} }
else if (strcmp(er->er_agent->objtype, TBDB_OBJECTTYPE_NODE) == 0) {
/**
* NULL-terminated array of log file name formats that should
* be sent back to the user.
*/
static char *filename_formats[] = {
"logs/%s/node-control.%lu",
NULL
};
/* Pass the logs through. */
for (lpc = 0; filename_formats[lpc] && (retval == 0); lpc++) {
snprintf(path,
sizeof(path),
filename_formats[lpc],
er->er_agent->vnode,
er->er_token);
retval = tail_file(path, out);
}
}
return retval; return retval;
} }
......
...@@ -61,7 +61,8 @@ static char *expand_nodeids(sched_event_t *se); ...@@ -61,7 +61,8 @@ static char *expand_nodeids(sched_event_t *se);
* *
* @see expand_nodeids * @see expand_nodeids
*/ */
static int reload_with(node_agent_t na, char *nodeids, char *args); static int reload_with(node_agent_t na, struct agent **agent_array, int aa_len,
int token, char *nodeids, char *args);
/** /**
* Handler for the REBOOT event of the node object. First, the function will * Handler for the REBOOT event of the node object. First, the function will
...@@ -76,7 +77,8 @@ static int reload_with(node_agent_t na, char *nodeids, char *args); ...@@ -76,7 +77,8 @@ static int reload_with(node_agent_t na, char *nodeids, char *args);
* *
* @see expand_nodeids * @see expand_nodeids
*/ */
static int do_reboot(node_agent_t na, char *nodeids); static int do_reboot(node_agent_t na, struct agent **agent_array, int aa_len,
int token, char *nodeids);
node_agent_t create_node_agent(void) node_agent_t create_node_agent(void)
{ {
...@@ -154,7 +156,31 @@ static char *expand_nodeids(sched_event_t *se) ...@@ -154,7 +156,31 @@ static char *expand_nodeids(sched_event_t *se)
return retval; return retval;
} }
static int reload_with(node_agent_t na, char *nodeids, char *args) static void dump_node_logs(int token, struct agent **agent_array, int aa_len,
EmulabResponse *er)
{
const char *output;
FILE *file;
int lpc;
output = er->getOutput().c_str();
for (lpc = 0; lpc < aa_len; lpc++) {
char filename[1024];
FILE *file;
snprintf(filename, sizeof(filename),
NODE_DUMP_FILE,
agent_array[lpc]->name, token);
if ((file = fopen(filename, "w")) != NULL) {
fprintf(file, "%s", output);
fclose(file);
}
}
}
static int reload_with(node_agent_t na, struct agent **agent_array, int aa_len,
int token, char *nodeids, char *args)
{ {
event_handle_t handle; event_handle_t handle;
EmulabResponse er; EmulabResponse er;
...@@ -188,6 +214,7 @@ static int reload_with(node_agent_t na, char *nodeids, char *args) ...@@ -188,6 +214,7 @@ static int reload_with(node_agent_t na, char *nodeids, char *args)
SPA_Boolean, "bootwait", true, SPA_Boolean, "bootwait", true,
SPA_TAG_DONE)) != 0) { SPA_TAG_DONE)) != 0) {
warning("could not reload: %s\n", nodeids); warning("could not reload: %s\n", nodeids);
dump_node_logs(token, agent_array, aa_len, &er);
} }
} }
/* ... a user-specified image. */ /* ... a user-specified image. */
...@@ -202,6 +229,7 @@ static int reload_with(node_agent_t na, char *nodeids, char *args) ...@@ -202,6 +229,7 @@ static int reload_with(node_agent_t na, char *nodeids, char *args)
SPA_Boolean, "bootwait", true, SPA_Boolean, "bootwait", true,
SPA_TAG_DONE)) != 0) { SPA_TAG_DONE)) != 0) {
warning("could not reload: %s\n", nodeids); warning("could not reload: %s\n", nodeids);
dump_node_logs(token, agent_array, aa_len, &er);
} }
} }
...@@ -210,7 +238,8 @@ static int reload_with(node_agent_t na, char *nodeids, char *args) ...@@ -210,7 +238,8 @@ static int reload_with(node_agent_t na, char *nodeids, char *args)
return retval; return retval;
} }
static int do_reboot(node_agent_t na, char *nodeids) static int do_reboot(node_agent_t na, struct agent **agent_array, int aa_len,
int token, char *nodeids)
{ {
event_handle_t handle; event_handle_t handle;
EmulabResponse er; EmulabResponse er;
...@@ -223,7 +252,7 @@ static int do_reboot(node_agent_t na, char *nodeids) ...@@ -223,7 +252,7 @@ static int do_reboot(node_agent_t na, char *nodeids)
handle = na->na_local_agent.la_handle; handle = na->na_local_agent.la_handle;
/* Sync the logholes in case the OS clears out /tmp, then */ /* Sync the logholes in case the OS clears out /tmp, then */
if (0 && systemf("loghole --port=%d --quiet sync %s", if (systemf("loghole --port=%d --quiet sync %s",
DEFAULT_RPC_PORT, DEFAULT_RPC_PORT,
nodeids) != 0) { nodeids) != 0) {
warning("failed to sync log hole for node %s\n", nodeids); warning("failed to sync log hole for node %s\n", nodeids);
...@@ -238,12 +267,14 @@ static int do_reboot(node_agent_t na, char *nodeids) ...@@ -238,12 +267,14 @@ static int do_reboot(node_agent_t na, char *nodeids)
SPA_Boolean, "wait", true, SPA_Boolean, "wait", true,
SPA_TAG_DONE)) != 0) { SPA_TAG_DONE)) != 0) {
warning("could not reboot: %s\n", nodeids); warning("could not reboot: %s\n", nodeids);
dump_node_logs(token, agent_array, aa_len, &er);
} }
return retval; return retval;
} }
static int do_snapshot(node_agent_t na, char *nodeids, char *args) static int do_snapshot(node_agent_t na, struct agent **agent_array, int aa_len,
int token, char *nodeids, char *args)
{ {
event_handle_t handle; event_handle_t handle;
EmulabResponse er; EmulabResponse er;
...@@ -258,7 +289,7 @@ static int do_snapshot(node_agent_t na, char *nodeids, char *args) ...@@ -258,7 +289,7 @@ static int do_snapshot(node_agent_t na, char *nodeids, char *args)
handle = na->na_local_agent.la_handle; handle = na->na_local_agent.la_handle;
if (systemf("loghole --port=%d --quiet sync %s", if (systemf("loghole --port=%d -vvv sync %s",
DEFAULT_RPC_PORT, DEFAULT_RPC_PORT,
nodeids) != 0) { nodeids) != 0) {
warning("failed to sync log hole for node %s\n", nodeids); warning("failed to sync log hole for node %s\n", nodeids);
...@@ -285,6 +316,8 @@ static int do_snapshot(node_agent_t na, char *nodeids, char *args) ...@@ -285,6 +316,8 @@ static int do_snapshot(node_agent_t na, char *nodeids, char *args)
SPA_Boolean, "bootwait", true, SPA_Boolean, "bootwait", true,
SPA_TAG_DONE)) != 0) { SPA_TAG_DONE)) != 0) {
warning("could not snapshot: %s\n", nodeids); warning("could not snapshot: %s\n", nodeids);
dump_node_logs(token, agent_array, aa_len, &er);
} }
/* XXX Kinda hacky way to wait for the node to come up. */ /* XXX Kinda hacky way to wait for the node to come up. */
else if ((retval = else if ((retval =
...@@ -295,6 +328,7 @@ static int do_snapshot(node_agent_t na, char *nodeids, char *args) ...@@ -295,6 +328,7 @@ static int do_snapshot(node_agent_t na, char *nodeids, char *args)
SPA_String, "state", "ISUP", SPA_String, "state", "ISUP",
SPA_TAG_DONE)) != 0) { SPA_TAG_DONE)) != 0) {
warning("timeout waiting for node: %s\n", nodeids); warning("timeout waiting for node: %s\n", nodeids);
dump_node_logs(token, agent_array, aa_len, &er);
} }
} }
...@@ -345,15 +379,28 @@ static void *node_agent_looper(void *arg) ...@@ -345,15 +379,28 @@ static void *node_agent_looper(void *arg)
(int32_t *)&token); (int32_t *)&token);
argsbuf[sizeof(argsbuf) - 1] = '\0'; argsbuf[sizeof(argsbuf) - 1] = '\0';
if (se.length == 0) {
}
else if (se.length == 1) {
agent_singleton[0] = se.agent.s;
agent_array = agent_singleton;
}
else {
agent_array = &se.agent.m[1];
}
if (strcmp(evtype, TBDB_EVENTTYPE_REBOOT) == 0) { if (strcmp(evtype, TBDB_EVENTTYPE_REBOOT) == 0) {
rc = do_reboot(na, nodeids); rc = do_reboot(na, agent_array, se.length,
token, nodeids);
} }
else if (strcmp(evtype, TBDB_EVENTTYPE_RELOAD) == 0) { else if (strcmp(evtype, TBDB_EVENTTYPE_RELOAD) == 0) {
rc = reload_with(na, nodeids, argsbuf); rc = reload_with(na, agent_array, se.length,
token, nodeids, argsbuf);
} }
else if (strcmp(evtype, else if (strcmp(evtype,
TBDB_EVENTTYPE_SNAPSHOT) == 0) { TBDB_EVENTTYPE_SNAPSHOT) == 0) {
rc = do_snapshot(na, nodeids, argsbuf); rc = do_snapshot(na, agent_array, se.length,
token, nodeids, argsbuf);
} }
else if (strcmp(evtype, TBDB_EVENTTYPE_SETDEST) == 0) { else if (strcmp(evtype, TBDB_EVENTTYPE_SETDEST) == 0) {
event_notify(handle, en); event_notify(handle, en);
...@@ -368,22 +415,13 @@ static void *node_agent_looper(void *arg) ...@@ -368,22 +415,13 @@ static void *node_agent_looper(void *arg)
rc = -1; rc = -1;
} }
if (se.length == 0) {
}
else if (se.length == 1) {
agent_singleton[0] = se.agent.s;
agent_array = agent_singleton;
}
else {
agent_array = &se.agent.m[1];
}
for (lpc = 0; lpc < se.length; lpc++) { for (lpc = 0; lpc < se.length; lpc++) {
event_do(handle, event_do(handle,
EA_Experiment, pideid, EA_Experiment, pideid,
EA_Type, TBDB_OBJECTTYPE_NODE, EA_Type, TBDB_OBJECTTYPE_NODE,
EA_Name, agent_array[lpc]->name, EA_Name, agent_array[lpc]->name,
EA_Event, TBDB_EVENTTYPE_COMPLETE, EA_Event, TBDB_EVENTTYPE_COMPLETE,
EA_ArgInteger, "ERROR", retval, EA_ArgInteger, "ERROR", rc,
EA_ArgInteger, "CTOKEN", token, EA_ArgInteger, "CTOKEN", token,
EA_TAG_DONE); EA_TAG_DONE);
} }
......
/* /*
* EMULAB-COPYRIGHT * EMULAB-COPYRIGHT
* Copyright (c) 2004 University of Utah and the Flux Group. * Copyright (c) 2004, 2005 University of Utah and the Flux Group.
* All rights reserved. * All rights reserved.
*/ */
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
extern "C" { extern "C" {
#endif #endif
#define NODE_DUMP_FILE "logs/%s/node-control.%d"
/** /**
* A local agent structure for Node objects. * A local agent structure for Node objects.
*/ */
......
...@@ -245,11 +245,21 @@ int send_report(simulator_agent_t sa, char *args) ...@@ -245,11 +245,21 @@ int send_report(simulator_agent_t sa, char *args)
retval = -1; retval = -1;
} }
else { else {
int rc, lpc, error_count;
char *digester; char *digester;
int rc, lpc;
retval = 0; retval = 0;
error_count = lnCountNodes(&error_records);
if (error_count > 0) {
fprintf(file,
"\n"
" *** %d error(s) were detected!\n"
" Details should be below in the logs.\n"
"\n",
error_count);
}
/* Dump user supplied stuff first, */ /* Dump user supplied stuff first, */
dump_report_data(file, sa, SA_RDK_MESSAGE, 1); dump_report_data(file, sa, SA_RDK_MESSAGE, 1);
...@@ -285,7 +295,7 @@ int send_report(simulator_agent_t sa, char *args) ...@@ -285,7 +295,7 @@ int send_report(simulator_agent_t sa, char *args)
fprintf(file, "Configuration:\n"); fprintf(file, "Configuration:\n");
dump_report_data(file, sa, SA_RDK_CONFIG, 0); dump_report_data(file, sa, SA_RDK_CONFIG, 0);
fprintf(file, "Log:\n"); fprintf(file, "\nLog:\n");
dump_report_data(file, sa, SA_RDK_LOG, 1); dump_report_data(file, sa, SA_RDK_LOG, 1);
/* ... dump the error records. */ /* ... dump the error records. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment