Commit 4dc769db authored by Timothy Stack

Add a 'validate' action to loghole, which performs some simple sanity
checks on the log files that were retrieved.  Also update the event
scheduler to run the validation after a sync.
parent 24678a9e
......@@ -247,6 +247,7 @@ int send_report(simulator_agent_t sa, char *args)
else {
int rc, lpc, error_count;
char *digester;
FILE *vfile;
retval = 0;
......@@ -260,6 +261,29 @@ int send_report(simulator_agent_t sa, char *args)
error_count);
}
if ((vfile = popenf("loghole --port=%d validate",
"r",
DEFAULT_RPC_PORT)) == NULL) {
fprintf(file, "[unable to validate logs]\n");
}
else {
char buf[BUFSIZ];
int total = 0;
while ((rc = fread(buf,
1,
sizeof(buf),
vfile)) > 0) {
fwrite(buf, 1, rc, file);
total += rc;
}
pclose(vfile);
vfile = NULL;
if (total > 0)
fprintf(file, "\n");
}
/* Dump user supplied stuff first, */
dump_report_data(file, sa, SA_RDK_MESSAGE, 1);
......
......@@ -21,6 +21,9 @@ loghole \- Log management tool for experiments.
[\fInode1 node2 ...\fR]
.P
.BI loghole
.BI validate
.P
.BI loghole
.BI archive
[\fB-k \fR(\fBi-delete\fR|\fBspace-is-needed\fR)]
[\fB-a \fIdays\fR]
......@@ -100,6 +103,9 @@ more detail can be found in later sections of this manual:
Synchronize the experiment's log holes on the nodes with the experiment's log
directory.
.TP
.B validate
Validate that the logs were sync'd correctly by performing some sanity checks.
.TP
.B archive
Archive the contents of the experiment's log directory into a zip file.
.TP
......@@ -208,6 +214,18 @@ Optional arguments:
.I node1 ...
Specify a subset of virtual or physical nodes that should be synchronized,
otherwise all of the nodes will be synchronized.
.SH VALIDATE
The
.B validate
action is used to check that the logs were sync'd correctly. Currently, the
following checks are performed:
.TP
program-agent logs
The stdout and stderr logs from program agents are checked by comparing their
metadata against that saved in the accompanying ".status" files.
.TP
valid soft links
All soft links are checked to ensure the referent exists.
.SH ARCHIVE
The
.B archive
......
......@@ -198,7 +198,7 @@ def usage():
print " experiment's log directory."
print " show Show a detailed listing of the archive(s) in the"
print " experiment directory."
#print " validate Validate the contents of an archive."
print " validate Validate the experiment logs."
print " clean Clean the experiment log directory and node local"
print " directories."
print " gc Garbage collect old archives."
......@@ -369,15 +369,16 @@ def change_usage():
# Print the usage statement for the "validate" action.
#
def validate_usage():
    """Print the usage statement for the "validate" action.

    The action takes no arguments, so the text consists of the usage line,
    a short description of the checks that are performed, and an example
    invocation.  The stale archive-oriented usage line, "Required
    arguments" section and "foobar.0" example are no longer printed, since
    they contradicted the current text.
    """
    lines = [
        "Usage: loghole validate",
        "",
        "Validate the experiment logs. Currently, this action will:",
        "",
        " 1. Examine the log files from individual logs from the ",
        " program-agent to verify that the files are intact.",
        " 2. Check that the referent of any soft-links exist.",
        "",
        "Examples:",
        " $ loghole validate",
    ]
    # A single parenthesized argument prints the same text whether "print"
    # is a statement or a function.
    print("\n".join(lines))
    return
##
......@@ -1400,6 +1401,59 @@ def do_change(args):
return retval
def _read_status(path):
    """Parse a "KEY=value" status file into a dictionary.

    Lines without an "=" (for example, blank lines) are skipped and only the
    first "=" separates the key from the value, so values may themselves
    contain "=".  Surrounding whitespace, including the trailing newline, is
    stripped from keys and values.  The file is always closed.
    """
    status = {}
    fobj = open(path)
    try:
        for line in fobj:
            if "=" not in line:
                continue
            key, value = line.split("=", 1)
            status[key.strip()] = value.strip()
    finally:
        fobj.close()
    return status


def _status_int(status, key):
    """Return status[key] as an integer, or -1 when missing or malformed."""
    try:
        return int(status.get(key, "-1"))
    except ValueError:
        return -1


def _check_log(arg, path, status, size_key, mtime_key):
    """Compare the size and mtime of the log at path with the status values.

    Prints a warning naming the log file and sets arg["retval"] to 1 for each
    mismatch.  Raises OSError if the log file cannot be stat'd.
    """
    st = os.stat(path)
    if st[stat.ST_SIZE] != _status_int(status, size_key):
        print("warning: '%s' size doesn't match status" % path)
        arg["retval"] = 1
    if st[stat.ST_MTIME] != _status_int(status, mtime_key):
        print("warning: '%s' mtime doesn't match status" % path)
        arg["retval"] = 1


def validate_visitor(arg, dirname, names):
    """Directory-walk visitor that sanity checks the entries of one directory.

    For every "<base>.status.<token>" file, the recorded OUTSIZE/OUTMTIME and
    ERRSIZE/ERRMTIME values are compared with the "<base>.out.<token>" and
    "<base>.err.<token>" files in the same directory.  Every entry is also
    checked for existence, which flags soft links whose referent is missing.

    arg     -- Dictionary whose "retval" entry is set to 1 when a check fails.
    dirname -- The directory being visited.
    names   -- The names of the entries in dirname.
    """
    for name in names:
        fname = os.path.join(dirname, name)
        mo = re.match(r'[^\.]+\.status\.([0-9]+)', name)
        if mo:
            token = int(mo.group(1))
            try:
                status = _read_status(fname)
                if token != _status_int(status, "TOKEN"):
                    print("warning: ignoring %s" % name)
                    continue
                # Strip ".<token>" and then ".status" to recover the base.
                base = os.path.splitext(os.path.splitext(name)[0])[0]
                _check_log(arg,
                           os.path.join(dirname,
                                        "%s.out.%d" % (base, token)),
                           status, "OUTSIZE", "OUTMTIME")
                _check_log(arg,
                           os.path.join(dirname,
                                        "%s.err.%d" % (base, token)),
                           status, "ERRSIZE", "ERRMTIME")
            except (IOError, OSError):
                # Best effort: an unreadable status file or a missing
                # companion log is skipped rather than aborting the walk.
                pass
        if not os.path.exists(fname):
            print("warning: '%s' references nonexistent file!" % fname)
            # A dangling link is a failed check like any other.
            arg["retval"] = 1
    return
##
# Performs the "validate" action, which walks the experiment's log directory
# and runs sanity checks on the log files found there.
#
......@@ -1411,7 +1465,9 @@ def do_validate(args):
logdir = os.path.join(EXPDIR_FMT % { "PID" : PID, "EID" : EID }, "logs")
os.chdir(logdir)
rc = { "retval" : 0 }
os.path.walk(os.path.curdir, validate_visitor, rc)
retval = rc["retval"]
return retval
......@@ -1565,6 +1621,7 @@ def notimplemented():
ACTIONS = {
"get" : (do_sync, sync_usage),
"sync" : (do_sync, sync_usage),
"validate" : (do_validate, validate_usage),
"archive" : (do_archive, archive_usage),
"list" : (do_list, list_usage),
"show" : (do_show, show_usage),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment