Commit e89ee617 authored by Kevin Atkinson's avatar Kevin Atkinson

libaudit related changes:

  - Added "LIBAUDIT_FANCY" option to AuditStart.  When this option is
    used libaudit will send a different email than it normally sends,
    and on error call tblog_find_error() to determine the error.

  - Also add audit function AddAuditInfo which adds add additional
    information for libaudit to use in SendAuditMail when AUDIT_FANCY
    is set.

  - Modify template_swapin, template_instantiate, and template_create
    to use the new audit functionality.

  - Suppressing calling tblog_find_error and sending the error email
    when auditing in swapexp and batchexp

tblog changes:

  - Shorten the message sent to the user when the error in unknown.
    Remove all parts about lack of free nodes as it no longer really
    applies as tblog now correctly identified those errors and handles
    them separately.  The message is now just "Please look at the log
    below to see what happened."

  - Improve algo. used to determine the other error when canceled.
    Will now work by removing all errors related to the cancel request
    and the essentially rerunning tblog_find_error.  If the cause of
    the error is still canceled, repeat and try again until the cause
    is something other than canceled or no errors are left.

  - Refactor tblog_find_error, which involves creating new internal
    functions: tblog_determine_single_error, tblog_store_error,
    tblog_dump_error

  - Add section on Primary vs Secondary Errors to the inline POD
    documentation.

  - Other minor enhancements and bug fixes.
parent 02a43688
......@@ -882,23 +882,25 @@ sub cleanup()
# Clear the logfile so the webpage stops.
$experiment->CloseLogFile();
#
# Figure out the error if possible
#
my $error_data = tblog_find_error();
#
# Send a message to the testbed list.
#
tblog_email_error($error_data,
"$user_name <$user_email>",
"Config Failure", "$pid/$eid",
"$user_name <$user_email>",
"",
"Cc: $TBOPS",
"",
($logname, "assign.log", "wanassign.log", $nsfile));
if (!$ENV{'TBAUDITON'}) {
#
# Figure out the error if possible
#
my $error_data = tblog_find_error();
#
# Send a message to the testbed list.
#
tblog_email_error($error_data,
"$user_name <$user_email>",
"Config Failure", "$pid/$eid",
"$user_name <$user_email>",
"",
"Cc: $TBOPS",
"",
($logname, "assign.log", "wanassign.log", $nsfile));
}
#
# Back up the work dir for post-mortem debugging.
#
......
......@@ -12,8 +12,9 @@ use Exporter;
@ISA = "Exporter";
@EXPORT =
qw ( AuditStart AuditEnd AuditAbort AuditFork AuditSetARGV
AddAuditInfo
LogStart LogEnd
LIBAUDIT_NODAEMON LIBAUDIT_DAEMON LIBAUDIT_LOGONLY LIBAUDIT_NODELETE
LIBAUDIT_NODAEMON LIBAUDIT_DAEMON LIBAUDIT_LOGONLY LIBAUDIT_NODELETE LIBAUDIT_FANCY
);
# After package decl.
......@@ -21,6 +22,7 @@ use English;
use POSIX qw(isatty setsid);
use File::Basename;
use IO::Handle;
use Carp;
#
# Testbed Support libraries
......@@ -52,6 +54,13 @@ my $logonly = 0;
# Save log when logging only.
my $savelog = 0;
# If set than send "fancy" email and also call tblog_find_error
# on errors
my $fancy = 0;
# Extra info used when AUDIT_FANCY is set
my %AUDIT_INFO;
# Untainted scriptname for email below.
if ($PROGRAM_NAME =~ /^([-\w\.\/]+)$/) {
$SCRIPTNAME = basename($1);
......@@ -73,6 +82,8 @@ sub LIBAUDIT_NODAEMON { 0; }
sub LIBAUDIT_DAEMON { 0x01; }
sub LIBAUDIT_LOGONLY { 0x02; }
sub LIBAUDIT_NODELETE { 0x04; }
sub LIBAUDIT_FANCY { 0x08; } # Only use if libdb and libtblog are
# already in use
#
# Start an audit (or log) of a script. First arg is a flag indicating if
......@@ -102,6 +113,12 @@ sub AuditStart($;$$)
$savelog = 1;
}
}
if ($options & LIBAUDIT_FANCY()) {
if (!$INC{"libdb.pm"} || !$INC{"libtblog.pm"}) {
croak "libdb and libtblog must be loaded when using LIBAUDIT_FANCY";
}
$fancy = 1;
}
}
#
......@@ -135,6 +152,9 @@ sub AuditStart($;$$)
select(undef, undef, undef, 0.2);
return $mypid;
}
if (defined(&libtblog::tblog_new_child_process)) {
libtblog::tblog_new_child_process();
}
}
$auditing = 1;
......@@ -262,6 +282,7 @@ sub AuditFork()
# what happened is sent to the logs list so that we can go back and see the
# details if needed.
#
sub SendFancyMail($);
sub SendAuditMail($)
{
my($exitstatus) = @_;
......@@ -270,6 +291,12 @@ sub SendAuditMail($)
# Avoid duplicate messages.
$auditing = 0;
# Needs to called here before STDOUT and STDERR is
# redirectected below
if ($exitstatus && $fancy) {
&libtblog::tblog_find_error();
}
if (!$daemon && $PERL_VERSION >= 5.008) {
eval("open(STDOUT, \">&\", \$libaudit::SAVE_STDOUT); ".
"open(STDERR, \">&\", \$libaudit::SAVE_STDERR);");
......@@ -296,6 +323,11 @@ sub SendAuditMail($)
return;
}
if ($fancy) {
SendFancyMail($exitstatus);
return;
}
#
# Send logfile to tblogs. Carbon to tbops if it failed. If no logfile
# then no point in sending to tblogs, obviously.
......@@ -328,6 +360,129 @@ sub SendAuditMail($)
}
}
sub SendFancyMail($)
{
import libdb;
import libtblog;
my ($exitstatus) = @_;
my ($TO, $FROM);
my ($name, $email);
if (UserDBInfo($USERNAME, \$name, \$email)) {
$TO = "$name <$email>";
} else {
$TO = "$GCOS <${USERNAME}\@${OURDOMAIN}>";
}
$FROM = $TO;
my @FILES;
if (defined($logfile) && -s $logfile) {
@FILES = ($logfile);
}
# Avoid sending a person the same email twice
my $extra_cc;
if (defined ($AUDIT_INFO{cc})) {
my @cc;
my @prev_emails = ($email);
OUTER: foreach (@{$AUDIT_INFO{cc}}) {
($email) = /([^<> \t@]+@[^<> \t@]+)/;
foreach my $e (@prev_emails) {
next OUTER if $email eq $e;
push @prev_email, $e;
}
push @cc, $_;
}
if (@cc) {
$extra_cc = "Cc: ";
$extra_cc .= join(', ', @cc);
}
}
my $sendmail_res;
if ($exitstatus) {
my $d = tblog_lookup_error();
my $prefix;
$prefix .= "$SCRIPTNAME @SAVEARGV\n";
$prefix .= "Exited with status: $exitstatus";
my $what = "Failed: $SCRIPTNAME";
$what = $AUDIT_INFO{failure_frag} if defined $AUDIT_INFO{failure_frag};
$which = $AUDIT_INFO{which};
$sendmail_res
= tblog_email_error($d, $TO, $what, $which,
$FROM, $extra_cc, "Cc: $TBOPS",
$prefix, @FILES);
} else {
my $subject = "$SCRIPTNAME succeeded";
$subject = $AUDIT_INFO{success_frag} if defined $AUDIT_INFO{success_frag};
$subject .= ": $AUDIT_INFO{which}" if defined $AUDIT_INFO{which};
my $body = "$SCRIPTNAME @SAVEARGV\n";
my $HDRS;
$HDRS .= "$extra_cc\n" if defined $extra_cc;
$HDRS .= "Reply-To: $TBOPS\n";
$HDRS .= "Bcc: $TBLOGS";
$sendmail_res
= SENDMAIL($TO, $subject, $body, $FROM, $HDRS, @FILES);
}
if ($sendmail_res) {
unlink($logfile)
if (defined($logfile) && (! $savelog));
}
}
# Info on possibe values for AUDIT_INFO
# [KEY => string|list]
my %AUDIT_METAINFO =
( which => 'string', # ex "PROJ/EXP"
success_frag => 'string', # ex "T. Swapped In"
failure_frag => 'string', # ie "Bla Failure"
cc => 'list'); # Cc audit mail to these people
#
# AddAuditInfo($key, $value)
# add additional information for libaudit to use in SendAuditMail
# when AUDIT_FANCY is set
#
# TODO: Eventually child scripts should be able to use AddAuditInfo, not
# just the script in which AuditStart(...) was called. This will probably
# involve storing the values in the database somehow.
#
sub AddAuditInfo ($$) {
my ($key, $value) = @_;
if (!$auditing) {
carp "AddAuditInfo($key, ...) ignored since the script isn't being audited.";
return 0;
}
if ($AUDIT_METAINFO{$key} eq 'string') {
$AUDIT_INFO{$key} = $value;
return 1;
} elsif ($AUDIT_METAINFO{$key} eq 'list') {
push @{$AUDIT_INFO{$key}}, $value;
return 1;
} else {
carp "Unknown key, \"$key\" in AddAuditInfo";
return 0;
}
}
#
# When the script ends, if the audit has not been sent, send it.
#
......
This diff is collapsed.
......@@ -1421,32 +1421,34 @@ sub cleanup()
#
$experiment->SetCancelFlag(EXPTCANCEL_CLEAR);
#
# Figure out the error if possible
#
my $error_data = tblog_find_error();
#
# Send a message to the testbed list. Append the logfile.
#
#
# also try to send mail to robocops if it was a robot/mote exp
#
my $rcops = '';
if ($robotexp) {
$rcops = "\nCc: $TBROBOCOPS";
if (!$ENV{'TBAUDITON'}) {
#
# Figure out the error if possible
#
my $error_data = tblog_find_error();
#
# Send a message to the testbed list. Append the logfile.
#
#
# also try to send mail to robocops if it was a robot/mote exp
#
my $rcops = '';
if ($robotexp) {
$rcops = "\nCc: $TBROBOCOPS";
}
tblog_email_error($error_data,
"$user_name <$user_email>",
"Swap ${inout} Failure", "$pid/$eid",
($idleswap ? $TBOPS : "$user_name <$user_email>"),
"Cc: $expt_head_name <$expt_head_email> $rcops",
"Cc: $TBOPS $rcops",
$modifyError,
(($logname), (defined($modnsfile) ? ($modnsfile) : ())));
}
tblog_email_error($error_data,
"$user_name <$user_email>",
"Swap ${inout} Failure", "$pid/$eid",
($idleswap ? $TBOPS : "$user_name <$user_email>"),
"Cc: $expt_head_name <$expt_head_email> $rcops",
"Cc: $TBOPS $rcops",
$modifyError,
(($logname), (defined($modnsfile) ? ($modnsfile) : ())));
if ($modifyHosed) {
#
# Copy off the workdir to the user directory, Then back up both of
......
......@@ -221,7 +221,7 @@ $eid = $template->eid();
#
# Use the logonly option to audit so that we get a record mailed.
#
if (my $childpid = AuditStart(LIBAUDIT_DAEMON, undef, LIBAUDIT_LOGONLY)) {
if (my $childpid = AuditStart(LIBAUDIT_DAEMON, undef, LIBAUDIT_LOGONLY|LIBAUDIT_FANCY)) {
#
# Parent exits normally, unless in waitmode. We have to set
# justexit to make sure the END block below does not run.
......@@ -270,6 +270,13 @@ if (my $childpid = AuditStart(LIBAUDIT_DAEMON, undef, LIBAUDIT_LOGONLY)) {
exit $exit_code;
}
# Add audit info. Right now this will only work when called in
# the child of the script being audited. Eventually these can be
# set anywhere.
AddAuditInfo("which", "$pid/$tid");
AddAuditInfo("failure_frag", "Template Creation Failure");
AddAuditInfo("success_frag", "New Template Created");
#
# We need to catch TERM so we can kill the children and do a cleanup.
#
......
......@@ -353,7 +353,7 @@ if (! ($foreground || $batchmode)) {
}
if (my $childpid = AuditStart(LIBAUDIT_DAEMON, $logname,
LIBAUDIT_LOGONLY|LIBAUDIT_NODELETE)) {
LIBAUDIT_LOGONLY|LIBAUDIT_NODELETE|LIBAUDIT_FANCY)) {
#
# Parent exits normally, unless in waitmode. We have to set
# justexit to make sure the END block below does not run.
......@@ -415,6 +415,13 @@ if (! ($foreground || $batchmode)) {
exit($? >> 8);
}
TBdbfork();
# Add audit info. Right now this will only work when called in
# the child of the script being audited. Eventually these can be
# set anywhere.
AddAuditInfo("which", "$pid/$eid");
AddAuditInfo("failure_frag", "T. Instantiation Failure");
AddAuditInfo("success_frag", "New T. Instance Created");
}
#
......
......@@ -218,7 +218,7 @@ if (! ($foreground || $experiment->batchmode())) {
TBExptOpenLogFile($pid, $eid);
if (my $childpid = AuditStart(LIBAUDIT_DAEMON, $logname,
LIBAUDIT_LOGONLY|LIBAUDIT_NODELETE)) {
LIBAUDIT_LOGONLY|LIBAUDIT_NODELETE|LIBAUDIT_FANCY)) {
#
# Parent exits normally, unless in waitmode. We have to set
# justexit to make sure the END block below does not run.
......@@ -280,6 +280,22 @@ if (! ($foreground || $experiment->batchmode())) {
exit($? >> 8);
}
TBdbfork();
# Add audit info. Right now this will only work when called in
# the child of the script being audited. Eventually these can be
# set anywhere.
AddAuditInfo("which", "$pid/$eid");
AddAuditInfo("failure_frag", "T. Swapin Failure");
AddAuditInfo("success_frag", "T. Swapped In");
my $expt_head_login = $experiment->creator();
my $expt_head_name;
my $expt_head_email;
if (UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
AddAuditInfo("cc", "$expt_head_name <$expt_head_email>");
} else {
tbwarn("Could not determine name/email for $expt_head_login.");
}
}
# Event connect before starting swapin so we catch all the states.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment