From 95a3a6a7475eaef8b6ccf295606783da5e3c1cc9 Mon Sep 17 00:00:00 2001 From: Kevin Atkinson Date: Thu, 26 Oct 2006 16:21:16 +0000 Subject: [PATCH] Various tblog changes: Make an attempt to discover what the error was before an swap-* was canceled, if any. Both the main error (canceled) and the other error are stored in the error table. To support this a new column in the error table is added "rank". The primary error has a rank 0 while the other error has a rank 1. Make an attempt to determine when an error is a "me too" error or the real cause of the problem. "Me too" errors are errors which are generally reported when the callie script determined that the caller script fails. The caller script should have reported the error, but in some cases the error didn't make it into the database. Thus if a "me too" is reported as the cause of a "swap-*" more info is needed to determine the true cause. When a "me too" error is reported it is followed by a "..." on it's own line. It is also recorded in the errors table under the new column "need_more_info". Add inferred column to the errors table. This is the same value as the inferred variable in tblog_find_error. Add revision column to errors table to make it easy to tell which algorithm was used to determine the error. --- sql/database-create.sql | 7 ++- sql/database-migrate.txt | 11 ++++ tbsetup/assign_wrapper2.in | 6 +-- tbsetup/libtblog.pm.in | 105 ++++++++++++++++++++++++++++++------- www/showlasterror.php3 | 2 +- 5 files changed, 106 insertions(+), 25 deletions(-) diff --git a/sql/database-create.sql b/sql/database-create.sql index 36288d8fb..963de4cfd 100644 --- a/sql/database-create.sql +++ b/sql/database-create.sql @@ -366,15 +366,20 @@ CREATE TABLE emulab_indicies ( CREATE TABLE errors ( session int(10) unsigned NOT NULL default '0', + rank tinyint(1) NOT NULL default '0', stamp int(10) unsigned NOT NULL default '0', exptidx int(11) NOT NULL default '0', script smallint(3) NOT NULL default '0', cause varchar(16) NOT NULL default '', confidence float NOT NULL default '0', + inferred tinyint(1) default NULL, + need_more_info tinyint(1) default NULL, mesg text NOT NULL, - PRIMARY KEY (session) + tblog_revision varchar(8) NOT NULL, + PRIMARY KEY (session,rank) ) TYPE=MyISAM; + -- -- Table structure for table `event_eventtypes` -- diff --git a/sql/database-migrate.txt b/sql/database-migrate.txt index 59f83185a..b823bef5e 100644 --- a/sql/database-migrate.txt +++ b/sql/database-migrate.txt @@ -3760,3 +3760,14 @@ last_net_act,last_cpu_act,last_ext_act); 4.98: Drop the columns from the node_types table, that were supposed to be done in revision 4.74 above. Skip to next revision ... + +4.99: Add columns to errors table for better information on errors + + alter table errors + add column rank tinyint(1) not null default 0 after session, + drop primary key, + add primary key (session, rank), + add column inferred tinyint(1) null after confidence, + add column need_more_info tinyint(1) null after inferred, + add column tblog_revision varchar(8) not null after mesg; + diff --git a/tbsetup/assign_wrapper2.in b/tbsetup/assign_wrapper2.in index 3e16c37b2..0e4ac0625 100644 --- a/tbsetup/assign_wrapper2.in +++ b/tbsetup/assign_wrapper2.in @@ -85,10 +85,10 @@ if ($exitcode) { } my $sublevel = length($space) > 0 ? 1 : 0; if ($mesg =~ s/^warning:\s+//i) { - dblog(TBLOG_WARNING, {sublevel=>$sublevel}, $mesg); + dblog(TBLOG_WARNING, {sublevel=>$sublevel, type=>'primary'}, $mesg); } else { parse_error($mesg); - dblog(TBLOG_ERR, {sublevel=>$sublevel}, $mesg); + dblog(TBLOG_ERR, {sublevel=>$sublevel, type=>'primary'}, $mesg); } } } @@ -97,7 +97,7 @@ if ($exitcode) { # already found if (!$obvious_error) { parse_error($err); - dblog(TBLOG_ERR, {}, $err); + dblog(TBLOG_ERR, {type=>'primary'}, $err); } # create a log entry that assign failed diff --git a/tbsetup/libtblog.pm.in b/tbsetup/libtblog.pm.in index 1cdc6ae5a..7b6e6fef1 100644 --- a/tbsetup/libtblog.pm.in +++ b/tbsetup/libtblog.pm.in @@ -246,6 +246,8 @@ undef $SCRIPTNAME; # signal to use $ENV{TBLOG_SCRIPTNAME} my $DB; my $TBLOG_PID; +my $REVISION_STR = '$Revision: 2.22 $'; + # # Internal Utility Functions # @@ -983,7 +985,17 @@ The relevant errors are also flagged using "relevant" flag: =cut -sub tblog_find_error () { +sub tblog_find_error ( ;$$ ); + +sub tblog_find_error ( ;$$ ) { + + my ($rank, $filter) = @_; + $rank = 0 unless defined $rank; + if (defined $filter && $filter) { + $filter = "AND ($filter)"; + } else { + $filter = ''; + } my $saved_exitcode = $?; @@ -993,6 +1005,7 @@ sub tblog_find_error () { my $cause = ''; my $cause_desc; my $confidence = 0.0; + my @related_errors; local $DBQUERY_MAXTRIES = 3; @@ -1001,18 +1014,22 @@ sub tblog_find_error () { check_env(); $session = $ENV{TBLOG_SESSION}; + my ($tblog_revision) = $REVISION_STR =~ /Revision: ([^ \$]+)/ + or die "bad REVISON string"; + # # Build the Tree # # Tree data structure: # invocation = {invocation => INT, parent => INT, # [{seq => int ...} || {seq => int, child => invox}]} + # my $root = {invocation => 0, log => []}; my %lookup = (0 => $root); my @log; - my $query_result = DBQueryFatal "select seq,parent,invocation,sublevel,priority,mesg,cause,script_name,type,inferred from log natural join scripts where session = $session and priority <= $NOTICE and attempt <= 1 and not cleanup order by seq"; + my $query_result = DBQueryFatal "select seq,parent,invocation,sublevel,priority,mesg,cause,script_name,type,inferred from log natural join scripts where session = $session and priority <= $NOTICE and attempt <= 1 and not cleanup $filter order by seq"; for (my $i = 0; $i < $query_result->num_rows; $i++) { @@ -1114,6 +1131,7 @@ sub tblog_find_error () { } elsif ($_->{priority} <= $ERR) { push @posib_errs, $_; + push @related_errors, $_; if ($_->{type} eq 'summary') { last; @@ -1158,6 +1176,18 @@ sub tblog_find_error () { } $cause = 'unknown' unless $cause; + # + # Determine need_more_info from error type + # + my $need_more_info = -1; + foreach (@relevant) { + if ($_->{type} eq 'summary' || $_->{type} eq 'primary') { + $need_more_info = 0; + } elsif ($_->{type} eq 'secondary') { + $need_more_info = 1 if $need_more_info == -1; + } + } + # # From script determine confidence # @@ -1176,6 +1206,7 @@ sub tblog_find_error () { $confidence = 0.6; } + # # Finally print/store the relevant errors # @@ -1195,36 +1226,40 @@ sub tblog_find_error () { # if multiline don't reformat $error .= "$_->{mesg}\n"; } + + $error .= "...\n" if $need_more_info == 1; + $prev = $_; } - $error = "No clue as to what went wrong!\n" unless length $error > 0; + if (length $error == 0) { + if ($rank == 0) { + $error = "No clue as to what went wrong!\n"; + } else { + $error = ""; + } + } chop $error; $cause_desc = DBQuerySingleFatal "select cause_desc from causes where cause = '$cause'"; - - print SERR "**** Experimental information, please ignore ****\n"; - print SERR "Session ID = $session\n"; - print SERR "Likely Cause of the Problem:\n"; - print SERR indent($error, ' '); - print SERR "Cause: $cause\n"; - print SERR "Confidence: $confidence\n"; - print SERR "Script: $script\n"; - print SERR "**** End experimental information ****\n"; - + my $script_num = DBQuerySingleFatal sprintf("select script from scripts where script_name=%s", DBQuoteSpecial $script); DBQueryFatal sprintf("insert into errors ". - "(stamp, exptidx, session, cause, confidence, script, mesg) ". + "(stamp, exptidx, session, rank, cause, confidence, inferred, need_more_info, script, mesg, tblog_revision) ". "values ". - "(UNIX_TIMESTAMP(now()), %d, %d, %s, %f, %d, %s)", + "(UNIX_TIMESTAMP(now()), %d, %d, %d, %s, %f, %d, %s, %d, %s, %s)", $ENV{TBLOG_EXPTIDX}, - $session, DBQuoteSpecial($cause), $confidence, $script_num, - DBQuoteSpecial($error)); - + $session, $rank, DBQuoteSpecial($cause), + $confidence, $inferred, + ($need_more_info == -1 ? 'NULL' : $need_more_info), + $script_num, + DBQuoteSpecial($error), + DBQuoteSpecial($tblog_revision)); + DBQueryFatal sprintf("update log set relevant=1 where seq in (%s)", join(',', map {$_->{seq}} @relevant)) if @relevant; @@ -1244,7 +1279,37 @@ sub tblog_find_error () { eval {tblog $WARNING, add_prefix("tblog_find_error failed", $err)}; } - + + if ($rank == 0) { + my $other_error; + if ($cause eq 'canceled') { + my @to_skip = map {$_->{seq}} @related_errors; + my $to_skip = 'seq NOT IN ('.join(',',@to_skip).')'; + $other_error = tblog_find_error(1, "cause <> 'canceled' AND $to_skip"); + } + + print SERR "**** Experimental information, please ignore ****\n"; + print SERR "Session ID = $session\n"; + print SERR "Likely Cause of the Problem:\n"; + print SERR indent($error, ' '); + print SERR "Cause: $cause\n"; + print SERR "Confidence: $confidence\n"; + print SERR "Script: $script\n"; + if ($cause eq 'canceled') { + print SERR "\n"; + if (length($other_error->{mesg}) == 0) { + print SERR "No other error found.\n"; + } else { + print SERR "Possible Error Before Cancellation:\n"; + print SERR indent($other_error->{mesg}, ' '); + print SERR "Cause: $other_error->{cause}\n"; + print SERR "Confidence: $other_error->{confidence}\n"; + print SERR "Script: $other_error->{script}\n"; + } + } + print SERR "**** End experimental information ****\n"; + } + $? = $saved_exitcode; return {session=>$session, exptidx=>$ENV{TBLOG_EXPTIDX}, mesg=>$error, cause=>$cause, cause_desc=>$cause_desc, @@ -1271,7 +1336,7 @@ sub tblog_lookup_error ( ;$ ) { ("select session, exptidx, mesg, e.cause, cause_desc, confidence, script_name as script". " from errors as e, scripts as s, causes as c". " where e.script = s.script and e.cause = c.cause ". - " and session = $session"); + " and session = $session and rank = 0"); $? = $saved_exitcode; diff --git a/www/showlasterror.php3 b/www/showlasterror.php3 index 6d24e6830..6d851fdb2 100644 --- a/www/showlasterror.php3 +++ b/www/showlasterror.php3 @@ -62,7 +62,7 @@ if (!$isadmin) { $query_result = DBQueryFatal("SELECT e.cause,e.confidence,e.mesg,cause_desc FROM experiment_stats as s,errors as e, causes as c ". - "WHERE s.exptidx = $exptidx and e.cause = c.cause and s.last_error = e.session"); + "WHERE s.exptidx = $exptidx and e.cause = c.cause and s.last_error = e.session and rank = 0"); if (mysql_num_rows($query_result) != 0) { $row = mysql_fetch_array($query_result); -- GitLab