Commit 14a6c4f8 authored by Kevin Atkinson's avatar Kevin Atkinson

Added the ability to use an "exceptions" file.

Added the ability to avoid starting new experiments if an experiment
failed to swap out, and the ability to avoid swapping out an
experiment on unexpected failures.
parent 53e16304
......@@ -3,7 +3,7 @@ use Exporter;
@ISA = "Exporter";
@EXPORT = qw (test test_cmd test_ssh test_rcmd test_experiment exit_str
ERR_MASK ERR_NONE ERR_FAILED ERR_FATAL ERR_INT
ERR_MASK ERR_NONE ERR_EXPECTED ERR_FAILED ERR_FATAL ERR_INT
STATUS_MASK STATUS_NONE STATUS_SWAPPEDIN STATUS_EXISTS STATUS_CLEANUP);
use IO::File;
......@@ -12,7 +12,7 @@ use strict;
use vars qw(%parms %dependencies %tally);
use vars qw($eid $pid $datadir $resultsdir);
use vars qw(@mapping @nodes @pnodes %to_physical %from_physical);
use vars qw($FAILED);
use vars qw($EXPECTED $FAILED);
use vars qw(%ERR %STATUS);
sub true() {1}
......@@ -22,21 +22,23 @@ sub false() {0}
# exit values, or two parts together
#
sub ERR_MASK {3};
sub ERR_NONE {0};
sub ERR_FAILED {1}; # tests failed
sub ERR_FATAL {2}; # fatal error
sub ERR_INT {3}; # interrupted
sub ERR_MASK {7};
sub ERR_NONE {0};
sub ERR_EXPECTED {1}; # expected failures
sub ERR_FAILED {2}; # tests failed
sub ERR_FATAL {3}; # fatal error
sub ERR_INT {4}; # interrupted
%ERR = (ERR_NONE, 'ERR_NONE',
ERR_EXPECTED, 'ERR_EXPECTED',
ERR_FAILED, 'ERR_FAILED',
ERR_FATAL, 'ERR_FATAL',
ERR_INT, 'ERR_INT');
sub STATUS_MASK {3 << 2};
sub STATUS_NONE {0 << 2};
sub STATUS_SWAPPEDIN {1 << 2}; # experment still swapped in
sub STATUS_EXISTS {2 << 2}; # experment still exists
sub STATUS_CLEANUP {3 << 2}; # requires cleanup
sub STATUS_MASK {3 << 3};
sub STATUS_NONE {0 << 3};
sub STATUS_SWAPPEDIN {1 << 3}; # experment still swapped in
sub STATUS_EXISTS {2 << 3}; # experment still exists
sub STATUS_CLEANUP {3 << 3}; # requires cleanup
%STATUS = (STATUS_NONE, 'STATUS_NONE',
STATUS_SWAPPEDIN, 'STATUS_SWAPPEDIN',
STATUS_EXISTS, 'STATUS_EXISTS',
......@@ -47,6 +49,18 @@ sub exit_str($) {
return join(' ', $ERR{$exit & ERR_MASK}, $STATUS{$exit & STATUS_MASK});
}
#
#
#
# Membership test against one of the list-valued experiment parameters.
#
#   $what  - the name to look for (compared with string equality)
#   $which - key into %parms whose value is an array reference of names
#
# Returns true if $what is an element of @{$parms{$which}}, otherwise false.
#
sub oneof ($$) {
    my ($needle, $list_key) = @_;
    for my $candidate (@{$parms{$list_key}}) {
        next unless $candidate eq $needle;
        return true;
    }
    return false;
}
#
# Performs a test on a swapped in experiment. Returns true if the test
# passed.
......@@ -64,6 +78,11 @@ sub exit_str($) {
#
sub test ($$&) {
my ($name,$requires,$test) = @_;
if (oneof($name, 'skip')) {
return 0;
}
$tally{total}++;
my $deps_sat = 1;
......@@ -85,14 +104,26 @@ sub test ($$&) {
print ">=== \"$name\" succeeded\n";
return true;
} elsif ($@) {
$tally{failed}++;
print $FAILED "$name\n";
print ">*** \"$name\" died: $@";
if (oneof($name, 'ignore')) {
$tally{expected}++;
print $EXPECTED "$name\n";
print ">=== \"$name\" died: $@ -- ignored";
} else {
$tally{failed}++;
print $FAILED "$name\n";
print ">*** \"$name\" died: $@";
}
return false;
} else {
$tally{failed}++;
print $FAILED "$name\n";
print ">*** \"$name\" failed\n";
if (oneof($name, 'ignore')) {
$tally{expected}++;
print $EXPECTED "$name\n";
print ">=== \"$name\" failed -- ignored\n";
} else {
$tally{failed}++;
print $FAILED "$name\n";
print ">*** \"$name\" failed\n";
}
return false;
}
}
......@@ -280,7 +311,7 @@ sub test_experiment (%) {
%parms = @_;
%dependencies = ();
%tally = (total => 0, passed => 0, failed => 0);
%tally = (total => 0, passed => 0, expected => 0, failed => 0);
$eid = $parms{eid};
$pid = $parms{pid};
......@@ -324,10 +355,15 @@ sub test_experiment (%) {
close $F;
$FAILED = new IO::File ">failed-tests" or die;
$EXPECTED = new IO::File ">failed-but-ignored" or die;
$F = new IO::File ">parms" or die;
foreach (sort keys %parms) {
print $F "$_: $parms{$_}\n";
unless (ref $parms{$_}) {
print $F "$_: $parms{$_}\n";
} else {
print $F "$_: @{$parms{$_}}\n";
}
}
close $F;
......@@ -422,8 +458,9 @@ sub test_experiment (%) {
}
}
if ($parms{stages} =~ /[oe]/) {
if ($parms{stages} =~ /[oe]/ &&
(!$parms{dont_swapout_unexpected} || $tally{failed} == 0)) {
test_cmd 'loghole', [], "loghole -e $pid/$eid sync";
foreach my $node (@nodes) {
......@@ -447,9 +484,11 @@ sub test_experiment (%) {
}
$err = ERR_FAILED if $err == ERR_NONE && $tally{failed} > 0;
$err = ERR_FAILED if $err == ERR_NONE && $tally{failed} > 0;
$err = ERR_EXPECTED if $err == ERR_NONE && $tally{expected} > 0;
if ($parms{stages} =~ /e/) {
if ($parms{stages} =~ /e/ &&
(!$parms{dont_swapout_unexpected} || $tally{failed} == 0)) {
sys("cp -pr /proj/$pid/exp/$eid exp-data");
if ($? >> 8 != 0) {
......@@ -469,11 +508,12 @@ sub test_experiment (%) {
if ($parms{stages} =~ /t/) {
print "\n";
print "Num Tests: $tally{total}\n";
print "Passed: $tally{passed}\n";
print "Failed: $tally{failed}\n";
my $unex = $tally{total} - $tally{passed} - $tally{failed};
print "Unable to Execute: $unex\n";
print "Num Tests: $tally{total}\n";
print "Passed: $tally{passed}\n";
print "Expected Failures: $tally{expected}\n";
print "Unexpected Failures: $tally{failed}\n";
my $unex = $tally{total} - $tally{passed} - $tally{expected} - $tally{failed};
print "Unable to Execute: $unex\n";
}
......
......@@ -35,8 +35,8 @@ Some of the files in this directory include:
log: all output to stdout and stderr is redirected here
nsfile.ns: the nsfile used
parms: the value of the different parmaters used in the experment.
This is diffrent from the one found in the test directory described
parms: the value of the different parameters used in the experiment.
This is different from the one found in the test directory described
later.
failed-tests: a list of tests that failed
exp-data/: a copy of the experiment data as found in /proj/PID/exp/EID
......@@ -54,14 +54,25 @@ more than NUM nodes it will be run in series after all the experiments
that can be run in parallel have finished. Thus setting NUM to 0 will
force all the experiments to run in series.
Setting -m sets a soft limit to the number of nodes to use. To set a
hard limit use "-M NUM". If an experemnt will use more nodes than NUM
hard limit use "-M NUM". If an experiment will use more nodes than NUM
it will simply not be run.
If an experiment fails to swap out, the number of available nodes will
decrease, but other experiments will continue to run as long as there
are enough nodes left. If there is only a soft limit, all the
experiments will still run, but some of them may not be in parallel.
If a hard limit is specified, then some experiments may be skipped if
there are no longer enough nodes available.
To prevent new experiments from starting after a failed swap out, use the
"-h" option. Any existing experiments already running will be allowed
to continue, however.
Experiments that fail to swap out decrease both the soft and hard
limits.
To prevent an experiment from being swapped out when a test fails, use
the "-u" option. Since there are some tests which are expected to fail
occasionally, this option is most useful when an exceptions file is
given via the "-e" option. See "EXCEPTIONS FILE" for more info.
To only run particular experiment use:
......@@ -106,6 +117,7 @@ Or to just run the tests on a already swapped in experiment use:
./image-test -s t ...
TESTING FRAMEWORK
Each individual experiment is expected to be the directory "tests/EXP".
......@@ -152,8 +164,8 @@ For each node in the experiment the following tests will be run:
hostname-NODE: make sure host name is what it is expected to be
login_prompt-NODE: make sure that the login prompt appears
in the console
proj_mount-NODE: make sure that the approicate/proj directory is mounted
and readable
proj_mount-NODE: make sure that the appropriate "/proj" directory is
mounted and readable
For experiments with more than one node:
......@@ -181,3 +193,31 @@ Sending a SIGHUP to the the parent will only terminate the parent.
The test will proceed normally in the background. Similarly, sending
a SIGTSTP will only suspend the parent.
EXCEPTIONS FILE
An exceptions file may be specified with the "-e <FILE>" option. For
example:
./image-test -e FILE IMAGE
The exceptions file allows you to specify 1) experiments to skip, 2)
individual tests within an experiment to skip, and 3) tests to ignore
failures for.
(1) and (2) are useful if there are experiments or individual tests
which are not relevant for the image being tested. (3) is primarily
useful with the "-u" option.
Each line of the exceptions file is one of the following. Blank lines
and extra whitespace are ignored, as well as anything after a "#".
skip <exps>
in <exp> skip <tests>
in <exp> ignore <tests>
<exps> is a list of one or more experiments separated by spaces.
<exp> is a single experiment name. A "*" may be used as a wildcard.
<tests> is a list of one or more tests.
As with the experiments specified on the command line, a base name of
an experiment may also be specified.
......@@ -33,24 +33,27 @@ sub LARGE_NUM {100000}
#
sub usage() {
die "usage: $0 -h | -l | [OPTIONS] IMAGE [TESTS ...]\n";
die "usage: $0 -h | -l | [OPTIONS] IMAGE [EXPS ...]\n";
}
sub help() {
print
("usage: $0 -h | -l | [OPTIONS] IMAGE [TESTS ...]\n".
("usage: $0 -h | -l | [OPTIONS] IMAGE [EXPS ...]\n".
" IMAGE image to use\n".
" TESTS if present only run these particular tests\n".
" -h --help print this help message\n".
" EXPS if present only run these particular experiments\n".
" -? --help print this help message\n".
" -l --list just list the available experements\n".
"OPTIONS:\n".
" -s STAGES only execute particular stages of the test where\n".
" -s STAGES only execute particular stages of the experiment where\n".
" STAGES is any one of:\n".
" c: create, s: swapin, t: test o: swapout, e: end experment\n".
" (note 'e' implies 'o')\n".
" -p run experments in parallel\n".
" -m NUM soft limit on the maximum number of nodes to use at once\n".
" -M NUM hard limit on the maximum number of nodes to use at once\n".
" -e FILE exceptions file to use\n".
" -u don't swap an experment out with unexpected failed tests\n".
" -h halt on failed swapouts\n".
" --skip-std-tests skip standard tests expect for ssh-*\n");
}
......@@ -80,17 +83,23 @@ my $stages = 'cstoe';
my $in_parallel;
my $soft_limit = 5;
my $hard_limit = 0;
my $exceptions_file;
my $dont_swapout_unexpected = false;
my $halt_failed_swapouts = false;
my $just_list;
my $skip_std_tests;
my $skip_std_tests = false;
Getopt::Long::Configure 'no_ignore_case';
GetOptions
"h|help" => sub {help(); exit 0;},
"help|?" => sub {help(); exit 0;},
"s=s" => \$stages,
"p" => \$in_parallel,
"m=i" => \$soft_limit,
"M=i" => \$hard_limit,
"e=s" => \$exceptions_file,
"u" => \$dont_swapout_unexpected,
"h" => \$halt_failed_swapouts,
"l|list" => \$just_list,
"skip-std-tests" => \$skip_std_tests
or usage();
......@@ -99,6 +108,11 @@ $soft_limit = LARGE_NUM if $soft_limit == 0;
$hard_limit = LARGE_NUM if $hard_limit == 0;
$soft_limit = $hard_limit if $soft_limit > $hard_limit;
if (@ARGV == 0) {
print STDERR "Must specify an image.\n";
usage();
}
my $os = shift @ARGV;
my @exps_torun = map {lc} @ARGV;
......@@ -107,6 +121,48 @@ if ($stages =~ /([^cstoe])/) {
usage();
}
########################################################################
#
# Get the list of exceptions
#
# Data collected from the exceptions file (the "-e FILE" option):
#   @exps_toskip    - experiment names given on "skip <exps>" lines
#   %tests_toskip   - experiment name => [tests] from "in <exp> skip <tests>"
#   %tests_toignore - experiment name => [tests] from "in <exp> ignore <tests>"
my @exps_toskip;
my %tests_toskip;
my %tests_toignore;

if (defined $exceptions_file) {
    # The user explicitly asked for this file, so failing to read it is
    # fatal rather than silently running with no exceptions.
    open my $exc_fh, '<', $exceptions_file
        or die "Unable to open exceptions file \"$exceptions_file\": $!\n";

    my $lineno = 0;
    while (<$exc_fh>) {
        # Count every physical line, including blank and comment-only
        # ones, so that error messages point at the right line.
        $lineno++;

        chomp;          # chomp, not chop: the last line may lack a newline
        s/\#.*//;       # strip comments
        s/^\s+//;       # strip leading whitespace
        next unless /\S/;

        if (s/^in\s+(\S+)\s+//) {
            # "in <exp> skip <tests>" or "in <exp> ignore <tests>"
            my $exp = $1;
            if (s/^skip\s+//) {
                my @tests = split /\s+/;
                push @{$tests_toskip{$exp}}, @tests;
            } elsif (s/^ignore\s+//) {
                my @tests = split /\s+/;
                push @{$tests_toignore{$exp}}, @tests;
            } else {
                die "Syntax error in line $lineno of \"$exceptions_file\"\n";
            }
        } elsif (s/^skip\s+//) {
            # "skip <exps>"
            my @exps = split /\s+/;
            push @exps_toskip, @exps;
        } else {
            die "Syntax error in line $lineno of \"$exceptions_file\"\n";
        }
    }
    close $exc_fh;
}
########################################################################
#
# Get the list of experments to run
......@@ -154,7 +210,8 @@ sub get_numnodes ($) {
return $_;
}
sub use_exp ($);
sub use_exp ( $ );
sub get_exceptions ( $$ );
sub mktests ($$$@) {
my ($test, $numnodes, $what, @parms) = @_;
......@@ -170,6 +227,9 @@ sub mktests ($$$@) {
resultsdir => "$destdir/$t",
@$what,
stages => $stages,
dont_swapout_unexpected => $dont_swapout_unexpected,
skip => get_exceptions($t, \%tests_toskip),
ignore => get_exceptions($t, \%tests_toignore),
skip_std_tests => $skip_std_tests};
push @exps, $t;
} else {
......@@ -182,6 +242,10 @@ sub mktests ($$$@) {
sub use_exp ($) {
my ($exp) = @_;
foreach my $e (@exps_toskip) {
return false if ($e eq $exp);
return false if $exp =~ /^$e-/;
}
return true if (@exps_torun == 0);
foreach my $e (@exps_torun) {
return true if ($e eq $exp);
......@@ -190,6 +254,16 @@ sub use_exp ($) {
return false;
}
# Collect the exception entries that apply to one experiment.
#
#   $exp        - name of the experiment being set up
#   $exceptions - hash reference mapping an experiment name (or "*") to an
#                 array reference of test names
#
# An entry applies when its key is "*", is exactly $exp, or is a base name
# of $exp, i.e. $exp starts with the key followed by "-".  The key is
# matched literally: regex metacharacters in it have no special meaning.
#
# Returns a reference to a new array holding all applicable test names.
# Keys are visited in sorted order so the result is deterministic.
sub get_exceptions ($$) {
    my ($exp, $exceptions) = @_;
    my @res;
    foreach my $e (sort keys %$exceptions) {
        push(@res, @{$exceptions->{$e}})
            if $e eq '*' || $e eq $exp || $exp =~ /^\Q$e\E-/;
    }
    return \@res;
}
########################################################################
#
# Handle the just list case
......@@ -427,12 +501,18 @@ sub wait_exp () {
unless (($exit & STATUS_MASK) == STATUS_SWAPPEDIN) {
$avail_in_parallel += $exps{$exp}{numnodes};
} else {
# the experient failed to swap out, since the nodes are still in
# use substract that number from the soft and hard limits
$soft_limit -= $exps{$exp}{numnodes} unless $soft_limit == LARGE_NUM;
$hard_limit -= $exps{$exp}{numnodes} unless $hard_limit == LARGE_NUM;
print " only $hard_limit nodes now available for use\n"
unless $hard_limit == LARGE_NUM;
# the experient failed to swap out
if ($halt_failed_swapouts) {
print " will not start any new experiments\n";
$quit = 1;
} else {
# since the nodes are still in use substract that number from
# the soft and hard limits
$soft_limit -= $exps{$exp}{numnodes} unless $soft_limit == LARGE_NUM;
$hard_limit -= $exps{$exp}{numnodes} unless $hard_limit == LARGE_NUM;
print " only $hard_limit nodes now available for use\n"
unless $hard_limit == LARGE_NUM;
}
}
print "FINISHED TEST EXP $exp\n";
}
......@@ -504,6 +584,18 @@ foreach (@results) {
&print_results_summary(@$_);
}
# Return the names of the tests that failed but were ignored for an
# experiment, as a single space-separated string.
#
#   $exp - experiment name; used as a key into %exps to find the
#          experiment's results directory
#
# The names are read, one per line, from the "failed-but-ignored" file in
# that directory.  If the file cannot be opened or is empty, an empty
# string is returned so callers can safely concatenate the result.
sub failed_but_ignored($) {
    my ($exp) = @_;
    my $path = "$exps{$exp}{resultsdir}/failed-but-ignored";

    # Best effort: this only feeds the results summary, so a missing file
    # is reported as "no names" rather than aborting the run.
    open my $fh, '<', $path or return '';

    my @names;
    while (my $line = <$fh>) {
        chomp $line;
        push @names, $line;
    }
    close $fh;

    return join ' ', @names;
}
sub failed_tests($) {
my ($exp) = @_;
open FAILED, "$exps{$exp}{resultsdir}/failed-tests";
......@@ -520,13 +612,23 @@ sub results_summary($$) {
my ($exp, $res) = @_;
return "passed"
if $res == (ERR_NONE | STATUS_NONE);
if $res == (ERR_NONE | STATUS_NONE);
return "passed but is still swapped in"
if $res == (ERR_NONE | STATUS_SWAPPEDIN);
if $res == (ERR_NONE | STATUS_SWAPPEDIN);
return "passed but still exists"
if $res == (ERR_NONE | STATUS_EXISTS);
if $res == (ERR_NONE | STATUS_EXISTS);
return "passed but requires cleanup"
if $res == (ERR_NONE | STATUS_CLEANUP);
if $res == (ERR_NONE | STATUS_CLEANUP);
return "passed with expected failures: ".failed_but_ignored($exp)
if $res == (ERR_EXPECTED | STATUS_NONE);
return "passed with expected failures but is still swapped in: ".failed_but_ignored($exp)
if $res == (ERR_EXPECTED | STATUS_SWAPPEDIN);
return "passed with expected failures but still exists: ".failed_but_ignored($exp)
if $res == (ERR_EXPECTED | STATUS_EXISTS);
return "passed with expected failures but requires cleanup: ".failed_but_ignored($exp)
if $res == (ERR_EXPECTED | STATUS_CLEANUP);
return "had failed tests: ".failed_tests($exp)
if $res == (ERR_FAILED | STATUS_NONE);
return "is still swapped in and had failed tests: ".failed_tests($exp)
......@@ -535,22 +637,24 @@ sub results_summary($$) {
if $res == (ERR_FAILED | STATUS_EXISTS);
return "requires cleanup and had failed tests: ".failed_tests($exp)
if $res == (ERR_FAILED | STATUS_CLEANUP);
return "had one of more fatal errors"
if $res == (ERR_FATAL | STATUS_NONE);
if $res == (ERR_FATAL | STATUS_NONE);
return "had one of more fatal errors and is still swapped in"
if $res == (ERR_FATAL | STATUS_SWAPPEDIN);
if $res == (ERR_FATAL | STATUS_SWAPPEDIN);
return "had one of more fatal errors and still exists"
if $res == (ERR_FATAL | STATUS_EXISTS);
if $res == (ERR_FATAL | STATUS_EXISTS);
return "had one of more fatal errors and requires cleanup"
if $res == (ERR_FATAL | STATUS_CLEANUP);
if $res == (ERR_FATAL | STATUS_CLEANUP);
return "was interrupted"
if $res == (ERR_INT | STATUS_NONE);
if $res == (ERR_INT | STATUS_NONE);
return "was interrupted and is still swapped in"
if $res == (ERR_INT | STATUS_SWAPPEDIN);
if $res == (ERR_INT | STATUS_SWAPPEDIN);
return "was interrupted and still exists"
if $res == (ERR_INT | STATUS_EXISTS);
if $res == (ERR_INT | STATUS_EXISTS);
return "was interrupted and requires cleanup"
if $res == (ERR_INT | STATUS_CLEANUP);
if $res == (ERR_INT | STATUS_CLEANUP);
return "exited with unknown code: $res";
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment