Commit 8a7e6259 authored by Kevin Tew's avatar Kevin Tew

Backoff and function documentation

parent debd552d
ImageTest
Parameters
OldTests
DOCS TODO
retry example
backoff example
TODO
TIMEOUT of XMLRPC Calls
......@@ -18,11 +22,10 @@ STILL MESSY
EXPAND CURRENT IMPLEMENTATION
event subsystem
parallel support (custom test harness)
Test::Builder support
LATER
Client.pm duplicate code elimination - Maybe this would make the code too unreadable
TestSuite::Experiment::Macros should be a monadic language like JQuery
test groupings
Large External Tars and Resources for experiements
buildup, teardown using Test::Class
......
......@@ -59,38 +59,53 @@ sub runtests {
$concurrent_node_count_usage ||= $TBConfig::concurrent_node_usage;
#prerun step
my $result = TestBed::ForkFramework::MaxWorkersScheduler::work($concurrent_pre_runs, sub { shift->prep }, $Executors);
my $result = TestBed::ForkFramework::ForEach::max_work($concurrent_pre_runs, sub { shift->prep }, $Executors);
if ($result->has_errors) {
sayd($result->errors);
warn 'TestBed::ParallelRunner::runtests died during test prep';
}
#create schedule step
my @schedule;
my $workscheduler = TestBed::ForkFramework::WeightedScheduler->new(
items => $Executors,
proc => &tap_wrapper,
maxnodes => $concurrent_node_count_usage,
);
#add taskss to scheduler step
my $total_test_count = 0;
for (@{$result->successes}) {
my $item_id = $_->itemid;
my $itemId = $_->itemid;
my $executor = $Executors->[$itemId];
my $maximum_nodes = $_->result->{'maximum_nodes'};
my $eid = $Executors->[$item_id]->e->eid;
#say "$eid $item_id $maximum_nodes";
my $eid = $executor->e->eid;
if ($maximum_nodes > $concurrent_node_count_usage) {
warn "$eid requires upto $maximum_nodes nodes, only $concurrent_node_count_usage concurrent nodes permitted\n$eid will not be run";
}
else {
push @schedule, [ +$maximum_nodes, +$item_id ];
$workscheduler->add_task($itemId, $maximum_nodes);
$total_test_count += $executor->test_count;
}
}
@schedule = sort { $a->[0] <=> $b->[0] } @schedule;
#count tests step
my $test_count = 0;
map { $test_count += $Executors->[$_->[1]]->test_count } @schedule;
USE_TESTBULDER_PREAMBLE: {
reset_test_builder($total_test_count, no_numbers => 1);
}
#run tests
reset_test_builder($test_count, no_numbers => 1);
$result = TestBed::ForkFramework::RateScheduler::work($concurrent_node_count_usage, \&tap_wrapper, \@schedule, $Executors);
set_test_builder_to_end_state($test_count);
$result = $workscheduler->run;
USE_TESTBULDER_POSTAMBLE: {
$total_test_count = 0;
for (@{$result->successes}) {
my $item_id = $_->itemid;
my $executor = $Executors->[$item_id];
$total_test_count += $executor->test_count;
}
set_test_builder_to_end_state($total_test_count);
}
if ($result->has_errors) {
sayd($result->errors);
die 'TestBed::ParallelRunner::runtests died during test execution';
......@@ -167,10 +182,14 @@ TestBed::ParallelRunner
=over 4
=item C<< add_executor >>
=item C<< build_executor >>
helper function called by rege.
creates a TestBed::ParallelRunner::Execu:or job and pushes it onto @$Executors
creates a TestBed::ParallelRunner::Executor job
=item C<< add_executor($executor) >>
pushes $executor onto @$Executors
=item C<< runtests >>
......
......@@ -9,3 +9,17 @@ our @EXPORT = qw(RETURN_AND_REPORT);
use constant RETURN_AND_REPORT => 4096;
1;
=head1 NAME
TestBed::ParallelRunner::ErrorConstants
contains RETURN_AND_REPORT error constant;
=over 4
=back
=cut
1;
......@@ -14,6 +14,7 @@ sub handleResult {
$s->executor($executor);
$s->scheduler($scheduler);
$s->result($result);
if ($result->is_error) {
my $error = $result->error;
if ( $error->isa ( 'TestBed::ParallelRunner::Executor::SwapinError')) { return $s->swapin_error ( @_); }
......@@ -38,6 +39,14 @@ sub xmlrpc_error_cause {
return;
}
sub is_retry_cause {
my $s = shift;
my $cause = $s->xmlrpc_error_cause;
my @retry_causes = qw(temp internal software hardware canceled unknown);
if ( grep { /$$cause/ } @retry_causes) { return 1; }
return 0;
}
sub swapin_error { return RETURN_AND_REPORT; }
sub run_error { return RETURN_AND_REPORT; }
sub swapout_error { return RETURN_AND_REPORT; }
......@@ -52,16 +61,73 @@ extends 'TestBed::ParallelRunner::ErrorStrategy';
sub swapin_error {
my ($s, $executor, $scheduler, $result) = @_;
if ($s->is_retry_cause) { return $scheduler->retry($result); }
if ($s->is_retry_cause) {
warn "Retrying";# . $executor->e->eid;
return $s->scheduler->retry($result);
}
else { return RETURN_AND_REPORT; }
}
sub is_retry_cause {
my $s = shift;
my $cause = $s->xmlrpc_error_cause;
my @retry_causes = qw(temp internal software hardware canceled unknown);
if ( grep { /$$cause/ } @retry_causes) { return 1; }
return 0;
package TestBed::ParallelRunner::BackoffStrategy;
use SemiModern::Perl;
use Mouse;
use TestBed::ParallelRunner::ErrorConstants;
has 'starttime' => (is => 'rw', default => sub { time; } );
has 'retries' => (is => 'rw', default => 0 );
has 'maxtime' => (is => 'rw', default => 5 * 60 * 60 );
has 'maxretries' => (is => 'rw', default => 3 );
has 'waittime' => (is => 'rw', default => 10 * 60 );
has 'exponent' => (is => 'rw', default => 0.0 );
extends 'TestBed::ParallelRunner::ErrorStrategy';
sub build {
shift;
my $s = TestBed::ParallelRunner::BackoffStrategy->new;
$s->parse(shift);
$s;
}
sub parse {
my ($s, $args) = @_;
my @args = split(/:/, $args);
my $val;
$val = shift @args; $s->waittime($val) if $val;
$val = shift @args; $s->maxretries($val) if $val;
$val = shift @args; $s->exponent($val) if $val;
$val = shift @args; $s->maxtime($val) if $val;
}
sub incr_retries { shift->{retries}++; }
sub backoff {
my ($s, $result) = @_;
my $maxretries = $s->maxretries;
if ($s->retries >= $maxretries) {
warn "Max retries $maxretries reached, terminating experiment";
return RETURN_AND_REPORT;
}
my $timeout = $s->starttime + $s->maxtime;
if (time >= $timeout) {
warn "Max timeout $timeout reached, terminating experiment";
return RETURN_AND_REPORT;
}
$s->incr_retries;
$s->waittime($s->waittime * (2 ** $s->exponent));
my $next_time = time + $s->waittime;
warn "Backing off " . ($next_time - time) . " seconds.";
return $s->scheduler->schedule_at($result, $next_time);
}
sub swapin_error {
my ($s, $executor, $scheduler, $result) = @_;
if ($s->is_retry_cause) {
return $s->backoff($result);
}
else { return RETURN_AND_REPORT; }
}
=head1 NAME
......@@ -76,13 +142,45 @@ handle parallel run errors;
dispatch experiment errors to the right handler
=item C<< swapin_error($error) >>
=item C<< run_error($error) >>
=item C<< swapout_error($error) >>
=item C<< end_error($error) >>
=item C<< $es->swapin_error($error) >>
=item C<< $es->run_error($error) >>
=item C<< $es->swapout_error($error) >>
=item C<< $es->end_error($error) >>
=item C<< $es->xmlrpc_error_cause($error) >>
parses the xmlrpc error cause out of the original embedded xmlrpc error
=item C<< es->is_retry_cause >>
return true if the error is retriable
=back
=head1 NAME
TestBed::ParallelRunner::BackoffStrategy
implement parallel backoff
=over 4
=item C<< $bs->build( "waittime:maxretires:exponent:maxtime" ) >>
builds backoff strategy from arg string
=item C<< $bs->parse( "waittime:maxretires:exponent:maxtime" ) >>
parses arg string and sets object members
=item C<< $bs->incr_retries >>
incrementes attempted retries
=item C<< $bs->incr_retries >>
schedules task for exectution after backoff time
=cut
1;
......@@ -44,25 +44,37 @@ has 'test_count' => ( isa => 'Any', is => 'rw');
has 'error_strategy' => ( is => 'rw', lazy => 1, default => sub { TestBed::ParallelRunner::ErrorStrategy->new; } );
has 'pre_result_handler' => ( isa => 'CodeRef', is => 'rw');
sub build {
my ($e, $ns, $sub, $test_count, $desc) = (shift, shift, shift, shift, shift);
sub parse_options {
my %options = @_;
if (defined (delete $options{retry})) {
$options{error_strategy} = TestBed::ParallelRunner::ErrorRetryStrategy->new;
}
if (defined (my $params = delete $options{backoff})) {
$options{error_strategy} = TestBed::ParallelRunner::BackoffStrategy->build($params);
}
if (defined (my $strategy = delete $options{strategy})) {
$options{error_strategy} = $strategy;
}
%options;
}
sub buildt { shift; TestBed::ParallelRunner::Executor->new( parse_options(@_)); }
sub build {
shift;
my ($e, $ns, $sub, $test_count, $desc) = (shift, shift, shift, shift, shift);
return TestBed::ParallelRunner::Executor->new(
'e' => $e,
'ns' => $ns,
'proc' => $sub,
'test_count' => $test_count,
'desc' => $desc,
%options
parse_options(@_)
);
}
......@@ -128,6 +140,15 @@ handles the result using a error strategy
swaps in the experiment and runs the specified test
it kills the experiment unconditionaly after the test returns
=item C<< $prt->parse_options >>
parses retry =>1, backoff => "\d+:\d+:\d+:\d+", strategy => '....' options
and build the appropriate error_strategy object
=item C<< $prt->buildt >>
builds a naked TestBed::ParallelRunner::Executor for testing purposes
=back
=cut
......
......@@ -140,6 +140,10 @@ creates a new experiment with pid, gid, and eid
registers experiement with parallel test running engine
=item C<pr_e($e, $ns_contents, &test_sub, $test_count, $desc, %options)>
registers experiement with parallel test running engine
=item C<runtests($concurrent_pre_runs, $concurrent_node_count_usage) >
allows a maximum of $concurrent_pre_runs during parallel execution
......
......@@ -212,7 +212,10 @@ sub linkdown {
TestBed::Wrap::tevc::tevc($e->pid, $e->eid, "now $link down");
}
=item C<< $e->pretty_list() >>
prints a list of all experiments and there status
=cut
sub pretty_list {
use TestBed::XMLRPC::Client::Pretty;
pretty_listexp(shift->getlist_full);
......
......@@ -30,9 +30,11 @@ executes hostname, sudo ls, mount via ssh on the remote node
sub single_node_tests {
my ($s) = @_;
my $ssh = $s->ssh();
$ssh->cmdsuccess("hostname");
$ssh->cmdsuccess("sudo id");
$ssh->cmdsuccess("mount");
my $eid = $s->experiment->eid;
my $name = $s->name;
$ssh->cmdmatch("hostname", qr/$name/, "$eid $name hostname died");
$ssh->cmdmatch("sudo id", qr/uid=0\(root\)/, "$eid $name sudo died");
$ssh->cmdmatch("mount", qr{/proj/}, "$eid $name mountdied");
}
=item C<< $n->ssh >>
......
......@@ -271,6 +271,14 @@ hook for generating a random $eid if desired
modifies the current experiment with the give $ns file
=item C<< $e->modify_ns_wait($ns) >>
modifies the current experiment with the give $ns file and waits for the modifications to complete
=item C<< $e->fqnodenames >>
returns a list of fully qualified node names in the experiement
=item C<< $e->noemail >>
B<INTERNAL>: generates the noemail attribute for xmlrpc calls if so configured in TBConfig
......
......@@ -23,7 +23,7 @@ sub instance {
}
sub wrapped_ssh {
my ($invocant, $user, $cmd, $checker) = @_;
my ($invocant, $user, $cmd, $checker, $diemessage) = @_;
my $ssh;
if (ref $invocant) { $ssh = $invocant }
else {
......@@ -36,7 +36,7 @@ sub wrapped_ssh {
}
if (defined $checker) {
&$checker(@results) || die "ssh checker of cmd $cmd failed";
&$checker(@results) || die ($diemessage || "ssh checker of cmd $cmd failed");
}
($results[2], @results);
}
......@@ -64,6 +64,11 @@ sub cmdsuccess {
return wrapped_ssh($host, $TBConfig::EMULAB_USER, $cmd, sub { $_[2] == 0; } );
}
sub cmdmatch {
my ($host, $cmd, $regex, $diemessage) = @_;
return wrapped_ssh($host, $TBConfig::EMULAB_USER, $cmd, sub { $_[0] =~ $regex; }, $diemessage );
}
sub cmdsuccessdump {
my ($host, $cmd) = @_;
return wrapped_ssh($host, $TBConfig::EMULAB_USER, $cmd, sub { print Dumper(\@_); $_[2] == 0; } );
......@@ -124,6 +129,10 @@ returns the ssh result code of executing $cmd as $TBConfig::EMULAB_USER
returns the ssh result code of executing $cmd as $TBConfig::EMULAB_USER and dumps the ssh stdout, stderr, resultcode
=item C<cmdmatch($host, $cmd, $regex, $diemessage)>
executes $cmd as $TBConfig::EMULAB_USER and dies with diemessage if stdout doesn't match $regex
=back
=cut
......
......@@ -48,10 +48,14 @@ Tools::TBSSH
B<LOWLEVEL SUB> execute $cmd on $host as $user by wrapping cmdline ssh
=item C<< $ssh->scp_worker(@files) >>
=item C<< $ssh->scp_worker(@files) >>
B<LOWLEVEL SUB> execute $scp with $files as arguments
=item C<< $ssh->saydebug() >>
B<LOWLEVEL SUB> prints out ssh command line
=back
=cut
......
......@@ -5,26 +5,34 @@ use TestBed::ParallelRunner::Executor;
use RPC::XML;
use Data::Dumper;
use Test::Exception;
use Test::More tests => 3;
use Test::More tests => 4;
my $results = TestBed::ForkFramework::MaxWorkersScheduler::work(2, sub { my $d = `date`; chomp($d); $_[0] . " $d " . $$; }, ['K1', 'K2', 'K3', 'K4'] );
ok($results->has_errors == 0 && @{ $results->successes } == 4, 'ForkFramework::MaxWorkersScheduler::work');
my $date_id_sub = sub { my $d = `date`; chomp($d); $_[0] . " $d " . $$; };
my $results = TestBed::ForkFramework::ForEach::max_work(2, $date_id_sub, ['K1', 'K2', 'K3', 'K4'] );
ok($results->has_errors == 0 && @{ $results->successes } == 4, 'ForkFramework::ForEach::max_work');
#say Dumper($results);
$results = TestBed::ForkFramework::RateScheduler::work(4, sub { my $d = `date`; chomp($d); $_[0] . " $d " . $$; },
[[1, 1], [1, 0], [2, 2], [3, 3]],
['K1', 'K2', 'K3', 'K4'] );
ok($results->has_errors == 0 && @{ $results->successes } == 4, 'ForkFramework::RateScheduler::work');
$results = TestBed::ForkFramework::WeightedScheduler::work(4, $date_id_sub, [['K1', 1], ['K2', 1], ['K3', 2], ['K4', 3] ] );
ok($results->has_errors == 0 && @{ $results->successes } == 4, 'ForkFramework::WeightedScheduler::work');
#say Dumper($results);
use TestBed::ParallelRunner::ErrorStrategy;
$results = TestBed::ForkFramework::RateScheduler::work(4,
$results = TestBed::ForkFramework::WeightedScheduler::work(1,
sub { die TestBed::ParallelRunner::Executor::SwapinError->new(
original => RPC::XML::struct->new( { value => { cause => 'temp' } })); },
[[1, 0]],
[ TestBed::ParallelRunner::Executor->new(test_count => 1,
error_strategy => TestBed::ParallelRunner::ErrorRetryStrategy->new) ]
[[ TestBed::ParallelRunner::Executor->buildt(test_count => 1, retry => 1), 1]]
);
ok($results->has_errors == 1 && @{ $results->errors } == 1, 'ForkFramework::WeightedScheduler::work, retry');
#say Dumper($results);
my $launchtime = time;
$results = TestBed::ForkFramework::WeightedScheduler::work(4,
sub {
die TestBed::ParallelRunner::Executor::SwapinError->new(
original => RPC::XML::struct->new( { value => { cause => 'temp' } })) if (time - $launchtime < 6); 1;},
[[ TestBed::ParallelRunner::Executor->buildt(test_count => 1, backoff => "2:10:0"), 1]]
);
ok($results->has_errors == 1 && @{ $results->errors} == 1, 'ForkFramework::RateScheduler::work, retry');
ok($results->has_errors == 0 && @{ $results->successes } == 1, 'ForkFramework::WeightedScheduler::work, backoff');
#say Dumper($results);
......@@ -14,5 +14,6 @@ set node1 [$ns node]
$ns run
NSEND
ok(e('sn1')->startrunkill($ns, sub { shift->single_node_tests }), 'single_node_tests');
#ok(e('sn1')->startrunkill($ns, sub { shift->single_node_tests }), 'single_node_tests');
ok(e('sn1')->startrun($ns, sub { shift->single_node_tests }), 'single_node_tests');
#ok(e('sn1')->single_node_tests, 'single_node_tests');
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment