Commit 852cc93c authored by Leigh B Stoller
Browse files

More work on preventing linktest from looping; it hangs on the backend

cluster more than you think it would.
parent 33628084
......@@ -2148,10 +2148,12 @@ sub DoLinktest()
exit($status);
}
#
# Loop, asking each cluster for the linktest status,
# Loop, asking each cluster for the linktest status.
#
my $tlimit = 3600;
my $errors = 0;
my %running = map { $_->aggregate_urn() => $_ } @agglist;
while (keys(%running)) {
while ($tlimit > 0 && keys(%running)) {
foreach my $sliver (values(%running)) {
my $response = $sliver->RunLinktest("status");
if (!defined($response)) {
......@@ -2159,8 +2161,18 @@ sub DoLinktest()
next;
}
if ($response->code() != GENIRESPONSE_SUCCESS) {
if ($response->code() == GENIRESPONSE_SERVER_UNAVAILABLE ||
$response->code() == GENIRESPONSE_BUSY) {
next;
}
print STDERR "Could not get linktest status for sliver: ".
$response->output() . "\n";
delete($running{$sliver->aggregate_urn()});
# If the sliver was deleted during linktest, we do not
# consider it an error.
if ($response->code() != GENIRESPONSE_SEARCHFAILED) {
$errors++;
}
next;
}
my $blob = $response->value();
......@@ -2168,9 +2180,23 @@ sub DoLinktest()
delete($running{$sliver->aggregate_urn()});
}
}
$tlimit -= 5;
sleep(5);
}
if ($tlimit <= 0) {
print STDERR "Linktest run timed out!\n";
# Lets generate email for now, still debugging.
$errors++;
}
$instance->SetStatus($old_status);
if ($errors) {
SENDMAIL($TBOPS,
"Error running linktest",
"Error running linktest on $instance.\n",
$TBOPS, undef, $logfile);
}
unlink($logfile);
exit(0);
bad:
$instance->SetStatus($old_status);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment