Commit 22f04bc1 authored by Leigh B Stoller

Better handling of determining forward progress and retrying, such as
when no nodes can be allocated, but there are nodes available and the
experiment would map.
parent 03b5e6a4
......@@ -71,7 +71,7 @@ my $usecurrent = 0; # Only with regression mode, use current solution.
my $quiet = 0;
my $clear = 0;
my $warnings = 0;
my $maxrun = 3; # Maximum number of times we run assign.
my $maxrun = 3; # Maximum number of times we run assign.
my $gotlock = 0;
my $userspec = 0;
my $randomize = 0;
......@@ -301,6 +301,7 @@ sub AssignLoop()
{
my $currentrun = 1;
my $canceled = 0;
my $noprogress = 0;
my $tried_precheck = 0;
# Admission control counts
my %admission_control = ();
......@@ -354,7 +355,7 @@ sub AssignLoop()
# RunAssign returns 0 if successful.
# returns -1 if failure, but assign says to stop trying.
# returns 1 if failure, but assign says to try again.
# returns 2 if we made some forward progress.
# returns 2 if assign succeeds, but no nodes could be allocated.
#
my $retval = RunAssign($precheck, $prefix);
......@@ -405,11 +406,14 @@ sub AssignLoop()
$tried_precheck = 1;
}
if ($currentrun >= $maxrun && $retval != 2) {
# If we made progress, keep trying until no progress twice in a row.
if ($currentrun >= $maxrun && $noprogress) {
fatal({type => 'primary', severity => SEV_ERROR,
error => ['reached_assign_run_limit']},
"Reached run limit. Giving up.");
}
# See if we made progress or not. We try one extra time. See above.
$noprogress = ($retval == 2);
chat("Waiting 5 seconds and trying again...\n");
sleep(5);
......@@ -694,8 +698,8 @@ sub RunAssign($$)
my $retval = $vtop->AllocNodes();
if ($retval != 0) {
if ($retval < 1) {
print("Could not allocate any nodes.\n");
return -1;
# Could not allocate any nodes.
return 2;
}
return 1;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment