Commit 63cfbd3a authored by David Johnson

Attempt to fix a problem in which the window size (max plab nodes being
tested at once) goes negative and too many processes get forked.
parent 419ff684
......@@ -84,7 +84,7 @@ sub checknextnode($) {
# Nothing to check!
if (!$pnode) {
return;
return 1;
}
# Grab the vnode for this pnode (service sliver vnode)
......@@ -92,7 +92,7 @@ sub checknextnode($) {
if (!defined($vnode)) {
print "Could not find vnode associated with $pnode!\n";
return;
return 1;
}
print "Pool: $self->{'NAME'}: Testing node $pnode->{'name'} at ".
......@@ -110,7 +110,7 @@ sub checknextnode($) {
$self->{'PENDING'}->{$pnode->{'name'}} = $pnode;
$self->{'CHPID2POOL'}->{$chpid} = $self;
return
return 0;
}
# Worker process.
......@@ -214,7 +214,7 @@ sub processchild($$$) {
if (!defined($pnode)) {
print "Pool: $self->{'NAME'}: $chpid not found in pending list!\n";
return 1;
return 0;
}
# Setup log entry prefix
......
......@@ -101,7 +101,7 @@ sub checknextnode($) {
# Nothing to check!
if (!$pnode) {
return;
return 1;
}
# Grab a new sliver to test with
......@@ -113,14 +113,14 @@ sub checknextnode($) {
'nodeid' => $pnode->{'name'});
if (Node::CreateVnodes(\@vnodes, \%options)) {
print "Failed to allocate vnode for $pnode->{'name'}!\n";
return;
return 1;
}
my $vnode = $vnodes[0];
if (!defined($vnode)) {
print "Could not create vnode associated with $pnode!\n";
return;
return 1;
}
print "Pool: $self->{'NAME'}: Testing node $pnode->{'name'} at ".
......@@ -138,7 +138,7 @@ sub checknextnode($) {
$self->{'PENDING'}->{$pnode->{'name'}} = $pnode;
$self->{'CHPID2POOL'}->{$chpid} = $self;
return;
return 0;
}
# Worker process.
......@@ -242,7 +242,7 @@ sub processchild($$$) {
if (!defined($pnode)) {
print "Pool: $self->{'NAME'}: $chpid not found in pending list!\n";
return 1;
return 0;
}
# Setup log entry prefix
......
......@@ -219,8 +219,11 @@ while (1) {
foreach my $pool (@allpools) {
# if pool still has nodes to test, get them going.
if ($pool->getnextchecktime() <= $now) {
$pool->checknextnode();
$windowsize++;
# Only increment the window if we successfully launched a
# process.
if (!$pool->checknextnode()) {
$windowsize++;
}
}
}
}
......@@ -256,6 +259,14 @@ while (1) {
}
}
# This is the best place to get an idea of running plabnode procs.
# The pipeline ends in `wc -l`, so the first output line carries the count;
# that line is newline-terminated and may be padded with whitespace, so
# take only the captured digits rather than the raw line.  $pcount stays 0
# when the command produced no output or no digits.
# NOTE(review): the grep process (and the shell running the pipeline) may
# match the 'emulab-ops plab-' pattern itself and inflate the count --
# confirm whether that matters for this diagnostic.
my @procs = `ps axwww | grep 'emulab-ops plab-' | wc -l`;
my $pcount = 0;
if (scalar(@procs) > 0 && $procs[0] =~ /(\d+)/) {
    $pcount = $1;
}
# Look for expired processes. Calling checkexpiration on a pool
# has the side effect of checking for ISUP (or ISUP expiration) for
# any nodes pending thusly in the pool. The return value is the
......@@ -266,6 +277,9 @@ while (1) {
$windowsize -= $numfinished;
}
# Log diff between believed window size, and "actual"
print "winsize = $windowsize / runprocs = $pcount\n";
# We may have just fired off a bunch of kills, so chill for a bit to
# let things quiesce.
sleep($CHILLTIME);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment