Commit bbccd21a authored by Kirk Webb

More plab updates:

* created "-w" vnode_setup option that specifies how long to wait (per-vnode)
  for setup to complete before giving up.
* added sitevars for plab batch parallelism size and vnode setup timeout
* modified os_setup to use above sitevars when invoking vnode_setup for an
  experiment containing plab vnodes.
parent 7fbc4188
......@@ -32,3 +32,5 @@ INSERT INTO sitevariables VALUES ('watchdog/isalive/vnode',NULL,'10','Interval i
INSERT INTO sitevariables VALUES ('watchdog/isalive/plab',NULL,'10','Interval in minutes between planetlab node status reports (0==never report)');
INSERT INTO sitevariables VALUES ('watchdog/isalive/wa',NULL,'1','Interval in minutes between widearea node status reports (0==never report)');
INSERT INTO sitevariables VALUES ('watchdog/isalive/dead_time',NULL,'120','Time, in minutes, after which to consider a node dead if it has not checked in via tha watchdog');
INSERT INTO sitevariables VALUES ('plab/setup/vnode_batch_size',NULL,'40','Number of plab nodes to setup simultaneously');
INSERT INTO sitevariables VALUES ('plab/setup/vnode_wait_time',NULL,'960','Number of seconds to wait for a plab node to setup');
......@@ -749,9 +749,19 @@ elsif ($failed && @vnodelist) {
"failures!\n";
}
elsif (@vnodelist) {
my $vnode_setup_args = ""; # add any generic args here.
print "Setting up virtual testbed nodes ...\n";
system("$vnode_setup $pid $eid");
# If there are any plab vnodes, we have to adjust batching and timeouts
# accordingly.
if (grep($_, values(%plabvnodes))) {
my $plabnumbatch = TBGetSiteVar("plab/setup/vnode_batch_size");
my $plabwait = TBGetSiteVar("plab/setup/vnode_wait_time");
$vnode_setup_args .= " -n $plabnumbatch -w $plabwait ";
}
system("$vnode_setup $vnode_setup_args $pid $eid");
if ($?) {
die_noretry("*** $0:\n".
" Vnode setup failed!");
......
......@@ -18,10 +18,10 @@ use Getopt::Std;
#
sub usage()
{
print STDOUT "Usage: vnode_setup [-f] [-k] [-n <number>] <pid> <eid> [node ...]\n";
print STDOUT "Usage: vnode_setup [-f] [-k] [-n <numbatch>] [-w <wait_time>] <pid> <eid> [node ...]\n";
exit(-1);
}
my $optlist = "fdkn:";
my $optlist = "fdkn:w:";
#
# We don't want to run this script unless it's the real version.
......@@ -47,10 +47,9 @@ my $force = 0;
my $failed = 0;
my $killmode = 0;
my $numbatch = 10;
my $childwait = 120;
my $dbuid;
my $CHILD_TIMEOUT = 960; # 15 + 1 mins slop - plab needs a LOOOONNNGG TIME....
#
# Load the Testbed support stuff.
#
......@@ -93,6 +92,14 @@ if (defined($options{"n"})) {
die ("*** Bad data in numbatch: $options{'n'}");
}
}
# Optional -w <wait_time>: override the default $childwait.
# The value must consist solely of decimal digits; anything else is fatal.
# NOTE(review): presumably seconds, since $childwait is later added to a
# time() value when computing how long to wait on a child -- confirm.
if (defined($options{"w"})) {
if ($options{"w"} =~ /^(\d+)$/) {
# Assign from the regex capture rather than from the raw option string.
$childwait = $1;
}
else {
die ("*** Bad data in wait_time: $options{'w'}");
}
}
my $pid = shift(@ARGV);
my $eid = shift(@ARGV);
......@@ -273,6 +280,8 @@ foreach my $node (@nodes) {
# When setting up a vnode, force its event state into SHUTDOWN since
# no telling what its initial state is.
#
# XXX: Don't we always want to set this?
#
if ($mode eq "teardown" || $mode eq "reboot") {
TBSetNodeEventState($node, TBDB_NODESTATE_SHUTDOWN);
}
......@@ -287,7 +296,8 @@ foreach my $node (@nodes) {
my $children = 0;
my %child_vnodes = ();
print "Running at parallelization: $numbatch\n";
print "vnode_setup running at parallelization: $numbatch ".
"wait_time: $childwait\n";
while (1) {
# Space out the invocation of child processes a little.
......@@ -334,6 +344,10 @@ while (1) {
$UID = 0;
if ($plab && $mode eq "setup") {
# Make sure vnode is in the proper state before trying to
# bring it up.
# XXX: do this for all vnodes (see above)?
TBSetNodeEventState($vnode, TBDB_NODESTATE_SHUTDOWN);
if (TBForkCmd("$TB/sbin/plabnode ".
($force ? "-f" : "").
" alloc $pid $eid $vnode", 1)) {
......@@ -397,7 +411,7 @@ while (1) {
# back around the loop
#
my $now = time();
my $waittime = ($oldest + $CHILD_TIMEOUT) - time();
my $waittime = ($oldest + $childwait) - time();
#
# Kill off the oldest if he gets too old while we're waiting
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment