vnode_setup.in 19.1 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh Stoller's avatar
Leigh Stoller committed
2 3

#
4
# Copyright (c) 2000-2014, 2018 University of Utah and the Flux Group.
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
Leigh Stoller's avatar
Leigh Stoller committed
24
#
25 26
use English;
use Getopt::Std;
27
use POSIX ":sys_wait_h";
28 29 30 31 32 33 34 35 36 37 38

#
# Set up the vnode state on a virtual (multiplexed) node.
#
# XXX - This script should only be run from os_setup!
#
# The output is all jumbled together since the updates are issued in parallel.
# Might be a pain when debugging. 
# 
sub usage()
{
39
    print STDOUT "Usage: vnode_setup [-m] [-q] [-f] [-k] [-j] [-p] [-n <numbatch>] [-w <wait_time>] <pid> <eid> [node ...]\n";
40 41
    exit(-1);
}
42
my  $optlist = "fdkjpn:w:mqh";
43 44 45 46 47 48 49 50 51 52

#
# We don't want to run this script unless its the real version.
# That is, it must be setuid root. 
#
if ($EUID != 0) {
    die("*** $0:\n".
	"    Must be root! Maybe its a development version?\n");
}

53 54 55 56 57 58 59
#
# Configure variables
#
my $TB		= "@prefix@";
my $TESTMODE    = @TESTMODE@;
my $TBOPS       = "@TBOPSEMAIL@";
my $TBLOGS      = "@TBLOGSEMAIL@";
60
my $CLIENT_BIN  = "@CLIENT_BINDIR@";
61
my $PGENISUPPORT= @PROTOGENI_SUPPORT@;
62

63
my $SAVEUID     = $UID;
64
my $ssh		= "$TB/bin/sshtb -n";
65
my $debug       = 1;
66 67 68
my $force       = 0;
my $failed      = 0;
my $killmode    = 0;
69
my $haltmode    = 0;
70
my $jailonly    = 0;
71 72
my $sendemail   = 0;
my $quiet       = 0;
73
my $plabonly    = 0;
Kirk Webb's avatar
Kirk Webb committed
74
my $numbatch    = 10;
75
my $childwait   = 3000;
76 77 78 79 80 81 82 83
my $dbuid;

#
# Load the Testbed support stuff. 
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
84 85 86
use libtblog;
use Experiment;
use Node;
87
use User;
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
$| = 1; 

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
104
if (@ARGV < 2) {
105 106 107 108 109
    usage();
}
if (defined($options{"f"})) {
    $force = 1;
}
110 111 112 113 114 115
if (defined($options{"m"})) {
    $sendemail = 1;
}
if (defined($options{"q"})) {
    $quiet = 1;
}
116 117 118 119 120 121
if (defined($options{"d"})) {
    $debug = 1;
}
if (defined($options{"k"})) {
    $killmode = 1;
}
122 123 124
if (defined($options{"h"})) {
    $haltmode = 1;
}
125 126 127 128 129 130
if (defined($options{"j"})) {
    $jailonly = 1;
}
if (defined($options{"p"})) {
    $plabonly = 1;
}
Kirk Webb's avatar
Kirk Webb committed
131 132 133 134 135 136 137 138
if (defined($options{"n"})) {
    if ($options{"n"} =~ /^(\d+)$/) {
        $numbatch = $1;
    }
    else {
        die ("*** Bad data in numbatch: $options{'n'}");
    }
}
Kirk Webb's avatar
Kirk Webb committed
139 140 141 142 143 144 145 146
if (defined($options{"w"})) {
    if ($options{"w"} =~ /^(\d+)$/) {
        $childwait = $1;
    }
    else {
        die ("*** Bad data in wait_time: $options{'w'}");
    }
}
Kirk Webb's avatar
Kirk Webb committed
147

148 149
my $pid   = shift(@ARGV);
my $eid   = shift(@ARGV);
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166

#
# Untaint the arguments.
#
if ($pid =~ /^([-\@\w]+)$/) {
    $pid = $1;
}
else {
    die("*** Bad data in pid: $pid\n");
}	
if ($eid =~ /^([-\@\w]+)$/) {
    $eid = $1;
}
else {
    die("*** Bad data in eid: $eid\n");
}

167
if ($plabonly && $jailonly) {
168 169 170 171 172 173 174 175 176 177 178 179 180
    tbdie("*** '-j' and '-p' are mutually exclusive.");
}

#
# Verify user and get his DB uid and other info for later.
#
my $this_user = User->ThisUser();
if (!defined($this_user) && $UID) {
    tbdie("You ($UID) do not exist!");
}
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    tbdie("Could not locate object for experiment $pid/$eid");
181 182
}

183
#
184 185
# Verify permission to muck with this experiment. Note that this script
# is run as root from the plab monitor daemon.
186
#
187 188 189
if (defined($this_user) && !$this_user->IsAdmin() &&
    !$experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
    tbdie("You do not have permission to mess with $pid/$eid!");
190 191 192
}

#
193
# Get the list of nodes in this experiment.
194
#
195
my @nodes = $experiment->NodeList(1, 1);
196
if (! @nodes) {
Leigh Stoller's avatar
Leigh Stoller committed
197
    # Silent.
198
    exit(0);
199
}
200 201

# Nodes on the command line. Operate only on this set.
202 203 204 205 206 207 208 209 210 211 212 213 214 215
if (@ARGV) {
    my %fulllist = ();

    # Temporary hash list for searching.
    foreach my $node ( @nodes ) {
	$fulllist{$node} = 1;
    }
    @nodes = ();

    foreach my $node ( @ARGV ) {
	if ($node =~ /^([-\@\w]+)$/) {
	    $node = $1;

	    if (!defined($fulllist{$node})) {
216
		tbdie("Node $node is not allocated to $pid/$eid!");
217 218 219
	    }
	}
	else {
220
	    tbdie("Bad node name: $node.");
221 222 223 224
	}
	push(@nodes, $node);
    }
}
225
my $exptstate = $experiment->state();
226 227 228

# Just the vnodes mam.
foreach my $node (@nodes) {
229
    my $mode = ($killmode ? "teardown" : ($haltmode ? "halt" : "setup"));
230 231 232 233
    
    my $nodeobj = Node->Lookup($node);
    if (!defined($nodeobj)) {
	tbdie("Could not map $node to its object");
234
    }
235 236 237 238 239 240
    my $jailed     = $nodeobj->jailflag();
    my $plab       = $nodeobj->isplabdslice();
    my $remote     = $nodeobj->isremotenode();
    my $pnode      = $nodeobj->phys_nodeid();
    my $allocstate = $nodeobj->allocstate();
    my $geninode   = $nodeobj->isfednode();
241
    my $shared     = defined($nodeobj->sharing_mode());
242
    
243 244 245
    next
	if (!$nodeobj->isvirtnode());

246 247 248 249 250
    # fake nodes that are not rebootable, also skip. Maybe we just
    # skip all fakenodes, but not sure yet.
    next
	if ($nodeobj->isfakenode() && !$nodeobj->rebootable());

251 252
    # Special hack for SPP nodes. Need to generalize this.
    if ($shared && $nodeobj->type eq "sppvm") {
253
	if ($mode eq "teardown" || $mode eq "halt") {
254 255 256 257 258 259 260 261
	    $nodeobj->SetEventState(TBDB_NODESTATE_SHUTDOWN());
	}
	else {
	    $nodeobj->SetEventState(TBDB_NODESTATE_ISUP());
	}
	next;
    }

262
    if (($plabonly || $jailonly) and 
263
        !(($plabonly && $plab) || ($jailonly && (($jailed || $remote) 
264
						 && !($plab || $geninode))))) {
265 266
        next;
    }
267 268
    if (!defined($pnode)) {
	tbdie("No physical node for $node!");
269
    }
270

271 272 273
    #
    # On remote nodes, or when forcemode is on, always do the deed.
    # Otherwise, look at experiment state.
274 275
    #
    if (!$force) {
276 277 278 279
	if ($exptstate eq EXPTSTATE_SWAPPING) {
	    #
	    # When swapping, local vnodes go down with the physnode.
	    #
280
	    if (! ($remote || $shared)) {
281 282 283 284
		print "$node will $mode with local node $pnode.\n";
		next;
	    }
	    elsif ($allocstate eq TBDB_ALLOCSTATE_DOWN) {
285 286 287 288 289 290 291 292 293 294 295
		if ($plab) {
		    # Plab nodes need to be cleaned up.
		    print "$node failed to boot; changing to cleanup.\n";
		    $mode = "cleanup";
		}
		elsif (!$nodeobj->IsUp()) {
		    # Node can fail to boot, but can still wind up
		    # booting later, say by hand.
		    print "$node appears to be up; will $mode.\n";
		}
		else {
296 297 298
		    print "$node failed to boot; skipping $mode.\n";
		    next;
		}
299
	    }
300
	}
301 302
	elsif ($exptstate eq EXPTSTATE_ACTIVATING ||
	       $exptstate eq EXPTSTATE_MODIFY_RESWAP) {
303
	    #
304 305 306 307 308 309 310 311 312 313 314
	    # The allocstate determines if the vnode actually needs to
	    # be setup or torndown. Note that a failed experiment will
	    # cause a bunch of vnodes to be torndown, while in the
	    # ACTIVATING state. See os_setup and assign_wrapper; the
	    # idea is to avoid doing setup/teardown up vnodes on
	    # machines that are rebooting anyway, or that failed.
	    # Complicated by modify which could add/subtract a vnode on
	    # an existing machine, but not reboot the machine. Note that
	    # free (now unused) vnodes will land in RES_TEARDOWN. It is
	    # assumed that these booted okay, and need to be torndown,
	    # even though they are not RES_READY.
315
	    #
316
	    if (! ($remote || $shared)) {
317 318
		if ($killmode) {
		    if ($allocstate eq TBDB_ALLOCSTATE_DOWN) {
319 320 321 322
			print "$node failed to boot; skipping $mode.\n";
			next;
		    }
		    elsif ($allocstate eq TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
323 324 325
			print "$node never booted; skipping $mode.\n";
			next;
		    }
326 327 328 329
		    elsif ($allocstate eq TBDB_ALLOCSTATE_RES_READY()) {
			print "$node will $mode with local node $pnode.\n";
			next;
		    }
330
		}
331 332 333 334
		elsif ($allocstate eq TBDB_ALLOCSTATE_RES_REBOOT()) {
		    print "$node needs a reboot on $pnode\n";
		    $mode = "reboot";
		}
335
		elsif ($allocstate eq TBDB_ALLOCSTATE_RES_READY()) {
336
		    print "$node is already setting up on local node $pnode\n";
337 338 339
		    next;
		}
	    }
340 341
	    else {
		if ($killmode) {
342 343 344
		    if ($allocstate eq TBDB_ALLOCSTATE_DEAD) {
			# plab only. See below.
			print "$node failed to initialize; skipping $mode.\n";
345 346
			next;
		    }
347
		    elsif ($allocstate eq TBDB_ALLOCSTATE_DOWN) {
348 349 350 351 352 353 354 355 356 357 358 359
			if ($plab) {
			    # Plab nodes need to be cleaned up.
			    print "$node failed to boot; ".
				"changing to cleanup.\n";
			    $mode = "cleanup";
			}
			elsif (!$nodeobj->IsUp()) {
			    # Node can fail to boot, but can still wind up
			    # booting later, say by hand.
			    print "$node appears to be up; will $mode.\n";
			}
			else {
360 361 362 363
			    print "$node failed to boot; skipping $mode.\n";
			    next;
			}
		    }
364 365 366 367
		    elsif ($allocstate eq TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
			print "$node never booted; skipping $mode.\n";
			next;
		    }
368
		}
369
		elsif ($allocstate eq TBDB_ALLOCSTATE_RES_READY()) {
370
		    print "$node is already set up on $pnode\n";
371 372
		    next;
		}
373 374
		elsif ($allocstate eq TBDB_ALLOCSTATE_RES_INIT_DIRTY() ||
		       $allocstate eq TBDB_ALLOCSTATE_RES_REBOOT()) {
375
		    print "$node needs a reboot on $pnode\n";
376 377
		    $mode = "reboot";
		}
378
		elsif ($allocstate eq TBDB_ALLOCSTATE_RES_RECONFIG()) {
379
		    print "$node needs a reconfig on $pnode\n";
380 381 382 383 384
		    # We do not actually reconfig virtual nodes; just
		    # reboot them. Might reconfig someday, in which case
		    # this would move up into os_setup.
		    $mode = "reboot";
		}
385 386 387 388 389 390 391 392 393 394 395 396 397
		elsif ($plab && $allocstate eq TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
		    # This is a special case.  If we reuse one of the plab 
		    # nodes, but use a different vname in the topo, there will
		    # be an entry in the database and a slice will be reserved
		    # because it's never torn down.  However, we need to 
		    # skip plabnode alloc and go straight to vnodesetup.
		    #
		    # BUT, we only can do this if the sliver entry is already
		    # in the database!
		    # Also note that this could eventually cause problems
		    # if there is a mistaken sliver entry in the DB.

		    $res = DBQueryFatal(
398 399 400 401 402 403 404
			"select ps.slicename,psn.node_id".
			" from plab_slices as ps" . 
			" left join plab_slice_nodes as psn" . 
			"   on (ps.slicename=psn.slicename" . 
			"       and ps.plc_idx=psn.plc_idx)" . 
			" where ps.pid='$pid' and ps.eid='$eid'" . 
                        "   and psn.node_id='$node'");
405 406 407 408 409 410 411
		    if ($res->numrows == 1) {
			# node exists; change mode to resetup
			$mode = "resetup";
			print "Doing a resetup on '$node'\n";
		    }
		    
		}
412 413
	    }
	}
414 415 416 417 418 419
    }

    #
    # When setting up a vnode, force its event state into SHUTDOWN since
    # no telling what its initial state is. 
    # 
Kirk Webb's avatar
Kirk Webb committed
420 421
    # XXX: Don't we always want to set this?
    #
422
    if ($mode eq "teardown" || $mode eq "reboot" || $mode eq "halt") {
423
	$nodeobj->SetEventState(TBDB_NODESTATE_SHUTDOWN);
424
    }
425 426

    #
427 428
    # Put this into the list of calls we have to make in the next loop
    #
429
    push @vnodes, [$nodeobj, $mode];
430 431 432

}

433

434 435
my $children = 0;
my %child_vnodes = ();
436 437 438
print STDOUT "vnode_setup running at parallelization: $numbatch ". 
    "wait_time: $childwait\n"
    if (!$quiet);
439 440
while (1) {

Kirk Webb's avatar
Kirk Webb committed
441
    # Space out the invocation of child processes a little.
Kirk Webb's avatar
Kirk Webb committed
442
    sleep(1);
Kirk Webb's avatar
Kirk Webb committed
443

444 445 446 447 448 449 450
    #
    # We're done when we've hit the last vnode, and we've outlived all of our
    # children
    #
    if ((!@vnodes) && ($children == 0)) {
	last;
    }
451

452 453 454
    #
    # There are more free slots
    #
Kirk Webb's avatar
Kirk Webb committed
455
    if (($children < $numbatch) && @vnodes) {
456 457 458 459
	#
	# Look for a vnode that is not on a pnode we're already working on
	# 
	# XXX - do this!
460 461 462 463 464 465 466 467

	my ($nodeobj, $mode) = @{pop @vnodes};
	my $vnode    = $nodeobj->node_id();
	my $jailed   = $nodeobj->jailflag();
	my $plab     = $nodeobj->isplabdslice();
	my $remote   = $nodeobj->isremotenode();
	my $pnode    = $nodeobj->phys_nodeid();
	my $geni     = $nodeobj->isfednode();
468
    
469 470
	print STDOUT "Doing $mode of vnode $vnode on $pnode ...\n"
	    if (!$quiet);
471 472 473 474 475 476 477 478 479 480 481 482

	#
	# Run an ssh command in a child process, protected by an alarm to
	# ensure that the ssh is not hung up forever if the machine is in some
	# funky state.
	# 
	my $syspid = fork();

	if ($syspid) {
	    #
	    # Just keep track of it, we'll wait for it finish down below
	    #
483
	    $child_vnodes{$syspid} = [$nodeobj, $mode, time()];
484 485
	    $children++;
	} else {
486 487
	    TBdbfork();	# So we get the event system fork too ...
	    
488
            my $exval = 0;
489 490 491
	    # Must change our real UID to root so that ssh will work.
	    $UID = 0;

492
	    if ($mode eq "setup" && ($plab || !$jailed || $geni)) {
Kirk Webb's avatar
Kirk Webb committed
493 494 495
                # Make sure vnode is in the proper state before trying to
                # bring it up.
                # XXX: do this for all vnodes (see above)?
496 497 498 499
                $nodeobj->SetEventState(TBDB_NODESTATE_SHUTDOWN);
		if ($geni) {
		    $UID  = $SAVEUID;
		    $EUID = $UID;
500
#		    $exval = GeniEmulab->StartSlivers($experiment, [$nodeobj]);
501 502
		}
		elsif ($plab) {
503 504 505 506 507 508 509 510 511 512 513 514 515 516
		    if (TBForkCmd("$TB/sbin/plabnode ". 
				  ($force ? "-f" : "").
				  " alloc $pid $eid $vnode", 1)) {
			print STDERR "*** $0:\n" .
			    "    Plab node allocation failed\n";
			# Should check DB state instead.
			exit(99);
		    }
		}
		else {
		    if (TBForkCmd("$ssh -host $pnode $CLIENT_BIN/vnodesetup ".
				  " -i $vnode", 1)) {
			exit(99);
		    }
517
		}
518
                # Make sure the system knows we now have state on the node!
519
		$nodeobj->SetAllocState(TBDB_ALLOCSTATE_RES_INIT_DIRTY);
520
	    }
521
	    
522 523
	    if ($geni &&
		($mode eq "teardown" || $mode eq "cleanup" || $mode eq "halt")){
524 525 526
		$nodeobj->SetEventState(TBDB_NODESTATE_SHUTDOWN);
		$UID  = $SAVEUID;
		$EUID = $UID;
527
#		$exval = GeniEmulab->DestroySlivers($experiment, [ $nodeobj ]);
528 529
	    }
	    elsif (!($plab && ($mode eq "cleanup" || $mode eq "teardown"))
530
		   && !($mode eq "setup" && $remote && !$plab)) {
531 532 533 534
		# Cleanup is used only on plab nodes.
		# Don't try to teardown plab vnodes; it's just asking for
		# trouble as the shutdown may hang or take too long.  It's
		# best to simply try and free the vserver below.
535
		my $args = (($mode eq "teardown") ? "-k " :
536 537
			    (($mode eq "reboot") ? "-r " :
			     (($mode eq "halt") ? "-h " : " ")));
538
		$args   .= ($jailed   ? "-jVt " : ($plab ? "-p " : "-i "));
539
		$args   .= "$vnode ";
540
		
541 542 543 544 545
		# If it's a plab node, we must ssh to the vnode, not pnode.
		my $pnodeOrVnode = $pnode;
		if ($plab) {
		    $pnodeOrVnode = $vnode;
		}
546 547 548 549 550 551
		my $cmd = "$ssh -host $pnodeOrVnode ".
		    "  $CLIENT_BIN/vnodesetup $args";
		if ($debug) {
		    print "Running: '$cmd'\n";
		}
		$exval = TBForkCmd($cmd, 1);
552 553 554 555
            }

            # Free the plab node lease if necessary.
            if ($plab && ($mode eq "teardown" || $mode eq "cleanup")) {
556
		$nodeobj->SetEventState(TBDB_NODESTATE_SHUTDOWN);
557 558 559 560
                exec("$TB/sbin/plabnode free $pid $eid $vnode");
                die("*** $0:\n".
                    "    exec failed!\n");
            }
561 562 563 564 565 566 567 568 569 570 571 572 573
	    #
	    # TBForkCmd() returns the full exit status, but we cannot
	    # pass that to exit directly. Watch for a TERM signal,
	    # so we can tell the parent we exited cause of the timeout.
	    #
	    if ($exval) {
		if (WIFSIGNALED($exval)) {
		    $exval = WTERMSIG($exval);
		}
		else {
		    $exval = $exval >> 8;
		}
	    }
574
	    exit($exval);
575 576 577 578 579 580 581 582 583 584 585
	}
    } else {
	#
	# We have too many of the little rugrats, wait for one to die
	#

	#
	# Set up a timer - we want to kill processes after they hit 120 seconds
	# old (not much of a life, is it?), so we find the first one marked for
	# death.
	#
586
	my $oldest;
587
	my $oldestpid = 0;
Kirk Webb's avatar
Kirk Webb committed
588
        my $oldestvnode = "";
589
	while (my ($pid, $aref) = each %child_vnodes) {
590 591
	    my ($nodeobj, $mode, $birthtime) = @$aref;
	    my $vnode = $nodeobj->node_id();
592
	    if ((!$oldestpid) || ($birthtime < $oldest)) {
593 594
		$oldest = $birthtime;
		$oldestpid = $pid;
Kirk Webb's avatar
Kirk Webb committed
595
                $oldestvnode = $vnode;
596 597 598 599 600 601 602 603 604 605 606
	    }
	}

	#
	# Sanity check
	#
	if (!$oldest) {
	    die "*** $0\n".
	    	"Uh oh, I have no children left, something is wrong!\n";
	}

607
	#
Kirk Webb's avatar
Kirk Webb committed
608
	# If the oldest has already expired, just kill it off right now, and go
609 610 611
	# back around the loop
	#
	my $now = time();
Kirk Webb's avatar
Kirk Webb committed
612
	my $waittime = ($oldest + $childwait) - time();
613

614
	#
Kirk Webb's avatar
Kirk Webb committed
615
	# Kill off the oldest if it gets too old while we're waiting.
616
	#
Kirk Webb's avatar
Kirk Webb committed
617 618
        my $childpid = -1;
        my $exitstatus = -1;
619

Kirk Webb's avatar
Kirk Webb committed
620 621 622 623
        eval {
            local $SIG{ALRM} = sub { die "alarm clock" };

            if ($waittime <= 0) {
624 625
                print STDERR
		    "*** $0: timeout waiting for vnode: $oldestvnode\n";
Kirk Webb's avatar
Kirk Webb committed
626 627 628 629
                kill("TERM",$oldestpid);
            } else {
                alarm($waittime);
            }
630

Kirk Webb's avatar
Kirk Webb committed
631 632 633
            $childpid = wait();
            alarm 0;
            $exitstatus = $?;
634
	    print STDERR "Child return $exitstatus\n";
Kirk Webb's avatar
Kirk Webb committed
635 636
        };
        if ($@) {
637 638 639 640
	    next
		if ($@ =~ /alarm clock/);

	    die("bad exit from eval\n");
641 642
        }

643 644 645 646 647 648 649 650 651 652 653 654
	#
	# Another sanity check
	#
	if ($childpid < 0) {
	    die "*** $0\n".
	    	"wait() returned <0, something is wrong!\n";
	}

	#
	# Look up to see what vnode, etc. this was associated with - if we
	# don't know about this child, ignore it
	#
655 656 657 658
	if (! exists($child_vnodes{$childpid})) {
	    print STDERR "Unknown child $childpid returned from wait\n";
	    next;
	}
659
	my $aref = $child_vnodes{$childpid};
660 661 662
	my ($nodeobj, $mode, $birthtime) = @$aref;
	my $vnode = $nodeobj->node_id();
	my $pnode = $nodeobj->phys_nodeid();	
663 664 665
	$children--;
	delete $child_vnodes{$childpid};

666
	#
667
	# Look for setup failure, reported back through ssh.
668
	# 
669
	if ($exitstatus) {
670 671
	    print STDERR "vnode $vnode $mode on $pnode returned $?.\n"
		if $debug;
672

673 674
	    if ($exitstatus == 15) {
		print STDERR "$pnode is wedged.\n" if $debug;
675
	    }
676 677 678
	    elsif ($exitstatus >> 8 == 99) {
		print STDERR "$vnode did not allocate properly.\n" if $debug;
	    }
679

680 681
	    push @failed_nodes, [$vnode, $pnode, $mode, $exitstatus];
	    
682 683
	    warn("*** $0:\n".
		 "    Virtual node $vnode $mode failure!\n");
684

685
	    #
686 687 688
	    # If the node was in the setup process, then set its state
	    # to TBFAILED so that anything waiting knows its toast.
	    # We set it to SHUTDOWN above. 
689
	    #
690 691 692 693 694
	    if ($mode eq "setup" || $mode eq "reboot") {
		$nodeobj->Refresh();
		# Avoid duplicate state as it annoys stated. 
		$nodeobj->SetEventState(TBDB_NODESTATE_TBFAILED())
		    if (!$nodeobj->eventstate() ne TBDB_NODESTATE_TBFAILED());
695 696
	    }
	}
697 698 699
    }
}

700 701 702 703
#
# In force node, do not bother with this other stuff. 
#
exit(scalar(@failed_nodes))
704
    if ($force && !$sendemail);
705

706 707 708 709 710 711
#
# Send mail to testbed-ops about failed nodes
#
if (@failed_nodes) { 
    my $failed_lines = join("\n",map { join("\t",@{$_}) } @failed_nodes);
    SENDMAIL($TBOPS, "Virtual Node failure for $pid/$eid",
712 713 714
	     "The following virtual nodes failed: \n" .
	     "vnode\t\tpnode\tmode\texit status\n" .
	     $failed_lines);
715 716
}

717
if ($killmode) {
718 719
    print STDOUT "Vnode teardown finished.\n"
	if (!$quiet);
720
}
721 722 723 724
elsif ($haltmode) {
    print STDOUT "Vnode halt finished.\n"
	if (!$quiet);
}
725
else {
726 727
    print STDOUT "Vnode setup initiated on all nodes ...\n"
	if (!$quiet);
728
}
729
exit(0);