tbswap.in 54.4 KB
Newer Older
1 2 3 4
#!/usr/bin/perl -w

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2007 University of Utah and the Flux Group.
6 7 8 9
# All rights reserved.
#
use English;

10 11 12
# Returns 0 on success.
# Returns 1 on non-assign_wrapper failure.
# Returns (1 | assign_wrapper's errorcode) on assign_wrapper failure.
13 14
# Returns |0x40 if update caused a swapout. Icky.
# Returns -1 on uncontrolled error (die called).
15

16 17 18 19 20
# XXX: handle error cases for update? (backup the db?)
# XXX: Shouldn't do idempotent stuff twice for update.
# XXX: repush/calc routing for update??? (tbprerun)
# XXX: previz for update???              (tbprerun)
# XXX: make snmpit faster for update.
Chad Barb's avatar
Chad Barb committed
21 22 23
#
# XXX: for update, expt is swapped out on os_setup fail.
#      (we only recover if assign fails)
24 25 26

sub usage()
{
27
    print STDERR "Usage: $0 -force { in | out | update [-reboot] [-eventsys_restart] } pid eid\n";
28 29 30 31 32 33 34
    exit(-1);
}

#
# Configure variables
#
my $TBROOT         = "@prefix@";
35
my $TBOPS          = "@TBOPSEMAIL@";
Timothy Stack's avatar
Timothy Stack committed
36
my $TBLOGS         = "@TBLOGSEMAIL@";
37
my $MAINSITE	   = @TBMAINSITE@;
38
my $THISHOMEBASE   = "@THISHOMEBASE@";
39 40
my $TESTMODE       = @TESTMODE@;
my $DISABLE_EVENTS = "@DISABLE_EVENT_SCHED@";
41
my $piper          = "$TBROOT/sbin/locpiper";
Timothy Stack's avatar
Timothy Stack committed
42
my $NFSTRACESUPPORT= @NFSTRACESUPPORT@;
43 44 45 46 47 48 49 50 51 52 53

# Untaint the path
$ENV{'PATH'} = "/usr/bin:$TBROOT/libexec:$TBROOT/libexec/ns2ir" . 
    ":$TBROOT/sbin:$TBROOT/bin";

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
54
use libadminctrl;
55
use libadminmfs;
56
use libtblog;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
57
use libArchive;
58
use Experiment;
59
use User;
60

61
#require exitonwarn; # exitonwarn isn't really a module, so just require it
62 63 64 65

#
# Actual swap-in and swap-out functions, defined below.
#
66 67 68
sub doSwapout($);
sub doSwapin($);

69 70 71 72
#
# Firewall stuff
# XXX maybe should be elsewhere
#
73 74 75 76 77
sub FWSETUP()     { return 1; }
sub FWADDNODES()  { return 2; }
sub FWDELNODES()  { return 3; }
sub FWTEARDOWN()  { return 4; }
sub doFW($$$$);
78 79

# XXX fixme: should not be hardwired!
80
my $cnetstack = "-S Control";
81 82 83
my $cnetvlanname = "Control";


84 85 86 87
sub REAL()    { return 4; }
sub CLEANUP() { return 3; }
sub RETRY()   { return 2; }
sub UPDATE()  { return 1; }
Chad Barb's avatar
Chad Barb committed
88
sub UPDATE_RECOVER()  { return 0; }
89

90 91 92
#
# Grab global enable of linkdelays.
#
93
my $enablelinkdelays = TBGetSiteVar("general/linux_endnodeshaping");
94

95 96 97 98 99 100
#
# Turn off line buffering on output
#

$| = 1;

101 102
my $updateReboot   = 0;
my $updateReconfig = 1;
103 104
my $update_Eventsys_restart = 0;
my $elabinelab     = 0;
105
my $plabinelab     = 0;
106 107
my $force  = 0;
my $errors = 0;
108
my $updatehosed = 0;
109
my $state;
110
my $canceled;
111 112
my $os_setup_pid;
my $cleanvlans;
113
my $nextState;
114 115

#
116
# First argument is either "in", "out", or "update";
117 118 119 120 121
# this value goes into $swapop.
#

my $swapop = shift;	

122 123 124 125
if (!$swapop || 
    (($swapop ne "in") && 
     ($swapop ne "out") &&
     ($swapop ne "update"))) {
126 127 128 129 130 131 132 133 134 135 136
    usage();
}

#
# Get other arguments.
#

while ($#ARGV > 1) {
    $arg = shift;
    if ($arg eq "-force") {
	$force = 1;
Chad Barb's avatar
Chad Barb committed
137 138
    } elsif ($arg eq "-reboot") {
	$updateReboot = 1;
139
	$updateReconfig = 0;
140 141 142
    } elsif ($arg eq "-noreconfig") {
	$updateReboot   = 0;
	$updateReconfig = 0;
143 144
    } elsif ($arg eq "-eventsys_restart" && $swapop eq "update") {
	$update_Eventsys_restart = 1;
145 146 147 148 149 150 151 152 153
    } else {
	usage();
    }
}
if ($#ARGV < 1) {
    usage();
}
my ($pid,$eid) = @ARGV;

154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
#
# Untaint the arguments.
#
if ($pid =~ /^([-\@\w.]+)$/) {
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
if ($eid =~ /^([-\@\w.]+)$/) {
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}

170 171 172 173 174 175
#
# Set Error reporting info
# 
tblog_set_info($pid,$eid,$UID);

#
176
# Turn on timestamps
177
#
178 179
TBDebugTimeStampsOn();

180 181 182 183 184 185
#
# Verify user and get his DB uid and other info for later.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
186
}
187 188 189
my $user_uid   = $this_user->uid();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
190

191 192 193 194 195 196
# Slowly convert to using Experiment module.
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    tbdie("Could not lookup experiment object!")
}

197 198 199
#
# Print starting message.
#
200 201
my $exptidx;
TBExptIDX($pid, $eid, \$exptidx);
202

203 204
print "Beginning swap-$swapop for $pid/$eid ($exptidx). " .
    TBTimeStampWithDate() . "\n";
205 206 207 208 209 210
TBDebugTimeStamp("tbswap $swapop started");

#
# Get experiment state; verify that experiment exists.
#
if (! ($state = ExpState($pid, $eid))) {
211
    tbdie "No such experiment $pid/$eid";
212
}
213 214 215
# Sanity check the current state. 
if (!$force) {
    if ($swapop eq "in") {
216
	tbdie("Experiment should be ACTIVATING. Currently $state.")
217
	    if ($state ne EXPTSTATE_ACTIVATING);
218
    }
219
    elsif ($swapop eq "out") {
220
	tbdie("Experiment should be SWAPPING. Currently $state.")
221
	    if ($state ne EXPTSTATE_SWAPPING);
222
    }
223
    elsif ($swapop eq "update") {
224
	tbdie("Experiment should be MODIFY_RESWAP. Currently $state.")
225
	    if ($state ne EXPTSTATE_MODIFY_RESWAP);
226 227
    }
}
228 229
# Get elabinelab status. See below.
if (! TBExptIsElabInElab($pid, $eid, \$elabinelab)) {
230
    tbdie("Could not get elabinelab status for experiment $pid/$eid");
231
}
232 233 234 235 236 237 238 239 240 241 242
# and plabinelab status.
if (! TBExptIsPlabInElab($pid, $eid, \$plabinelab)) {
    tbdie("Could not get plabinelab status for experiment $pid/$eid");
}
if ($elabinelab && $plabinelab) {
    tberror "Cannot get my head around Plab in Elab in Elab!\n";
    print "Failingly finished swap-$swapop for $pid/$eid. " .
	TBTimeStamp() . "\n";
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
    exit(1);
}
243

244 245 246 247 248
#
# See if the experiment is firewalled
#
my $firewalled = TBExptFirewall($pid, $eid);

249 250 251
#
# Do actual swapping
#
252 253 254 255
if ($swapop eq "out") {
    #
    # Swap out
    #
256
    tblog_set_attempt(1);
257
    $errors = doSwapout(REAL);
258
}
259
elsif ($swapop eq "update") {
260 261 262 263 264 265
    #
    # Before swapout, do cursory admission control to see if the
    # modified experiment will be swappable. assign_wrapper does a
    # more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
266
    if (! TBAdmissionControlCheck(undef, $experiment, undef)) {
267 268 269
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
270 271 272 273 274 275
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
    
276 277
    #
    # Update.
278 279 280
    #
    tblog_set_attempt(1);

281 282 283 284 285
    #
    # Phase One -- swap experiment partially out.
    #
    print STDERR "Backing up physical state...\n";
    TBExptBackupPhysicalState($pid,$eid);
286

287
    $errors = doSwapout(UPDATE);
Chad Barb's avatar
Chad Barb committed
288

289 290 291 292 293 294
    if (0) {
	print STDERR "Doing a swapmodswapout on the experiment archive ...\n";
	if (libArchive::TBExperimentArchiveSwapModSwapOut($pid, $eid) < 0) {
	    tberror("Failed swapmodswapout on the experiment archive!");
	    $errors = 1;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
295 296
    }

297
    if ($errors) {
298
	#
299 300 301 302 303
	# Clean up the mess, leaving the experiment in the SWAPPED state,
	# 
	print STDERR "Cleaning up after errors.\n";
	doSwapout(CLEANUP);
	$updatehosed = 1;
304
    }
305
    else {
306
	#
307
	# Phase Two -- swap experiment back in.
308
	#
Chad Barb's avatar
Chad Barb committed
309 310
	$errors = doSwapin(UPDATE);

311
	if ($errors) {
Chad Barb's avatar
Chad Barb committed
312 313 314
	    #
	    # There were errors; see if we can recover.
	    #
315
	    my $CanRecover = 1;
Chad Barb's avatar
Chad Barb committed
316 317 318

	    if ($errors != 7) {
		print STDERR "Update failure occurred _after_ assign phase; ";
319
		$CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
320 321
	    }

322 323 324 325 326 327 328 329
	    if ($CanRecover) {
		print STDERR "Recovering virtual and physical state.\n";

		if (TBExptRemoveVirtualState($pid, $eid) ||
		    TBExptRestoreVirtualState($pid, $eid) ||
		    TBExptRestorePhysicalState($pid,$eid)) {
		    print STDERR "Could not restore backed-up state; ";
		    $CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
330
		}
331 332 333 334 335 336 337
		else {
		    print STDERR "Doing a recovery swap-in of old state.\n";

		    if (doSwapin(UPDATE_RECOVER)) {
			print STDERR "Could not swap in old physical state; ";
			$CanRecover = 0;
		    }
Chad Barb's avatar
Chad Barb committed
338 339
		}
	    }
340 341 342 343 344 345 346

	    #
	    # Some part of the recovery failed; must swap it out. swapexp
	    # (caller) will then have to do more clean up, hence the special
	    # exit status indicated by $updatehosed.
	    # 
	    if (! $CanRecover) {
347
		print STDERR "Recovery aborted! Swapping experiment out.\n";
Chad Barb's avatar
Chad Barb committed
348
		doSwapout(CLEANUP);
349 350 351 352
		$updatehosed = 1;
	    }
	    else {
		print STDERR "Update recovery successful.\n";
Chad Barb's avatar
Chad Barb committed
353
	    }
354
	}
Chad Barb's avatar
Chad Barb committed
355 356
    }
}
357 358 359 360 361
elsif ($swapop eq "in") {
    #
    # Swap in
    #
    my $retries = 2;
362 363 364 365 366 367

    #
    # Before real swapin, do cursory admission control. assign_wrapper does
    # a more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
368
    if (! TBAdmissionControlCheck(undef, $experiment, undef)) {
369 370 371
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
372 373 374 375 376
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
377 378

    tblog_set_attempt(1);
379
    $errors = doSwapin(REAL);
Chad Barb's avatar
Chad Barb committed
380

381 382 383 384 385 386 387 388 389 390
    #
    # Attempt a retry if: 
    #   a) there were errors, 
    #   b) doswapin() indicated (via return code 3) a retry is appropriate,
    #   c) we haven't tried too many times already.
    #   d) The cancelflag has not been set.
    #   e) $TESTMODE == 0.
    #
    while ($errors == 3 && $retries && !$canceled && !$TESTMODE) {
	$retries--;
391
	tblog_inc_attempt();
392

393 394
	print STDERR "Cleaning up after errors; will try again.\n";
	doSwapout(RETRY);
395

396 397
	print STDERR "Trying again...\n";
	$errors = doSwapin(RETRY);
398
    }
399 400
    if ($errors || $canceled) {
	print STDERR "Cleaning up after " .
401
	    ($canceled ? "cancelation" : "errors") . ".\n";
402
	doSwapout(CLEANUP);
403 404 405
    }
}

406 407 408 409
tblog_set_attempt(0);

TBGetCancelFlag($pid, $eid, \$canceled);
if ($canceled) {
410 411
    tberror ({type=>'summary', cause=>'canceled', 
	      severity=>SEV_IMMEDIATE, error=>['cancel_flag']},
412 413 414
	     "Experiment swap-$swapop canceled by user.");
}

415 416 417 418
#
# Write appropriate message and exit.
#
if ($errors) {
419
    print "Failingly finished swap-$swapop for $pid/$eid. ".TBTimeStamp()."\n";
420
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
Chad Barb's avatar
Chad Barb committed
421

422 423
    # Pass out magic value to indicate that update failed!
    exit(1 | ($updatehosed ? 0x40 : 0));
424
}
425
print "Successfully finished swap-$swapop for $pid/$eid. " .TBTimeStamp()."\n";
426 427
TBDebugTimeStamp("tbswap $swapop finished (succeeded)");
exit(0);
428 429 430 431

#################################

##
Chad Barb's avatar
Chad Barb committed
432
#
433 434
# doSwapout - Swaps experiment out.
#
Chad Barb's avatar
Chad Barb committed
435
#             If in REAL or CLEANUP,
436 437 438
#             this function will free all nodes for the 
#             experiment.
#
Chad Barb's avatar
Chad Barb committed
439
#             If in RETRY or UDPATE,
440 441 442 443 444 445
#             only nodes not in RES_READY will be freed.
#
#             Returns 0 on success, >0 on failure.
#
##

446 447
sub doSwapout($) {
    my $type = shift; # REAL==4, CLEANUP==3, RETRY==2, UPDATE==1.
448 449
    my $swapout_errors = 0;

450 451
    tblog_set_cleanup(1) if $type == CLEANUP;

452 453 454 455 456 457 458 459 460
    #
    # wait for os_setup;
    # this only applies if called after a failed doswapin.
    #
    if ($os_setup_pid) {
	print "Waiting for os_setup to finish\n";
	waitpid($os_setup_pid, 0);
	undef $os_setup_pid;
    }
Chad Barb's avatar
Chad Barb committed
461

462
    if (0 && $NFSTRACESUPPORT && $type == REAL) {
Timothy Stack's avatar
Timothy Stack committed
463 464
	print "Getting files accessed via NFS.\n";
	TBDebugTimeStamp("nfstrace started");
465
	system("nfstrace transfer $pid $eid");
Timothy Stack's avatar
Timothy Stack committed
466
	TBDebugTimeStamp("nfstrace finished");
467
    }
468

469
    if (! $TESTMODE) { 
470 471 472
	if (! ($DISABLE_EVENTS || $elabinelab)) {
	    if ($type >= RETRY ||
		($update_Eventsys_restart && $type == UPDATE) ) {
473
		print "Stopping the event system\n";
474
		if (system("eventsys_control stop $pid,$eid")) {
475 476 477
		    tberror({type => 'secondary', severity => SEV_SECONDARY,
			     error => ['eventsys_stop_failed']},
			    "Failed to stop the event system.");
478 479
		    $swapout_errors = 1;
		}
480 481 482 483 484 485 486

		#
		# Stop the location piper.
		#
		if (-x $piper) {
		    print "Stopping the location piper\n";
		    if (system("$piper -k $pid $eid")) {
487 488 489
			tberror({type => 'secondary', severity => SEV_SECONDARY,
				 error => ['piper_stop_failed']},
				"Failed to stop location piper.");
490 491 492
			$swapout_errors = 1;
		    }
		}
493 494
	    }
	}
495 496 497 498 499 500
	
	#
	# Do teardown of inner elab. We must do this before we teardown the
	# vlans since the inner control network is a vlan, and we want that
	# active so inner boss can reboot the inner nodes (avoid power cycle).
	#
501
	if ($elabinelab && $type >= CLEANUP) {
502 503
	    print "Tearing down elabinelab. This could take a while.\n";
	    if (system("elabinelab -k $pid $eid")) {
504 505 506
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['elabinelab_tear_down_failed']},
			"Failed to teardown elabinelab!");
507 508 509 510
		$swapout_errors = 1;
	    }
	}

511
	#
Chad Barb's avatar
Chad Barb committed
512
	# Clean up any VLANs in experiment.
513
	#
514 515 516 517 518 519 520
	# When modifying an elabinelab experiment, leave the vlans intact
	# so that the inner networks are not suddenly disconnected!
	#
	if (! ($elabinelab && $type == UPDATE)) {
	    TBDebugTimeStamp("snmpit started");
	    print STDERR "Removing VLANs.\n";
	    if (system("snmpit -r $pid $eid")) {
521 522 523
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vlan_reset_failed']},
			"Failed to reset VLANs");
524 525 526 527 528
		$swapout_errors = 1;
	    } else {
		$cleanvlans = 0;
	    }
	    TBDebugTimeStamp("snmpit finished");
529
	}
530
    }
Chad Barb's avatar
Chad Barb committed
531

532
    if ($type >= CLEANUP) {
533 534 535 536 537 538 539 540
	#
	# Undo plab in elab specialness.
	# No need to worry about VLANs here, as all the special abilities
	# involve the control network.
	#
	if (! $TESTMODE && $plabinelab) {
	    print "Tearing down plabinelab.\n";
	    if (system("plabinelab -k $pid $eid")) {
541 542 543
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['plabinelab_tear_down_failed']},
			"Failed to teardown plabinelab!");
544 545 546 547
		$swapout_errors = 1;
	    }
	}

548 549
	#
	# We're not attempting a retry;
550
	#
551
	# Stop all of the vnodes.
552
	#
553
	if (! $TESTMODE) { 	
554 555 556
	    print "Tearing down virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid")) {
557 558 559
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
560 561 562
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup finished");
563 564
	}

565 566 567 568 569
	#
	# Nodes behind a firewall are treated special.
	# See undoFWNodes for details.
	#
	if ($firewalled && undoFWNodes($pid, $eid)) {
570
	    tblog_set_cleanup(0);
571 572
	    return 1;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
573

574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593
	#
	# Perform swapout time admin actions.  Right now there is at most
	# one of these.  It isn't really a general mechanism, just a hook
	# for state saving or data collection during swapout.
	# A couple of important "fer now" notes:
	#
	#	We don't do this for firewalled experiments.  We need a way
	#	to "tag" the saved disk state to ensure it doesn't get
	#	instantiated outside of a firewall.
	#
	#	We only do this on REAL swapouts, and not on CLEANUPs.
	#	There are some types of CLEANUPs where we may want to
	#	do this, in particular an invocation caused by a failed
	#	modify operation, where the admin action is to save the
	#	experiment state.  So we will need to revisit this.
	#
	my %soaction = ();
	if ($type == REAL && !$firewalled) {
	    TBExptGetSwapoutAction($pid, $eid, \%soaction);
	}
594
	if ($soaction{'command'} && doSwapoutAction($pid, $eid, %soaction)) {
595
	    tblog_set_cleanup(0);
596
	    return 1;
597 598
	}

599 600
	#
	# remove all nodes from the experiment.
Chad Barb's avatar
Chad Barb committed
601
	# (nfree will send them to RES_FREE_DIRTY)
602 603 604 605
	#
	print STDERR "Freeing nodes.\n";
	TBDebugTimeStamp("nfree started");
	if (system("nfree $pid $eid")) {
606 607 608
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['nfree_failed']},
		    "Could not free nodes.");
609 610 611
	    $swapout_errors = 1;
	}
	TBDebugTimeStamp("nfree finished");
612 613 614 615 616

	#
	# Since this is an actual swapout, 
	# reset our count of swap out nag emails sent.
	#
617
	DBQueryWarn("update experiments set swap_requests='',sim_reswap_count='0' ".
618
		    "where eid='$eid' and pid='$pid'");
619 620
    } else {
	#
621
	# $type == RETRY or $type == UPDATE.
622 623
	# Therefore, don't deallocate nodes which have been successfully
	# incorporated into the experiment (i.e., are RES_READY).
Chad Barb's avatar
Chad Barb committed
624
	# (nfree will send deallocated nodes to RES_FREE_DIRTY)
625
	#
626 627 628 629 630 631 632 633
	my @failedpnodes = ();
	my @failedvnodes = ();
	
	my $db_result =
	    DBQueryFatal("select rv.node_id,n.allocstate,nt.isvirtnode ".
                         "  from reserved as rv ".
			 "left join nodes as n on n.node_id = rv.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
634 635
			 "where rv.pid='$pid' and rv.eid='$eid'");

636
	while (my ($node,$allocstate,$isvirt) = $db_result->fetchrow_array) {
637
	    if ($allocstate ne TBDB_ALLOCSTATE_RES_READY()) {
638 639 640 641 642 643
		if ($isvirt) {
		    push(@failedvnodes, $node);
		}
		else {
		    push(@failedpnodes, $node);
		}
644 645 646
	    }
	}

647 648 649 650 651 652 653
	#
	# Tear down failed vnodes. Perhaps not needed?
	# 
	if (!$TESTMODE && @failedvnodes > 0) {
	    print "Tearing down failed virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid @failedvnodes")) {
654 655 656
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
657 658 659 660
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup -k finished");
	}
661

662 663 664 665 666 667 668 669 670 671 672
	#
	# Undo plabinelab setup
	#
	if (!$TESTMODE && $plabinelab && @failedpnodes > 0) {
	    print "Removing failed nodes from inner plab.\n";
	    if (system("plabinelab -r $pid $eid @failedpnodes")) {
		tberror "Failed to remove inner nodes!";
		$swapout_errors = 1;
	    }
	}

673 674 675 676
	#
	# Release all failed nodes.
	# 
	if (@failedpnodes > 0 || @failedvnodes > 0) {
677 678
	    print STDERR "Freeing failed nodes.\n";
	    
679
	    TBDebugTimeStamp("nfree started");
Chad Barb's avatar
Chad Barb committed
680 681 682 683 684
	    #
	    # Specify -x switch so when a physical node gets freed,
	    # any virtual nodes (owned by this experiment)
	    # sitting on top of it are freed as well.
	    #
685 686
	    if (system("nfree -x $pid $eid " .
		       join(" ", (@failedpnodes, @failedvnodes)))) {
687 688 689
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['nfree_failed']},
			"Could not free nodes.");
690 691 692 693 694 695
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("nfree finished");
	}
    }

696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
    if (! $TESTMODE) {
	#
	# If the experiment has no Plab dslice nodes left, but still has
	# a Plab slice, destroy the slice
	#

	# Does the slice exist?
	$db_result =
	    DBQueryFatal("select slicename from plab_slices ".
			 "where pid='$pid' and eid='$eid'");

	if ($db_result->numrows) {
	    # Are there any dslice nodes left?
	    $db_result =
		DBQueryFatal("select n.node_id from nodes as n ".
			     "left join node_types as nt on n.type = nt.type ".
			     "left join reserved as r ".
			     " on r.node_id = n.node_id ".
			     "where r.pid='$pid' and r.eid='$eid' ".
			     " and nt.isplabdslice = 1");

	    if (!$db_result->numrows) {
		print "Destroying Planetlab slice.\n";
		TBDebugTimeStamp("plabslice destroy started");
720
		if (system("plabslice destroy $pid $eid")) {
721
		    tberror "Failed to destroy Plab dslice.";
722 723 724 725 726 727 728
		    $swapout_errors = 1;
		}
		TBDebugTimeStamp("plabslice destroy finished");
	    }
	}
    }

729 730 731 732 733 734 735 736
    if (! $TESTMODE) {
	#
	# All of these errors are non-fatal on swapout. We find out about them
	# via email sent from the individual scripts.
	#

	#
	# Only reset mountpoints if this is an actual swapout, and
737
	# not a failed swapin(cleanup), update, or retry.
738
	#
739
	if ($type == REAL) {
740 741 742
	    print "Resetting mountpoints.\n";
	    TBDebugTimeStamp("exports started");
	    if (system("exports_setup")) {
743 744 745
		tberror({severity => SEV_WARNING,
			 error => ['mountpoint_reset_failed']},
			"Failed to reset mountpoints.");
746 747 748
	    }
	    TBDebugTimeStamp("exports finished");
	}
Chad Barb's avatar
Chad Barb committed
749

750 751 752 753
	#
	# Resetting named maps and email lists is fast and idempotent,
	# so whatever.
	#
754 755 756
	print "Resetting named maps.\n";
	TBDebugTimeStamp("named started");
	if (system("named_setup")) {
757
	    tbwarn "Failed to reset named map.";
758 759
	}
	TBDebugTimeStamp("named finished");
Chad Barb's avatar
Chad Barb committed
760

761 762
	print "Resetting email lists.\n";
	TBDebugTimeStamp("genelists started");
763
	if (system("genelists -t")) {
764
	    tbwarn "Failed to reset email lists.";
765 766 767 768
	}
	TBDebugTimeStamp("genelists finished");
    }

769
    #
770 771 772
    # Wipe the DB clean except during UPDATE or RETRY. In those
    #    cases, assign_wrapper will reset the DB after reading
    #    the info.
773
    #
774 775
    if ( $type >= CLEANUP ) {
	print STDERR "Resetting DB.\n";
776 777 778
	$experiment->RemovePhysicalState();
	# Special. 
	$experiment->ClearPortRegistration();
779
    }
780

781
    tblog_set_cleanup(0);
782 783 784 785
    return $swapout_errors;
}

##
Chad Barb's avatar
Chad Barb committed
786
#
787 788
# doSwapin - Swaps experiment in.
#
Chad Barb's avatar
Chad Barb committed
789 790 791 792 793
#            Returns:
#              0 - successful swapin
#              1 - failed swapin; cleanup required.
#              3 - failed swapin; cleanup required; can retry.
#              7 - failed swapin; assign failed; no cleanup.
794 795
##

796
sub doSwapin($) {
797 798 799
    my $type = shift; # REAL==4, RETRY==2, UPDATE==1, UPDATE_RECOVER=0.
    # Just the physnodes ...
    my @deleted_pnodes = ();
800

801 802 803 804
    #
    # assign_wrapper does all the virtual to physical mapping 
    # and updating the DB state.
    #
805
    
Chad Barb's avatar
Chad Barb committed
806
    if ($type > UPDATE_RECOVER) {
807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
        #
        # Hacky test to allow disabling of linkdelays if the node is going
        # to run Linux. See sitevar above.
        #
	if (! $enablelinkdelays) {
	    $db_result =
		DBQueryFatal("select distinct e.pid,e.eid,vl.vnode,vn.osname ".
			 "  from experiments as e ".
			 "left join virt_lans as vl on vl.pid=e.pid and ".
			 "     vl.eid=e.eid ".
			 "left join virt_nodes as vn on vn.pid=e.pid and ".
			 "     vn.eid=e.eid and vn.vname=vl.vnode ".
			 "left join os_info as o on o.osname=vn.osname and ".
			 "  (o.pid=vl.pid or o.pid='" . TBOPSPID() . "') ".
			 "where (vl.uselinkdelay!=0 or e.uselinkdelays!=0 or ".
			 "       e.forcelinkdelays!=0) and ".
823 824
			 "     (o.os is NULL or o.os='Linux' or ".
			 "      o.os='Fedora') and ".
825 826 827
			 "     e.pid='$pid' and e.eid='$eid'");

	    if ($db_result->numrows) {
828 829
		tberror "Endnodeshaping is disabled on Linux Images!";
		tberror "You must modify your experiment to swap it in.";
830 831 832 833
		return 1;
	    }
	}
	
Chad Barb's avatar
Chad Barb committed
834 835 836 837
	print "Mapping to physical reality ...\n";
	TBDebugTimeStamp("assign_wrapper started");

	#
838 839 840 841
	# Pass the -u (update) switch into assign_wrapper, which turns on
	# update mode. When doing a retry, must also fix the current nodes
	# to avoid stuff jumping around when simply trying to replace a node
	# that did not boot.
Chad Barb's avatar
Chad Barb committed
842 843
	#
	my $exitcode;
844 845 846
	my $wrapper = "assign_wrapper -u";
	$wrapper .= " -f"
	    if ($type == RETRY);
847
	
848
	if (system("$wrapper $pid $eid")) {
849 850
	    $exitcode = $? >> 8;

851
	    tberror "Failed ($exitcode) to map to reality.";
852

853 854
	    # Wrapper sets this bit when recovery is possible.
	    if ($exitcode & 64) {
855
		# We can recover. 
856
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
857
		return 7;
858 859
	    }
	    else {
860
		# No recovery, no retry.
861
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
862 863 864 865
		return 1;
	    }
	}
	TBDebugTimeStamp("assign_wrapper finished");
866

Chad Barb's avatar
Chad Barb committed
867 868
	print "Mapped to physical reality!\n";
    }
869

870
    # Check cancel flag before continuing. No retry, 
871
    TBGetCancelFlag($pid, $eid, \$canceled);
872 873 874 875
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
876

877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911
    #
    # Look for any nodes in RES_TEARDOWN. These need to be released,
    # and if a virtnode, they need to be torn down. We cannot wait for
    # the virtnodes to go down with the physnode they are hosted on,
    # so teardown and release the virtnodes first, and then do the
    # physnodes.
    #
    # Errors are fatal; no recovery or retry.
    #
    if ($type == UPDATE) {
	my $allocstate = TBDB_ALLOCSTATE_RES_TEARDOWN();
	
	$db_result =
	    DBQueryFatal("select r.node_id,nt.isvirtnode,nt.isremotenode ".
			 "  from reserved as r ".
			 "left join nodes as n on n.node_id=r.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
			 "where r.pid='$pid' and r.eid='$eid' and ".
			 "      n.allocstate='$allocstate'");

	if ($db_result->numrows) {
	    my @virtnodes = ();
	    my @physnodes = ();
	    
	    print "Tearing down and releasing unused nodes\n";

	    # First teardown/release virtnodes. 
	    while (my ($node,$isvirt,$isrem) = $db_result->fetchrow_array()) {
		if ($isvirt) {
		    push(@virtnodes, $node);
		}
		elsif (!$isrem) {
		    push(@physnodes, $node);
		}
	    }
912 913 914
	    # See below.
	    @deleted_pnodes = @physnodes;
	    
915 916 917
	    if (@virtnodes) {
		TBDebugTimeStamp("vnode_setup started");
		
918
		if (system("vnode_setup -k $pid $eid @virtnodes")) {
919
		    tberror "Failed to tear down unused virtnodes!\n";
920 921 922 923 924
		    return 1;
		}
		TBDebugTimeStamp("vnode_setup finished");
		
		if (system("nfree $pid $eid @virtnodes")) {
925
		    tberror "Failed to nfree unused virtnodes!\n";
926 927 928 929
		    return 1;
		}
	    }
	    if (@physnodes) {
930 931 932
		if ($elabinelab) {
		    print "Removing nodes from inner elab.\n";
		    if (system("elabinelab -r $pid $eid @physnodes")) {
933
			tberror "Failed to remove inner nodes!";
934 935
			return 1;
		    }
936 937 938 939 940 941
		} elsif ($plabinelab) {
		    print "Removing nodes from inner plab.\n";
		    if (system("plabinelab -r $pid $eid @physnodes")) {
			tberror "Failed to remove inner nodes!";
			return 1;
		    }
942 943 944
		}

		#
945 946
		# If the experiment is firewalled, cleanup the nodes
		# we are releasing.
947
		# 
948
		if ($firewalled && undoFWNodes($pid, $eid, @deleted_pnodes)) {
949 950 951
		    return 1;
		}
		
952
		if (system("nfree $pid $eid @physnodes")) {
953
		    tberror "Failed to nfree unused physnodes!\n";
954 955 956 957 958 959
		    return 1;
		}
	    }
	}
    }

960 961 962 963 964 965
    # Exit here if we are testing.
    if ($TESTMODE) {
	print "Testing run - Stopping here.\n";
	return 0;
    }

966 967 968 969 970 971 972 973 974 975 976
    #
    # Handle tarballs - we might need to fetch some from URLs if the user
    # asked for that.
    #
    print "Fetching tarballs and RPMs (if any) ...\n";
    TBDebugTimeStamp("tarfiles_setup started");

    if (system("tarfiles_setup $pid $eid")) {
	#
	# No recovery for now - what would we do?
	#
977 978 979
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['tarfiles_setup_failed']},
		"Failed to set up tarballs.");
980 981 982 983
	return 1;
    }
    TBDebugTimeStamp("tarfiles_setup finished");

984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
    #
    # If there are any Plab dslice nodes in the experiment, create the
    # dslice now
    #
    if ($type > UPDATE_RECOVER) {
	# Are there any Plab nodes?
	$db_result =
	    DBQueryFatal("select n.node_id from nodes as n ".
			 "left join node_types as nt on n.type = nt.type ".
			 "left join reserved as r on r.node_id = n.node_id ".
			 "where r.pid='$pid' and r.eid='$eid' ".
			 " and nt.isplabdslice = 1");

	if ($db_result->numrows) {
	    # Does slice already exist?
	    $db_result =
		DBQueryFatal("select slicename from plab_slices ".
			     "where pid='$pid' and eid='$eid'");

	    if (! $db_result->numrows) {
		my @plabnodes = ();
		
		while (my ($node) = $db_result->fetchrow_array()) {
		    push(@plabnodes, $node);
		}
		
		print "Creating Planetlab slice.\n";
		TBDebugTimeStamp("plabslice create started");
1012
		if (system("plabslice create $pid $eid")) {
1013 1014 1015
		    tberror({type => 'secondary', severity => SEV_SECONDARY,
			     error => ['plabslice_create_failed']},
			    "Failed to create Plab dslice");
1016 1017 1018 1019 1020 1021 1022
		    return 3;
		}
		TBDebugTimeStamp("plabslice alloc finished");
	    }
	}
    }

1023
    # Check cancel flag before continuing. No retry, 
1024
    TBGetCancelFlag($pid, $eid, \$canceled);
1025 1026 1027 1028
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
1029

1030 1031 1032 1033 1034 1035 1036 1037 1038
    #
    # These things need to get started before the nodes come up, so we'll
    # do them before the os_setup. Everything else can done in parallel with
    # os_setup. (Actually, these probably can too, since they should finish
    # long before the nodes reboot, but better safe than sorry)
    #
    print "Setting up mountpoints.\n";
    TBDebugTimeStamp("mountpoints started");
    if (system("exports_setup")) {
1039 1040 1041
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['mountpoints_setup_failed']},
		"Failed to setup mountpoints.");
1042 1043 1044
	return 1;
    }
    TBDebugTimeStamp("mountpoints finished");
Chad Barb's avatar
Chad Barb committed
1045

1046 1047 1048
    TBDebugTimeStamp("named started");
    print "Setting up named maps.\n";
    if (system("named_setup")) {
1049
	tbwarn "Failed to add node names to named map.";
1050 1051
	#
	# This is a non-fatal error.
Chad Barb's avatar
Chad Barb committed
1052
	#
1053 1054
    }
    TBDebugTimeStamp("named finished");
Chad Barb's avatar
Chad Barb committed
1055

Timothy Stack's avatar
Timothy Stack committed
1056 1057 1058 1059
    if ($NFSTRACESUPPORT) {
	print "Cleaning NFS traces.\n";
	TBDebugTimeStamp("nfstrace gc started");
	if (system("nfstrace gc $pid $eid")) {
1060 1061 1062
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['nfstrace_setup_failed']},
		    "Failed to setup nfstrace.");
Timothy Stack's avatar
Timothy Stack committed
1063 1064 1065
	    return 1;
	}
	TBDebugTimeStamp("nfstrace gc finished");
1066 1067
    }

1068
    # Check cancel flag before continuing. No retry, 
1069
    TBGetCancelFlag($pid, $eid, \$canceled);
1070 1071 1072 1073
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
Chad Barb's avatar
Chad Barb committed
1074

1075 1076 1077 1078
    #
    # Setup any control-net firewall.
    # This must be done before reloading and rebooting nodes.
    #
1079 1080
    if ($firewalled && ($type == REAL || $type == UPDATE) &&
	doFW($pid, $eid, (($type == UPDATE) ? FWADDNODES : FWSETUP), undef)) {
1081 1082 1083
	return 1;
    }

1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095
    #
    # PlabinElab setup.  This is currently just tweaking out the dhcpd.conf
    # file and that must be done before os_setup (i.e., before nodes are
    # rebooted).
    #
    if ($plabinelab && !$TESTMODE && $type > UPDATE_RECOVER) {
	# for UPDATE and RETRY we pass in the -u to pick up new nodes
	my $optarg = ($type == REAL ? "" : "-u");
	
	print "Setting up plabinelab.\n";
	TBDebugTimeStamp("plabinelab setup started");
	if (system("plabinelab $optarg $pid $eid")) {
1096 1097 1098
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['plabinelab_setup_failed']},
		    "Failed to setup plabinelab!");
1099 1100 1101 1102 1103
	    return 1;
	}
	TBDebugTimeStamp("plabinelab setup finished");
    }

Chad Barb's avatar
Chad Barb committed
1104 1105
    #
    # If user specified -reboot to update,
1106
    # and we are successfully performing the update,
1107
    # then mark all nodes in experiment so os_setup will reboot them.
1108 1109
    # We must reboot nodes on a RETRY as well, since assign has been rerun
    # and may have remapped interfaces on the nodes.
Chad Barb's avatar
Chad Barb committed
1110
    #
1111 1112
    if ($type == RETRY ||
	($type == UPDATE && ($updateReboot || $updateReconfig))) {
Mike Hibler's avatar
Mike Hibler committed
1113
	my $needreboot = ($type == RETRY || $updateReboot) ? 1 : 0;
1114

Mike Hibler's avatar
Mike Hibler committed
1115 1116
	print STDERR "Marking nodes for ",
		     $needreboot ? "reboot" : "reconfig", ".\n";
Chad Barb's avatar
Chad Barb committed
1117
	$db_result =
1118 1119 1120
	    DBQueryFatal("select r.node_id,n.allocstate from reserved as r ".
			 "left join nodes as n on n.node_id=r.node_id ".
			 "where r.pid='$pid' and r.eid='$eid'");
Chad Barb's avatar
Chad Barb committed
1121

1122 1123 1124 1125 1126 1127 1128 1129
	while (my ($node,$allocstate) = $db_result->fetchrow_array) {
	    #
	    # If the node is INIT_CLEAN, leave it alone. It will still get
	    # rebooted, but will not falsely be tagged as dirty. This is
	    # important for vnodes too, where INIT_CLEAN indicated the vnode
	    # does not even exist yet (plab nodes).
	    #
	    if ($allocstate ne TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
1130
		TBSetNodeAllocState($node,
1131
				    ($needreboot ?
1132 1133
				     TBDB_ALLOCSTATE_RES_INIT_DIRTY() :
				     TBDB_ALLOCSTATE_RES_RECONFIG()));
1134
	    }
Chad Barb's avatar
Chad Barb committed
1135
	}
1136 1137 1138
	# Do this only when nodes are to be rebooted.
	$experiment->ClearPortRegistration()
	    if ($type == UPDATE);
Chad Barb's avatar
Chad Barb committed
1139 1140
    }

1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153
    #
    # Lets run gentopofile again, so we get ltmap right. This will come out
    # later, most likely.
    #
    TBDebugTimeStamp("gentopofile started");
    print "Generating ltmap (again) ...\n";

    if (system("gentopofile $pid $eid")) {
        tberror("gentopofile failed!");
	return 1;
    }
    TBDebugTimeStamp("gentopofile finished");

1154
    # XXX fer now hack
1155
    if (0 && !$firewalled && !$elabinelab && !$plabinelab &&
1156
	($pid eq "testbed" || $pid eq "tbres")) {
1157 1158 1159 1160 1161
	DBQueryWarn("update experiments set ".
		    "    savedisk=1 where pid='$pid' and eid='$eid'");
    }


1162 1163 1164 1165 1166
    #
    # Since it'll take a while for the nodes to reboot, we'll start now, and
    # wait for the os_setup to finish, down below
    #
    print "Resetting OS and rebooting.\n";
1167
    TBDebugTimeStamp("launching os_setup");
1168 1169 1170
    if (!($os_setup_pid = fork())) { 
	exec("os_setup $pid $eid") or return 1;
    } elsif ($os_setup_pid == -1) {
1171
	tberror "Fork failed.";
1172 1173
	return 1;
    }
Chad Barb's avatar
Chad Barb committed
1174

1175
    #
1176 1177 1178 1179 1180
    # XXX
    # Don't add any steps between here and the waitpid() call below
    # without verifying that 1) It's OK for nodes to come up before
    # the step has completed and 2) It's OK for the command to run in
    # parallel with os_setup (no DB dependencies, etc.)
1181 1182 1183 1184 1185
    #

    print "Setting up VLANs.\n";
    TBDebugTimeStamp("snmpit started");
    if (system("snmpit -t $pid $eid")) {
1186 1187 1188
	tberror({type => 'summary', severity => SEV_SECONDARY,
		 error => ['vlan_setup_failed']},
		"Failed to set up VLANs.");
1189 1190 1191
	return 1;
    }
    TBDebugTimeStamp("snmpit finished");
Chad Barb's avatar
Chad Barb committed
1192

1193 1194 1195 1196
    #
    # An error now means that the VLANS need to be cleaned up.
    #
    $cleanvlans = 1;
Chad Barb's avatar
Chad Barb committed
1197

1198 1199
    print "Setting up email lists.\n";
    TBDebugTimeStamp("genelists started");
1200
    if (system("genelists -t")) {
1201
	tbwarn "Failed to update email lists.";
1202 1203 1204 1205 1206
	#
	# This is a non-fatal error.
	# 
    }
    TBDebugTimeStamp("genelists finished");
Chad Barb's avatar
Chad Barb committed
1207

1208 1209 1210 1211 1212 1213 1214 1215
    #
    # Don't clear port counters on UPDATE.
    # (XXX should clear new nodes' port counters.)

    if ($type >= RETRY) {
	print "Clearing port counters.\n";
	TBDebugTimeStamp("portstats started");
	if (system("portstats -z -a -q $pid $eid")) {
1216
	    tbwarn "Failed to clear port counters.";
1217 1218 1219 1220 1221
	    #
	    # This is a non-fatal error.
	    # 
	}
	TBDebugTimeStamp("portstats finished");
1222
    }