tbswap.in 57.4 KB
Newer Older
1 2 3 4
#!/usr/bin/perl -w

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2009 University of Utah and the Flux Group.
6 7 8 9
# All rights reserved.
#
use English;

10 11 12
# Returns 0 on success.
# Returns 1 on non-assign_wrapper failure.
# Returns (1 | assign_wrapper's errorcode) on assign_wrapper failure.
13 14
# Returns |0x40 if update caused a swapout. Icky.
# Returns -1 on uncontrolled error (die called).
15

16 17 18 19 20
# XXX: handle error cases for update? (backup the db?)
# XXX: Shouldn't do idempotent stuff twice for update.
# XXX: repush/calc routing for update??? (tbprerun)
# XXX: previz for update???              (tbprerun)
# XXX: make snmpit faster for update.
Chad Barb's avatar
Chad Barb committed
21 22 23
#
# XXX: for update, expt is swapped out on os_setup fail.
#      (we only recover if assign fails)
24 25 26

sub usage()
{
27 28 29
    print STDERR
	"Usage: $0 {in|out|modify [-reboot] [-eventsys_restart]} ".
	"[-noswapout] [-genimode] pid eid\n";
30 31 32 33 34 35 36
    exit(-1);
}

#
# Configure variables
#
my $TBROOT         = "@prefix@";
37
my $TBOPS          = "@TBOPSEMAIL@";
Timothy Stack's avatar
Timothy Stack committed
38
my $TBLOGS         = "@TBLOGSEMAIL@";
39
my $MAINSITE	   = @TBMAINSITE@;
40
my $THISHOMEBASE   = "@THISHOMEBASE@";
41 42
my $TESTMODE       = @TESTMODE@;
my $DISABLE_EVENTS = "@DISABLE_EVENT_SCHED@";
43
my $piper          = "$TBROOT/sbin/locpiper";
Timothy Stack's avatar
Timothy Stack committed
44
my $NFSTRACESUPPORT= @NFSTRACESUPPORT@;
45 46 47 48 49 50 51 52 53 54 55

# Untaint the path
$ENV{'PATH'} = "/usr/bin:$TBROOT/libexec:$TBROOT/libexec/ns2ir" . 
    ":$TBROOT/sbin:$TBROOT/bin";

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
56
use libadminctrl;
57
use libadminmfs;
58
use libtblog;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
59
use libArchive;
60
use Experiment;
61
use User;
62
use Lan;
63

64
#require exitonwarn; # exitonwarn isn't really a module, so just require it
65 66 67 68

#
# Actual swap-in and swap-out functions, defined below.
#
69 70 71
sub doSwapout($);
sub doSwapin($);

72 73 74 75
#
# Firewall stuff
# XXX maybe should be elsewhere
#
76 77 78 79 80
sub FWSETUP()     { return 1; }
sub FWADDNODES()  { return 2; }
sub FWDELNODES()  { return 3; }
sub FWTEARDOWN()  { return 4; }
sub doFW($$$$);
81 82

# XXX fixme: should not be hardwired!
83
my $cnetstack = "-S Control";
84 85 86
my $cnetvlanname = "Control";


87 88 89 90
sub REAL()    { return 5; }
sub CLEANUP() { return 4; }
sub RETRY()   { return 3; }
sub MODIFY()  { return 2; }
91
sub UPDATE()  { return 1; }
92
sub MODIFY_RECOVER()  { return 0; }
93

94 95 96
#
# Grab global enable of linkdelays.
#
97
my $enablelinkdelays = TBGetSiteVar("general/linux_endnodeshaping");
98

99 100 101 102 103 104
#
# Turn off line buffering on output
#

$| = 1;

105 106
my $updateReboot   = 0;
my $updateReconfig = 1;
107 108
my $update_Eventsys_restart = 0;
my $elabinelab     = 0;
109
my $plabinelab     = 0;
110 111 112 113 114
my $force          = 0;
my $noswapout      = 0;
my $genimode       = 0;
my $errors         = 0;
my $updatehosed    = 0;
115
my $state;
116
my $canceled;
117
my $os_setup_pid;
118
my $nextState;
119 120

#
121
# First argument is either "in", "out", or "update";
122 123 124 125 126
# this value goes into $swapop.
#

my $swapop = shift;	

127 128 129
if (!$swapop || 
    (($swapop ne "in") && 
     ($swapop ne "out") &&
130
     ($swapop ne "modify") &&
131
     ($swapop ne "update"))) {
132 133 134 135 136 137 138 139 140 141 142
    usage();
}

#
# Get other arguments.
#

while ($#ARGV > 1) {
    $arg = shift;
    if ($arg eq "-force") {
	$force = 1;
Chad Barb's avatar
Chad Barb committed
143 144
    } elsif ($arg eq "-reboot") {
	$updateReboot = 1;
145
	$updateReconfig = 0;
146 147 148
    } elsif ($arg eq "-noreconfig") {
	$updateReboot   = 0;
	$updateReconfig = 0;
149
    } elsif ($arg eq "-eventsys_restart" && $swapop eq "modify") {
150
	$update_Eventsys_restart = 1;
151 152
    } elsif ($arg eq "-noswapout") {
	$noswapout = 0;
153 154 155 156 157 158 159 160 161
    } else {
	usage();
    }
}
if ($#ARGV < 1) {
    usage();
}
my ($pid,$eid) = @ARGV;

162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
#
# Untaint the arguments.
#
if ($pid =~ /^([-\@\w.]+)$/) {
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
if ($eid =~ /^([-\@\w.]+)$/) {
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}

178 179 180 181 182 183
#
# Set Error reporting info
# 
tblog_set_info($pid,$eid,$UID);

#
184
# Turn on timestamps
185
#
186 187
TBDebugTimeStampsOn();

188 189 190 191 192 193
#
# Verify user and get his DB uid and other info for later.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
194
}
195 196 197
my $user_uid   = $this_user->uid();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
198

199 200 201 202 203 204
# Slowly convert to using Experiment module.
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    tbdie("Could not lookup experiment object!")
}

205 206 207
#
# Print starting message.
#
208 209
my $exptidx;
TBExptIDX($pid, $eid, \$exptidx);
210

211 212
print "Beginning swap-$swapop for $pid/$eid ($exptidx). " .
    TBTimeStampWithDate() . "\n";
213 214 215 216 217 218
TBDebugTimeStamp("tbswap $swapop started");

#
# Get experiment state; verify that experiment exists.
#
if (! ($state = ExpState($pid, $eid))) {
219
    tbdie "No such experiment $pid/$eid";
220
}
221 222 223
# Sanity check the current state. 
if (!$force) {
    if ($swapop eq "in") {
224
	tbdie("Experiment should be ACTIVATING. Currently $state.")
225
	    if ($state ne EXPTSTATE_ACTIVATING);
226
    }
227
    elsif ($swapop eq "out") {
228
	tbdie("Experiment should be SWAPPING. Currently $state.")
229
	    if ($state ne EXPTSTATE_SWAPPING);
230
    }
231
    elsif ($swapop eq "modify" || $swapop eq "update") {
232
	tbdie("Experiment should be MODIFY_RESWAP. Currently $state.")
233
	    if ($state ne EXPTSTATE_MODIFY_RESWAP);
234 235
    }
}
236 237
# Get elabinelab status. See below.
if (! TBExptIsElabInElab($pid, $eid, \$elabinelab)) {
238
    tbdie("Could not get elabinelab status for experiment $pid/$eid");
239
}
240 241 242 243 244 245 246 247 248 249 250
# and plabinelab status.
if (! TBExptIsPlabInElab($pid, $eid, \$plabinelab)) {
    tbdie("Could not get plabinelab status for experiment $pid/$eid");
}
if ($elabinelab && $plabinelab) {
    tberror "Cannot get my head around Plab in Elab in Elab!\n";
    print "Failingly finished swap-$swapop for $pid/$eid. " .
	TBTimeStamp() . "\n";
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
    exit(1);
}
251

252 253 254 255 256
#
# See if the experiment is firewalled
#
my $firewalled = TBExptFirewall($pid, $eid);

257 258 259
#
# Do actual swapping
#
260 261 262 263
if ($swapop eq "out") {
    #
    # Swap out
    #
264
    tblog_set_attempt(1);
265
    $errors = doSwapout(REAL);
266
}
267
elsif ($swapop eq "update" || $swapop eq "modify") {
268 269 270 271 272 273
    #
    # Before swapout, do cursory admission control to see if the
    # modified experiment will be swappable. assign_wrapper does a
    # more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
274
    if (! TBAdmissionControlCheck(undef, $experiment, undef)) {
275 276 277
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
278 279 280 281 282 283
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
    
284 285
    #
    # Update.
286 287 288
    #
    tblog_set_attempt(1);

289 290 291 292
    #
    # Phase One -- swap experiment partially out.
    #
    print STDERR "Backing up physical state...\n";
293
    $experiment->BackupPhysicalState();
294

295 296 297 298 299
    #
    # Actually, in update mode this is not done cause we are anticipating
    # adding nodes only.
    #
    $errors = ($swapop eq "modify" ? doSwapout(MODIFY) : 0);
Chad Barb's avatar
Chad Barb committed
300

301 302 303 304 305 306
    if (0) {
	print STDERR "Doing a swapmodswapout on the experiment archive ...\n";
	if (libArchive::TBExperimentArchiveSwapModSwapOut($pid, $eid) < 0) {
	    tberror("Failed swapmodswapout on the experiment archive!");
	    $errors = 1;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
307 308
    }

309
    if ($errors) {
310
	#
311 312 313 314 315
	# Clean up the mess, leaving the experiment in the SWAPPED state,
	# 
	print STDERR "Cleaning up after errors.\n";
	doSwapout(CLEANUP);
	$updatehosed = 1;
316
    }
317
    else {
318
	#
319
	# Phase Two -- swap experiment back in.
320
	#
321
	$errors = doSwapin(MODIFY);
Chad Barb's avatar
Chad Barb committed
322

323
	if ($errors) {
Chad Barb's avatar
Chad Barb committed
324 325 326
	    #
	    # There were errors; see if we can recover.
	    #
327
	    my $CanRecover = 1;
Chad Barb's avatar
Chad Barb committed
328 329 330

	    if ($errors != 7) {
		print STDERR "Update failure occurred _after_ assign phase; ";
331
		$CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
332 333
	    }

334 335 336
	    if ($CanRecover) {
		print STDERR "Recovering virtual and physical state.\n";

337 338 339
		# It is safe to remove the phystate since we know it was
		# backed up above, and cause we do not know if assign_wrapper
		# made it to that point before it failed. 
340 341 342 343
		if ($experiment->RemoveVirtualState($pid, $eid) ||
		    $experiment->RestoreVirtualState($pid, $eid) ||
		    $experiment->RemovePhysicalState($pid, $eid) ||
		    $experiment->RestorePhysicalState($pid,$eid)) {
344 345
		    print STDERR "Could not restore backed-up state; ";
		    $CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
346
		}
347 348 349
		else {
		    print STDERR "Doing a recovery swap-in of old state.\n";

350
		    if (doSwapin(MODIFY_RECOVER)) {
351 352 353
			print STDERR "Could not swap in old physical state; ";
			$CanRecover = 0;
		    }
Chad Barb's avatar
Chad Barb committed
354 355
		}
	    }
356 357 358 359 360 361

	    #
	    # Some part of the recovery failed; must swap it out. swapexp
	    # (caller) will then have to do more clean up, hence the special
	    # exit status indicated by $updatehosed.
	    # 
362 363 364 365 366 367 368 369 370 371 372
	    if (!$CanRecover) {
		if ($noswapout) {
		    print STDERR
			"No Recovery, but leaving experiment swapped in.\n";
		}
		else {
		    print STDERR
			"Recovery aborted! Swapping experiment out.\n";
		    doSwapout(CLEANUP);
		    $updatehosed = 1;
		}
373 374 375
	    }
	    else {
		print STDERR "Update recovery successful.\n";
Chad Barb's avatar
Chad Barb committed
376
	    }
377
	}
Chad Barb's avatar
Chad Barb committed
378 379
    }
}
380 381 382 383 384
elsif ($swapop eq "in") {
    #
    # Swap in
    #
    my $retries = 2;
385 386 387 388 389 390

    #
    # Before real swapin, do cursory admission control. assign_wrapper does
    # a more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
391
    if (! TBAdmissionControlCheck(undef, $experiment, undef)) {
392 393 394
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
395 396 397 398 399
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
400 401

    tblog_set_attempt(1);
402
    $errors = doSwapin(REAL);
Chad Barb's avatar
Chad Barb committed
403

404 405 406 407 408 409 410 411 412 413
    #
    # Attempt a retry if: 
    #   a) there were errors, 
    #   b) doswapin() indicated (via return code 3) a retry is appropriate,
    #   c) we haven't tried too many times already.
    #   d) The cancelflag has not been set.
    #   e) $TESTMODE == 0.
    #
    while ($errors == 3 && $retries && !$canceled && !$TESTMODE) {
	$retries--;
414
	tblog_inc_attempt();
415

416 417
	print STDERR "Cleaning up after errors; will try again.\n";
	doSwapout(RETRY);
418

419 420
	print STDERR "Trying again...\n";
	$errors = doSwapin(RETRY);
421
    }
422 423
    if ($errors || $canceled) {
	print STDERR "Cleaning up after " .
424
	    ($canceled ? "cancelation" : "errors") . ".\n";
425
	doSwapout(CLEANUP);
426 427 428
    }
}

429 430 431 432
tblog_set_attempt(0);

TBGetCancelFlag($pid, $eid, \$canceled);
if ($canceled) {
433 434
    tberror ({type=>'summary', cause=>'canceled', 
	      severity=>SEV_IMMEDIATE, error=>['cancel_flag']},
435 436 437
	     "Experiment swap-$swapop canceled by user.");
}

438 439 440 441
#
# Write appropriate message and exit.
#
if ($errors) {
442
    print "Failingly finished swap-$swapop for $pid/$eid. ".TBTimeStamp()."\n";
443
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
Chad Barb's avatar
Chad Barb committed
444

445 446
    # Pass out magic value to indicate that update failed!
    exit(1 | ($updatehosed ? 0x40 : 0));
447
}
448
print "Successfully finished swap-$swapop for $pid/$eid. " .TBTimeStamp()."\n";
449 450
TBDebugTimeStamp("tbswap $swapop finished (succeeded)");
exit(0);
451 452 453 454

#################################

##
Chad Barb's avatar
Chad Barb committed
455
#
456 457
# doSwapout - Swaps experiment out.
#
Chad Barb's avatar
Chad Barb committed
458
#             If in REAL or CLEANUP,
459 460 461
#             this function will free all nodes for the 
#             experiment.
#
Chad Barb's avatar
Chad Barb committed
462
#             If in RETRY or UDPATE,
463 464 465 466 467 468
#             only nodes not in RES_READY will be freed.
#
#             Returns 0 on success, >0 on failure.
#
##

469
sub doSwapout($) {
470
    my $type = shift;
471 472
    my $swapout_errors = 0;

473 474
    tblog_set_cleanup(1) if $type == CLEANUP;

475 476 477 478 479 480 481 482 483
    #
    # wait for os_setup;
    # this only applies if called after a failed doswapin.
    #
    if ($os_setup_pid) {
	print "Waiting for os_setup to finish\n";
	waitpid($os_setup_pid, 0);
	undef $os_setup_pid;
    }
Chad Barb's avatar
Chad Barb committed
484

485
    if (0 && $NFSTRACESUPPORT && $type == REAL) {
Timothy Stack's avatar
Timothy Stack committed
486 487
	print "Getting files accessed via NFS.\n";
	TBDebugTimeStamp("nfstrace started");
488
	system("nfstrace transfer $pid $eid");
Timothy Stack's avatar
Timothy Stack committed
489
	TBDebugTimeStamp("nfstrace finished");
490
    }
491

492
    if (! $TESTMODE) { 
493 494
	if (! ($DISABLE_EVENTS || $elabinelab)) {
	    if ($type >= RETRY ||
495
		($update_Eventsys_restart && $type == MODIFY) ) {
496
		print "Stopping the event system\n";
497
		if (system("eventsys_control stop $pid,$eid")) {
498 499 500
		    tberror({type => 'secondary', severity => SEV_SECONDARY,
			     error => ['eventsys_stop_failed']},
			    "Failed to stop the event system.");
501 502
		    $swapout_errors = 1;
		}
503 504 505 506 507 508 509

		#
		# Stop the location piper.
		#
		if (-x $piper) {
		    print "Stopping the location piper\n";
		    if (system("$piper -k $pid $eid")) {
510 511 512
			tberror({type => 'secondary', severity => SEV_SECONDARY,
				 error => ['piper_stop_failed']},
				"Failed to stop location piper.");
513 514 515
			$swapout_errors = 1;
		    }
		}
516 517
	    }
	}
518 519 520 521 522 523
	
	#
	# Do teardown of inner elab. We must do this before we teardown the
	# vlans since the inner control network is a vlan, and we want that
	# active so inner boss can reboot the inner nodes (avoid power cycle).
	#
524
	if ($elabinelab && $type >= CLEANUP) {
525 526
	    print "Tearing down elabinelab. This could take a while.\n";
	    if (system("elabinelab -k $pid $eid")) {
527 528 529
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['elabinelab_tear_down_failed']},
			"Failed to teardown elabinelab!");
530 531 532 533
		$swapout_errors = 1;
	    }
	}

534
	#
Chad Barb's avatar
Chad Barb committed
535
	# Clean up any VLANs in experiment.
536
	#
537 538 539
	# When modifying an elabinelab experiment, leave the vlans intact
	# so that the inner networks are not suddenly disconnected!
	#
540
	if ($type != MODIFY) {
541 542 543
	    TBDebugTimeStamp("snmpit started");
	    print STDERR "Removing VLANs.\n";
	    if (system("snmpit -r $pid $eid")) {
544 545 546
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vlan_reset_failed']},
			"Failed to reset VLANs");
547 548 549
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("snmpit finished");
550
	}
551 552 553 554 555 556 557 558 559 560 561 562 563 564
	#
	# Must check for stale vlans that we kept around in the above clause
	# since they will not be in the lans table anymore.
	#
	if ($type == CLEANUP) {
	    my @stale;
	    if (VLan->StaleVlanList($experiment, \@stale) != 0) {
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vlan_reset_failed']},
			"Failed to get stale VLANs");
		$swapout_errors = 1;
	    }
	    if (@stale) {
		print "Removing stale vlans @stale\n";
565
		system("snmpit -f ". join(" ", map("-o $_", @stale)));
566 567 568 569 570 571 572 573
		if ($?) {
		    tberror({type => 'summary', severity => SEV_SECONDARY,
			     error => ['vlan_reset_failed']},
			    "Failed to remove stale vlans");
		    $swapout_errors = 1;
		}
	    }
	}
574
    }
Chad Barb's avatar
Chad Barb committed
575

576
    if ($type >= CLEANUP) {
577 578 579 580 581 582 583 584
	#
	# Undo plab in elab specialness.
	# No need to worry about VLANs here, as all the special abilities
	# involve the control network.
	#
	if (! $TESTMODE && $plabinelab) {
	    print "Tearing down plabinelab.\n";
	    if (system("plabinelab -k $pid $eid")) {
585 586 587
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['plabinelab_tear_down_failed']},
			"Failed to teardown plabinelab!");
588 589 590 591
		$swapout_errors = 1;
	    }
	}

592 593
	#
	# We're not attempting a retry;
594
	#
595
	# Stop all of the vnodes.
596
	#
597
	if (! $TESTMODE) { 	
598 599 600
	    print "Tearing down virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid")) {
601 602 603
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
604 605 606
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup finished");
607 608
	}

609 610 611 612 613
	#
	# Nodes behind a firewall are treated special.
	# See undoFWNodes for details.
	#
	if ($firewalled && undoFWNodes($pid, $eid)) {
614
	    tblog_set_cleanup(0);
615 616
	    return 1;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
617

618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
	#
	# Perform swapout time admin actions.  Right now there is at most
	# one of these.  It isn't really a general mechanism, just a hook
	# for state saving or data collection during swapout.
	# A couple of important "fer now" notes:
	#
	#	We don't do this for firewalled experiments.  We need a way
	#	to "tag" the saved disk state to ensure it doesn't get
	#	instantiated outside of a firewall.
	#
	#	We only do this on REAL swapouts, and not on CLEANUPs.
	#	There are some types of CLEANUPs where we may want to
	#	do this, in particular an invocation caused by a failed
	#	modify operation, where the admin action is to save the
	#	experiment state.  So we will need to revisit this.
	#
	my %soaction = ();
	if ($type == REAL && !$firewalled) {
	    TBExptGetSwapoutAction($pid, $eid, \%soaction);
	}
638
	if ($soaction{'command'} && doSwapoutAction($pid, $eid, %soaction)) {
639
	    tblog_set_cleanup(0);
640
	    return 1;
641 642
	}

643 644
	#
	# remove all nodes from the experiment.
Chad Barb's avatar
Chad Barb committed
645
	# (nfree will send them to RES_FREE_DIRTY)
646 647 648
	#
	print STDERR "Freeing nodes.\n";
	TBDebugTimeStamp("nfree started");
649
	if (system("nfree -a $pid $eid")) {
650 651 652
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['nfree_failed']},
		    "Could not free nodes.");
653 654 655
	    $swapout_errors = 1;
	}
	TBDebugTimeStamp("nfree finished");
656 657 658 659 660

	#
	# Since this is an actual swapout, 
	# reset our count of swap out nag emails sent.
	#
661 662
	DBQueryWarn("update experiments set swap_requests='', ".
		    "   sim_reswap_count='0' ".
663
		    "where eid='$eid' and pid='$pid'");
664 665
    } else {
	#
666
	# $type == RETRY or $type == MODIFY.
667 668
	# Therefore, don't deallocate nodes which have been successfully
	# incorporated into the experiment (i.e., are RES_READY).
Chad Barb's avatar
Chad Barb committed
669
	# (nfree will send deallocated nodes to RES_FREE_DIRTY)
670
	#
671 672 673 674 675 676 677 678
	my @failedpnodes = ();
	my @failedvnodes = ();
	
	my $db_result =
	    DBQueryFatal("select rv.node_id,n.allocstate,nt.isvirtnode ".
                         "  from reserved as rv ".
			 "left join nodes as n on n.node_id = rv.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
679 680
			 "where rv.pid='$pid' and rv.eid='$eid' and ".
			 "      rv.genisliver_idx is null");
681

682
	while (my ($node,$allocstate,$isvirt) = $db_result->fetchrow_array) {
683
	    if ($allocstate ne TBDB_ALLOCSTATE_RES_READY()) {
684 685 686 687 688 689
		if ($isvirt) {
		    push(@failedvnodes, $node);
		}
		else {
		    push(@failedpnodes, $node);
		}
690 691 692
	    }
	}

693 694 695 696 697 698 699
	#
	# Tear down failed vnodes. Perhaps not needed?
	# 
	if (!$TESTMODE && @failedvnodes > 0) {
	    print "Tearing down failed virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid @failedvnodes")) {
700 701 702
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
703 704 705 706
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup -k finished");
	}
707

708 709 710 711 712 713 714 715 716 717 718
	#
	# Undo plabinelab setup
	#
	if (!$TESTMODE && $plabinelab && @failedpnodes > 0) {
	    print "Removing failed nodes from inner plab.\n";
	    if (system("plabinelab -r $pid $eid @failedpnodes")) {
		tberror "Failed to remove inner nodes!";
		$swapout_errors = 1;
	    }
	}

719 720 721 722
	#
	# Release all failed nodes.
	# 
	if (@failedpnodes > 0 || @failedvnodes > 0) {
723 724
	    print STDERR "Freeing failed nodes.\n";
	    
725
	    TBDebugTimeStamp("nfree started");
Chad Barb's avatar
Chad Barb committed
726 727 728 729 730
	    #
	    # Specify -x switch so when a physical node gets freed,
	    # any virtual nodes (owned by this experiment)
	    # sitting on top of it are freed as well.
	    #
731 732
	    if (system("nfree -x $pid $eid " .
		       join(" ", (@failedpnodes, @failedvnodes)))) {
733 734 735
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['nfree_failed']},
			"Could not free nodes.");
736 737 738 739 740 741
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("nfree finished");
	}
    }

742 743 744 745 746 747 748 749 750 751 752 753
    if (! $TESTMODE) {
	#
	# If the experiment has no Plab dslice nodes left, but still has
	# a Plab slice, destroy the slice
	#

	# Does the slice exist?
	$db_result =
	    DBQueryFatal("select slicename from plab_slices ".
			 "where pid='$pid' and eid='$eid'");

	if ($db_result->numrows) {
754
	    # Are there any nodes left in the slice?
755
	    $db_result =
756 757 758 759 760 761 762 763 764
		DBQueryFatal("select r.node_id ".
			     " from plab_slices as ps".
			     " left join plab_slice_nodes as psn ".
			     "   on (ps.slicename=psn.slicename ".
			     "       and ps.plc_idx=psn.plc_idx) ".
			     " left join reserved as r ".
			     "   on psn.node_id=r.node_id ".
			     " where ps.pid='$pid' and ps.eid='$eid'".
			     "   and r.node_id is not NULL");
765 766

	    if (!$db_result->numrows) {
767
		print "Tearing down Slices.\n";
768
		TBDebugTimeStamp("plabslice destroy started");
769
		if (system("plabslice destroy $pid $eid")) {
770
		    tberror "Failed to tear down Slices.";
771 772 773 774 775 776 777
		    $swapout_errors = 1;
		}
		TBDebugTimeStamp("plabslice destroy finished");
	    }
	}
    }

778 779 780 781 782 783 784 785
    if (! $TESTMODE) {
	#
	# All of these errors are non-fatal on swapout. We find out about them
	# via email sent from the individual scripts.
	#

	#
	# Only reset mountpoints if this is an actual swapout, and
786
	# not a failed swapin(cleanup), update, or retry.
787
	#
788
	if ($type == REAL) {
789 790 791
	    print "Resetting mountpoints.\n";
	    TBDebugTimeStamp("exports started");
	    if (system("exports_setup")) {
792 793 794
		tberror({severity => SEV_WARNING,
			 error => ['mountpoint_reset_failed']},
			"Failed to reset mountpoints.");
795 796
	    }
	    TBDebugTimeStamp("exports finished");
Chad Barb's avatar
Chad Barb committed
797

798 799 800 801 802 803 804 805 806
	    #
	    # Ditto these two. 
	    #
	    print "Resetting named maps.\n";
	    TBDebugTimeStamp("named started");
	    if (system("named_setup")) {
		tbwarn "Failed to reset named map.";
	    }
	    TBDebugTimeStamp("named finished");
Chad Barb's avatar
Chad Barb committed
807

808 809 810 811 812 813
	    print "Resetting email lists.\n";
	    TBDebugTimeStamp("genelists started");
	    if (system("genelists -t")) {
		tbwarn "Failed to reset email lists.";
	    }
	    TBDebugTimeStamp("genelists finished");
814 815 816
	}
    }

817
    #
818
    # Wipe the DB clean except during MODIFY or RETRY. In those
819 820
    #    cases, assign_wrapper will reset the DB after reading
    #    the info.
821
    #
822 823
    if ( $type >= CLEANUP ) {
	print STDERR "Resetting DB.\n";
824 825 826
	$experiment->RemovePhysicalState();
	# Special. 
	$experiment->ClearPortRegistration();
827
    }
828

829
    tblog_set_cleanup(0);
830 831 832 833
    return $swapout_errors;
}

##
Chad Barb's avatar
Chad Barb committed
834
#
835 836
# doSwapin - Swaps experiment in.
#
Chad Barb's avatar
Chad Barb committed
837 838 839 840 841
#            Returns:
#              0 - successful swapin
#              1 - failed swapin; cleanup required.
#              3 - failed swapin; cleanup required; can retry.
#              7 - failed swapin; assign failed; no cleanup.
842 843
##

844
sub doSwapin($) {
845
    my $type = shift; 
846 847
    # Just the physnodes ...
    my @deleted_pnodes = ();
848

849 850 851 852
    #
    # assign_wrapper does all the virtual to physical mapping 
    # and updating the DB state.
    #
853
    
854
    if ($type > MODIFY_RECOVER) {
855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870
        #
        # Hacky test to allow disabling of linkdelays if the node is going
        # to run Linux. See sitevar above.
        #
	if (! $enablelinkdelays) {
	    $db_result =
		DBQueryFatal("select distinct e.pid,e.eid,vl.vnode,vn.osname ".
			 "  from experiments as e ".
			 "left join virt_lans as vl on vl.pid=e.pid and ".
			 "     vl.eid=e.eid ".
			 "left join virt_nodes as vn on vn.pid=e.pid and ".
			 "     vn.eid=e.eid and vn.vname=vl.vnode ".
			 "left join os_info as o on o.osname=vn.osname and ".
			 "  (o.pid=vl.pid or o.pid='" . TBOPSPID() . "') ".
			 "where (vl.uselinkdelay!=0 or e.uselinkdelays!=0 or ".
			 "       e.forcelinkdelays!=0) and ".
871 872
			 "     (o.os is NULL or o.os='Linux' or ".
			 "      o.os='Fedora') and ".
873 874 875
			 "     e.pid='$pid' and e.eid='$eid'");

	    if ($db_result->numrows) {
876 877
		tberror "Endnodeshaping is disabled on Linux Images!";
		tberror "You must modify your experiment to swap it in.";
878 879 880 881
		return 1;
	    }
	}
	
Chad Barb's avatar
Chad Barb committed
882 883 884 885
	print "Mapping to physical reality ...\n";
	TBDebugTimeStamp("assign_wrapper started");

	#
886 887 888 889
	# Pass the -u (update) switch into assign_wrapper, which turns on
	# update mode. When doing a retry, must also fix the current nodes
	# to avoid stuff jumping around when simply trying to replace a node
	# that did not boot.
Chad Barb's avatar
Chad Barb committed
890 891
	#
	my $exitcode;
892 893 894
	my $wrapper = "assign_wrapper -u";
	$wrapper .= " -f"
	    if ($type == RETRY);
895
	
896
	if (system("$wrapper $pid $eid")) {
897 898
	    $exitcode = $? >> 8;

899
	    tberror "Failed ($exitcode) to map to reality.";
900

901 902
	    # Wrapper sets this bit when recovery is possible.
	    if ($exitcode & 64) {
903
		# We can recover. 
904
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
905
		return 7;
906 907
	    }
	    else {
908
		# No recovery, no retry.
909
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
910 911 912 913
		return 1;
	    }
	}
	TBDebugTimeStamp("assign_wrapper finished");
914

Chad Barb's avatar
Chad Barb committed
915 916
	print "Mapped to physical reality!\n";
    }
917

918
    # Check cancel flag before continuing. No retry, 
919
    TBGetCancelFlag($pid, $eid, \$canceled);
920 921 922 923
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
924

925 926 927 928 929 930 931 932 933
    #
    # Look for any nodes in RES_TEARDOWN. These need to be released,
    # and if a virtnode, they need to be torn down. We cannot wait for
    # the virtnodes to go down with the physnode they are hosted on,
    # so teardown and release the virtnodes first, and then do the
    # physnodes.
    #
    # Errors are fatal; no recovery or retry.
    #
934
    if ($type == MODIFY || $type == UPDATE) {
935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959
	my $allocstate = TBDB_ALLOCSTATE_RES_TEARDOWN();
	
	$db_result =
	    DBQueryFatal("select r.node_id,nt.isvirtnode,nt.isremotenode ".
			 "  from reserved as r ".
			 "left join nodes as n on n.node_id=r.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
			 "where r.pid='$pid' and r.eid='$eid' and ".
			 "      n.allocstate='$allocstate'");

	if ($db_result->numrows) {
	    my @virtnodes = ();
	    my @physnodes = ();
	    
	    print "Tearing down and releasing unused nodes\n";

	    # First teardown/release virtnodes. 
	    while (my ($node,$isvirt,$isrem) = $db_result->fetchrow_array()) {
		if ($isvirt) {
		    push(@virtnodes, $node);
		}
		elsif (!$isrem) {
		    push(@physnodes, $node);
		}
	    }
960 961 962
	    # See below.
	    @deleted_pnodes = @physnodes;
	    
963 964 965
	    if (@virtnodes) {
		TBDebugTimeStamp("vnode_setup started");
		
966
		if (system("vnode_setup -k $pid $eid @virtnodes")) {
967
		    tberror "Failed to tear down unused virtnodes!\n";
968 969 970 971 972
		    return 1;
		}
		TBDebugTimeStamp("vnode_setup finished");
		
		if (system("nfree $pid $eid @virtnodes")) {
973
		    tberror "Failed to nfree unused virtnodes!\n";
974 975 976 977
		    return 1;
		}
	    }
	    if (@physnodes) {
978 979 980
		if ($elabinelab) {
		    print "Removing nodes from inner elab.\n";
		    if (system("elabinelab -r $pid $eid @physnodes")) {
981
			tberror "Failed to remove inner nodes!";
982 983
			return 1;
		    }
984 985 986 987 988 989
		} elsif ($plabinelab) {
		    print "Removing nodes from inner plab.\n";
		    if (system("plabinelab -r $pid $eid @physnodes")) {
			tberror "Failed to remove inner nodes!";
			return 1;
		    }
990 991 992
		}

		#
993 994
		# If the experiment is firewalled, cleanup the nodes
		# we are releasing.
995
		# 
996
		if ($firewalled && undoFWNodes($pid, $eid, @deleted_pnodes)) {
997 998 999
		    return 1;
		}
		
1000
		if (system("nfree $pid $eid @physnodes")) {
1001
		    tberror "Failed to nfree unused physnodes!\n";
1002 1003 1004 1005 1006 1007
		    return 1;
		}
	    }
	}
    }

1008 1009 1010 1011 1012 1013
    # Exit here if we are testing.
    if ($TESTMODE) {
	print "Testing run - Stopping here.\n";
	return 0;
    }

1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024
    #
    # Handle tarballs - we might need to fetch some from URLs if the user
    # asked for that.
    #
    print "Fetching tarballs and RPMs (if any) ...\n";
    TBDebugTimeStamp("tarfiles_setup started");

    if (system("tarfiles_setup $pid $eid")) {
	#
	# No recovery for now - what would we do?
	#
1025 1026 1027
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['tarfiles_setup_failed']},
		"Failed to set up tarballs.");
1028 1029 1030 1031
	return 1;
    }
    TBDebugTimeStamp("tarfiles_setup finished");

1032 1033 1034 1035
    #
    # If there are any Plab dslice nodes in the experiment, create the
    # dslice now
    #
1036
    if ($type > MODIFY_RECOVER) {
1037 1038 1039
	# Are there any Plab nodes?  First get a list of node types in the exp;
	# if any are types hosted by any of the PLCs we know about, create
	# all slices necessary for the experiment in a single plabslice call.
1040
	$db_result =
1041 1042 1043 1044 1045 1046 1047 1048
	    DBQueryFatal("select nt.type,ppi.plc_name from nodes as n ".
			 "  left join node_types as nt on n.type = nt.type ".
			 "  left join reserved as r on r.node_id = n.node_id ".
			 "  left join plab_plc_info as ppi ".
			 "    on nt.type = ppi.node_type".
			 " where r.pid='$pid' and r.eid='$eid'".
			 "    and ppi.node_type is not NULL".
			 " group by nt.type");
1049 1050

	if ($db_result->numrows) {
1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061
	    my $info_str = "";
	    while (my ($nt,$plc) = $db_result->fetchrow_array()) {
		$info_str .= " $plc($nt)";
	    }
	    print "Configuring Slices for $info_str.\n";
	    TBDebugTimeStamp("plabslice started");
	    if (system("plabslice create $pid $eid")) {
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['plabslice_create_failed']},
			"Failed to configure Slices.");
		return 3;
1062
	    }
1063
	    TBDebugTimeStamp("plabslice finished");
1064 1065 1066
	}
    }

1067
    # Check cancel flag before continuing. No retry, 
1068
    TBGetCancelFlag($pid, $eid, \$canceled);
1069 1070 1071 1072
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
1073

1074 1075 1076 1077 1078 1079 1080 1081 1082
    #
    # These things need to get started before the nodes come up, so we'll
    # do them before the os_setup. Everything else can done in parallel with
    # os_setup. (Actually, these probably can too, since they should finish
    # long before the nodes reboot, but better safe than sorry)
    #
    print "Setting up mountpoints.\n";
    TBDebugTimeStamp("mountpoints started");
    if (system("exports_setup")) {
1083 1084 1085
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['mountpoints_setup_failed']},
		"Failed to setup mountpoints.");
1086 1087 1088
	return 1;
    }
    TBDebugTimeStamp("mountpoints finished");
Chad Barb's avatar
Chad Barb committed
1089

1090 1091 1092
    TBDebugTimeStamp("named started");
    print "Setting up named maps.\n";
    if (system("named_setup")) {
1093
	tbwarn "Failed to add node names to named map.";
1094 1095
	#
	# This is a non-fatal error.
Chad Barb's avatar
Chad Barb committed
1096
	#
1097 1098
    }
    TBDebugTimeStamp("named finished");
Chad Barb's avatar
Chad Barb committed
1099

Timothy Stack's avatar
Timothy Stack committed
1100 1101 1102 1103
    if ($NFSTRACESUPPORT) {
	print "Cleaning NFS traces.\n";
	TBDebugTimeStamp("nfstrace gc started");
	if (system("nfstrace gc $pid $eid")) {
1104 1105 1106
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['nfstrace_setup_failed']},
		    "Failed to setup nfstrace.");
Timothy Stack's avatar
Timothy Stack committed
1107 1108 1109
	    return 1;
	}
	TBDebugTimeStamp("nfstrace gc finished");
1110 1111
    }

1112
    # Check cancel flag before continuing. No retry, 
1113
    TBGetCancelFlag($pid, $eid, \$canceled);
1114 1115 1116 1117
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
Chad Barb's avatar
Chad Barb committed
1118

1119 1120 1121 1122
    #
    # Setup any control-net firewall.
    # This must be done before reloading and rebooting nodes.
    #
1123 1124
    if ($firewalled && ($type == REAL || $type == MODIFY) &&
	doFW($pid, $eid, (($type == MODIFY) ? FWADDNODES : FWSETUP), undef)) {
1125 1126 1127
	return 1;
    }

1128 1129 1130 1131 1132
    #
    # PlabinElab setup.  This is currently just tweaking out the dhcpd.conf
    # file and that must be done before os_setup (i.e., before nodes are
    # rebooted).
    #
1133 1134
    if ($plabinelab && !$TESTMODE && $type > MODIFY_RECOVER) {
	# for MODIFY and RETRY we pass in the -u to pick up new nodes
1135 1136 1137 1138 1139
	my $optarg = ($type == REAL ? "" : "-u");
	
	print "Setting up plabinelab.\n";
	TBDebugTimeStamp("plabinelab setup started");
	if (system("plabinelab $optarg $pid $eid")) {
1140 1141 1142
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['plabinelab_setup_failed']},
		    "Failed to setup plabinelab!");
1143 1144 1145 1146 1147
	    return 1;
	}
	TBDebugTimeStamp("plabinelab setup finished");
    }

Chad Barb's avatar
Chad Barb committed
1148 1149
    #
    # If user specified -reboot to update,
1150
    # and we are successfully performing the update,
1151
    # then mark all nodes in experiment so os_setup will reboot them.
1152 1153
    # We must reboot nodes on a RETRY as well, since assign has been rerun
    # and may have remapped interfaces on the nodes.
Chad Barb's avatar
Chad Barb committed
1154
    #
1155
    if ($type == RETRY ||
1156
	($type == MODIFY && ($updateReboot || $updateReconfig))) {
Mike Hibler's avatar
Mike Hibler committed
1157
	my $needreboot = ($type == RETRY || $updateReboot) ? 1 : 0;
1158

Mike Hibler's avatar
Mike Hibler committed
1159 1160
	print STDERR "Marking nodes for ",
		     $needreboot ? "reboot" : "reconfig", ".\n";
Chad Barb's avatar
Chad Barb committed
1161
	$db_result =
1162 1163 1164
	    DBQueryFatal("select r.node_id,n.allocstate from reserved as r ".
			 "left join nodes as n on n.node_id=r.node_id ".
			 "where r.pid='$pid' and r.eid='$eid'");
Chad Barb's avatar
Chad Barb committed
1165

1166 1167 1168 1169 1170 1171 1172 1173
	while (my ($node,$allocstate) = $db_result->fetchrow_array) {
	    #
	    # If the node is INIT_CLEAN, leave it alone. It will still get
	    # rebooted, but will not falsely be tagged as dirty. This is
	    # important for vnodes too, where INIT_CLEAN indicated the vnode
	    # does not even exist yet (plab nodes).
	    #
	    if ($allocstate ne TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
1174
		TBSetNodeAllocState($node,
1175
				    ($needreboot ?
1176 1177
				     TBDB_ALLOCSTATE_RES_INIT_DIRTY() :
				     TBDB_ALLOCSTATE_RES_RECONFIG()));
1178
	    }
Chad Barb's avatar
Chad Barb committed
1179
	}
1180 1181
	# Do this only when nodes are to be rebooted.
	$experiment->ClearPortRegistration()
1182
	    if ($type == MODIFY);
Chad Barb's avatar
Chad Barb committed
1183 1184