tbswap.in 43.4 KB
Newer Older
1 2 3 4
#!/usr/bin/perl -w

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2009 University of Utah and the Flux Group.
6 7 8 9
# All rights reserved.
#
use English;

10 11 12
# Returns 0 on success.
# Returns 1 on non-assign_wrapper failure.
# Returns (1 | assign_wrapper's errorcode) on assign_wrapper failure.
13 14
# Returns |0x40 if update caused a swapout. Icky.
# Returns -1 on uncontrolled error (die called).
15

16 17 18 19 20
# XXX: handle error cases for update? (backup the db?)
# XXX: Shouldn't do idempotent stuff twice for update.
# XXX: repush/calc routing for update??? (tbprerun)
# XXX: previz for update???              (tbprerun)
# XXX: make snmpit faster for update.
Chad Barb's avatar
Chad Barb committed
21 22 23
#
# XXX: for update, expt is swapped out on os_setup fail.
#      (we only recover if assign fails)
24 25 26

sub usage()
{
27 28 29
    print STDERR
	"Usage: $0 {in|out|modify [-reboot] [-eventsys_restart]} ".
	"[-noswapout] [-genimode] pid eid\n";
30 31 32 33 34 35 36
    exit(-1);
}

#
# Configure variables
#
my $TBROOT         = "@prefix@";
37
my $TBOPS          = "@TBOPSEMAIL@";
Timothy Stack's avatar
Timothy Stack committed
38
my $TBLOGS         = "@TBLOGSEMAIL@";
39
my $MAINSITE	   = @TBMAINSITE@;
40
my $THISHOMEBASE   = "@THISHOMEBASE@";
41 42
my $TESTMODE       = @TESTMODE@;
my $DISABLE_EVENTS = "@DISABLE_EVENT_SCHED@";
43
my $piper          = "$TBROOT/sbin/locpiper";
Timothy Stack's avatar
Timothy Stack committed
44
my $NFSTRACESUPPORT= @NFSTRACESUPPORT@;
45 46 47 48 49 50 51 52 53 54 55

# Untaint the path
$ENV{'PATH'} = "/usr/bin:$TBROOT/libexec:$TBROOT/libexec/ns2ir" . 
    ":$TBROOT/sbin:$TBROOT/bin";

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
56
use libadminctrl;
57
use libadminmfs;
58
use libtblog;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
59
use libArchive;
60
use Experiment;
61
use User;
62
use Lan;
63
use Firewall;
64

65
#require exitonwarn; # exitonwarn isn't really a module, so just require it
66 67 68 69

#
# Actual swap-in and swap-out functions, defined below.
#
70 71 72
sub doSwapout($);
sub doSwapin($);

73
# XXX fixme: should not be hardwired!
74
my $cnetstack = "-S Control";
75 76 77
my $cnetvlanname = "Control";


78 79 80 81
sub REAL()    { return 5; }
sub CLEANUP() { return 4; }
sub RETRY()   { return 3; }
sub MODIFY()  { return 2; }
82
sub UPDATE()  { return 1; }
83
sub MODIFY_RECOVER()  { return 0; }
84

85 86 87
#
# Grab global enable of linkdelays.
#
88
my $enablelinkdelays = TBGetSiteVar("general/linux_endnodeshaping");
89

90 91 92 93 94 95
#
# Turn off line buffering on output
#

$| = 1;

96 97
my $updateReboot   = 0;
my $updateReconfig = 1;
98 99
my $update_Eventsys_restart = 0;
my $elabinelab     = 0;
100
my $plabinelab     = 0;
101 102 103 104 105
my $force          = 0;
my $noswapout      = 0;
my $genimode       = 0;
my $errors         = 0;
my $updatehosed    = 0;
106
my $state;
107
my $canceled;
108
my $os_setup_pid;
109
my $nextState;
110 111

#
112
# First argument is either "in", "out", or "update";
113 114 115 116 117
# this value goes into $swapop.
#

my $swapop = shift;	

118 119 120
if (!$swapop || 
    (($swapop ne "in") && 
     ($swapop ne "out") &&
121
     ($swapop ne "modify") &&
122
     ($swapop ne "update"))) {
123 124 125 126 127 128 129 130 131 132 133
    usage();
}

#
# Get other arguments.
#

while ($#ARGV > 1) {
    $arg = shift;
    if ($arg eq "-force") {
	$force = 1;
Chad Barb's avatar
Chad Barb committed
134 135
    } elsif ($arg eq "-reboot") {
	$updateReboot = 1;
136
	$updateReconfig = 0;
137 138 139
    } elsif ($arg eq "-noreconfig") {
	$updateReboot   = 0;
	$updateReconfig = 0;
140
    } elsif ($arg eq "-eventsys_restart" && $swapop eq "modify") {
141
	$update_Eventsys_restart = 1;
142 143
    } elsif ($arg eq "-noswapout") {
	$noswapout = 0;
144 145 146 147 148 149 150 151 152
    } else {
	usage();
    }
}
if ($#ARGV < 1) {
    usage();
}
my ($pid,$eid) = @ARGV;

153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
#
# Untaint the arguments.
#
if ($pid =~ /^([-\@\w.]+)$/) {
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
if ($eid =~ /^([-\@\w.]+)$/) {
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}

169 170 171 172 173 174
#
# Set Error reporting info
# 
tblog_set_info($pid,$eid,$UID);

#
175
# Turn on timestamps
176
#
177 178
TBDebugTimeStampsOn();

179 180 181 182 183 184
#
# Verify user and get his DB uid and other info for later.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
185
}
186 187 188
my $user_uid   = $this_user->uid();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
189

190 191 192 193 194 195
# Slowly convert to using Experiment module.
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    tbdie("Could not lookup experiment object!")
}

196 197 198
#
# Print starting message.
#
199 200
my $exptidx;
TBExptIDX($pid, $eid, \$exptidx);
201

202 203
print "Beginning swap-$swapop for $pid/$eid ($exptidx). " .
    TBTimeStampWithDate() . "\n";
204 205 206 207 208 209
TBDebugTimeStamp("tbswap $swapop started");

#
# Get experiment state; verify that experiment exists.
#
if (! ($state = ExpState($pid, $eid))) {
210
    tbdie "No such experiment $pid/$eid";
211
}
212 213 214
# Sanity check the current state. 
if (!$force) {
    if ($swapop eq "in") {
215
	tbdie("Experiment should be ACTIVATING. Currently $state.")
216
	    if ($state ne EXPTSTATE_ACTIVATING);
217
    }
218
    elsif ($swapop eq "out") {
219
	tbdie("Experiment should be SWAPPING. Currently $state.")
220
	    if ($state ne EXPTSTATE_SWAPPING);
221
    }
222
    elsif ($swapop eq "modify" || $swapop eq "update") {
223
	tbdie("Experiment should be MODIFY_RESWAP. Currently $state.")
224
	    if ($state ne EXPTSTATE_MODIFY_RESWAP);
225 226
    }
}
227 228
# Get elabinelab status. See below.
if (! TBExptIsElabInElab($pid, $eid, \$elabinelab)) {
229
    tbdie("Could not get elabinelab status for experiment $pid/$eid");
230
}
231 232 233 234 235 236 237 238 239 240 241
# and plabinelab status.
if (! TBExptIsPlabInElab($pid, $eid, \$plabinelab)) {
    tbdie("Could not get plabinelab status for experiment $pid/$eid");
}
if ($elabinelab && $plabinelab) {
    tberror "Cannot get my head around Plab in Elab in Elab!\n";
    print "Failingly finished swap-$swapop for $pid/$eid. " .
	TBTimeStamp() . "\n";
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
    exit(1);
}
242

243 244 245 246 247
#
# See if the experiment is firewalled
#
my $firewalled = TBExptFirewall($pid, $eid);

248 249 250
#
# Do actual swapping
#
251 252 253 254
if ($swapop eq "out") {
    #
    # Swap out
    #
255
    tblog_set_attempt(1);
256
    $errors = doSwapout(REAL);
257
}
258
elsif ($swapop eq "update" || $swapop eq "modify") {
259 260 261 262 263 264
    #
    # Before swapout, do cursory admission control to see if the
    # modified experiment will be swappable. assign_wrapper does a
    # more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
265
    if (! TBAdmissionControlCheck(undef, $experiment, undef)) {
266 267 268
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
269 270 271 272 273 274
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
    
275 276
    #
    # Update.
277 278 279
    #
    tblog_set_attempt(1);

280 281 282 283
    #
    # Phase One -- swap experiment partially out.
    #
    print STDERR "Backing up physical state...\n";
284
    $experiment->BackupPhysicalState();
285

286 287 288 289 290
    #
    # Actually, in update mode this is not done cause we are anticipating
    # adding nodes only.
    #
    $errors = ($swapop eq "modify" ? doSwapout(MODIFY) : 0);
Chad Barb's avatar
Chad Barb committed
291

292 293 294 295 296 297
    if (0) {
	print STDERR "Doing a swapmodswapout on the experiment archive ...\n";
	if (libArchive::TBExperimentArchiveSwapModSwapOut($pid, $eid) < 0) {
	    tberror("Failed swapmodswapout on the experiment archive!");
	    $errors = 1;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
298 299
    }

300
    if ($errors) {
301
	#
302 303 304 305 306
	# Clean up the mess, leaving the experiment in the SWAPPED state,
	# 
	print STDERR "Cleaning up after errors.\n";
	doSwapout(CLEANUP);
	$updatehosed = 1;
307
    }
308
    else {
309
	#
310
	# Phase Two -- swap experiment back in.
311
	#
312
	$errors = doSwapin(MODIFY);
Chad Barb's avatar
Chad Barb committed
313

314
	if ($errors) {
Chad Barb's avatar
Chad Barb committed
315 316 317
	    #
	    # There were errors; see if we can recover.
	    #
318
	    my $CanRecover = 1;
Chad Barb's avatar
Chad Barb committed
319 320 321

	    if ($errors != 7) {
		print STDERR "Update failure occurred _after_ assign phase; ";
322
		$CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
323 324
	    }

325 326 327
	    if ($CanRecover) {
		print STDERR "Recovering virtual and physical state.\n";

328 329 330
		# It is safe to remove the phystate since we know it was
		# backed up above, and cause we do not know if assign_wrapper
		# made it to that point before it failed. 
331 332 333 334
		if ($experiment->RemoveVirtualState($pid, $eid) ||
		    $experiment->RestoreVirtualState($pid, $eid) ||
		    $experiment->RemovePhysicalState($pid, $eid) ||
		    $experiment->RestorePhysicalState($pid,$eid)) {
335 336
		    print STDERR "Could not restore backed-up state; ";
		    $CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
337
		}
338 339 340
		else {
		    print STDERR "Doing a recovery swap-in of old state.\n";

341
		    if (doSwapin(MODIFY_RECOVER)) {
342 343 344
			print STDERR "Could not swap in old physical state; ";
			$CanRecover = 0;
		    }
Chad Barb's avatar
Chad Barb committed
345 346
		}
	    }
347 348 349 350 351 352

	    #
	    # Some part of the recovery failed; must swap it out. swapexp
	    # (caller) will then have to do more clean up, hence the special
	    # exit status indicated by $updatehosed.
	    # 
353 354 355 356 357 358 359 360 361 362 363
	    if (!$CanRecover) {
		if ($noswapout) {
		    print STDERR
			"No Recovery, but leaving experiment swapped in.\n";
		}
		else {
		    print STDERR
			"Recovery aborted! Swapping experiment out.\n";
		    doSwapout(CLEANUP);
		    $updatehosed = 1;
		}
364 365 366
	    }
	    else {
		print STDERR "Update recovery successful.\n";
Chad Barb's avatar
Chad Barb committed
367
	    }
368
	}
Chad Barb's avatar
Chad Barb committed
369 370
    }
}
371 372 373 374 375
elsif ($swapop eq "in") {
    #
    # Swap in
    #
    my $retries = 2;
376 377 378 379 380 381

    #
    # Before real swapin, do cursory admission control. assign_wrapper does
    # a more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
382
    if (! TBAdmissionControlCheck(undef, $experiment, undef)) {
383 384 385
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
386 387 388 389 390
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
391 392

    tblog_set_attempt(1);
393
    $errors = doSwapin(REAL);
Chad Barb's avatar
Chad Barb committed
394

395 396 397 398 399 400 401 402 403 404
    #
    # Attempt a retry if: 
    #   a) there were errors, 
    #   b) doswapin() indicated (via return code 3) a retry is appropriate,
    #   c) we haven't tried too many times already.
    #   d) The cancelflag has not been set.
    #   e) $TESTMODE == 0.
    #
    while ($errors == 3 && $retries && !$canceled && !$TESTMODE) {
	$retries--;
405
	tblog_inc_attempt();
406

407 408
	print STDERR "Cleaning up after errors; will try again.\n";
	doSwapout(RETRY);
409

410 411
	print STDERR "Trying again...\n";
	$errors = doSwapin(RETRY);
412
    }
413 414
    if ($errors || $canceled) {
	print STDERR "Cleaning up after " .
415
	    ($canceled ? "cancelation" : "errors") . ".\n";
416
	doSwapout(CLEANUP);
417 418 419
    }
}

420 421 422 423
tblog_set_attempt(0);

TBGetCancelFlag($pid, $eid, \$canceled);
if ($canceled) {
424 425
    tberror ({type=>'summary', cause=>'canceled', 
	      severity=>SEV_IMMEDIATE, error=>['cancel_flag']},
426 427 428
	     "Experiment swap-$swapop canceled by user.");
}

429 430 431 432
#
# Write appropriate message and exit.
#
if ($errors) {
433
    print "Failingly finished swap-$swapop for $pid/$eid. ".TBTimeStamp()."\n";
434
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
Chad Barb's avatar
Chad Barb committed
435

436 437
    # Pass out magic value to indicate that update failed!
    exit(1 | ($updatehosed ? 0x40 : 0));
438
}
439
print "Successfully finished swap-$swapop for $pid/$eid. " .TBTimeStamp()."\n";
440 441
TBDebugTimeStamp("tbswap $swapop finished (succeeded)");
exit(0);
442 443 444 445

#################################

##
Chad Barb's avatar
Chad Barb committed
446
#
447 448
# doSwapout - Swaps experiment out.
#
Chad Barb's avatar
Chad Barb committed
449
#             If in REAL or CLEANUP,
450 451 452
#             this function will free all nodes for the 
#             experiment.
#
Chad Barb's avatar
Chad Barb committed
453
#             If in RETRY or UDPATE,
454 455 456 457 458 459
#             only nodes not in RES_READY will be freed.
#
#             Returns 0 on success, >0 on failure.
#
##

460
sub doSwapout($) {
461
    my $type = shift;
462 463
    my $swapout_errors = 0;

464 465
    tblog_set_cleanup(1) if $type == CLEANUP;

466 467 468 469 470 471 472 473 474
    #
    # wait for os_setup;
    # this only applies if called after a failed doswapin.
    #
    if ($os_setup_pid) {
	print "Waiting for os_setup to finish\n";
	waitpid($os_setup_pid, 0);
	undef $os_setup_pid;
    }
Chad Barb's avatar
Chad Barb committed
475

476
    if (0 && $NFSTRACESUPPORT && $type == REAL) {
Timothy Stack's avatar
Timothy Stack committed
477 478
	print "Getting files accessed via NFS.\n";
	TBDebugTimeStamp("nfstrace started");
479
	system("nfstrace transfer $pid $eid");
Timothy Stack's avatar
Timothy Stack committed
480
	TBDebugTimeStamp("nfstrace finished");
481
    }
482

483
    if (! $TESTMODE) { 
484 485
	if (! ($DISABLE_EVENTS || $elabinelab)) {
	    if ($type >= RETRY ||
486
		($update_Eventsys_restart && $type == MODIFY) ) {
487
		print "Stopping the event system\n";
488
		if (system("eventsys_control stop $pid,$eid")) {
489 490 491
		    tberror({type => 'secondary', severity => SEV_SECONDARY,
			     error => ['eventsys_stop_failed']},
			    "Failed to stop the event system.");
492 493
		    $swapout_errors = 1;
		}
494 495 496 497 498 499 500

		#
		# Stop the location piper.
		#
		if (-x $piper) {
		    print "Stopping the location piper\n";
		    if (system("$piper -k $pid $eid")) {
501 502 503
			tberror({type => 'secondary', severity => SEV_SECONDARY,
				 error => ['piper_stop_failed']},
				"Failed to stop location piper.");
504 505 506
			$swapout_errors = 1;
		    }
		}
507 508
	    }
	}
509 510 511 512 513 514
	
	#
	# Do teardown of inner elab. We must do this before we teardown the
	# vlans since the inner control network is a vlan, and we want that
	# active so inner boss can reboot the inner nodes (avoid power cycle).
	#
515
	if ($elabinelab && $type >= CLEANUP) {
516 517
	    print "Tearing down elabinelab. This could take a while.\n";
	    if (system("elabinelab -k $pid $eid")) {
518 519 520
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['elabinelab_tear_down_failed']},
			"Failed to teardown elabinelab!");
521 522 523 524
		$swapout_errors = 1;
	    }
	}

525
	#
Chad Barb's avatar
Chad Barb committed
526
	# Clean up any VLANs in experiment.
527
	#
528
	# When modifying an elabinelab experiment, leave the vlans intact
529
	# so that the inner networks are not suddenly disconnected.
530
	#
531
	if ($type != MODIFY) {
532 533 534
	    TBDebugTimeStamp("snmpit started");
	    print STDERR "Removing VLANs.\n";
	    if (system("snmpit -r $pid $eid")) {
535 536 537
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vlan_reset_failed']},
			"Failed to reset VLANs");
538 539 540
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("snmpit finished");
541
	}
542 543 544 545 546 547 548 549 550 551 552 553 554 555
	#
	# Must check for stale vlans that we kept around in the above clause
	# since they will not be in the lans table anymore.
	#
	if ($type == CLEANUP) {
	    my @stale;
	    if (VLan->StaleVlanList($experiment, \@stale) != 0) {
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vlan_reset_failed']},
			"Failed to get stale VLANs");
		$swapout_errors = 1;
	    }
	    if (@stale) {
		print "Removing stale vlans @stale\n";
556
		system("snmpit -f ". join(" ", map("-o $_", @stale)));
557 558 559 560 561 562 563 564
		if ($?) {
		    tberror({type => 'summary', severity => SEV_SECONDARY,
			     error => ['vlan_reset_failed']},
			    "Failed to remove stale vlans");
		    $swapout_errors = 1;
		}
	    }
	}
565
    }
566
	
567
    if ($type >= CLEANUP) {
568 569 570 571 572 573 574 575
	#
	# Undo plab in elab specialness.
	# No need to worry about VLANs here, as all the special abilities
	# involve the control network.
	#
	if (! $TESTMODE && $plabinelab) {
	    print "Tearing down plabinelab.\n";
	    if (system("plabinelab -k $pid $eid")) {
576 577 578
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['plabinelab_tear_down_failed']},
			"Failed to teardown plabinelab!");
579 580 581 582
		$swapout_errors = 1;
	    }
	}

583 584
	#
	# We're not attempting a retry;
585
	#
586
	# Stop all of the vnodes.
587
	#
588
	if (! $TESTMODE) { 	
589 590 591
	    print "Tearing down virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid")) {
592 593 594
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
595 596 597
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup finished");
598 599
	}

600 601 602 603
	#
	# Nodes behind a firewall are treated special.
	# See undoFWNodes for details.
	#
604
	if ($firewalled && undoFWNodes($experiment)) {
605
	    tblog_set_cleanup(0);
606 607
	    return 1;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
608

609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
	#
	# Perform swapout time admin actions.  Right now there is at most
	# one of these.  It isn't really a general mechanism, just a hook
	# for state saving or data collection during swapout.
	# A couple of important "fer now" notes:
	#
	#	We don't do this for firewalled experiments.  We need a way
	#	to "tag" the saved disk state to ensure it doesn't get
	#	instantiated outside of a firewall.
	#
	#	We only do this on REAL swapouts, and not on CLEANUPs.
	#	There are some types of CLEANUPs where we may want to
	#	do this, in particular an invocation caused by a failed
	#	modify operation, where the admin action is to save the
	#	experiment state.  So we will need to revisit this.
	#
	my %soaction = ();
	if ($type == REAL && !$firewalled) {
	    TBExptGetSwapoutAction($pid, $eid, \%soaction);
	}
629
	if ($soaction{'command'} && doSwapoutAction($pid, $eid, %soaction)) {
630
	    tblog_set_cleanup(0);
631
	    return 1;
632 633
	}

634 635
	#
	# remove all nodes from the experiment.
Chad Barb's avatar
Chad Barb committed
636
	# (nfree will send them to RES_FREE_DIRTY)
637 638 639
	#
	print STDERR "Freeing nodes.\n";
	TBDebugTimeStamp("nfree started");
640
	if (system("nfree -a $pid $eid")) {
641 642 643
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['nfree_failed']},
		    "Could not free nodes.");
644 645 646
	    $swapout_errors = 1;
	}
	TBDebugTimeStamp("nfree finished");
647 648 649 650 651

	#
	# Since this is an actual swapout, 
	# reset our count of swap out nag emails sent.
	#
652 653
	DBQueryWarn("update experiments set swap_requests='', ".
		    "   sim_reswap_count='0' ".
654
		    "where eid='$eid' and pid='$pid'");
655 656
    } else {
	#
657
	# $type == RETRY or $type == MODIFY.
658 659
	# Therefore, don't deallocate nodes which have been successfully
	# incorporated into the experiment (i.e., are RES_READY).
Chad Barb's avatar
Chad Barb committed
660
	# (nfree will send deallocated nodes to RES_FREE_DIRTY)
661
	#
662 663 664 665 666 667 668 669
	my @failedpnodes = ();
	my @failedvnodes = ();
	
	my $db_result =
	    DBQueryFatal("select rv.node_id,n.allocstate,nt.isvirtnode ".
                         "  from reserved as rv ".
			 "left join nodes as n on n.node_id = rv.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
670 671
			 "where rv.pid='$pid' and rv.eid='$eid' and ".
			 "      rv.genisliver_idx is null");
672

673
	while (my ($node,$allocstate,$isvirt) = $db_result->fetchrow_array) {
674
	    if ($allocstate ne TBDB_ALLOCSTATE_RES_READY()) {
675 676 677 678 679 680
		if ($isvirt) {
		    push(@failedvnodes, $node);
		}
		else {
		    push(@failedpnodes, $node);
		}
681 682 683
	    }
	}

684 685 686 687 688 689 690
	#
	# Tear down failed vnodes. Perhaps not needed?
	# 
	if (!$TESTMODE && @failedvnodes > 0) {
	    print "Tearing down failed virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid @failedvnodes")) {
691 692 693
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
694 695 696 697
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup -k finished");
	}
698

699 700 701 702 703 704 705 706 707 708 709
	#
	# Undo plabinelab setup
	#
	if (!$TESTMODE && $plabinelab && @failedpnodes > 0) {
	    print "Removing failed nodes from inner plab.\n";
	    if (system("plabinelab -r $pid $eid @failedpnodes")) {
		tberror "Failed to remove inner nodes!";
		$swapout_errors = 1;
	    }
	}

710 711 712 713
	#
	# Release all failed nodes.
	# 
	if (@failedpnodes > 0 || @failedvnodes > 0) {
714 715
	    print STDERR "Freeing failed nodes.\n";
	    
716
	    TBDebugTimeStamp("nfree started");
Chad Barb's avatar
Chad Barb committed
717 718 719 720 721
	    #
	    # Specify -x switch so when a physical node gets freed,
	    # any virtual nodes (owned by this experiment)
	    # sitting on top of it are freed as well.
	    #
722 723
	    if (system("nfree -x $pid $eid " .
		       join(" ", (@failedpnodes, @failedvnodes)))) {
724 725 726
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['nfree_failed']},
			"Could not free nodes.");
727 728 729 730 731 732
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("nfree finished");
	}
    }

733 734 735 736 737 738 739 740 741 742 743 744
    if (! $TESTMODE) {
	#
	# If the experiment has no Plab dslice nodes left, but still has
	# a Plab slice, destroy the slice
	#

	# Does the slice exist?
	$db_result =
	    DBQueryFatal("select slicename from plab_slices ".
			 "where pid='$pid' and eid='$eid'");

	if ($db_result->numrows) {
745
	    # Are there any nodes left in the slice?
746
	    $db_result =
747 748 749 750 751 752 753 754 755
		DBQueryFatal("select r.node_id ".
			     " from plab_slices as ps".
			     " left join plab_slice_nodes as psn ".
			     "   on (ps.slicename=psn.slicename ".
			     "       and ps.plc_idx=psn.plc_idx) ".
			     " left join reserved as r ".
			     "   on psn.node_id=r.node_id ".
			     " where ps.pid='$pid' and ps.eid='$eid'".
			     "   and r.node_id is not NULL");
756 757

	    if (!$db_result->numrows) {
758
		print "Tearing down Slices.\n";
759
		TBDebugTimeStamp("plabslice destroy started");
760
		if (system("plabslice destroy $pid $eid")) {
761
		    tberror "Failed to tear down Slices.";
762 763 764 765 766 767 768
		    $swapout_errors = 1;
		}
		TBDebugTimeStamp("plabslice destroy finished");
	    }
	}
    }

769 770 771 772 773 774 775 776
    if (! $TESTMODE) {
	#
	# All of these errors are non-fatal on swapout. We find out about them
	# via email sent from the individual scripts.
	#

	#
	# Only reset mountpoints if this is an actual swapout, and
777
	# not a failed swapin(cleanup), update, or retry.
778
	#
779
	if ($type == REAL) {
780 781 782
	    print "Resetting mountpoints.\n";
	    TBDebugTimeStamp("exports started");
	    if (system("exports_setup")) {
783 784 785
		tberror({severity => SEV_WARNING,
			 error => ['mountpoint_reset_failed']},
			"Failed to reset mountpoints.");
786 787
	    }
	    TBDebugTimeStamp("exports finished");
Chad Barb's avatar
Chad Barb committed
788

789 790 791 792 793 794 795 796 797
	    #
	    # Ditto these two. 
	    #
	    print "Resetting named maps.\n";
	    TBDebugTimeStamp("named started");
	    if (system("named_setup")) {
		tbwarn "Failed to reset named map.";
	    }
	    TBDebugTimeStamp("named finished");
Chad Barb's avatar
Chad Barb committed
798

799 800 801 802 803 804
	    print "Resetting email lists.\n";
	    TBDebugTimeStamp("genelists started");
	    if (system("genelists -t")) {
		tbwarn "Failed to reset email lists.";
	    }
	    TBDebugTimeStamp("genelists finished");
805 806 807
	}
    }

808
    #
809
    # Wipe the DB clean except during MODIFY or RETRY. In those
810 811
    #    cases, assign_wrapper will reset the DB after reading
    #    the info.
812
    #
813 814
    if ( $type >= CLEANUP ) {
	print STDERR "Resetting DB.\n";
815 816 817
	$experiment->RemovePhysicalState();
	# Special. 
	$experiment->ClearPortRegistration();
818
    }
819

820
    tblog_set_cleanup(0);
821 822 823 824
    return $swapout_errors;
}

##
Chad Barb's avatar
Chad Barb committed
825
#
826 827
# doSwapin - Swaps experiment in.
#
Chad Barb's avatar
Chad Barb committed
828 829 830 831 832
#            Returns:
#              0 - successful swapin
#              1 - failed swapin; cleanup required.
#              3 - failed swapin; cleanup required; can retry.
#              7 - failed swapin; assign failed; no cleanup.
833 834
##

835
sub doSwapin($) {
836
    my $type = shift; 
837 838
    # Just the physnodes ...
    my @deleted_pnodes = ();
839

840 841 842 843
    #
    # assign_wrapper does all the virtual to physical mapping 
    # and updating the DB state.
    #
844
    
845
    if ($type > MODIFY_RECOVER) {
846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
        #
        # Hacky test to allow disabling of linkdelays if the node is going
        # to run Linux. See sitevar above.
        #
	if (! $enablelinkdelays) {
	    $db_result =
		DBQueryFatal("select distinct e.pid,e.eid,vl.vnode,vn.osname ".
			 "  from experiments as e ".
			 "left join virt_lans as vl on vl.pid=e.pid and ".
			 "     vl.eid=e.eid ".
			 "left join virt_nodes as vn on vn.pid=e.pid and ".
			 "     vn.eid=e.eid and vn.vname=vl.vnode ".
			 "left join os_info as o on o.osname=vn.osname and ".
			 "  (o.pid=vl.pid or o.pid='" . TBOPSPID() . "') ".
			 "where (vl.uselinkdelay!=0 or e.uselinkdelays!=0 or ".
			 "       e.forcelinkdelays!=0) and ".
862 863
			 "     (o.os is NULL or o.os='Linux' or ".
			 "      o.os='Fedora') and ".
864 865 866
			 "     e.pid='$pid' and e.eid='$eid'");

	    if ($db_result->numrows) {
867 868
		tberror "Endnodeshaping is disabled on Linux Images!";
		tberror "You must modify your experiment to swap it in.";
869 870 871 872
		return 1;
	    }
	}
	
Chad Barb's avatar
Chad Barb committed
873 874 875 876
	print "Mapping to physical reality ...\n";
	TBDebugTimeStamp("assign_wrapper started");

	#
877 878 879 880
	# Pass the -u (update) switch into assign_wrapper, which turns on
	# update mode. When doing a retry, must also fix the current nodes
	# to avoid stuff jumping around when simply trying to replace a node
	# that did not boot.
Chad Barb's avatar
Chad Barb committed
881 882
	#
	my $exitcode;
883 884 885
	my $wrapper = "assign_wrapper -u";
	$wrapper .= " -f"
	    if ($type == RETRY);
886
	
887
	if (system("$wrapper $pid $eid")) {
888 889
	    $exitcode = $? >> 8;

890
	    tberror "Failed ($exitcode) to map to reality.";
891

892 893
	    # Wrapper sets this bit when recovery is possible.
	    if ($exitcode & 64) {
894
		# We can recover. 
895
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
896
		return 7;
897 898
	    }
	    else {
899
		# No recovery, no retry.
900
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
901 902 903 904
		return 1;
	    }
	}
	TBDebugTimeStamp("assign_wrapper finished");
905

Chad Barb's avatar
Chad Barb committed
906 907
	print "Mapped to physical reality!\n";
    }
908

909
    # Check cancel flag before continuing. No retry, 
910
    TBGetCancelFlag($pid, $eid, \$canceled);
911 912 913 914
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
915

916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
    #
    # When doing a modify, we have to compare vlans to determine which
    # vlans actually changed and need to be deleted, before processing
    # the new vlans for the experiment. Note that vlans that already
    # exist on the switches will be left as is by snmpit.
    #
    # We must do this before the nfree of deleted nodes in the next section
    # because the new test in nfree the prevents nodes from accidentally
    # getting released when they are in a vlan.
    #
    if ($type == MODIFY) {
	my @diff = ();
	my @same = ();
	if (Lan->CompareVlansWithSwitches($experiment, \@diff, \@same) != 0) {
	    tberror({type => 'summary', severity => SEV_SECONDARY,
		     error => ['vlan_setup_failed']},
		    "Failed to compare old vlans");
	    return 1;
	}
	if (@diff) {
	    print "Removing obsolete vlans @diff\n";
	    system("snmpit -f ". join(" ", map("-o $_", @diff)));
	    if ($?) {
		tberror({type => 'summary', severity => SEV_SECONDARY,
			 error => ['vlan_setup_failed']},
			"Failed to remove old vlans");
		return 1;
	    }
	}
    }

947 948 949 950 951 952 953 954 955
    #
    # Look for any nodes in RES_TEARDOWN. These need to be released,
    # and if a virtnode, they need to be torn down. We cannot wait for
    # the virtnodes to go down with the physnode they are hosted on,
    # so teardown and release the virtnodes first, and then do the
    # physnodes.
    #
    # Errors are fatal; no recovery or retry.
    #
956
    if ($type == MODIFY || $type == UPDATE) {
957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981
	my $allocstate = TBDB_ALLOCSTATE_RES_TEARDOWN();
	
	$db_result =
	    DBQueryFatal("select r.node_id,nt.isvirtnode,nt.isremotenode ".
			 "  from reserved as r ".
			 "left join nodes as n on n.node_id=r.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
			 "where r.pid='$pid' and r.eid='$eid' and ".
			 "      n.allocstate='$allocstate'");

	if ($db_result->numrows) {
	    my @virtnodes = ();
	    my @physnodes = ();
	    
	    print "Tearing down and releasing unused nodes\n";

	    # First teardown/release virtnodes. 
	    while (my ($node,$isvirt,$isrem) = $db_result->fetchrow_array()) {
		if ($isvirt) {
		    push(@virtnodes, $node);
		}
		elsif (!$isrem) {
		    push(@physnodes, $node);
		}
	    }
982 983 984
	    # See below.
	    @deleted_pnodes = @physnodes;
	    
985 986 987
	    if (@virtnodes) {
		TBDebugTimeStamp("vnode_setup started");
		
988
		if (system("vnode_setup -k $pid $eid @virtnodes")) {
989
		    tberror "Failed to tear down unused virtnodes!\n";
990 991 992 993 994
		    return 1;
		}
		TBDebugTimeStamp("vnode_setup finished");
		
		if (system("nfree $pid $eid @virtnodes")) {
995
		    tberror "Failed to nfree unused virtnodes!\n";
996 997 998 999
		    return 1;
		}
	    }
	    if (@physnodes) {
1000 1001 1002
		if ($elabinelab) {
		    print "Removing nodes from inner elab.\n";
		    if (system("elabinelab -r $pid $eid @physnodes")) {
1003
			tberror "Failed to remove inner nodes!";
1004 1005
			return 1;
		    }
1006 1007 1008 1009 1010 1011
		} elsif ($plabinelab) {
		    print "Removing nodes from inner plab.\n";
		    if (system("plabinelab -r $pid $eid @physnodes")) {
			tberror "Failed to remove inner nodes!";
			return 1;
		    }
1012 1013 1014
		}

		#
1015 1016
		# If the experiment is firewalled, cleanup the nodes
		# we are releasing.
1017
		# 
1018 1019
		if ($firewalled && undoFWNodes($experiment, 1,
					       @deleted_pnodes)) {
1020 1021 1022
		    return 1;
		}
		
1023
		if (system("nfree $pid $eid @physnodes")) {
1024
		    tberror "Failed to nfree unused physnodes!\n";
1025 1026 1027 1028 1029 1030
		    return 1;
		}
	    }
	}
    }

1031 1032 1033 1034 1035 1036
    # Exit here if we are testing.
    if ($TESTMODE) {
	print "Testing run - Stopping here.\n";
	return 0;
    }

1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
    #
    # Handle tarballs - we might need to fetch some from URLs if the user
    # asked for that.
    #
    print "Fetching tarballs and RPMs (if any) ...\n";
    TBDebugTimeStamp("tarfiles_setup started");

    if (system("tarfiles_setup $pid $eid")) {
	#
	# No recovery for now - what would we do?
	#
1048 1049 1050
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['tarfiles_setup_failed']},
		"Failed to set up tarballs.");
1051 1052 1053 1054
	return 1;
    }
    TBDebugTimeStamp("tarfiles_setup finished");

1055 1056 1057 1058
    #
    # If there are any Plab dslice nodes in the experiment, create the
    # dslice now
    #
1059
    if ($type > MODIFY_RECOVER) {
1060 1061 1062
	# Are there any Plab nodes?  First get a list of node types in the exp;
	# if any are types hosted by any of the PLCs we know about, create
	# all slices necessary for the experiment in a single plabslice call.
1063
	$db_result =
1064 1065 1066 1067 1068 1069 1070 1071
	    DBQueryFatal("select nt.type,ppi.plc_name from nodes as n ".
			 "  left join node_types as nt on n.type = nt.type ".
			 "  left join reserved as r on r.node_id = n.node_id ".
			 "  left join plab_plc_info as ppi ".
			 "    on nt.type = ppi.node_type".
			 " where r.pid='$pid' and r.eid='$eid'".
			 "    and ppi.node_type is not NULL".
			 " group by nt.type");
1072 1073

	if ($db_result->numrows) {
1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084
	    my $info_str = "";
	    while (my ($nt,$plc) = $db_result->fetchrow_array()) {
		$info_str .= " $plc($nt)";
	    }
	    print "Configuring Slices for $info_str.\n";
	    TBDebugTimeStamp("plabslice started");
	    if (system("plabslice create $pid $eid")) {
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['plabslice_create_failed']},
			"Failed to configure Slices.");
		return 3;
1085
	    }
1086
	    TBDebugTimeStamp("plabslice finished");
1087 1088 1089
	}
    }

1090
    # Check cancel flag before continuing. No retry, 
1091
    TBGetCancelFlag($pid, $eid, \$canceled);
1092 1093 1094 1095
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
1096

1097 1098 1099 1100 1101 1102 1103 1104 1105
    #
    # These things need to get started before the nodes come up, so we'll
    # do them before the os_setup. Everything else can done in parallel with
    # os_setup. (Actually, these probably can too, since they should finish
    # long before the nodes reboot, but better safe than sorry)
    #
    print "Setting up mountpoints.\n";
    TBDebugTimeStamp("mountpoints started");
    if (system("exports_setup")) {
1106 1107 1108
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['mountpoints_setup_failed']},
		"Failed to setup mountpoints.");
1109 1110 1111
	return 1;
    }
    TBDebugTimeStamp("mountpoints finished");
Chad Barb's avatar
Chad Barb committed
1112

1113 1114 1115
    TBDebugTimeStamp("named started");
    print "Setting up named maps.\n";
    if (system("named_setup")) {
1116
	tbwarn "Failed to add node names to named map.";
1117 1118
	#
	# This is a non-fatal error.
Chad Barb's avatar
Chad Barb committed
1119
	#
1120 1121
    }
    TBDebugTimeStamp("named finished");
Chad Barb's avatar
Chad Barb committed
1122

Timothy Stack's avatar
Timothy Stack committed
1123 1124 1125 1126
    if ($NFSTRACESUPPORT) {
	print "Cleaning NFS traces.\n";
	TBDebugTimeStamp("nfstrace gc started");
	if (system("nfstrace gc $pid $eid")) {
1127 1128 1129
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['nfstrace_setup_failed']},
		    "Failed to setup nfstrace.");
Timothy Stack's avatar
Timothy Stack committed
1130 1131 1132
	    return 1;
	}
	TBDebugTimeStamp("nfstrace gc finished");
1133 1134
    }

1135
    # Check cancel flag before continuing. No retry, 
1136
    TBGetCancelFlag($pid, $eid, \$canceled);
1137 1138 1139 1140
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
Chad Barb's avatar
Chad Barb committed
1141

1142 1143 1144 1145
    #
    # Setup any control-net firewall.
    # This must be done before reloading and rebooting nodes.
    #
1146
    if ($firewalled && ($type == REAL || $type == MODIFY) &&
1147 1148
	doFWlans($experiment,
		 (($type == MODIFY) ? FWADDNODES : FWSETUP), undef)) {
1149 1150 1151
	return 1;
    }

1152 1153 1154 1155 1156
    #
    # PlabinElab setup.  This is currently just tweaking out the dhcpd.conf
    # file and that must be done before os_setup (i.e., before nodes are
    # rebooted).
    #
1157 1158
    if ($plabinelab && !$TESTMODE && $type > MODIFY_RECOVER) {
	# for MODIFY and RETRY we pass in the -u to pick up new nodes
1159 1160 1161 1162 1163
	my $optarg = ($type == REAL ? "" : "-u");
	
	print "Setting up plabinelab.\n";
	TBDebugTimeStamp("plabinelab setup started");
	if (system("plabinelab $optarg $pid $eid")) {
1164 1165 1166
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['plabinelab_setup_failed']},
		    "Failed to setup plabinelab!");
1167 1168 1169 1170 1171
	    return 1;
	}
	TBDebugTimeStamp("plabinelab setup finished");
    }

Chad Barb's avatar
Chad Barb committed
1172 1173
    #
    # If user specified -reboot to update,
1174
    # and we are successfully performing the update,
1175
    # then mark all nodes in experiment so os_setup will reboot them.
1176 1177
    # We must reboot nodes on a RETRY as well, since assign has been rerun
    # and may have remapped interfaces on the nodes.
Chad Barb's avatar
Chad Barb committed
1178
    #
1179
    if ($type == RETRY ||
1180
	($type == MODIFY && ($updateReboot || $updateReconfig))) {
Mike Hibler's avatar
Mike Hibler committed
1181
	my $needreboot = ($type == RETRY || $updateReboot) ? 1 : 0;
1182

Mike Hibler's avatar
Mike Hibler committed
1183 1184
	print STDERR "Marking nodes for ",
		     $needreboot ? "reboot" : "reconfig", ".\n";
Chad Barb's avatar
Chad Barb committed
1185
	$db_result =
1186 1187 1188
	    DBQueryFatal("select r.node_id,n.allocstate from reserved as r ".
			 "left join nodes as n on n.node_id=r.node_id ".
			 "where r.pid='$pid' and r.eid='$eid'");
Chad Barb's avatar
Chad Barb committed
1189

1190 1191 1192 1193 1194 1195 1196 1197
	while (my ($node,$allocstate) = $db_result->fetchrow_array) {
	    #
	    # If the node is INIT_CLEAN, leave it alone. It will still get
	    # rebooted, but will not falsely be tagged as dirty. This is
	    # important for vnodes too, where INIT_CLEAN indicated the vnode
	    # does not even exist yet (plab nodes).
	    #
	    if ($allocstate ne TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {