tbswap.in 54.2 KB
Newer Older
1 2 3 4
#!/usr/bin/perl -w

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2007 University of Utah and the Flux Group.
6 7 8 9
# All rights reserved.
#
use English;

10 11 12
# Returns 0 on success.
# Returns 1 on non-assign_wrapper failure.
# Returns (1 | assign_wrapper's errorcode) on assign_wrapper failure.
13 14
# Returns |0x40 if update caused a swapout. Icky.
# Returns -1 on uncontrolled error (die called).
15

16 17 18 19 20
# XXX: handle error cases for update? (backup the db?)
# XXX: Shouldn't do idempotent stuff twice for update.
# XXX: repush/calc routing for update??? (tbprerun)
# XXX: previz for update???              (tbprerun)
# XXX: make snmpit faster for update.
Chad Barb's avatar
Chad Barb committed
21 22 23
#
# XXX: for update, expt is swapped out on os_setup fail.
#      (we only recover if assign fails)
24 25 26

sub usage()
{
27
    print STDERR "Usage: $0 -force { in | out | update [-reboot] [-eventsys_restart] } pid eid\n";
28 29 30 31 32 33 34
    exit(-1);
}

#
# Configure variables
#
my $TBROOT         = "@prefix@";
35
my $TBOPS          = "@TBOPSEMAIL@";
Timothy Stack's avatar
Timothy Stack committed
36
my $TBLOGS         = "@TBLOGSEMAIL@";
37
my $MAINSITE	   = @TBMAINSITE@;
38
my $THISHOMEBASE   = "@THISHOMEBASE@";
39 40
my $TESTMODE       = @TESTMODE@;
my $DISABLE_EVENTS = "@DISABLE_EVENT_SCHED@";
41
my $piper          = "$TBROOT/sbin/locpiper";
Timothy Stack's avatar
Timothy Stack committed
42
my $NFSTRACESUPPORT= @NFSTRACESUPPORT@;
43 44 45 46 47 48 49 50 51 52 53

# Untaint the path
$ENV{'PATH'} = "/usr/bin:$TBROOT/libexec:$TBROOT/libexec/ns2ir" . 
    ":$TBROOT/sbin:$TBROOT/bin";

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
54
use libadminctrl;
55
use libadminmfs;
56
use libtblog;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
57
use libArchive;
58
use Experiment;
59

60
#require exitonwarn; # exitonwarn isn't really a module, so just require it
61 62 63 64

#
# Actual swap-in and swap-out functions, defined below.
#
65 66 67
sub doSwapout($);
sub doSwapin($);

68 69 70 71
#
# Firewall stuff
# XXX maybe should be elsewhere
#
72 73 74 75 76
sub FWSETUP()     { return 1; }
sub FWADDNODES()  { return 2; }
sub FWDELNODES()  { return 3; }
sub FWTEARDOWN()  { return 4; }
sub doFW($$$$);
77 78

# XXX fixme: should not be hardwired!
79
my $cnetstack = "-S Control";
80 81 82
my $cnetvlanname = "Control";


83 84 85 86
sub REAL()    { return 4; }
sub CLEANUP() { return 3; }
sub RETRY()   { return 2; }
sub UPDATE()  { return 1; }
Chad Barb's avatar
Chad Barb committed
87
sub UPDATE_RECOVER()  { return 0; }
88

89 90 91
#
# Grab global enable of linkdelays.
#
92
my $enablelinkdelays = TBGetSiteVar("general/linux_endnodeshaping");
93

94 95 96 97 98 99
#
# Turn off line buffering on output
#

$| = 1;

100 101
my $updateReboot   = 0;
my $updateReconfig = 1;
102 103
my $update_Eventsys_restart = 0;
my $elabinelab     = 0;
104
my $plabinelab     = 0;
105 106
my $force  = 0;
my $errors = 0;
107
my $updatehosed = 0;
108
my $state;
109
my $canceled;
110 111
my $os_setup_pid;
my $cleanvlans;
112
my $nextState;
113 114

#
115
# First argument is either "in", "out", or "update";
116 117 118 119 120
# this value goes into $swapop.
#

my $swapop = shift;	

121 122 123 124
if (!$swapop || 
    (($swapop ne "in") && 
     ($swapop ne "out") &&
     ($swapop ne "update"))) {
125 126 127 128 129 130 131 132 133 134 135
    usage();
}

#
# Get other arguments.
#

while ($#ARGV > 1) {
    $arg = shift;
    if ($arg eq "-force") {
	$force = 1;
Chad Barb's avatar
Chad Barb committed
136 137
    } elsif ($arg eq "-reboot") {
	$updateReboot = 1;
138
	$updateReconfig = 0;
139 140 141
    } elsif ($arg eq "-noreconfig") {
	$updateReboot   = 0;
	$updateReconfig = 0;
142 143
    } elsif ($arg eq "-eventsys_restart" && $swapop eq "update") {
	$update_Eventsys_restart = 1;
144 145 146 147 148 149 150 151 152
    } else {
	usage();
    }
}
if ($#ARGV < 1) {
    usage();
}
my ($pid,$eid) = @ARGV;

153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
#
# Untaint the arguments.
#
if ($pid =~ /^([-\@\w.]+)$/) {
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
if ($eid =~ /^([-\@\w.]+)$/) {
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}

169 170 171 172 173 174
#
# Set Error reporting info
# 
tblog_set_info($pid,$eid,$UID);

#
175
# Turn on timestamps
176
#
177 178
TBDebugTimeStampsOn();

179 180 181 182 183 184
# Need this below.
my ($dbuid, $uname, $umail);
if (UNIX2DBUID($UID, \$dbuid)) {
    UserDBInfo($dbuid, \$uname, \$umail);
}

185 186 187 188 189 190
# Slowly convert to using Experiment module.
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    tbdie("Could not lookup experiment object!")
}

191 192 193
#
# Print starting message.
#
194 195
my $exptidx;
TBExptIDX($pid, $eid, \$exptidx);
196

197 198
print "Beginning swap-$swapop for $pid/$eid ($exptidx). " .
    TBTimeStampWithDate() . "\n";
199 200 201 202 203 204
TBDebugTimeStamp("tbswap $swapop started");

#
# Get experiment state; verify that experiment exists.
#
if (! ($state = ExpState($pid, $eid))) {
205
    tbdie "No such experiment $pid/$eid";
206
}
207 208 209
# Sanity check the current state. 
if (!$force) {
    if ($swapop eq "in") {
210
	tbdie("Experiment should be ACTIVATING. Currently $state.")
211
	    if ($state ne EXPTSTATE_ACTIVATING);
212
    }
213
    elsif ($swapop eq "out") {
214
	tbdie("Experiment should be SWAPPING. Currently $state.")
215
	    if ($state ne EXPTSTATE_SWAPPING);
216
    }
217
    elsif ($swapop eq "update") {
218
	tbdie("Experiment should be MODIFY_RESWAP. Currently $state.")
219
	    if ($state ne EXPTSTATE_MODIFY_RESWAP);
220 221
    }
}
222 223
# Get elabinelab status. See below.
if (! TBExptIsElabInElab($pid, $eid, \$elabinelab)) {
224
    tbdie("Could not get elabinelab status for experiment $pid/$eid");
225
}
226 227 228 229 230 231 232 233 234 235 236
# and plabinelab status.
if (! TBExptIsPlabInElab($pid, $eid, \$plabinelab)) {
    tbdie("Could not get plabinelab status for experiment $pid/$eid");
}
if ($elabinelab && $plabinelab) {
    tberror "Cannot get my head around Plab in Elab in Elab!\n";
    print "Failingly finished swap-$swapop for $pid/$eid. " .
	TBTimeStamp() . "\n";
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
    exit(1);
}
237

238 239 240 241 242
#
# See if the experiment is firewalled
#
my $firewalled = TBExptFirewall($pid, $eid);

243 244 245
#
# Do actual swapping
#
246 247 248 249
if ($swapop eq "out") {
    #
    # Swap out
    #
250
    tblog_set_attempt(1);
251
    $errors = doSwapout(REAL);
252
}
253
elsif ($swapop eq "update") {
254 255 256 257 258 259 260
    #
    # Before swapout, do cursory admission control to see if the
    # modified experiment will be swappable. assign_wrapper does a
    # more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
    if (! TBAdmissionControlCheck($UID, $pid, $eid, undef)) {
261 262 263
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
264 265 266 267 268 269
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
    
270 271
    #
    # Update.
272 273 274
    #
    tblog_set_attempt(1);

275 276 277 278 279
    #
    # Phase One -- swap experiment partially out.
    #
    print STDERR "Backing up physical state...\n";
    TBExptBackupPhysicalState($pid,$eid);
280

281
    $errors = doSwapout(UPDATE);
Chad Barb's avatar
Chad Barb committed
282

Leigh B. Stoller's avatar
Leigh B. Stoller committed
283 284 285 286 287 288
    print STDERR "Doing a swapmodswapout on the experiment archive ...\n";    
    if (libArchive::TBExperimentArchiveSwapModSwapOut($pid, $eid) < 0) {
	tberror("Failed to do a swapmodswapout on the experiment archive!");
	$errors = 1;
    }

289
    if ($errors) {
290
	#
291 292 293 294 295
	# Clean up the mess, leaving the experiment in the SWAPPED state,
	# 
	print STDERR "Cleaning up after errors.\n";
	doSwapout(CLEANUP);
	$updatehosed = 1;
296
    }
297
    else {
298
	#
299
	# Phase Two -- swap experiment back in.
300
	#
Chad Barb's avatar
Chad Barb committed
301 302
	$errors = doSwapin(UPDATE);

303
	if ($errors) {
Chad Barb's avatar
Chad Barb committed
304 305 306
	    #
	    # There were errors; see if we can recover.
	    #
307
	    my $CanRecover = 1;
Chad Barb's avatar
Chad Barb committed
308 309 310

	    if ($errors != 7) {
		print STDERR "Update failure occurred _after_ assign phase; ";
311
		$CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
312 313
	    }

314 315 316 317 318 319 320 321
	    if ($CanRecover) {
		print STDERR "Recovering virtual and physical state.\n";

		if (TBExptRemoveVirtualState($pid, $eid) ||
		    TBExptRestoreVirtualState($pid, $eid) ||
		    TBExptRestorePhysicalState($pid,$eid)) {
		    print STDERR "Could not restore backed-up state; ";
		    $CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
322
		}
323 324 325 326 327 328 329
		else {
		    print STDERR "Doing a recovery swap-in of old state.\n";

		    if (doSwapin(UPDATE_RECOVER)) {
			print STDERR "Could not swap in old physical state; ";
			$CanRecover = 0;
		    }
Chad Barb's avatar
Chad Barb committed
330 331
		}
	    }
332 333 334 335 336 337 338

	    #
	    # Some part of the recovery failed; must swap it out. swapexp
	    # (caller) will then have to do more clean up, hence the special
	    # exit status indicated by $updatehosed.
	    # 
	    if (! $CanRecover) {
339
		print STDERR "Recovery aborted! Swapping experiment out.\n";
Chad Barb's avatar
Chad Barb committed
340
		doSwapout(CLEANUP);
341 342 343 344
		$updatehosed = 1;
	    }
	    else {
		print STDERR "Update recovery successful.\n";
Chad Barb's avatar
Chad Barb committed
345
	    }
346
	}
Chad Barb's avatar
Chad Barb committed
347 348
    }
}
349 350 351 352 353
elsif ($swapop eq "in") {
    #
    # Swap in
    #
    my $retries = 2;
354 355 356 357 358 359 360

    #
    # Before real swapin, do cursory admission control. assign_wrapper does
    # a more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
    if (! TBAdmissionControlCheck($UID, $pid, $eid, undef)) {
361 362 363
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
364 365 366 367 368
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
369 370

    tblog_set_attempt(1);
371
    $errors = doSwapin(REAL);
Chad Barb's avatar
Chad Barb committed
372

373 374 375 376 377 378 379 380 381 382
    #
    # Attempt a retry if: 
    #   a) there were errors, 
    #   b) doswapin() indicated (via return code 3) a retry is appropriate,
    #   c) we haven't tried too many times already.
    #   d) The cancelflag has not been set.
    #   e) $TESTMODE == 0.
    #
    while ($errors == 3 && $retries && !$canceled && !$TESTMODE) {
	$retries--;
383
	tblog_inc_attempt();
384

385 386
	print STDERR "Cleaning up after errors; will try again.\n";
	doSwapout(RETRY);
387

388 389
	print STDERR "Trying again...\n";
	$errors = doSwapin(RETRY);
390
    }
391 392
    if ($errors || $canceled) {
	print STDERR "Cleaning up after " .
393
	    ($canceled ? "cancelation" : "errors") . ".\n";
394
	doSwapout(CLEANUP);
395 396 397
    }
}

398 399 400 401
tblog_set_attempt(0);

TBGetCancelFlag($pid, $eid, \$canceled);
if ($canceled) {
402 403
    tberror ({type=>'summary', cause=>'canceled', 
	      severity=>SEV_IMMEDIATE, error=>['cancel_flag']},
404 405 406
	     "Experiment swap-$swapop canceled by user.");
}

407 408 409 410
#
# Write appropriate message and exit.
#
if ($errors) {
411
    print "Failingly finished swap-$swapop for $pid/$eid. ".TBTimeStamp()."\n";
412
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
Chad Barb's avatar
Chad Barb committed
413

414 415
    # Pass out magic value to indicate that update failed!
    exit(1 | ($updatehosed ? 0x40 : 0));
416
}
417
print "Successfully finished swap-$swapop for $pid/$eid. " .TBTimeStamp()."\n";
418 419
TBDebugTimeStamp("tbswap $swapop finished (succeeded)");
exit(0);
420 421 422 423

#################################

##
Chad Barb's avatar
Chad Barb committed
424
#
425 426
# doSwapout - Swaps experiment out.
#
Chad Barb's avatar
Chad Barb committed
427
#             If in REAL or CLEANUP,
428 429 430
#             this function will free all nodes for the 
#             experiment.
#
Chad Barb's avatar
Chad Barb committed
431
#             If in RETRY or UDPATE,
432 433 434 435 436 437
#             only nodes not in RES_READY will be freed.
#
#             Returns 0 on success, >0 on failure.
#
##

438 439
sub doSwapout($) {
    my $type = shift; # REAL==4, CLEANUP==3, RETRY==2, UPDATE==1.
440 441
    my $swapout_errors = 0;

442 443
    tblog_set_cleanup(1) if $type == CLEANUP;

444 445 446 447 448 449 450 451 452
    #
    # wait for os_setup;
    # this only applies if called after a failed doswapin.
    #
    if ($os_setup_pid) {
	print "Waiting for os_setup to finish\n";
	waitpid($os_setup_pid, 0);
	undef $os_setup_pid;
    }
Chad Barb's avatar
Chad Barb committed
453

454
    if (0 && $NFSTRACESUPPORT && $type == REAL) {
Timothy Stack's avatar
Timothy Stack committed
455 456
	print "Getting files accessed via NFS.\n";
	TBDebugTimeStamp("nfstrace started");
457
	system("nfstrace transfer $pid $eid");
Timothy Stack's avatar
Timothy Stack committed
458
	TBDebugTimeStamp("nfstrace finished");
459
    }
460

461
    if (! $TESTMODE) { 
462 463 464
	if (! ($DISABLE_EVENTS || $elabinelab)) {
	    if ($type >= RETRY ||
		($update_Eventsys_restart && $type == UPDATE) ) {
465
		print "Stopping the event system\n";
466
		if (system("eventsys_control stop $pid,$eid")) {
467 468 469
		    tberror({type => 'secondary', severity => SEV_SECONDARY,
			     error => ['eventsys_stop_failed']},
			    "Failed to stop the event system.");
470 471
		    $swapout_errors = 1;
		}
472 473 474 475 476 477 478

		#
		# Stop the location piper.
		#
		if (-x $piper) {
		    print "Stopping the location piper\n";
		    if (system("$piper -k $pid $eid")) {
479 480 481
			tberror({type => 'secondary', severity => SEV_SECONDARY,
				 error => ['piper_stop_failed']},
				"Failed to stop location piper.");
482 483 484
			$swapout_errors = 1;
		    }
		}
485 486
	    }
	}
487 488 489 490 491 492
	
	#
	# Do teardown of inner elab. We must do this before we teardown the
	# vlans since the inner control network is a vlan, and we want that
	# active so inner boss can reboot the inner nodes (avoid power cycle).
	#
493
	if ($elabinelab && $type >= CLEANUP) {
494 495
	    print "Tearing down elabinelab. This could take a while.\n";
	    if (system("elabinelab -k $pid $eid")) {
496 497 498
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['elabinelab_tear_down_failed']},
			"Failed to teardown elabinelab!");
499 500 501 502
		$swapout_errors = 1;
	    }
	}

503
	#
Chad Barb's avatar
Chad Barb committed
504
	# Clean up any VLANs in experiment.
505
	#
506 507 508 509 510 511 512
	# When modifying an elabinelab experiment, leave the vlans intact
	# so that the inner networks are not suddenly disconnected!
	#
	if (! ($elabinelab && $type == UPDATE)) {
	    TBDebugTimeStamp("snmpit started");
	    print STDERR "Removing VLANs.\n";
	    if (system("snmpit -r $pid $eid")) {
513 514 515
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vlan_reset_failed']},
			"Failed to reset VLANs");
516 517 518 519 520
		$swapout_errors = 1;
	    } else {
		$cleanvlans = 0;
	    }
	    TBDebugTimeStamp("snmpit finished");
521
	}
522
    }
Chad Barb's avatar
Chad Barb committed
523

524
    if ($type >= CLEANUP) {
525 526 527 528 529 530 531 532
	#
	# Undo plab in elab specialness.
	# No need to worry about VLANs here, as all the special abilities
	# involve the control network.
	#
	if (! $TESTMODE && $plabinelab) {
	    print "Tearing down plabinelab.\n";
	    if (system("plabinelab -k $pid $eid")) {
533 534 535
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['plabinelab_tear_down_failed']},
			"Failed to teardown plabinelab!");
536 537 538 539
		$swapout_errors = 1;
	    }
	}

540 541
	#
	# We're not attempting a retry;
542
	#
543
	# Stop all of the vnodes.
544
	#
545
	if (! $TESTMODE) { 	
546 547 548
	    print "Tearing down virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid")) {
549 550 551
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
552 553 554
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup finished");
555 556
	}

557 558 559 560 561
	#
	# Nodes behind a firewall are treated special.
	# See undoFWNodes for details.
	#
	if ($firewalled && undoFWNodes($pid, $eid)) {
562
	    tblog_set_cleanup(0);
563 564
	    return 1;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
565

566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585
	#
	# Perform swapout time admin actions.  Right now there is at most
	# one of these.  It isn't really a general mechanism, just a hook
	# for state saving or data collection during swapout.
	# A couple of important "fer now" notes:
	#
	#	We don't do this for firewalled experiments.  We need a way
	#	to "tag" the saved disk state to ensure it doesn't get
	#	instantiated outside of a firewall.
	#
	#	We only do this on REAL swapouts, and not on CLEANUPs.
	#	There are some types of CLEANUPs where we may want to
	#	do this, in particular an invocation caused by a failed
	#	modify operation, where the admin action is to save the
	#	experiment state.  So we will need to revisit this.
	#
	my %soaction = ();
	if ($type == REAL && !$firewalled) {
	    TBExptGetSwapoutAction($pid, $eid, \%soaction);
	}
586
	if ($soaction{'command'} && doSwapoutAction($pid, $eid, %soaction)) {
587
	    tblog_set_cleanup(0);
588
	    return 1;
589 590
	}

591 592
	#
	# remove all nodes from the experiment.
Chad Barb's avatar
Chad Barb committed
593
	# (nfree will send them to RES_FREE_DIRTY)
594 595 596 597
	#
	print STDERR "Freeing nodes.\n";
	TBDebugTimeStamp("nfree started");
	if (system("nfree $pid $eid")) {
598 599 600
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['nfree_failed']},
		    "Could not free nodes.");
601 602 603
	    $swapout_errors = 1;
	}
	TBDebugTimeStamp("nfree finished");
604 605 606 607 608

	#
	# Since this is an actual swapout, 
	# reset our count of swap out nag emails sent.
	#
609
	DBQueryWarn("update experiments set swap_requests='',sim_reswap_count='0' ".
610
		    "where eid='$eid' and pid='$pid'");
611 612
    } else {
	#
613
	# $type == RETRY or $type == UPDATE.
614 615
	# Therefore, don't deallocate nodes which have been successfully
	# incorporated into the experiment (i.e., are RES_READY).
Chad Barb's avatar
Chad Barb committed
616
	# (nfree will send deallocated nodes to RES_FREE_DIRTY)
617
	#
618 619 620 621 622 623 624 625
	my @failedpnodes = ();
	my @failedvnodes = ();
	
	my $db_result =
	    DBQueryFatal("select rv.node_id,n.allocstate,nt.isvirtnode ".
                         "  from reserved as rv ".
			 "left join nodes as n on n.node_id = rv.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
626 627
			 "where rv.pid='$pid' and rv.eid='$eid'");

628
	while (my ($node,$allocstate,$isvirt) = $db_result->fetchrow_array) {
629
	    if ($allocstate ne TBDB_ALLOCSTATE_RES_READY()) {
630 631 632 633 634 635
		if ($isvirt) {
		    push(@failedvnodes, $node);
		}
		else {
		    push(@failedpnodes, $node);
		}
636 637 638
	    }
	}

639 640 641 642 643 644 645
	#
	# Tear down failed vnodes. Perhaps not needed?
	# 
	if (!$TESTMODE && @failedvnodes > 0) {
	    print "Tearing down failed virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid @failedvnodes")) {
646 647 648
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
649 650 651 652
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup -k finished");
	}
653

654 655 656 657 658 659 660 661 662 663 664
	#
	# Undo plabinelab setup
	#
	if (!$TESTMODE && $plabinelab && @failedpnodes > 0) {
	    print "Removing failed nodes from inner plab.\n";
	    if (system("plabinelab -r $pid $eid @failedpnodes")) {
		tberror "Failed to remove inner nodes!";
		$swapout_errors = 1;
	    }
	}

665 666 667 668
	#
	# Release all failed nodes.
	# 
	if (@failedpnodes > 0 || @failedvnodes > 0) {
669 670
	    print STDERR "Freeing failed nodes.\n";
	    
671
	    TBDebugTimeStamp("nfree started");
Chad Barb's avatar
Chad Barb committed
672 673 674 675 676
	    #
	    # Specify -x switch so when a physical node gets freed,
	    # any virtual nodes (owned by this experiment)
	    # sitting on top of it are freed as well.
	    #
677 678
	    if (system("nfree -x $pid $eid " .
		       join(" ", (@failedpnodes, @failedvnodes)))) {
679 680 681
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['nfree_failed']},
			"Could not free nodes.");
682 683 684 685 686 687
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("nfree finished");
	}
    }

688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711
    if (! $TESTMODE) {
	#
	# If the experiment has no Plab dslice nodes left, but still has
	# a Plab slice, destroy the slice
	#

	# Does the slice exist?
	$db_result =
	    DBQueryFatal("select slicename from plab_slices ".
			 "where pid='$pid' and eid='$eid'");

	if ($db_result->numrows) {
	    # Are there any dslice nodes left?
	    $db_result =
		DBQueryFatal("select n.node_id from nodes as n ".
			     "left join node_types as nt on n.type = nt.type ".
			     "left join reserved as r ".
			     " on r.node_id = n.node_id ".
			     "where r.pid='$pid' and r.eid='$eid' ".
			     " and nt.isplabdslice = 1");

	    if (!$db_result->numrows) {
		print "Destroying Planetlab slice.\n";
		TBDebugTimeStamp("plabslice destroy started");
712
		if (system("plabslice destroy $pid $eid")) {
713
		    tberror "Failed to destroy Plab dslice.";
714 715 716 717 718 719 720
		    $swapout_errors = 1;
		}
		TBDebugTimeStamp("plabslice destroy finished");
	    }
	}
    }

721 722 723 724 725 726 727 728
    if (! $TESTMODE) {
	#
	# All of these errors are non-fatal on swapout. We find out about them
	# via email sent from the individual scripts.
	#

	#
	# Only reset mountpoints if this is an actual swapout, and
729
	# not a failed swapin(cleanup), update, or retry.
730
	#
731
	if ($type == REAL) {
732 733 734
	    print "Resetting mountpoints.\n";
	    TBDebugTimeStamp("exports started");
	    if (system("exports_setup")) {
735 736 737
		tberror({severity => SEV_WARNING,
			 error => ['mountpoint_reset_failed']},
			"Failed to reset mountpoints.");
738 739 740
	    }
	    TBDebugTimeStamp("exports finished");
	}
Chad Barb's avatar
Chad Barb committed
741

742 743 744 745
	#
	# Resetting named maps and email lists is fast and idempotent,
	# so whatever.
	#
746 747 748
	print "Resetting named maps.\n";
	TBDebugTimeStamp("named started");
	if (system("named_setup")) {
749
	    tbwarn "Failed to reset named map.";
750 751
	}
	TBDebugTimeStamp("named finished");
Chad Barb's avatar
Chad Barb committed
752

753 754
	print "Resetting email lists.\n";
	TBDebugTimeStamp("genelists started");
755
	if (system("genelists -t")) {
756
	    tbwarn "Failed to reset email lists.";
757 758 759 760
	}
	TBDebugTimeStamp("genelists finished");
    }

761
    #
762 763 764
    # Wipe the DB clean except during UPDATE or RETRY. In those
    #    cases, assign_wrapper will reset the DB after reading
    #    the info.
765
    #
766 767
    if ( $type >= CLEANUP ) {
	print STDERR "Resetting DB.\n";
768 769 770
	$experiment->RemovePhysicalState();
	# Special. 
	$experiment->ClearPortRegistration();
771
    }
772

773
    tblog_set_cleanup(0);
774 775 776 777
    return $swapout_errors;
}

##
Chad Barb's avatar
Chad Barb committed
778
#
779 780
# doSwapin - Swaps experiment in.
#
Chad Barb's avatar
Chad Barb committed
781 782 783 784 785
#            Returns:
#              0 - successful swapin
#              1 - failed swapin; cleanup required.
#              3 - failed swapin; cleanup required; can retry.
#              7 - failed swapin; assign failed; no cleanup.
786 787
##

788
sub doSwapin($) {
789 790 791
    my $type = shift; # REAL==4, RETRY==2, UPDATE==1, UPDATE_RECOVER=0.
    # Just the physnodes ...
    my @deleted_pnodes = ();
792

793 794 795 796
    #
    # assign_wrapper does all the virtual to physical mapping 
    # and updating the DB state.
    #
797
    
Chad Barb's avatar
Chad Barb committed
798
    if ($type > UPDATE_RECOVER) {
799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814
        #
        # Hacky test to allow disabling of linkdelays if the node is going
        # to run Linux. See sitevar above.
        #
	if (! $enablelinkdelays) {
	    $db_result =
		DBQueryFatal("select distinct e.pid,e.eid,vl.vnode,vn.osname ".
			 "  from experiments as e ".
			 "left join virt_lans as vl on vl.pid=e.pid and ".
			 "     vl.eid=e.eid ".
			 "left join virt_nodes as vn on vn.pid=e.pid and ".
			 "     vn.eid=e.eid and vn.vname=vl.vnode ".
			 "left join os_info as o on o.osname=vn.osname and ".
			 "  (o.pid=vl.pid or o.pid='" . TBOPSPID() . "') ".
			 "where (vl.uselinkdelay!=0 or e.uselinkdelays!=0 or ".
			 "       e.forcelinkdelays!=0) and ".
815 816
			 "     (o.os is NULL or o.os='Linux' or ".
			 "      o.os='Fedora') and ".
817 818 819
			 "     e.pid='$pid' and e.eid='$eid'");

	    if ($db_result->numrows) {
820 821
		tberror "Endnodeshaping is disabled on Linux Images!";
		tberror "You must modify your experiment to swap it in.";
822 823 824 825
		return 1;
	    }
	}
	
Chad Barb's avatar
Chad Barb committed
826 827 828 829
	print "Mapping to physical reality ...\n";
	TBDebugTimeStamp("assign_wrapper started");

	#
830 831 832 833
	# Pass the -u (update) switch into assign_wrapper, which turns on
	# update mode. When doing a retry, must also fix the current nodes
	# to avoid stuff jumping around when simply trying to replace a node
	# that did not boot.
Chad Barb's avatar
Chad Barb committed
834 835
	#
	my $exitcode;
836 837 838
	my $wrapper = "assign_wrapper -u";
	$wrapper .= " -f"
	    if ($type == RETRY);
839
	
840
	if (system("$wrapper $pid $eid")) {
841 842
	    $exitcode = $? >> 8;

843
	    tberror "Failed ($exitcode) to map to reality.";
844

845 846
	    # Wrapper sets this bit when recovery is possible.
	    if ($exitcode & 64) {
847
		# We can recover. 
848
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
849
		return 7;
850 851
	    }
	    else {
852
		# No recovery, no retry.
853
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
854 855 856 857
		return 1;
	    }
	}
	TBDebugTimeStamp("assign_wrapper finished");
858

Chad Barb's avatar
Chad Barb committed
859 860
	print "Mapped to physical reality!\n";
    }
861

862
    # Check cancel flag before continuing. No retry, 
863
    TBGetCancelFlag($pid, $eid, \$canceled);
864 865 866 867
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
868

869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903
    #
    # Look for any nodes in RES_TEARDOWN. These need to be released,
    # and if a virtnode, they need to be torn down. We cannot wait for
    # the virtnodes to go down with the physnode they are hosted on,
    # so teardown and release the virtnodes first, and then do the
    # physnodes.
    #
    # Errors are fatal; no recovery or retry.
    #
    if ($type == UPDATE) {
	my $allocstate = TBDB_ALLOCSTATE_RES_TEARDOWN();
	
	$db_result =
	    DBQueryFatal("select r.node_id,nt.isvirtnode,nt.isremotenode ".
			 "  from reserved as r ".
			 "left join nodes as n on n.node_id=r.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
			 "where r.pid='$pid' and r.eid='$eid' and ".
			 "      n.allocstate='$allocstate'");

	if ($db_result->numrows) {
	    my @virtnodes = ();
	    my @physnodes = ();
	    
	    print "Tearing down and releasing unused nodes\n";

	    # First teardown/release virtnodes. 
	    while (my ($node,$isvirt,$isrem) = $db_result->fetchrow_array()) {
		if ($isvirt) {
		    push(@virtnodes, $node);
		}
		elsif (!$isrem) {
		    push(@physnodes, $node);
		}
	    }
904 905 906
	    # See below.
	    @deleted_pnodes = @physnodes;
	    
907 908 909
	    if (@virtnodes) {
		TBDebugTimeStamp("vnode_setup started");
		
910
		if (system("vnode_setup -k $pid $eid @virtnodes")) {
911
		    tberror "Failed to tear down unused virtnodes!\n";
912 913 914 915 916
		    return 1;
		}
		TBDebugTimeStamp("vnode_setup finished");
		
		if (system("nfree $pid $eid @virtnodes")) {
917
		    tberror "Failed to nfree unused virtnodes!\n";
918 919 920 921
		    return 1;
		}
	    }
	    if (@physnodes) {
922 923 924
		if ($elabinelab) {
		    print "Removing nodes from inner elab.\n";
		    if (system("elabinelab -r $pid $eid @physnodes")) {
925
			tberror "Failed to remove inner nodes!";
926 927
			return 1;
		    }
928 929 930 931 932 933
		} elsif ($plabinelab) {
		    print "Removing nodes from inner plab.\n";
		    if (system("plabinelab -r $pid $eid @physnodes")) {
			tberror "Failed to remove inner nodes!";
			return 1;
		    }
934 935 936
		}

		#
937 938
		# If the experiment is firewalled, cleanup the nodes
		# we are releasing.
939
		# 
940
		if ($firewalled && undoFWNodes($pid, $eid, @deleted_pnodes)) {
941 942 943
		    return 1;
		}
		
944
		if (system("nfree $pid $eid @physnodes")) {
945
		    tberror "Failed to nfree unused physnodes!\n";
946 947 948 949 950 951
		    return 1;
		}
	    }
	}
    }

952 953 954 955 956 957
    # Exit here if we are testing.
    if ($TESTMODE) {
	print "Testing run - Stopping here.\n";
	return 0;
    }

958 959 960 961 962 963 964 965 966 967 968
    #
    # Handle tarballs - we might need to fetch some from URLs if the user
    # asked for that.
    #
    print "Fetching tarballs and RPMs (if any) ...\n";
    TBDebugTimeStamp("tarfiles_setup started");

    if (system("tarfiles_setup $pid $eid")) {
	#
	# No recovery for now - what would we do?
	#
969 970 971
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['tarfiles_setup_failed']},
		"Failed to set up tarballs.");
972 973 974 975
	return 1;
    }
    TBDebugTimeStamp("tarfiles_setup finished");

976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003
    #
    # If there are any Plab dslice nodes in the experiment, create the
    # dslice now
    #
    if ($type > UPDATE_RECOVER) {
	# Are there any Plab nodes?
	$db_result =
	    DBQueryFatal("select n.node_id from nodes as n ".
			 "left join node_types as nt on n.type = nt.type ".
			 "left join reserved as r on r.node_id = n.node_id ".
			 "where r.pid='$pid' and r.eid='$eid' ".
			 " and nt.isplabdslice = 1");

	if ($db_result->numrows) {
	    # Does slice already exist?
	    $db_result =
		DBQueryFatal("select slicename from plab_slices ".
			     "where pid='$pid' and eid='$eid'");

	    if (! $db_result->numrows) {
		my @plabnodes = ();
		
		while (my ($node) = $db_result->fetchrow_array()) {
		    push(@plabnodes, $node);
		}
		
		print "Creating Planetlab slice.\n";
		TBDebugTimeStamp("plabslice create started");
1004
		if (system("plabslice create $pid $eid")) {
1005 1006 1007
		    tberror({type => 'secondary', severity => SEV_SECONDARY,
			     error => ['plabslice_create_failed']},
			    "Failed to create Plab dslice");
1008 1009 1010 1011 1012 1013 1014
		    return 3;
		}
		TBDebugTimeStamp("plabslice alloc finished");
	    }
	}
    }

1015
    # Check cancel flag before continuing. No retry, 
1016
    TBGetCancelFlag($pid, $eid, \$canceled);
1017 1018 1019 1020
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
1021

1022 1023 1024 1025 1026 1027 1028 1029 1030
    #
    # These things need to get started before the nodes come up, so we'll
    # do them before the os_setup. Everything else can done in parallel with
    # os_setup. (Actually, these probably can too, since they should finish
    # long before the nodes reboot, but better safe than sorry)
    #
    print "Setting up mountpoints.\n";
    TBDebugTimeStamp("mountpoints started");
    if (system("exports_setup")) {
1031 1032 1033
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['mountpoints_setup_failed']},
		"Failed to setup mountpoints.");
1034 1035 1036
	return 1;
    }
    TBDebugTimeStamp("mountpoints finished");
Chad Barb's avatar
Chad Barb committed
1037

1038 1039 1040
    TBDebugTimeStamp("named started");
    print "Setting up named maps.\n";
    if (system("named_setup")) {
1041
	tbwarn "Failed to add node names to named map.";
1042 1043
	#
	# This is a non-fatal error.
Chad Barb's avatar
Chad Barb committed
1044
	#
1045 1046
    }
    TBDebugTimeStamp("named finished");
Chad Barb's avatar
Chad Barb committed
1047

Timothy Stack's avatar
Timothy Stack committed
1048 1049 1050 1051
    if ($NFSTRACESUPPORT) {
	print "Cleaning NFS traces.\n";
	TBDebugTimeStamp("nfstrace gc started");
	if (system("nfstrace gc $pid $eid")) {
1052 1053 1054
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['nfstrace_setup_failed']},
		    "Failed to setup nfstrace.");
Timothy Stack's avatar
Timothy Stack committed
1055 1056 1057
	    return 1;
	}
	TBDebugTimeStamp("nfstrace gc finished");
1058 1059
    }

1060
    # Check cancel flag before continuing. No retry, 
1061
    TBGetCancelFlag($pid, $eid, \$canceled);
1062 1063 1064 1065
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
Chad Barb's avatar
Chad Barb committed
1066

1067 1068 1069 1070
    #
    # Setup any control-net firewall.
    # This must be done before reloading and rebooting nodes.
    #
1071 1072
    if ($firewalled && ($type == REAL || $type == UPDATE) &&
	doFW($pid, $eid, (($type == UPDATE) ? FWADDNODES : FWSETUP), undef)) {
1073 1074 1075
	return 1;
    }

1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087
    #
    # PlabinElab setup.  This is currently just tweaking out the dhcpd.conf
    # file and that must be done before os_setup (i.e., before nodes are
    # rebooted).
    #
    if ($plabinelab && !$TESTMODE && $type > UPDATE_RECOVER) {
	# for UPDATE and RETRY we pass in the -u to pick up new nodes
	my $optarg = ($type == REAL ? "" : "-u");
	
	print "Setting up plabinelab.\n";
	TBDebugTimeStamp("plabinelab setup started");
	if (system("plabinelab $optarg $pid $eid")) {
1088 1089 1090
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['plabinelab_setup_failed']},
		    "Failed to setup plabinelab!");
1091 1092 1093 1094 1095
	    return 1;
	}
	TBDebugTimeStamp("plabinelab setup finished");
    }

Chad Barb's avatar
Chad Barb committed
1096 1097
    #
    # If user specified -reboot to update,
1098
    # and we are successfully performing the update,
1099
    # then mark all nodes in experiment so os_setup will reboot them.
1100 1101
    # We must reboot nodes on a RETRY as well, since assign has been rerun
    # and may have remapped interfaces on the nodes.
Chad Barb's avatar
Chad Barb committed
1102
    #
1103 1104
    if ($type == RETRY ||
	($type == UPDATE && ($updateReboot || $updateReconfig))) {
Mike Hibler's avatar
Mike Hibler committed
1105
	my $needreboot = ($type == RETRY || $updateReboot) ? 1 : 0;
1106

Mike Hibler's avatar
Mike Hibler committed
1107 1108
	print STDERR "Marking nodes for ",
		     $needreboot ? "reboot" : "reconfig", ".\n";
Chad Barb's avatar
Chad Barb committed
1109
	$db_result =
1110 1111 1112
	    DBQueryFatal("select r.node_id,n.allocstate from reserved as r ".
			 "left join nodes as n on n.node_id=r.node_id ".
			 "where r.pid='$pid' and r.eid='$eid'");
Chad Barb's avatar
Chad Barb committed
1113

1114 1115 1116 1117 1118 1119 1120 1121
	while (my ($node,$allocstate) = $db_result->fetchrow_array) {
	    #
	    # If the node is INIT_CLEAN, leave it alone. It will still get
	    # rebooted, but will not falsely be tagged as dirty. This is
	    # important for vnodes too, where INIT_CLEAN indicated the vnode
	    # does not even exist yet (plab nodes).
	    #
	    if ($allocstate ne TBDB_ALLOCSTATE_RES_INIT_CLEAN()) {
1122
		TBSetNodeAllocState($node,
1123
				    ($needreboot ?
1124 1125
				     TBDB_ALLOCSTATE_RES_INIT_DIRTY() :
				     TBDB_ALLOCSTATE_RES_RECONFIG()));
1126
	    }
Chad Barb's avatar
Chad Barb committed
1127
	}
1128 1129 1130
	# Do this only when nodes are to be rebooted.
	$experiment->ClearPortRegistration()
	    if ($type == UPDATE);
Chad Barb's avatar
Chad Barb committed
1131 1132
    }

1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145
    #
    # Lets run gentopofile again, so we get ltmap right. This will come out
    # later, most likely.
    #
    TBDebugTimeStamp("gentopofile started");
    print "Generating ltmap (again) ...\n";

    if (system("gentopofile $pid $eid")) {
        tberror("gentopofile failed!");
	return 1;
    }
    TBDebugTimeStamp("gentopofile finished");

1146
    # XXX fer now hack
1147
    if (0 && !$firewalled && !$elabinelab && !$plabinelab &&
1148
	($pid eq "testbed" || $pid eq "tbres")) {
1149 1150 1151 1152 1153
	DBQueryWarn("update experiments set ".
		    "    savedisk=1 where pid='$pid' and eid='$eid'");
    }


1154 1155 1156 1157 1158
    #
    # Since it'll take a while for the nodes to reboot, we'll start now, and
    # wait for the os_setup to finish, down below
    #
    print "Resetting OS and rebooting.\n";
1159
    TBDebugTimeStamp("launching os_setup");
1160 1161 1162
    if (!($os_setup_pid = fork())) { 
	exec("os_setup $pid $eid") or return 1;
    } elsif ($os_setup_pid == -1) {
1163
	tberror "Fork failed.";
1164 1165
	return 1;
    }
Chad Barb's avatar
Chad Barb committed
1166

1167
    #
1168 1169 1170 1171 1172
    # XXX
    # Don't add any steps between here and the waitpid() call below
    # without verifying that 1) It's OK for nodes to come up before
    # the step has completed and 2) It's OK for the command to run in
    # parallel with os_setup (no DB dependencies, etc.)
1173 1174 1175 1176 1177
    #

    print "Setting up VLANs.\n";
    TBDebugTimeStamp("snmpit started");
    if (system("snmpit -t $pid $eid")) {
1178 1179 1180
	tberror({type => 'summary', severity => SEV_SECONDARY,
		 error => ['vlan_setup_failed']},
		"Failed to set up VLANs.");
1181 1182 1183
	return 1;
    }
    TBDebugTimeStamp("snmpit finished");
Chad Barb's avatar
Chad Barb committed
1184

1185 1186 1187 1188
    #
    # An error now means that the VLANS need to be cleaned up.
    #
    $cleanvlans = 1;
Chad Barb's avatar
Chad Barb committed
1189

1190 1191
    print "Setting up email lists.\n";
    TBDebugTimeStamp("genelists started");
1192
    if (system("genelists -t")) {
1193
	tbwarn "Failed to update email lists.";
1194 1195 1196 1197 1198
	#
	# This is a non-fatal error.
	# 
    }
    TBDebugTimeStamp("genelists finished");
Chad Barb's avatar
Chad Barb committed
1199

1200 1201 1202 1203 1204 1205 1206 1207
    #
    # Don't clear port counters on UPDATE.
    # (XXX should clear new nodes' port counters.)

    if ($type >= RETRY) {
	print "Clearing port counters.\n";
	TBDebugTimeStamp("portstats started");
	if (system("portstats -z -a -q $pid $eid")) {
1208
	    tbwarn "Failed to clear port counters.";
1209 1210 1211 1212 1213
	    #
	    # This is a non-fatal error.
	    # 
	}
	TBDebugTimeStamp("portstats finished");
1214
    }
Chad Barb's avatar
Chad Barb committed
1215

1216 1217 1218 1219 1220 1221 1222
    #
    # OK, let's see how that os_setup did
    #
    $kid = waitpid($os_setup_pid,0);
    if ($kid == $os_setup_pid) {
	undef $os_setup_pid; # Make sure doswapout() doesn't wait for it.
	if ($CHILD_ERROR) {
1223
	    tberror "Failed to reset OS and reboot nodes.";
1224 1225 1226 1227 1228 1229 1230 1231 1232
	    #
	    # If there is a firewall involved, it could be that the
	    # firewall rules are preventing essential communication,
	    # so don't retry.
	    #
	    # XXX should only do this if the user has specified additional
	    # rules.  But right now, I may screw up too!
	    #
	    if ($firewalled) {
1233 1234 1235 1236
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['os_node_reset_failed', 'firewall']},
			"Not retrying, ".
			"