tbswap.in 48.7 KB
Newer Older
Chad Barb's avatar
 
Chad Barb committed
1 2 3 4
#!/usr/bin/perl -w

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2011 University of Utah and the Flux Group.
Chad Barb's avatar
 
Chad Barb committed
6 7 8 9
# All rights reserved.
#
use English;

Chad Barb's avatar
 
Chad Barb committed
10 11 12
# Returns 0 on success.
# Returns 1 on non-assign_wrapper failure.
# Returns (1 | assign_wrapper's errorcode) on assign_wrapper failure.
13 14
# Returns |0x40 if update caused a swapout. Icky.
# Returns -1 on uncontrolled error (die called).
Chad Barb's avatar
 
Chad Barb committed
15

Chad Barb's avatar
 
Chad Barb committed
16 17 18 19 20
# XXX: handle error cases for update? (backup the db?)
# XXX: Shouldn't do idempotent stuff twice for update.
# XXX: repush/calc routing for update??? (tbprerun)
# XXX: previz for update???              (tbprerun)
# XXX: make snmpit faster for update.
Chad Barb's avatar
Chad Barb committed
21 22 23
#
# XXX: for update, expt is swapped out on os_setup fail.
#      (we only recover if assign fails)
Chad Barb's avatar
 
Chad Barb committed
24 25 26

sub usage()
{
27 28 29
    print STDERR
	"Usage: $0 {in|out|modify [-reboot] [-eventsys_restart]} ".
	"[-noswapout] [-genimode] pid eid\n";
Chad Barb's avatar
 
Chad Barb committed
30 31 32 33 34 35 36
    exit(-1);
}

#
# Configure variables
#
my $TBROOT         = "@prefix@";
Mike Hibler's avatar
Mike Hibler committed
37
my $TBOPS          = "@TBOPSEMAIL@";
Timothy Stack's avatar
Timothy Stack committed
38
my $TBLOGS         = "@TBLOGSEMAIL@";
39
my $MAINSITE	   = @TBMAINSITE@;
40
my $ELABINELAB     = @ELABINELAB@;
41
my $THISHOMEBASE   = "@THISHOMEBASE@";
Chad Barb's avatar
 
Chad Barb committed
42 43
my $TESTMODE       = @TESTMODE@;
my $DISABLE_EVENTS = "@DISABLE_EVENT_SCHED@";
44
my $piper          = "$TBROOT/sbin/locpiper";
45 46
my $mapper         = "$TBROOT/bin/mapper";
my $wrapper        = "$TBROOT/libexec/assign_wrapper";
Timothy Stack's avatar
Timothy Stack committed
47
my $NFSTRACESUPPORT= @NFSTRACESUPPORT@;
48
my $PGENISUPPORT   = @PROTOGENI_SUPPORT@;
Chad Barb's avatar
 
Chad Barb committed
49 50 51 52 53 54 55 56 57 58 59

# Untaint the path
$ENV{'PATH'} = "/usr/bin:$TBROOT/libexec:$TBROOT/libexec/ns2ir" . 
    ":$TBROOT/sbin:$TBROOT/bin";

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
60
use libadminctrl;
Mike Hibler's avatar
Mike Hibler committed
61
use libadminmfs;
Kevin Atkinson's avatar
 
Kevin Atkinson committed
62
use libtblog;
63
use EmulabFeatures;
64
use Experiment;
65
use User;
66
use Lan;
67
use Firewall;
Chad Barb's avatar
 
Chad Barb committed
68 69 70 71

#
# Actual swap-in and swap-out functions, defined below.
#
Chad Barb's avatar
 
Chad Barb committed
72 73 74
sub doSwapout($);
sub doSwapin($);

75
# XXX fixme: should not be hardwired!
76
my $cnetstack = "-S Control";
77 78 79
my $cnetvlanname = "Control";


80 81 82 83
sub REAL()    { return 5; }
sub CLEANUP() { return 4; }
sub RETRY()   { return 3; }
sub MODIFY()  { return 2; }
Chad Barb's avatar
 
Chad Barb committed
84
sub UPDATE()  { return 1; }
85
sub MODIFY_RECOVER()  { return 0; }
Chad Barb's avatar
 
Chad Barb committed
86

87 88 89
#
# Grab global enable of linkdelays.
#
Kirk Webb's avatar
 
Kirk Webb committed
90
my $enablelinkdelays = TBGetSiteVar("general/linux_endnodeshaping");
91

Chad Barb's avatar
 
Chad Barb committed
92 93 94 95 96 97
#
# Turn off line buffering on output
#

$| = 1;

98 99
my $updateReboot   = 0;
my $updateReconfig = 1;
100 101
my $update_Eventsys_restart = 0;
my $elabinelab     = 0;
102
my $plabinelab     = 0;
103 104 105 106 107
my $force          = 0;
my $noswapout      = 0;
my $genimode       = 0;
my $errors         = 0;
my $updatehosed    = 0;
108
my $canceled;
Chad Barb's avatar
 
Chad Barb committed
109
my $os_setup_pid;
110
my $nextState;
Chad Barb's avatar
 
Chad Barb committed
111 112

#
Chad Barb's avatar
 
Chad Barb committed
113
# First argument is either "in", "out", or "update";
Chad Barb's avatar
 
Chad Barb committed
114 115 116 117 118
# this value goes into $swapop.
#

my $swapop = shift;	

Chad Barb's avatar
 
Chad Barb committed
119 120 121
if (!$swapop || 
    (($swapop ne "in") && 
     ($swapop ne "out") &&
122
     ($swapop ne "modify") &&
Chad Barb's avatar
 
Chad Barb committed
123
     ($swapop ne "update"))) {
Chad Barb's avatar
 
Chad Barb committed
124 125 126 127 128 129 130 131 132 133 134
    usage();
}

#
# Get other arguments.
#

while ($#ARGV > 1) {
    $arg = shift;
    if ($arg eq "-force") {
	$force = 1;
Chad Barb's avatar
Chad Barb committed
135 136
    } elsif ($arg eq "-reboot") {
	$updateReboot = 1;
137
	$updateReconfig = 0;
138 139 140
    } elsif ($arg eq "-noreconfig") {
	$updateReboot   = 0;
	$updateReconfig = 0;
141
    } elsif ($arg eq "-eventsys_restart" && $swapop eq "modify") {
142
	$update_Eventsys_restart = 1;
143
    } elsif ($arg eq "-noswapout") {
144
	$noswapout = 1;
Chad Barb's avatar
 
Chad Barb committed
145 146 147 148 149 150 151 152 153
    } else {
	usage();
    }
}
if ($#ARGV < 1) {
    usage();
}
my ($pid,$eid) = @ARGV;

154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
#
# Untaint the arguments.
#
if ($pid =~ /^([-\@\w.]+)$/) {
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
if ($eid =~ /^([-\@\w.]+)$/) {
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}

Kevin Atkinson's avatar
 
Kevin Atkinson committed
170 171 172 173 174 175
#
# Set Error reporting info
# 
tblog_set_info($pid,$eid,$UID);

#
Mike Hibler's avatar
Mike Hibler committed
176
# Turn on timestamps
Kevin Atkinson's avatar
 
Kevin Atkinson committed
177
#
Chad Barb's avatar
 
Chad Barb committed
178 179
TBDebugTimeStampsOn();

180 181 182 183 184 185
#
# Verify user and get his DB uid and other info for later.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
186
}
187 188 189
my $user_uid   = $this_user->uid();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
190

191 192 193 194 195 196
# Slowly convert to using Experiment module.
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    tbdie("Could not lookup experiment object!")
}

Chad Barb's avatar
 
Chad Barb committed
197 198 199
#
# Print starting message.
#
200 201 202
my $exptidx = $experiment->idx();
my $state   = $experiment->state();
my $group   = $experiment->GetGroup();
Chad Barb's avatar
 
Chad Barb committed
203

204 205
print "Beginning swap-$swapop for $pid/$eid ($exptidx). " .
    TBTimeStampWithDate() . "\n";
Chad Barb's avatar
 
Chad Barb committed
206 207
TBDebugTimeStamp("tbswap $swapop started");

208 209 210
# Sanity check the current state. 
if (!$force) {
    if ($swapop eq "in") {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
211
	tbdie("Experiment should be ACTIVATING. Currently $state.")
212
	    if ($state ne EXPTSTATE_ACTIVATING);
Chad Barb's avatar
 
Chad Barb committed
213
    }
214
    elsif ($swapop eq "out") {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
215
	tbdie("Experiment should be SWAPPING. Currently $state.")
216
	    if ($state ne EXPTSTATE_SWAPPING);
Chad Barb's avatar
 
Chad Barb committed
217
    }
218
    elsif ($swapop eq "modify" || $swapop eq "update") {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
219
	tbdie("Experiment should be MODIFY_RESWAP. Currently $state.")
220
	    if ($state ne EXPTSTATE_MODIFY_RESWAP);
Chad Barb's avatar
 
Chad Barb committed
221 222
    }
}
223 224
# Get elabinelab status. See below.
if (! TBExptIsElabInElab($pid, $eid, \$elabinelab)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
225
    tbdie("Could not get elabinelab status for experiment $pid/$eid");
226
}
227 228 229 230 231 232 233 234 235 236 237
# and plabinelab status.
if (! TBExptIsPlabInElab($pid, $eid, \$plabinelab)) {
    tbdie("Could not get plabinelab status for experiment $pid/$eid");
}
if ($elabinelab && $plabinelab) {
    tberror "Cannot get my head around Plab in Elab in Elab!\n";
    print "Failingly finished swap-$swapop for $pid/$eid. " .
	TBTimeStamp() . "\n";
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
    exit(1);
}
Chad Barb's avatar
 
Chad Barb committed
238

239 240 241 242 243 244 245 246
#
# See if we use the new version of SyncVlans.
#
my $syncvlans =
    (EmulabFeatures->FeatureEnabled("SyncVlans",
				    $this_user, $group, $experiment)
     || $pid eq "emulab-ops");

247 248 249 250 251
#
# See if the experiment is firewalled
#
my $firewalled = TBExptFirewall($pid, $eid);

Chad Barb's avatar
 
Chad Barb committed
252 253 254
#
# Do actual swapping
#
255 256 257 258
if ($swapop eq "out") {
    #
    # Swap out
    #
Kevin Atkinson's avatar
 
Kevin Atkinson committed
259
    tblog_set_attempt(1);
260
    $errors = doSwapout(REAL);
Chad Barb's avatar
 
Chad Barb committed
261
}
262
elsif ($swapop eq "update" || $swapop eq "modify") {
263 264 265 266 267 268
    #
    # Before swapout, do cursory admission control to see if the
    # modified experiment will be swappable. assign_wrapper does a
    # more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
269
    if (! TBAdmissionControlCheck(undef, $experiment, undef)) {
270 271 272
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
273 274 275 276 277 278
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
    
279 280
    #
    # Update.
Kevin Atkinson's avatar
 
Kevin Atkinson committed
281 282 283
    #
    tblog_set_attempt(1);

284 285 286 287
    #
    # Phase One -- swap experiment partially out.
    #
    print STDERR "Backing up physical state...\n";
288
    $experiment->BackupPhysicalState();
Chad Barb's avatar
 
Chad Barb committed
289

290 291 292 293 294
    #
    # Actually, in update mode this is not done cause we are anticipating
    # adding nodes only.
    #
    $errors = ($swapop eq "modify" ? doSwapout(MODIFY) : 0);
Chad Barb's avatar
Chad Barb committed
295

296
    if ($errors) {
Chad Barb's avatar
 
Chad Barb committed
297
	#
298
	# Clean up the mess, leaving the experiment in the SWAPPED state,
299
	#
300
	if ($noswapout || $elabinelab) {
301 302 303 304 305
	    print STDERR "Leaving experiment swapped in as directed.\n";
	}
	else {
	    print STDERR "Cleaning up after errors.\n";
	    doSwapout(CLEANUP);
306
	    $updatehosed = 1;
307
	}
Chad Barb's avatar
 
Chad Barb committed
308
    }
309
    else {
Chad Barb's avatar
 
Chad Barb committed
310
	#
311
	# Phase Two -- swap experiment back in.
Chad Barb's avatar
 
Chad Barb committed
312
	#
313
	$errors = doSwapin(MODIFY);
Chad Barb's avatar
Chad Barb committed
314

315
	if ($errors) {
Chad Barb's avatar
Chad Barb committed
316 317 318
	    #
	    # There were errors; see if we can recover.
	    #
319
	    my $CanRecover = 1;
Chad Barb's avatar
Chad Barb committed
320 321 322

	    if ($errors != 7) {
		print STDERR "Update failure occurred _after_ assign phase; ";
323
		$CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
324 325
	    }

326 327 328
	    if ($CanRecover) {
		print STDERR "Recovering virtual and physical state.\n";

329 330 331
		# It is safe to remove the phystate since we know it was
		# backed up above, and cause we do not know if assign_wrapper
		# made it to that point before it failed. 
332 333 334 335 336
		if ($experiment->RemoveVirtualState() ||
		    $experiment->RestoreVirtualState() ||
		    $experiment->ReserveSharedBandwidth(1, 1) ||
		    $experiment->RemovePhysicalState() ||
		    $experiment->RestorePhysicalState()) {
337 338
		    print STDERR "Could not restore backed-up state; ";
		    $CanRecover = 0;
Chad Barb's avatar
Chad Barb committed
339
		}
340 341 342
		else {
		    print STDERR "Doing a recovery swap-in of old state.\n";

343
		    if (doSwapin(MODIFY_RECOVER)) {
344 345 346
			print STDERR "Could not swap in old physical state; ";
			$CanRecover = 0;
		    }
Chad Barb's avatar
Chad Barb committed
347 348
		}
	    }
349 350 351 352 353 354

	    #
	    # Some part of the recovery failed; must swap it out. swapexp
	    # (caller) will then have to do more clean up, hence the special
	    # exit status indicated by $updatehosed.
	    # 
355
	    if (!$CanRecover) {
356
		if ($noswapout || $elabinelab) {
357 358 359 360 361 362 363 364 365
		    print STDERR
			"No Recovery, but leaving experiment swapped in.\n";
		}
		else {
		    print STDERR
			"Recovery aborted! Swapping experiment out.\n";
		    doSwapout(CLEANUP);
		    $updatehosed = 1;
		}
366 367 368
	    }
	    else {
		print STDERR "Update recovery successful.\n";
Chad Barb's avatar
Chad Barb committed
369
	    }
Chad Barb's avatar
 
Chad Barb committed
370
	}
Chad Barb's avatar
Chad Barb committed
371 372
    }
}
373 374 375 376 377
elsif ($swapop eq "in") {
    #
    # Swap in
    #
    my $retries = 2;
378 379 380 381 382 383

    #
    # Before real swapin, do cursory admission control. assign_wrapper does
    # a more stringent check using assign.
    #
    print STDERR "Checking with Admission Control ...\n";
384
    if (! TBAdmissionControlCheck(undef, $experiment, undef)) {
385 386 387
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['admission_control_failure']},
		"Admission control failure!\n");
388 389 390 391 392
	print "Failingly finished swap-$swapop for $pid/$eid. " .
	    TBTimeStamp() . "\n";
	TBDebugTimeStamp("tbswap $swapop finished (failed)");
	exit(1);
    }
Kevin Atkinson's avatar
 
Kevin Atkinson committed
393 394

    tblog_set_attempt(1);
395
    $errors = doSwapin(REAL);
Chad Barb's avatar
Chad Barb committed
396

397 398 399 400 401 402 403 404 405 406
    #
    # Attempt a retry if: 
    #   a) there were errors, 
    #   b) doswapin() indicated (via return code 3) a retry is appropriate,
    #   c) we haven't tried too many times already.
    #   d) The cancelflag has not been set.
    #   e) $TESTMODE == 0.
    #
    while ($errors == 3 && $retries && !$canceled && !$TESTMODE) {
	$retries--;
Kevin Atkinson's avatar
 
Kevin Atkinson committed
407
	tblog_inc_attempt();
Chad Barb's avatar
 
Chad Barb committed
408

409 410
	print STDERR "Cleaning up after errors; will try again.\n";
	doSwapout(RETRY);
Chad Barb's avatar
 
Chad Barb committed
411

412 413
	print STDERR "Trying again...\n";
	$errors = doSwapin(RETRY);
Chad Barb's avatar
 
Chad Barb committed
414
    }
415 416
    if ($errors || $canceled) {
	print STDERR "Cleaning up after " .
417
	    ($canceled ? "cancelation" : "errors") . ".\n";
418
	doSwapout(CLEANUP);
Chad Barb's avatar
 
Chad Barb committed
419 420 421
    }
}

Kevin Atkinson's avatar
 
Kevin Atkinson committed
422 423 424 425
tblog_set_attempt(0);

TBGetCancelFlag($pid, $eid, \$canceled);
if ($canceled) {
426 427
    tberror ({type=>'summary', cause=>'canceled', 
	      severity=>SEV_IMMEDIATE, error=>['cancel_flag']},
Kevin Atkinson's avatar
 
Kevin Atkinson committed
428 429 430
	     "Experiment swap-$swapop canceled by user.");
}

Chad Barb's avatar
 
Chad Barb committed
431 432 433 434
#
# Write appropriate message and exit.
#
if ($errors) {
435
    print "Failingly finished swap-$swapop for $pid/$eid. ".TBTimeStamp()."\n";
Chad Barb's avatar
 
Chad Barb committed
436
    TBDebugTimeStamp("tbswap $swapop finished (failed)");
Chad Barb's avatar
Chad Barb committed
437

438 439
    # Pass out magic value to indicate that update failed!
    exit(1 | ($updatehosed ? 0x40 : 0));
Chad Barb's avatar
 
Chad Barb committed
440
}
441
print "Successfully finished swap-$swapop for $pid/$eid. " .TBTimeStamp()."\n";
442 443
TBDebugTimeStamp("tbswap $swapop finished (succeeded)");
exit(0);
Chad Barb's avatar
 
Chad Barb committed
444 445 446 447

#################################

##
Chad Barb's avatar
Chad Barb committed
448
#
Chad Barb's avatar
 
Chad Barb committed
449 450
# doSwapout - Swaps experiment out.
#
Chad Barb's avatar
Chad Barb committed
451
#             If in REAL or CLEANUP,
Chad Barb's avatar
 
Chad Barb committed
452 453 454
#             this function will free all nodes for the 
#             experiment.
#
Chad Barb's avatar
Chad Barb committed
455
#             If in RETRY or UDPATE,
Chad Barb's avatar
 
Chad Barb committed
456 457 458 459 460 461
#             only nodes not in RES_READY will be freed.
#
#             Returns 0 on success, >0 on failure.
#
##

Chad Barb's avatar
 
Chad Barb committed
462
sub doSwapout($) {
463
    my $type = shift;
Chad Barb's avatar
 
Chad Barb committed
464 465
    my $swapout_errors = 0;

Kevin Atkinson's avatar
 
Kevin Atkinson committed
466 467
    tblog_set_cleanup(1) if $type == CLEANUP;

Chad Barb's avatar
 
Chad Barb committed
468 469 470 471 472 473 474 475 476
    #
    # wait for os_setup;
    # this only applies if called after a failed doswapin.
    #
    if ($os_setup_pid) {
	print "Waiting for os_setup to finish\n";
	waitpid($os_setup_pid, 0);
	undef $os_setup_pid;
    }
Chad Barb's avatar
Chad Barb committed
477

Timothy Stack's avatar
 
Timothy Stack committed
478
    if (0 && $NFSTRACESUPPORT && $type == REAL) {
Timothy Stack's avatar
Timothy Stack committed
479 480
	print "Getting files accessed via NFS.\n";
	TBDebugTimeStamp("nfstrace started");
481
	system("nfstrace transfer $pid $eid");
Timothy Stack's avatar
Timothy Stack committed
482
	TBDebugTimeStamp("nfstrace finished");
483
    }
484

Chad Barb's avatar
 
Chad Barb committed
485
    if (! $TESTMODE) { 
486 487
	if (! ($DISABLE_EVENTS || $elabinelab)) {
	    if ($type >= RETRY ||
488
		($update_Eventsys_restart && $type == MODIFY) ) {
Chad Barb's avatar
 
Chad Barb committed
489
		print "Stopping the event system\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
490
		if (system("eventsys_control stop $pid,$eid")) {
491 492 493
		    tberror({type => 'secondary', severity => SEV_SECONDARY,
			     error => ['eventsys_stop_failed']},
			    "Failed to stop the event system.");
Chad Barb's avatar
 
Chad Barb committed
494 495
		    $swapout_errors = 1;
		}
496 497 498 499

		#
		# Stop the location piper.
		#
500
		if (0 && -x $piper) {
501 502
		    print "Stopping the location piper\n";
		    if (system("$piper -k $pid $eid")) {
503 504 505
			tberror({type => 'secondary', severity => SEV_SECONDARY,
				 error => ['piper_stop_failed']},
				"Failed to stop location piper.");
506 507 508
			$swapout_errors = 1;
		    }
		}
Chad Barb's avatar
 
Chad Barb committed
509 510
	    }
	}
511 512 513 514 515 516
	
	#
	# Do teardown of inner elab. We must do this before we teardown the
	# vlans since the inner control network is a vlan, and we want that
	# active so inner boss can reboot the inner nodes (avoid power cycle).
	#
517
	if ($elabinelab && $type >= CLEANUP) {
518 519
	    print "Tearing down elabinelab. This could take a while.\n";
	    if (system("elabinelab -k $pid $eid")) {
520 521 522
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['elabinelab_tear_down_failed']},
			"Failed to teardown elabinelab!");
523 524 525 526
		$swapout_errors = 1;
	    }
	}

527 528 529 530 531 532 533 534 535 536 537
	#
	# Grab our per-experiment switch stack name.
	#
	my @expswitches = $experiment->SwitchList(1);
	my ($perexpstack,$leader,@curswitches) = \
	    GetPerExperimentSwitchStack($experiment);
	my $stackarg = "";
	if (defined($perexpstack) && (@expswitches || @curswitches)) {
	    $stackarg = "-S $perexpstack --skip-supplied";
	}

Chad Barb's avatar
 
Chad Barb committed
538
	#
Chad Barb's avatar
Chad Barb committed
539
	# Clean up any VLANs in experiment.
Chad Barb's avatar
 
Chad Barb committed
540
	#
541
	# When modifying an elabinelab experiment, leave the vlans intact
542
	# so that the inner networks are not suddenly disconnected.
543
	#
544 545
	if ($type != MODIFY ||
	    ($type == MODIFY && $ELABINELAB && Lan->GotTrunks($experiment))) {
546 547
	    # Kill tag reservations on actual swapout.
	    my $tagopt = ($type != MODIFY ? "-C" : "");
548 549
	    TBDebugTimeStamp("snmpit started");
	    print STDERR "Removing VLANs.\n";
550
	    if (system("snmpit $stackarg --redirect-err $tagopt -r $pid $eid")) {
551 552 553
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vlan_reset_failed']},
			"Failed to reset VLANs");
554 555 556
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("snmpit finished");
Chad Barb's avatar
 
Chad Barb committed
557
	}
558 559 560 561 562 563 564 565 566 567 568 569 570 571
	#
	# Must check for stale vlans that we kept around in the above clause
	# since they will not be in the lans table anymore.
	#
	if ($type == CLEANUP) {
	    my @stale;
	    if (VLan->StaleVlanList($experiment, \@stale) != 0) {
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vlan_reset_failed']},
			"Failed to get stale VLANs");
		$swapout_errors = 1;
	    }
	    if (@stale) {
		print "Removing stale vlans @stale\n";
572
		system("snmpit $stackarg --redirect-err -f -C ".
573
                    join(" ", map("-o $_", @stale)));
574 575 576 577 578 579 580 581
		if ($?) {
		    tberror({type => 'summary', severity => SEV_SECONDARY,
			     error => ['vlan_reset_failed']},
			    "Failed to remove stale vlans");
		    $swapout_errors = 1;
		}
	    }
	}
582 583 584 585 586 587 588 589 590

	#
	# Remove per-experiment switch stacks -- don't bother to check if there
	# are any; it's cheap.
	#
	if ($type >= RETRY && defined($perexpstack)) {
	    print STDERR "Removing per-experiment switch stack.\n";
	    DeletePerExperimentSwitchStack($experiment);
	}
591
    }
592
	
593
    if ($type >= MODIFY) {
594 595
	# XXX Do this before releasing the nodes. Needs more thought.
	$experiment->DeleteInternalProgramAgents();
596 597
    }

598 599 600 601 602
    if ($type >= RETRY) {
	print "Removing dynamic blobs.\n";
	$experiment->RemoveBlobs();
    }

603
    if ($type >= CLEANUP) {
604 605 606 607 608 609 610 611
	#
	# Undo plab in elab specialness.
	# No need to worry about VLANs here, as all the special abilities
	# involve the control network.
	#
	if (! $TESTMODE && $plabinelab) {
	    print "Tearing down plabinelab.\n";
	    if (system("plabinelab -k $pid $eid")) {
612 613 614
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['plabinelab_tear_down_failed']},
			"Failed to teardown plabinelab!");
615 616 617 618
		$swapout_errors = 1;
	    }
	}

619 620
	#
	# We're not attempting a retry;
Chad Barb's avatar
 
Chad Barb committed
621
	#
622
	# Stop all of the vnodes.
Chad Barb's avatar
 
Chad Barb committed
623
	#
624
	if (! $TESTMODE) { 	
Chad Barb's avatar
 
Chad Barb committed
625 626 627
	    print "Tearing down virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid")) {
628 629 630
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
Chad Barb's avatar
 
Chad Barb committed
631 632 633
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup finished");
Chad Barb's avatar
 
Chad Barb committed
634 635
	}

Mike Hibler's avatar
Mike Hibler committed
636 637 638 639
	#
	# Nodes behind a firewall are treated special.
	# See undoFWNodes for details.
	#
640
	if ($firewalled && undoFWNodes($experiment)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
641
	    tblog_set_cleanup(0);
Mike Hibler's avatar
Mike Hibler committed
642 643
	    return 1;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
644

Mike Hibler's avatar
Mike Hibler committed
645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664
	#
	# Perform swapout time admin actions.  Right now there is at most
	# one of these.  It isn't really a general mechanism, just a hook
	# for state saving or data collection during swapout.
	# A couple of important "fer now" notes:
	#
	#	We don't do this for firewalled experiments.  We need a way
	#	to "tag" the saved disk state to ensure it doesn't get
	#	instantiated outside of a firewall.
	#
	#	We only do this on REAL swapouts, and not on CLEANUPs.
	#	There are some types of CLEANUPs where we may want to
	#	do this, in particular an invocation caused by a failed
	#	modify operation, where the admin action is to save the
	#	experiment state.  So we will need to revisit this.
	#
	my %soaction = ();
	if ($type == REAL && !$firewalled) {
	    TBExptGetSwapoutAction($pid, $eid, \%soaction);
	}
665
	if ($soaction{'command'} && doSwapoutAction($pid, $eid, %soaction)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
666
	    tblog_set_cleanup(0);
667
	    return 1;
668 669
	}

670 671 672 673
	#
	# Clear away Geni slivers. Do not proceed if this fails.
	#
	if ($PGENISUPPORT) {
674 675
	    require libGeni;
	    
676 677 678 679 680 681
	    if (libGeni::DeleteAllSlivers($experiment, $this_user)) {
		tberror("Could not delete slivers\n");
		return 1;
	    }
	}

682 683 684 685 686 687 688 689 690 691
	#
	# Clear logical interfaces and wires. This might move to support
	# swapmod.
	#
	print "Removing logical wires.\n";
	if (Interface::Wire->DeleteLogicalWires($experiment)) {
	    tberror("Could not delete logical wires and interfaces\n");
	    return 1;
	}

Chad Barb's avatar
 
Chad Barb committed
692 693
	#
	# remove all nodes from the experiment.
Chad Barb's avatar
 
Chad Barb committed
694
	# (nfree will send them to RES_FREE_DIRTY)
Chad Barb's avatar
 
Chad Barb committed
695 696 697
	#
	print STDERR "Freeing nodes.\n";
	TBDebugTimeStamp("nfree started");
698
	if (system("nfree -a $pid $eid")) {
699 700 701
	    tberror({type => 'secondary', severity => SEV_SECONDARY,
		     error => ['nfree_failed']},
		    "Could not free nodes.");
Chad Barb's avatar
 
Chad Barb committed
702 703 704
	    $swapout_errors = 1;
	}
	TBDebugTimeStamp("nfree finished");
Chad Barb's avatar
 
Chad Barb committed
705 706 707 708 709

	#
	# Since this is an actual swapout, 
	# reset our count of swap out nag emails sent.
	#
710 711
	DBQueryWarn("update experiments set swap_requests='', ".
		    "   sim_reswap_count='0' ".
712
		    "where eid='$eid' and pid='$pid'");
Chad Barb's avatar
 
Chad Barb committed
713 714
    } else {
	#
715
	# $type == RETRY or $type == MODIFY.
Chad Barb's avatar
 
Chad Barb committed
716 717
	# Therefore, don't deallocate nodes which have been successfully
	# incorporated into the experiment (i.e., are RES_READY).
Chad Barb's avatar
 
Chad Barb committed
718
	# (nfree will send deallocated nodes to RES_FREE_DIRTY)
Chad Barb's avatar
 
Chad Barb committed
719
	#
720 721 722 723 724 725 726 727
	my @failedpnodes = ();
	my @failedvnodes = ();
	
	my $db_result =
	    DBQueryFatal("select rv.node_id,n.allocstate,nt.isvirtnode ".
                         "  from reserved as rv ".
			 "left join nodes as n on n.node_id = rv.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
728 729
			 "where rv.pid='$pid' and rv.eid='$eid' and ".
			 "      rv.genisliver_idx is null");
Chad Barb's avatar
 
Chad Barb committed
730

731
	while (my ($node,$allocstate,$isvirt) = $db_result->fetchrow_array) {
Chad Barb's avatar
 
Chad Barb committed
732
	    if ($allocstate ne TBDB_ALLOCSTATE_RES_READY()) {
733 734 735 736 737 738
		if ($isvirt) {
		    push(@failedvnodes, $node);
		}
		else {
		    push(@failedpnodes, $node);
		}
Chad Barb's avatar
 
Chad Barb committed
739 740 741
	    }
	}

742 743 744 745 746 747 748
	#
	# Tear down failed vnodes. Perhaps not needed?
	# 
	if (!$TESTMODE && @failedvnodes > 0) {
	    print "Tearing down failed virtual nodes.\n";
	    TBDebugTimeStamp("vnode_setup -k started");
	    if (system("vnode_setup -d -k $pid $eid @failedvnodes")) {
749 750 751
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['vnode_tear_down_failed']},
			"Failed to tear down vnodes.");
752 753 754 755
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("vnode_setup -k finished");
	}
Chad Barb's avatar
 
Chad Barb committed
756

757 758 759 760 761 762 763 764 765 766 767
	#
	# Undo plabinelab setup
	#
	if (!$TESTMODE && $plabinelab && @failedpnodes > 0) {
	    print "Removing failed nodes from inner plab.\n";
	    if (system("plabinelab -r $pid $eid @failedpnodes")) {
		tberror "Failed to remove inner nodes!";
		$swapout_errors = 1;
	    }
	}

768 769 770 771
	#
	# Release all failed nodes.
	# 
	if (@failedpnodes > 0 || @failedvnodes > 0) {
772 773
	    print STDERR "Freeing failed nodes.\n";
	    
Chad Barb's avatar
 
Chad Barb committed
774
	    TBDebugTimeStamp("nfree started");
Chad Barb's avatar
 
Chad Barb committed
775 776 777 778 779
	    #
	    # Specify -x switch so when a physical node gets freed,
	    # any virtual nodes (owned by this experiment)
	    # sitting on top of it are freed as well.
	    #
780 781
	    if (system("nfree -x $pid $eid " .
		       join(" ", (@failedpnodes, @failedvnodes)))) {
782 783 784
		tberror({type => 'secondary', severity => SEV_SECONDARY,
			 error => ['nfree_failed']},
			"Could not free nodes.");
Chad Barb's avatar
 
Chad Barb committed
785 786 787 788 789 790
		$swapout_errors = 1;
	    }
	    TBDebugTimeStamp("nfree finished");
	}
    }

791 792 793 794 795 796 797 798 799 800 801 802
    if (! $TESTMODE) {
	#
	# If the experiment has no Plab dslice nodes left, but still has
	# a Plab slice, destroy the slice
	#

	# Does the slice exist?
	$db_result =
	    DBQueryFatal("select slicename from plab_slices ".
			 "where pid='$pid' and eid='$eid'");

	if ($db_result->numrows) {
803
	    # Are there any nodes left in the slice?
804
	    $db_result =
805 806 807 808 809 810 811 812 813
		DBQueryFatal("select r.node_id ".
			     " from plab_slices as ps".
			     " left join plab_slice_nodes as psn ".
			     "   on (ps.slicename=psn.slicename ".
			     "       and ps.plc_idx=psn.plc_idx) ".
			     " left join reserved as r ".
			     "   on psn.node_id=r.node_id ".
			     " where ps.pid='$pid' and ps.eid='$eid'".
			     "   and r.node_id is not NULL");
814 815

	    if (!$db_result->numrows) {
816
		print "Tearing down Slices.\n";
817
		TBDebugTimeStamp("plabslice destroy started");
818
		if (system("plabslice destroy $pid $eid")) {
819
		    tberror "Failed to tear down Slices.";
820 821 822 823 824 825 826
		    $swapout_errors = 1;
		}
		TBDebugTimeStamp("plabslice destroy finished");
	    }
	}
    }

Chad Barb's avatar
 
Chad Barb committed
827 828 829 830 831 832 833 834
    if (! $TESTMODE) {
	#
	# All of these errors are non-fatal on swapout. We find out about them
	# via email sent from the individual scripts.
	#

	#
	# Only reset mountpoints if this is an actual swapout, and
Chad Barb's avatar
 
Chad Barb committed
835
	# not a failed swapin(cleanup), update, or retry.
Chad Barb's avatar
 
Chad Barb committed
836
	#
Chad Barb's avatar
 
Chad Barb committed
837
	if ($type == REAL) {
Chad Barb's avatar
 
Chad Barb committed
838 839 840
	    print "Resetting mountpoints.\n";
	    TBDebugTimeStamp("exports started");
	    if (system("exports_setup")) {
841 842 843
		tberror({severity => SEV_WARNING,
			 error => ['mountpoint_reset_failed']},
			"Failed to reset mountpoints.");
Chad Barb's avatar
 
Chad Barb committed
844 845
	    }
	    TBDebugTimeStamp("exports finished");
Chad Barb's avatar
Chad Barb committed
846

847 848 849 850 851 852 853 854 855
	    #
	    # Ditto these two. 
	    #
	    print "Resetting named maps.\n";
	    TBDebugTimeStamp("named started");
	    if (system("named_setup")) {
		tbwarn "Failed to reset named map.";
	    }
	    TBDebugTimeStamp("named finished");
Chad Barb's avatar
Chad Barb committed
856

857 858 859 860 861 862
	    print "Resetting email lists.\n";
	    TBDebugTimeStamp("genelists started");
	    if (system("genelists -t")) {
		tbwarn "Failed to reset email lists.";
	    }
	    TBDebugTimeStamp("genelists finished");
Chad Barb's avatar
 
Chad Barb committed
863 864 865
	}
    }

Chad Barb's avatar
 
Chad Barb committed
866
    #
867
    # Wipe the DB clean except during MODIFY or RETRY. In those
868 869
    #    cases, assign_wrapper will reset the DB after reading
    #    the info.
Chad Barb's avatar
 
Chad Barb committed
870
    #
871 872
    if ( $type >= CLEANUP ) {
	print STDERR "Resetting DB.\n";
873 874 875
	$experiment->RemovePhysicalState();
	# Special. 
	$experiment->ClearPortRegistration();
876
    }
Chad Barb's avatar
 
Chad Barb committed
877

Kevin Atkinson's avatar
 
Kevin Atkinson committed
878
    tblog_set_cleanup(0);
Chad Barb's avatar
 
Chad Barb committed
879 880 881 882
    return $swapout_errors;
}

##
Chad Barb's avatar
Chad Barb committed
883
#
Chad Barb's avatar
 
Chad Barb committed
884 885
# doSwapin - Swaps experiment in.
#
Chad Barb's avatar
Chad Barb committed
886 887 888 889 890
#            Returns:
#              0 - successful swapin
#              1 - failed swapin; cleanup required.
#              3 - failed swapin; cleanup required; can retry.
#              7 - failed swapin; assign failed; no cleanup.
Chad Barb's avatar
 
Chad Barb committed
891 892
##

Chad Barb's avatar
 
Chad Barb committed
893
sub doSwapin($) {
894
    my $type = shift; 
895 896
    # Just the physnodes ...
    my @deleted_pnodes = ();
Chad Barb's avatar
 
Chad Barb committed
897

Chad Barb's avatar
 
Chad Barb committed
898 899 900 901
    #
    # assign_wrapper does all the virtual to physical mapping 
    # and updating the DB state.
    #
902
    
903
    if ($type > MODIFY_RECOVER) {
904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919
        #
        # Hacky test to allow disabling of linkdelays if the node is going
        # to run Linux. See sitevar above.
        #
	if (! $enablelinkdelays) {
	    $db_result =
		DBQueryFatal("select distinct e.pid,e.eid,vl.vnode,vn.osname ".
			 "  from experiments as e ".
			 "left join virt_lans as vl on vl.pid=e.pid and ".
			 "     vl.eid=e.eid ".
			 "left join virt_nodes as vn on vn.pid=e.pid and ".
			 "     vn.eid=e.eid and vn.vname=vl.vnode ".
			 "left join os_info as o on o.osname=vn.osname and ".
			 "  (o.pid=vl.pid or o.pid='" . TBOPSPID() . "') ".
			 "where (vl.uselinkdelay!=0 or e.uselinkdelays!=0 or ".
			 "       e.forcelinkdelays!=0) and ".
920 921
			 "     (o.os is NULL or o.os='Linux' or ".
			 "      o.os='Fedora') and ".
922 923 924
			 "     e.pid='$pid' and e.eid='$eid'");

	    if ($db_result->numrows) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
925 926
		tberror "Endnodeshaping is disabled on Linux Images!";
		tberror "You must modify your experiment to swap it in.";
927 928 929 930
		return 1;
	    }
	}
	
Chad Barb's avatar
Chad Barb committed
931
	print "Mapping to physical reality ...\n";
932
	TBDebugTimeStamp("mapper wrapper started");
Chad Barb's avatar
Chad Barb committed
933 934

	#
935 936 937 938
	# Pass the -u (update) switch into assign_wrapper, which turns on
	# update mode. When doing a retry, must also fix the current nodes
	# to avoid stuff jumping around when simply trying to replace a node
	# that did not boot.
Chad Barb's avatar
Chad Barb committed
939 940
	#
	my $exitcode;
941
	my $cmd = "$wrapper";
942 943 944
	if (EmulabFeatures->FeatureEnabled("NewMapper",
					   $this_user, $group, $experiment) ||
	    $this_user->stud() || $experiment->virtnode_count()) {
945 946 947
	    $cmd = "$mapper";
	}
	$cmd .= " -f"
948
	    if ($type == RETRY);
949
	
950
	if (system("$cmd -u $pid $eid")) {
951 952
	    $exitcode = $? >> 8;

Kevin Atkinson's avatar
 
Kevin Atkinson committed
953
	    tberror "Failed ($exitcode) to map to reality.";
954

955 956
	    # Wrapper sets this bit when recovery is possible.
	    if ($exitcode & 64) {
957
		# We can recover. 
958
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
959
		return 7;
960 961
	    }
	    else {
962
		# No recovery, no retry.
963
		tbreport(SEV_SECONDARY, 'assign_wrapper_failed', $exitcode);
Chad Barb's avatar
Chad Barb committed
964 965 966
		return 1;
	    }
	}
967
	TBDebugTimeStamp("mapper wrapper finished");
Chad Barb's avatar
 
Chad Barb committed
968

Chad Barb's avatar
Chad Barb committed
969 970
	print "Mapped to physical reality!\n";
    }
Chad Barb's avatar
 
Chad Barb committed
971

972
    # Check cancel flag before continuing. No retry, 
973
    TBGetCancelFlag($pid, $eid, \$canceled);
974 975 976 977
    if ($canceled) {
	tbreport(SEV_IMMEDIATE, 'cancel_flag');
	return 1
    }
978

979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002
    #
    # Create/update per-experiment switch stacks if necessary.
    # Still haven't done any vlan synch for modify.
    #
    my @expswitches = $experiment->SwitchList(1);
    my ($perexpstack,$leader,@curswitches) = \
	GetPerExperimentSwitchStack($experiment);
    my $stackarg = "";
    if (($type > MODIFY && @expswitches)
	|| ($type == MODIFY && (!defined($perexpstack) && @expswitches))) {
	print STDERR "Creating per-experiment switch stack.\n";
	AddPerExperimentSwitchStack($experiment,@expswitches);
	$perexpstack = GetPerExperimentSwitchStackName($experiment);
    }
    elsif ($type == MODIFY && defined($perexpstack)
	   && (@expswitches || @curswitches)) {
	print STDERR "Updating per-experiment switch stack.\n";
	UpdatePerExperimentSwitchStack($experiment,@expswitches);
    }
    if (defined($perexpstack) && (@expswitches || @curswitches)) {
	print STDERR "Will configure per-experiment switch stack ($perexpstack) later.\n";
	$stackarg = "-S $perexpstack --skip-supplied";
    }

1003 1004 1005 1006 1007 1008 1009
    #
    # When doing a modify, we have to compare vlans to determine which
    # vlans actually changed and need to be deleted, before processing
    # the new vlans for the experiment. Note that vlans that already
    # exist on the switches will be left as is by snmpit.
    #
    # We must do this before the nfree of deleted nodes in the next section
1010
    # because the new test in nfree that prevents nodes from accidentally
1011 1012 1013 1014 1015
    # getting released when they are in a vlan.
    #
    if ($type == MODIFY) {
	my @diff = ();
	my @same = ();
1016 1017
	my $ret;

1018
	if ($ELABINELAB || !$syncvlans) {
1019 1020 1021 1022 1023 1024
	    $ret = Lan->CompareVlansWithSwitches($experiment, \@diff, \@same);
	}
	else {
	    $ret = Lan->CompareVlansWithSwitches2($experiment);
	}
	if ($ret) {
1025 1026 1027 1028 1029
	    tberror({type => 'summary', severity => SEV_SECONDARY,
		     error => ['vlan_setup_failed']},
		    "Failed to compare old vlans");
	    return 1;
	}
1030
	if ($ELABINELAB || !$syncvlans) {
1031 1032
	    if (@diff) {
		print "Removing obsolete vlans @diff\n";
1033
		system("snmpit $stackarg --redirect-err -f -C ".
1034
		       join(" ", map("-o $_", @diff)));
1035 1036 1037 1038 1039 1040
		if ($?) {
		    tberror({type => 'summary', severity => SEV_SECONDARY,
			     error => ['vlan_setup_failed']},
			    "Failed to remove obsolete VLANs.");
		    return 1;
		}
1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
		if (defined($perexpstack)) {
		    system("snmpit -S $perexpstack --redirect-err -f -C ".
			   join(" ", map("-o $_", @diff)));
		    if ($?) {
			tberror({type => 'summary', severity => SEV_SECONDARY,
				 error => ['vlan_setup_failed']},
				"Failed to remove obsolete per-experiment VLANs.");
			return 1;
		    }
		}
1051 1052 1053 1054
	    }
	}
	else {
	    print "Synchronizing VLANs.\n";
1055
	    system("snmpit $stackarg --redirect-err -X $pid $eid");
1056 1057 1058
	    if ($?) {
		tberror({type => 'summary', severity => SEV_SECONDARY,
			 error => ['vlan_setup_failed']},
1059
			"Failed to synchronize VLANs.");
1060 1061
		return 1;
	    }
1062 1063 1064 1065 1066 1067 1068 1069 1070
	    if (defined($perexpstack)) {
		system("snmpit -S $perexpstack --redirect-err -X $pid $eid");
		if ($?) {
		    tberror({type => 'summary', severity => SEV_SECONDARY,
			     error => ['vlan_setup_failed']},
			    "Failed to synchronize per-experiment VLANs.");
		    return 1;
		}
	    }
1071 1072 1073
	}
    }

1074 1075 1076 1077 1078 1079 1080 1081 1082
    #
    # Look for any nodes in RES_TEARDOWN. These need to be released,
    # and if a virtnode, they need to be torn down. We cannot wait for
    # the virtnodes to go down with the physnode they are hosted on,
    # so teardown and release the virtnodes first, and then do the
    # physnodes.
    #
    # Errors are fatal; no recovery or retry.
    #
1083
    if ($type == MODIFY || $type == UPDATE) {
1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104
	my $allocstate = TBDB_ALLOCSTATE_RES_TEARDOWN();
	
	$db_result =
	    DBQueryFatal("select r.node_id,nt.isvirtnode,nt.isremotenode ".
			 "  from reserved as r ".
			 "left join nodes as n on n.node_id=r.node_id ".
			 "left join node_types as nt on nt.type=n.type ".
			 "where r.pid='$pid' and r.eid='$eid' and ".
			 "      n.allocstate='$allocstate'");

	if ($db_result->numrows) {
	    my @virtnodes = ();
	    my @physnodes = ();
	    
	    print "Tearing down and releasing unused nodes\n";

	    # First teardown/release virtnodes. 
	    while (my ($node,$isvirt,$isrem) = $db_result->fetchrow_array()) {
		if ($isvirt) {
		    push(@virtnodes, $node);
		}
1105
		else {
1106 1107 1108
		    push(@physnodes, $node);
		}
	    }
1109 1110 1111
	    # See below.
	    @deleted_pnodes = @physnodes;
	    
1112 1113 1114
	    if (@virtnodes) {
		TBDebugTimeStamp("vnode_setup started");
		
1115
		if (system("vnode_setup -k $pid $eid @virtnodes")) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
1116
		    tberror "Failed to tear down unused virtnodes!\n";
1117 1118 1119 1120 1121
		    return 1;
		}
		TBDebugTimeStamp("vnode_setup finished");
		
		if (system("nfree $pid $eid @virtnodes")) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
1122
		    tberror "Failed to nfree unused virtnodes!\n";
1123 1124 1125 1126
		    return 1;
		}
	    }
	    if (@physnodes) {
1127 1128 1129
		if ($elabinelab) {
		    print "Removing nodes from inner elab.\n";
		    if (system("elabinelab -r $pid $eid @physnodes")) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
1130
			tberror "Failed to remove inner nodes!";
1131 1132
			return 1;
		    }
1133 1134 1135 1136 1137 1138
		} elsif ($plabinelab) {
		    print "Removing nodes from inner plab.\n";
		    if (system("plabinelab -r $pid $eid @physnodes")) {
			tberror "Failed to remove inner nodes!";
			return 1;
		    }
1139 1140 1141
		}

		#
Mike Hibler's avatar
Mike Hibler committed
1142 1143
		# If the experiment is firewalled, cleanup the nodes
		# we are releasing.
1144
		# 
1145 1146
		if ($firewalled && undoFWNodes($experiment, 1,
					       @deleted_pnodes)) {
1147 1148 1149
		    return 1;
		}
		
1150
		if (system("nfree $pid $eid @physnodes")) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
1151
		    tberror "Failed to nfree unused physnodes!\n";
1152 1153 1154 1155 1156 1157
		    return 1;
		}
	    }
	}
    }

Chad Barb's avatar
 
Chad Barb committed
1158 1159 1160 1161 1162 1163
    # Exit here if we are testing.
    if ($TESTMODE) {
	print "Testing run - Stopping here.\n";
	return 0;
    }

1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174
    #
    # Handle tarballs - we might need to fetch some from URLs if the user
    # asked for that.
    #
    print "Fetching tarballs and RPMs (if any) ...\n";
    TBDebugTimeStamp("tarfiles_setup started");

    if (system("tarfiles_setup $pid $eid")) {
	#
	# No recovery for now - what would we do?
	#
1175 1176 1177
	tberror({type => 'secondary', severity => SEV_SECONDARY,
		 error => ['tarfiles_setup_failed']},
		"Failed to set up tarballs.");
1178 1179 1180 1181
	return 1;
    }
    TBDebugTimeStamp("tarfiles_setup finished");

1182 1183 1184 1185
    #
    # Handle virt blobs.
    #
    if ($type >= RETRY) {
1186
	print "Creating dynamic blobs.\n";
1187 1188 1189
	$experiment->UploadBlobs(0);
    }
    elsif ($type == MODIFY) {
1190
	print "Updating dynamic blobs.\n";
1191 1192 1193
	$experiment->UploadBlobs(1);
    }

1194 1195 1196 1197
    #
    # If there are any Plab dslice nodes in the experiment, create the
    # dslice now
    #
1198
    if ($type > MODIFY_RECOVER) {
1199 1200 1201
	# Are there any Plab nodes?  First get a list of node types in the exp;
	# if any are types hosted by any of the PLCs we know about, create
	# all slices necessary for the experiment in a single plabslice call.
1202
	$db_result =
1203 1204 1205 1206 1207 1208 1209 1210
	    DBQueryFatal("select nt.type,ppi.plc_name from nodes as n ".
			 "  left join node_types as nt on n.type = nt.type ".
			 "  left join reserved as r on r.node_id = n.node_id ".
			 "  left join plab_plc_info as ppi ".
			 "    on nt.type = ppi.node_type".
			 " where r.pid='$pid' and r.eid='$eid'".
			 "    and ppi.node_type is not NULL".
			 " group by nt.type");
Austin Clements's avatar