#!/usr/bin/perl -wT

#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
# All rights reserved.
#

use English;
use Getopt::Std;

#
# This gets invoked from the Web interface.
# Swap an experiment in, swap it out, restart or modify.
#

#
# Print the command line synopsis on STDOUT and exit with status -1.
#
sub usage()
{
    print STDOUT "Usage: swapexp [-b] [-i | -a | -f] [-r] ".
	"<-s in | out | restart | modify | pause> <pid> <eid> [<nsfile>]\n";
    exit(-1);
}
# Option string handed to getopts(); only -s takes an argument.
my  $optlist = "biafrs:";

#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($code, $text) = @_;

    # A negative code is an unexpected failure: report it via die().
    die("*** $0:\n".
	"    $text\n")
	if ($code < 0);

    # Zero or positive: tell the caller why on STDERR and hand back the
    # code as the process exit status.
    print STDERR "$text\n";
    exit($code);
}

#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
my $TBINFO = "$TB/expinfo";
my $TBDOCBASE = "@TBDOCBASE@";

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

# Directory holding the tb* helper programs run below.
my $tbdir    = "$TB/bin/";
my $tbdata   = "tbdata";

# Command line flags (see the option parsing below).
my $batch    = 0;	# -b
my $idleswap = 0;	# -i
my $autoswap = 0;	# -a
my $force    = 0;	# -f
my $reboot   = 0;	# -r

# Exit status used by fatal(); replaced by a helper's exit code on failure.
my $errorstat= -1;
# Set when a modify could not be recovered; fatal() then ends the experiment.
my $modifyHosed = 0;

my $inout;		# Argument of -s.
my $logname;
my $dbuid;
my $user_name;
my $user_email;
my @allnodes;
my @row;
my $action;		# Past tense description of $inout, for messages.
my $nextswapstate;	# State set when the operation succeeds.
my $tempswapstate;	# State set while the operation is in progress.

87 88 89
#
# Untaint the path
# 
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off output buffering: flush after every write.
#
$| = 1;

#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
$idleswap = 1
    if (defined($options{"i"}));
$autoswap = 1
    if (defined($options{"a"}));
$force = 1
    if (defined($options{"f"}));
$batch = 1
    if (defined($options{"b"}));
$reboot = 1
    if (defined($options{"r"}));

#
# The -s option is required and must name one of the known operations.
#
my %validops = map { $_ => 1 } ("out", "in", "restart", "pause", "modify");

if (! defined($options{"s"})) {
    usage();
}
$inout = $options{"s"};
if (! exists($validops{$inout})) {
    usage();
}

# A modify takes one more argument (the NS file) than the other operations.
if (@ARGV != (($inout eq "modify") ? 3 : 2)) {
    usage();
}
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

#
# Untaint the arguments.
#
if ($pid =~ /^([-\@\w.]+)$/) {
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
if ($eid =~ /^([-\@\w.]+)$/) {
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}

# Relative names below are used after the chdir() to $workdir further down.
my $repfile = "$eid.report";
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
my $tempnsfile;
my $modnsfile;

if ($inout eq "modify") {
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
    if ($tempnsfile =~ /^([-\w.\/]+)$/) {
	$tempnsfile = $1;
    }
    else {
	die("Tainted nsfile name: $tempnsfile");
    }
    # Name the NS file is copied to before going into the background.
    $modnsfile = "$eid-modify.ns";
}

#
# Verify user and get his DB uid.
#
if (! UNIX2DBUID($UID, \$dbuid)) {
    die("*** $0:\n".
	"    You do not exist in the Emulab Database.\n");
}

#
# Get email info for user.
#
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
    die("*** $0:\n".
	"    Cannot determine your name and email address.\n");
}

#
# Verify that this person can muck with the experiment.
# Note that any script down the line has to do an admin check also. 
# UID 0 and testbed admins skip the per-experiment access check.
#
if ($UID && !TBAdmin($UID) &&
    !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
	"    You do not have permission to swap or modify this experiment!\n");
}

# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
# the tb scripts.
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

if (! $query_result->numrows) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid exists!\n");
}
my %hashrow = $query_result->fetchhash();
my $expt_head_login = $hashrow{'expt_head_uid'};
my $estate          = $hashrow{'state'};
my $expt_path       = $hashrow{'path'};
my $isbatchexpt     = $hashrow{'batchmode'};
my $ebatchstate     = $hashrow{'batchstate'};
my $cancelflag  = $hashrow{'canceled'};
my $swappablebit= $hashrow{'swappable'};
my $idleswapbit = $hashrow{'idleswap'};
my $autoswapbit = $hashrow{'autoswap'};
my $swappablestr= ( $swappablebit ? "Yes" : "No" );
my $idleswapstr = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr = ( $autoswapbit ? "Yes" : "No" );
my $noswap      = $hashrow{'noswap_reason'};
my $noidleswap  = $hashrow{'noidleswap_reason'};
# The DB timeouts are divided by 60 here and reported as hours below.
my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0;

if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
    $idleswaptime = $idleswap_time / 60.0;
}

# Summary of the swap settings; appended to the swapin email.
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");

if (! chdir($workdir)) {
    die("*** $0:\n".
	"    Could not chdir to $workdir: $!\n");
}

#
# Batchmode.
#
if ($isbatchexpt) {
    if ($batch) {
	#
	# When coming from the daemon, sanity check the batch state.
	#
	if ($inout eq "in") {
	    die("*** $0:\n".
		"    Batch experiment $pid/$eid is not in the proper state!\n".
		"    Currently $ebatchstate, but should be ACTIVATING\n")
		if ($ebatchstate ne BATCHSTATE_ACTIVATING());
	}
	elsif ($inout eq "out") {
	    die("*** $0:\n".
		"    Batch experiment $pid/$eid is not in the proper state!\n".
		"    Currently $ebatchstate, but should be TERMINATING\n")
		if ($ebatchstate ne BATCHSTATE_TERMINATING());
	}
	else {
	    die("*** $0:\n".
		"    Improper request from batch daemon for $pid/$eid!\n");
	}
    }
    else {
	#
	# User is requesting that a batch either be injected or paused.
	# Sanity check the state, but otherwise let the batch daemon
	# handle it.
	#
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
	    if ($cancelflag);

	if ($inout eq "in") {
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be PAUSED to\n".
			   "swap in. Currently $ebatchstate.")
		if ($ebatchstate ne BATCHSTATE_PAUSED());
	    TBSetBatchState($pid, $eid, BATCHSTATE_POSTED());
	}
	elsif ($inout eq "out") {
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be RUNNING or\n".
			   "ACTIVATING to swap out. Currently $ebatchstate.")
		if ($ebatchstate ne BATCHSTATE_RUNNING() &&
		    $ebatchstate ne BATCHSTATE_ACTIVATING());

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    # 
	    TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELSWAP());
	}
	elsif ($inout eq "pause") {
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be POSTED to\n".
			   "pause. Currently $ebatchstate.")
		if ($ebatchstate ne BATCHSTATE_POSTED());

	    #
	    # If the batchstate is POSTED, we can just set it to PAUSED
	    # since the batch_daemon is locked out from messing with it.
	    #
	    TBSetBatchState($pid, $eid, BATCHSTATE_PAUSED());
	}
	elsif ($inout eq "modify") {
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be PAUSED or\n".
			   "RUNNING to modify. Currently $ebatchstate.")
		if ($ebatchstate ne BATCHSTATE_PAUSED() &&
		    $ebatchstate ne BATCHSTATE_RUNNING());
	    #
	    # Otherwise, proceed with the modify. The experiment will be
	    # locked below, and so it cannot be injected or otherwise messed
	    # with since its state is going to be changed before we unlock
	    # the experiments table. The batch daemon will leave it alone
	    # until the modify is done. If the modify fails and cannot recover
	    # it is going to get swapped out; that is okay since the batch
	    # daemon does not keep state internally. 
	    #
	}
	else {
	    die("*** $0:\n".
		"    Operation $inout not allowed on a batch experiment!\n");
	}

	#
	# Everything except a modify is finished once the batch state or
	# cancel flag has been changed; a modify carries on below.
	#
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid state has been changed.\n")
	    if ($inout ne "modify");
    }
}
else {
    #
    # If the cancel flag is set, then user must wait for that to clear before
    # we can do anything else.
    #
    ExitWithStatus(1,
		   "Experiment $pid/$eid has its cancel flag set!.\n".
		   "You must wait for that to clear before you can swap or\n".
		   "or modify the experiment.\n")
	if ($cancelflag);

    #
    # Check the state for the various operations. The -f option skips
    # these checks.
    #
    if (!$force) {
	if ($inout eq "in") {
	    if ($ebatchstate ne BATCHSTATE_PAUSED()) {
		ExitWithStatus(1,
			       "Experiment $pid/$eid is not swapped out!");
	    }
	}
	elsif ($inout eq "out") {
	    if ($ebatchstate eq BATCHSTATE_PAUSED()) {
		ExitWithStatus(1,
			       "Experiment $pid/$eid is swapped out!");
	    }
	    elsif ($ebatchstate ne BATCHSTATE_RUNNING() &&
		   $ebatchstate ne BATCHSTATE_ACTIVATING()) {
		ExitWithStatus(1,
			       "Experiment $pid/$eid is not swapped in!");
	    }

	    if ($ebatchstate eq BATCHSTATE_ACTIVATING()) {
		#
		# All we can do is set the cancel flag and hope that
		# it gets noticed. We do not wait. 
		# 
		TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELSWAP());

		ExitWithStatus(0,
			       "Experiment $pid/$eid swapin has been  ".
			       "marked for cancelation.\n".
			       "You will receive email when the original ".
			       "swap request has finished.");
	    }
	}
	elsif ($inout eq "restart") {
	    if ($ebatchstate ne BATCHSTATE_RUNNING()) {
		ExitWithStatus(1,
			       "Experiment $pid/$eid is not swapped in!");
	    }
	}
	elsif ($inout eq "modify") {
	    if ($ebatchstate ne BATCHSTATE_RUNNING() &&
		$ebatchstate ne BATCHSTATE_PAUSED()) {
		ExitWithStatus(1,
			       "Experiment $pid/$eid is in transition!");
	    }
	}
	else {
	    # $action is not assigned until later, so report the requested
	    # operation itself.
	    die("*** $0:\n".
		"    Missing state check for action: $inout\n");
	}
    }
}

#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
if ($inout eq "in") {
    $tempswapstate = BATCHSTATE_ACTIVATING();
    $nextswapstate = BATCHSTATE_RUNNING();
}
elsif ($inout eq "out") {
    $tempswapstate = BATCHSTATE_TERMINATING();
    $nextswapstate = BATCHSTATE_PAUSED();
}
elsif ($inout eq "restart") {
    $tempswapstate = BATCHSTATE_RUNNING_BUSY();
    $nextswapstate = BATCHSTATE_RUNNING();
}
elsif ($inout eq "modify") {
    # A successful modify returns to whatever state it started in.
    $tempswapstate = BATCHSTATE_MODIFYING();
    $nextswapstate = $ebatchstate;
}
else {
    # $action is not assigned until later, so report the requested
    # operation itself.
    die("*** $0:\n".
	"    Missing state check for action: $inout\n");
}
# Update idleswap_timeout to whatever the current value is.
if ($inout ne "out") {
    DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
		 "where eid='$eid' and pid='$pid'");
}

# Lock the record, set the intermediate state, and unlock the table.
TBLockExp($pid, $eid, $tempswapstate);
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
#

# Past tense description of the operation, used in messages and email.
if ($inout eq "in") {
    $action = "swapped in";
}
elsif ($inout eq "out") {
    $action = "swapped out";
}
elsif ($inout eq "restart") {
    $action = "restarted";
}
elsif ($inout eq "modify") {
    $action = "modified";
}

#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
#
my $expt_head_name;
my $expt_head_email;

if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    # Not fatal; fall back to the testbed operations address.
    print STDERR "*** WARNING: ".
	         "Could not determine name/email for $expt_head_login.\n";
    $expt_head_name  = "TBOPS";
    $expt_head_email = $TBOPS;
}

#
# Before going to background, we have to copy out the NS file!
#
if ($inout eq "modify") {
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
	#
	# The experiment was locked above with the transitional state.
	# Put it back the way fatal() does before giving up, so that it
	# is not left locked. In batch mode the lock is left alone, as
	# in fatal().
	#
	TBUnLockExp($pid, $eid, $ebatchstate)
	    if (! $batch);

	die("*** $0:\n".
	    "    Could not copy $tempnsfile to $modnsfile");
    }
    chmod(0664, "$modnsfile");
}

#
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
    TBExptSetLogFile($pid, $eid, $logname);
    TBExptOpenLogFile($pid, $eid);

    if (TBBackGround($logname)) {
	#
	# Parent exits normally
	#
	print "Experiment $pid/$eid is now being $action.\n".
	    "You will be notified via email when the this is done.\n";
	exit(0);
    }
}

#
# Gather stats; start clock ticking
# (nothing is recorded for a restart).
#
if ($inout eq "in") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "out") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "modify") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0,
		    TBDB_STATS_FLAGS_START);
}

#
# Remove old report file since its contents are going to be invalid.
# A restart keeps the existing report.
#
if ($inout ne "restart" && -e $repfile) {
    unlink("$repfile");
}

#
# Run the requested operation. After a swap in/out/update, sanity check
# the experiment state in case someone changes something. Failures go
# through fatal(), which does not return.
#
if ($inout eq "out") {
    # Forced and idle swapouts pass -force to tbswap.
    my $arg = (($force || $idleswap) ? "-force" : "");

    print STDOUT "Running 'tbswap out' with arguments: $pid $eid\n";
    if (system("$tbdir/tbswap out $arg $pid $eid") != 0) {
	$errorstat = $? >> 8;
	fatal("tbswap out failed!");
    }

    $estate = ExpState($pid,$eid);
    if ($estate ne EXPTSTATE_SWAPPED) {
	fatal("Experiment is in the wrong state: $estate");
    }
}
elsif ($inout eq "in") {
    print STDOUT "Running 'tbswap in' with arguments: $pid $eid\n";
    if (system("$tbdir/tbswap in $pid $eid") != 0) {
	$errorstat = $? >> 8;
	fatal("tbswap in failed!");
    }

    $estate = ExpState($pid,$eid);
    if ($estate ne EXPTSTATE_ACTIVE) {
	fatal("Experiment is in the wrong state: $estate");
    }

    # NOTE(review): with this redirection order only stdout lands in
    # $repfile; stderr goes to wherever stdout pointed before. Confirm
    # that is the intent.
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
}
elsif ($inout eq "modify") {
    # Non-empty once a step of the modify has failed.
    my $modifyError = "";

    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
    if (TBExptBackupVirtualState($pid, $eid)) {
	fatal("Could not backup experiment state; cannot safely continue!");
    }
    # Must deal with the prerender explicitly since it runs background.
    system("prerender -r $pid $eid");
    
    TBExptRemoveVirtualState($pid, $eid);

    #
    # Rerun tbprerun if modifying.
    #
    if (system("$tbdir/tbprerun -m $pid $eid $modnsfile") != 0) {
	$modifyError = "tbprerun failed!";
    }

    #
    # If experiment is currently swapped out, no need to do an update 
    # after modifying it.
    #
    if (! $modifyError && $estate eq EXPTSTATE_ACTIVE) {
	print STDOUT "Running 'tbswap update' with arguments: $pid $eid\n";

	my $rebootSwitch = "";
	if ($reboot) {
	    $rebootSwitch = "-reboot";
	}

	if (system("$tbdir/tbswap update $rebootSwitch $pid $eid") != 0) {
	    $errorstat = $? >> 8;
	    $modifyError = "tbswap update failed!";
	}

	#
	# See what state tbswap left it in. It might have swapped it out
	# or restored it, if there was an error. 
	# 
	$estate = ExpState($pid, $eid);
	
	if (! $modifyError) {
	    if ($estate ne EXPTSTATE_ACTIVE) {
		$modifyHosed = 1;
		fatal("Experiment is in the wrong state: $estate!");
	    }
	    # NOTE(review): same redirection order as the swapin report
	    # above; only stdout lands in $repfile.
	    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
	}
	elsif ($estate ne EXPTSTATE_ACTIVE) {
	    # Was active, now its not! tbswap was not able to recover.
	    $modifyHosed = 1;
	}
    }

    if ($modifyError) {
	print STDOUT "Modify Error: $modifyError\n";
	print STDOUT "Recovering experiment state...\n";
	
	# Must deal with the prerender explicitly since it runs background.
	system("prerender -r $pid $eid");
	TBExptRemoveVirtualState($pid, $eid);
	
	if (TBExptRestoreVirtualState($pid, $eid) == 0) {
	    TBExptClearBackupState($pid, $eid);
	    # Must deal with the prerender explicitly since it runs background.
	    system("prerender -t $pid $eid");
	    fatal("Update aborted; old state restored.");
	}
	else {
	    $modifyHosed = 1;
	    fatal("Experiment state could not be restored!");
	}
    }
    # Success; the backup of the old state is no longer needed.
    TBExptClearBackupState($pid, $eid);
}
else { # $inout eq "restart" assumed.
    print STDOUT "Running tbrestart with arguments: $pid $eid\n";
    if (system("$tbdir/tbrestart $pid $eid") != 0) {
	fatal("tbrestart failed!");
    }
}

#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
system("cp -Rfp $workdir/ $userdir/tbdata/");

#
# Gather stats. 
#
if ($inout eq "in") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0);
}
elsif ($inout eq "out") {
    # Idle swaps are flagged as such in the stats.
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		    ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0));
}
elsif ($inout eq "modify") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0);
}

#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
# In batch mode, just exit without sending email or unlocking. The
# batch daemon will take care of that and setting the proper state. 
#
if ($batch) {
    exit(0);
}

#
# HACK! if successful, put new NS file in DB.
#
if ($inout eq "modify") {
    my $nsdata_string;

    #
    # Read the file ourselves so that a failure leaves $nsdata_string
    # undefined, and the stored NS file is then left untouched.
    #
    if (open(my $nsfh, "<", $modnsfile)) {
	local $/ = undef;
	$nsdata_string = <$nsfh>;
	close($nsfh);
    }

    if (defined($nsdata_string)) {
	$nsdata_string = DBQuoteSpecial($nsdata_string);

	DBQueryWarn("delete from nsfiles WHERE eid='$eid' and pid='$pid'");
	DBQueryWarn("insert into nsfiles (pid, eid, nsfile) ".
		    "VALUES('$pid', '$eid', $nsdata_string)");
    }
    else {
	print "Warning!! Could not read nsfile '$modnsfile'!\n";
    }
}

#
# Clear the log file so the web page stops spewing. 
#
if (defined($logname)) {
    TBExptCloseLogFile($pid, $eid);
}

#
# Must unlock before exit.
#
TBUnLockExp($pid, $eid, $nextswapstate);

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above). 
#
TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELCLEAR);

print "Swap Success!\n";

#
# Send email notification to user.
#
my $message =
    "Experiment $eid in project $pid has been ";

if ($inout eq "out" && ($idleswap || $autoswap || $force) ) {
    # Swapouts requested with -i, -a or -f get an explanation of why.
    $message .= "forcibly swapped out by\nEmulab";
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	  "(See also the Idle-Swap info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($autoswap) {
	$message .= " because it exceeded its Maximum Duration.\n".
	  "(See also the Max. Duration info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($force) {
	$message .= ". (See also our Node Usage Policies in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    }
}
else {
    $message .= "$action.\n";
}

if ($inout eq "in") {
    # Add the swap settings...
    $message .="\nCurrent swap settings:\n$swapsettings";
}

$message .=
    "\n".
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";

#
# Idle swap mail is sent as coming from $TBOPS rather than the user. A
# restart attaches only the log file; everything else also attaches the
# report and, when set, the modify NS file.
#
SENDMAIL("$user_name <$user_email>",
	 "Experiment $pid/$eid \u$action",
	 $message,
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
	 (($inout eq "restart") ? ($logname) :
	  (($repfile, $logname), (defined($modnsfile) ? ($modnsfile) : ()))));

exit 0;

#
# Failure path once the experiment has been locked. Prints the message,
# records failure stats, ends the experiment if a modify left it
# unrecoverable ($modifyHosed), copies the log files to the user directory,
# and then (unless in batch mode) unlocks the experiment and mails the
# user. Always exits with $errorstat; never returns.
#
sub fatal($)
{
    my($mesg) = $_[0];

    print STDOUT "*** $0:\n".
	         "    $mesg\n";

    #
    # Gather stats. 
    #
    if ($inout eq "in") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, $errorstat);
    }
    elsif ($inout eq "out") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $errorstat);
    }
    elsif ($inout eq "modify") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, $errorstat);
    }

    #
    # If hosed, we entirely terminate the experiment.
    #
    if ($modifyHosed) {
	#
	# Note: $estate is indeed still set appropriately!
	#
	if ($estate eq EXPTSTATE_ACTIVE) {
	    print "Running 'tbswap out' with arguments: $pid $eid\n";
	    if (system("$tbdir/tbswap out -force $pid $eid") != 0) {
		print "tbswap out failed!\n";
	    }
	}

	print "Running tbend with arguments: -force $pid $eid\n";
	if (system("$tbdir/tbend -force $pid $eid") != 0) {
	    print "tbend failed!\n";
	}
	# Must override since we are so badly hosed. 
	$ebatchstate = BATCHSTATE_PAUSED;
    }

    # Copy over the log files so the user can see them.
    system("/bin/cp -Rfp $workdir/ $userdir/tbdata");

    #
    # In batch mode, exit without sending the email or unlocking. The
    # batch daemon will take care of that and setting the proper state. 
    #
    if ($batch) {
	exit($errorstat);
    }

    #
    # Clear the log file so the web page stops spewing.
    #
    if (defined($logname)) {
	TBExptCloseLogFile($pid, $eid);
    }

    # Unlock and reset state to its original value. 
    TBUnLockExp($pid, $eid, $ebatchstate);

    #
    # Clear the cancel flag now that the operation is complete. Must be done
    # after we change the experiment state (above).
    #
    TBSetBatchCancelFlag($pid, $eid, BATCHMODE_CANCELCLEAR);

    #
    # Send a message to the testbed list. Append the logfile.
    #
    SENDMAIL("$user_name <$user_email>",
	     "Swap ${inout} Failure: $pid/$eid",
	     $mesg,
	     ($idleswap ? $TBOPS : "$user_name <$user_email>"),
	     "Cc:  $expt_head_name <$expt_head_email>\n".
	     "Cc:  $TBOPS",
	     (($logname), (defined($modnsfile) ? ($modnsfile) : ())));

    if ($modifyHosed) {
	#
	# Copy off the workdir to the user directory, Then back up both of
	# them for post-mortem debugging.
	#
	system("/bin/cp -Rfp $workdir/ $userdir/tbdata");
	system("/bin/rm -rf  ${workdir}-failed");
	system("/bin/mv -f   $workdir ${workdir}-failed");
	system("/bin/rm -rf  ${userdir}-failed");
	system("/bin/mv -f   $userdir ${userdir}-failed");
	TBExptDestroy($pid, $eid);
    }

    exit($errorstat);
}