#!/usr/bin/perl -wT

#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
# All rights reserved.
#

9 10
use English;
use Getopt::Std;
11
use POSIX qw(isatty setsid);
12 13

#
# This gets invoked from the Web interface.
# Swap an experiment in, swap it out, restart or modify.
#

#
# Print the command line synopsis on STDERR and exit with status -1.
# Takes no arguments and never returns.
#
sub usage()
{
    # One entry per output line. They are joined with newlines below so
    # that a single string is handed to print, exactly as before.
    my @lines = (
	'Usage: swapexp [-q] [-b | -w] [-i | -a | -f] [-r] [-e]',
	'               <-s in | out | restart | modify | pause>',
	'               <pid> <eid> [<nsfile>]',
	'switches and arguments:',
	'-w       - wait for non-batchmode experiment swap/modify',
	'-q       - be less chatty',
	'-r       - reboot nodes when doing a modify experiment',
	'-e       - restart event scheduler when doing a modify experiment',
	'-s <op>  - Operation to perform; one of those listed above',
	'<pid>    - The project the experiment belongs to',
	'<eid>    - The experiment name (id)',
	'<nsfile> - Optional NS file to parse for experiment modify',
    );

    print(STDERR join("\n", @lines) . "\n");
    exit(-1);
}

# getopts() specification: "s" takes a value, the others are plain flags.
my  $optlist = "biafres:wq";

37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
#
# Terminate the script, reporting $message to the caller.
#
# The exit code is what the web page uses to decide what to tell the
# user, so the sign of $status matters:
#
#   $status < 0 - Fatal, unexpected error. Raised through die() with the
#                 usual "*** $0:" banner; exit() below is never reached.
#   $status = 0 - The operation is proceeding in the background and the
#                 user is notified later. $message goes to STDERR.
#   $status > 0 - Expected error (user not allowed for some reason).
#                 $message goes to STDERR.
#
# Arguments: $status  - integer exit status, interpreted as above.
#            $message - text to report.
# Never returns.
#
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;

    # Unexpected failure: hand it to die() like the other fatal errors.
    die("*** $0:\n".
	"    $message\n")
	if ($status < 0);

    # Expected outcome: say why on STDERR and leave with the given code.
    print STDERR "$message\n";
    exit($status);
}

#
# Configure variables. The @...@ tokens are presumably substituted when
# this template is configured.
#
my $TB        = "@prefix@";
my $TBOPS     = "@TBOPSEMAIL@";
my $TBLOGS    = "@TBLOGSEMAIL@";
my $TBINFO    = "$TB/expinfo";
my $TBDOCBASE = "@TBDOCBASE@";

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

# Where the backend scripts (tbswap, tbprerun, ...) are run from.
my $tbdir  = "$TB/bin/";
my $tbdata = "tbdata";

# Command line switches; all off until the options are parsed.
my ($batch, $idleswap, $autoswap, $force)          = (0, 0, 0, 0);
my ($reboot, $waitmode, $quiet, $eventsys_restart) = (0, 0, 0, 0);

# Exit status used by fatal(); replaced by a backend's exit code when a
# tbswap invocation fails.
my $errorstat   = -1;
# Set when a failed modify could not restore the old experiment state.
my $modifyHosed = 0;

# Filled in as the script progresses.
my ($inout, $logname, $dbuid, $user_name, $user_email);
my (@allnodes, @row);
my ($action, $nextswapstate, $termswapstate);

#
# Untaint the path, and drop the environment variables that could
# influence a shell started from here.
#
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
foreach my $envvar ('IFS', 'CDPATH', 'ENV', 'BASH_ENV') {
    delete($ENV{$envvar});
}

#
# Turn off line buffering on output
#
$OUTPUT_AUTOFLUSH = 1;

#
# Set umask for start/swap. Other members of the project must be able to
# swap/end experiments, so the log and intermediate files need to be 664;
# some of them are opened for append.
#
umask(0002);

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}

# Plain on/off switches.
$idleswap         = 1 if (defined($options{"i"}));
$waitmode         = 1 if (defined($options{"w"}));
$autoswap         = 1 if (defined($options{"a"}));
$force            = 1 if (defined($options{"f"}));
$batch            = 1 if (defined($options{"b"}));
$reboot           = 1 if (defined($options{"r"}));
$eventsys_restart = 1 if (defined($options{"e"}));
$quiet            = 1 if (defined($options{"q"}));

# -s <op> is mandatory and must name one of the known operations.
usage()
    if (!defined($options{"s"}));
$inout = $options{"s"};
usage()
    if ($inout ne "out"     &&
	$inout ne "in"      &&
	$inout ne "restart" &&
	$inout ne "pause"   &&
	$inout ne "modify");

#
# Reject inconsistent invocations:
#  - wait mode and batch mode together;
#  - fewer than the two required <pid> <eid> arguments. This used to slip
#    through for "modify", leaving the pid/eid undefined when untainted;
#  - anything but exactly <pid> <eid> unless this is a modify, which may
#    also carry an NS file;
#  - wait or batch mode combined with -i, -a or -f.
#
usage()
    if (($waitmode && $batch) ||
	@ARGV < 2 ||
	($inout ne "modify" && @ARGV != 2) ||
	(($waitmode || $batch) && ($idleswap || $autoswap || $force)));

if ($eventsys_restart && $inout ne "modify") {
    print STDOUT "Usage: swapexp: -e (eventsys_restart) can be used ".
                 "only with -s modify\n";
    usage();
}

my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

#
# Untaint the arguments. Only word characters, dash and dot are accepted.
#
if ($pid =~ /^([-\w\.]+)$/) {
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
if ($eid =~ /^([-\w\.]+)$/) {
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}
my $repfile = "$eid.report";
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
my $tempnsfile;
my $modnsfile;

#
# A modify may name a new NS file. Validate it here; $modnsfile stays
# undefined when no file was given, and later code tests for that.
#
if ($inout eq "modify" && @ARGV > 2) {
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
    if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
    }
    else {
	die("Tainted nsfile name: $tempnsfile\n");
    }
    #
    # Called from ops interactively. Make sure NS file in /proj or /users.
    #
    # Use realpath to resolve any symlinks.
    #
    my $translated = `realpath $tempnsfile`;
    if ($translated =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
    }
    else {
	die("Tainted nsfile returned by realpath: $translated\n");
    }

    #
    # The file must reside in /proj, /groups, or /users. Since this script
    # runs as the caller, regular file permission checks ensure its a file
    # the user is allowed to use. /tmp/$guid-$nsref.nsfile also allowed
    # since this script is invoked directly from web interface, which generates
    # a name that should not be guessable, so as long as it looks to be in
    # proper format, we accept it.
    #
    # The directory patterns end in a slash so that a sibling such as
    # /projX is not mistaken for /proj, and the two temporary-file
    # patterns are anchored at the end so nothing may follow the
    # expected name.
    #
    if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile$/) &&
	! ($tempnsfile =~ /^\/var\/tmp\/php\w+$/) &&
	! ($tempnsfile =~ /^\/proj\//) &&
	! ($tempnsfile =~ /^\/groups\//) &&
	! ($tempnsfile =~ /^\/users\//)) {
	die("$tempnsfile does not resolve to an appropriate directory!\n");
    }

    # Must be a readable, non-empty plain file.
    if (! -f $tempnsfile || -z $tempnsfile || ! -r $tempnsfile) {
	die("*** $0:\n".
	    "    $tempnsfile does not look like an NS file!\n");
    }
    $modnsfile = "$eid-modify.ns";
}

#
# Verify user and get his DB uid.
#
die("*** $0:\n".
    "    You do not exist in the Emulab Database.\n")
    if (! UNIX2DBUID($UID, \$dbuid));

#
# Get email info for user.
#
die("*** $0:\n".
    "    Cannot determine your name and email address.\n")
    if (! UserDBInfo($dbuid, \$user_name, \$user_email));

#
# Verify that this person can muck with the experiment. Root and
# testbed admins skip the per-experiment access check.
# Note that any script down the line has to do an admin check also.
#
unless (!$UID || TBAdmin($UID) ||
	TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
	"    You do not have permission to swap or modify this experiment!\n");
}

# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    foreach my $signame ('TERM', 'QUIT', 'INT') {
	$SIG{$signame} = 'IGNORE';
    }
}

#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
# the tb scripts.
#
# The experiments table stays locked from here until the record has been
# marked with its transitional state further down.
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

die("*** $0:\n".
    "    No such experiment $pid/$eid exists!\n")
    if (! $query_result->numrows);

# Pull the columns this script cares about out of the row.
my %hashrow = $query_result->fetchhash();
my ($expt_head_login, $estate, $batchstate, $expt_path,
    $expt_locked, $isbatchexpt, $canceled) =
    @hashrow{'expt_head_uid', 'state', 'batchstate', 'path',
	     'expt_locked', 'batchmode', 'canceled'};
my ($swappablebit, $idleswapbit, $autoswapbit) =
    @hashrow{'swappable', 'idleswap', 'autoswap'};
my $swappablestr = ($swappablebit ? "Yes" : "No");
my $idleswapstr  = ($idleswapbit  ? "Yes" : "No");
my $autoswapstr  = ($autoswapbit  ? "Yes" : "No");
my ($noswap, $noidleswap) =
    @hashrow{'noswap_reason', 'noidleswap_reason'};
# Both timeouts are divided by 60 and reported as hours in the email.
my $idleswaptime = $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime = $hashrow{'autoswap_timeout'} / 60.0;

#
# Except on swapout, the idle timeout is clamped to the site threshold;
# the clamped value is written back to the DB later, so settle on it now.
#
if ($inout ne "out") {
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
    $idleswaptime = $idleswap_time / 60.0;
}

# Summary of the swap settings, appended to the swapin email.
my $swapsettings = "Idle-Swap:   $idleswapstr";
$swapsettings .=
    ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n");
$swapsettings .= "Auto-Swap:   $autoswapstr";
$swapsettings .=
    ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");

chdir($workdir)
    or die("*** $0:\n".
	   "    Could not chdir to $workdir: $!\n");

#
# Check that the requested operation makes sense for the experiment as
# it currently stands. There are three cases:
#
#  1. Invoked by the batch daemon (-b).
#  2. A user operating on a batch experiment: everything except modify
#     just flips the state (or sets the cancel flag) and exits, leaving
#     the real work to the batch daemon.
#  3. A user operating on a regular experiment.
#
if ($batch) {
    #
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be in proper state for the
    # operation requested.
    #
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);

    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());

    if ($inout eq "in") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be QUEUED.\n")
	    if ($estate ne EXPTSTATE_QUEUED);

	die("*** $0:\n".
	    "    Batch experiment $pid/$eid has been canceled! Aborting.\n")
	    if ($canceled);
    }
    elsif ($inout eq "out") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be ACTIVE.\n")
	    if ($estate ne EXPTSTATE_ACTIVE);
    }
    else {
	die("*** $0:\n".
	    "    Improper request from batch daemon for $pid/$eid!\n");
    }
}
elsif ($isbatchexpt) {
    #
    # User is requesting that a batch either be injected or paused.
    # Sanity check the state, but otherwise let the batch daemon
    # handle it.
    #
    ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
	if ($canceled);

    if ($inout eq "in") {
	ExitWithStatus(1,
		       "Batch experiment $pid/$eid must be SWAPPED to\n".
		       "QUEUE. Currently $estate.")
	    if ($estate ne EXPTSTATE_SWAPPED);
	SetExpState($pid, $eid, EXPTSTATE_QUEUED);
    }
    elsif ($inout eq "out") {
	ExitWithStatus(1,
		       "Batch experiment $pid/$eid must be ACTIVE or\n".
		       "ACTIVATING to swap out. Currently $estate.")
	    if ($estate ne EXPTSTATE_ACTIVE &&
		$estate ne EXPTSTATE_ACTIVATING);

	#
	# Since the batch daemon has control, all we can do is set
	# the cancel bit.
	#
	TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
    }
    elsif ($inout eq "pause") {
	ExitWithStatus(1,
		       "Batch experiment $pid/$eid must be QUEUED to\n".
		       "DEQUEUE. Currently $estate.")
	    if ($estate ne EXPTSTATE_QUEUED);

	#
	# XXX. The batch daemon might already have the experiment, but
	# not have shipped it off to startexp. Change the state
	# anyway. The error will be noticed later when startexp dies,
	# and the batch daemon gets the error back. This sucks.
	#
	SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
    }
    elsif ($inout eq "modify") {
	#
	# The batch state is compared as a string, the same way the
	# daemon branch above compares it against BATCHSTATE_LOCKED().
	#
	# NOTE(review): the message says SWAPPED or ACTIVE, but the test
	# accepts ACTIVATING rather than ACTIVE. Left as found; confirm
	# which one is intended.
	#
	ExitWithStatus(1,
		       "Batch experiment $pid/$eid must be SWAPPED or\n".
		       "ACTIVE to modify. Currently $estate.")
	    if (($estate ne EXPTSTATE_SWAPPED &&
		 $estate ne EXPTSTATE_ACTIVATING) ||
		$batchstate ne BATCHSTATE_UNLOCKED());

	#
	# Otherwise, proceed with the modify. The experiment will be
	# locked below, and so it cannot be injected or otherwise messed
	# with since its state is going to be changed before we unlock
	# the experiments table. The batch daemon will leave it alone
	# until the modify is done. If the modify fails and cannot recover
	# it is going to get swapped out; that is okay since the batch
	# daemon does not keep state internally.
	#
    }
    else {
	die("*** $0:\n".
	    "    Operation $inout not allowed on a batch experiment!\n");
    }

    # Everything but a modify is finished once the state has been changed;
    # a modify carries on with the rest of the script.
    ExitWithStatus(0,
		   "Batch experiment $pid/$eid state has been changed.\n")
	if ($inout ne "modify");
}
else {
    #
    # If the cancel flag is set, then user must wait for that to
    # clear before we can do anything else.
    #
    ExitWithStatus(1,
		   "Experiment $pid/$eid has its cancel flag set!.\n".
		   "You must wait for that to clear before you can swap\n".
		   "or modify the experiment.\n")
	if ($canceled);

    #
    # Check the state for the various operations, unless forced.
    #
    if (!$force) {
	if ($inout eq "in") {
	    if ($estate ne EXPTSTATE_SWAPPED()) {
		ExitWithStatus(1,
			       "Experiment $pid/$eid is not swapped out!");
	    }
	}
	elsif ($inout eq "out") {
	    if ($estate ne EXPTSTATE_ACTIVE() &&
		$estate ne EXPTSTATE_ACTIVATING()) {
		ExitWithStatus(1,
			       "Experiment $pid/$eid is not swapped in ".
			       "or activating!\n");
	    }

	    if ($estate eq EXPTSTATE_ACTIVATING()) {
		#
		# All we can do is set the cancel flag and hope that
		# it gets noticed. We do not wait.
		#
		TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);

		ExitWithStatus(0,
			       "Experiment $pid/$eid swapin has been  ".
			       "marked for cancelation.\n".
			       "You will receive email when the original ".
			       "swap request has finished.");
	    }
	}
	elsif ($inout eq "restart") {
	    if ($estate ne EXPTSTATE_ACTIVE()) {
		ExitWithStatus(1,
			       "Experiment $pid/$eid is not swapped in!");
	    }
	}
	elsif ($inout eq "modify") {
	    if ($estate ne EXPTSTATE_ACTIVE() &&
		$estate ne EXPTSTATE_SWAPPED()) {
		ExitWithStatus(1,
			       "Experiment $pid/$eid must be ACTIVE or\n".
			       "SWAPPED to modify!\n");
	    }
	}
	else {
	    # Report the requested operation; the action text has not
	    # been assigned at this point in the script.
	    die("*** $0:\n".
		"    Missing state check for action: $inout\n");
	}
    }
}

#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state.
#
if ($inout eq "in") {
    $nextswapstate = EXPTSTATE_ACTIVATING();
}
elsif ($inout eq "out") {
    $nextswapstate = EXPTSTATE_SWAPPING();
}
elsif ($inout eq "restart") {
    $nextswapstate = EXPTSTATE_RESTARTING();
}
elsif ($inout eq "modify") {
    # A swapped out experiment only needs its NS file parsed; anything
    # else goes through the reparse state.
    $nextswapstate = (($estate eq EXPTSTATE_SWAPPED()) ?
		      EXPTSTATE_MODIFY_PARSE() : EXPTSTATE_MODIFY_REPARSE());
}
else {
    # Name the requested operation: the action text is only assigned
    # further down, so it would be undefined in this message.
    die("*** $0:\n".
	"    Missing state check for action: $inout\n");
}

# Update idleswap_timeout to whatever the current value is.
DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
	     "where eid='$eid' and pid='$pid'")
    if ($inout ne "out");

#
# On a failure, we go back to this swapstate. Might be modified below.
#
$termswapstate = $estate;

# Lock the record, set the nextstate, and unlock the table.
TBLockExp($pid, $eid, $nextswapstate);
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email).
#

# Past-tense description of the operation, used in messages and email.
my %actiontext = ("in"      => "swapped in",
		  "out"     => "swapped out",
		  "restart" => "restarted",
		  "modify"  => "modified");
$action = $actiontext{$inout}
    if (exists($actiontext{$inout}));

#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on.
#
my $expt_head_name;
my $expt_head_email;

unless (UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    # Fall back to the operations list rather than giving up.
    print STDERR "*** WARNING: ".
	         "Could not determine name/email for $expt_head_login.\n";
    $expt_head_name  = "TBOPS";
    $expt_head_email = $TBOPS;
}

#
# Before going to background, we have to copy out the NS file! The copy
# is made under the name tbprerun is later given, and is group writable.
#
# NOTE(review): the record has already been locked in its transitional
# state here, and this die() does not restore it the way fatal() does.
#
if ($inout eq "modify" && defined($modnsfile)) {
    unlink($modnsfile);
    system("/bin/cp", "$tempnsfile", "$modnsfile") == 0
	or die("*** $0:\n".
	       "    Could not copy $tempnsfile to $modnsfile");
    chmod(0664, "$modnsfile");
}

#
# If not in batch mode, go into the background. Parent exits.
#
# TBBackGround() returns true in the parent (the child's pid, used below
# for waitpid) and false in the child, which carries on with the rest of
# the script.
#
if (! $batch) {
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
    TBExptSetLogFile($pid, $eid, $logname);
    TBExptOpenLogFile($pid, $eid);

    if (my $childpid = TBBackGround($logname)) {
	#
	# Parent exits normally, except if in waitmode.
	#
	if (!$waitmode) {
	    print("Experiment $pid/$eid is now being $action.\n".
		  "You will be notified via email when this is done.\n")
		if (! $quiet);
	    exit(0);
	}
	# $action is already a past-tense phrase ("swapped in", ...), so
	# it is used as is rather than appended to the word "swap".
	print("Waiting for experiment $eid to be $action.\n")
	    if (! $quiet);

	if (isatty(STDIN) && !$quiet) {
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
	}

	# Give child a chance to run.
	select(undef, undef, undef, 0.25);

	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

	#
	# Wait until child exits or until user gets bored and types ^C.
	# The wait status is saved right away so nothing in between can
	# disturb the value reported and returned.
	#
	waitpid($childpid, 0);
	my $childstatus = $?;

	print("Done. Exited with status: $childstatus\n")
	    if (! $quiet);
	exit($childstatus >> 8);
    }
}

#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
POSIX::setsid()
    if ($waitmode);

#
# Gather stats; start clock ticking. A restart records nothing here.
#
{
    my $statsop;

    $statsop = TBDB_STATS_SWAPIN()     if ($inout eq "in");
    $statsop = TBDB_STATS_SWAPOUT()    if ($inout eq "out");
    $statsop = TBDB_STATS_SWAPMODIFY() if ($inout eq "modify");

    GatherSwapStats($pid, $eid, $dbuid, $statsop, 0,
		    TBDB_STATS_FLAGS_START)
	if (defined($statsop));
}

#
# Remove old report file since its contents are going to be invalid.
#
unlink("$repfile")
    if ($inout ne "restart" && -e $repfile);

#
# Run the backend for the requested operation. Each branch leaves the
# experiment in its final state on success and calls fatal(), which does
# not return, on failure.
#
if ($inout eq "out") {
    # Forced and idle swaps pass -force on to tbswap.
    my $optarg  = (($force || $idleswap) ? "-force" : "");
    my $command = "tbswap out $optarg $pid $eid";

    print STDOUT "Running '$command'\n";
    if (system("$tbdir/$command") != 0) {
	$errorstat = $? >> 8;
	fatal("tbswap out failed!");
    }
    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
}
elsif ($inout eq "in") {
    my $command = "tbswap in $pid $eid";

    print STDOUT "Running '$command'\n";
    if (system("$tbdir/$command") != 0) {
	$errorstat = $? >> 8;
	fatal("tbswap in failed!");
    }
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);

    # Regenerate the report that is attached to the notification email.
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
}
elsif ($inout eq "modify") {
    # Non-empty once any step of the modify has failed.
    my $modifyError = "";

    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

    # The old state is saved first so a failed modify can be undone.
    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
    fatal("Could not backup experiment state; cannot safely continue!")
	if (TBExptBackupVirtualState($pid, $eid));

    #
    # Rerun tbprerun if modifying, but only if new NS file provided.
    # Yep, we allow reswap without changing the NS file. For Shashi and SIM.
    #
    if (defined($modnsfile)) {
	my $command = "tbprerun $pid $eid $modnsfile";

	print STDOUT "Running '$command'\n";
	$modifyError = "tbprerun failed!"
	    if (system("$tbdir/$command") != 0);
    }

    #
    # Our next state depends on whether the experiment was active or swapped.
    #
    if (! $modifyError && $estate eq EXPTSTATE_SWAPPED) {
	# Was swapped out; nothing is running that needs updating.
	SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
    }
    elsif (! $modifyError) {
	SetExpState($pid, $eid, EXPTSTATE_MODIFY_RESWAP);

	my $optarg = ($reboot ? "-reboot" : "");
	$optarg .= ($eventsys_restart ? " -eventsys_restart" : "");
	my $command = "tbswap update $optarg $pid $eid";

	print STDOUT "Running '$command'\n";
	if (system("$tbdir/$command") != 0) {
	    $errorstat = $? >> 8;
	    $modifyError = "tbswap update failed!";
	}

	#
	# See what tbswap did. It might have swapped it out if there
	# was an error.
	#
	if (! $modifyError) {
	    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
	    $estate = EXPTSTATE_ACTIVE;
	}
	elsif ($errorstat & 0x40) {
	    #
	    # Icky. Magic return code that says tbswap swapped it out.
	    # We do not want tbswap to muck with states anymore, so
	    # need to know what it did. At some point we should clean
	    # up the exit reporting! Anyway, fatal() needs to know the
	    # the right state to go back to (no longer ACTIVE).
	    #
	    $estate = EXPTSTATE_SWAPPED;
	    $termswapstate = EXPTSTATE_SWAPPED;
	    # Old accounting info.
	    TBSetExpSwapTime($pid, $eid);
	}
    }

    if ($modifyError) {
	print STDOUT "Modify Error: $modifyError\n";
	print STDOUT "Recovering experiment state...\n";

	# Must deal with the prerender explicitly since it runs background.
	system("prerender -r $pid $eid");
	TBExptRemoveVirtualState($pid, $eid);

	if (TBExptRestoreVirtualState($pid, $eid) != 0) {
	    # The old state is gone too; fatal() terminates the experiment.
	    $modifyHosed = 1;
	    fatal("Experiment state could not be restored!");
	}
	# Must deal with the prerender explicitly since it runs background.
	system("prerender -t $pid $eid");
	fatal("Update aborted; old state restored.");
    }

    TBExptClearBackupState($pid, $eid);
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
}
else { # $inout eq "restart" assumed.
    my $command = "tbrestart $pid $eid";

    print STDOUT "Running '$command'\n";
    fatal("tbrestart failed!")
	if (system("$tbdir/$command") != 0);
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
}


#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
system("cp -Rfp $workdir/ $userdir/tbdata/");

#
# Gather stats. Only a swapout carries a flag, marking idle swaps.
#
GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0)
    if ($inout eq "in");
GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0))
    if ($inout eq "out");
GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0)
    if ($inout eq "modify");

# Old accounting info.
TBSetExpSwapTime($pid, $eid);

#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
# In batch mode, just exit without sending email or unlocking. The
# batch daemon will take care of that.
#
exit(0)
    if ($batch);

#
# Clear the log file so the web page stops spewing.
#
TBExptCloseLogFile($pid, $eid)
    if (defined($logname));

#
# Must unlock before exit.
#
TBUnLockExp($pid, $eid);

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above).
#
TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);

print "Swap Success!\n";

#
# Send email notification to user.
#
my $message = "Experiment $eid in project $pid has been ";

if ($inout eq "out" && ($idleswap || $autoswap || $force)) {
    # All three explanations end with the same pointer to the docs.
    my $swapdoc = "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";

    $message .= "forcibly swapped out by\nEmulab";
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	    "(See also the Idle-Swap info in \n".
	    $swapdoc;
    }
    elsif ($autoswap) {
	$message .= " because it exceeded its Maximum Duration.\n".
	    "(See also the Max. Duration info in \n".
	    $swapdoc;
    }
    else {
	# Neither idle nor auto, so this was a plain forced swapout.
	$message .= ". (See also our Node Usage Policies in \n".
	    $swapdoc;
    }
}
else {
    $message .= "$action.\n";
}

# Add the swap settings on swapin.
$message .= "\nCurrent swap settings:\n$swapsettings"
    if ($inout eq "in");

$message .=
    "\n".
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";

# A restart attaches only the log; everything else also attaches the
# report and, when one was supplied, the new NS file.
my @attachments = (($inout eq "restart") ?
		   ($logname) : ($repfile, $logname));
push(@attachments, $modnsfile)
    if ($inout ne "restart" && defined($modnsfile));

SENDMAIL("$user_name <$user_email>",
	 "Experiment $pid/$eid \u$action",
	 $message,
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
	 @attachments);

exit 0;

#
# Failure path once the experiment record has been locked. Prints $mesg,
# records the failure in the swap stats, puts the experiment back into
# $termswapstate, unlocks it, mails the user and exits with $errorstat.
# If a modify could not restore the old state ($modifyHosed), the
# experiment is terminated and destroyed as well.
#
# In batch mode the function exits after resetting the state, leaving
# the unlocking and the email to the batch daemon.
#
# Argument: $mesg - text describing what failed.
# Never returns.
#
sub fatal($)
{
    my ($mesg) = @_;

    print STDOUT "*** $0:\n".
	         "    $mesg\n";

    #
    # Gather stats. A restart records nothing.
    #
    my $statsop;
    $statsop = TBDB_STATS_SWAPIN()     if ($inout eq "in");
    $statsop = TBDB_STATS_SWAPOUT()    if ($inout eq "out");
    $statsop = TBDB_STATS_SWAPMODIFY() if ($inout eq "modify");
    GatherSwapStats($pid, $eid, $dbuid, $statsop, $errorstat)
	if (defined($statsop));

    #
    # Clear backup state since not needed anymore; experiment is toast.
    #
    TBExptClearBackupState($pid, $eid)
	if ($inout eq "modify");

    #
    # If hosed, we entirely terminate the experiment.
    #
    if ($modifyHosed) {
	#
	# Note: $estate is indeed still set appropriately!
	#
	if ($estate eq EXPTSTATE_ACTIVE) {
	    print "Running 'tbswap out -force $pid $eid'\n";
	    print "tbswap out failed!\n"
		if (system("$tbdir/tbswap out -force $pid $eid") != 0);
	}

	print "Running 'tbend -force $pid $eid'\n";
	print "tbend failed!\n"
	    if (system("$tbdir/tbend -force $pid $eid") != 0);

	# Must override since we are so badly hosed.
	$termswapstate = EXPTSTATE_TERMINATED;
    }

    # Copy over the log files so the user can see them.
    system("/bin/cp -Rfp $workdir/ $userdir/tbdata");

    # Set proper state, which is typically the way we came in.
    SetExpState($pid, $eid, $termswapstate);

    #
    # In batch mode, exit without sending the email or unlocking. The
    # batch daemon will take care of that.
    #
    exit($errorstat)
	if ($batch);

    #
    # Clear the log file so the web page stops spewing.
    #
    TBExptCloseLogFile($pid, $eid)
	if (defined($logname));

    # Unlock and reset state to its terminal value.
    TBUnLockExp($pid, $eid);

    #
    # Clear the cancel flag now that the operation is complete. Must be done
    # after we change the experiment state (above).
    #
    TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);

    #
    # Send a message to the testbed list. Append the logfile, and the new
    # NS file when the operation had one.
    #
    my @attachments = ($logname);
    push(@attachments, $modnsfile)
	if (defined($modnsfile));

    SENDMAIL("$user_name <$user_email>",
	     "Swap ${inout} Failure: $pid/$eid",
	     $mesg,
	     ($idleswap ? $TBOPS : "$user_name <$user_email>"),
	     "Cc:  $expt_head_name <$expt_head_email>\n".
	     "Cc:  $TBOPS",
	     @attachments);

    if ($modifyHosed) {
	#
	# Copy off the workdir to the user directory, Then back up both of
	# them for post-mortem debugging.
	#
	system("/bin/cp -Rfp $workdir/ $userdir/tbdata");
	foreach my $dir ($workdir, $userdir) {
	    system("/bin/rm -rf  ${dir}-failed");
	    system("/bin/mv -f   $dir ${dir}-failed");
	}
	TBExptDestroy($pid, $eid);
    }

    exit($errorstat);
}