swapexp.in 26.7 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7 8
# All rights reserved.
#

9 10
use English;
use Getopt::Std;
11
use POSIX qw(setsid);
12 13

#
Chad Barb's avatar
Chad Barb committed
14
# This gets invoked from the Web interface.
Chad Barb's avatar
 
Chad Barb committed
15
# Swap an experiment in, swap it out, restart or modify.
16
#
Chad Barb's avatar
Chad Barb committed
17

18 19
#
# Print the command line synopsis on STDERR and exit with status -1.
# Does not return. Every switch accepted by $optlist is described.
#
sub usage()
{
    print(STDERR
	  "Usage: swapexp [-b | -w] [-i | -a | -f] [-r] [-e]\n".
	  "               <-s in | out | restart | modify | pause>\n".
	  "               <pid> <eid> [<nsfile>]\n".
	  "switches and arguments:\n".
	  "-b       - batch mode; used when invoked by the batch daemon\n".
	  "-w       - wait for non-batchmode experiment swap/modify\n".
	  "-i       - swapout is an idle-swap (experiment idle too long)\n".
	  "-a       - swapout is an auto-swap (maximum duration exceeded)\n".
	  "-f       - force; skip the experiment state checks\n".
	  "-r       - reboot nodes when doing a modify experiment\n".
	  "-e       - restart event scheduler when doing a modify experiment\n".
	  "-s <op>  - Operation to perform; one of those listed above\n".
	  "<pid>    - The project the experiment belongs to\n".
	  "<eid>    - The experiment name (id)\n".
	  "<nsfile> - Optional NS file to parse for experiment modify\n");
    exit(-1);
}
34
my  $optlist = "biafres:w";
35

36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($code, $text) = @_;

    # A negative code is an unexpected failure; it is raised with die(),
    # so the exit() at the bottom is never reached in that case.
    die("*** $0:\n".
	"    $text\n")
	if ($code < 0);

    # Zero or positive: report on STDERR and exit with the given code.
    print STDERR "$text\n";
    exit($code);
}

60 61 62 63 64 65
#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
66
my $TBINFO = "$TB/expinfo";
67
my $TBDOCBASE = "@TBDOCBASE@";
68 69 70 71 72 73 74 75 76

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

my $tbdir    = "$TB/bin/";
77
my $tbdata   = "tbdata";
78
my $batch    = 0;
79
my $idleswap = 0;
80 81
my $autoswap = 0;
my $force    = 0;
Chad Barb's avatar
Chad Barb committed
82
my $reboot   = 0;
83
my $waitmode = 0;
84
my $eventsys_restart   = 0;
85
my $errorstat= -1;
86
my $modifyHosed = 0;
Chad Barb's avatar
 
Chad Barb committed
87

88 89 90 91 92
my $inout;
my $logname;
my $dbuid;
my $user_name;
my $user_email;
93
my @allnodes;
94
my @row;
95
my $action;
96
my $nextswapstate;
97
my $termswapstate;
Chad Barb's avatar
 
Chad Barb committed
98

99 100 101
#
# Untaint the path
# 
102
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
103 104 105 106 107 108 109
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

110 111 112 113 114 115 116
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

117 118 119 120 121 122 123 124
#
# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments.
#
%options = ();
usage()
    if (! getopts($optlist, \%options));

# Each boolean switch just turns on its flag.
$idleswap         = 1 if (defined($options{"i"}));
$waitmode         = 1 if (defined($options{"w"}));
$autoswap         = 1 if (defined($options{"a"}));
$force            = 1 if (defined($options{"f"}));
$batch            = 1 if (defined($options{"b"}));
$reboot           = 1 if (defined($options{"r"}));
$eventsys_restart = 1 if (defined($options{"e"}));

# -s <op> is mandatory, and <op> must be one of the known operations.
usage()
    if (! defined($options{"s"}));
$inout = $options{"s"};
usage()
    if (! grep { $inout eq $_ } ("out", "in", "restart", "pause", "modify"));

161 162 163 164 165
#
# Reject inconsistent switch combinations and bad argument counts.
# Everything but modify takes exactly <pid> <eid>; modify takes those
# two plus an optional NS file, so it needs at least two arguments
# (otherwise $pid/$eid below would be undefined).
#
usage()
    if (($waitmode && $batch) ||
	($inout ne "modify" && @ARGV != 2) ||
	($inout eq "modify" && @ARGV < 2) ||
	(($waitmode || $batch) && ($idleswap || $autoswap || $force)));

166 167 168 169 170
# The -e switch is only accepted together with -s modify.
if ($inout ne "modify" && $eventsys_restart) {
    print STDOUT
	"Usage: swapexp: -e (eventsys_restart) can be used ".
	"only with -s modify\n";
    usage();
}
Chad Barb's avatar
 
Chad Barb committed
171 172 173
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

174 175 176
#
# Untaint the arguments.
#
177
if ($pid =~ /^([-\w\.]+)$/) {
178 179 180 181 182
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
183
if ($eid =~ /^([-\w\.]+)$/) {
184 185 186 187 188
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}
189
my $repfile = "$eid.report";
190 191
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
192 193 194
my $tempnsfile;
my $modnsfile;

Leigh B. Stoller's avatar
Leigh B. Stoller committed
195
if ($inout eq "modify" && @ARGV > 2) {
196 197 198 199 200
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
201
    if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
202
	$tempnsfile = $1;
203 204
    }
    else {
205 206 207 208 209 210 211 212 213 214
	die("Tainted nsfile name: $tempnsfile\n");
    }
    #
    # Called from ops interactively. Make sure NS file in /proj or /users.
    #
    # Use realpath to resolve any symlinks.
    #
    my $translated = `realpath $tempnsfile`;
    if ($translated =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
215
    }
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
    else {
	die("Tainted nsfile returned by realpath: $translated\n");
    }

    #
    # The file must reside in /proj, /groups, or /users. Since this script
    # runs as the caller, regular file permission checks ensure it is a file
    # the user is allowed to use. /tmp/$guid-$nsref.nsfile is also allowed
    # since this script is invoked directly from the web interface, which
    # generates a name that should not be guessable, so as long as it looks
    # to be in the proper format, we accept it.
    #
    # The directory prefixes are matched with their trailing slash so
    # that look-alike siblings (e.g. /projfoo/...) are not accepted.
    if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile/) &&
	! ($tempnsfile =~ /^\/var\/tmp\/php\w+/) &&
	! ($tempnsfile =~ /^\/proj\//) &&
	! ($tempnsfile =~ /^\/groups\//) &&
	! ($tempnsfile =~ /^\/users\//)) {
	die("$tempnsfile does not resolve to an appropriate directory!\n");
    }

    if (! -f $tempnsfile || -z $tempnsfile || ! -r $tempnsfile) {
237 238 239
	die("*** $0:\n".
	    "    $tempnsfile does not look like an NS file!\n");
    }
240 241
    $modnsfile = "$eid-modify.ns";
}
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259

#
# Verify user and get his DB uid.
#
UNIX2DBUID($UID, \$dbuid)
    or die("*** $0:\n".
	   "    You do not exist in the Emulab Database.\n");

#
# Look up the caller's name and email address; both are needed for the
# notification mail sent at the end.
#
UserDBInfo($dbuid, \$user_name, \$user_email)
    or die("*** $0:\n".
	   "    Cannot determine your name and email address.\n");

#
Chad Barb's avatar
 
Chad Barb committed
260
# Verify that this person can muck with the experiment.
261 262 263 264 265
# Note that any script down the line has to do an admin check also. 
#
if ($UID && !TBAdmin($UID) &&
    !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
Chad Barb's avatar
 
Chad Barb committed
266
	"    You do not have permission to swap or modify this experiment!\n");
267 268
}

269 270 271 272
# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

273 274 275 276 277 278 279 280 281
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    # Ignore the interrupt-style signals for now.
    foreach my $signame ("TERM", "QUIT", "INT") {
	$SIG{$signame} = 'IGNORE';
    }
}

282 283 284 285 286
#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
Chad Barb's avatar
Chad Barb committed
287
# the tb scripts.
288 289 290 291 292 293 294 295 296 297 298 299 300
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

if (! $query_result->numrows) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid exists!\n");
}
my %hashrow = $query_result->fetchhash();
my $expt_head_login = $hashrow{'expt_head_uid'};
my $estate          = $hashrow{'state'};
301
my $batchstate      = $hashrow{'batchstate'};
302
my $expt_path       = $hashrow{'path'};
303
my $expt_locked     = $hashrow{'expt_locked'};
304
my $isbatchexpt     = $hashrow{'batchmode'};
305
my $canceled        = $hashrow{'canceled'};
306 307 308 309 310 311 312 313 314 315
my $swappablebit= $hashrow{'swappable'};
my $idleswapbit = $hashrow{'idleswap'};
my $autoswapbit = $hashrow{'autoswap'};
my $swappablestr= ( $swappablebit ? "Yes" : "No" );
my $idleswapstr = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr = ( $autoswapbit ? "Yes" : "No" );
my $noswap      = $hashrow{'noswap_reason'};
my $noidleswap  = $hashrow{'noidleswap_reason'};
my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0;
316

317 318
if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
319
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
320 321 322
    $idleswaptime = $idleswap_time / 60.0;
}

323 324
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
325
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
326 327
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");
328

329
if (! chdir($workdir)) {
330
    die("*** $0:\n".
331
	"    Could not chdir to $workdir: $!\n");
332 333
}

334
#
335 336 337
# This script is called from the batch daemon.
# 
if ($batch) {
    #
    # Invoked by the batch daemon (-b). Sanity check: the experiment must
    # be a batch experiment, must already be locked, and must be in the
    # proper state for the operation requested. Only "in" and "out" are
    # accepted from the daemon.
    #
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);

    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());

    if ($inout eq "in") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be QUEUED.\n")
	    if ($estate ne EXPTSTATE_QUEUED());

	die("*** $0:\n".
	    "    Batch experiment $pid/$eid has been canceled! Aborting.\n")
	    if ($canceled);
    }
    elsif ($inout eq "out") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be ACTIVE.\n")
	    if ($estate ne EXPTSTATE_ACTIVE());
    }
    else {
	die("*** $0:\n".
	    "    Improper request from batch daemon for $pid/$eid!\n");
    }
}
elsif ($isbatchexpt) {
    #
    # A user (not the daemon) is operating on a batch experiment. For
    # everything but modify we only sanity check the state, flip the
    # state or cancel flag, and exit; the batch daemon does the rest.
    #
    ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
	if ($canceled);

    if ($inout eq "modify") {
	#
	# The batch state is compared as a string, the same way the
	# locked check above does it.
	# NOTE(review): the message says ACTIVE but the test is against
	# ACTIVATING; left as found, please confirm which is intended.
	#
	ExitWithStatus(1,
		       "Batch experiment $pid/$eid must be SWAPPED or\n".
		       "ACTIVE to modify. Currently $estate.")
	    if (($estate ne EXPTSTATE_SWAPPED() &&
		 $estate ne EXPTSTATE_ACTIVATING()) ||
		$batchstate ne BATCHSTATE_UNLOCKED());

	#
	# Otherwise, proceed with the modify. The experiment will be
	# locked below, and so it cannot be injected or otherwise messed
	# with since its state is going to be changed before we unlock
	# the experiments table. The batch daemon will leave it alone
	# until the modify is done. If the modify fails and cannot recover
	# it is going to get swapped out; that is okay since the batch
	# daemon does not keep state internally.
	#
    }
    else {
	if ($inout eq "in") {
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be SWAPPED to\n".
			   "QUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_SWAPPED());
	    SetExpState($pid, $eid, EXPTSTATE_QUEUED());
	}
	elsif ($inout eq "out") {
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be ACTIVE or\n".
			   "ACTIVATING to swap out. Currently $estate.")
		if ($estate ne EXPTSTATE_ACTIVE() &&
		    $estate ne EXPTSTATE_ACTIVATING());

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    #
	    TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
	}
	elsif ($inout eq "pause") {
	    ExitWithStatus(1,
			   "Batch experiment $pid/$eid must be QUEUED to\n".
			   "DEQUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_QUEUED());

	    #
	    # XXX. The batch daemon might already have the experiment, but
	    # not have shipped it off to startexp. Change the state
	    # anyway. The error will be noticed later when startexp dies,
	    # and the batch daemon gets the error back. This sucks.
	    #
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED());
	}
	else {
	    die("*** $0:\n".
		"    Operation $inout not allowed on a batch experiment!\n");
	}
	ExitWithStatus(0,
		       "Batch experiment $pid/$eid state has been changed.\n");
    }
}
else {
    #
    # If the cancel flag is set, then user must wait for that to
    # clear before we can do anything else.
    #
    ExitWithStatus(1,
		   "Experiment $pid/$eid has its cancel flag set!.\n".
		   "You must wait for that to clear before you can swap\n".
		   "or modify the experiment.\n")
	if ($canceled);

    #
    # Check the state for the various operations, unless forced (-f).
    #
    if (!$force) {
	if ($inout eq "in") {
	    ExitWithStatus(1,
			   "Experiment $pid/$eid is not swapped out!")
		if ($estate ne EXPTSTATE_SWAPPED());
	}
	elsif ($inout eq "out") {
	    ExitWithStatus(1,
			   "Experiment $pid/$eid is not swapped in ".
			   "or activating!\n")
		if ($estate ne EXPTSTATE_ACTIVE() &&
		    $estate ne EXPTSTATE_ACTIVATING());

	    if ($estate eq EXPTSTATE_ACTIVATING()) {
		#
		# All we can do is set the cancel flag and hope that
		# it gets noticed. We do not wait.
		#
		TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);

		ExitWithStatus(0,
			       "Experiment $pid/$eid swapin has been  ".
			       "marked for cancelation.\n".
			       "You will receive email when the original ".
			       "swap request has finished.");
	    }
	}
	elsif ($inout eq "restart") {
	    ExitWithStatus(1,
			   "Experiment $pid/$eid is not swapped in!")
		if ($estate ne EXPTSTATE_ACTIVE());
	}
	elsif ($inout eq "modify") {
	    ExitWithStatus(1,
			   "Experiment $pid/$eid must be ACTIVE or\n".
			   "SWAPPED to modify!\n")
		if ($estate ne EXPTSTATE_ACTIVE() &&
		    $estate ne EXPTSTATE_SWAPPED());
	}
	else {
	    # $action is not assigned yet at this point; report the
	    # requested operation instead.
	    die("*** $0:\n".
		"    Missing state check for action: $inout\n");
	}
    }
}

513 514 515 516 517 518 519
#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
if ($inout eq "in") {
    $nextswapstate = EXPTSTATE_ACTIVATING();
}
elsif ($inout eq "out") {
    $nextswapstate = EXPTSTATE_SWAPPING();
}
elsif ($inout eq "restart") {
    $nextswapstate = EXPTSTATE_RESTARTING();
}
elsif ($inout eq "modify") {
    # A swapped out experiment only needs a parse; anything else is
    # reparsed.
    $nextswapstate = (($estate eq EXPTSTATE_SWAPPED()) ?
		      EXPTSTATE_MODIFY_PARSE() : EXPTSTATE_MODIFY_REPARSE());
}
else {
    # $action is not assigned until further down, so name the requested
    # operation here.
    die("*** $0:\n".
	"    Missing state check for action: $inout\n");
}
539 540
 
# Update idleswap_timeout to whatever the current value is.
541
if ($inout ne "out") {
542 543 544
    DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
		 "where eid='$eid' and pid='$pid'");
}
545

546 547 548 549 550 551 552
#
# On a failure, we go back to this swapstate. Might be modified below.
# 
$termswapstate = $estate;

# Lock the record, set the nextstate, and unlock the table.
TBLockExp($pid, $eid, $nextswapstate);
553 554 555 556
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
557 558 559 560
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
561 562
#

563 564 565 566 567 568 569 570 571
# Past-tense description of the operation, used in messages and email.
# $action is left untouched for any operation not in the table.
{
    my %action_for = ("in"      => "swapped in",
		      "out"     => "swapped out",
		      "restart" => "restarted",
		      "modify"  => "modified");

    $action = $action_for{$inout}
	if (exists($action_for{$inout}));
}
575

576 577 578 579 580 581 582 583 584 585 586 587 588 589 590
#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since it is polite
# to let the original creator know what is going on. 
#
my ($expt_head_name, $expt_head_email);

unless (UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    # Lookup failed; warn and address the copy to testbed ops instead.
    print STDERR
	"*** WARNING: Could not determine name/email for $expt_head_login.\n";
    ($expt_head_name, $expt_head_email) = ("TBOPS", $TBOPS);
}

591 592 593
#
# Before going to background, we have to copy out the NS file!
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
594
if ($inout eq "modify" && defined($modnsfile)) {
595 596 597 598 599 600 601 602
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
	die("*** $0:\n".
	    "    Could not copy $tempnsfile to $modnsfile");
    }
    chmod(0664, "$modnsfile");
}

603 604 605 606
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
    TBExptSetLogFile($pid, $eid, $logname);
    TBExptOpenLogFile($pid, $eid);

    if (my $childpid = TBBackGround($logname)) {
	#
	# Parent exits normally, except if in waitmode.
	#
	if (!$waitmode) {
	    print "Experiment $pid/$eid is now being $action.\n".
		"You will be notified via email when this is done.\n";
	    exit(0);
	}
	# Name the operation ($inout) here; $action is the past-tense
	# phrase and would read "swapswapped in".
	print("Waiting for experiment $eid to finish its swap${inout}\n");
	print("You may type ^C at anytime; you will be notified via email\n".
	      "later; you will not actually interrupt the experiment itself.\n");

	# Give child a chance to run.
	select(undef, undef, undef, 0.25);

	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

	#
	# Wait until child exits or until user gets bored and types ^C.
	#
	waitpid($childpid, 0);

	# Save the wait status right away so nothing can clobber it.
	my $childstatus = $?;

	print("Done. Exited with status: $childstatus\n");
	exit($childstatus >> 8);
    }
}

645 646 647 648 649 650 651 652
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
# Only needed when a parent is waiting on us (-w).
POSIX::setsid()
    if ($waitmode);

653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668
#
# Gather stats; start clock ticking
#
# The three operations are mutually exclusive, so at most one of these
# fires; a restart records nothing here.
GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0,
		TBDB_STATS_FLAGS_START)
    if ($inout eq "in");

GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		TBDB_STATS_FLAGS_START)
    if ($inout eq "out");

GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0,
		TBDB_STATS_FLAGS_START)
    if ($inout eq "modify");

669 670 671
#
# Remove old report file since its contents are going to be invalid.
#
672
if ($inout ne "restart" && -e $repfile) {
673 674 675
    unlink("$repfile");
}

676 677 678 679
#
# Sanity check states in case someone changes something.
#
#
# Run the backend script for the requested operation. Any failure goes
# through fatal(), which does not return.
#
if ($inout eq "out") {
    # Idle-swaps and forced swaps both pass -force to tbswap.
    my $optarg = (($force || $idleswap) ? "-force" : "");

    print STDOUT "Running 'tbswap out $optarg $pid $eid'\n";
    if (system("$tbdir/tbswap out $optarg $pid $eid") != 0) {
	$errorstat = $? >> 8;
	fatal("tbswap out failed!");
    }
    SetExpState($pid, $eid, EXPTSTATE_SWAPPED());
}
elsif ($inout eq "in") {
    print STDOUT "Running 'tbswap in $pid $eid'\n";
    if (system("$tbdir/tbswap in $pid $eid") != 0) {
	$errorstat = $? >> 8;
	fatal("tbswap in failed!");
    }
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE());

    # Regenerate the report file; its exit status is not checked.
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
}
elsif ($inout eq "modify") {
    my $modifyError = "";

    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
    if (TBExptBackupVirtualState($pid, $eid)) {
	fatal("Could not backup experiment state; cannot safely continue!");
    }

    #
    # Rerun tbprerun if modifying, but only if new NS file provided.
    # Yep, we allow reswap without changing the NS file. For Shashi and SIM.
    #
    if (defined($modnsfile)) {
	print STDOUT "Running 'tbprerun $pid $eid $modnsfile'\n";
	if (system("$tbdir/tbprerun $pid $eid $modnsfile") != 0) {
	    $modifyError = "tbprerun failed!";
	}
    }

    #
    # Our next state depends on whether the experiment was active or swapped.
    #
    if (! $modifyError) {
	if ($estate eq EXPTSTATE_SWAPPED()) {
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED());
	}
	else {
	    SetExpState($pid, $eid, EXPTSTATE_MODIFY_RESWAP());

	    my $optarg = ($reboot ? "-reboot" : "");
	    $optarg .= ($eventsys_restart ? " -eventsys_restart" : "");

	    print STDOUT "Running 'tbswap update $optarg $pid $eid'\n";
	    if (system("$tbdir/tbswap update $optarg $pid $eid") != 0) {
		$errorstat = $? >> 8;
		$modifyError = "tbswap update failed!";
	    }

	    #
	    # See what tbswap did. It might have swapped it out if there
	    # was an error.
	    #
	    if (! $modifyError) {
		SetExpState($pid, $eid, EXPTSTATE_ACTIVE());
		$estate = EXPTSTATE_ACTIVE();
	    }
	    elsif ($errorstat & 0x40) {
		#
		# Icky. Magic return code that says tbswap swapped it out.
		# We do not want tbswap to muck with states anymore, so
		# need to know what it did. At some point we should clean
		# up the exit reporting! Anyway, fatal() needs to know
		# the right state to go back to (no longer ACTIVE).
		#
		$estate = EXPTSTATE_SWAPPED();
		$termswapstate = EXPTSTATE_SWAPPED();
		# Old accounting info.
		TBSetExpSwapTime($pid, $eid);
	    }
	}
    }

    if ($modifyError) {
	print STDOUT "Modify Error: $modifyError\n";
	print STDOUT "Recovering experiment state...\n";

	# Must deal with the prerender explicitly since it runs background.
	system("prerender -r $pid $eid");
	TBExptRemoveVirtualState($pid, $eid);

	if (TBExptRestoreVirtualState($pid, $eid) == 0) {
	    # Must deal with the prerender explicitly since it runs background.
	    system("prerender -t $pid $eid");
	    fatal("Update aborted; old state restored.");
	}
	else {
	    # fatal() terminates the experiment when this flag is set.
	    $modifyHosed = 1;
	    fatal("Experiment state could not be restored!");
	}
    }

    TBExptClearBackupState($pid, $eid);
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
}
else { # $inout eq "restart" assumed.
    print STDOUT "Running 'tbrestart $pid $eid'\n";
    if (system("$tbdir/tbrestart $pid $eid") != 0) {
	fatal("tbrestart failed!");
    }
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE());
}
794

795 796 797 798 799 800 801 802 803 804
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
805
system("cp -Rfp $workdir/ $userdir/tbdata/");
806

807 808 809 810
#
# Gather stats. 
#
if ($inout eq "in") {
811
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0);
812 813
}
elsif ($inout eq "out") {
814
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
815
		    ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0));
816 817
}
elsif ($inout eq "modify") {
818
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0);
819
}
820 821
# Old accounting info.
TBSetExpSwapTime($pid, $eid);
822

823 824 825 826 827 828
#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
829
# In batch mode, just exit without sending email or unlocking. The
830
# batch daemon will take care of that.
831 832 833 834 835
#
if ($batch) {
    exit(0);
}

836 837 838 839 840 841 842
#
# Clear the log file so the web page stops spewing. 
#
if (defined($logname)) {
    TBExptCloseLogFile($pid, $eid);
}

843 844 845
#
# Must unlock before exit.
#
846
TBUnLockExp($pid, $eid);
847 848 849 850 851

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above). 
#
852
TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
853 854 855

print "Swap Success!\n";

856 857 858 859
#
# Send email notification to user.
#
my $message =
860 861
    "Experiment $eid in project $pid has been ";

862
if ($inout eq "out" && ($idleswap || $autoswap || $force) ) {
863
    $message .= "forcibly swapped out by\nEmulab";
864 865 866 867 868
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	  "(See also the Idle-Swap info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($autoswap) {
869 870
	$message .= " because it exceeded its Maximum Duration.\n".
	  "(See also the Max. Duration info in \n".
871 872 873 874 875
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($force) {
	$message .= ". (See also our Node Usage Policies in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    }
876 877 878 879 880
}
else {
    $message .= "$action.\n";
}

881 882 883 884 885
if ($inout eq "in") {
    # Add the swap settings...
    $message .="\nCurrent swap settings:\n$swapsettings";
}

886 887
$message .=
    "\n".
888 889
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";
890 891

SENDMAIL("$user_name <$user_email>",
892
	 "Experiment $pid/$eid \u$action",
893
	 $message,
894
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
895 896
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
897 898
	 (($inout eq "restart") ? ($logname) :
	  (($repfile, $logname), (defined($modnsfile) ? ($modnsfile) : ()))));
899 900 901 902 903 904

exit 0;

#
# Report a failure of the swap operation, clean up, and exit with
# $errorstat. Does not return.
#
#   $mesg - text printed to STDOUT and used as the body of the failure
#           email.
#
# Works on the file-level state of this script ($inout, $estate,
# $termswapstate, $modifyHosed, $batch, $logname, ...). In batch mode it
# exits right after resetting the experiment state, without sending
# email or unlocking.
#
sub fatal($)
{
    my ($mesg) = @_;

    print STDOUT "*** $0:\n".
	         "    $mesg\n";

    #
    # Gather stats, recording the error status.
    #
    if ($inout eq "in") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, $errorstat);
    }
    elsif ($inout eq "out") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $errorstat);
    }
    elsif ($inout eq "modify") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, $errorstat);
    }

    #
    # Clear backup state since not needed anymore; experiment is toast.
    #
    if ($inout eq "modify") {
	TBExptClearBackupState($pid, $eid);
    }

    #
    # If hosed, we entirely terminate the experiment.
    #
    if ($modifyHosed) {
	#
	# Note: $estate is indeed still set appropriately!
	#
	if ($estate eq EXPTSTATE_ACTIVE()) {
	    print "Running 'tbswap out -force $pid $eid'\n";
	    if (system("$tbdir/tbswap", "out", "-force", $pid, $eid) != 0) {
		print "tbswap out failed!\n";
	    }
	}

	print "Running 'tbend -force $pid $eid'\n";
	if (system("$tbdir/tbend", "-force", $pid, $eid) != 0) {
	    print "tbend failed!\n";
	}
	# Must override since we are so badly hosed.
	$termswapstate = EXPTSTATE_TERMINATED();
    }

    # Copy over the log files so the user can see them. The commands in
    # this routine take fixed arguments, so they are run in list form
    # and never pass through a shell.
    system("/bin/cp", "-Rfp", "$workdir/", "$userdir/tbdata");

    # Set proper state, which is typically the way we came in.
    SetExpState($pid, $eid, $termswapstate);

    #
    # In batch mode, exit without sending the email or unlocking. The
    # batch daemon will take care of that.
    #
    if ($batch) {
	exit($errorstat);
    }

    #
    # Clear the log file so the web page stops spewing.
    #
    if (defined($logname)) {
	TBExptCloseLogFile($pid, $eid);
    }

    # Unlock and reset state to its terminal value.
    TBUnLockExp($pid, $eid);

    #
    # Clear the cancel flag now that the operation is complete. Must be done
    # after we change the experiment state (above).
    #
    TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);

    #
    # Send a message to the testbed list. Append the logfile.
    #
    SENDMAIL("$user_name <$user_email>",
	     "Swap ${inout} Failure: $pid/$eid",
	     $mesg,
	     ($idleswap ? $TBOPS : "$user_name <$user_email>"),
	     "Cc:  $expt_head_name <$expt_head_email>\n".
	     "Cc:  $TBOPS",
	     (($logname), (defined($modnsfile) ? ($modnsfile) : ())));

    if ($modifyHosed) {
	#
	# Copy off the workdir to the user directory, Then back up both of
	# them for post-mortem debugging.
	#
	system("/bin/cp", "-Rfp", "$workdir/", "$userdir/tbdata");
	system("/bin/rm", "-rf", "${workdir}-failed");
	system("/bin/mv", "-f", $workdir, "${workdir}-failed");
	system("/bin/rm", "-rf", "${userdir}-failed");
	system("/bin/mv", "-f", $userdir, "${userdir}-failed");
	TBExptDestroy($pid, $eid);
    }

    exit($errorstat);
}