swapexp.in 28.4 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7 8
# All rights reserved.
#

9 10
use English;
use Getopt::Std;
11
use POSIX qw(isatty setsid);
12 13

#
Chad Barb's avatar
Chad Barb committed
14
# This gets invoked from the Web interface.
Chad Barb's avatar
 
Chad Barb committed
15
# Swap an experiment in, swap it out, restart or modify.
16
#
Chad Barb's avatar
Chad Barb committed
17

18 19
sub usage()
{
20
    print(STDERR
21
	  "Usage: swapexp [-q] [-b | -w] [-i | -a | -f] [-r] [-e]\n".
22 23 24 25
	  "               <-s in | out | restart | modify | pause>\n".
	  "               <pid> <eid> [<nsfile>]\n".
	  "switches and arguments:\n".
	  "-w       - wait for non-batchmode experiment swap/modify\n".
26
	  "-q       - be less chatty\n".
27 28 29 30 31 32
	  "-r       - reboot nodes when doing a modify experiment\n".
	  "-e       - restart event scheduler when doing a modify experiment\n".
	  "-s <op>  - Operation to perform; one of those listed above\n".
	  "<pid>    - The project the experiment belongs to\n".
	  "<eid>    - The experiment name (id)\n".
	  "<nsfile> - Optional NS file to parse for experiment modify\n");
33 34
    exit(-1);
}
35
my  $optlist = "biafres:wq";
36

37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;
    
    if ($status < 0) {
	die("*** $0:\n".
	    "    $message\n");
    }
    else {
	print STDERR "$message\n";
    }
    exit($status);
}

61 62 63 64 65 66
#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
67
my $TBINFO = "$TB/expinfo";
68
my $TBDOCBASE = "@TBDOCBASE@";
69
my $TBBASE = "@TBBASE@";
70 71 72 73 74 75 76 77 78

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

my $tbdir    = "$TB/bin/";
79
my $tbdata   = "tbdata";
80
my $batch    = 0;
81
my $idleswap = 0;
82 83
my $autoswap = 0;
my $force    = 0;
Chad Barb's avatar
Chad Barb committed
84
my $reboot   = 0;
85
my $waitmode = 0;
86
my $quiet    = 0;
87
my $eventsys_restart   = 0;
88
my $errorstat= -1;
89
my $modifyHosed = 0;
Chad Barb's avatar
 
Chad Barb committed
90

91 92 93 94 95
my $inout;
my $logname;
my $dbuid;
my $user_name;
my $user_email;
96
my @allnodes;
97
my @row;
98
my $action;
99
my $nextswapstate;
100
my $termswapstate;
Chad Barb's avatar
 
Chad Barb committed
101

102 103 104
#
# Untaint the path
# 
105
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
106 107 108 109 110 111 112
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

113 114 115 116 117 118 119
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

120 121 122 123 124 125 126 127
#
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
128 129 130
if (defined($options{"i"})) {
    $idleswap = 1;
}
131 132 133
if (defined($options{"w"})) {
    $waitmode = 1;
}
134 135 136 137 138 139
if (defined($options{"a"})) {
    $autoswap = 1;
}
if (defined($options{"f"})) {
    $force = 1;
}
140 141 142
if (defined($options{"b"})) {
    $batch = 1;
}
Chad Barb's avatar
 
Chad Barb committed
143 144 145
if (defined($options{"r"})) {
    $reboot = 1;
}
146 147 148
if (defined($options{"e"})) {
    $eventsys_restart = 1;
}
149 150 151
if (defined($options{"q"})) {
    $quiet = 1;
}
152 153 154
if (defined($options{"s"})) {
    $inout = $options{"s"};

Chad Barb's avatar
Chad Barb committed
155 156 157
    if ($inout ne "out"     &&
	$inout ne "in"      &&
	$inout ne "restart" &&
158
	$inout ne "pause"   &&
Chad Barb's avatar
 
Chad Barb committed
159
	$inout ne "modify") {
160 161 162 163 164 165 166
	usage();
    }
}
else {
    usage();
}

167 168 169 170 171
usage()
    if (($waitmode && $batch) ||
	($inout ne "modify" && @ARGV != 2) ||
	(($waitmode || $batch) && ($idleswap || $autoswap || $force)));

172 173 174 175 176
if ($eventsys_restart && $inout ne "modify") {
    print STDOUT "Usage: swapexp: -e (eventsys_restart) can be used ".
                 "only with -s modify\n";
    usage();
}
Chad Barb's avatar
 
Chad Barb committed
177 178 179
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

180 181 182
#
# Untaint the arguments.
#
183
if ($pid =~ /^([-\w\.]+)$/) {
184 185 186 187 188
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
189
if ($eid =~ /^([-\w\.]+)$/) {
190 191 192 193 194
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}
195
my $repfile = "$eid.report";
196 197
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
198 199 200
my $tempnsfile;
my $modnsfile;

Leigh B. Stoller's avatar
Leigh B. Stoller committed
201
if ($inout eq "modify" && @ARGV > 2) {
202 203 204 205 206
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
207
    if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
208
	$tempnsfile = $1;
209 210
    }
    else {
211 212 213 214 215 216 217 218 219 220
	die("Tainted nsfile name: $tempnsfile\n");
    }
    #
    # Called from ops interactively. Make sure NS file in /proj or /users.
    #
    # Use realpath to resolve any symlinks.
    #
    my $translated = `realpath $tempnsfile`;
    if ($translated =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
221
    }
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
    else {
	die("Tainted nsfile returned by realpath: $translated\n");
    }

    #
    # The file must reside in /proj, /groups, or /users. Since this script
    # runs as the caller, regular file permission checks ensure its a file
    # the user is allowed to use. /tmp/$guid-$nsref.nsfile also allowed
    # since this script is invoked directly from web interface, which generates
    # a name that should not be guessable, so as long as it looks to be in
    # proper format, we accept it. 
    #
    if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile/) &&
	! ($tempnsfile =~ /^\/var\/tmp\/php\w+/) &&
	! ($tempnsfile =~ /^\/proj/) &&
	! ($tempnsfile =~ /^\/groups/) &&
	! ($tempnsfile =~ /^\/users/)) {
	die("$tempnsfile does not resolve to an appropriate directory!\n");
    }

    if (! -f $tempnsfile || -z $tempnsfile || ! -r $tempnsfile) {
243 244 245
	die("*** $0:\n".
	    "    $tempnsfile does not look like an NS file!\n");
    }
246 247
    $modnsfile = "$eid-modify.ns";
}
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265

#
# Verify user and get his DB uid.
#
if (! UNIX2DBUID($UID, \$dbuid)) {
    die("*** $0:\n".
	"    You do not exist in the Emulab Database.\n");
}

#
# Get email info for user.
#
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
    die("*** $0:\n".
	"    Cannot determine your name and email address.\n");
}

#
Chad Barb's avatar
 
Chad Barb committed
266
# Verify that this person can muck with the experiment.
267 268 269 270 271
# Note that any script down the line has to do an admin check also. 
#
if ($UID && !TBAdmin($UID) &&
    !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
Chad Barb's avatar
 
Chad Barb committed
272
	"    You do not have permission to swap or modify this experiment!\n");
273 274
}

275 276 277 278
# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

279 280 281 282 283 284 285 286 287
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    $SIG{TERM} = 'IGNORE';
    $SIG{QUIT} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}

288 289 290 291 292
#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
Chad Barb's avatar
Chad Barb committed
293
# the tb scripts.
294 295 296 297 298 299 300 301 302 303 304 305 306
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

if (! $query_result->numrows) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid exists!\n");
}
my %hashrow = $query_result->fetchhash();
my $expt_head_login = $hashrow{'expt_head_uid'};
my $estate          = $hashrow{'state'};
307
my $batchstate      = $hashrow{'batchstate'};
308
my $expt_path       = $hashrow{'path'};
309
my $expt_locked     = $hashrow{'expt_locked'};
310
my $isbatchexpt     = $hashrow{'batchmode'};
311
my $canceled        = $hashrow{'canceled'};
312
my $linktest_level  = $hashrow{'linktest_level'};
313 314 315 316 317 318 319 320 321 322
my $swappablebit= $hashrow{'swappable'};
my $idleswapbit = $hashrow{'idleswap'};
my $autoswapbit = $hashrow{'autoswap'};
my $swappablestr= ( $swappablebit ? "Yes" : "No" );
my $idleswapstr = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr = ( $autoswapbit ? "Yes" : "No" );
my $noswap      = $hashrow{'noswap_reason'};
my $noidleswap  = $hashrow{'noidleswap_reason'};
my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0;
323

324 325
if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
326
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
327 328 329
    $idleswaptime = $idleswap_time / 60.0;
}

330 331
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
332
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
333 334
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");
335

336
if (! chdir($workdir)) {
337
    die("*** $0:\n".
338
	"    Could not chdir to $workdir: $!\n");
339 340
}

341
#
342 343 344
# This script is called from the batch daemon.
# 
if ($batch) {
345
    #
346 347 348
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be in proper state for the
    # operation requested. 
349
    #
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);
    
    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());

    if ($inout eq "in") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be QUEUED.\n")
	    if ($estate ne EXPTSTATE_QUEUED);
	
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid has been canceled! Aborting.\n")
	    if ($canceled);
    }
    elsif ($inout eq "out") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be ACTIVE.\n")
	    if ($estate ne EXPTSTATE_ACTIVE);
374 375
    }
    else {
376 377 378 379 380 381
	die("*** $0:\n".
	    "    Improper request from batch daemon for $pid/$eid!\n");
    }
}
else {
    if ($isbatchexpt) {
382 383 384 385
	#
	# User is requesting that a batch either be injected or paused.
	# Sanity check the state, but otherwise let the batch daemon
	# handle it.
386 387
	#
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
388
	    if ($canceled);
389

390
	if ($inout eq "in") {
391
	    ExitWithStatus(1,
392 393 394 395
			   "Batch experiment $pid/$eid must be SWAPPED to\n".
			   "QUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_SWAPPED);
	    SetExpState($pid, $eid, EXPTSTATE_QUEUED);
396 397
	}
	elsif ($inout eq "out") {
398
	    ExitWithStatus(1,
399 400 401 402
			   "Batch experiment $pid/$eid must be ACTIVE or\n".
			   "ACTIVATING to swap out. Currently $estate.")
		if ($estate ne EXPTSTATE_ACTIVE &&
		    $estate ne EXPTSTATE_ACTIVATING);
403 404 405 406 407

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    # 
408
	    TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
409 410
	}
	elsif ($inout eq "pause") {
411
	    ExitWithStatus(1,
412 413 414
			   "Batch experiment $pid/$eid must be QUEUED to\n".
			   "DEQUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_QUEUED);
415 416

	    #
417 418 419 420
	    # XXX. The batch daemon might already have the experiment, but
	    # not have shipped it off to startexp. Change the state
	    # anyway. The error will be noticed later when startexp dies,
	    # and the batch daemon gets the error back. This sucks.
421
	    #
422
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
423
	}
424
	elsif ($inout eq "modify") {
425
	    ExitWithStatus(1,
426 427 428 429 430 431
			   "Batch experiment $pid/$eid must be SWAPPED or\n".
			   "ACTIVE to modify. Currently $estate.")
		if (($estate ne EXPTSTATE_SWAPPED &&
		     $estate ne EXPTSTATE_ACTIVATING) ||
		    $batchstate != BATCHSTATE_UNLOCKED());

432
	    #
433
	    # Otherwise, proceed with the modify. The experiment will be
434 435
	    # locked below, and so it cannot be injected or otherwise messed
	    # with since its state is going to be changed before we unlock
436 437 438 439
	    # the experiments table. The batch daemon will leave it alone
	    # until the modify is done. If the modify fails and cannot recover
	    # it is going to get swapped out; that is okay since the batch
	    # daemon does not keep state internally. 
440
	    #
441 442
	    goto doit;
	}
443 444
	else {
	    die("*** $0:\n",
445
		"    Operation $inout not allowed on a batch experiment!\n");
446
	}
447 448
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid state has been changed.\n");
449
      doit:
450
    }
451 452 453 454 455 456 457 458 459 460
    else {
	#
	# If the cancel flag is set, then user must wait for that to
	# clear before we can do anything else.
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid has its cancel flag set!.\n".
		       "You must wait for that to clear before you can swap\n".
		       "or modify the experiment.\n")
	    if ($canceled);
461

462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
	#
	# Check the state for the various operations.
	#
	if (!$force) {
	  SWITCH: for ($inout) {
	      /^in$/i && do {
		  if ($estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped out!");
		  }
		  last SWITCH;
	      };
	      /^out$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_ACTIVATING()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in ".
				     "or activating!\n");
		  }
		  
		  if ($estate eq EXPTSTATE_ACTIVATING()) {
		      #
		      # All we can do is set the cancel flag and hope that
		      # it gets noticed. We do not wait. 
		      # 
		      TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
		      
		      ExitWithStatus(0,
				     "Experiment $pid/$eid swapin has been  ".
				     "marked for cancelation.\n".
				     "You will receive email when the original ".
				     "swap request has finished.");
		  }
		  last SWITCH;
	      };
	      /^restart$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in!");
		  }
		  last SWITCH;
	      };
	      /^modify$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid must be ACTIVE or\n".
				     "SWAPPED to modify!\n");
		  }
		  last SWITCH;
	      };
	      die("*** $0:\n".
		  "    Missing state check for action: $action\n");
	  }
516 517
	}
    }
518 519
}

520 521 522 523 524 525 526
#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
SWITCH: for ($inout) {
    /^in$/i && do {
527
	$nextswapstate = EXPTSTATE_ACTIVATING();
528 529 530
	last SWITCH;
    };
    /^out$/i && do {
531
	$nextswapstate = EXPTSTATE_SWAPPING();
532 533 534
	last SWITCH;
    };
    /^restart$/i && do {
535
	$nextswapstate = EXPTSTATE_RESTARTING();
536 537 538
	last SWITCH;
    };
    /^modify$/i && do {
539 540
	$nextswapstate = (($estate eq EXPTSTATE_SWAPPED()) ?
			  EXPTSTATE_MODIFY_PARSE() : EXPTSTATE_MODIFY_REPARSE());
541 542
	last SWITCH;
    };
543
    die("*** $0:\n".
544
	"    Missing state check for action: $action\n");
545
}
546 547
 
# Update idleswap_timeout to whatever the current value is.
548
if ($inout ne "out") {
549 550 551
    DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
		 "where eid='$eid' and pid='$pid'");
}
552

553 554 555 556 557 558 559
#
# On a failure, we go back to this swapstate. Might be modified below.
# 
$termswapstate = $estate;

# Lock the record, set the nextstate, and unlock the table.
TBLockExp($pid, $eid, $nextswapstate);
560 561 562 563
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
564 565 566 567
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
568 569
#

570 571 572 573 574 575 576 577 578
if ($inout eq "in") {
    $action = "swapped in";
}
if ($inout eq "out") {
    $action = "swapped out";
}
if ($inout eq "restart") {
    $action = "restarted";
}
Chad Barb's avatar
 
Chad Barb committed
579 580 581
if ($inout eq "modify") {
    $action = "modified";
}
582

583 584 585 586 587 588 589 590 591 592 593 594 595 596 597
#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
#
my $expt_head_name;
my $expt_head_email;

if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    print STDERR "*** WARNING: ".
	         "Could not determine name/email for $expt_head_login.\n";
    $expt_head_name  = "TBOPS";
    $expt_head_email = $TBOPS;
}

598 599 600
#
# Before going to background, we have to copy out the NS file!
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
601
if ($inout eq "modify" && defined($modnsfile)) {
602 603 604 605 606 607 608 609
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
	die("*** $0:\n".
	    "    Could not copy $tempnsfile to $modnsfile");
    }
    chmod(0664, "$modnsfile");
}

610 611 612 613
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
614
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
615
    TBExptSetLogFile($pid, $eid, $logname);
616
    TBExptOpenLogFile($pid, $eid);
Chad Barb's avatar
Chad Barb committed
617

618 619 620 621 622
    if (my $childpid = TBBackGround($logname)) {
	#
	# Parent exits normally, except if in waitmode. 
	#
	if (!$waitmode) {
623 624 625
	    print("Experiment $pid/$eid is now being $action.\n".
		  "You will be notified via email when the this is done.\n")
		if (! $quiet);
626 627
	    exit(0);
	}
628 629 630 631 632 633 634 635
	print("Waiting for experiment $eid to finish its swap${action}\n")
	    if (! $quiet);
	    
	if (isatty(STDIN) && !$quiet) {
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
	}
636 637 638 639 640 641 642 643 644 645 646 647
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

648
	#
649
	# Wait until child exits or until user gets bored and types ^C.
650
	#
651 652
	waitpid($childpid, 0);
	
653 654
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
655
	exit($? >> 8);
656 657 658
    }
}

659 660 661 662 663 664 665 666
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
#
# Gather stats; start clock ticking
#
if ($inout eq "in") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "out") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "modify") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0,
		    TBDB_STATS_FLAGS_START);
}

683 684 685
#
# Remove old report file since its contents are going to be invalid.
#
686
if ($inout ne "restart" && -e $repfile) {
687 688 689
    unlink("$repfile");
}

690 691 692 693
#
# Sanity check states in case someone changes something.
#
if ($inout eq "out") {
694 695 696 697
    my $optarg = (($force || $idleswap) ? "-force" : "");
    
    print STDOUT "Running 'tbswap out $optarg $pid $eid'\n";
    if (system("$tbdir/tbswap out $optarg $pid $eid") != 0) {
698
	$errorstat = $? >> 8;
699
	fatal("tbswap out failed!");
700
    }
701
    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
702
}
703
elsif ($inout eq "in") {
704
    print STDOUT "Running 'tbswap in $pid $eid'\n";
Chad Barb's avatar
 
Chad Barb committed
705
    if (system("$tbdir/tbswap in $pid $eid") != 0) {
706
	$errorstat = $? >> 8;
707
	fatal("tbswap in failed!");
708
    }
709
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
710

711
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
Chad Barb's avatar
Chad Barb committed
712
}
Chad Barb's avatar
 
Chad Barb committed
713
elsif ($inout eq "modify") {
Chad Barb's avatar
Chad Barb committed
714
    my $modifyError = "";
715
    my $oldstate    = $estate;
Chad Barb's avatar
Chad Barb committed
716

717 718 719
    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

Chad Barb's avatar
Chad Barb committed
720
    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
721
    if (TBExptBackupVirtualState($pid, $eid)) {
722
	fatal("Could not backup experiment state; cannot safely continue!");
Chad Barb's avatar
Chad Barb committed
723 724 725
    }

    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
726 727
    # Rerun tbprerun if modifying, but only if new NS file provided.
    # Yep, we allow reswap without changing the NS file. For Shashi and SIM. 
Chad Barb's avatar
Chad Barb committed
728
    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
729 730 731 732 733
    if (defined($modnsfile)) {
	print STDOUT "Running 'tbprerun $pid $eid $modnsfile'\n";
	if (system("$tbdir/tbprerun $pid $eid $modnsfile") != 0) {
	    $modifyError = "tbprerun failed!";
	}
Chad Barb's avatar
Chad Barb committed
734 735
    }

Chad Barb's avatar
 
Chad Barb committed
736
    #
737
    # Our next state depends on whether the experiment was active or swapped.
Chad Barb's avatar
 
Chad Barb committed
738
    #
739 740 741
    if (! $modifyError) {
	if ($estate eq EXPTSTATE_SWAPPED) {
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
Chad Barb's avatar
 
Chad Barb committed
742
	}
743 744 745 746
	else {
	    SetExpState($pid, $eid, EXPTSTATE_MODIFY_RESWAP);
	    
	    my $optarg = ($reboot ? "-reboot" : "");
747
	    $optarg .= ($eventsys_restart ? " -eventsys_restart" : "");
748 749 750 751 752 753

	    print STDOUT "Running 'tbswap update $optarg $pid $eid'\n";
	    if (system("$tbdir/tbswap update $optarg $pid $eid") != 0) {
		$errorstat = $? >> 8;
		$modifyError = "tbswap update failed!";
	    }
Chad Barb's avatar
 
Chad Barb committed
754

755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
	    #
	    # See what tbswap did. It might have swapped it out if there
	    # was an error. 
	    # 
	    if (! $modifyError) {
		SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
		$estate = EXPTSTATE_ACTIVE;
	    }
	    elsif ($errorstat & 0x40) {
		#
		# Icky. Magic return code that says tbswap swapped it out.
		# We do not want tbswap to muck with states anymore, so
		# need to know what it did. At some point we should clean
		# up the exit reporting! Anyway, fatal() needs to know the
		# the right state to go back to (no longer ACTIVE).
		#
		$estate = EXPTSTATE_SWAPPED;
		$termswapstate = EXPTSTATE_SWAPPED;
                # Old accounting info.
		TBSetExpSwapTime($pid, $eid);
Chad Barb's avatar
 
Chad Barb committed
775
	    }
776
	}
Chad Barb's avatar
Chad Barb committed
777 778 779
    }

    if ($modifyError) {
780
	print STDOUT "Modify Error: $modifyError\n";
Chad Barb's avatar
Chad Barb committed
781
	print STDOUT "Recovering experiment state...\n";
782
	
783 784
	# Must deal with the prerender explicitly since it runs background.
	system("prerender -r $pid $eid");
785
	TBExptRemoveVirtualState($pid, $eid);
786
	
787
	if (TBExptRestoreVirtualState($pid, $eid) == 0) {
788 789
	    # Must deal with the prerender explicitly since it runs background.
	    system("prerender -t $pid $eid");
790 791 792 793 794
	    fatal("Update aborted; old state restored.");
	}
	else {
	    $modifyHosed = 1;
	    fatal("Experiment state could not be restored!");
Chad Barb's avatar
Chad Barb committed
795
	}
Chad Barb's avatar
 
Chad Barb committed
796
    }
797
    
798
    TBExptClearBackupState($pid, $eid);
799
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
800
}
Chad Barb's avatar
 
Chad Barb committed
801
else { # $inout eq "restart" assumed.
802
    print STDOUT "Running 'tbrestart $pid $eid'\n";
803
    if (system("$tbdir/tbrestart $pid $eid") != 0) {
804
	fatal("tbrestart failed!");
805
    }
806
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
807
}
808

809 810 811 812 813 814 815 816 817 818
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
819
system("cp -Rfp $workdir/ $userdir/tbdata/");
820

821 822 823 824 825 826 827 828 829 830 831 832 833 834
#
# Deal with linktest. If requested, swapping in or modifying, and experiment
# is indeed active, then run it!
#
# XXX - linktest uses files from $userdir/tbdata/, so the above cp must
#       happen first!
#
if ($linktest_level && ExpState($pid, $eid) eq EXPTSTATE_ACTIVE) {
    #
    # Run it. No worries about failures.
    #
    my $output = "linktest.$$";
    my $optarg = "-l $linktest_level -o $output";
    
835 836
    print STDOUT "Running 'linktest_control $optarg $pid $eid'\n";
    if (system("$TB/sbin/linktest_control $optarg $pid $eid") != 0) {
837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853
	system("cat $output")
	    if (-r $output);
	
	print STDERR "*** WARNING: ".
	             "Linktest run returned non-zero status!\n";
	
	SENDMAIL("$user_name <$user_email>",
		 "Linktest Failure: $pid/$eid",
		 "Failure in linktest (level $linktest_level); ".
		 "returned non-zero status",
		 "$user_name <$user_email>",
		 "Cc: $expt_head_name <$expt_head_email>\n".
		 "Cc: $TBOPS",
		 ($output));
    }
}

854 855 856 857
#
# Gather stats. 
#
if ($inout eq "in") {
858
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0);
859 860
}
elsif ($inout eq "out") {
861
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
862
		    ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0));
863 864
}
elsif ($inout eq "modify") {
865
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0);
866
}
867 868
# Old accounting info.
TBSetExpSwapTime($pid, $eid);
869

870 871 872 873 874 875
#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
876
# In batch mode, just exit without sending email or unlocking. The
877
# batch daemon will take care of that.
878 879 880 881 882
#
if ($batch) {
    exit(0);
}

883 884 885 886 887 888 889
#
# Clear the log file so the web page stops spewing. 
#
if (defined($logname)) {
    TBExptCloseLogFile($pid, $eid);
}

890 891 892
#
# Must unlock before exit.
#
893
TBUnLockExp($pid, $eid);
894 895 896 897 898

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above). 
#
899
TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
900 901 902

print "Swap Success!\n";

903 904 905 906
#
# Send email notification to user.
#
my $message =
907 908
    "Experiment $eid in project $pid has been ";

909
if ($inout eq "out" && ($idleswap || $autoswap || $force) ) {
910
    $message .= "forcibly swapped out by\nEmulab";
911 912 913 914 915
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	  "(See also the Idle-Swap info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($autoswap) {
916 917
	$message .= " because it exceeded its Maximum Duration.\n".
	  "(See also the Max. Duration info in \n".
918 919 920 921 922
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($force) {
	$message .= ". (See also our Node Usage Policies in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    }
923 924 925 926 927
}
else {
    $message .= "$action.\n";
}

928 929 930 931 932
if ($inout eq "in") {
    # Add the swap settings...
    $message .="\nCurrent swap settings:\n$swapsettings";
}

933 934
$message .=
    "\n".
935 936
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";
937 938

SENDMAIL("$user_name <$user_email>",
939
	 "Experiment $pid/$eid \u$action",
940
	 $message,
941
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
942 943
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
944 945
	 (($inout eq "restart") ? ($logname) :
	  (($repfile, $logname), (defined($modnsfile) ? ($modnsfile) : ()))));
946 947 948 949 950 951

exit 0;

sub fatal($)
{
    my($mesg) = $_[0];
Chad Barb's avatar
Chad Barb committed
952

953 954
    print STDOUT "*** $0:\n".
	         "    $mesg\n";
955

956 957 958 959 960 961 962 963 964 965 966 967 968
    #
    # Gather stats. 
    #
    if ($inout eq "in") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, $errorstat);
    }
    elsif ($inout eq "out") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $errorstat);
    }
    elsif ($inout eq "modify") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, $errorstat);
    }

969 970 971 972 973 974 975
    #
    # Clear backup state since not needed anymore; experiment is toast. 
    # 
    if ($inout eq "modify") {
	TBExptClearBackupState($pid, $eid);
    }

Chad Barb's avatar
 
Chad Barb committed
976
    #
977
    # If hosed, we entirely terminate the experiment.
Chad Barb's avatar
 
Chad Barb committed
978
    #
979
    if ($modifyHosed) {
Chad Barb's avatar
 
Chad Barb committed
980
	#
981
	# Note: $estate is indeed still set appropriately!
Chad Barb's avatar
 
Chad Barb committed
982 983
	#
	if ($estate eq EXPTSTATE_ACTIVE) {
984
	    print "Running 'tbswap out -force $pid $eid'\n";
Chad Barb's avatar
 
Chad Barb committed
985 986 987 988
	    if (system("$tbdir/tbswap out -force $pid $eid") != 0) {
		print "tbswap out failed!\n";
	    }
	}
Chad Barb's avatar
Chad Barb committed
989

990
	print "Running 'tbend -force $pid $eid'\n";
Chad Barb's avatar
 
Chad Barb committed
991 992 993
	if (system("$tbdir/tbend -force $pid $eid") != 0) {
	    print "tbend failed!\n";
	}
994
	# Must override since we are so badly hosed. 
995
	$termswapstate = EXPTSTATE_TERMINATED;
Chad Barb's avatar
 
Chad Barb committed
996 997
    }

998 999 1000
    # Copy over the log files so the user can see them.
    system("/bin/cp -Rfp $workdir/ $userdir/tbdata");

1001 1002 1003
    # Set proper state, which is typically the way we came in.
    SetExpState($pid, $eid, $termswapstate);

1004
    #
1005
    # In batch mode, exit without sending the email or unlocking. The
1006
    # batch daemon will take care of that.
1007 1008
    #
    if ($batch) {
1009
	exit($errorstat);
1010 1011
    }

1012
    #
Chad Barb's avatar
Chad Barb committed
1013
    # Clear the log file so the web page stops spewing.
1014 1015 1016 1017 1018
    #
    if (defined($logname)) {
	TBExptCloseLogFile($pid, $eid);
    }

1019 1020
    # Unlock and reset state to its terminal value.
    TBUnLockExp($pid, $eid);
1021 1022 1023 1024 1025

    #
    # Clear the cancel flag now that the operation is complete. Must be done
    # after we change the experiment state (above).
    #
1026
    TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
1027

1028 1029 1030 1031
    #
    # Send a message to the testbed list. Append the logfile.
    #
    SENDMAIL("$user_name <$user_email>",
1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
	 "Swap ${inout} Failure: $pid/$eid",
	 $mesg . "\n\n" .
	 "Please look at the log below to see what happened. If the error\n".
	 "resulted from a lack of free nodes, you can use this web page to\n".
	 "get a summary of free nodes:\n\n".
	 "  ${TBBASE}/nodecontrol_list.php3\n".
	 "\n".
	 "Please do not try again until you see enough nodes free. Or, you\n".
	 "can use the batch system to swap your experiment in when enough\n".
	 "nodes are free:\n\n".
	 "  ${TBDOCBASE}/tutorial/docwrapper.php3?docname=tutorial.html".
	     "#BatchMode\n",
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Cc:  $TBOPS",
	 (($logname), (defined($modnsfile) ? ($modnsfile) : ())));
1048

Leigh B. Stoller's avatar
Leigh B. Stoller committed
1049
    if ($modifyHosed) {
Chad Barb's avatar
 
Chad Barb committed
1050 1051 1052 1053 1054 1055 1056 1057 1058
	#
	# Copy off the workdir to the user directory, Then back up both of
	# them for post-mortem debugging.
	#
	system("/bin/cp -Rfp $workdir/ $userdir/tbdata");
	system("/bin/rm -rf  ${workdir}-failed");
	system("/bin/mv -f   $workdir ${workdir}-failed");
	system("/bin/rm -rf  ${userdir}-failed");
	system("/bin/mv -f   $userdir ${userdir}-failed");
Chad Barb's avatar
Chad Barb committed
1059
	TBExptDestroy($pid, $eid);
Chad Barb's avatar
 
Chad Barb committed
1060 1061
    }

1062
    exit($errorstat);
1063
}