swapexp.in 29.1 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7 8
# All rights reserved.
#

9 10
use English;
use Getopt::Std;
11
use POSIX qw(isatty setsid);
12 13

#
Chad Barb's avatar
Chad Barb committed
14
# This gets invoked from the Web interface.
Chad Barb's avatar
 
Chad Barb committed
15
# Swap an experiment in, swap it out, restart or modify.
16
#
Chad Barb's avatar
Chad Barb committed
17

18 19
sub usage()
{
20
    print(STDERR
21
	  "Usage: swapexp [-q] [-b | -w] [-i | -a | -f] [-r] [-e]\n".
22 23 24 25
	  "               <-s in | out | restart | modify | pause>\n".
	  "               <pid> <eid> [<nsfile>]\n".
	  "switches and arguments:\n".
	  "-w       - wait for non-batchmode experiment swap/modify\n".
26
	  "-q       - be less chatty\n".
27 28 29 30 31 32
	  "-r       - reboot nodes when doing a modify experiment\n".
	  "-e       - restart event scheduler when doing a modify experiment\n".
	  "-s <op>  - Operation to perform; one of those listed above\n".
	  "<pid>    - The project the experiment belongs to\n".
	  "<eid>    - The experiment name (id)\n".
	  "<nsfile> - Optional NS file to parse for experiment modify\n");
33 34
    exit(-1);
}
35
my  $optlist = "biafres:wq";
36

37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;
    
    if ($status < 0) {
	die("*** $0:\n".
	    "    $message\n");
    }
    else {
	print STDERR "$message\n";
    }
    exit($status);
}

61 62 63 64 65 66
#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
67
my $TBINFO = "$TB/expinfo";
68
my $TBDOCBASE = "@TBDOCBASE@";
69
my $TBBASE = "@TBBASE@";
70 71 72 73 74 75 76 77 78

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

my $tbdir    = "$TB/bin/";
79
my $tbdata   = "tbdata";
80
my $batch    = 0;
81
my $idleswap = 0;
82 83
my $autoswap = 0;
my $force    = 0;
Chad Barb's avatar
Chad Barb committed
84
my $reboot   = 0;
85
my $waitmode = 0;
86
my $quiet    = 0;
87
my $eventsys_restart   = 0;
88
my $errorstat= -1;
89 90
my $modifyHosed   = 0;
my $modifySwapped = 0;
Chad Barb's avatar
 
Chad Barb committed
91

92 93 94 95 96
my $inout;
my $logname;
my $dbuid;
my $user_name;
my $user_email;
97
my @allnodes;
98
my @row;
99
my $action;
100
my $nextswapstate;
101
my $termswapstate;
Chad Barb's avatar
 
Chad Barb committed
102

103 104 105
#
# Untaint the path
# 
106
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
107 108 109 110 111 112 113
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

114 115 116 117 118 119 120
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

121 122 123 124 125 126 127 128
#
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
129 130 131
if (defined($options{"i"})) {
    $idleswap = 1;
}
132 133 134
if (defined($options{"w"})) {
    $waitmode = 1;
}
135 136 137 138 139 140
if (defined($options{"a"})) {
    $autoswap = 1;
}
if (defined($options{"f"})) {
    $force = 1;
}
141 142 143
if (defined($options{"b"})) {
    $batch = 1;
}
Chad Barb's avatar
 
Chad Barb committed
144 145 146
if (defined($options{"r"})) {
    $reboot = 1;
}
147 148 149
if (defined($options{"e"})) {
    $eventsys_restart = 1;
}
150 151 152
if (defined($options{"q"})) {
    $quiet = 1;
}
153 154 155
if (defined($options{"s"})) {
    $inout = $options{"s"};

Chad Barb's avatar
Chad Barb committed
156 157 158
    if ($inout ne "out"     &&
	$inout ne "in"      &&
	$inout ne "restart" &&
159
	$inout ne "pause"   &&
Chad Barb's avatar
 
Chad Barb committed
160
	$inout ne "modify") {
161 162 163 164 165 166 167
	usage();
    }
}
else {
    usage();
}

168 169 170 171 172
usage()
    if (($waitmode && $batch) ||
	($inout ne "modify" && @ARGV != 2) ||
	(($waitmode || $batch) && ($idleswap || $autoswap || $force)));

173 174 175 176 177
if ($eventsys_restart && $inout ne "modify") {
    print STDOUT "Usage: swapexp: -e (eventsys_restart) can be used ".
                 "only with -s modify\n";
    usage();
}
Chad Barb's avatar
 
Chad Barb committed
178 179 180
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

181 182 183
#
# Untaint the arguments.
#
184
if ($pid =~ /^([-\w\.]+)$/) {
185 186 187 188 189
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
190
if ($eid =~ /^([-\w\.]+)$/) {
191 192 193 194 195
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}
196
my $repfile = "$eid.report";
197 198
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
199 200 201
my $tempnsfile;
my $modnsfile;

Leigh B. Stoller's avatar
Leigh B. Stoller committed
202
if ($inout eq "modify" && @ARGV > 2) {
203 204 205 206 207
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
208
    if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
209
	$tempnsfile = $1;
210 211
    }
    else {
212 213 214 215 216 217 218 219 220 221
	die("Tainted nsfile name: $tempnsfile\n");
    }
    #
    # Called from ops interactively. Make sure NS file in /proj or /users.
    #
    # Use realpath to resolve any symlinks.
    #
    my $translated = `realpath $tempnsfile`;
    if ($translated =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
222
    }
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
    else {
	die("Tainted nsfile returned by realpath: $translated\n");
    }

    #
    # The file must reside in /proj, /groups, or /users. Since this script
    # runs as the caller, regular file permission checks ensure its a file
    # the user is allowed to use. /tmp/$guid-$nsref.nsfile also allowed
    # since this script is invoked directly from web interface, which generates
    # a name that should not be guessable, so as long as it looks to be in
    # proper format, we accept it. 
    #
    if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile/) &&
	! ($tempnsfile =~ /^\/var\/tmp\/php\w+/) &&
	! ($tempnsfile =~ /^\/proj/) &&
	! ($tempnsfile =~ /^\/groups/) &&
	! ($tempnsfile =~ /^\/users/)) {
	die("$tempnsfile does not resolve to an appropriate directory!\n");
    }

    if (! -f $tempnsfile || -z $tempnsfile || ! -r $tempnsfile) {
244 245 246
	die("*** $0:\n".
	    "    $tempnsfile does not look like an NS file!\n");
    }
247 248
    $modnsfile = "$eid-modify.ns";
}
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266

#
# Verify user and get his DB uid.
#
if (! UNIX2DBUID($UID, \$dbuid)) {
    die("*** $0:\n".
	"    You do not exist in the Emulab Database.\n");
}

#
# Get email info for user.
#
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
    die("*** $0:\n".
	"    Cannot determine your name and email address.\n");
}

#
Chad Barb's avatar
 
Chad Barb committed
267
# Verify that this person can muck with the experiment.
268 269 270 271 272
# Note that any script down the line has to do an admin check also. 
#
if ($UID && !TBAdmin($UID) &&
    !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
Chad Barb's avatar
 
Chad Barb committed
273
	"    You do not have permission to swap or modify this experiment!\n");
274 275
}

276 277 278 279
# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

280 281 282 283 284 285 286 287 288
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    $SIG{TERM} = 'IGNORE';
    $SIG{QUIT} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}

289 290 291 292 293
#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
Chad Barb's avatar
Chad Barb committed
294
# the tb scripts.
295 296 297 298 299 300 301 302 303 304 305 306 307
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

if (! $query_result->numrows) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid exists!\n");
}
my %hashrow = $query_result->fetchhash();
my $expt_head_login = $hashrow{'expt_head_uid'};
my $estate          = $hashrow{'state'};
308
my $batchstate      = $hashrow{'batchstate'};
309
my $expt_path       = $hashrow{'path'};
310
my $expt_locked     = $hashrow{'expt_locked'};
311
my $isbatchexpt     = $hashrow{'batchmode'};
312
my $canceled        = $hashrow{'canceled'};
313
my $linktest_level  = $hashrow{'linktest_level'};
314 315 316 317 318 319 320 321 322 323
my $swappablebit= $hashrow{'swappable'};
my $idleswapbit = $hashrow{'idleswap'};
my $autoswapbit = $hashrow{'autoswap'};
my $swappablestr= ( $swappablebit ? "Yes" : "No" );
my $idleswapstr = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr = ( $autoswapbit ? "Yes" : "No" );
my $noswap      = $hashrow{'noswap_reason'};
my $noidleswap  = $hashrow{'noidleswap_reason'};
my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0;
324

325 326
if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
327
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
328 329 330
    $idleswaptime = $idleswap_time / 60.0;
}

331 332
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
333
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
334 335
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");
336

337
if (! chdir($workdir)) {
338
    die("*** $0:\n".
339
	"    Could not chdir to $workdir: $!\n");
340 341
}

342
#
343 344 345
# This script is called from the batch daemon.
# 
if ($batch) {
346
    #
347 348 349
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be in proper state for the
    # operation requested. 
350
    #
351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);
    
    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());

    if ($inout eq "in") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be QUEUED.\n")
	    if ($estate ne EXPTSTATE_QUEUED);
	
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid has been canceled! Aborting.\n")
	    if ($canceled);
    }
    elsif ($inout eq "out") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be ACTIVE.\n")
	    if ($estate ne EXPTSTATE_ACTIVE);
375 376
    }
    else {
377 378 379 380 381 382
	die("*** $0:\n".
	    "    Improper request from batch daemon for $pid/$eid!\n");
    }
}
else {
    if ($isbatchexpt) {
383 384 385 386
	#
	# User is requesting that a batch either be injected or paused.
	# Sanity check the state, but otherwise let the batch daemon
	# handle it.
387 388
	#
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
389
	    if ($canceled);
390

391
	if ($inout eq "in") {
392
	    ExitWithStatus(1,
393 394 395 396
			   "Batch experiment $pid/$eid must be SWAPPED to\n".
			   "QUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_SWAPPED);
	    SetExpState($pid, $eid, EXPTSTATE_QUEUED);
397 398
	}
	elsif ($inout eq "out") {
399
	    ExitWithStatus(1,
400 401 402 403
			   "Batch experiment $pid/$eid must be ACTIVE or\n".
			   "ACTIVATING to swap out. Currently $estate.")
		if ($estate ne EXPTSTATE_ACTIVE &&
		    $estate ne EXPTSTATE_ACTIVATING);
404 405 406 407 408

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    # 
409
	    TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
410 411
	}
	elsif ($inout eq "pause") {
412
	    ExitWithStatus(1,
413 414 415
			   "Batch experiment $pid/$eid must be QUEUED to\n".
			   "DEQUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_QUEUED);
416 417

	    #
418 419 420 421
	    # XXX. The batch daemon might already have the experiment, but
	    # not have shipped it off to startexp. Change the state
	    # anyway. The error will be noticed later when startexp dies,
	    # and the batch daemon gets the error back. This sucks.
422
	    #
423
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
424
	}
425
	elsif ($inout eq "modify") {
426
	    ExitWithStatus(1,
427 428 429 430 431 432
			   "Batch experiment $pid/$eid must be SWAPPED or\n".
			   "ACTIVE to modify. Currently $estate.")
		if (($estate ne EXPTSTATE_SWAPPED &&
		     $estate ne EXPTSTATE_ACTIVATING) ||
		    $batchstate != BATCHSTATE_UNLOCKED());

433
	    #
434
	    # Otherwise, proceed with the modify. The experiment will be
435 436
	    # locked below, and so it cannot be injected or otherwise messed
	    # with since its state is going to be changed before we unlock
437 438 439 440
	    # the experiments table. The batch daemon will leave it alone
	    # until the modify is done. If the modify fails and cannot recover
	    # it is going to get swapped out; that is okay since the batch
	    # daemon does not keep state internally. 
441
	    #
442 443
	    goto doit;
	}
444 445
	else {
	    die("*** $0:\n",
446
		"    Operation $inout not allowed on a batch experiment!\n");
447
	}
448 449
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid state has been changed.\n");
450
      doit:
451
    }
452 453 454 455 456 457 458 459 460 461
    else {
	#
	# If the cancel flag is set, then user must wait for that to
	# clear before we can do anything else.
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid has its cancel flag set!.\n".
		       "You must wait for that to clear before you can swap\n".
		       "or modify the experiment.\n")
	    if ($canceled);
462

463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
	#
	# Check the state for the various operations.
	#
	if (!$force) {
	  SWITCH: for ($inout) {
	      /^in$/i && do {
		  if ($estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped out!");
		  }
		  last SWITCH;
	      };
	      /^out$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_ACTIVATING()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in ".
				     "or activating!\n");
		  }
		  
		  if ($estate eq EXPTSTATE_ACTIVATING()) {
		      #
		      # All we can do is set the cancel flag and hope that
		      # it gets noticed. We do not wait. 
		      # 
		      TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
		      
		      ExitWithStatus(0,
				     "Experiment $pid/$eid swapin has been  ".
				     "marked for cancelation.\n".
				     "You will receive email when the original ".
				     "swap request has finished.");
		  }
		  last SWITCH;
	      };
	      /^restart$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in!");
		  }
		  last SWITCH;
	      };
	      /^modify$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid must be ACTIVE or\n".
				     "SWAPPED to modify!\n");
		  }
		  last SWITCH;
	      };
	      die("*** $0:\n".
		  "    Missing state check for action: $action\n");
	  }
517 518
	}
    }
519 520
}

521 522 523 524 525 526 527
#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
SWITCH: for ($inout) {
    /^in$/i && do {
528
	$nextswapstate = EXPTSTATE_ACTIVATING();
529 530 531
	last SWITCH;
    };
    /^out$/i && do {
532
	$nextswapstate = EXPTSTATE_SWAPPING();
533 534 535
	last SWITCH;
    };
    /^restart$/i && do {
536
	$nextswapstate = EXPTSTATE_RESTARTING();
537 538 539
	last SWITCH;
    };
    /^modify$/i && do {
540 541
	$nextswapstate = (($estate eq EXPTSTATE_SWAPPED()) ?
			  EXPTSTATE_MODIFY_PARSE() : EXPTSTATE_MODIFY_REPARSE());
542 543
	last SWITCH;
    };
544
    die("*** $0:\n".
545
	"    Missing state check for action: $action\n");
546
}
547 548
 
# Update idleswap_timeout to whatever the current value is.
549
if ($inout ne "out") {
550 551 552
    DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
		 "where eid='$eid' and pid='$pid'");
}
553

554 555 556 557 558 559 560
#
# On a failure, we go back to this swapstate. Might be modified below.
# 
$termswapstate = $estate;

# Lock the record, set the nextstate, and unlock the table.
TBLockExp($pid, $eid, $nextswapstate);
561 562 563 564
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
565 566 567 568
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
569 570
#

571 572 573 574 575 576 577 578 579
if ($inout eq "in") {
    $action = "swapped in";
}
if ($inout eq "out") {
    $action = "swapped out";
}
if ($inout eq "restart") {
    $action = "restarted";
}
Chad Barb's avatar
 
Chad Barb committed
580 581 582
if ($inout eq "modify") {
    $action = "modified";
}
583

584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
#
my $expt_head_name;
my $expt_head_email;

if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    print STDERR "*** WARNING: ".
	         "Could not determine name/email for $expt_head_login.\n";
    $expt_head_name  = "TBOPS";
    $expt_head_email = $TBOPS;
}

599 600 601
#
# Before going to background, we have to copy out the NS file!
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
602
if ($inout eq "modify" && defined($modnsfile)) {
603 604 605 606 607 608 609 610
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
	die("*** $0:\n".
	    "    Could not copy $tempnsfile to $modnsfile");
    }
    chmod(0664, "$modnsfile");
}

611 612 613 614
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
615
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
616
    TBExptSetLogFile($pid, $eid, $logname);
617
    TBExptOpenLogFile($pid, $eid);
Chad Barb's avatar
Chad Barb committed
618

619 620 621 622 623
    if (my $childpid = TBBackGround($logname)) {
	#
	# Parent exits normally, except if in waitmode. 
	#
	if (!$waitmode) {
624 625 626
	    print("Experiment $pid/$eid is now being $action.\n".
		  "You will be notified via email when the this is done.\n")
		if (! $quiet);
627 628
	    exit(0);
	}
629 630 631 632 633 634 635 636
	print("Waiting for experiment $eid to finish its swap${action}\n")
	    if (! $quiet);
	    
	if (isatty(STDIN) && !$quiet) {
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
	}
637 638 639 640 641 642 643 644 645 646 647 648
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

649
	#
650
	# Wait until child exits or until user gets bored and types ^C.
651
	#
652 653
	waitpid($childpid, 0);
	
654 655
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
656
	exit($? >> 8);
657
    }
658
    TBdbfork();
659 660
}

661 662 663 664 665 666 667 668
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
#
# Gather stats; start clock ticking
#
if ($inout eq "in") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "out") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "modify") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0,
		    TBDB_STATS_FLAGS_START);
}

685 686 687
#
# Remove old report file since its contents are going to be invalid.
#
688
if ($inout ne "restart" && -e $repfile) {
689 690 691
    unlink("$repfile");
}

692 693 694 695
#
# Sanity check states in case someone changes something.
#
if ($inout eq "out") {
696 697 698 699
    my $optarg = (($force || $idleswap) ? "-force" : "");
    
    print STDOUT "Running 'tbswap out $optarg $pid $eid'\n";
    if (system("$tbdir/tbswap out $optarg $pid $eid") != 0) {
700
	$errorstat = $? >> 8;
701
	fatal("tbswap out failed!");
702
    }
703
    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
704
}
705
elsif ($inout eq "in") {
706
    print STDOUT "Running 'tbswap in $pid $eid'\n";
Chad Barb's avatar
 
Chad Barb committed
707
    if (system("$tbdir/tbswap in $pid $eid") != 0) {
708
	$errorstat = $? >> 8;
709
	fatal("tbswap in failed!");
710
    }
711
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
712

713
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
Chad Barb's avatar
Chad Barb committed
714
}
Chad Barb's avatar
 
Chad Barb committed
715
elsif ($inout eq "modify") {
Chad Barb's avatar
Chad Barb committed
716
    my $modifyError = "";
717
    my $oldstate    = $estate;
Chad Barb's avatar
Chad Barb committed
718

719 720 721
    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

Chad Barb's avatar
Chad Barb committed
722
    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
723
    if (TBExptBackupVirtualState($pid, $eid)) {
724
	fatal("Could not backup experiment state; cannot safely continue!");
Chad Barb's avatar
Chad Barb committed
725 726 727
    }

    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
728 729
    # Rerun tbprerun if modifying, but only if new NS file provided.
    # Yep, we allow reswap without changing the NS file. For Shashi and SIM. 
Chad Barb's avatar
Chad Barb committed
730
    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
731 732 733 734 735
    if (defined($modnsfile)) {
	print STDOUT "Running 'tbprerun $pid $eid $modnsfile'\n";
	if (system("$tbdir/tbprerun $pid $eid $modnsfile") != 0) {
	    $modifyError = "tbprerun failed!";
	}
Chad Barb's avatar
Chad Barb committed
736 737
    }

Chad Barb's avatar
 
Chad Barb committed
738
    #
739
    # Our next state depends on whether the experiment was active or swapped.
Chad Barb's avatar
 
Chad Barb committed
740
    #
741 742 743
    if (! $modifyError) {
	if ($estate eq EXPTSTATE_SWAPPED) {
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
Chad Barb's avatar
 
Chad Barb committed
744
	}
745 746 747 748
	else {
	    SetExpState($pid, $eid, EXPTSTATE_MODIFY_RESWAP);
	    
	    my $optarg = ($reboot ? "-reboot" : "");
749
	    $optarg .= ($eventsys_restart ? " -eventsys_restart" : "");
750 751 752 753 754 755

	    print STDOUT "Running 'tbswap update $optarg $pid $eid'\n";
	    if (system("$tbdir/tbswap update $optarg $pid $eid") != 0) {
		$errorstat = $? >> 8;
		$modifyError = "tbswap update failed!";
	    }
Chad Barb's avatar
 
Chad Barb committed
756

757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772
	    #
	    # See what tbswap did. It might have swapped it out if there
	    # was an error. 
	    # 
	    if (! $modifyError) {
		SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
		$estate = EXPTSTATE_ACTIVE;
	    }
	    elsif ($errorstat & 0x40) {
		#
		# Icky. Magic return code that says tbswap swapped it out.
		# We do not want tbswap to muck with states anymore, so
		# need to know what it did. At some point we should clean
		# up the exit reporting! Anyway, fatal() needs to know the
		# the right state to go back to (no longer ACTIVE).
		#
773 774 775
		# XXX This errorstat (0x40) is important to testbed_stats!
		# We should probably put in a swapout record instead.
		#
776 777
		$estate = EXPTSTATE_SWAPPED;
		$termswapstate = EXPTSTATE_SWAPPED;
778
		$modifySwapped = 1;
779 780
                # Old accounting info.
		TBSetExpSwapTime($pid, $eid);
Chad Barb's avatar
 
Chad Barb committed
781
	    }
782
	}
Chad Barb's avatar
Chad Barb committed
783 784 785
    }

    if ($modifyError) {
786
	print STDOUT "Modify Error: $modifyError\n";
Chad Barb's avatar
Chad Barb committed
787
	print STDOUT "Recovering experiment state...\n";
788
	
789 790
	# Must deal with the prerender explicitly since it runs background.
	system("prerender -r $pid $eid");
791
	TBExptRemoveVirtualState($pid, $eid);
792
	
793
	if (TBExptRestoreVirtualState($pid, $eid) == 0) {
794 795
	    # Must deal with the prerender explicitly since it runs background.
	    system("prerender -t $pid $eid");
796 797 798 799 800
	    fatal("Update aborted; old state restored.");
	}
	else {
	    $modifyHosed = 1;
	    fatal("Experiment state could not be restored!");
Chad Barb's avatar
Chad Barb committed
801
	}
Chad Barb's avatar
 
Chad Barb committed
802
    }
803
    
804
    TBExptClearBackupState($pid, $eid);
805
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
806
}
Chad Barb's avatar
 
Chad Barb committed
807
else { # $inout eq "restart" assumed.
808
    print STDOUT "Running 'tbrestart $pid $eid'\n";
809
    if (system("$tbdir/tbrestart $pid $eid") != 0) {
810
	fatal("tbrestart failed!");
811
    }
812
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
813
}
814

815 816 817 818 819 820 821 822 823 824
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
825
system("cp -Rfp $workdir/ $userdir/tbdata/");
826

827 828 829 830 831 832 833 834 835 836 837 838 839 840
#
# Deal with linktest. If requested, swapping in or modifying, and experiment
# is indeed active, then run it!
#
# XXX - linktest uses files from $userdir/tbdata/, so the above cp must
#       happen first!
#
if ($linktest_level && ExpState($pid, $eid) eq EXPTSTATE_ACTIVE) {
    #
    # Run it. No worries about failures.
    #
    my $output = "linktest.$$";
    my $optarg = "-l $linktest_level -o $output";
    
841 842
    print STDOUT "Running 'linktest_control $optarg $pid $eid'\n";
    if (system("$TB/sbin/linktest_control $optarg $pid $eid") != 0) {
843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859
	system("cat $output")
	    if (-r $output);
	
	print STDERR "*** WARNING: ".
	             "Linktest run returned non-zero status!\n";
	
	SENDMAIL("$user_name <$user_email>",
		 "Linktest Failure: $pid/$eid",
		 "Failure in linktest (level $linktest_level); ".
		 "returned non-zero status",
		 "$user_name <$user_email>",
		 "Cc: $expt_head_name <$expt_head_email>\n".
		 "Cc: $TBOPS",
		 ($output));
    }
}

860 861 862 863
#
# Gather stats. 
#
if ($inout eq "in") {
864
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0);
865 866
}
elsif ($inout eq "out") {
867
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
868
		    ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0));
869 870
}
elsif ($inout eq "modify") {
871
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0);
872
}
873 874
# Old accounting info.
TBSetExpSwapTime($pid, $eid);
875

876 877 878 879 880 881
#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
882
# In batch mode, just exit without sending email or unlocking. The
883
# batch daemon will take care of that.
884 885 886 887 888
#
if ($batch) {
    exit(0);
}

889 890 891 892 893 894 895
#
# Clear the log file so the web page stops spewing. 
#
if (defined($logname)) {
    TBExptCloseLogFile($pid, $eid);
}

896 897 898
#
# Must unlock before exit.
#
899
TBUnLockExp($pid, $eid);
900 901 902 903 904

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above). 
#
905
TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
906 907 908

print "Swap Success!\n";

909 910 911 912
#
# Send email notification to user.
#
my $message =
913 914
    "Experiment $eid in project $pid has been ";

915
if ($inout eq "out" && ($idleswap || $autoswap || $force) ) {
916
    $message .= "forcibly swapped out by\nEmulab";
917 918 919 920 921
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	  "(See also the Idle-Swap info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($autoswap) {
922 923
	$message .= " because it exceeded its Maximum Duration.\n".
	  "(See also the Max. Duration info in \n".
924 925 926 927 928
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($force) {
	$message .= ". (See also our Node Usage Policies in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    }
929 930 931 932 933
}
else {
    $message .= "$action.\n";
}

934 935 936 937 938
if ($inout eq "in") {
    # Add the swap settings...
    $message .="\nCurrent swap settings:\n$swapsettings";
}

939 940
$message .=
    "\n".
941 942
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";
943 944

SENDMAIL("$user_name <$user_email>",
945
	 "Experiment $pid/$eid \u$action",
946
	 $message,
947
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
948 949
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
950 951
	 (($inout eq "restart") ? ($logname) :
	  (($repfile, $logname), (defined($modnsfile) ? ($modnsfile) : ()))));
952 953 954 955 956 957

exit 0;

sub fatal($)
{
    my($mesg) = $_[0];
Chad Barb's avatar
Chad Barb committed
958

959 960
    print STDOUT "*** $0:\n".
	         "    $mesg\n";
961

962 963 964 965 966 967 968 969 970 971
    #
    # Gather stats. 
    #
    if ($inout eq "in") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, $errorstat);
    }
    elsif ($inout eq "out") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $errorstat);
    }
    elsif ($inout eq "modify") {
972 973 974 975 976 977
	#
	# If the modify fails, and the experiment is swapped out, then
	# insert a record for that since that is different then a modify
	# that fails, but results in the experiment being restored the
	# way it was. 
	#
978
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, $errorstat);
979 980 981 982
	
	if ($modifySwapped) {
	    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0);
	}
983 984
    }

985 986 987 988 989 990 991
    #
    # Clear backup state since not needed anymore; experiment is toast. 
    # 
    if ($inout eq "modify") {
	TBExptClearBackupState($pid, $eid);
    }

Chad Barb's avatar
 
Chad Barb committed
992
    #
993
    # If hosed, we entirely terminate the experiment.
Chad Barb's avatar
 
Chad Barb committed
994
    #
995
    if ($modifyHosed) {
996 997
	my $stat = 0;
	    
Chad Barb's avatar
 
Chad Barb committed
998
	#
999
	# Note: $estate is indeed still set appropriately!
Chad Barb's avatar
 
Chad Barb committed
1000 1001
	#
	if ($estate eq EXPTSTATE_ACTIVE) {
1002
	    print "Running 'tbswap out -force $pid $eid'\n";
Chad Barb's avatar
 
Chad Barb committed
1003 1004
	    if (system("$tbdir/tbswap out -force $pid $eid") != 0) {
		print "tbswap out failed!\n";
1005
		$stat = $? >> 8;
Chad Barb's avatar
 
Chad Barb committed
1006
	    }
1007
	    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $stat);
Chad Barb's avatar
 
Chad Barb committed
1008
	}
Chad Barb's avatar
Chad Barb committed
1009

1010
	$stat = 0;
1011
	print "Running 'tbend -force $pid $eid'\n";
Chad Barb's avatar
 
Chad Barb committed
1012 1013
	if (system("$tbdir/tbend -force $pid $eid") != 0) {
	    print "tbend failed!\n";
1014
	    $stat = $? >> 8;
Chad Barb's avatar
 
Chad Barb committed
1015
	}
1016 1017 1018 1019 1020 1021
	
	#
	# Okay, we are going to destroy the experiment below.
	# 
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_TERMINATE, 0);
	
1022
	# Must override since we are so badly hosed. 
1023
	$termswapstate = EXPTSTATE_TERMINATED;
Chad Barb's avatar
 
Chad Barb committed
1024 1025
    }

1026 1027 1028
    # Copy over the log files so the user can see them.
    system("/bin/cp -Rfp $workdir/ $userdir/tbdata");

1029 1030 1031
    # Set proper state, which is typically the way we came in.
    SetExpState($pid, $eid, $termswapstate);

1032
    #
1033
    # In batch mode, exit without sending the email or unlocking. The
1034
    # batch daemon will take care of that.
1035 1036
    #
    if ($batch) {
1037
	exit($errorstat);
1038 1039
    }

1040
    #
Chad Barb's avatar
Chad Barb committed
1041
    # Clear the log file so the web page stops spewing.
1042 1043 1044 1045 1046
    #
    if (defined($logname)) {
	TBExptCloseLogFile($pid, $eid);
    }

1047 1048
    # Unlock and reset state to its terminal value.
    TBUnLockExp($pid, $eid);
1049 1050 1051 1052 1053

    #
    # Clear the cancel flag now that the operation is complete. Must be done
    # after we change the experiment state (above).
    #
1054
    TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
1055

1056 1057 1058 1059
    #
    # Send a message to the testbed list. Append the logfile.
    #
    SENDMAIL("$user_name <$user_email>",
1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075
	 "Swap ${inout} Failure: $pid/$eid",
	 $mesg . "\n\n" .
	 "Please look at the log below to see what happened. If the error\n".
	 "resulted from a lack of free nodes, you can use this web page to\n".
	 "get a summary of free nodes:\n\n".
	 "  ${TBBASE}/nodecontrol_list.php3\n".
	 "\n".
	 "Please do not try again until you see enough nodes free. Or, you\n".
	 "can use the batch system to swap your experiment in when enough\n".
	 "nodes are free:\n\n".
	 "  ${TBDOCBASE}/tutorial/docwrapper.php3?docname=tutorial.html".
	     "#BatchMode\n",
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Cc:  $TBOPS",
	 (($logname), (defined($modnsfile) ? ($modnsfile) : ())));