swapexp.in 33 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7 8
# All rights reserved.
#

9 10
use English;
use Getopt::Std;
11
use POSIX qw(isatty setsid);
12 13

#
Chad Barb's avatar
Chad Barb committed
14
# This gets invoked from the Web interface.
Chad Barb's avatar
 
Chad Barb committed
15
# Swap an experiment in, swap it out, restart or modify.
16
#
Chad Barb's avatar
Chad Barb committed
17

18 19
sub usage()
{
20
    print(STDERR
21
	  "Usage: swapexp [-q] [-b | -w] [-i | -a | -f] [-r] [-e]\n".
22 23 24 25
	  "               <-s in | out | restart | modify | pause>\n".
	  "               <pid> <eid> [<nsfile>]\n".
	  "switches and arguments:\n".
	  "-w       - wait for non-batchmode experiment swap/modify\n".
26
	  "-q       - be less chatty\n".
27 28 29 30 31 32
	  "-r       - reboot nodes when doing a modify experiment\n".
	  "-e       - restart event scheduler when doing a modify experiment\n".
	  "-s <op>  - Operation to perform; one of those listed above\n".
	  "<pid>    - The project the experiment belongs to\n".
	  "<eid>    - The experiment name (id)\n".
	  "<nsfile> - Optional NS file to parse for experiment modify\n");
33 34
    exit(-1);
}
35
my  $optlist = "biafres:wq";
36

37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;
    
    if ($status < 0) {
	die("*** $0:\n".
	    "    $message\n");
    }
    else {
	print STDERR "$message\n";
    }
    exit($status);
}

61 62 63 64 65 66
#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
67
my $TBINFO = "$TB/expinfo";
68
my $TBDOCBASE = "@TBDOCBASE@";
69
my $TBBASE = "@TBBASE@";
70 71 72 73 74 75 76 77

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

78 79 80 81 82 83 84 85 86
# Be careful not to exit on transient error; 0 means infinite retry.
$libdb::DBQUERY_MAXTRIES = 0;

# For the END block below.
my $cleaning = 0;
my $justexit = 1;
my $signaled = 0;

my $tbdir    = "$TB/bin";
87
my $tbdata   = "tbdata";
88
my $batch    = 0;
89
my $idleswap = 0;
90 91
my $autoswap = 0;
my $force    = 0;
Chad Barb's avatar
Chad Barb committed
92
my $reboot   = 0;
93
my $waitmode = 0;
94
my $quiet    = 0;
95
my $eventsys_restart   = 0;
96
my $errorstat= -1;
97 98
my $modifyHosed   = 0;
my $modifySwapped = 0;
Chad Barb's avatar
 
Chad Barb committed
99

100 101 102 103 104
my $inout;
my $logname;
my $dbuid;
my $user_name;
my $user_email;
105
my @allnodes;
106
my @row;
107
my $action;
108
my $nextswapstate;
109
my $termswapstate;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
110
my $isadmin  = 0;
Chad Barb's avatar
 
Chad Barb committed
111

112 113 114
#
# Untaint the path
# 
115
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
116 117 118 119 120 121 122
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

123 124 125 126 127 128 129
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

130 131 132 133 134 135 136 137
#
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
138 139 140
if (defined($options{"i"})) {
    $idleswap = 1;
}
141 142 143
if (defined($options{"w"})) {
    $waitmode = 1;
}
144 145 146 147 148 149
if (defined($options{"a"})) {
    $autoswap = 1;
}
if (defined($options{"f"})) {
    $force = 1;
}
150 151 152
if (defined($options{"b"})) {
    $batch = 1;
}
Chad Barb's avatar
 
Chad Barb committed
153 154 155
if (defined($options{"r"})) {
    $reboot = 1;
}
156 157 158
if (defined($options{"e"})) {
    $eventsys_restart = 1;
}
159 160 161
if (defined($options{"q"})) {
    $quiet = 1;
}
162 163 164
if (defined($options{"s"})) {
    $inout = $options{"s"};

Chad Barb's avatar
Chad Barb committed
165 166 167
    if ($inout ne "out"     &&
	$inout ne "in"      &&
	$inout ne "restart" &&
168
	$inout ne "pause"   &&
Chad Barb's avatar
 
Chad Barb committed
169
	$inout ne "modify") {
170 171 172 173 174 175 176
	usage();
    }
}
else {
    usage();
}

177 178 179 180 181
usage()
    if (($waitmode && $batch) ||
	($inout ne "modify" && @ARGV != 2) ||
	(($waitmode || $batch) && ($idleswap || $autoswap || $force)));

182 183 184 185 186
if ($eventsys_restart && $inout ne "modify") {
    print STDOUT "Usage: swapexp: -e (eventsys_restart) can be used ".
                 "only with -s modify\n";
    usage();
}
Chad Barb's avatar
 
Chad Barb committed
187 188 189
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

190 191 192
#
# Untaint the arguments.
#
193
if ($pid =~ /^([-\w\.]+)$/) {
194 195 196 197 198
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
199
if ($eid =~ /^([-\w\.]+)$/) {
200 201 202 203 204
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}
205
my $repfile = "$eid.report";
206 207
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
208 209 210
my $tempnsfile;
my $modnsfile;

Leigh B. Stoller's avatar
Leigh B. Stoller committed
211
if ($inout eq "modify" && @ARGV > 2) {
212 213 214 215 216
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
217
    if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
218
	$tempnsfile = $1;
219 220
    }
    else {
221 222 223 224 225 226 227 228 229 230
	die("Tainted nsfile name: $tempnsfile\n");
    }
    #
    # Called from ops interactively. Make sure NS file in /proj or /users.
    #
    # Use realpath to resolve any symlinks.
    #
    my $translated = `realpath $tempnsfile`;
    if ($translated =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
231
    }
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
    else {
	die("Tainted nsfile returned by realpath: $translated\n");
    }

    #
    # The file must reside in /proj, /groups, or /users. Since this script
    # runs as the caller, regular file permission checks ensure its a file
    # the user is allowed to use. /tmp/$guid-$nsref.nsfile also allowed
    # since this script is invoked directly from web interface, which generates
    # a name that should not be guessable, so as long as it looks to be in
    # proper format, we accept it. 
    #
    if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile/) &&
	! ($tempnsfile =~ /^\/var\/tmp\/php\w+/) &&
	! ($tempnsfile =~ /^\/proj/) &&
	! ($tempnsfile =~ /^\/groups/) &&
	! ($tempnsfile =~ /^\/users/)) {
	die("$tempnsfile does not resolve to an appropriate directory!\n");
    }

    if (! -f $tempnsfile || -z $tempnsfile || ! -r $tempnsfile) {
253 254 255
	die("*** $0:\n".
	    "    $tempnsfile does not look like an NS file!\n");
    }
256 257
    $modnsfile = "$eid-modify.ns";
}
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273

#
# Verify user and get his DB uid.
#
if (! UNIX2DBUID($UID, \$dbuid)) {
    die("*** $0:\n".
	"    You do not exist in the Emulab Database.\n");
}

#
# Get email info for user.
#
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
    die("*** $0:\n".
	"    Cannot determine your name and email address.\n");
}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
274
$isadmin = TBAdmin($UID);
275 276

#
Chad Barb's avatar
 
Chad Barb committed
277
# Verify that this person can muck with the experiment.
278 279
# Note that any script down the line has to do an admin check also. 
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
280
if ($UID && !$isadmin &&
281 282
    !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
Chad Barb's avatar
 
Chad Barb committed
283
	"    You do not have permission to swap or modify this experiment!\n");
284 285
}

286 287 288 289
# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

290 291 292 293 294 295 296 297 298
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    $SIG{TERM} = 'IGNORE';
    $SIG{QUIT} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}

299 300 301 302 303
#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
Chad Barb's avatar
Chad Barb committed
304
# the tb scripts.
305 306 307 308 309 310 311 312 313 314 315 316 317
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

if (! $query_result->numrows) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid exists!\n");
}
my %hashrow = $query_result->fetchhash();
my $expt_head_login = $hashrow{'expt_head_uid'};
my $estate          = $hashrow{'state'};
318
my $batchstate      = $hashrow{'batchstate'};
319
my $expt_path       = $hashrow{'path'};
320
my $expt_locked     = $hashrow{'expt_locked'};
321
my $isbatchexpt     = $hashrow{'batchmode'};
322
my $canceled        = $hashrow{'canceled'};
323
my $linktest_level  = $hashrow{'linktest_level'};
324 325 326 327 328 329 330 331 332 333
my $swappablebit= $hashrow{'swappable'};
my $idleswapbit = $hashrow{'idleswap'};
my $autoswapbit = $hashrow{'autoswap'};
my $swappablestr= ( $swappablebit ? "Yes" : "No" );
my $idleswapstr = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr = ( $autoswapbit ? "Yes" : "No" );
my $noswap      = $hashrow{'noswap_reason'};
my $noidleswap  = $hashrow{'noidleswap_reason'};
my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0;
334
my $rendering   = $hashrow{'prerender_pid'};
335
my $elabinelab  = $hashrow{'elab_in_elab'};
336

337 338
if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
339
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
340 341 342
    $idleswaptime = $idleswap_time / 60.0;
}

343 344
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
345
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
346 347
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");
348

349
if (! chdir($workdir)) {
350
    die("*** $0:\n".
351
	"    Could not chdir to $workdir: $!\n");
352 353
}

354
#
355 356 357
# This script is called from the batch daemon.
# 
if ($batch) {
358
    #
359 360 361
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be in proper state for the
    # operation requested. 
362
    #
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);
    
    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());

    if ($inout eq "in") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be QUEUED.\n")
	    if ($estate ne EXPTSTATE_QUEUED);
	
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid has been canceled! Aborting.\n")
	    if ($canceled);
    }
    elsif ($inout eq "out") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be ACTIVE.\n")
	    if ($estate ne EXPTSTATE_ACTIVE);
387 388
    }
    else {
389 390 391 392 393 394
	die("*** $0:\n".
	    "    Improper request from batch daemon for $pid/$eid!\n");
    }
}
else {
    if ($isbatchexpt) {
395 396 397 398
	#
	# User is requesting that a batch either be injected or paused.
	# Sanity check the state, but otherwise let the batch daemon
	# handle it.
399 400
	#
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
401
	    if ($canceled);
402

403
	if ($inout eq "in") {
404
	    ExitWithStatus(1,
405 406 407 408
			   "Batch experiment $pid/$eid must be SWAPPED to\n".
			   "QUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_SWAPPED);
	    SetExpState($pid, $eid, EXPTSTATE_QUEUED);
409 410
	}
	elsif ($inout eq "out") {
411
	    ExitWithStatus(1,
412 413 414 415
			   "Batch experiment $pid/$eid must be ACTIVE or\n".
			   "ACTIVATING to swap out. Currently $estate.")
		if ($estate ne EXPTSTATE_ACTIVE &&
		    $estate ne EXPTSTATE_ACTIVATING);
416 417 418 419 420

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    # 
421
	    TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
422 423
	}
	elsif ($inout eq "pause") {
424
	    ExitWithStatus(1,
425 426 427
			   "Batch experiment $pid/$eid must be QUEUED to\n".
			   "DEQUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_QUEUED);
428 429

	    #
430 431 432 433
	    # XXX. The batch daemon might already have the experiment, but
	    # not have shipped it off to startexp. Change the state
	    # anyway. The error will be noticed later when startexp dies,
	    # and the batch daemon gets the error back. This sucks.
434
	    #
435
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
436
	}
437
	elsif ($inout eq "modify") {
438
	    ExitWithStatus(1,
439 440 441 442 443 444
			   "Batch experiment $pid/$eid must be SWAPPED or\n".
			   "ACTIVE to modify. Currently $estate.")
		if (($estate ne EXPTSTATE_SWAPPED &&
		     $estate ne EXPTSTATE_ACTIVATING) ||
		    $batchstate != BATCHSTATE_UNLOCKED());

445
	    #
446
	    # Otherwise, proceed with the modify. The experiment will be
447 448
	    # locked below, and so it cannot be injected or otherwise messed
	    # with since its state is going to be changed before we unlock
449 450 451 452
	    # the experiments table. The batch daemon will leave it alone
	    # until the modify is done. If the modify fails and cannot recover
	    # it is going to get swapped out; that is okay since the batch
	    # daemon does not keep state internally. 
453
	    #
454 455
	    goto doit;
	}
456 457
	else {
	    die("*** $0:\n",
458
		"    Operation $inout not allowed on a batch experiment!\n");
459
	}
460 461
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid state has been changed.\n");
462
      doit:
463
    }
464 465 466 467 468 469 470 471 472 473
    else {
	#
	# If the cancel flag is set, then user must wait for that to
	# clear before we can do anything else.
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid has its cancel flag set!.\n".
		       "You must wait for that to clear before you can swap\n".
		       "or modify the experiment.\n")
	    if ($canceled);
474

Leigh B. Stoller's avatar
Leigh B. Stoller committed
475 476 477 478 479 480 481 482 483 484 485
  	#
 	# Cannot swapmod an active elabinelab experiment, yet.
 	# 
 	ExitWithStatus(1,
 		       "Experiment $pid/$eid is an active ElabInElab.\n".
 		       "You cannot modify this type of experiment while it\n".
 		       "is swapped in. We hope to support this soon.\n")
 	    if ($inout eq "modify" &&
 		($elabinelab || defined($elabinelab_eid)) &&
 		$estate ne EXPTSTATE_SWAPPED());

486 487 488 489 490 491 492 493 494 495 496 497 498 499
	#
	# Check the state for the various operations.
	#
	if (!$force) {
	  SWITCH: for ($inout) {
	      /^in$/i && do {
		  if ($estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped out!");
		  }
		  last SWITCH;
	      };
	      /^out$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
Leigh B. Stoller's avatar
Leigh B. Stoller committed
500
 		      $estate ne EXPTSTATE_PANICED() &&
501 502 503 504 505 506
		      $estate ne EXPTSTATE_ACTIVATING()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in ".
				     "or activating!\n");
		  }
		  
Leigh B. Stoller's avatar
Leigh B. Stoller committed
507 508 509 510 511 512 513 514 515 516 517 518
 		  #
 		  # Must be an admin person to swap out an experiment that
 		  # has had its panic button pressed.
 		  #
 		  if ($estate eq EXPTSTATE_PANICED() && !$isadmin) {
 		      ExitWithStatus(1,
 				     "Experiment $pid/$eid had its panic ".
 				     "button pressed!\n".
 				     "Only a testbed administrator can swap ".
 				     "this experiment out.");
 		  }

519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
		  if ($estate eq EXPTSTATE_ACTIVATING()) {
		      #
		      # All we can do is set the cancel flag and hope that
		      # it gets noticed. We do not wait. 
		      # 
		      TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
		      
		      ExitWithStatus(0,
				     "Experiment $pid/$eid swapin has been  ".
				     "marked for cancelation.\n".
				     "You will receive email when the original ".
				     "swap request has finished.");
		  }
		  last SWITCH;
	      };
	      /^restart$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in!");
		  }
		  last SWITCH;
	      };
	      /^modify$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid must be ACTIVE or\n".
				     "SWAPPED to modify!\n");
		  }
		  last SWITCH;
	      };
	      die("*** $0:\n".
		  "    Missing state check for action: $action\n");
	  }
553 554
	}
    }
555 556
}

557 558 559 560 561 562 563
#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
SWITCH: for ($inout) {
    /^in$/i && do {
564
	$nextswapstate = EXPTSTATE_ACTIVATING();
565 566 567
	last SWITCH;
    };
    /^out$/i && do {
568
	$nextswapstate = EXPTSTATE_SWAPPING();
569 570 571
	last SWITCH;
    };
    /^restart$/i && do {
572
	$nextswapstate = EXPTSTATE_RESTARTING();
573 574 575
	last SWITCH;
    };
    /^modify$/i && do {
576 577
	$nextswapstate = (($estate eq EXPTSTATE_SWAPPED()) ?
			  EXPTSTATE_MODIFY_PARSE() : EXPTSTATE_MODIFY_REPARSE());
578 579
	last SWITCH;
    };
580
    die("*** $0:\n".
581
	"    Missing state check for action: $action\n");
582
}
583 584
 
# Update idleswap_timeout to whatever the current value is.
585
if ($inout ne "out") {
586 587 588
    DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
		 "where eid='$eid' and pid='$pid'");
}
589

590 591 592 593 594 595
#
# On a failure, we go back to this swapstate. Might be modified below.
# 
$termswapstate = $estate;

# Lock the record, set the nextstate, and unlock the table.
596 597 598 599 600 601 602 603 604 605
TBLockExp($pid, $eid, $nextswapstate)
    or die("*** $0:\n".
	   "Failed to set experiment state to $nextswapstate\n");

#
# At this point, we need to force a cleanup no matter how we exit.
# See the END block below.
#
$justexit = 0;

606 607 608 609
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
610 611 612 613
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
614 615
#

616 617 618 619 620 621 622 623 624
if ($inout eq "in") {
    $action = "swapped in";
}
if ($inout eq "out") {
    $action = "swapped out";
}
if ($inout eq "restart") {
    $action = "restarted";
}
Chad Barb's avatar
 
Chad Barb committed
625 626 627
if ($inout eq "modify") {
    $action = "modified";
}
628

629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
#
my $expt_head_name;
my $expt_head_email;

if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    print STDERR "*** WARNING: ".
	         "Could not determine name/email for $expt_head_login.\n";
    $expt_head_name  = "TBOPS";
    $expt_head_email = $TBOPS;
}

644 645 646
#
# Before going to background, we have to copy out the NS file!
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
647
if ($inout eq "modify" && defined($modnsfile)) {
648 649
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
650
	fatal("Could not copy $tempnsfile to $modnsfile");
651 652 653 654
    }
    chmod(0664, "$modnsfile");
}

655 656 657 658
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
659
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
660
    TBExptSetLogFile($pid, $eid, $logname);
661
    TBExptOpenLogFile($pid, $eid);
Chad Barb's avatar
Chad Barb committed
662

663 664
    if (my $childpid = TBBackGround($logname)) {
	#
665 666
	# Parent exits normally, unless in waitmode. We have to set
	# justexit to make sure the END block below does not run.
667
	#
668 669
	$justexit = 1;

670
	if (!$waitmode) {
671 672 673
	    print("Experiment $pid/$eid is now being $action.\n".
		  "You will be notified via email when the this is done.\n")
		if (! $quiet);
674 675
	    exit(0);
	}
676 677 678 679 680 681 682 683
	print("Waiting for experiment $eid to finish its swap${action}\n")
	    if (! $quiet);
	    
	if (isatty(STDIN) && !$quiet) {
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
	}
684 685 686 687 688 689 690 691 692 693 694 695
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

696
	#
697
	# Wait until child exits or until user gets bored and types ^C.
698
	#
699 700
	waitpid($childpid, 0);
	
701 702
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
703
	exit($? >> 8);
704
    }
705
    TBdbfork();
706 707
}

708 709 710 711 712 713 714 715
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735
#
# We need to catch TERM cause sometimes shit happens and we have to kill
# an experiment swap that is hung or otherwise scrogged. Rather then 
# trying to kill off the children one by one, lets arrange to catch it
# here and send a killpg to the children. This is not to be done lightly,
# cause it can leave things worse then they were before!
#
sub handler ($) {
    my ($signame) = @_;
    
    $SIG{TERM} = 'IGNORE';
    my $pgrp = getpgrp(0);
    kill('TERM', -$pgrp);
    sleep(1);
    $signaled = 1;
    fatal("Caught SIG${signame}! Killing experiment setup ...");
}
$SIG{TERM} = \&handler;
$SIG{QUIT} = 'DEFAULT';

736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751
#
# Gather stats; start clock ticking
#
if ($inout eq "in") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "out") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "modify") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0,
		    TBDB_STATS_FLAGS_START);
}

752 753 754
#
# Remove old report file since its contents are going to be invalid.
#
755
if ($inout ne "restart" && -e $repfile) {
756 757 758
    unlink("$repfile");
}

759 760 761 762
#
# Sanity check states in case someone changes something.
#
if ($inout eq "out") {
763
    my $optarg = (($force || $idleswap) ? "-force" : "");
764

765 766
    print STDOUT "Running 'tbswap out $optarg $pid $eid'\n";
    if (system("$tbdir/tbswap out $optarg $pid $eid") != 0) {
767
	$errorstat = $? >> 8;
768
	fatal("tbswap out failed!");
769
    }
770 771
    SetExpState($pid, $eid, EXPTSTATE_SWAPPED)
	or fatal("Failed to set experiment state to " . EXPTSTATE_SWAPPED());
Leigh B. Stoller's avatar
Leigh B. Stoller committed
772
    TBExptClearPanicBit($pid, $eid);
773
}
774
elsif ($inout eq "in") {
775 776 777
    my $optarg = "";
    
    print STDOUT "Running 'tbswap in $optarg $pid $eid'\n";
Chad Barb's avatar
 
Chad Barb committed
778
    if (system("$tbdir/tbswap in $pid $eid") != 0) {
779
	$errorstat = $? >> 8;
780
	fatal("tbswap in failed!");
781
    }
782 783
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE)
	or fatal("Failed to set experiment state to " . EXPTSTATE_ACTIVE());
784

785
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
Chad Barb's avatar
Chad Barb committed
786
}
Chad Barb's avatar
 
Chad Barb committed
787
elsif ($inout eq "modify") {
788
    my $modifyError;
Chad Barb's avatar
Chad Barb committed
789

790 791 792
    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

Chad Barb's avatar
Chad Barb committed
793
    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
794
    if (TBExptBackupVirtualState($pid, $eid)) {
795
	fatal("Could not backup experiment state; cannot safely continue!");
Chad Barb's avatar
Chad Barb committed
796 797 798
    }

    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
799
    # Rerun tbprerun if modifying, but only if new NS file provided.
800 801
    # Yep, we allow reswap without changing the NS file. For Shashi and SIM.
    # Note that tbprerun kills the renderer if its running.
Chad Barb's avatar
Chad Barb committed
802
    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
803 804 805
    if (defined($modnsfile)) {
	print STDOUT "Running 'tbprerun $pid $eid $modnsfile'\n";
	if (system("$tbdir/tbprerun $pid $eid $modnsfile") != 0) {
806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824
	    print STDOUT "Modify Error: tbprerun failed.\n";
	    print STDOUT "Recovering experiment state...\n";

	    if (TBExptRemoveVirtualState($pid, $eid) ||
		TBExptRestoreVirtualState($pid, $eid)) {
		$modifyHosed = 1;
		fatal("Experiment state could not be restored!");
		# Never returns;
	    }
	    #
	    # If the renderer was running when we started the swapmod, then we
	    # want to restart it. If it was stopped, then the renderer info
	    # was captured with the rest of the virtual state (restored above).
	    #
	    system("prerender -t $pid $eid")
		if ($rendering);

	    fatal("Update aborted; old virtual state restored.");
	    # Never returns;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
825
	}
Chad Barb's avatar
Chad Barb committed
826 827
    }

Chad Barb's avatar
 
Chad Barb committed
828
    #
829
    # Our next state depends on whether the experiment was active or swapped.
Chad Barb's avatar
 
Chad Barb committed
830
    #
831 832 833 834 835
    if ($estate eq EXPTSTATE_SWAPPED) {
	SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
    }
    else {
	SetExpState($pid, $eid, EXPTSTATE_MODIFY_RESWAP);
836 837 838 839 840 841 842 843 844 845 846 847 848

	my $optarg = "";
	#
	# For elabinelab experiments; ignore reboot/eventsys_restart,
	# and force noreconfig; none of it will work or make sense. 
	#
	if ($elabinelab) {
	    $optarg = "-noreconfig";
	}
	else {
	    $optarg  = ($reboot ? "-reboot" : "");
	    $optarg .= ($eventsys_restart ? " -eventsys_restart" : "");
	}
849

850 851 852 853 854 855 856 857 858 859 860
	print STDOUT "Running 'tbswap update $optarg $pid $eid'\n";
	if (system("$tbdir/tbswap update $optarg $pid $eid") == 0) {
	    #
	    # Success. Set the state back to active cause thats where it started.
	    # 
	    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
	    $estate = EXPTSTATE_ACTIVE;
	}
	else {
	    $modifyError = $errorstat = $? >> 8;
	    print STDOUT "Modify Error: tbswap update failed.\n";
Chad Barb's avatar
 
Chad Barb committed
861

862
	    #
863 864 865
	    # tbswap either restored the experiment to the way it was,
	    # or it swapped it out completely. In either case, it has
	    # also restored the virtual state. 
866
	    # 
867 868 869 870 871 872 873
	    # Icky. Magic return code that says tbswap swapped it out.
	    # We do not want tbswap to muck with states anymore, so
	    # need to know what it did. At some point we should clean
	    # up the exit reporting! Anyway, fatal() needs to know the
	    # the right state to go back to (no longer ACTIVE).
	    #
	    if ($errorstat & 0x40) {
874 875
		$estate = EXPTSTATE_SWAPPED;
		$termswapstate = EXPTSTATE_SWAPPED;
876
		$modifySwapped = 1;
877 878
                # Old accounting info.
		TBSetExpSwapTime($pid, $eid);
879 880 881 882
		$modifyError = "Update aborted; experiment swapped out.";
	    }
	    else {
		$modifyError = "Update aborted; old state restored.";
Chad Barb's avatar
 
Chad Barb committed
883
	    }
884
	}
Chad Barb's avatar
Chad Barb committed
885 886
    }

887 888 889 890 891 892 893 894 895
    #
    # We need to rerender only if the NS file was changed (ran tbprerun),
    # If the swapmod succeeded, then unconditionally run the renderer. If
    # swap failed, then need to run the renderer only if we stopped one in
    # progress.
    #
    if (defined($modnsfile)) {
	system("prerender -t $pid $eid")	
	    if (!defined($modifyError) || $rendering)
Chad Barb's avatar
 
Chad Barb committed
896
    }
897 898 899 900 901 902 903

    #
    # Swapmod failed ...
    #
    fatal($modifyError)
	if (defined($modifyError));

904
    TBExptClearBackupState($pid, $eid);
905
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
906
}
Chad Barb's avatar
 
Chad Barb committed
907
else { # $inout eq "restart" assumed.
908
    print STDOUT "Running 'tbrestart $pid $eid'\n";
909
    if (system("$tbdir/tbrestart $pid $eid") != 0) {
910
	fatal("tbrestart failed!");
911
    }
912
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
913
}
914

915 916 917 918 919 920 921 922 923 924
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
925
system("cp -Rfp $workdir/ $userdir/tbdata/");
926

927 928 929 930 931 932 933 934 935 936 937
#
# Deal with linktest. If requested, swapping in or modifying, and experiment
# is indeed active, then run it!
#
# XXX - linktest uses files from $userdir/tbdata/, so the above cp must
#       happen first!
#
if ($linktest_level && ExpState($pid, $eid) eq EXPTSTATE_ACTIVE) {
    #
    # Run it. No worries about failures.
    #
938
    my $output = "$userdir/logs/linktest.log";
939 940
    my $optarg = "-l $linktest_level -o $output";
    
941 942
    print STDOUT "Running 'linktest_control $optarg $pid $eid'\n";
    if (system("$TB/sbin/linktest_control $optarg $pid $eid") != 0) {
943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959
	system("cat $output")
	    if (-r $output);
	
	print STDERR "*** WARNING: ".
	             "Linktest run returned non-zero status!\n";
	
	SENDMAIL("$user_name <$user_email>",
		 "Linktest Failure: $pid/$eid",
		 "Failure in linktest (level $linktest_level); ".
		 "returned non-zero status",
		 "$user_name <$user_email>",
		 "Cc: $expt_head_name <$expt_head_email>\n".
		 "Cc: $TBOPS",
		 ($output));
    }
}

960 961 962 963
#
# Gather stats. 
#
if ($inout eq "in") {
964
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0);
965 966
}
elsif ($inout eq "out") {
967
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
968
		    ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0));
969 970
}
elsif ($inout eq "modify") {
971
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0);
972
}
973 974
# Old accounting info.
TBSetExpSwapTime($pid, $eid);
975

976 977 978 979 980 981
#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
982
# In batch mode, just exit without sending email or unlocking. The
983
# batch daemon will take care of that.
984 985 986 987 988
#
if ($batch) {
    exit(0);
}

989 990 991 992 993 994 995
#
# Clear the log file so the web page stops spewing. 
#
if (defined($logname)) {
    TBExptCloseLogFile($pid, $eid);
}

996 997 998
#
# Must unlock before exit.
#
999
TBUnLockExp($pid, $eid);
1000 1001 1002 1003 1004

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above). 
#
1005
TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
1006 1007 1008

print "Swap Success!\n";

1009 1010 1011 1012
#
# Send email notification to user.
#
my $message =
1013 1014
    "Experiment $eid in project $pid has been ";

1015
if ($inout eq "out" && ($idleswap || $autoswap || $force) ) {
1016
    $message .= "forcibly swapped out by\nEmulab";
1017 1018 1019 1020 1021
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	  "(See also the Idle-Swap info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($autoswap) {
1022 1023
	$message .= " because it exceeded its Maximum Duration.\n".
	  "(See also the Max. Duration info in \n".
1024 1025 1026 1027 1028
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($force) {
	$message .= ". (See also our Node Usage Policies in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    }
1029 1030 1031 1032 1033
}
else {
    $message .= "$action.\n";
}

1034 1035 1036 1037 1038
if ($inout eq "in") {
    # Add the swap settings...
    $message .="\nCurrent swap settings:\n$swapsettings";
}

1039 1040
$message .=
    "\n".
1041 1042
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";
1043 1044

SENDMAIL("$user_name <$user_email>",
1045
	 "Experiment $pid/$eid \u$action",
1046
	 $message,
1047
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
1048 1049
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
1050 1051
	 (($inout eq "restart") ? ($logname) :
	  (($repfile, $logname), (defined($modnsfile) ? ($modnsfile) : ()))));
1052

1053
exit(0);
1054

1055
sub cleanup()
1056
{
1057 1058 1059 1060 1061 1062 1063 1064 1065 1066
    #
    # Gather stats. 
    #
    if ($inout eq "in") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, $errorstat);
    }
    elsif ($inout eq "out") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $errorstat);
    }
    elsif ($inout eq "modify") {
1067 1068 1069 1070 1071 1072
	#
	# If the modify fails, and the experiment is swapped out, then
	# insert a record for that since that is different then a modify
	# that fails, but results in the experiment being restored the
	# way it was. 
	#
1073
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, $errorstat);
1074 1075 1076 1077
	
	if ($modifySwapped) {
	    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0);
	}
1078 1079
    }

1080 1081 1082 1083 1084 1085 1086
    #
    # Clear backup state since not needed anymore; experiment is toast. 
    # 
    if ($inout eq "modify") {
	TBExptClearBackupState($pid, $eid);
    }

Chad Barb's avatar
 
Chad Barb committed
1087
    #
1088
    # If hosed, we entirely terminate the experiment.
Chad Barb's avatar
 
Chad Barb committed
1089
    #
1090
    if ($modifyHosed) {
1091 1092
	my $stat = 0;
	    
Chad Barb's avatar
 
Chad Barb committed
1093
	#
1094
	# Note: $estate is indeed still set appropriately!
Chad Barb's avatar
 
Chad Barb committed
1095 1096
	#
	if ($estate eq EXPTSTATE_ACTIVE) {
1097 1098 1099 1100
	    my $optarg = "-force";
    
	    print "Running 'tbswap out $optarg $pid $eid'\n";
	    if (system("$tbdir/tbswap out $optarg $pid $eid") != 0) {
Chad Barb's avatar
 
Chad Barb committed
1101
		print "tbswap out failed!\n";
1102
		$stat = $? >> 8;
Chad Barb's avatar
 
Chad Barb committed
1103
	    }
Leigh B. Stoller's avatar