swapexp.in 33.6 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2005 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7 8
# All rights reserved.
#

9 10
use English;
use Getopt::Std;
11
use POSIX qw(isatty setsid);
12 13

#
Chad Barb's avatar
Chad Barb committed
14
# This gets invoked from the Web interface.
Chad Barb's avatar
 
Chad Barb committed
15
# Swap an experiment in, swap it out, restart or modify.
16
#
Chad Barb's avatar
Chad Barb committed
17

18 19
sub usage()
{
20
    print(STDERR
21
	  "Usage: swapexp [-q] [-b | -w] [-i | -a | -f] [-r] [-e]\n".
22 23 24 25
	  "               <-s in | out | restart | modify | pause>\n".
	  "               <pid> <eid> [<nsfile>]\n".
	  "switches and arguments:\n".
	  "-w       - wait for non-batchmode experiment swap/modify\n".
26
	  "-q       - be less chatty\n".
27 28 29 30 31 32
	  "-r       - reboot nodes when doing a modify experiment\n".
	  "-e       - restart event scheduler when doing a modify experiment\n".
	  "-s <op>  - Operation to perform; one of those listed above\n".
	  "<pid>    - The project the experiment belongs to\n".
	  "<eid>    - The experiment name (id)\n".
	  "<nsfile> - Optional NS file to parse for experiment modify\n");
33 34
    exit(-1);
}
35
my  $optlist = "biafres:wq";
36

37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;
    
    if ($status < 0) {
	die("*** $0:\n".
	    "    $message\n");
    }
    else {
	print STDERR "$message\n";
    }
    exit($status);
}

61 62 63 64 65 66
#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
my $TBLOGS = "@TBLOGSEMAIL@";
67
my $TBINFO = "$TB/expinfo";
68
my $TBDOCBASE = "@TBDOCBASE@";
69
my $TBBASE = "@TBBASE@";
70 71 72 73 74 75 76 77

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;

78 79 80 81 82 83 84 85 86
# Be careful not to exit on transient error; 0 means infinite retry.
$libdb::DBQUERY_MAXTRIES = 0;

# For the END block below.
my $cleaning = 0;
my $justexit = 1;
my $signaled = 0;

my $tbdir    = "$TB/bin";
87
my $tbdata   = "tbdata";
88
my $batch    = 0;
89
my $idleswap = 0;
90 91
my $autoswap = 0;
my $force    = 0;
Chad Barb's avatar
Chad Barb committed
92
my $reboot   = 0;
93
my $waitmode = 0;
94
my $quiet    = 0;
95
my $eventsys_restart   = 0;
96
my $errorstat= -1;
97 98
my $modifyHosed   = 0;
my $modifySwapped = 0;
Chad Barb's avatar
 
Chad Barb committed
99

100 101 102 103 104
my $inout;
my $logname;
my $dbuid;
my $user_name;
my $user_email;
105
my @allnodes;
106
my @row;
107
my $action;
108
my $nextswapstate;
109
my $termswapstate;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
110
my $isadmin  = 0;
Chad Barb's avatar
 
Chad Barb committed
111

112 113 114
#
# Untaint the path
# 
115
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
116 117 118 119 120 121 122
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

123 124 125 126 127 128 129
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

130 131 132 133 134 135 136 137
#
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
138 139 140
if (defined($options{"i"})) {
    $idleswap = 1;
}
141 142 143
if (defined($options{"w"})) {
    $waitmode = 1;
}
144 145 146 147 148 149
if (defined($options{"a"})) {
    $autoswap = 1;
}
if (defined($options{"f"})) {
    $force = 1;
}
150 151 152
if (defined($options{"b"})) {
    $batch = 1;
}
Chad Barb's avatar
 
Chad Barb committed
153 154 155
if (defined($options{"r"})) {
    $reboot = 1;
}
156 157 158
if (defined($options{"e"})) {
    $eventsys_restart = 1;
}
159 160 161
if (defined($options{"q"})) {
    $quiet = 1;
}
162 163 164
if (defined($options{"s"})) {
    $inout = $options{"s"};

Chad Barb's avatar
Chad Barb committed
165 166 167
    if ($inout ne "out"     &&
	$inout ne "in"      &&
	$inout ne "restart" &&
168
	$inout ne "pause"   &&
Chad Barb's avatar
 
Chad Barb committed
169
	$inout ne "modify") {
170 171 172 173 174 175 176
	usage();
    }
}
else {
    usage();
}

177 178 179 180 181
usage()
    if (($waitmode && $batch) ||
	($inout ne "modify" && @ARGV != 2) ||
	(($waitmode || $batch) && ($idleswap || $autoswap || $force)));

182 183 184 185 186
if ($eventsys_restart && $inout ne "modify") {
    print STDOUT "Usage: swapexp: -e (eventsys_restart) can be used ".
                 "only with -s modify\n";
    usage();
}
Chad Barb's avatar
 
Chad Barb committed
187 188 189
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

190 191 192
#
# Untaint the arguments.
#
193
if ($pid =~ /^([-\w\.]+)$/) {
194 195 196 197 198
    $pid = $1;
}
else {
    die("Tainted argument $pid!\n");
}
199
if ($eid =~ /^([-\w\.]+)$/) {
200 201 202 203 204
    $eid = $1;
}
else {
    die("Tainted argument $eid!\n");
}
205
my $repfile = "$eid.report";
206 207
my $workdir = TBExptWorkDir($pid, $eid);
my $userdir = TBExptUserDir($pid, $eid);
208 209 210
my $tempnsfile;
my $modnsfile;

Leigh B. Stoller's avatar
Leigh B. Stoller committed
211
if ($inout eq "modify" && @ARGV > 2) {
212 213 214 215 216
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
217
    if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
218
	$tempnsfile = $1;
219 220
    }
    else {
221 222 223 224 225 226 227 228 229 230
	die("Tainted nsfile name: $tempnsfile\n");
    }
    #
    # Called from ops interactively. Make sure NS file in /proj or /users.
    #
    # Use realpath to resolve any symlinks.
    #
    my $translated = `realpath $tempnsfile`;
    if ($translated =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
231
    }
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
    else {
	die("Tainted nsfile returned by realpath: $translated\n");
    }

    #
    # The file must reside in /proj, /groups, or /users. Since this script
    # runs as the caller, regular file permission checks ensure its a file
    # the user is allowed to use. /tmp/$guid-$nsref.nsfile also allowed
    # since this script is invoked directly from web interface, which generates
    # a name that should not be guessable, so as long as it looks to be in
    # proper format, we accept it. 
    #
    if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile/) &&
	! ($tempnsfile =~ /^\/var\/tmp\/php\w+/) &&
	! ($tempnsfile =~ /^\/proj/) &&
	! ($tempnsfile =~ /^\/groups/) &&
	! ($tempnsfile =~ /^\/users/)) {
	die("$tempnsfile does not resolve to an appropriate directory!\n");
    }

    if (! -f $tempnsfile || -z $tempnsfile || ! -r $tempnsfile) {
253 254 255
	die("*** $0:\n".
	    "    $tempnsfile does not look like an NS file!\n");
    }
256 257
    $modnsfile = "$eid-modify.ns";
}
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273

#
# Verify user and get his DB uid.
#
if (! UNIX2DBUID($UID, \$dbuid)) {
    die("*** $0:\n".
	"    You do not exist in the Emulab Database.\n");
}

#
# Get email info for user.
#
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
    die("*** $0:\n".
	"    Cannot determine your name and email address.\n");
}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
274
$isadmin = TBAdmin($UID);
275 276

#
Chad Barb's avatar
 
Chad Barb committed
277
# Verify that this person can muck with the experiment.
278 279
# Note that any script down the line has to do an admin check also. 
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
280
if ($UID && !$isadmin &&
281 282
    !TBExptAccessCheck($dbuid, $pid, $eid, TB_EXPT_DESTROY)) {
    die("*** $0:\n".
Chad Barb's avatar
 
Chad Barb committed
283
	"    You do not have permission to swap or modify this experiment!\n");
284 285
}

286 287 288 289
# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

290 291 292 293 294 295 296 297 298
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    $SIG{TERM} = 'IGNORE';
    $SIG{QUIT} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}

299 300 301 302 303 304
#
# Temp fix; Disallow swapmod to firewalled experiments. This will come
# out later.
#
my $firewalled = TBExptFirewall($pid, $eid);

305 306 307 308 309
#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
Chad Barb's avatar
Chad Barb committed
310
# the tb scripts.
311 312 313 314 315 316 317 318 319 320 321 322 323
#
DBQueryFatal("lock tables experiments write");

$query_result =
    DBQueryFatal("SELECT * FROM experiments WHERE eid='$eid' and pid='$pid'");

if (! $query_result->numrows) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid exists!\n");
}
my %hashrow = $query_result->fetchhash();
my $expt_head_login = $hashrow{'expt_head_uid'};
my $estate          = $hashrow{'state'};
324
my $batchstate      = $hashrow{'batchstate'};
325
my $expt_path       = $hashrow{'path'};
326
my $expt_locked     = $hashrow{'expt_locked'};
327
my $isbatchexpt     = $hashrow{'batchmode'};
328
my $canceled        = $hashrow{'canceled'};
329
my $linktest_level  = $hashrow{'linktest_level'};
330 331 332 333 334 335 336 337 338 339
my $swappablebit= $hashrow{'swappable'};
my $idleswapbit = $hashrow{'idleswap'};
my $autoswapbit = $hashrow{'autoswap'};
my $swappablestr= ( $swappablebit ? "Yes" : "No" );
my $idleswapstr = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr = ( $autoswapbit ? "Yes" : "No" );
my $noswap      = $hashrow{'noswap_reason'};
my $noidleswap  = $hashrow{'noidleswap_reason'};
my $idleswaptime= $hashrow{'idleswap_timeout'} / 60.0;
my $autoswaptime= $hashrow{'autoswap_timeout'} / 60.0;
340
my $rendering   = $hashrow{'prerender_pid'};
341
my $elabinelab  = $hashrow{'elab_in_elab'};
342
my $lockdown    = $hashrow{'lockdown'};
343

344 345
if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
346
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
347 348 349
    $idleswaptime = $idleswap_time / 60.0;
}

350 351
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
352
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
353 354
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");
355

356
if (! chdir($workdir)) {
357
    die("*** $0:\n".
358
	"    Could not chdir to $workdir: $!\n");
359 360
}

361
#
362 363 364
# This script is called from the batch daemon.
# 
if ($batch) {
365
    #
366 367 368
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be in proper state for the
    # operation requested. 
369
    #
370 371 372 373 374 375 376 377 378
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);
    
    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());

379 380 381 382
    die("*** $0:\n".
	"    Batch experiment $pid/$eid is locked down; cannot be swapped!\n")
	if ($lockdown);

383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
    if ($inout eq "in") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be QUEUED.\n")
	    if ($estate ne EXPTSTATE_QUEUED);
	
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid has been canceled! Aborting.\n")
	    if ($canceled);
    }
    elsif ($inout eq "out") {
	die("*** $0:\n".
	    "    Batch experiment $pid/$eid is not in the proper state!\n".
	    "    Currently $estate, but should be ACTIVE.\n")
	    if ($estate ne EXPTSTATE_ACTIVE);
398 399
    }
    else {
400 401 402 403 404 405
	die("*** $0:\n".
	    "    Improper request from batch daemon for $pid/$eid!\n");
    }
}
else {
    if ($isbatchexpt) {
406 407 408 409
	#
	# User is requesting that a batch either be injected or paused.
	# Sanity check the state, but otherwise let the batch daemon
	# handle it.
410 411
	#
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
412
	    if ($canceled);
413

414 415 416
	ExitWithStatus(1, "Batch experiment $pid/$eid is locked down!")
	    if ($lockdown);

417
	if ($inout eq "in") {
418
	    ExitWithStatus(1,
419 420 421 422
			   "Batch experiment $pid/$eid must be SWAPPED to\n".
			   "QUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_SWAPPED);
	    SetExpState($pid, $eid, EXPTSTATE_QUEUED);
423 424
	}
	elsif ($inout eq "out") {
425
	    ExitWithStatus(1,
426 427 428 429
			   "Batch experiment $pid/$eid must be ACTIVE or\n".
			   "ACTIVATING to swap out. Currently $estate.")
		if ($estate ne EXPTSTATE_ACTIVE &&
		    $estate ne EXPTSTATE_ACTIVATING);
430 431 432 433 434

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    # 
435
	    TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
436 437
	}
	elsif ($inout eq "pause") {
438
	    ExitWithStatus(1,
439 440 441
			   "Batch experiment $pid/$eid must be QUEUED to\n".
			   "DEQUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_QUEUED);
442 443

	    #
444 445 446 447
	    # XXX. The batch daemon might already have the experiment, but
	    # not have shipped it off to startexp. Change the state
	    # anyway. The error will be noticed later when startexp dies,
	    # and the batch daemon gets the error back. This sucks.
448
	    #
449
	    SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
450
	}
451
	elsif ($inout eq "modify") {
452
	    ExitWithStatus(1,
453 454 455 456
			   "Batch experiment $pid/$eid must be SWAPPED or\n".
			   "ACTIVE to modify. Currently $estate.")
		if (($estate ne EXPTSTATE_SWAPPED &&
		     $estate ne EXPTSTATE_ACTIVATING) ||
457
		    $batchstate ne BATCHSTATE_UNLOCKED());
458

459 460 461
	    ExitWithStatus(1, "Cannot modify a firewalled experiment (yet).")
		if ($firewalled);

462
	    #
463
	    # Otherwise, proceed with the modify. The experiment will be
464 465
	    # locked below, and so it cannot be injected or otherwise messed
	    # with since its state is going to be changed before we unlock
466 467 468 469
	    # the experiments table. The batch daemon will leave it alone
	    # until the modify is done. If the modify fails and cannot recover
	    # it is going to get swapped out; that is okay since the batch
	    # daemon does not keep state internally. 
470
	    #
471 472
	    goto doit;
	}
473 474
	else {
	    die("*** $0:\n",
475
		"    Operation $inout not allowed on a batch experiment!\n");
476
	}
477 478
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid state has been changed.\n");
479
      doit:
480
    }
481 482 483 484 485 486 487 488 489 490
    else {
	#
	# If the cancel flag is set, then user must wait for that to
	# clear before we can do anything else.
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid has its cancel flag set!.\n".
		       "You must wait for that to clear before you can swap\n".
		       "or modify the experiment.\n")
	    if ($canceled);
491

Leigh B. Stoller's avatar
Leigh B. Stoller committed
492 493 494 495 496 497 498
  	#
 	# Cannot swapmod an active elabinelab experiment, yet.
 	# 
 	ExitWithStatus(1,
 		       "Experiment $pid/$eid is an active ElabInElab.\n".
 		       "You cannot modify this type of experiment while it\n".
 		       "is swapped in. We hope to support this soon.\n")
499
 	    if ($inout eq "modify" && $elabinelab &&
Leigh B. Stoller's avatar
Leigh B. Stoller committed
500 501
 		$estate ne EXPTSTATE_SWAPPED());

502 503 504 505
 	ExitWithStatus(1,
		       "Experiment $pid/$eid is locked down; cannot swap!\n")
	    if ($lockdown);

506 507 508 509 510 511 512 513 514 515 516 517 518 519
	#
	# Check the state for the various operations.
	#
	if (!$force) {
	  SWITCH: for ($inout) {
	      /^in$/i && do {
		  if ($estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped out!");
		  }
		  last SWITCH;
	      };
	      /^out$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
Leigh B. Stoller's avatar
Leigh B. Stoller committed
520
 		      $estate ne EXPTSTATE_PANICED() &&
521 522 523 524 525 526
		      $estate ne EXPTSTATE_ACTIVATING()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in ".
				     "or activating!\n");
		  }
		  
Leigh B. Stoller's avatar
Leigh B. Stoller committed
527 528 529 530 531 532 533 534 535 536 537 538
 		  #
 		  # Must be an admin person to swap out an experiment that
 		  # has had its panic button pressed.
 		  #
 		  if ($estate eq EXPTSTATE_PANICED() && !$isadmin) {
 		      ExitWithStatus(1,
 				     "Experiment $pid/$eid had its panic ".
 				     "button pressed!\n".
 				     "Only a testbed administrator can swap ".
 				     "this experiment out.");
 		  }

539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567
		  if ($estate eq EXPTSTATE_ACTIVATING()) {
		      #
		      # All we can do is set the cancel flag and hope that
		      # it gets noticed. We do not wait. 
		      # 
		      TBSetCancelFlag($pid, $eid, EXPTCANCEL_SWAP);
		      
		      ExitWithStatus(0,
				     "Experiment $pid/$eid swapin has been  ".
				     "marked for cancelation.\n".
				     "You will receive email when the original ".
				     "swap request has finished.");
		  }
		  last SWITCH;
	      };
	      /^restart$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in!");
		  }
		  last SWITCH;
	      };
	      /^modify$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid must be ACTIVE or\n".
				     "SWAPPED to modify!\n");
		  }
568 569 570 571
		  ExitWithStatus(1,
				 "Cannot modify a firewalled experiment (yet).")
		      if ($firewalled);
		  
572 573 574 575 576
		  last SWITCH;
	      };
	      die("*** $0:\n".
		  "    Missing state check for action: $action\n");
	  }
577 578
	}
    }
579 580
}

581 582 583 584 585 586 587
#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
SWITCH: for ($inout) {
    /^in$/i && do {
588
	$nextswapstate = EXPTSTATE_ACTIVATING();
589 590 591
	last SWITCH;
    };
    /^out$/i && do {
592
	$nextswapstate = EXPTSTATE_SWAPPING();
593 594 595
	last SWITCH;
    };
    /^restart$/i && do {
596
	$nextswapstate = EXPTSTATE_RESTARTING();
597 598 599
	last SWITCH;
    };
    /^modify$/i && do {
600 601
	$nextswapstate = (($estate eq EXPTSTATE_SWAPPED()) ?
			  EXPTSTATE_MODIFY_PARSE() : EXPTSTATE_MODIFY_REPARSE());
602 603
	last SWITCH;
    };
604
    die("*** $0:\n".
605
	"    Missing state check for action: $action\n");
606
}
607 608
 
# Update idleswap_timeout to whatever the current value is.
609
if ($inout ne "out") {
610 611 612
    DBQueryFatal("update experiments set idleswap_timeout='$idleswap_time' ".
		 "where eid='$eid' and pid='$pid'");
}
613

614 615 616 617 618 619
#
# On a failure, we go back to this swapstate. Might be modified below.
# 
$termswapstate = $estate;

# Lock the record, set the nextstate, and unlock the table.
620 621 622 623 624 625 626 627 628 629
TBLockExp($pid, $eid, $nextswapstate)
    or die("*** $0:\n".
	   "Failed to set experiment state to $nextswapstate\n");

#
# At this point, we need to force a cleanup no matter how we exit.
# See the END block below.
#
$justexit = 0;

630 631 632 633
DBQueryFatal("unlock tables");

#
# XXX - At this point a failure is going to leave things in an
634 635 636 637
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
638 639
#

640 641 642 643 644 645 646 647 648
if ($inout eq "in") {
    $action = "swapped in";
}
if ($inout eq "out") {
    $action = "swapped out";
}
if ($inout eq "restart") {
    $action = "restarted";
}
Chad Barb's avatar
 
Chad Barb committed
649 650 651
if ($inout eq "modify") {
    $action = "modified";
}
652

653 654 655 656 657 658 659 660 661 662 663 664 665 666 667
#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
#
my $expt_head_name;
my $expt_head_email;

if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
    print STDERR "*** WARNING: ".
	         "Could not determine name/email for $expt_head_login.\n";
    $expt_head_name  = "TBOPS";
    $expt_head_email = $TBOPS;
}

668 669 670
#
# Before going to background, we have to copy out the NS file!
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
671
if ($inout eq "modify" && defined($modnsfile)) {
672 673
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
674
	fatal("Could not copy $tempnsfile to $modnsfile");
675 676 677 678
    }
    chmod(0664, "$modnsfile");
}

679 680 681 682
#
# If not in batch mode, go into the background. Parent exits.
#
if (! $batch) {
683
    $logname = TBExptCreateLogFile($pid, $eid, "swapexp");
684
    TBExptSetLogFile($pid, $eid, $logname);
685
    TBExptOpenLogFile($pid, $eid);
Chad Barb's avatar
Chad Barb committed
686

687 688
    if (my $childpid = TBBackGround($logname)) {
	#
689 690
	# Parent exits normally, unless in waitmode. We have to set
	# justexit to make sure the END block below does not run.
691
	#
692 693
	$justexit = 1;

694
	if (!$waitmode) {
695 696 697
	    print("Experiment $pid/$eid is now being $action.\n".
		  "You will be notified via email when the this is done.\n")
		if (! $quiet);
698 699
	    exit(0);
	}
700 701 702 703 704 705 706 707
	print("Waiting for experiment $eid to finish its swap${action}\n")
	    if (! $quiet);
	    
	if (isatty(STDIN) && !$quiet) {
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
	}
708 709 710 711 712 713 714 715 716 717 718 719
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

720
	#
721
	# Wait until child exits or until user gets bored and types ^C.
722
	#
723 724
	waitpid($childpid, 0);
	
725 726
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
727
	exit($? >> 8);
728
    }
729
    TBdbfork();
730 731
}

732 733 734 735 736 737 738 739
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
#
# We need to catch TERM cause sometimes shit happens and we have to kill
# an experiment swap that is hung or otherwise scrogged. Rather then 
# trying to kill off the children one by one, lets arrange to catch it
# here and send a killpg to the children. This is not to be done lightly,
# cause it can leave things worse then they were before!
#
sub handler ($) {
    my ($signame) = @_;
    
    $SIG{TERM} = 'IGNORE';
    my $pgrp = getpgrp(0);
    kill('TERM', -$pgrp);
    sleep(1);
    $signaled = 1;
    fatal("Caught SIG${signame}! Killing experiment setup ...");
}
$SIG{TERM} = \&handler;
$SIG{QUIT} = 'DEFAULT';

760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775
#
# Gather stats; start clock ticking
#
if ($inout eq "in") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "out") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
		    TBDB_STATS_FLAGS_START);
}
elsif ($inout eq "modify") {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0,
		    TBDB_STATS_FLAGS_START);
}

776 777 778
#
# Remove old report file since its contents are going to be invalid.
#
779
if ($inout ne "restart" && -e $repfile) {
780 781 782
    unlink("$repfile");
}

783 784 785 786
#
# Sanity check states in case someone changes something.
#
if ($inout eq "out") {
787
    my $optarg = (($force || $idleswap) ? "-force" : "");
788

789 790
    print STDOUT "Running 'tbswap out $optarg $pid $eid'\n";
    if (system("$tbdir/tbswap out $optarg $pid $eid") != 0) {
791
	$errorstat = $? >> 8;
792
	fatal("tbswap out failed!");
793
    }
794 795
    SetExpState($pid, $eid, EXPTSTATE_SWAPPED)
	or fatal("Failed to set experiment state to " . EXPTSTATE_SWAPPED());
Leigh B. Stoller's avatar
Leigh B. Stoller committed
796
    TBExptClearPanicBit($pid, $eid);
797
}
798
elsif ($inout eq "in") {
799 800 801
    my $optarg = "";
    
    print STDOUT "Running 'tbswap in $optarg $pid $eid'\n";
Chad Barb's avatar
 
Chad Barb committed
802
    if (system("$tbdir/tbswap in $pid $eid") != 0) {
803
	$errorstat = $? >> 8;
804
	fatal("tbswap in failed!");
805
    }
806 807
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE)
	or fatal("Failed to set experiment state to " . EXPTSTATE_ACTIVE());
808

809
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
Chad Barb's avatar
Chad Barb committed
810
}
Chad Barb's avatar
 
Chad Barb committed
811
elsif ($inout eq "modify") {
812
    my $modifyError;
Chad Barb's avatar
Chad Barb committed
813

814 815 816
    GatherSwapStats($pid, $eid, $dbuid,
		    TBDB_STATS_SWAPMODIFY, 0, TBDB_STATS_FLAGS_PREMODIFY);

Chad Barb's avatar
Chad Barb committed
817
    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
818
    if (TBExptBackupVirtualState($pid, $eid)) {
819
	fatal("Could not backup experiment state; cannot safely continue!");
Chad Barb's avatar
Chad Barb committed
820 821 822
    }

    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
823
    # Rerun tbprerun if modifying, but only if new NS file provided.
824 825
    # Yep, we allow reswap without changing the NS file. For Shashi and SIM.
    # Note that tbprerun kills the renderer if its running.
Chad Barb's avatar
Chad Barb committed
826
    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
827 828 829
    if (defined($modnsfile)) {
	print STDOUT "Running 'tbprerun $pid $eid $modnsfile'\n";
	if (system("$tbdir/tbprerun $pid $eid $modnsfile") != 0) {
830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848
	    print STDOUT "Modify Error: tbprerun failed.\n";
	    print STDOUT "Recovering experiment state...\n";

	    if (TBExptRemoveVirtualState($pid, $eid) ||
		TBExptRestoreVirtualState($pid, $eid)) {
		$modifyHosed = 1;
		fatal("Experiment state could not be restored!");
		# Never returns;
	    }
	    #
	    # If the renderer was running when we started the swapmod, then we
	    # want to restart it. If it was stopped, then the renderer info
	    # was captured with the rest of the virtual state (restored above).
	    #
	    system("prerender -t $pid $eid")
		if ($rendering);

	    fatal("Update aborted; old virtual state restored.");
	    # Never returns;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
849
	}
Chad Barb's avatar
Chad Barb committed
850 851
    }

Chad Barb's avatar
 
Chad Barb committed
852
    #
853
    # Our next state depends on whether the experiment was active or swapped.
Chad Barb's avatar
 
Chad Barb committed
854
    #
855 856 857 858 859
    if ($estate eq EXPTSTATE_SWAPPED) {
	SetExpState($pid, $eid, EXPTSTATE_SWAPPED);
    }
    else {
	SetExpState($pid, $eid, EXPTSTATE_MODIFY_RESWAP);
860 861 862 863 864 865 866 867 868 869 870 871 872

	my $optarg = "";
	#
	# For elabinelab experiments; ignore reboot/eventsys_restart,
	# and force noreconfig; none of it will work or make sense. 
	#
	if ($elabinelab) {
	    $optarg = "-noreconfig";
	}
	else {
	    $optarg  = ($reboot ? "-reboot" : "");
	    $optarg .= ($eventsys_restart ? " -eventsys_restart" : "");
	}
873

874 875 876 877 878 879 880 881 882 883 884
	print STDOUT "Running 'tbswap update $optarg $pid $eid'\n";
	if (system("$tbdir/tbswap update $optarg $pid $eid") == 0) {
	    #
	    # Success. Set the state back to active cause thats where it started.
	    # 
	    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
	    $estate = EXPTSTATE_ACTIVE;
	}
	else {
	    $modifyError = $errorstat = $? >> 8;
	    print STDOUT "Modify Error: tbswap update failed.\n";
Chad Barb's avatar
 
Chad Barb committed
885

886
	    #
887 888 889
	    # tbswap either restored the experiment to the way it was,
	    # or it swapped it out completely. In either case, it has
	    # also restored the virtual state. 
890
	    # 
891 892 893 894 895 896 897
	    # Icky. Magic return code that says tbswap swapped it out.
	    # We do not want tbswap to muck with states anymore, so
	    # need to know what it did. At some point we should clean
	    # up the exit reporting! Anyway, fatal() needs to know the
	    # the right state to go back to (no longer ACTIVE).
	    #
	    if ($errorstat & 0x40) {
898 899
		$estate = EXPTSTATE_SWAPPED;
		$termswapstate = EXPTSTATE_SWAPPED;
900
		$modifySwapped = 1;
901 902
                # Old accounting info.
		TBSetExpSwapTime($pid, $eid);
903 904 905 906
		$modifyError = "Update aborted; experiment swapped out.";
	    }
	    else {
		$modifyError = "Update aborted; old state restored.";
Chad Barb's avatar
 
Chad Barb committed
907
	    }
908
	}
Chad Barb's avatar
Chad Barb committed
909 910
    }

911 912 913 914 915 916 917 918 919
    #
    # We need to rerender only if the NS file was changed (ran tbprerun),
    # If the swapmod succeeded, then unconditionally run the renderer. If
    # swap failed, then need to run the renderer only if we stopped one in
    # progress.
    #
    if (defined($modnsfile)) {
	system("prerender -t $pid $eid")	
	    if (!defined($modifyError) || $rendering)
Chad Barb's avatar
 
Chad Barb committed
920
    }
921 922 923 924 925 926 927

    #
    # Swapmod failed ...
    #
    fatal($modifyError)
	if (defined($modifyError));

928
    TBExptClearBackupState($pid, $eid);
929
    system("$tbdir/tbreport -b $pid $eid 2>&1 > $repfile");
930
}
Chad Barb's avatar
 
Chad Barb committed
931
else { # $inout eq "restart" assumed.
932
    print STDOUT "Running 'tbrestart $pid $eid'\n";
933
    if (system("$tbdir/tbrestart $pid $eid") != 0) {
934
	fatal("tbrestart failed!");
935
    }
936
    SetExpState($pid, $eid, EXPTSTATE_ACTIVE);
937
}
938

939 940 941 942 943 944 945 946 947 948
#
# Try to copy off the files for testbed information gathering.
#
TBSaveExpLogFiles($pid, $eid);

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files. This overwrites existing files of course,
# but thats okay.
#
949
system("cp -Rfp $workdir/ $userdir/tbdata/");
950

951 952 953 954 955 956 957 958 959 960 961
#
# Deal with linktest. If requested, swapping in or modifying, and experiment
# is indeed active, then run it!
#
# XXX - linktest uses files from $userdir/tbdata/, so the above cp must
#       happen first!
#
if ($linktest_level && ExpState($pid, $eid) eq EXPTSTATE_ACTIVE) {
    #
    # Run it. No worries about failures.
    #
962
    my $output = "$userdir/logs/linktest.log";
963 964
    my $optarg = "-l $linktest_level -o $output";
    
965 966
    print STDOUT "Running 'linktest_control $optarg $pid $eid'\n";
    if (system("$TB/sbin/linktest_control $optarg $pid $eid") != 0) {
967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983
	system("cat $output")
	    if (-r $output);
	
	print STDERR "*** WARNING: ".
	             "Linktest run returned non-zero status!\n";
	
	SENDMAIL("$user_name <$user_email>",
		 "Linktest Failure: $pid/$eid",
		 "Failure in linktest (level $linktest_level); ".
		 "returned non-zero status",
		 "$user_name <$user_email>",
		 "Cc: $expt_head_name <$expt_head_email>\n".
		 "Cc: $TBOPS",
		 ($output));
    }
}

984 985 986 987
#
# Gather stats. 
#
if ($inout eq "in") {
988
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, 0);
989 990
}
elsif ($inout eq "out") {
991
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0,
992
		    ($idleswap ? TBDB_STATS_FLAGS_IDLESWAP() : 0));
993 994
}
elsif ($inout eq "modify") {
995
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, 0);
996
}
997 998
# Old accounting info.
TBSetExpSwapTime($pid, $eid);
999

1000 1001 1002 1003 1004 1005
#
# Set the swapper uid on success only, and *after* gathering swap stats!
#
TBExptSetSwapUID($pid, $eid, $dbuid);

#
1006
# In batch mode, just exit without sending email or unlocking. The
1007
# batch daemon will take care of that.
1008 1009 1010 1011 1012
#
if ($batch) {
    exit(0);
}

1013 1014 1015 1016 1017 1018 1019
#
# Clear the log file so the web page stops spewing. 
#
if (defined($logname)) {
    TBExptCloseLogFile($pid, $eid);
}

1020 1021 1022
#
# Must unlock before exit.
#
1023
TBUnLockExp($pid, $eid);
1024 1025 1026 1027 1028

#
# Since the swap completed, clear the cancel flag. This must be done
# after we change the experiment state (above). 
#
1029
TBSetCancelFlag($pid, $eid, EXPTCANCEL_CLEAR);
1030 1031 1032

print "Swap Success!\n";

1033 1034 1035 1036
#
# Send email notification to user.
#
my $message =
1037 1038
    "Experiment $eid in project $pid has been ";

1039
if ($inout eq "out" && ($idleswap || $autoswap || $force) ) {
1040
    $message .= "forcibly swapped out by\nEmulab";
1041 1042 1043 1044 1045
    if ($idleswap) {
	$message .= " because it was idle for too long (Idle-Swap).\n".
	  "(See also the Idle-Swap info in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($autoswap) {
1046 1047
	$message .= " because it exceeded its Maximum Duration.\n".
	  "(See also the Max. Duration info in \n".
1048 1049 1050 1051 1052
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    } elsif ($force) {
	$message .= ". (See also our Node Usage Policies in \n".
	  "$TBDOCBASE/docwrapper.php3?docname=swapping.html )\n";
    }
1053 1054 1055 1056 1057
}
else {
    $message .= "$action.\n";
}

1058 1059 1060 1061 1062
if ($inout eq "in") {
    # Add the swap settings...
    $message .="\nCurrent swap settings:\n$swapsettings";
}

1063 1064
$message .=
    "\n".
1065 1066
    "Appended below is the output. If you have any questions or comments,\n" .
    "please include the output in your message to $TBOPS\n";
1067 1068

SENDMAIL("$user_name <$user_email>",
1069
	 "Experiment $pid/$eid \u$action",
1070
	 $message,
1071
	 ($idleswap ? $TBOPS : "$user_name <$user_email>"),
1072 1073
	 "Cc:  $expt_head_name <$expt_head_email>\n".
	 "Bcc: $TBLOGS",
1074 1075
	 (($inout eq "restart") ? ($logname) :
	  (($repfile, $logname), (defined($modnsfile) ? ($modnsfile) : ()))));
1076

1077
exit(0);
1078

1079
sub cleanup()
1080
{
1081 1082 1083 1084 1085 1086 1087 1088 1089 1090
    #
    # Gather stats. 
    #
    if ($inout eq "in") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPIN, $errorstat);
    }
    elsif ($inout eq "out") {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, $errorstat);
    }
    elsif ($inout eq "modify") {
1091 1092 1093 1094 1095 1096
	#
	# If the modify fails, and the experiment is swapped out, then
	# insert a record for that since that is different then a modify
	# that fails, but results in the experiment being restored the
	# way it was. 
	#
1097
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPMODIFY, $errorstat);
1098 1099 1100 1101
	
	if ($modifySwapped) {
	    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_SWAPOUT, 0);
	}