swapexp.in 47.4 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3 4

#
# EMULAB-COPYRIGHT
5
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
6 7 8
# All rights reserved.
#

9 10
use English;
use Getopt::Std;
11
use POSIX qw(isatty setsid);
12
use RPC::XML;
13 14

#
Chad Barb's avatar
Chad Barb committed
15
# This gets invoked from the Web interface.
Chad Barb's avatar
 
Chad Barb committed
16
# Swap an experiment in, swap it out, restart or modify.
17
#
Chad Barb's avatar
Chad Barb committed
18

19 20
sub usage()
{
21
    print(STDERR
22
	  "Usage: swapexp [-q] [-b | -w] [-i | -a | -f] [-r] [-e] [-N]\n".
23 24 25 26
	  "               <-s in | out | restart | modify | pause>\n".
	  "               <pid> <eid> [<nsfile>]\n".
	  "switches and arguments:\n".
	  "-w       - wait for non-batchmode experiment swap/modify\n".
27
	  "-q       - be less chatty\n".
28 29 30
	  "-r       - reboot nodes when doing a modify experiment\n".
	  "-e       - restart event scheduler when doing a modify experiment\n".
	  "-s <op>  - Operation to perform; one of those listed above\n".
31
	  "-N       - Suppress most email to the user and testbed-ops\n".
32 33 34
	  "<pid>    - The project the experiment belongs to\n".
	  "<eid>    - The experiment name (id)\n".
	  "<nsfile> - Optional NS file to parse for experiment modify\n");
35 36
    exit(-1);
}
37
my  $optlist = "biafres:wqxgNXn";
38

39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;
    
    if ($status < 0) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
54
	tbdie($message);
55
    }
56
    elsif ($status > 0) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
57
	tbnotice($message);
58
    }
59 60 61
    else {
	tbinfo($message);
    }
62 63 64
    exit($status);
}

65 66 67 68 69
#
# Configure variables
#
my $TB     = "@prefix@";
my $TBOPS  = "@TBOPSEMAIL@";
70
my $TBROBOCOPS = "@TBROBOCOPSEMAIL@";
71
my $TBLOGS = "@TBLOGSEMAIL@";
72
my $TBINFO = "$TB/expinfo";
73
my $TBDOCBASE = "@TBDOCBASE@";
74
my $TBBASE = "@TBBASE@";
75
my $CONTROL  = "@USERNODE@";
76 77 78 79 80 81 82

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
Kevin Atkinson's avatar
 
Kevin Atkinson committed
83
use libtblog;
84
use libArchive;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
85
use Template;
86
use Experiment;
87
use User;
88

89 90 91 92 93 94
# For the END block below.
my $cleaning = 0;
my $justexit = 1;
my $signaled = 0;

my $tbdir    = "$TB/bin";
95
my $tbdata   = "tbdata";
96
my $checkquota = "$TB/sbin/checkquota";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
97
my $vtopgen  = "$TB/bin/vtopgen";
98
my $batch    = 0;
99
my $idleswap = 0;
100 101
my $autoswap = 0;
my $force    = 0;
Chad Barb's avatar
Chad Barb committed
102
my $reboot   = 0;
103
my $waitmode = 0;
104
my $quiet    = 0;
105
my $genimode = 0;
106 107
my $noswapout= 0;
my $noreconfig=0;
108
my $eventsys_restart   = 0;
109
my $errorstat= -1;
110 111
my $modifyHosed   = 0;
my $modifySwapped = 0;
112
my $robotexp = 0;
113
my $template_node = 0;
114
my $noemail      = 0;
115
my $xmlout       = 0;
Chad Barb's avatar
 
Chad Barb committed
116

117
my $inout;
118
my $logfile;
119
my $logname;
120
my @allnodes;
121
my @row;
122
my $action;
123
my $tag;
124
my $nextswapstate;
125
my $termswapstate;
Chad Barb's avatar
 
Chad Barb committed
126

Kevin Atkinson's avatar
 
Kevin Atkinson committed
127 128
my $modifyError; # needed when emailing error

129
# Protos
130
sub fatal($;$);
131 132 133
sub CheckFWinfo($);
sub GatherFWinfo();
		
134 135 136
#
# Untaint the path
# 
137
$ENV{'PATH'} = "/bin:/usr/bin:$TB/libexec/vis";
138 139 140 141 142 143 144
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

145 146 147 148 149 150 151
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

152 153 154 155 156 157 158 159
#
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
160 161 162
if (defined($options{"i"})) {
    $idleswap = 1;
}
163 164 165
if (defined($options{"w"})) {
    $waitmode = 1;
}
166 167 168 169 170 171
if (defined($options{"a"})) {
    $autoswap = 1;
}
if (defined($options{"f"})) {
    $force = 1;
}
172
if (defined($options{"g"})) {
173 174 175 176 177 178
    $genimode  = 1;
    $noswapout = 1;
}
if (defined($options{"n"})) {
    $noswapout  = 1;
    $noreconfig = 1;
179
}
180 181 182
if (defined($options{"b"})) {
    $batch = 1;
}
Chad Barb's avatar
 
Chad Barb committed
183 184 185
if (defined($options{"r"})) {
    $reboot = 1;
}
186 187 188
if (defined($options{"e"})) {
    $eventsys_restart = 1;
}
189 190 191
if (defined($options{"q"})) {
    $quiet = 1;
}
192 193 194
if (defined($options{"x"})) {
    $template_mode = 1;
}
195 196 197
if (defined($options{"N"})) {
    $noemail = 1;
}
198 199 200 201
if (defined($options{"X"})) {
    $quiet = 1;
    $xmlout = 1;
}
202 203 204
if (defined($options{"s"})) {
    $inout = $options{"s"};

Chad Barb's avatar
Chad Barb committed
205 206 207
    if ($inout ne "out"     &&
	$inout ne "in"      &&
	$inout ne "restart" &&
208
	$inout ne "pause"   &&
Chad Barb's avatar
 
Chad Barb committed
209
	$inout ne "modify") {
210 211 212 213 214 215 216
	usage();
    }
}
else {
    usage();
}

217 218 219 220 221
usage()
    if (($waitmode && $batch) ||
	($inout ne "modify" && @ARGV != 2) ||
	(($waitmode || $batch) && ($idleswap || $autoswap || $force)));

222 223 224 225 226
if ($eventsys_restart && $inout ne "modify") {
    print STDOUT "Usage: swapexp: -e (eventsys_restart) can be used ".
                 "only with -s modify\n";
    usage();
}
Chad Barb's avatar
 
Chad Barb committed
227 228 229
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];

230 231 232
#
# Untaint the arguments.
#
233
if ($pid =~ /^([-\w\.]+)$/) {
234 235 236
    $pid = $1;
}
else {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
237
    tbdie("Tainted argument $pid!");
238
}
239
if ($eid =~ /^([-\w\.]+)$/) {
240 241 242
    $eid = $1;
}
else {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
243
    tbdie("Tainted argument $eid!");
244
}
245
my $repfile = "tbreport.log";
246 247
my $tempnsfile;
my $modnsfile;
248
my $nsfile;
249

Leigh B. Stoller's avatar
Leigh B. Stoller committed
250
if ($inout eq "modify" && @ARGV > 2) {
251 252 253 254 255
    $tempnsfile = $ARGV[2];

    #
    # Untaint nsfile argument; Allow slash.
    #
256
    if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
257
	$tempnsfile = $1;
258 259
    }
    else {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
260
	tbdie("Tainted nsfile name: $tempnsfile");
261 262 263 264 265 266 267 268 269
    }
    #
    # Called from ops interactively. Make sure NS file in /proj or /users.
    #
    # Use realpath to resolve any symlinks.
    #
    my $translated = `realpath $tempnsfile`;
    if ($translated =~ /^([-\w\.\/]+)$/) {
	$tempnsfile = $1;
270
    }
271
    else {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
272
	tbdie("Tainted nsfile returned by realpath: $translated");
273 274 275
    }

    #
276 277 278 279 280 281 282
    # The file must reside in an acceptible location. Since this script
    # runs as the caller, regular file permission checks ensure it is a
    # file the user is allowed to use.   So we don't have to be too tight
    # with the RE matching /tmp and /var/tmp files.  Note that
    # /tmp/$guid-$nsref.nsfile is also allowed since this script is
    # invoked directly from web interface which generates a name that
    # should not be guessable.
283 284 285
    #
    if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile/) &&
	! ($tempnsfile =~ /^\/var\/tmp\/php\w+/) &&
286 287
	! TBValidUserDir($tempnsfile, 0)) {
	tbdie("$tempnsfile does not resolve to an allowed directory!");
288 289 290
    }

    if (! -f $tempnsfile || -z $tempnsfile || ! -r $tempnsfile) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
291
	tbdie("$tempnsfile does not look like an NS file!");
292
    }
293 294
    $nsfile    = "$eid.ns";
    $modnsfile = "${eid}-modify.ns";
295
}
296

297 298 299 300 301 302 303 304
my $experiment = Experiment->Lookup($pid, $eid);
if (! $experiment) {
    die("*** $0:\n".
	"    No such experiment $pid/$eid in the Emulab Database.\n");
}

my $workdir = $experiment->WorkDir();
my $userdir = $experiment->UserDir();
305
my $infodir = $experiment->InfoDir();
306

307 308 309 310
#
# See if this is a template instance; error if the -x option not provided,
# since that means user is trying to self-terminate; not in the program.
#
311
if (my $instance = Template::Instance->LookupByExptidx($experiment->idx())) {
312
    if ($inout ne "in" && !$template_mode) {
313 314 315 316 317
	die("*** $0:\n".
	    "    $pid/$eid is a template instance; use another command\n");
    }
}

318 319 320
# Sanity check.
if (! (-e $workdir && -e $userdir)) {
    die("*** $0:\n".
321
	"    $pid/$eid is missing a critical directory! Stopping now ...\n");
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
}
if (! -e $infodir) {
    if (! -e "$TB/expinfo/$pid") {
	die("*** $0:\n".
	    "    $TB/expinfo/$pid has not been created yet!\n".
	    "    Did you run 'fixexpinfo' like you were supposed to?\n");
    }
    #
    # This is not going to happen unless a devel tree has been
    # mixed with the main tree (or another devel tree). Try to
    # recover, bail of not possible.
    #
    if (system("/bin/mkdir -m 777 -p $infodir")) {
	die("*** $0:\n".
	    "    $infodir was missing and cannot be created!\n");
    }
}

340 341 342 343 344
# XXX Hack for geni mode.
if ($genimode) {
    $experiment->SetState(EXPTSTATE_ACTIVE);
}

345
#
346
# Verify user and get his DB uid and other info for later.
347
#
348 349 350
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
351
}
352 353 354 355 356
my $user_dbid  = $this_user->dbid();
my $user_uid   = $this_user->uid();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
my $isadmin    = TBAdmin();
357

358 359 360 361 362 363 364 365 366 367 368
#
# Get email address of the swapper/creator, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
#
my $swapper = $experiment->GetSwapper();
$swapper = $experiment->GetCreator()
    if (!defined($swapper));
my $expt_head_name  = $swapper->name();
my $expt_head_email = $swapper->email();

Kevin Atkinson's avatar
 
Kevin Atkinson committed
369 370 371 372 373
#
# Set error reporting info
#
tblog_set_info($pid,$eid,$UID);

374
#
Chad Barb's avatar
 
Chad Barb committed
375
# Verify that this person can muck with the experiment.
376 377
# Note that any script down the line has to do an admin check also. 
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
378
if ($UID && !$isadmin &&
379
    ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
380
    tbdie("You do not have permission to swap or modify this experiment!");
381 382
}

383 384 385 386
# Must do this before lock tables!
# idleswap is in minutes, threshold is in hours
$idleswap_time = 60 * TBGetSiteVar("idle/threshold");

387 388 389 390 391 392 393 394 395
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    $SIG{TERM} = 'IGNORE';
    $SIG{QUIT} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}

396 397 398
#
# Check for overquota; we deal with it below, cause of the batch system.
#
399
my $overquota = system("$checkquota $user_uid");
400 401

if ($overquota) {
402 403
    tberror({cause => 'user', severity => SEV_WARNING,
	     error => ['over_disk_quota', $CONTROL]},
404 405
	    "You are over your disk quota on $CONTROL; ".
	    "please login there and cleanup!");
406 407
}

408 409 410 411
#
# Temp fix; Disallow swapmod to firewalled experiments. This will come
# out later.
#
412
my $firewalled = $experiment->IsFirewalled();
413

414 415 416 417 418 419 420 421 422 423
#
# see if we've got a robot exp (this isn't the only check; if this is a
# swapmod, we've got to check tbprerun as well...
#
$robotexp = 
    TBExptContainsNodeCT($pid,$eid,'robot') || 
    TBExptContainsNodeCT($pid,$eid,'mote') || 
    TBExptContainsNodeCT($pid,$eid,'motehost') || 
    TBExptContainsNodeCT($pid,$eid,'powermon');

424 425 426 427 428
#
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
# terminating consists of a couple of different experiment states down inside
Chad Barb's avatar
Chad Barb committed
429
# the tb scripts.
430
#
431
$experiment->LockTables() == 0
432 433 434 435 436 437 438
    or die("*** $0:\n".
	   "    Could not lock experiment tables for $pid/$eid!\n");

my $estate          = $experiment->state();
my $batchstate      = $experiment->batchstate();
my $expt_path       = $experiment->path();
my $isbatchexpt     = $experiment->batchmode();
439
my $canceled        = $experiment->canceled();
440 441 442 443 444 445 446 447 448 449 450 451 452 453
my $expt_locked     = $experiment->locked();
my $elabinelab      = $experiment->elabinelab();
my $lockdown        = $experiment->lockdown();
my $swappablebit    = $experiment->swappable();
my $idleswapbit     = $experiment->idleswap();
my $autoswapbit     = $experiment->autoswap();
my $swappablestr    = ( $swappablebit ? "Yes" : "No" );
my $idleswapstr     = ( $idleswapbit ? "Yes" : "No" );
my $autoswapstr     = ( $autoswapbit ? "Yes" : "No" );
my $noswap          = $experiment->noswap_reason();
my $noidleswap      = $experiment->noidleswap_reason();
my $idleswaptime    = $experiment->idleswap_timeout() / 60.0;
my $autoswaptime    = $experiment->autoswap_timeout() / 60.0;
my $rendering       = $experiment->prerender_pid();
454

455 456
if ($inout ne "out") {
    # I'm going to update this below, so fix the value before I use it.
457
    $idleswap_time = min($idleswaptime * 60, $idleswap_time);
458 459 460
    $idleswaptime = $idleswap_time / 60.0;
}

461 462
my $swapsettings = 
  "Idle-Swap:   $idleswapstr".
463
  ($idleswapbit ? ", at $idleswaptime hours\n" : " (Reason: $noidleswap)\n").
464 465
  "Auto-Swap:   $autoswapstr".
  ($autoswapbit ? ", at $autoswaptime hours\n" : "\n");
466

467
if (! chdir($workdir)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
468
    tbdie("Could not chdir to $workdir: $!");
469 470
}

471
#
472 473 474
# This script is called from the batch daemon.
# 
if ($batch) {
475
    #
476 477 478
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be in proper state for the
    # operation requested. 
479
    #
Kevin Atkinson's avatar
 
Kevin Atkinson committed
480
    tbdie("Experiment $pid/$eid is supposed to be a batch experiment!")
481 482
	if (!$isbatchexpt);
    
Kevin Atkinson's avatar
 
Kevin Atkinson committed
483
    tbdie("Batch experiment $pid/$eid should be locked!")
484 485
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());
Kevin Atkinson's avatar
 
Kevin Atkinson committed
486 487
    
    tbdie("Batch experiment $pid/$eid is locked down; cannot be swapped!")
488 489
	if ($lockdown);

490
    if ($inout eq "in") {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
491 492
	tbdie("Batch experiment $pid/$eid is not in the proper state!\n".
	      "Currently $estate, but should be QUEUED.")
493 494
	    if ($estate ne EXPTSTATE_QUEUED);
	
495 496 497
	tbdie({cause => 'canceled', severity => SEV_IMMEDIATE,
	       error => ['cancel_flag']},
	      "Batch experiment $pid/$eid has been canceled! Aborting.")
498
	    if ($canceled);
499 500

	# Do not allow it to swap in. What about swapout? 
501 502 503
	tbdie({type => 'primary', severity => SEV_ERROR,
	       error => ['over_disk_quota', $CONTROL]},
	      "Batch experiment cannot swap in when over quota! Aborting.")
504
	    if ($overquota);
505 506
    }
    elsif ($inout eq "out") {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
507 508
	tbdie("Batch experiment $pid/$eid is not in the proper state!\n".
	      "Currently $estate, but should be ACTIVE.")
509
	    if ($estate ne EXPTSTATE_ACTIVE);
510 511
    }
    else {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
512
	tbdie("Improper request from batch daemon for $pid/$eid!\n");
513 514 515 516
    }
}
else {
    if ($isbatchexpt) {
517 518 519 520
	#
	# User is requesting that a batch either be injected or paused.
	# Sanity check the state, but otherwise let the batch daemon
	# handle it.
521 522
	#
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
523
	    if ($canceled);
524

525 526 527
	ExitWithStatus(1, "Batch experiment $pid/$eid is locked down!")
	    if ($lockdown);

528
	if ($inout eq "in") {
529
	    ExitWithStatus(1,
530 531 532
			   "Batch experiment $pid/$eid must be SWAPPED to\n".
			   "QUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_SWAPPED);
533

534 535 536 537 538 539 540 541
	    #
	    # For the moment, only the creator of the batch can queue it. This
	    # avoids all kinds of problems inside the batch daemon, with it
	    # not knowing who to swap the experiment in as. This is a temporary
	    # solution, at least until I have time to decide what is correct.
	    #
	    ExitWithStatus(1,
			   "Only the creator of a batch experiment can queue it!")
542
		if ($experiment->creator() ne $user_uid);
543

544 545 546 547 548 549
	    if ($overquota) {
		tbreport(SEV_ERROR, 'over_disk_quota', $CONTROL);
		ExitWithStatus(1,
			       "Batch experiment $pid/$eid cannot swap in when ".
			       "over quota!\n")
	    }
550
	    
551
	    $experiment->SetState(EXPTSTATE_QUEUED);
552 553
	}
	elsif ($inout eq "out") {
554
	    ExitWithStatus(1,
555 556 557 558
			   "Batch experiment $pid/$eid must be ACTIVE or\n".
			   "ACTIVATING to swap out. Currently $estate.")
		if ($estate ne EXPTSTATE_ACTIVE &&
		    $estate ne EXPTSTATE_ACTIVATING);
559 560 561 562 563

	    #
	    # Since the batch daemon has control, all we can do is set
	    # the cancel bit.
	    # 
564
	    $experiment->SetCancelFlag(EXPTCANCEL_SWAP);
565 566
	}
	elsif ($inout eq "pause") {
567
	    ExitWithStatus(1,
568 569 570
			   "Batch experiment $pid/$eid must be QUEUED to\n".
			   "DEQUEUE. Currently $estate.")
		if ($estate ne EXPTSTATE_QUEUED);
571 572

	    #
573
	    # XXX. The batch daemon might already have the experiment, but
574 575
	    # not have shipped it off to startexp. Use a cancel flag since
	    # that is the only consistent mechanism to tell the batch daemon
576 577 578 579
	    # what it should do. Otherwise, we can just change its state
	    # to yank it from the queue.
	    #
	    if ($batchstate ne BATCHSTATE_UNLOCKED()) {
580
		$experiment->SetCancelFlag(EXPTCANCEL_DEQUEUE);
581 582
	    }
	    else {
583
		$experiment->SetState(EXPTSTATE_SWAPPED);
584
	    }
585
	}
586
	elsif ($inout eq "modify") {
587
	    ExitWithStatus(1,
588 589 590 591
			   "Batch experiment $pid/$eid must be SWAPPED or\n".
			   "ACTIVE to modify. Currently $estate.")
		if (($estate ne EXPTSTATE_SWAPPED &&
		     $estate ne EXPTSTATE_ACTIVATING) ||
592
		    $batchstate ne BATCHSTATE_UNLOCKED());
593

594 595 596
	    ExitWithStatus(1,
			"Cannot modify an active firewalled experiment (yet).")
		if ($firewalled && $estate ne EXPTSTATE_SWAPPED && !$isadmin);
597

598 599
	    ExitWithStatus(1,
			"Cannot modify an active ElabInElab experiment (yet).")
600
		if ($elabinelab && $estate ne EXPTSTATE_SWAPPED);
601

602 603 604 605 606 607
	    if ($overquota) {
		tbreport(SEV_ERROR, 'over_disk_quota', $CONTROL);
		ExitWithStatus(1,
			       "Cannot modify batch experiment $pid/$eid when ".
			       "over quota!\n")
	    }
608
	    
609
	    #
610
	    # Otherwise, proceed with the modify. The experiment will be
611 612
	    # locked below, and so it cannot be injected or otherwise messed
	    # with since its state is going to be changed before we unlock
613 614 615 616
	    # the experiments table. The batch daemon will leave it alone
	    # until the modify is done. If the modify fails and cannot recover
	    # it is going to get swapped out; that is okay since the batch
	    # daemon does not keep state internally. 
617
	    #
618 619
	    goto doit;
	}
620
	else {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
621
	    tbdie("Operation $inout not allowed on a batch experiment!");
622
	}
623 624
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid state has been changed.\n");
625
      doit:
626
    }
627 628 629 630 631 632 633 634 635 636
    else {
	#
	# If the cancel flag is set, then user must wait for that to
	# clear before we can do anything else.
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid has its cancel flag set!.\n".
		       "You must wait for that to clear before you can swap\n".
		       "or modify the experiment.\n")
	    if ($canceled);
637

638 639 640 641
 	ExitWithStatus(1,
		       "Experiment $pid/$eid is locked down; cannot swap!\n")
	    if ($lockdown);

642 643 644 645 646 647 648 649 650 651
	#
	# Check the state for the various operations.
	#
	if (!$force) {
	  SWITCH: for ($inout) {
	      /^in$/i && do {
		  if ($estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped out!");
		  }
652 653 654 655 656 657
		  if ($overquota) {
		      tbreport(SEV_ERROR, 'over_disk_quota', $CONTROL);
		      ExitWithStatus(1,
				     "Experiment $pid/$eid cannot swap in when ".
				     "over quota!\n")
		  }
658
		  
659 660 661 662
		  last SWITCH;
	      };
	      /^out$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
Leigh B. Stoller's avatar
Leigh B. Stoller committed
663
 		      $estate ne EXPTSTATE_PANICED() &&
664 665 666 667 668 669
		      $estate ne EXPTSTATE_ACTIVATING()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in ".
				     "or activating!\n");
		  }
		  
Leigh B. Stoller's avatar
Leigh B. Stoller committed
670 671 672 673 674 675 676 677 678 679 680 681
 		  #
 		  # Must be an admin person to swap out an experiment that
 		  # has had its panic button pressed.
 		  #
 		  if ($estate eq EXPTSTATE_PANICED() && !$isadmin) {
 		      ExitWithStatus(1,
 				     "Experiment $pid/$eid had its panic ".
 				     "button pressed!\n".
 				     "Only a testbed administrator can swap ".
 				     "this experiment out.");
 		  }

682 683 684 685 686
		  if ($estate eq EXPTSTATE_ACTIVATING()) {
		      #
		      # All we can do is set the cancel flag and hope that
		      # it gets noticed. We do not wait. 
		      # 
687
		      $experiment->SetCancelFlag(EXPTCANCEL_SWAP);
688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
		      
		      ExitWithStatus(0,
				     "Experiment $pid/$eid swapin has been  ".
				     "marked for cancelation.\n".
				     "You will receive email when the original ".
				     "swap request has finished.");
		  }
		  last SWITCH;
	      };
	      /^restart$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid is not swapped in!");
		  }
		  last SWITCH;
	      };
	      /^modify$/i && do {
		  if ($estate ne EXPTSTATE_ACTIVE() &&
		      $estate ne EXPTSTATE_SWAPPED()) {
		      ExitWithStatus(1,
				     "Experiment $pid/$eid must be ACTIVE or\n".
				     "SWAPPED to modify!\n");
		  }
711
		  ExitWithStatus(1,
712 713 714 715
			"Cannot modify an active firewalled experiment (yet).")
		      if ($firewalled &&
			  $estate ne EXPTSTATE_SWAPPED && !$isadmin);

716
		  ExitWithStatus(1,
717 718 719
			"Cannot modify an active ElabInElab experiment (yet).")
		      if ($elabinelab &&
			  $estate ne EXPTSTATE_SWAPPED && !$isadmin);
720

721 722 723 724 725 726
		  if ($overquota) {
		      tbreport(SEV_ERROR, 'over_disk_quota', $CONTROL);
		      ExitWithStatus(1,
				     "Experiment $pid/$eid cannot be modified ".
				     "when over quota!\n")
		  }
727
		  
728 729
		  last SWITCH;
	      };
Kevin Atkinson's avatar
 
Kevin Atkinson committed
730
	      tbdie("Missing state check for action: $action");
731
	  }
732 733
	}
    }
734 735
}

736 737 738 739 740 741 742
#
# Determine the temporary and next state for experiment. If the experiment
# is a batch experiment, then the next state is actually handled by the
# batch daemon, but we still have to deal with the temporary state. 
#
SWITCH: for ($inout) {
    /^in$/i && do {
743
	$nextswapstate = EXPTSTATE_ACTIVATING();
744 745 746
	last SWITCH;
    };
    /^out$/i && do {
747
	$nextswapstate = EXPTSTATE_SWAPPING();
748 749 750
	last SWITCH;
    };
    /^restart$/i && do {
751
	$nextswapstate = EXPTSTATE_RESTARTING();
752 753 754
	last SWITCH;
    };
    /^modify$/i && do {
755 756
	$nextswapstate = (($estate eq EXPTSTATE_SWAPPED()) ?
			  EXPTSTATE_MODIFY_PARSE() : EXPTSTATE_MODIFY_REPARSE());
757 758
	last SWITCH;
    };
Kevin Atkinson's avatar
 
Kevin Atkinson committed
759
    tbdie("Missing state check for action: $action");
760
}
761 762
 
# Update idleswap_timeout to whatever the current value is.
763
if ($inout ne "out") {
764 765
    $experiment->UpdateIdleSwapTime($idleswap_time) == 0
	or tbdie("Could not update idleswap timeout for $pid/$eid");
766
}
767

768 769 770 771 772
#
# On a failure, we go back to this swapstate. Might be modified below.
# 
$termswapstate = $estate;

773 774 775
# Lock the record, set the nextstate, and unlock the table. Unlock
# tables at same time.
$experiment->Lock($nextswapstate, 1) == 0
776 777 778
    or tbdie({type => 'secondary', severity => SEV_SECONDARY,
	      error => ['set_experiment_state_failed', $nextswapstate]},
	     "Failed to set experiment state to $nextswapstate");
779 780 781 782 783 784 785

#
# At this point, we need to force a cleanup no matter how we exit.
# See the END block below.
#
$justexit = 0;

786 787 788 789 790 791
# Need the previous swapper for rollback below. Safe now that tables unlocked.
my $last_swapper = User->Lookup($experiment->swapper_idx());
if (! defined($last_swapper)) {
    tbdie("Error looking up object for last swapper!");
}

792 793
#
# XXX - At this point a failure is going to leave things in an
794 795 796 797
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
798 799
#

800 801
if ($inout eq "in") {
    $action = "swapped in";
802
    $tag    = "swapin";
803 804 805
}
if ($inout eq "out") {
    $action = "swapped out";
806
    $tag    = "swapout";
807 808 809 810
}
if ($inout eq "restart") {
    $action = "restarted";
}
Chad Barb's avatar
 
Chad Barb committed
811 812
if ($inout eq "modify") {
    $action = "modified";
813
    $tag    = "swapmod";
Chad Barb's avatar
 
Chad Barb committed
814
}
815

816 817 818
#
# Before going to background, we have to copy out the NS file!
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
819
if ($inout eq "modify" && defined($modnsfile)) {
820 821
    unlink($modnsfile);
    if (system("/bin/cp", "$tempnsfile", "$modnsfile")) {
822
	fatal("Could not copy $tempnsfile to $modnsfile");
823 824 825 826
    }
    chmod(0664, "$modnsfile");
}

827 828 829
#
# If not in batch mode, go into the background. Parent exits.
#
830
if (! $batch && ! $template_mode && !$genimode) {
831 832 833 834
    # Cleanup
    $experiment->CleanLogFiles() == 0
	or fatal("Could not clean up logfiles!");

835 836
    $logfile = $experiment->CreateLogFile("swapexp");
    if (!defined($logfile)) {
837 838
	fatal("Could not create logfile!");
    }
839 840 841 842 843
    $logname = $logfile->filename();
    # We want it to spew to the web.
    $experiment->SetLogFile($logfile);
    # Mark it open since we are going to start using it right away.
    $logfile->Open();
Chad Barb's avatar
Chad Barb committed
844

845 846
    if (my $childpid = TBBackGround($logname)) {
	#
847 848
	# Parent exits normally, unless in waitmode. We have to set
	# justexit to make sure the END block below does not run.
849
	#
850 851
	$justexit = 1;

852
	if (!$waitmode) {
853 854 855
	    print("Experiment $pid/$eid is now being $action.\n".
		  "You will be notified via email when the this is done.\n")
		if (! $quiet);
856 857
	    exit(0);
	}
858 859 860 861 862 863 864 865
	print("Waiting for experiment $eid to finish its swap${action}\n")
	    if (! $quiet);
	    
	if (isatty(STDIN) && !$quiet) {
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
	}
866 867 868 869 870 871 872 873 874 875 876 877
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

878
	#
879
	# Wait until child exits or until user gets bored and types ^C.
880
	#
881 882
	waitpid($childpid, 0);
	
883 884
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
Kevin Atkinson's avatar
 
Kevin Atkinson committed
885 886 887

	my $exit_code = $? >> 8;

888
	if ($exit_code != 0) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
889
	    my $d = tblog_lookup_error();
890 891 892 893 894 895 896 897 898 899 900 901 902
	    my $output = tblog_format_error($d);
	    if ($xmlout) {
		use libtblog '*SOUT'; # to avoid an unnecessary, and large,
                                      # log entry
		if (open(IN, "$logname")) {
		    $d->{log} = '';
		    while (<IN>) {
			$d->{log} .= $_;
		    }
		    close IN;
		}
		$d->{output} = $output;
	        print SOUT RPC::XML::response->new($d)->as_string(), "\n";
903
	    } elsif (!$quiet) {
904 905
		print $output;
	    }
Kevin Atkinson's avatar
 
Kevin Atkinson committed
906 907 908
	}
	exit $exit_code;

909
    }
910
    TBdbfork();
911 912
}

913 914 915 916 917 918 919 920
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940
#
# We need to catch TERM cause sometimes shit happens and we have to kill
# an experiment swap that is hung or otherwise scrogged. Rather then 
# trying to kill off the children one by one, lets arrange to catch it
# here and send a killpg to the children. This is not to be done lightly,
# cause it can leave things worse then they were before!
#
sub handler ($) {
    my ($signame) = @_;
    
    $SIG{TERM} = 'IGNORE';
    my $pgrp = getpgrp(0);
    kill('TERM', -$pgrp);
    sleep(1);
    $signaled = 1;
    fatal("Caught SIG${signame}! Killing experiment setup ...");
}
$SIG{TERM} = \&handler;
$SIG{QUIT} = 'DEFAULT';

941 942 943 944
#
# Gather stats; start clock ticking
#
if ($inout eq "in") {
Leigh B. Stoller's avatar
Leigh B. Stoller committed
945
    $experiment->PreSwap($this_user, TBDB_STATS_SWAPIN, $estate) == 0 or
946
	fatal("Preswap failed!");
947 948
}
elsif ($inout eq "out") {
Leigh B. Stoller's avatar
Leigh B. Stoller committed
949
    $experiment->PreSwap($this_user, TBDB_STATS_SWAPOUT, $estate) == 0 or
950
	fatal("Preswap failed!");
951 952
}
elsif ($inout eq "modify") {
Leigh B. Stoller's avatar
Leigh B. Stoller committed
953
    $experiment->PreSwap($this_user, TBDB_STATS_SWAPMODIFY, $estate) == 0 or
954
	fatal("Preswap failed!");
955 956
}

957 958 959
#
# Remove old report file since its contents are going to be invalid.
#
960
if ($inout ne "restart" && -e $repfile) {
961 962 963
    unlink("$repfile");
}

964 965 966 967
#
# Sanity check states in case someone changes something.
#
if ($inout eq "out") {
968
    my $optarg = (($force || $idleswap) ? "-force" : "");
969

970
    if ($experiment->Swap($Experiment::EXPT_SWAPOUT, $optarg) != 0) {
971
	$errorstat = $? >> 8;
972 973 974
	fatal({type => 'secondary', severity => SEV_SECONDARY,
	       error => ['tbswap_out_failed']},
	      "tbswap out failed!");
975
    }
976 977 978 979

    #
    # Add the files that have been detected by tracing to the archive.
    #
980 981 982 983 984 985 986
    if (!$template_mode) {
	if (libArchive::TBExperimentArchiveAddTracedFiles($pid, $eid) < 0) {
	    fatal({type => 'secondary', severity => SEV_SECONDARY,
		   error => ['archive_op_failed', 'add_traced_files',
			     undef, undef]},
		  "Failed to add traced files to the experiment archive!");
	}
987

988 989 990 991 992 993 994 995 996
	#
	# Add the experiment directory.
	#
	if (libArchive::TBExperimentArchiveAddUserFiles($pid, $eid) < 0) {
	    fatal({type => 'secondary', severity => SEV_SECONDARY,
		   error => ['archive_op_failed', 'add_user_files',
			     undef, undef]},
		  "Failed to add user files to the experiment archive!");
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
997 998
    }

999
    $experiment->SetState(EXPTSTATE_SWAPPED) == 0
1000 1001 1002
	or fatal({type => 'secondary', severity => SEV_SECONDARY,
		  error => ['set_experiment_state_failed', EXPTSTATE_SWAPPED()]},
	         "Failed to set experiment state to " . EXPTSTATE_SWAPPED());
1003
    
1004
    $experiment->SetPanicBit(0) == 0
1005
	or fatal("Failed to clear the panic bit!");
1006
}
1007
elsif ($inout eq "in") {
1008 1009
    # Set the swapper now so that nodes use the proper uid. If the swapin
    # fails, we leave the swapper as is, since its harmless and informative.
1010
    $experiment->SetSwapper($this_user);
1011

1012
    if ($experiment->Swap($Experiment::EXPT_SWAPIN) != 0) {
1013
	$errorstat = $? >> 8;
1014 1015 1016
	fatal({type => 'secondary', severity => SEV_SECONDARY,
	       error => ['tbswap_in_failed']},
	      "tbswap in failed!");
1017
    }
1018 1019
    
    $experiment->SetState(EXPTSTATE_ACTIVE) == 0
1020 1021 1022
	or fatal({type => 'secondary', severity => SEV_SECONDARY,
		  error => ['set_experiment_state_failed', EXPTSTATE_ACTIVE()]},
	         "Failed to set experiment state to " . EXPTSTATE_ACTIVE());
1023 1024
    
    $experiment->Report($repfile, "-b");
Chad Barb's avatar
Chad Barb committed
1025
}
Chad Barb's avatar
 
Chad Barb committed
1026
elsif ($inout eq "modify") {
1027 1028 1029
    #
    # Prepare the Archive for the swapmod, in case we have to "roll back".
    #
1030 1031 1032 1033 1034
    if (!$template_mode) {
	print "Doing a preswapmod on the experiment archive ...\n";
	if (libArchive::TBExperimentArchivePreSwapMod($pid, $eid) < 0) {
	    fatal("Failed to do a preswapmod on the experiment archive!");
	}
1035 1036
    }

1037 1038 1039 1040 1041
    # Gather up some firewall state for later comparison.
    if (GatherFWinfo() < 0) {
	fatal("Could not gather firewall info; cannot safely continue!");
    }

Chad Barb's avatar
Chad Barb committed
1042
    print "Backing up old experiment state ... " . TBTimeStamp() . "\n";
1043 1044
    $experiment->BackupVirtualState() == 0
	or fatal("Could not backup experiment state; cannot safely continue!");
Chad Barb's avatar
Chad Barb committed
1045 1046

    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1047
    # Rerun tbprerun if modifying, but only if new NS file provided.
1048 1049
    # Yep, we allow reswap without changing the NS file. For Shashi and SIM.
    # Note that tbprerun kills the renderer if its running.
Chad Barb's avatar
Chad Barb committed
1050
    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1051
    if (defined($modnsfile)) {
1052
	if ($experiment->PreRun($modnsfile) != 0) {
1053
	    print STDOUT "Modify Error: tbprerun failed.\n";
1054
	  FWHOSED:
1055 1056
	    print STDOUT "Recovering experiment state...\n";

1057 1058
	    if ($experiment->RemoveVirtualState() ||
		$experiment->RestoreVirtualState()) {
1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070
		$modifyHosed = 1;
		fatal("Experiment state could not be restored!");
		# Never returns;
	    }
	    #
	    # If the renderer was running when we started the swapmod, then we
	    # want to restart it. If it was stopped, then the renderer info
	    # was captured with the rest of the virtual state (restored above).
	    #
	    system("prerender -t $pid $eid")
		if ($rendering);

Kevin Atkinson's avatar
 
Kevin Atkinson committed
1071
	    $modifyError = "Update aborted; old virtual state restored.";
1072 1073 1074
	    fatal({type => 'secondary', severity => SEV_SECONDARY,
		   error => ['update_aborted', 'virtual_state_restored']},
		  $modifyError);
1075
	    # Never returns;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1076
	}
1077 1078 1079 1080 1081 1082 1083 1084 1085
	#
	# Okay, whenever a new NS file is presented, we need to do some
	# checks on the firewall to make sure the user is not trying to
	# do something "unsafe". 
	#
	if (CheckFWinfo($estate) != 0) {
	    # All the stuff for recovering is right above, so go there. 
	    goto FWHOSED;
	}
Chad Barb's avatar
Chad Barb committed
1086
    }
1087 1088 1089 1090 1091
    elsif ($genimode) {
	#
	# Need the min/max numbers, usually done during prerun.
	#
	print "Doing a pre-assign ...\n";
1092
	if (system("$vtopgen -p $pid $eid")) {
1093 1094 1095 1096 1097
	    fatal({type => 'secondary', severity => SEV_SECONDARY,
		   error => ['update_aborted', undef]},
		  "assign prerun failed!");
	}
    }
Chad Barb's avatar
Chad Barb committed
1098

Chad Barb's avatar
 
Chad Barb committed
1099
    #
1100
    # Our next state depends on whether the experiment was active or swapped.
Chad Barb's avatar
 
Chad Barb committed
1101
    #
1102
    if ($estate eq