endexp.in 16.1 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
5 6
# All rights reserved.
#
7 8
use English;
use Getopt::Std;
9
use POSIX qw(isatty setsid);
10
use strict;
11 12 13 14

#
# This gets invoked from the Web interface. Terminate an experiment.
# Most of the STDOUT prints are never seen since the web interface
15 16
# reports only errors, but this script is also intended to be run by the
# user someday. Perhaps.
17 18 19 20 21 22 23 24
#
# The -b (batch) argument is so that this script can be part of a batchmode
# that starts/ends experiments offline. In that case, we don't want to put
# it into the background and send email, but just want an exit status 
# returned to the batch system.
#
sub usage()
{
25
    print(STDERR
26
	  "Usage: endexp [-q] [-p] [-b | -w] [-N] <eid>\n".
27 28
	  "switches and arguments:\n".
	  "-w       - wait for non-batchmode experiment terminate\n".
29
	  "-q       - be less chatty\n".
30
	  "-N       - Suppress most email to the user and testbed-ops\n".
31
	  "<eid>    - The experiment id\n");
32 33
    exit(-1);
}
34

35
sub fatal($;$);
36

37
my $optlist  = "bwqxpN";
38 39
my $waitmode = 0;
my $batch    = 0;
40
my $quiet    = 0;
41
my $purge    = 0;
42
my $template_mode = 0;
43
my $noemail  = 0;
44

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;
    
    if ($status < 0) {
	die("*** $0:\n".
	    "    $message\n");
    }
    else {
	print STDERR "$message\n";
    }
    exit($status);
}

69 70 71 72 73 74
#
# Configure variables
#
my $TB     = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS  = "@TBOPSEMAIL@";
75
my $TBLOGS = "@TBLOGSEMAIL@";
76

77 78 79 80 81 82
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
83
use libtblog;
84
use libArchive;
85
use User;
86
use Template;
87
use Experiment;
88
use EmulabFeatures;
89

90 91 92
my $tbdir       = "$TB/bin/";
my $tbdata      = "tbdata";
my $archcontrol = "$TB/bin/archive_control";
93
my $nextstate;
94
my $logname;
95
my $logfile;
96
    
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
#
# Untaint the path
# 
$ENV{'PATH'} = '/bin:/usr/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

#
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
112
my %options = ();
113 114 115
if (! getopts($optlist, \%options)) {
    usage();
}
116
if (@ARGV != 1) {
117 118 119
    usage();
}
if (defined($options{"b"})) {
120 121
    $batch = 1;
}
122 123 124
if (defined($options{"p"})) {
   $purge = 1;
}
125 126
if (defined($options{"w"})) {
    $waitmode = 1;
127
}
128 129 130
if (defined($options{"q"})) {
    $quiet = 1;
}
131 132 133
if (defined($options{"x"})) {
    $template_mode = 1;
}
134 135 136
if (defined($options{"N"})) {
    $noemail = 1;
}
137 138
usage()
    if ($waitmode && $batch);
139

140 141 142 143
# Map invoking user to object.
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    fatal("You ($UID) do not exist!");
144
}
145 146 147 148
my $isadmin    = TBAdmin();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
my $user_uid   = $this_user->uid();
149 150 151 152

#
# Grab the experiment.
#
153
my $experiment = Experiment->Lookup($ARGV[0]);
154 155
if (! $experiment) {
    die("*** $0:\n".
156
	"    No such experiment in the Emulab Database.\n");
157
}
158 159 160
my $pid     = $experiment->pid();
my $eid     = $experiment->eid();
my $swapper = $experiment->GetSwapper();
161

162 163 164 165 166 167 168
my $workdir = $experiment->WorkDir();
my $userdir = $experiment->UserDir();
my $infodir = $experiment->InfoDir();

# Sanity check.
if (! (-e $workdir && -e $userdir)) {
    die("*** $0:\n".
169
	"    $pid/$eid is missing a critical directory!\n");
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
}
if (! -e $infodir) {
    if (! -e "$TB/expinfo/$pid") {
	die("*** $0:\n".
	    "    $TB/expinfo/$pid has not been created yet!\n".
	    "    Did you run 'fixexpinfo' like you were supposed to?\n");
    }
    #
    # This is not going to happen unless a devel tree has been
    # mixed with the main tree (or another devel tree). Try to
    # recover, bail of not possible.
    #
    if (system("/bin/mkdir -m 777 -p $infodir")) {
	die("*** $0:\n".
	    "    $infodir was missing and cannot be created!\n");
    }
}

188 189 190 191
#
# See if this is a template instance; error if the -x option not provided,
# since that means user is trying to self-terminate; not in the program.
#
192
if ($experiment->IsInstance() && !$template_mode) {
193
    die("*** $0:\n".
194
	"    $pid/$eid is a template instance; use another command!\n");
195
}
196 197

#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
198
# Verify that this person is allowed to end the experiment.
199
#
200
if ($UID && !$isadmin &&
201
    ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
202 203
    die("*** $0:\n".
	"    You do not have permission to end this experiment!\n");
204 205
}

206 207 208 209 210 211 212 213
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    $SIG{TERM} = 'IGNORE';
    $SIG{QUIT} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}
214

215
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
216 217 218
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
219
# terminating consists of a couple of different experiment states down inside
Leigh B. Stoller's avatar
Leigh B. Stoller committed
220 221
# the tb scripts. 
#
222
$experiment->LockTables() == 0
223 224
    or die("*** $0:\n".
	   "    Could not lock experiment tables for $pid/$eid!\n");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
225

226 227 228 229 230 231 232 233 234
my $expt_head_login = $experiment->creator();
my $estate          = $experiment->state();
my $batchstate      = $experiment->batchstate();
my $expt_path       = $experiment->path();
my $isbatchexpt     = $experiment->batchmode();
my $cancelflag      = $experiment->canceled();
my $expt_locked     = $experiment->locked();
my $elabinelab      = $experiment->elabinelab();
my $lockdown        = $experiment->lockdown();
235 236 237 238 239

#
# Batch experiments get a different protocol to avoid races with the
# batch daemon. We can kill the experiment directly, but only if the
# batch daemon is not currently working on it. In this case, its the
240
# same as killing an experiment that is SWAPPED.
241
#
242
# XXX: This script is run from the batch daemon. 
243
#
244
if ($batch) {
245
    #
246 247
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be ACTIVE or SWAPPED.
248
    #
249 250 251 252 253 254 255 256 257
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);
    
    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());
	
258 259 260 261
    die("*** $0:\n".
	"    Batch experiment $pid/$eid is locked down; cannot be swapped!\n")
	if ($lockdown);

262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
    die("*** $0:\n".
	"    Batch experiment $pid/$eid is not in the correct state!\n".
	"    Currently $estate, but should be SWAPPED,QUEUED, or ACTIVE\n")
	if ($estate ne EXPTSTATE_ACTIVE &&
	    $estate ne EXPTSTATE_QUEUED &&
	    $estate ne EXPTSTATE_SWAPPED);
}
else {
    #
    # Called from user (via web interface).
    #
    if ($isbatchexpt) {
	#
	# This cancels a batchexp, telling the daemon if it has control.
	# If the daemon is not currently working on the experiment, we
	# can do it right away in this script. Otherwise must wait.
	#
279 280 281
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
	    if ($cancelflag);

282 283 284
	ExitWithStatus(1, "Batch experiment $pid/$eid is locked down!")
	    if ($lockdown);

285
	#
286 287 288
	# Set the canceled flag. This will prevent the batch_daemon
	# from trying to run it (once the table is unlocked). It might
	# already be running, but we deal with that by looking at the
289
	# state below.
290
	#
291
	$experiment->SetCancelFlag(EXPTCANCEL_TERM);
292

293
	#
294
	# If the state is QUEUED or SWAPPED, we can do it right away.
295 296
	# Otherwise, have to let the batch daemon deal with it.
	# 
297 298 299 300
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid has been canceled.\n".
		       "You will receive email when the experiment is\n".
		       "torn down and you can reuse the experiment name.")
301 302
	    if (($estate ne EXPTSTATE_QUEUED &&
		 $estate ne EXPTSTATE_SWAPPED) ||
303
		$batchstate ne BATCHSTATE_UNLOCKED());
304
    }
305 306
    else {
	#
307 308
	# If the cancel flag is set, then user must wait for that to clear 
	# before we can do anything else.
309 310 311 312 313 314 315
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid has its cancel flag set!\n".
		       "You must wait for that to clear before you can ".
		       "terminate the experiment.\n")
	    if ($cancelflag);
	
316 317 318 319
 	ExitWithStatus(1,
		       "Experiment $pid/$eid is locked down; cannot swap!\n")
	    if ($lockdown);

320 321 322 323 324 325 326 327
	#
	# Must be unlocked if called by the user.
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid went into transition at ".
		       "$expt_locked!\n".
		       "You must wait until it is no longer in transition.\n")
	    if (defined($expt_locked));
328

329 330 331 332 333
	#
	# Okay, check state. We do not allow termination to start when the
	# experiment is in transition. A future task would be to allow this,
	# but for now it is not allowed. 
	#
334 335
	ExitWithStatus(1,
		       "Experiment $pid/$eid is currently in transition.\n".
336 337 338 339 340
		       "You must wait until it is no longer $estate!")
	    if ($estate ne EXPTSTATE_SWAPPED() &&
		$estate ne EXPTSTATE_NEW() &&
		$estate ne EXPTSTATE_TERMINATED() &&
		$estate ne EXPTSTATE_ACTIVE());
341 342 343 344 345 346 347 348 349 350 351 352
	
	#
	# Must be an admin person to swap out an experiment that
	# has had its panic button pressed.
	#
	if ($estate eq EXPTSTATE_PANICED() && !$isadmin) {
	    ExitWithStatus(1,
			   "Experiment $pid/$eid had its panic ".
			   "button pressed!\n".
			   "Only a testbed administrator can swap ".
			   "this experiment out.");
	}
353
    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
354 355 356
}

#
357 358
# Lock the experiment and change state so no one can mess with it. We need
# to determine our next state before we unlock the table. 
359
#
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
if ($estate eq EXPTSTATE_ACTIVE) {
    $nextstate = EXPTSTATE_SWAPPING;
}
elsif ($estate eq EXPTSTATE_SWAPPED || 
       $estate eq EXPTSTATE_QUEUED) {
    $nextstate = EXPTSTATE_TERMINATING;
}
elsif ($estate eq EXPTSTATE_NEW ||
       $estate eq EXPTSTATE_TERMINATED) {
    $nextstate = EXPTSTATE_TERMINATED;
}
else {
    die("*** $0:\n".
	"    Experiment $pid/$eid appears to be in the wrong state: $estate\n");
}
375 376
# Unlock tables at same time.
$experiment->Lock($nextstate, 1);
377 378

#
379
# XXX - At this point a failure is going to leave things in an
380 381 382 383
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
384
#
385 386 387 388 389

#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
390
#
391 392
my $expt_head_name  = $swapper->name();
my $expt_head_email = $swapper->email();
Leigh B. Stoller's avatar
Leigh B. Stoller committed
393

394 395 396
#
# If not in batch mode, go into the background. Parent exits.
#
397
if (!$batch && !$template_mode) {
398 399 400
    # Cleanup
    $experiment->CleanLogFiles() == 0
	or fatal("Could not clean up logfiles!");
401 402 403

    $logfile = $experiment->CreateLogFile("endexp");
    if (!defined($logfile)) {
404 405
	fatal("Could not create logfile!");
    }
406 407 408 409 410 411
    $logname = $logfile->filename();
    # We want it to spew to the web.
    $experiment->SetLogFile($logfile);
    # Mark it open since we are going to start using it right away.
    $logfile->Open();
    
412 413 414 415 416
    if (my $childpid = TBBackGround($logname)) {
	#
	# Parent exits normally, except if in waitmode. 
	#
	if (!$waitmode) {
417 418 419 420 421 422 423 424 425
	    print("Experiment $pid/$eid is now terminating.\n".
		  "You will be notified via email when termination is ".
		  "complete.\n")
		if (! $quiet);
	    exit(0);
	}
	print("Waiting for experiment $eid to finish terminating.\n")
		if (! $quiet);
	
426
	if (-t && !$quiet) {
427 428 429
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
430 431 432 433 434 435 436 437 438 439 440 441 442
	}
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

443
	#
444
	# Wait until child exits or until user gets bored and types ^C.
445
	#
446 447
	waitpid($childpid, 0);
	
448 449
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
450
	exit($? >> 8);
451
    }
452
    TBdbfork();
453
}
454 455 456
# Give the web page a chance to start looking at the log file before
# the entire experiment is gone.
sleep(1);
457

458 459 460 461 462 463 464 465
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

466 467 468
#
# Sanity check states in case someone changes something.
#
469
if ($estate eq EXPTSTATE_ACTIVE) { 
Leigh B. Stoller's avatar
Leigh B. Stoller committed
470
    $experiment->PreSwap($this_user, TBDB_STATS_SWAPOUT, $estate) == 0 or
471
	fatal("Preswap failed!");
472

473
    if ($experiment->Swap($Experiment::EXPT_SWAPOUT) != 0) {
474 475 476
	fatal({type => 'secondary', severity => SEV_SECONDARY,
	       error => ['tbswap_out_failed']},
	      "tbswap out failed!");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
477
    }
478 479 480
    $experiment->PostSwap($this_user, TBDB_STATS_SWAPOUT) == 0 or
	fatal("PostSwap failed!");

481 482 483 484
    $experiment->SetState(EXPTSTATE_TERMINATING) == 0
	or fatal("Failed to set experiment state to " .
		 EXPTSTATE_TERMINATING());

485
    $estate = EXPTSTATE_SWAPPED;
486

487 488
    # Commit the archive after swapout. Easier to use the script. 
    system("$archcontrol -t endexp commit $pid $eid");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
489
}
490

491 492
if ($estate eq EXPTSTATE_SWAPPED ||
    $estate eq EXPTSTATE_QUEUED) {
493 494 495
    $experiment->GatherSwapStats($this_user, TBDB_STATS_TERMINATE, 0,
				 TBDB_STATS_FLAGS_START)
	== 0 or fatal("GatherSwapStats failed");
496 497

    if ($experiment->End() != 0) {  
498 499
	fatal("tbend failed!\n");
    }
500 501 502
    $experiment->SetState(EXPTSTATE_TERMINATED) == 0
	or fatal("Failed to set experiment state to " .EXPTSTATE_TERMINATED());
    
503
    $estate = EXPTSTATE_TERMINATED;
504 505 506 507

    #
    # Gather statistics for the swapout.
    #
508 509
    $experiment->GatherSwapStats($this_user, TBDB_STATS_TERMINATE)
	== 0 or fatal("GatherSwapStats failed");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
510 511
}

512
# We better be here ...
513
$estate = $experiment->state();
514
if ($estate ne EXPTSTATE_TERMINATED) {
515 516
    fatal("Experiment is in the wrong state: $estate\n");
}
517

518 519 520
#
# Try to copy off the files for testbed information gathering.
#
521
$experiment->SaveLogFiles();
522

523 524 525 526
# Copy out the archive and then delete it.
print "Archiving and clearing the experiment archive ...\n";
libArchive::TBArchiveExperimentArchive($pid, $eid) == 0 or
    fatal("Could not archive experiment archive!");
527 528

#
529
# Terminate the log so the web page stops spewing.
530
#
531 532
if (!$template_mode) {
    print "Experiment $pid/$eid has been successfully terminated!\n";
533
    $experiment->ClearLogFile();
534
}
535

536 537 538 539 540 541
#
# Kill any features
#
EmulabFeatures->DeleteAll($experiment) == 0 or
    fatal("Could not delete all features for $experiment");

542
#
543 544
# Cleanup DB state and remove directory. Purge flag is optional and generally
# used by admins for cleaning up bad DB state caused by script errors.
545
#
546
$experiment->Delete($purge);
547 548

#
549
# In batch mode, exit now. 
550
#
551
if ($batch || $template_mode) {
552 553 554 555
    exit(0);
}

#
556
# Send email notification to user.
557
#
558 559
my $message =
    "Experiment `$eid' in project `$pid' has been terminated.\n" .
560
    "You may now reuse `$eid' as an experiment name.\n";
561

562
SENDMAIL(($noemail ? $TBLOGS : "$user_name <$user_email>"),
563
	 "Experiment $pid/$eid Terminated",
564 565
	 $message,
	 "$user_name <$user_email>",
566 567 568 569
	 ($noemail ? "" 
	  : "Cc:  $expt_head_name <$expt_head_email>\n".
	    "Bcc: $TBLOGS"), 
         ($logname));
570 571 572

exit 0;

573
sub fatal($;$)
574
{
575 576
    my $parms = {};
    $parms = shift if ref $_[0] eq 'HASH';
577
    my($mesg) = $_[0];
578
    
579
    tberror($parms, $mesg);
580

581
    $experiment->Unlock();
Leigh B. Stoller's avatar
Leigh B. Stoller committed
582

583
    # Copy over the log files so the user can see them.
584
    $experiment->CopyLogFiles();    
585

586 587 588 589 590 591
    #
    # In batch mode, exit without sending the email. 
    #
    if ($batch) {
	exit(-1);
    }
592 593 594 595 596
    
    #
    # Clear the log file so the web page stops spewing. 
    #
    if (defined($logname)) {
597
	$experiment->CloseLogFile();
598
    }
599 600

    #
601
    # Send a message to the testbed list. Append the logfile.
602
    #
603
    SENDMAIL("$user_name <$user_email>",
604
	     "Termination Failure: $pid/$eid",
605 606 607 608 609
	     $mesg,
	     "$user_name <$user_email>",
	     "Cc:  $expt_head_name <$expt_head_email>\n".
	     "Bcc: $TBOPS",
	     ($logname));
610 611 612

    exit(-1);
}