endexp.in 15 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh Stoller's avatar
Leigh Stoller committed
2 3
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2007 University of Utah and the Flux Group.
Leigh Stoller's avatar
Leigh Stoller committed
5 6
# All rights reserved.
#
7 8
use English;
use Getopt::Std;
9
use POSIX qw(isatty setsid);
10
use strict;
11 12 13 14

#
# This gets invoked from the Web interface. Terminate an experiment.
# Most of the STDOUT prints are never seen since the web interface
15 16
# reports only errors, but this script is also intended to be run by the
# user someday. Perhaps.
17 18 19 20 21 22 23 24
#
# The -b (batch) argument is so that this script can be part of a batchmode
# that starts/ends experiments offline. In that case, we don't want to put
# it into the background and send email, but just want an exit status 
# returned to the batch system.
#
sub usage()
{
25
    print(STDERR
26
	  "Usage: endexp [-q] [-p] [-b | -w] <eid>\n".
27 28
	  "switches and arguments:\n".
	  "-w       - wait for non-batchmode experiment terminate\n".
29
	  "-q       - be less chatty\n".
30
	  "<eid>    - The experiment id\n");
31 32
    exit(-1);
}
33

34
sub fatal($;$);
35

36
my $optlist  = "bwqxp";
37 38
my $waitmode = 0;
my $batch    = 0;
39
my $quiet    = 0;
40
my $purge    = 0;
41
my $template_mode = 0;
42

43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Termination is proceeding in the background. Notified later.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub ExitWithStatus($$)
{
    my ($status, $message) = @_;
    
    if ($status < 0) {
	die("*** $0:\n".
	    "    $message\n");
    }
    else {
	print STDERR "$message\n";
    }
    exit($status);
}

67 68 69 70 71 72
#
# Configure variables
#
my $TB     = "@prefix@";
my $DBNAME = "@TBDBNAME@";
my $TBOPS  = "@TBOPSEMAIL@";
73
my $TBLOGS = "@TBLOGSEMAIL@";
74

75 76 77 78 79 80
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
81
use libtblog;
82
use libArchive;
83
use User;
84
use Template;
85
use Experiment;
86

87 88 89
my $tbdir       = "$TB/bin/";
my $tbdata      = "tbdata";
my $archcontrol = "$TB/bin/archive_control";
90
my $nextstate;
91
my $logname;
92
my $logfile;
93
    
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
#
# Untaint the path
# 
$ENV{'PATH'} = '/bin:/usr/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Turn off line buffering on output
#
$| = 1;

#
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
109
my %options = ();
110 111 112
if (! getopts($optlist, \%options)) {
    usage();
}
113
if (@ARGV != 1) {
114 115 116
    usage();
}
if (defined($options{"b"})) {
117 118
    $batch = 1;
}
119 120 121
if (defined($options{"p"})) {
   $purge = 1;
}
122 123
if (defined($options{"w"})) {
    $waitmode = 1;
124
}
125 126 127
if (defined($options{"q"})) {
    $quiet = 1;
}
128 129 130
if (defined($options{"x"})) {
    $template_mode = 1;
}
131 132
usage()
    if ($waitmode && $batch);
133

134 135 136 137
# Map invoking user to object.
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    fatal("You ($UID) do not exist!");
138
}
139 140 141 142
my $isadmin    = TBAdmin();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
my $user_uid   = $this_user->uid();
143 144 145 146

#
# Grab the experiment.
#
147
my $experiment = Experiment->Lookup($ARGV[0]);
148 149
if (! $experiment) {
    die("*** $0:\n".
150
	"    No such experiment in the Emulab Database.\n");
151
}
152 153 154
my $pid     = $experiment->pid();
my $eid     = $experiment->eid();
my $swapper = $experiment->GetSwapper();
155

156 157 158 159
#
# See if this is a template instance; error if the -x option not provided,
# since that means user is trying to self-terminate; not in the program.
#
160
if ($experiment->IsInstance() && !$template_mode) {
161
    die("*** $0:\n".
162
	"    $pid/$eid is a template instance; use another command!\n");
163
}
164 165

#
Leigh Stoller's avatar
Leigh Stoller committed
166
# Verify that this person is allowed to end the experiment.
167
#
168
if ($UID && !$isadmin &&
169
    ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
170 171
    die("*** $0:\n".
	"    You do not have permission to end this experiment!\n");
172 173
}

174 175 176 177 178 179 180 181
#
# In wait mode, block interrupt until we spin off the background process.
#
if ($waitmode) {
    $SIG{TERM} = 'IGNORE';
    $SIG{QUIT} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}
182

183
#
Leigh Stoller's avatar
Leigh Stoller committed
184 185 186
# We have to protect against trying to end an experiment that is currently
# in the process of being terminated. We use a "wrapper" state (actually
# a timestamp so we can say when termination was requested) since
187
# terminating consists of a couple of different experiment states down inside
Leigh Stoller's avatar
Leigh Stoller committed
188 189
# the tb scripts. 
#
190
$experiment->LockTables() == 0
191 192
    or die("*** $0:\n".
	   "    Could not lock experiment tables for $pid/$eid!\n");
Leigh Stoller's avatar
Leigh Stoller committed
193

194 195 196 197 198 199 200 201 202
my $expt_head_login = $experiment->creator();
my $estate          = $experiment->state();
my $batchstate      = $experiment->batchstate();
my $expt_path       = $experiment->path();
my $isbatchexpt     = $experiment->batchmode();
my $cancelflag      = $experiment->canceled();
my $expt_locked     = $experiment->locked();
my $elabinelab      = $experiment->elabinelab();
my $lockdown        = $experiment->lockdown();
203 204 205 206 207

#
# Batch experiments get a different protocol to avoid races with the
# batch daemon. We can kill the experiment directly, but only if the
# batch daemon is not currently working on it. In this case, its the
208
# same as killing an experiment that is SWAPPED.
209
#
210
# XXX: This script is run from the batch daemon. 
211
#
212
if ($batch) {
213
    #
214 215
    # Sanity Check. If called from the daemon, must already be locked,
    # must be a batch experiment, and must be ACTIVE or SWAPPED.
216
    #
217 218 219 220 221 222 223 224 225
    die("*** $0:\n".
	"    Experiment $pid/$eid is supposed to be a batch experiment!\n")
	if (!$isbatchexpt);
    
    die("*** $0:\n".
	"    Batch experiment $pid/$eid should be locked!\n")
	if (!defined($expt_locked) ||
	    $batchstate ne BATCHSTATE_LOCKED());
	
226 227 228 229
    die("*** $0:\n".
	"    Batch experiment $pid/$eid is locked down; cannot be swapped!\n")
	if ($lockdown);

230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
    die("*** $0:\n".
	"    Batch experiment $pid/$eid is not in the correct state!\n".
	"    Currently $estate, but should be SWAPPED,QUEUED, or ACTIVE\n")
	if ($estate ne EXPTSTATE_ACTIVE &&
	    $estate ne EXPTSTATE_QUEUED &&
	    $estate ne EXPTSTATE_SWAPPED);
}
else {
    #
    # Called from user (via web interface).
    #
    if ($isbatchexpt) {
	#
	# This cancels a batchexp, telling the daemon if it has control.
	# If the daemon is not currently working on the experiment, we
	# can do it right away in this script. Otherwise must wait.
	#
247 248 249
	ExitWithStatus(1, "Batch experiment $pid/$eid is still canceling!")
	    if ($cancelflag);

250 251 252
	ExitWithStatus(1, "Batch experiment $pid/$eid is locked down!")
	    if ($lockdown);

253
	#
254 255 256
	# Set the canceled flag. This will prevent the batch_daemon
	# from trying to run it (once the table is unlocked). It might
	# already be running, but we deal with that by looking at the
257
	# state below.
258
	#
259
	$experiment->SetCancelFlag(EXPTCANCEL_TERM);
260

261
	#
262
	# If the state is QUEUED or SWAPPED, we can do it right away.
263 264
	# Otherwise, have to let the batch daemon deal with it.
	# 
265 266 267 268
	ExitWithStatus(0, 
		       "Batch experiment $pid/$eid has been canceled.\n".
		       "You will receive email when the experiment is\n".
		       "torn down and you can reuse the experiment name.")
269 270
	    if (($estate ne EXPTSTATE_QUEUED &&
		 $estate ne EXPTSTATE_SWAPPED) ||
271
		$batchstate ne BATCHSTATE_UNLOCKED());
272
    }
273 274
    else {
	#
275 276
	# If the cancel flag is set, then user must wait for that to clear 
	# before we can do anything else.
277 278 279 280 281 282 283
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid has its cancel flag set!\n".
		       "You must wait for that to clear before you can ".
		       "terminate the experiment.\n")
	    if ($cancelflag);
	
284 285 286 287
 	ExitWithStatus(1,
		       "Experiment $pid/$eid is locked down; cannot swap!\n")
	    if ($lockdown);

288 289 290 291 292 293 294 295
	#
	# Must be unlocked if called by the user.
	#
	ExitWithStatus(1,
		       "Experiment $pid/$eid went into transition at ".
		       "$expt_locked!\n".
		       "You must wait until it is no longer in transition.\n")
	    if (defined($expt_locked));
296

297 298 299 300 301
	#
	# Okay, check state. We do not allow termination to start when the
	# experiment is in transition. A future task would be to allow this,
	# but for now it is not allowed. 
	#
302 303
	ExitWithStatus(1,
		       "Experiment $pid/$eid is currently in transition.\n".
304 305 306 307 308
		       "You must wait until it is no longer $estate!")
	    if ($estate ne EXPTSTATE_SWAPPED() &&
		$estate ne EXPTSTATE_NEW() &&
		$estate ne EXPTSTATE_TERMINATED() &&
		$estate ne EXPTSTATE_ACTIVE());
309 310 311 312 313 314 315 316 317 318 319 320
	
	#
	# Must be an admin person to swap out an experiment that
	# has had its panic button pressed.
	#
	if ($estate eq EXPTSTATE_PANICED() && !$isadmin) {
	    ExitWithStatus(1,
			   "Experiment $pid/$eid had its panic ".
			   "button pressed!\n".
			   "Only a testbed administrator can swap ".
			   "this experiment out.");
	}
321
    }
Leigh Stoller's avatar
Leigh Stoller committed
322 323 324
}

#
325 326
# Lock the experiment and change state so no one can mess with it. We need
# to determine our next state before we unlock the table. 
327
#
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
if ($estate eq EXPTSTATE_ACTIVE) {
    $nextstate = EXPTSTATE_SWAPPING;
}
elsif ($estate eq EXPTSTATE_SWAPPED || 
       $estate eq EXPTSTATE_QUEUED) {
    $nextstate = EXPTSTATE_TERMINATING;
}
elsif ($estate eq EXPTSTATE_NEW ||
       $estate eq EXPTSTATE_TERMINATED) {
    $nextstate = EXPTSTATE_TERMINATED;
}
else {
    die("*** $0:\n".
	"    Experiment $pid/$eid appears to be in the wrong state: $estate\n");
}
343 344
# Unlock tables at same time.
$experiment->Lock($nextstate, 1);
345 346

#
347
# XXX - At this point a failure is going to leave things in an
348 349 350 351
# inconsistent state. Be sure to call fatal() only since we are
# going into the background, and we have to send email since no
# one is going to see printed error messages (output goes into the
# log file, which will be sent along in the email). 
352
#
353 354 355 356 357

#
# Get email address of the experiment head, which may be different than
# the person who is actually terminating the experiment, since its polite
# to let the original creator know whats going on. 
358
#
359 360
my $expt_head_name  = $swapper->name();
my $expt_head_email = $swapper->email();
Leigh Stoller's avatar
Leigh Stoller committed
361

362 363 364
#
# If not in batch mode, go into the background. Parent exits.
#
365
if (!$batch && !$template_mode) {
366 367 368
    # Cleanup
    $experiment->CleanLogFiles() == 0
	or fatal("Could not clean up logfiles!");
369 370 371

    $logfile = $experiment->CreateLogFile("endexp");
    if (!defined($logfile)) {
372 373
	fatal("Could not create logfile!");
    }
374 375 376 377 378 379
    $logname = $logfile->filename();
    # We want it to spew to the web.
    $experiment->SetLogFile($logfile);
    # Mark it open since we are going to start using it right away.
    $logfile->Open();
    
380 381 382 383 384
    if (my $childpid = TBBackGround($logname)) {
	#
	# Parent exits normally, except if in waitmode. 
	#
	if (!$waitmode) {
385 386 387 388 389 390 391 392 393
	    print("Experiment $pid/$eid is now terminating.\n".
		  "You will be notified via email when termination is ".
		  "complete.\n")
		if (! $quiet);
	    exit(0);
	}
	print("Waiting for experiment $eid to finish terminating.\n")
		if (! $quiet);
	
394
	if (-t && !$quiet) {
395 396 397
	    print("You may type ^C at anytime; you will be notified via email.".
		  "\n".
		  "You will not actually interrupt the experiment itself.\n");
398 399 400 401 402 403 404 405 406 407 408 409 410
	}
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

411
	#
412
	# Wait until child exits or until user gets bored and types ^C.
413
	#
414 415
	waitpid($childpid, 0);
	
416 417
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
418
	exit($? >> 8);
419
    }
420
    TBdbfork();
421
}
422 423 424
# Give the web page a chance to start looking at the log file before
# the entire experiment is gone.
sleep(1);
425

426 427 428 429 430 431 432 433
#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

434 435 436
#
# Sanity check states in case someone changes something.
#
437
if ($estate eq EXPTSTATE_ACTIVE) { 
438 439
    $experiment->PreSwap($this_user, TBDB_STATS_SWAPOUT) == 0 or
	fatal("Preswap failed!");
440

441
    if ($experiment->Swap($Experiment::EXPT_SWAPOUT) != 0) {
442 443 444
	fatal({type => 'secondary', severity => SEV_SECONDARY,
	       error => ['tbswap_out_failed']},
	      "tbswap out failed!");
Leigh Stoller's avatar
Leigh Stoller committed
445
    }
446 447 448
    $experiment->PostSwap($this_user, TBDB_STATS_SWAPOUT) == 0 or
	fatal("PostSwap failed!");

449 450 451 452
    $experiment->SetState(EXPTSTATE_TERMINATING) == 0
	or fatal("Failed to set experiment state to " .
		 EXPTSTATE_TERMINATING());

453
    $estate = EXPTSTATE_SWAPPED;
454

455 456
    # Commit the archive after swapout. Easier to use the script. 
    system("$archcontrol -t endexp commit $pid $eid");
Leigh Stoller's avatar
Leigh Stoller committed
457
}
458

459 460
if ($estate eq EXPTSTATE_SWAPPED ||
    $estate eq EXPTSTATE_QUEUED) {
461 462 463
    $experiment->GatherSwapStats($this_user, TBDB_STATS_TERMINATE, 0,
				 TBDB_STATS_FLAGS_START)
	== 0 or fatal("GatherSwapStats failed");
464 465

    if ($experiment->End() != 0) {  
466 467
	fatal("tbend failed!\n");
    }
468 469 470
    $experiment->SetState(EXPTSTATE_TERMINATED) == 0
	or fatal("Failed to set experiment state to " .EXPTSTATE_TERMINATED());
    
471
    $estate = EXPTSTATE_TERMINATED;
472 473 474 475

    #
    # Gather statistics for the swapout.
    #
476 477
    $experiment->GatherSwapStats($this_user, TBDB_STATS_TERMINATE)
	== 0 or fatal("GatherSwapStats failed");
Leigh Stoller's avatar
Leigh Stoller committed
478 479
}

480
# We better be here ...
481
$estate = $experiment->state();
482
if ($estate ne EXPTSTATE_TERMINATED) {
483 484
    fatal("Experiment is in the wrong state: $estate\n");
}
485

486 487 488
#
# Try to copy off the files for testbed information gathering.
#
489
$experiment->SaveLogFiles();
490

491 492 493 494
# Copy out the archive and then delete it.
print "Archiving and clearing the experiment archive ...\n";
libArchive::TBArchiveExperimentArchive($pid, $eid) == 0 or
    fatal("Could not archive experiment archive!");
495 496

#
497
# Terminate the log so the web page stops spewing.
498
#
499 500
if (!$template_mode) {
    print "Experiment $pid/$eid has been successfully terminated!\n";
501
    $experiment->ClearLogFile();
502
}
503

504
#
505 506
# Cleanup DB state and remove directory. Purge flag is optional and generally
# used by admins for cleaning up bad DB state caused by script errors.
507
#
508
$experiment->Delete($purge);
509 510

#
511
# In batch mode, exit now. 
512
#
513
if ($batch || $template_mode) {
514 515 516 517
    exit(0);
}

#
518
# Send email notification to user.
519
#
520 521
my $message =
    "Experiment `$eid' in project `$pid' has been terminated.\n" .
522
    "You may now reuse `$eid' as an experiment name.\n";
523

524
SENDMAIL("$user_name <$user_email>",
525
	 "Experiment $pid/$eid Terminated",
526 527 528
	 $message,
	 "$user_name <$user_email>",
	 "Cc:  $expt_head_name <$expt_head_email>\n".
529
	 "Bcc: $TBLOGS", ($logname));
530 531 532

exit 0;

533
sub fatal($;$)
534
{
535 536
    my $parms = {};
    $parms = shift if ref $_[0] eq 'HASH';
537
    my($mesg) = $_[0];
538
    
539
    tberror($parms, $mesg);
540

541
    $experiment->Unlock();
Leigh Stoller's avatar
Leigh Stoller committed
542

543
    # Copy over the log files so the user can see them.
544
    $experiment->CopyLogFiles();    
545

546 547 548 549 550 551
    #
    # In batch mode, exit without sending the email. 
    #
    if ($batch) {
	exit(-1);
    }
552 553 554 555 556
    
    #
    # Clear the log file so the web page stops spewing. 
    #
    if (defined($logname)) {
557
	$experiment->CloseLogFile();
558
    }
559 560

    #
561
    # Send a message to the testbed list. Append the logfile.
562
    #
563
    SENDMAIL("$user_name <$user_email>",
564
	     "Termination Failure: $pid/$eid",
565 566 567 568 569
	     $mesg,
	     "$user_name <$user_email>",
	     "Cc:  $expt_head_name <$expt_head_email>\n".
	     "Bcc: $TBOPS",
	     ($logname));
570 571 572

    exit(-1);
}