batchexp.in 36.2 KB
Newer Older
1
#!/usr/bin/perl -wT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2 3
#
# EMULAB-COPYRIGHT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
4
# Copyright (c) 2000-2006 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
5 6
# All rights reserved.
#
7 8
use English;
use Getopt::Std;
9
use POSIX qw(isatty setsid);
10
use POSIX qw(strftime);
11
use Errno qw(EDQUOT);
12 13

#
14 15 16 17
# Create an experiment. The experiment is either run immediately, or
# placed into the batch system to be run later. If no NS file is
# supplied, an experiment shell is created (this is currently an admin
# only option).
18
#
19
# TODO: Remove expt_expires and priority.
20
#       Add calls to check_slot() to verify inputs.
21
# 
Leigh B. Stoller's avatar
Leigh B. Stoller committed
22 23 24 25 26 27 28 29 30
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Everything okay.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
31 32
sub usage()
{
33
    print(STDERR
34
	  "Usage: batchexp [-q] [-i [-w]] [-n] [-f] [-E description] [-g gid]\n".
35
	  "                [-S reason] [-L reason] [-a <time>] [-l <time>]\n".
36
	  "                -p <pid> -e <eid> <nsfile>\n".
37
	  "switches and arguments:\n".
38 39 40
	  "-i       - swapin immediately; by default experiment is batched\n".
	  "-w       - wait for non-batchmode experiment to preload or swapin\n".
	  "-f       - preload experiment (do not swapin or queue yet)\n".
41
	  "-q       - be less chatty\n".
42 43
	  "-S <str> - Experiment cannot be swapped; must provide reason\n".
	  "-L <str> - Experiment cannot be IDLE swapped; must provide reason\n".
44
	  "-n       - Do not send idle email (internal option only)\n".
45 46
	  "-a <nnn> - Auto swapout nnn minutes after experiment is swapped in\n".
	  "-l <nnn> - Auto swapout nnn minutes after experiment goes idle\n".
47
	  "-s       - Save disk state on swapout\n".
48 49 50 51 52 53 54 55
	  "-E <str> - A pithy sentence describing your experiment\n".
	  "-p <pid> - The project in which to create the experiment\n".
	  "-g <gid> - The group in which to create the experiment\n".
	  "-e <eid> - The experiment name (unique, alphanumeric, no blanks)\n".
	  "<nsfile> - NS file to parse for experiment.\n");
    exit(-1);
}

56
sub ParseArgs();
57 58
sub CheckCopyArgs();
sub CopyInArchive();
59
sub fatal($;$);
60

61
my $optlist = "iE:g:e:p:S:L:a:l:sfwqt:nzc:bx:y:";	# Enough options?
62
my $batchmode= 1;
63 64
my $frontend = 0;
my $waitmode = 0;
65
my $quiet    = 0;
66
my $linktest = 0;	# non-zero means level to run at.
67 68
my $zeemode  = 0;	# Hey, out of options.
my $zeeopt   = "";	# To pass along.
69
my $savestate= 0;
70 71
my $template_mode  = 0;	# New stuff; experiment templates.
my $instance_idx   = 0;	# New stuff; experiment templates.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
72
my $archive_eid;	# New stuff; experiment templates.
73 74 75 76 77 78 79 80 81 82
# All of these are for experiment dup and branch. Really mucks things up.
# These globals are set when we parse the -c argument, but used later
my $copybranch = 0;	# A branch instead of a duplicate
my $copyfrom;		# Copy from where, archive or current experiment.
my $copyarg;            # The -c argument.
my $copyidx;            # The index of the experiment copied.
my $copypid;		# The pid of the experiment copied.
my $copyeid;		# The eid of the experiment copied.
my $copytag;		# The archive tag to us.
my $copydir;		# Directory extracted from archive, to delete.
83 84 85 86 87

#
# Configure variables
#
my $TB       = "@prefix@";
88
my $PROJROOT = "/proj";
89
my $EVENTSYS = @EVENTSYS@;
90 91
my $TBOPS    = "@TBOPSEMAIL@";
my $TBLOGS   = "@TBLOGSEMAIL@";
92 93
my $TBDOCBASE = "@TBDOCBASE@";
my $TBBASE   = "@TBBASE@";
94
my $CONTROL  = "@USERNODE@";
95

96 97 98 99 100 101
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
Kevin Atkinson's avatar
Kevin Atkinson committed
102
use libtblog;
103
use libArchive;
104
use Experiment;
105
use Template;
106

107 108 109 110
my $parser      = "$TB/libexec/parse-ns";
my $checkquota  = "$TB/sbin/checkquota";
my $tbbindir    = "$TB/bin/";
my $RSYNC	= "/usr/local/bin/rsync";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
111
my $errorstat=-1;
112 113
my $user_name;
my $user_email;
114
my $dbuid;
115
my $exptidx;
116
my $logname;
117

118 119 120 121 122 123 124
# Be careful not to exit on transient error; 0 means infinite retry.
$libdb::DBQUERY_MAXTRIES = 0;

# For the END block below.
my $cleaning = 0;
my $justexit = 1;
my $signaled = 0;
125 126 127 128 129 130

#
# Turn off line buffering on output
#
$| = 1;

131 132 133 134 135 136 137
#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

138 139
#
# Untaint the path
140
#
141 142
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
143 144
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

145 146 147 148 149
my $eid;
my $pid;
my $gid;
my $description;
my $tempnsfile;
150 151 152
my $swappable    = 1;
my $noswap_reason;
my $idleswap     = 1;
153
my $idleswaptime = 60 * TBGetSiteVar("idle/threshold");
154
my $noidleswap_reason;
155
my $autoswap     = 0;
156
my $autoswaptime = 10 * 60;
157 158 159
my $idleignore   = 0;
my $priority     = TB_EXPTPRIORITY_LOW;
my $exptstate    = EXPTSTATE_NEW();
160
my $batchstate   = BATCHSTATE_UNLOCKED();
161 162
my $now          = localtime();
my $committed    = 0;
163 164
my $experiment;
my $copy_experiment;
165

166
#
167
# Verify user and get his DB uid.
168
#
169
if (! UNIX2DBUID($UID, \$dbuid)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
170
    tbdie("You do not exist in the Emulab Database!");
171 172
}

173 174 175 176
#
# Get email info for user.
#
if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
177
    tbdie("Cannot determine your name and email address.");
178 179
}

180 181 182 183 184
#
# Before doing anything else, check for overquota ... lets not waste
# our time. Make sure user sees the error by exiting with 1.
#
if (system("$checkquota $dbuid") != 0) {
185 186
    tberror({cause => 'user', type => 'primary', severity => SEV_ERROR,
	     error => ['over_disk_quota', $CONTROL]},
187 188
	    "You are over your disk quota on $CONTROL; ".
	    "please login there and cleanup!");
189 190 191
    exit(1);
}

192
#
193
# Parse command arguments.
194
#
195
ParseArgs();
196
CheckCopyArgs();
197 198 199

#
# Sanity check them.
200
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
201 202 203
usage()
    if (!defined($pid) || !defined($eid));
usage()
204
    if (defined($tempnsfile) && defined($copyarg));
Leigh B. Stoller's avatar
Leigh B. Stoller committed
205

206 207 208
if (!defined($gid)) {
    $gid = $pid;
}
209
if (!defined($description)) {
210
    $description = "Created by $dbuid";
211
}
212
if (! $swappable && (!defined($noswap_reason) || $noswap_reason eq "")) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
213
    tbdie("Must provide a reason with -S option (not swappable reason)!");
214
}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
215
if (! $idleswap && (!defined($noidleswap_reason) || $noidleswap_reason eq "")){
Kevin Atkinson's avatar
 
Kevin Atkinson committed
216
    tbdie("Must provide a reason with -L option (no idleswap reason)!");
217
}
218
if (!defined($tempnsfile) && !defined($copyarg) && !TBAdmin($dbuid)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
219
    tbdie("Only admins can create experiments with no NS file");
220
}
221 222
my $nsfile  = "$eid.ns";
my $repfile = "$eid.report";
223

224
# Defaults for the DB and for the email message. 
225
$noswap_reason = "None Given"
226
    if (!defined($noswap_reason));
227
$noidleswap_reason = "None Given"
228 229
    if (!defined($noidleswap_reason));

230
#
231
# Make sure UID is allowed to create experiments in this project.
232
#
233
if (! TBProjAccessCheck($dbuid, $pid, $gid, TB_PROJECT_CREATEEXPT)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
234
    die("You do not have permission to create experiments in $pid/$gid");
235 236
}

237 238 239
if (defined($copyarg)) {
    # This will be handled below.
    ;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
240 241 242
}
elsif (!defined($tempnsfile)) {
    # If no NS file, then override swap/idle stuff. 
243 244 245
    $swappable     = 0;
    $idleswap      = 0;
}
246
elsif (! -f $tempnsfile || ! -r $tempnsfile || -z $tempnsfile) {
247
    # Exit so that user sees the error, not us.
248 249 250
    tberror({type => 'primary', severity => SEV_ERROR,
	     error => ['bogus_ns_file', $tempnsfile]},
	    "$tempnsfile does not exist or is not a readable file!");
251 252
    exit(1);
}
253

254 255 256 257
#
# Batch jobs get a shorter idle time
#
my $swaptime = $idleswaptime;
258
if ($batchmode && TBSiteVarExists("idle/batch_threshold")) {
259 260 261 262 263 264
    my $batchidleswaptime = TBGetSiteVar("idle/batch_threshold");
    if ($swaptime > $batchidleswaptime) {
	$swaptime = $batchidleswaptime;
    }
}

265
#
266 267
# Grab me a secret key for accessing tar/rpm files via the web interface.
# Grab another secret key for the event system HMACs.
268
#
269 270
my $webkey   = TBGenSecretKey();
my $eventkey = TBGenSecretKey();
271

272 273 274 275 276 277 278 279 280
#
# In wait mode, block SIGINT until we spin off the background process.
#
if ($waitmode) {
    $SIG{QUIT} = 'IGNORE';
    $SIG{TERM} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}

281
#
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
# Create an arg array of parameters.
#
my %args = ();

$args{'gid'}	  	   = $gid;
$args{'expt_head_uid'}	   = $dbuid;
$args{'expt_swap_uid'}	   = $dbuid;
$args{'state'}		   = $exptstate;
$args{'priority'}	   = $priority;
$args{'swappable'}	   = $swappable;
$args{'idleswap'}	   = $idleswap;
$args{'idleswap_timeout'}  = $swaptime;
$args{'autoswap'}	   = $autoswap;
$args{'autoswap_timeout'}  = $autoswaptime;
$args{'idle_ignore'}	   = $idleignore;
$args{'keyhash'}	   = $webkey;
$args{'eventkey'}	   = $eventkey;
$args{'batchmode'}	   = $batchmode;
$args{'batchstate'}	   = $batchstate;
$args{'linktest_level'}    = $linktest;
$args{'savedisk'}	   = $savestate;
$args{'instance_idx'}	   = $instance_idx;
# These are special; the library will DBQuote them. 
$args{'expt_name'}	   = $description;
$args{'noswap_reason'}	   = $noswap_reason;
$args{'noidleswap_reason'} = $noidleswap_reason;

# Now create the experiment; we get back a perl class instance.
if (! ($experiment = Experiment->Create($pid, $eid, \%args))) {
311 312 313
    tbdie({type => 'secondary', severity => SEV_SECONDARY,
	   error => ['create_experiment_record_failed']},
	  "Could not create a new experiment record!");
314 315
}

316 317 318 319 320 321
#
# At this point, we need to force a cleanup no matter how we exit.
# See the END block below.
#
$justexit = 0;

Kevin Atkinson's avatar
Kevin Atkinson committed
322 323 324 325 326
#
# Set error reporting info
# 
tblog_set_info($pid,$eid,$UID);

327
#
328
# Create a directory structure for the experiment.
329 330 331 332 333 334 335 336 337
# There is no need to do this for the template wrapper experiment; it was
# already done when the template was created.
#
if (!$template_mode || $instance_idx) {
    if ($experiment->CreateDirectory() != 0) {
	if (($? >> 8) == EDQUOT()) {
	    # Obey exit status protocol for web page; User should see this.
	    $errorstat = 1;
	}
338 339 340
	fatal({type => 'secondary', severity => SEV_SECONDARY,
	       error => ['create_experiment_directory_failed']},
	      "Failed to created experiment directory");
341
    }
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
}
else {
    #
    # But we do need to update the experiment record with the template path.
    #
    my $template = Template->LookupByPidEid($pid, $eid);

    fatal("Could not find template record for $pid/$eid")
	if (!defined($template));

    my %args = ();
    $args{'path'} = $template->path();
    
    $experiment->Update(\%args) == 0
	or fatal("Could not update experiment record!");
357 358
}

359 360 361 362
#
# Grab the working directory path, and thats where we work.
# The user's experiment directory is off in /proj space.
#
363 364
my $workdir = $experiment->WorkDir();
my $userdir = $experiment->UserDir();
365 366 367 368

chdir("$workdir") or
    fatal("Could not chdir to $workdir: $!");

369 370 371 372
#
# Create a new archive, which might actually be a branch of an existing one
# when doing an experiment branch (fork).
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
373
if ($template_mode && defined($archive_eid)) {
374 375 376
    fatal({type => 'secondary', severity => SEV_SECONDARY,
	   error => ['archive_op_failed', 'create', undef, undef]},
	  "Could not create experiment archive!")
Leigh B. Stoller's avatar
Leigh B. Stoller committed
377 378 379 380 381
	if (libArchive::TBForkExperimentArchive($pid, $eid,
						$pid, $archive_eid, undef)
	    < 0);
}
elsif ($copybranch) {
382
    # Currently, support branching from existing experiment only.
383 384 385
    fatal({type => 'secondary', severity => SEV_SECONDARY,
	   error => ['archive_op_failed', 'create', undef, undef]},
	  "Could not create experiment archive!")
386 387 388 389 390
	if (libArchive::TBForkExperimentArchive($pid, $eid,
						$copypid,
						$copyeid, $copytag) < 0);
}
elsif (libArchive::TBCreateExperimentArchive($pid, $eid) < 0) {
391 392 393
    fatal({type => 'secondary', severity => SEV_SECONDARY,
	   error => ['archive_op_failed', 'create', undef, undef]},
	  "Could not create experiment archive!");
394 395 396 397 398 399 400 401 402 403 404 405
}

#
# Okay, if copying/branching an experiment, we have to go find the
# NS file, extracting the special (currently by convention) archive
# directory into the new experiment. This will set the tempnsfile
# variable needed below.
#
if ($copyarg) {
    CopyInArchive();
}

406 407 408 409 410 411 412 413 414 415
#
# It would be nice to check for overquota during the rest of this
# setup, but thats going to be a pain. Besides, its very unlikely
# that someone goes overquota at this point. Typically, the person
# is already overquota at this point. If this turns out to be wrong,
# it will be easy to add a little test to make sure there is at least
# a reasonable amount of room to proceed (create temp file and stick
# some data in it).
# 

416 417 418 419
#
# Dump the eventkey into a file in the experiment directory. 
#
if ($EVENTSYS) {
420
    open(KEY, ">" . $experiment->EventKeyPath()) or
421 422 423 424 425
	fatal("Could not create eventkey file: $!");
    print KEY $eventkey;
    close(KEY);
}

426
# And dump the web key too.
427
open(KEY, ">" . $experiment->WebKeyPath()) or
428 429 430 431
    fatal("Could not create webkey file: $!");
print KEY $webkey;
close(KEY);

432
#
433 434 435
# If no NS file, we are done. We must unlock it and reset its state
# appropriately. We leave the experiment in the "new" state so that
# the user is forced to do a modify first (to give it a topology). 
436
#
437
if (!defined($tempnsfile)) {
438
    $experiment->Unlock(EXPTSTATE_NEW());
439 440
    exit(0);
}
441

442
#
443
# Now we can get the NS file!
444
#
445
if (system("/bin/cp", "$tempnsfile", "$nsfile")) {
446 447 448
    fatal({type => 'primary', severity => SEV_ERROR,
	   error => ['copy_ns_file_failed', $tempnsfile, $nsfile]},
	  "Could not copy $tempnsfile to $workdir/$nsfile");
449
}
450
chmod(0664, "$nsfile");
451

452
#
453 454
# Run parse in impotent mode on the NS file.  This has no effect but
# will display any errors.
455
#
456
if (system("$parser -n $zeeopt $pid $gid $eid $nsfile") != 0) {
Leigh B. Stoller's avatar
Leigh B. Stoller committed
457 458
    # Obey exit status protocol for web page.
    $errorstat = 1;
459 460 461
    fatal({type => 'secondary', severity => SEV_SECONDARY,
	   error => ['ns_parse_failed']},
	  "NS Parse failed!");
462 463
}

464 465 466
#
# Gather statistics; start the clock ticking.
#
467
if ($frontend || $batchmode) {
468 469 470 471 472 473 474 475
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_PRELOAD, 0,
		    TBDB_STATS_FLAGS_START);
}
else {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_START, 0,
		    TBDB_STATS_FLAGS_START);
}

476 477 478
goto skiplog
    if ($template_mode);

479 480 481 482 483 484
#
# The rest of this goes into the background so that the user sees
# immediate response. We will send email later when the experiment
# is ready. In waitmode, we hold the parent waiting so that the user
# can script it. Must protect against async (^C) termination though.
#
485 486
if ($experiment->CreateLogFile("startexp", \$logname) != 0) {
    fatal("Could not create logfile!");
487
    
488 489 490 491
}
$experiment->SetLogFile($logname);
$experiment->OpenLogFile($logname);

492 493
if (my $childpid = TBBackGround($logname)) {
    #
494 495
    # Parent exits normally, unless in waitmode. We have to set
    # justexit to make sure the END block below does not run.
496
    #
497 498
    $justexit = 1;
    
499 500 501
    if (!$waitmode) {
	print("Experiment $pid/$eid is now configuring\n".
 	      "You will be notified via email when the experiment is ".
502 503
	      "ready to use\n")
	    if (! $quiet);
504 505
	exit(0);
    }
506
    print("Waiting for " . ($batchmode ? "batch " : "") . "experiment $eid ".
507 508 509 510 511 512 513
	  "to finish " . ($frontend ? "preloading." : "swapping in.") . "\n")
	if (! $quiet);

    if (isatty(STDIN) && !$quiet) {
	print("You may type ^C at anytime; you will be notified via email.\n".
	      "You will not actually interrupt the experiment itself.\n");
    }
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530

    # Give child a chance to run.
    select(undef, undef, undef, 0.25);
	
    #
    # Reset signal handlers. User can now kill this process, without
    # stopping the child.
    #
    $SIG{TERM} = 'DEFAULT';
    $SIG{QUIT} = 'DEFAULT';
    $SIG{INT}  = 'DEFAULT';

    #
    # Wait until child exits or until user gets bored and types ^C.
    #
    waitpid($childpid, 0);

531 532
    print("Done. Exited with status: $?\n")
	if (! $quiet);
Kevin Atkinson's avatar
 
Kevin Atkinson committed
533 534 535 536 537 538 539 540 541

    my $exit_code = $? >> 8;
    
    if ($exit_code != 0) {
	my $d = tblog_lookup_error();
	print tblog_format_error($d);
    }
    
    exit $exit_code;
542
}
543
TBdbfork();
544
skiplog:
545 546 547 548 549 550 551 552 553 554 555 556

# We are committed now. Affects how fatal() operates.
$committed = 1;

#
# When in waitmode, must put ourselves in another process group so that
# an interrupt to the parent will not have any effect on the backend.
#
if ($waitmode) {
    POSIX::setsid();
}

557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576
#
# We need to catch TERM cause sometimes shit happens and we have to kill
# an experiment setup that is hung or otherwise scrogged. Rather then 
# trying to kill off the children one by one, lets arrange to catch it
# here and send a killpg to the children. This is not to be done lightly,
# cause it can leave things worse then they were before!
#
sub handler ($) {
    my ($signame) = @_;
    
    $SIG{TERM} = 'IGNORE';
    my $pgrp = getpgrp(0);
    kill('TERM', -$pgrp);
    sleep(1);
    $signaled = 1;
    fatal("Caught SIG${signame}! Killing experiment setup ...");
}
$SIG{TERM} = \&handler;
$SIG{QUIT} = 'DEFAULT';

577 578
#
# The guts of starting an experiment!
579
#
580 581
# A batch experiment is essentially preloaded (frontend mode) and then
# dropped into the batch queue, unless the user requested only preload.
582 583
#

584
#
585
# Run the various scripts. We want to propagate the error from tbprerun
586 587
# and tbrun back out, hence the bogus looking errorstat variable.
#
588
$experiment->SetState(EXPTSTATE_PRERUN) == 0
589
    or fatal("Failed to set experiment state to " . EXPTSTATE_PRERUN());
590 591

if ($experiment->PreRun($nsfile, ($zeeopt ? "-z" : ""))) {
592
    $errorstat = $? >> 8;
593 594 595
    fatal({type => 'secondary', severity => SEV_SECONDARY,
	   error => ['tbprerun_failed']},
	  "tbprerun failed!");
596
}
597 598

$experiment->SetState(EXPTSTATE_SWAPPED) == 0
599
    or fatal("Failed to set experiment state to " . EXPTSTATE_SWAPPED());
600 601 602 603

#
# If not in frontend mode (preload only) continue to swapping exp in.
# 
604
if (! ($frontend || $batchmode)) {
605
    $experiment->SetState(EXPTSTATE_ACTIVATING) == 0
606
	or fatal("Failed to set experiment state to ". EXPTSTATE_ACTIVATING());
607

608
    if ($experiment->Swap("in") != 0) {
609
	$errorstat = $? >> 8;
610 611 612
	fatal({type => 'secondary', severity => SEV_SECONDARY,
	       error => ['tbswap_in_failed']},
	      "tbswap in failed!");
613
    }
614 615
    
    $experiment->SetState(EXPTSTATE_ACTIVE) == 0
616
	or fatal("Failed to set experiment state to " . EXPTSTATE_ACTIVE());
617 618 619 620 621

    #
    # Look for the unsual case of more than 2 nodes and no vlans. Send a
    # warning message.
    #
622 623 624 625
    my @localnodes = ();
    fatal("Could not get local node list for $pid/$eid")
	if ($experiment->LocalNodeList(\@localnodes));

Leigh B. Stoller's avatar
Leigh B. Stoller committed
626
    if (@localnodes && scalar(@localnodes) > 2) {
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
	my $vlans_result =
	    DBQueryFatal("select pid from virt_lans ".
			 "where pid='$pid' and eid='$eid'");
    
	if (!$vlans_result->numrows) {
	    SENDMAIL("$user_name <$user_email>",
		     "WARNING: Experiment Configuration: $pid/$eid",
		     "This experiment has zero network links defined.\n".
		     "Please check your NS file to verify this is what you ".
		     "want!\n",
		     "$user_name <$user_email>",
		     "Cc: $TBOPS", ($nsfile));
	}
    }
}

# We append this report in the email message below.
644
if ($experiment->Report($repfile, "-b") != 0) {
645 646 647
    fatal({type => 'secondary', severity => SEV_SECONDARY,
	   error => ['tbreport_failed']},
	  "tbreport failed!");
648 649
}

Leigh B. Stoller's avatar
Leigh B. Stoller committed
650 651 652 653 654
# Latest log is always called the same thing.
if (defined($logname)) {
    system("cp -fp $logname $workdir/" . EXPTLOGNAME());
}

655
#
656
# Save a copy of the files for testbed information gathering (long term).
657
#
658
$experiment->SaveLogFiles();
659 660 661 662 663

#
# Make a copy of the work dir in the user visible space so the user
# can see the log files.
#
664
$experiment->CopyLogFiles();
665

666 667 668 669 670 671
#
# Copy out some files to the archive directory so that they get saved later.
#
my $archivedir = libArchive::TBUserFileArchiveDirectory($pid, $eid);

if (! -e "$archivedir/tbdata") {
672
    mkdir("$archivedir/tbdata", 0775)
673 674 675
	or fatal("Failed to mkdir $archivedir/tbdata");
}
if (! -e "$archivedir/nsdata") {
676
    mkdir("$archivedir/nsdata", 0775)
677 678 679
	or fatal("Failed to mkdir $archivedir/nsdata");
}

680
system("cp -fp $workdir/$nsfile $archivedir/nsdata/nsfile.ns") == 0
681 682
    or fatal("Failed to copy nsfile to $archivedir/tbdata");

Leigh B. Stoller's avatar
Leigh B. Stoller committed
683
if (defined($logname)) {
684
    system("cp -fp $logname $archivedir/tbdata/log.batchexp") == 0
685
    or fatal("Failed to copy logfile to $archivedir/tbdata");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
686
}
687

688
system("cp -fp $workdir/$repfile $archivedir/tbdata/tbreport.batchexp") == 0
689 690 691 692 693
    or fatal("Failed to copy nsfile to $archivedir/tbdata");

# And tell the archive library about the above files.
libArchive::TBExperimentArchiveAddUserFiles($pid, $eid) == 0
    or fatal("Failed to add user archive files to the archive!");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
694

695
#
696 697
# Do a SavePoint on the experiment files. In template mode, let the wrapper
# deal with this. Avoids duplication of work.
698
#
699 700 701 702 703 704 705
if (! $template_mode) {
    print "Doing a savepoint on the experiment archive ...\n";
    if (libArchive::TBExperimentArchiveSavePoint($pid, $eid, "startexp") < 0) {
	fatal({type => 'secondary', severity => SEV_SECONDARY,
	       error => ['archive_op_failed', 'savepoint', undef, undef]},
	      "Failed to do a savepoint on the experiment archive!");
    }
706 707
}

708 709 710 711
#
# If this is a branch, then do a commit. Otherwise, the archive looks
# wrong cause its a branch from the original and shows all those files.
#
712
if ($copybranch && !$template_mode) {
713 714
    print "Doing a commit on the experiment archive ...\n";
    libArchive::TBCommitExperimentArchive($pid, $eid, "branch_merge") == 0 or
715 716 717
	fatal({type => 'secondary', severity => SEV_SECONDARY,
	       error => ['archive_op_failed', 'commit', undef, undef]},
	      "Failed to commit experiment archive!");
718 719
}

720 721 722 723 724 725 726 727 728 729
#
# Gather statistics.
#
if ($frontend || $batchmode) {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_PRELOAD, 0);
}
else {
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_START, 0);
}

730
#
731
# Set accounting stuff, but on success only, and *after* gathering swap stats!
732
#
733
$experiment->SetSwapInfo($dbuid);
734

735 736 737
#
# Close up the log file so the webpage stops.
#
738 739
if (!$template_mode) {
    print "Experiment $pid/$eid has been successfully created!\n";
740
    $experiment->CloseLogFile();
741
}
742 743 744 745

#
# Must unlock and drop batch experiments into the queue before exit.
#
746
if ($batchmode && !$frontend) {
747
    $experiment->Unlock(EXPTSTATE_QUEUED());
748 749
}
else {
750
    $experiment->Unlock();
751 752 753 754 755 756
}

#
# Clear the cancel flag now that the operation is complete. Must be
# done after we change the experiment state (above).
#
757
$experiment->SetCancelFlag(EXPTCANCEL_CLEAR());
758

759 760 761 762 763 764
#
# In template_mode we are done; the caller finishes up.
#
exit(0)
    if ($template_mode);

765 766 767
#
# Dump the report file and the log file to the user via email. 
#
768
my ($expt_created) = $experiment->created();
769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786
my $message;

if ($frontend) {
    $message =
	"Your experiment `$eid' in project `$pid' has been created.\n" .
	"You can check the web interface to see if it looks the way\n" .
	"you expected it to. If so, you may swap the experiment in,\n" .
	"or terminate it, at any time.\n" .
        "\n";
}
else {
    $message =
	"Your experiment `$eid' in project `$pid' has been started.\n" .
	"Here is the experiment summary detailing the nodes that were\n" .
	"allocated to you. You may use the `Qualified Name' to log on\n" .
	"to your nodes. See /etc/hosts on your nodes (when running\n" .
	"FreeBSD, Linux, or NetBSD) for the IP mapping on each node.\n" .
        "\n";
787
}
788 789 790 791 792 793 794 795
$message .=
    "User:        $user_name\n" .
    "EID:         $eid\n" .
    "PID:         $pid\n" .
    "GID:         $gid\n" .
    "Description: $description\n" .
    "Swappable:   " . ($swappable ? "Yes\n" :
		                    "No  (Reason: $noswap_reason)\n") .
796
    "Idle-Swap:   " . ($idleswap  ? "Yes, at " . $idleswaptime/60.0 . " hours\n":
797
		                    "No  (Reason: $noidleswap_reason)\n") .
798
    "Auto-Swap:   " . ($autoswap  ? "Yes, at " . $autoswaptime/60.0 . " hours\n":
799 800 801 802 803 804 805 806 807 808 809 810 811 812
		                    "No\n") .
    "Created:     $expt_created\n".
    "Directory:   $userdir\n".
    "\n".
    "Appended at the end is the output of the experiment setup. If you\n" .
    "have any questions or comments, please include the output below\n" .
    "in your message to $TBOPS";

SENDMAIL("$user_name <$user_email>",
	 "New Experiment " . (($frontend == 0) ? "Started" : "Created") .
	 ": $pid/$eid",
	 $message,
	 "$user_name <$user_email>",
	 "Bcc: $TBLOGS",
813 814
	 ($repfile, $logname, $nsfile))
    if (!$zeemode);
815 816

# Done!
817
exit(0);
818

819 820 821
#
#
#
822
sub cleanup()
823
{
824
    #
825 826 827 828 829 830
    # Failed early (say, in parsing). No point in keeping any of the
    # stats or resource records. Just a waste of space since the
    # testbed_stats log indicates there was a failure and why (sorta,
    # via the exit code).
    # 
    if (!$committed) {
831 832 833
	# Completely remove all trace of the archive.
        libArchive::TBDestroyExperimentArchive($pid, $eid);

834
	#
835
	# Clear the experiment record and cleanup directories
836
	#
837 838
	$experiment->Delete(1)
	    if (defined($experiment));
839

840
	return;
841
    }
842

843 844 845 846 847 848 849 850 851
    #
    # Gather statistics.
    #
    if ($frontend) {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_PRELOAD, $errorstat);
    }
    else {
	GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_START, $errorstat);
    }
852
    $experiment->Refresh();
853

854 855 856
    #
    # Must clean up the experiment if it made it our of NEW state.
    #
857
    my $estate = $experiment->state();
858 859 860 861 862 863 864 865
    if ($estate ne EXPTSTATE_NEW) {
	#
	# We do not know exactly where things stopped, so if the
	# experiment was activating when the signal was delivered,
	# run tbswap on it. 
	# 
	if ($estate eq EXPTSTATE_ACTIVE ||
	    ($estate eq EXPTSTATE_ACTIVATING && $signaled)) {
866 867
	    if ($experiment->Swap("out", "-force") != 0) {
		print "tbswap out -force failed!\n";
868
	    }
869
	    $experiment->SetState(EXPTSTATE_SWAPPED);
870
	}
871
	
872
	if ($experiment->End("-force") != 0) {
873
	    print "tbend failed!\n";
874 875
	}
    }
876
    $experiment->SetState(EXPTSTATE_TERMINATED);
877

878
    #
879 880 881 882
    # Okay, we *are* going to terminate the experiment.
    # 
    GatherSwapStats($pid, $eid, $dbuid, TBDB_STATS_TERMINATE, 0);

883 884
    # Clear the logfile so the webpage stops.
    $experiment->CloseLogFile();
885

Kevin Atkinson's avatar
 
Kevin Atkinson committed
886 887 888 889 890
    #
    # Figure out the error if possible
    #
    my $error_data = tblog_find_error();

891 892 893
    #
    # Send a message to the testbed list. 
    #
Kevin Atkinson's avatar
 
Kevin Atkinson committed
894 895 896 897 898 899
    tblog_email_error($error_data,
		      "$user_name <$user_email>",
		      "Config Failure", "$pid/$eid",
		      "$user_name <$user_email>",
		      "",
		      "Cc: $TBOPS",
Kevin Atkinson's avatar
 
Kevin Atkinson committed
900
		      "",
Kevin Atkinson's avatar
 
Kevin Atkinson committed
901
		      ($logname, "assign.log", "wanassign.log", $nsfile));
902

903
    #
904 905 906 907
    # Back up the work dir for post-mortem debugging. 
    #
    system("/bin/rm -rf  ${workdir}-failed");
    system("/bin/mv -f   $workdir ${workdir}-failed");
908

909 910
    #
    # Clear the record and cleanup.
911 912
    #
    $experiment->Delete();
913 914 915
}

#
916 917
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
918
#
919
sub ParseArgs()
920
{
921 922 923 924
    my %options = ();
    if (! getopts($optlist, \%options)) {
	usage();
    }
925

926
    if (@ARGV > 1) {
927 928
	usage();
    }
929 930
    if (@ARGV == 1) {
	$tempnsfile = $ARGV[0];
931

932
	# Note different taint check (allow /).
933
	if ($tempnsfile =~ /^([-\w\.\/]+)$/) {
934 935 936
	    $tempnsfile = $1;
	}
	else {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
937
	    tbdie("Bad data in nsfile: $tempnsfile");
938
	}
939

940 941 942 943 944 945 946 947 948 949
	#
	# Called from ops interactively. Make sure NS file in /proj or /users.
	#
	# Use realpath to resolve any symlinks.
	#
	my $translated = `realpath $tempnsfile`;
	if ($translated =~ /^([-\w\.\/]+)$/) {
	    $tempnsfile = $1;
	}
	else {
950 951 952
	    tbdie({type => 'primary', severity => SEV_ERROR,
		   error => ['bad_data', 'realpath', $translated]},
		  "Bad data returned by realpath: $translated");
953 954 955 956 957 958 959 960 961
	}

	#
	# The file must reside in /proj, /groups, or /users. Since this script
	# runs as the caller, regular file permission checks ensure its a file
	# the user is allowed to use. /tmp/$pid-$eid.nsfile.XXXXX also allowed
	# since this script is invoked directly from web interface.
	#
	if (! ($tempnsfile =~ /^\/tmp\/[-\w]+-\d+\.nsfile/) &&
962
	    ! ($tempnsfile =~ /^\/tmp\/\d+\.ns/) &&
963 964 965 966
	    ! ($tempnsfile =~ /^\/var\/tmp\/php\w+/) &&
	    ! ($tempnsfile =~ /^\/proj/) &&
	    ! ($tempnsfile =~ /^\/groups/) &&
	    ! ($tempnsfile =~ /^\/users/)) {
967 968 969
	    tberror({type => 'primary', severity => SEV_ERROR,
		     error => ['disallowed_directory', $tempnsfile]},
		    "$tempnsfile does not resolve to an allowed directory!");
970 971 972 973 974
	    # Note positive status; so error goes to user not tbops.
	    exit(1);
	}
    }
    
975
    if (defined($options{"i"})) {
976
	$batchmode = 0;
977
    }
978 979 980
    if (defined($options{"f"})) {
	$frontend = 1;
    }
981 982 983
    if (defined($options{"q"})) {
	$quiet = 1;
    }
984 985 986 987
    if (defined($options{"z"})) {
	$zeemode = 1;
	$zeeopt  = "-p";
    }
988 989 990 991
    # This option should not be exported via the XMLRPC server. 
    if (defined($options{"n"})) {
	$idleignore = 1;
    }
992

993 994 995 996
    if (defined($options{"s"})) {
	$savestate = 1;
    }

Leigh B. Stoller's avatar
Leigh B. Stoller committed
997 998 999 1000 1001
    #
    # Clone an experiment, either an existing experiment or an old one
    # (using the archive). 
    #
    if (defined($options{"c"})) {
1002
	$copyarg = $options{"c"};
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1003

1004 1005
	if (! (($copyarg =~ /^([-\w]+),([-\w]+)(?::[-\w]*)?$/) ||
	       ($copyarg =~ /^(\d+)(?::[-\w]*)?$/))) {
1006 1007 1008
	    tbdie({type => 'primary', severity => SEV_ERROR,
		   error => ['bad_data', 'argument', $copyarg]},
		  "Bad data in argument: $copyarg");
1009 1010 1011 1012
	}
	# This option only makes sense with -c option.
	if (defined($options{"b"})) {
	    $copybranch = 1;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1013 1014 1015
	}
    }

1016 1017 1018
    #
    # pid,eid,gid get passed along as shell commands args; must taint check.
    # 
1019 1020
    if (defined($options{"p"})) {
	$pid = $options{"p"};
1021

1022
	if ($pid =~ /^([-\w]+)$/) {
1023 1024
	    $pid = $1;
	}
1025
	else {
1026 1027 1028
	    tbdie({type => 'primary', severity => SEV_ERROR,
		   error => ['bad_data', 'argument', $pid]},
		  "Bad data in argument: $pid.");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1029
	}
1030 1031 1032 1033
    }
    if (defined($options{"e"})) {
	$eid = $options{"e"};

1034
	if ($eid =~ /^([-\w]+)$/) {
1035 1036 1037
	    $eid = $1;
	}
	else {
1038 1039 1040
	    tbdie({type => 'primary', severity => SEV_ERROR,
		   error => ['bad_data', 'argument', $eid]},
		  "Bad data in argument: $eid.");
1041
	}
1042 1043
	if (! TBcheck_dbslot($eid, "experiments", "eid",
			   TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) {
1044 1045 1046
	    tbdie({type => 'primary', severity => SEV_ERROR,
		   error => ['bad_data', 'eid', $eid]},
		  "Improper experiment name (id)!");
1047
	}
1048 1049 1050 1051
    }
    if (defined($options{"g"})) {
	$gid = $options{"g"};

1052
	if ($gid =~ /^([-\w]+)$/) {
1053
	    $gid = $1;
1054
	}
1055
	else {
1056 1057 1058
	    tbdie({type => 'primary', severity => SEV_ERROR,
		   error => ['bad_data', 'argument', $gid]},
		  "Bad data in argument: $gid.");
1059 1060
	}
    }
1061
    if (defined($options{"E"})) {
1062 1063
	if (! TBcheck_dbslot($options{"E"}, "experiments", "expt_name",
			   TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
1064
	    tbdie("Improper experiment description!");
1065
	}
1066
	$description = $options{"E"};
1067
    }
1068 1069 1070
    if (defined($options{"S"})) {
	if (! TBcheck_dbslot($options{"S"}, "experiments", "noswap_reason",
			   TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
1071
	    tbdie("Improper noswap reason!");
1072 1073
	}
	$swappable     = 0;
1074
	$noswap_reason = $options{"S"};
1075 1076 1077 1078
    }
    if (defined($options{"L"})) {
	if (! TBcheck_dbslot($options{"L"}, "experiments", "noidleswap_reason",
			   TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
1079
	    tbdie("Improper noidleswap reason!");
1080 1081
	}
	$idleswap          = 0;
1082
	$noidleswap_reason = $options{"L"};
1083
    }
1084
    if (defined($options{"l"})) {
1085 1086
	if (! TBcheck_dbslot($options{"l"}, "experiments", "idleswap_timeout",
			   TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) {
Kevin Atkinson's avatar
 
Kevin Atkinson committed
1087