All new accounts created on Gitlab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

startexp.in 13 KB
Newer Older
1 2
#!/usr/bin/perl -wT
use English;
3
use Getopt::Std;
4

5 6 7 8
#
# This gets invoked from the Web interface. CD into the proper directory
# and do the tb stuff.
#
9 10 11 12
# The -b (batch) argument is so that this script can be part of a batchmode
# that starts/ends experiments offline. In that case, we don't want to put
# it into the background and send email, but just want an exit status 
# returned to the batch system.
13
#
14 15 16 17 18
# XXX - The -b option takes a logfile name. This is so this script can
#       save off the file in the expinfo directory. The caller (batch daemon) 
#       opens the file and just passes the name in. I do not like this!
#
# usage: startexp [-b logfile] <pid> <eid> <nsfile>
19 20 21
#
sub usage()
{
22
    print STDOUT "Usage: startexp [-b logfile] <pid> <eid> <nsfile>\n";
23 24
    exit(-1);
}
25
my  $optlist = "b:";
26

27 28 29
#
# Configure variables
#
30 31 32
my $TB       = "@prefix@";
my $DBNAME   = "@TBDBNAME@";
my $TBOPS    = "@TBOPSEMAIL@";
33
my $TBLOGS   = "@TBLOGSEMAIL@";
34
my $TBINFO   = "$TB/expinfo";
35 36 37 38

my $tbdir    = "$TB/bin/";
my $projroot = "/proj";
my $tbdata   = "tbdata";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
39
my $estate   = 0;
40
my $batch    = 0;
41
my $logname  = 0;
42
my $errorstat= -1;
43

44 45 46 47 48
#
# For debugging all this goo. Leaves the experiment directory intact,
# and placed in a subdir of the project directory.
# 
my $debug    = 1;
49

50 51 52 53 54
#
# Turn off line buffering on output
#
$| = 1;

55
#
56 57 58 59
# Untaint the path
# 
$ENV{'PATH'} = '/bin:/usr/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
60

61
# Testbed Support library
62 63
use lib "@prefix@/lib";
use libtbsetup;
64

65
#
66 67 68 69 70 71 72
# Parse command arguments. Once we return from getopts, all that should
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
73
if (@ARGV != 3) {
74
    usage();
75
}
76 77
my $pid   = $ARGV[0];
my $eid   = $ARGV[1];
78
my $tempfile = $ARGV[2];
79
if (defined($options{"b"})) {
80 81 82 83 84 85 86 87 88
    $batch   = 1;
    $logname = $options{"b"};

    # Note different taint check (allow /).
    if ($logname =~ /^([-\@\w.\/]+)$/) {
	$logname = $1;
    } else {
	die "Bad data in logfile name: $logname";
    }
89
}
90 91

#
92
# Untaint the arguments.
93
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
94
if ($pid =~ /^([-\@\w]+)$/) {
95
    $pid = $1;
96
}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
97
if ($eid =~ /^([-\@\w]+)$/) {
98 99
    $eid = $1;
}
100
# Note different taint check (allow /).
101
if ($tempfile =~ /^([-\w.\/]+)$/) {
102
    $tempfile = $1;
103
}
104 105 106
else {
    die("Tainted tempfile name: $tempfile");
}
107

108
my $piddir  = "$projroot/$pid";
109 110 111 112 113
my $expdir  = "$piddir/exp";
my $eiddir  = "$expdir/$eid";
my $nsfile  = "$eid.ns";
my $irfile  = "$eid.ir";
my $repfile = "$eid.report";
114
my $tempns  = "$tempfile.$$";
115 116
my $user_name  = "Startexp Script";
my $user_email = "$TBOPS";
117 118

#
119 120 121 122 123 124
# Set up for querying the database.
# 
use Mysql;
my $DB = Mysql->connect("localhost", $DBNAME, "script", "none");

#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
125 126 127 128 129 130 131
# Check to make sure the experiment record exists. The experiment must
# be in the "new" state when using this interface. Th prevents the same
# pid/eid from being used twice, as could happen by mistake if the user
# invoked this directly without being clueful. The goal is to eventually
# script experiment creation and termination completely.
#
# Note that tbprerun is going verify and change this state.
132 133
#
$query_result =
Leigh B. Stoller's avatar
Leigh B. Stoller committed
134
    $DB->query("SELECT state FROM experiments ".
135 136 137 138 139 140 141 142 143 144
	       "WHERE eid='$eid' and pid='$pid'");

if (! $query_result) {
    fatal("DB Error getting experiment record $pid/$eid\n");
}
if ($query_result->numrows < 1) {
    print STDOUT "No experiment record for $pid/$eid exists!\n";
    exit(1);
}
@row = $query_result->fetchrow_array();
Leigh B. Stoller's avatar
Leigh B. Stoller committed
145 146
if ($row[0] ne "new") {
    die("Experiment $pid/$eid is already configured (or still configuring)!\n".
147 148
	"You are not allowed to reconfigure experiments unless you\n".
	"first terminate the existing experiment via the web interface.\n");
149 150
}

151
#
152
# Get some user information. 
153 154
#
$query_result =
155
    $DB->query("SELECT uid,usr_name,usr_email from users ".
156 157 158 159 160 161 162
	       "WHERE unix_uid='$EUID'");

if (! $query_result) {
    fatal("DB Error getting user information for uid $EUID\n");
}
if ($query_result->numrows < 1) {
    print STDOUT "Go Away! You do not exist in the Emulab Database.\n";
163 164 165
    exit(1);
}

166
@row = $query_result->fetchrow_array();
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
$uid        = $row[0];
$user_name  = $row[1];
$user_email = $row[2];

#
# Verify that this person is allowed to start the experiment. Must be
# in the project membership table.
#
$query_result =
    $DB->query("SELECT pid FROM proj_memb ".
	       "WHERE uid=\"$uid\" and pid=\"$pid\"");

if (! $query_result) {
    fatal("DB Error getting project membership for uid $uid\n");
}
if ($query_result->numrows == 0) {
    print STDOUT "Go Away! You are not a member of project $pid\n";
    exit(1);
}
186 187 188 189 190

#
# Copy the nsfile from wherever the web server stuffed it into a temporary
# file. The web server is going to delete it once this script returns.
#
191
if (system("/bin/cp", "$tempfile", "$tempns") != 0) {
192 193
    print STDOUT "Could not copy $tempfile to $tempns.\n";
    exit(1);
194
}
195
chmod(0770, "$tempns");
196 197 198 199 200 201

#
# The rest of this goes into the background so that the user sees
# immediate response. We will send email later when the experiment
# is actually torn down.
#
202 203 204 205 206 207 208 209 210 211 212
if (! $batch) {
    if (background()) {
	#
	# Parent exits normally
	#
	print STDOUT
	    "Experiment $pid/$eid is now configuring\n".
	    "You will be notified via email when the experiment is ".
	    "ready to use\n";
	exit(0);
    }
213 214 215 216 217 218 219 220 221
}

#
# Create a directory structure for the experiment in the project directory.
#
if (system("$TB/libexec/mkexpdir $pid $eid") != 0) {
    fatal("$tbdir/mkexpdir failed\n");
}

222 223
#
# Copy the nsfile from wherever the web server stuffed it, into the
224
# experiment directory. We leave the tempns file around till later.
225 226
#
if (! chdir("$eiddir/$tbdata")) {
227
    fatal("Could not chdir to $tbdata in $eiddir: $!\n");
228 229
}

230 231
if (system("/bin/cp", "$tempns", "$nsfile") != 0) {
    fatal("Could not copy $tempns to $eiddir/$tbdata/$nsfile: $!\n");
232 233 234
}

#
235 236
# Run the various scripts. We want to propogate the error from tbprerun
# and tbrun back out, hence the bogus looking errorstat variable.
237
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
238
if (system("$tbdir/tbprerun -nologfile $pid $eid $nsfile") != 0) {
239
    $errorstat = $? >> 8;
240
    fatal("tbprerun failed!\n");
241
}
242
# So fatal errors run tbend.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
243
$estate = "prerunned";
244

Leigh B. Stoller's avatar
Leigh B. Stoller committed
245
if (system("$tbdir/tbswapin -nologfile $pid $eid") != 0) {
246
    $errorstat = $? >> 8;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
247
    fatal("tbswapin failed!\n");
248
}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
249 250
# So fatal errors run tbswapout,
$estate = "active";
251

Leigh B. Stoller's avatar
Leigh B. Stoller committed
252
if (system("$tbdir/tbreport -v $pid $eid 2>&1 > $repfile") != 0) {
253
    fatal("tbreport failed!\n");
254 255
}

256 257 258 259 260 261 262
#
# Increment the experiment count, and date. This is informational. 
#
$DB->query("update projects ".
	   "set expt_count=expt_count+1, expt_last=NOW() ".
	   "where pid='$pid'");

263 264 265 266
#
# In batchmode, send the report to stdout for the batch daemon.
#
if ($batch) {
Leigh B. Stoller's avatar
Leigh B. Stoller committed
267
    system("$tbdir/tbreport -v $pid $eid");
268
    print STDOUT "\n\n";
269 270
}

271
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
272
# Grab the experiment info for the mail message below.
273 274 275 276 277 278 279 280 281 282 283 284
#
$query_result =
    $DB->query("SELECT expt_name,expt_created,expt_expires from experiments ".
	       "WHERE eid='$eid' and pid='$pid'");

if (! $query_result) {
    fatal("DB Error getting experiment record for $pid/$eid\n");
}
@row = $query_result->fetchrow_array();
$expt_name     = $row[0];
$expt_created  = $row[1];
$expt_expires  = $row[2];
Kristin Wright's avatar
Kristin Wright committed
285

286
print STDOUT "Setup Success\n";
287

288
#
289
# Try to copy off the files for testbed information gathering.
290 291 292 293 294 295 296
#
my $infodir = "$pid-$eid-" . `date +20%y%m%d-%H.%M.%S`;

if ($infodir =~ /^([-\@\w.]+)$/) {
    $infodir = $1;

    if (mkdir("$TBINFO/$infodir", 0770)) {
Leigh B. Stoller's avatar
Leigh B. Stoller committed
297
	system("cp $nsfile $TBINFO/$infodir");
298
	system("cp *.ptop *.top $TBINFO/$infodir");
299 300
	system("cp assign.log $TBINFO/$infodir");
	system("cp $logname $TBINFO/$infodir/$eid.log");
301 302 303
    }
}

304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
#
# Shove a copy of the NS file into the DB to make Mike happy.
#
$nsfile_string = `cat $nsfile`;

if ($nsfile_string) {
    $nsfile_string = $DB->quote($nsfile_string);

    $DB->query("delete from nsfiles WHERE eid='$eid' and pid='$pid'");

    #
    # I could strlen check the string, but the webserver has a limit,
    # plus the DB is going to truncate it if its longer. Doing it here
    # would be a third (call it redundant) check. 
    # 
    $DB->query("insert into nsfiles (pid, eid, nsfile) ".
	       "VALUES('$pid', '$eid', $nsfile_string)");
}

323 324 325 326 327 328 329 330
#
# In batch mode, just exit without sending email. Remove tempns file!
#
if ($batch) {
    unlink("$tempns");
    exit(0);
}

331 332 333
#
# Dump the report file and the log file to the user via email. 
#
334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
if (! ($MAIL = OPENMAIL("$user_name <$user_email>",
			"TESTBED: New Experiment Created: $pid/$eid",
			undef, "Bcc: $TBLOGS"))) {
    tbendit();
    die("Cannot start mail program!");
}
    
print $MAIL "Your experiment `$eid' in project `$pid' is now configured.\n";
print $MAIL "Here is the experiment summary detailing the nodes that were\n";
print $MAIL "allocated to you. You may use the `Qualified Name' to log on\n";
print $MAIL "to your nodes. See /etc/hosts on your nodes (when running\n";
print $MAIL "FreeBSD, Linux, or NetBSD) for the IP mapping on each node\n";

print $MAIL "\n";
print $MAIL "User:        $user_name\n";
print $MAIL "EID:         $eid\n";
print $MAIL "PID:         $pid\n";
print $MAIL "Name:        $expt_name\n";
print $MAIL "Created:     $expt_created\n";
print $MAIL "Expires:     $expt_expires\n";
print $MAIL "Directory:   $eiddir\n\n";
355 356 357

if (open(IN, "$repfile")) {
    while (<IN>) {
358
	print $MAIL "$_";
359 360 361 362
    }
    close(IN);
}

363
print $MAIL "\n\n---------\n\n";
364

365 366 367
print $MAIL "Here is the log of the configuration process.\n";
print $MAIL "If you have any questions or problems, please include the\n";
print $MAIL "output below in your message to $TBOPS\n\n";
368 369 370

if (open(IN, "$logname")) {
    while (<IN>) {
371
	print $MAIL "$_";
372 373 374
    }
    close(IN);
}
375 376

if (open(IN, "$nsfile")) {
377 378
    print $MAIL "\n\n---------\n\n";
    print $MAIL "Here is the NS file\n\n";
379 380

    while (<IN>) {
381
	print $MAIL "$_";
382 383 384
    }
    close(IN);
}
385
close($MAIL);
386

387
unlink("$tempns");
388
unlink("$logname");
389 390
exit 0;

391 392
sub fatal()
{
393
    my($mesg)     = $_[0];
394
    local $MAIL;
395 396

    print STDOUT "$mesg\n";
397
    print STDOUT "Cleaning up and exiting with status $errorstat ...\n";
398 399 400 401

    #
    # If we got far enough to allocate nodes, must run tbend.
    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
402
    if ($estate) {
403
	tbendit();
404
    }
405 406 407 408 409 410 411 412 413

    #
    # Now we can remove all trace from the DB since it failed.
    # 
    $query_result =
	$DB->query("DELETE from experiments WHERE eid='$eid' and pid='$pid'");

    if (! $query_result) {
	print STDOUT "DB Error deleting experiment record for $pid/$eid\n";
414
    }
415

Leigh B. Stoller's avatar
Leigh B. Stoller committed
416

417 418 419 420
    #
    # In batch mode, exit. Make sure to delete tempns file. 
    #
    if ($batch) {
421 422 423 424 425 426 427 428 429
	if (open(IN, "$eiddir/assign.log")) {
	    print STDOUT "\n\n--------- assign.log --------\n\n";
	    
	    while (<IN>) {
		print STDOUT "$_";
	    }
	    close(IN);
	}
	
Leigh B. Stoller's avatar
Leigh B. Stoller committed
430
	if (chdir($expdir)) {
431 432
	    system("/bin/rm", "-rf", "${eid}-TBfailed");
	    system("/bin/mv", "-f", "$eid", "${eid}-TBfailed");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
433
	}
434
	unlink("$tempns");
435
	exit($errorstat);
436 437
    }

438 439 440
    #
    # Send a message to the testbed list. Append the logfile if it got
    # that far.
441 442 443 444 445 446 447 448
    #
    if (! ($MAIL = OPENMAIL("$user_name <$user_email>",
			    "TESTBED: Experiment Configure Failure: $pid/$eid",
			    undef, "Cc: $TBOPS"))) {
	die("Cannot start mail program!");
    }
    
    print $MAIL $mesg;
449

450 451 452
    if (open(IN, "$logname")) {
	print $MAIL "\n\n--------- $logname ---------\n\n";
	
453
	while (<IN>) {
454
	    print $MAIL "$_";
455 456 457 458
	}
	close(IN);
    }

459 460
    if (open(IN, "$eiddir/assign.log")) {
	print $MAIL "\n\n--------- assign.log --------\n\n";
461
	
462
	while (<IN>) {
463
	    print $MAIL "$_";
464 465 466
	}
	close(IN);
    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
467

468 469 470
    if (open(IN, "$tempns")) {
	print $MAIL "\n\n--------- $tempns ---------\n\n";

Leigh B. Stoller's avatar
Leigh B. Stoller committed
471
	while (<IN>) {
472
	    print $MAIL "$_";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
473 474
	}
	close(IN);
475
	unlink("$tempns");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
476
    }
477

478
    close($MAIL);
479

480
    unlink("$tempns");
481
    unlink("$logname");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
482
    if (chdir($expdir)) {
483 484
	system("/bin/rm", "-rf", "${eid}-TBfailed");
	system("/bin/mv", "-f", "$eid", "${eid}-TBfailed");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
485
    }
486
    exit($errorstat);
487
}
488 489 490 491 492 493 494

#
# If tbprerun finishes, but tbrun fails, lets do a tbend to make sure
# the nodes and vlans are released.
# 
sub tbendit()
{
Leigh B. Stoller's avatar
Leigh B. Stoller committed
495 496 497 498
    if ($estate eq "active") {
	print "Running tbswapout with arguments: -nologfile $pid $eid\n";
	if (system("$tbdir/tbswapout -nologfile $pid $eid") != 0) {
	    print "tbswapout failed!\n";
499
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
500 501 502 503 504
    }
    print "Running tbend with arguments: -nologfile -force $pid $eid\n";
    if (system("$tbdir/tbend -nologfile -force $pid $eid") != 0) {
	print "tbend failed!\n";
    }
505
}
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534

#
# Put ourselves into the background so that caller sees immediate response.
# Mail notification will happen later.
# 
sub background()
{
    $mypid = fork();
    if ($mypid) {
	return $mypid;
    }

    #
    # We have to disconnect from the caller by redirecting both STDIN and
    # STDOUT away from the pipe. Otherwise the caller (the web server) will
    # continue to wait even though the parent has exited. 
    #
    open(STDIN, "< /dev/null") or
	die("opening /dev/null for STDIN: $!");

    #
    # Create a temporary name for a log file and untaint it.
    #
    $logname = `mktemp /tmp/start-$pid-$eid.XXXXXX`;

    # Note different taint check (allow /).
    if ($logname =~ /^([-\@\w.\/]+)$/) {
	$logname = $1;
    } else {
535
	die "Bad data in logfile name: $logname";
536 537 538 539 540 541 542
    }

    open(STDERR, ">> $logname") or die("opening $logname for STDERR: $!");
    open(STDOUT, ">> $logname") or die("opening $logname for STDOUT: $!");

    return 0;
}