template_swapin.in 15 KB
Newer Older
1 2
#!/usr/bin/perl -wT
#
Leigh Stoller's avatar
Leigh Stoller committed
3
# Copyright (c) 2006, 2007, 2008 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
23 24
#
use English;
25
use strict;
26
use Getopt::Std;
27
use POSIX qw(setsid);
28 29 30 31 32 33
use POSIX qw(strftime);
use Errno qw(EDQUOT);
use XML::Simple;
use Data::Dumper;

#
34
# Swapin a previously instantiated template.
35 36 37 38 39 40 41 42 43 44 45 46 47
#
# Exit codes are important; they tell the web page what has happened so
# it can say something useful to the user. Fatal errors are mostly done
# with die(), but expected errors use this routine. At some point we will
# use the DB to communicate the actual error.
#
# $status < 0 - Fatal error. Something went wrong we did not expect.
# $status = 0 - Everything okay.
# $status > 0 - Expected error. User not allowed for some reason. 
# 
sub usage()
{
    print(STDERR
48
	  "Usage: template_swapin [-q] [-w] -e <eid> <guid/vers>\n".
49 50 51 52 53 54 55
	  "switches and arguments:\n".
	  "-w       - wait for template to be instantiated\n".
	  "-q       - be less chatty\n".
	  "-e <eid> - The instance name (unique, alphanumeric, no blanks)\n".
	  "<guid/vers> - GUID and version to swapin\n");
    exit(-1);
}
56
my $optlist	 = "qwe:f";
57 58 59 60 61 62
my %options      = ();
my $quiet        = 0;
my $waitmode     = 0;
my $foreground   = 0;
my $guid;
my $version;
63
my $eid;
64 65 66 67 68 69 70 71 72 73 74

#
# Configure variables
#
my $TB		= "@prefix@";
my $EVENTSYS	= @EVENTSYS@;
my $TBOPS	= "@TBOPSEMAIL@";
my $TBLOGS	= "@TBLOGSEMAIL@";
my $TBDOCBASE	= "@TBDOCBASE@";
my $TBBASE	= "@TBBASE@";
my $CONTROL	= "@USERNODE@";
75
my $BOSSNODE    = "@BOSSNODE@";
76
my $OPSDBSUPPORT= @OPSDBSUPPORT@;
77 78

# Locals
79
my $EVhandle;
80 81
my $template;
my $instance;
82 83 84
my $experiment;
my %parameters;
my $pid;
85
my $run;
86
my $logfile;
87
my $logname;
88
my @ExptStates  = ();
89 90
# For the END block below.
my $cleaning    = 0;
91
my $exptactive  = 0;
92 93 94 95 96 97 98
my $justexit    = 1;

# Programs we need
my $checkquota  = "$TB/sbin/checkquota";
my $batchexp    = "$TB/bin/batchexp";
my $swapexp     = "$TB/bin/swapexp";
my $endexp      = "$TB/bin/endexp";
99
my $dbcontrol   = "$TB/sbin/opsdb_control";
100
my $archcontrol = "$TB/bin/archive_control";
101
my $CVSBIN      = "/usr/bin/cvs";
102
my $RLOG        = "/usr/bin/rlog";
103 104 105 106 107

# Protos
sub ParseArgs();
sub fatal($$);
sub sighandler($);
108
sub SetupEventHandler();
109 110 111 112 113 114 115 116

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use libtblog;
117
use User;
118
use Template;
119
use Experiment;
120
use event;
121
use libaudit;
122

123 124 125
# In libdb
my $projroot = PROJROOT();

126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
# Be careful not to exit on transient error
$libdb::DBQUERY_MAXTRIES = 0;

#
# Turn off line buffering on output
#
$| = 1;

#
# Set umask for start/swap. We want other members in the project to be
# able to swap/end experiments, so the log and intermediate files need
# to be 664 since some are opened for append.
#
umask(0002);

#
# Untaint the path
#
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Verify user and get his DB uid.
#
151 152 153
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
154
}
155 156 157
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
my $user_uid   = $this_user->uid();
158 159 160 161 162

#
# Before doing anything else, check for overquota ... lets not waste
# our time. Make sure user sees the error by exiting with 1.
#
163
if (system("$checkquota $user_uid") != 0) {
164 165
    tberror("You are over your disk quota on $CONTROL; ".
	    "please login there and cleanup!");
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
    exit(1);
}

# Now parse arguments.
ParseArgs();

#
# In wait mode, block SIGINT until we spin off the background process.
#
if ($waitmode) {
    $SIG{QUIT} = 'IGNORE';
    $SIG{TERM} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';
}

#
182
# Grab template and do access check.
183
#
184
$template = Template->Lookup($guid, $version);
185

186 187 188
if (!defined($template)) {
    tbdie("Experiment template $guid/$version does not exist!");
}
189 190 191 192 193
my $project = $template->GetProject();
if (!defined($project)) {
    tbdie("Could not get project for $template");
}
if (! $project->AccessCheck($this_user, TB_PROJECT_CREATEEXPT)) {
194 195 196 197
    tberror("You do not have permission to instantiate template ".
	    "$guid/$version");
    exit(1);
}
198
$pid = $template->pid();
199

200
#
201
# Grab instance; better exist since this is a swapin of an instance!
202
#
203 204 205
# We need to find the experiment so we can find the instance.
# This is wrong, but necessary cause of how templates are layered over
# the existing experiment structure.
206
#
207 208 209
$experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    tbdie("Experiment $pid/$eid does not exist!");
210 211
}

212
$instance = Template::Instance->LookupByExptidx($experiment->idx());
213
if (!defined($instance)) {
214
    tbdie("Could not get instance record for experiment $pid/$eid!");
215 216
}

217 218 219
# Need these for default run below.
$instance->BindingList(\%parameters) == 0
    or fatal(-1, "Error getting bindings from $instance!");
220

221 222 223 224 225 226 227 228 229
#
# Go to the background now so we have a proper log of what happened.
#
#
$SIG{TERM} = \&sighandler;

#
# Use the logonly option to audit so that we get a record mailed.
#
Leigh Stoller's avatar
Leigh Stoller committed
230
if (! $foreground) {
231 232 233
    # Cleanup
    $experiment->CleanLogFiles() == 0
	or fatal(-1, "Could not clean up logfiles!");
Leigh Stoller's avatar
Leigh Stoller committed
234

235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
    #
    # If its a batch experiment, then the daemon will set the spewlog,
    # so attach it to the instance instead so that we can see both.
    #
    if ($experiment->batchmode()) {
	$logfile = $instance->CreateLogFile("batchswapin");
    }
    else {
	$logfile = $experiment->CreateLogFile("swapin");
	# We want it to spew to the web.
	$experiment->SetLogFile($logfile);
	# Mark it open since we are going to start using it right away.
	$logfile->Open();	
    }
    if (!defined($logfile)) {
	fatal(-1, "Could not create logfile");
Leigh Stoller's avatar
Leigh Stoller committed
251
    }
252
    $logname = $logfile->filename();
253 254

    if (my $childpid = AuditStart(LIBAUDIT_DAEMON, $logname,
255
			  LIBAUDIT_LOGONLY|LIBAUDIT_NODELETE|LIBAUDIT_FANCY)) {
256 257 258 259 260 261 262
	#
	# Parent exits normally, unless in waitmode. We have to set
	# justexit to make sure the END block below does not run.
	#
	$justexit = 1;

	if (!$waitmode) {
263
	    if ($experiment->batchmode()) {
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
		print("Experiment $pid/$eid has entered the batch system.\n".
		      "You will be notified when it is fully instantiated.\n")
		    if (! $quiet);
	    }
	    else {
		print("Experiment $pid/$eid is now being instantiated.\n".
		      "You will be notified via email when this is done.\n")
		    if (! $quiet);
	    }
	    exit(0);
	}
	print("Waiting for experiment $eid to fully instantiate.\n")
	    if (! $quiet);
	    
	if (-t STDIN && !$quiet) {
	    print("You may type ^C at anytime; you will be notified via ".
		  "email.\n".
		  "You will not actually interrupt the instantiation.\n");
	}
	
	# Give child a chance to run.
	select(undef, undef, undef, 0.25);
	
	#
	# Reset signal handlers. User can now kill this process, without
	# stopping the child.
	#
	$SIG{TERM} = 'DEFAULT';
	$SIG{INT}  = 'DEFAULT';
	$SIG{QUIT} = 'DEFAULT';

	#
	# Wait until child exits or until user gets bored and types ^C.
	#
	waitpid($childpid, 0);
	
	print("Done. Exited with status: $?\n")
	    if (! $quiet);
	exit($? >> 8);
    }
    TBdbfork();
305 306 307 308 309 310 311 312

    # Add audit info.  Right now this will only work when called in
    # the child of the script being audited.  Eventually these can be
    # set anywhere.
    AddAuditInfo("which", "$pid/$eid");
    AddAuditInfo("failure_frag", "T. Swapin Failure");
    AddAuditInfo("success_frag", "T. Swapped In");

313 314 315 316 317
    my $creator = $experiment->GetCreator();
    if (defined($creator)) {
	my $expt_head_name  = $creator->name();
	my $expt_head_email = $creator->email();
	
318
	AddAuditInfo("cc", "$expt_head_name <$expt_head_email>");
319 320 321
    }
    else {
	tbwarn("Could not determine name/email for experiment creator");
322
    }
323 324
}

325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
#
# All instances currently start with a default run.
#
$run = $instance->NewRun($eid, $this_user, $instance->description());
if (!defined($run)) {
    fatal(-1, "Could not create new experiment run for $instance!");
}

#
# And the bindings for the default run ...
#
foreach my $name (keys(%parameters)) {
    my $value = $parameters{$name};

    $instance->NewRunBinding($name, $value) == 0
	or fatal(-1, "Error inserting run binding into DB!");
}

343 344 345
# Event connect before starting swapin so we catch all the states.
SetupEventHandler()
    if ($experiment->batchmode());
346

347
#
348
# Now do the swapin (or it gets queued if a batch experiment).
349
#
350
my @arguments = ($swapexp, "-q", "-x", "-s", "in", $pid, $eid);
351 352 353 354 355

system(@arguments);
fatal($? >> 8, "Could not instantiate the experiment")
    if ($?);

356 357 358 359 360 361
#
# At this point, we need to force a cleanup no matter how we exit.
# See the END block below.
#
$justexit = 0;

362 363 364
# And we need to terminate the experiment!
$exptactive = 1;

365 366 367 368
#
# We will spew forth info to the user each time the batch daemon tries to
# swap it in. 
#
369
if ($experiment->batchmode()) {
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
    #
    # Spin waiting for the state to change in swapexp. We are waiting for
    # it to swapin or go back to swapped.
    #
    my $queued = 0;
    
    while (1) {
	@ExptStates = ();
	event_poll_blocking($EVhandle, 500);

	next
	    if (! @ExptStates);

	foreach my $state (@ExptStates) {
	    if ($state eq EXPTSTATE_ACTIVATING()) {
		print "Experiment is starting a swapin attempt ...\n";
	    }
	    elsif ($state eq EXPTSTATE_ACTIVE()) {
		print "Experiment swapped in!\n";
		goto done;
	    }
	    elsif ($state eq EXPTSTATE_QUEUED()) {
		# Failed to swapin; still queued in the batch system.
		if (! $queued) {
		    print "Experiment has entered the batch system\n";
		    $queued = 1;
		}
		else {
		    print "Experiment swapin attempt failed.\n";
		}
	    }
	    elsif ($state eq EXPTSTATE_SWAPPED()) {
		# Dumped out of the batch system for some reason.
403
		print "Experiment has been removed from the batch queue.\n";
404

Leigh Stoller's avatar
Leigh Stoller committed
405 406 407
		$instance->DeleteCurrentRun() == 0
		    or exit(-1);

408
		#
Leigh Stoller's avatar
Leigh Stoller committed
409
		# We are done. User has to requeue batched instance.
410
		#
Leigh Stoller's avatar
Leigh Stoller committed
411
		exit(0);
412 413 414 415 416 417
	    }
	}
    }
  done:
}

418 419 420 421
#
# Lets commit the experiment archive now that it is active. The experiment is
# already running, but thats not a big deal.
#
422
system("$archcontrol -t swapin commit $pid $eid");
423 424 425 426
if ($?) {
    fatal(-1, "Could not commit archive!");
}

427 428 429 430 431 432 433 434
#
# Do the CVS stuff.
#
my $exptidx      = $instance->exptidx();
my $cvsdir       = "$projroot/$pid/templates/$guid/cvsrepo";
my $instance_tag = "I${exptidx}";
my $template_tag = "T${guid}-$version";

435 436 437 438 439
if (-e $cvsdir) {
    # Tag the template with an instance tag. This tag is used to associate
    # records with this instance with the actual template.
    # The rlog check is so we can deal with old templates that have an
    # incomplete CVS repo.
Leigh Stoller's avatar
Leigh Stoller committed
440
    System("$RLOG -h $cvsdir/setup/.template,v | ".
441 442 443
	   "  grep -q '${template_tag}:'");
    if (! $?) {
	System("$CVSBIN -d $cvsdir rtag -n -r ".
Leigh Stoller's avatar
Leigh Stoller committed
444
	       "  $template_tag $instance_tag setup") == 0
445 446 447
	       or fatal(-1, "Could not rtag with instance tag in $cvsdir");
    }
}
448

449
$instance->StartRun(Template::STARTRUN_FLAGS_FIRSTRUN()) == 0
450
    or fatal(-1, "Could not update start time in instance record!");
451

452
# Stop the web interface from spewing.
453 454 455 456 457 458
if ($experiment->batchmode()) {
    $logfile->Close();
}
else {
    $experiment->CloseLogFile();
}
459 460 461

# Make sure the most recent version gets copied out.
$experiment->CopyLogFiles();
462

463
# Email is sent from libaudit at exit ...
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
exit(0);

#
# Parse command arguments. Once we return from getopts, all that are
# left are the required arguments.
#
sub ParseArgs()
{
    if (! getopts($optlist, \%options)) {
	usage();
    }

    if (@ARGV != 1) {
	usage();
    }
    #
    # Pick up guid/version first and untaint.
    #
    my $tmp = shift(@ARGV);

    if ($tmp =~ /^([\w]*)\/([\d]*)$/) {
	$guid = $1;
	$version = $2;
    }
    else {
	tbdie("Bad data in argument: $tmp");
    }

    if (defined($options{"e"})) {
	$eid = $options{"e"};

	if ($eid =~ /^([-\w]+)$/) {
	    $eid = $1;
	}
	else {
	    tbdie("Bad data in argument: $eid.");
	}
	if (! TBcheck_dbslot($eid, "experiments", "eid",
			   TBDB_CHECKDBSLOT_WARN|TBDB_CHECKDBSLOT_ERROR)) {
	    tbdie("Improper experiment name (id)!");
	}
    }
    else {
	tberror("Must provide an experiment ID (-e option)!");
	exit(1);
    }

    if (defined($options{"q"})) {
	$quiet = 1;
    }
    if (defined($options{"w"})) {
	$waitmode = 1;
    }
517 518 519
    if (defined($options{"f"})) {
	$foreground = 1;
    }
520 521
}

522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
#
# Subscribe to experiment state change events.
#
sub SetupEventHandler()
{
    my $port = @BOSSEVENTPORT@;
    my $URL  = "elvin://localhost:$port";
    
    # Connect to the event system, and subscribe the the events we want
    $EVhandle = event_register($URL, 0);
    
    if (!$EVhandle) {
	tbdie("Unable to register with event system\n");
    }

    my $tuple = address_tuple_alloc();
    if (!$tuple) {
	tbdie("Could not allocate an address tuple\n");
    }

    %$tuple = ( objtype   => libdb::TBDB_TBEVENT_EXPTSTATE(),
		objname   => "$pid/$eid",
		expt      => "$pid/$eid",
		host      => $BOSSNODE,
	      );
    
    if (!event_subscribe($EVhandle, \&EventHandler, $tuple)) {
	tbdie("Could not subscribe to events\n");
    }
}

#
# Callback for above.
#
sub EventHandler($$$) {
    my ($handle,$notification,undef) = @_;
    
    my $objname   = event_notification_get_objname($handle,$notification);
    my $eventtype = event_notification_get_eventtype($handle,$notification);

    print "$objname, $eventtype\n";

    return
	if ($objname ne "$pid/$eid");

    push(@ExptStates, $eventtype);
}

570 571 572 573 574
#
# Cleanup the mess.
#
sub cleanup()
{
575 576 577 578
    # For debugging.
    $experiment->BackupUserData()
	if (defined($experiment));

579
    if ($exptactive) {
580
	$experiment->Swap($Experiment::EXPT_SWAPOUT, "-force") == 0
581 582 583
	    or exit(-1);
    }

584 585
    $instance->DeleteCurrentRun()
	if (defined($run));
586

587
    # Stop the web interface from spewing.
588 589 590 591 592 593 594 595
    if (defined($logfile)) {
	if ($experiment->batchmode()) {
	    $logfile->Close();
	}
	else {
	    $experiment->CloseLogFile();
	}
    }
596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632
}

sub fatal($$)
{
    my ($errorstat, $msg) = @_;
    
    tberror $msg;
    tbinfo "Cleaning up and exiting with status $errorstat ...";

    #
    # This exit will drop into the END block below.
    # 
    exit($errorstat);
}

sub sighandler ($) {
    my ($signame) = @_;
    
    $SIG{TERM} = 'IGNORE';
    my $pgrp = getpgrp(0);
    kill('TERM', -$pgrp);
    sleep(1);
    fatal(-1, "Caught SIG${signame}! Killing experiment setup ...");
}

END {
    # Normal exit, nothing to do.
    if (!$? || $justexit) {
	return;
    }
    my $saved_exitcode = $?;
    
    if ($cleaning) {
	#
	# We are screwed; a recursive error. Someone will have to clean
	# up by hand. 
	#
633 634
	SENDMAIL($TBOPS, 
		 "Template Creation Failure: $pid/$eid",
635 636 637 638 639 640 641 642 643
		 "Recursive error in cleanup! This is very bad.");
	$? = $saved_exitcode;
	return;
    }
    $cleaning = 1;
    cleanup();
    $? = $saved_exitcode;
}