mapper.in 24.6 KB
Newer Older
1 2 3
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2011 University of Utah and the Flux Group.
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
# All rights reserved.
#
use strict;
use English;
use Getopt::Std;
use POSIX qw(setsid ceil);
use POSIX ":sys_wait_h";

#
# This function as the main assign loop.  It converts the virtual
# topology into a top input including LAN and delay translation.  It
# then snapshots the current testbed physical state and runs assign,
# looping a couple times if assign fails.  When assign successfully
# completes it will interpret the results.  Attempt to match any
# existing portmap entries and then update the delays and vlans table.
#
# XXX Internally created nodes (jailhost,delay,sim) are not treated
#     consistently. Needs more thought.
#
# Return codes: We catch all errors with the END block below, and if
# anyone calls die() (exit value is 255) we add in the CANRECOVER bit.
# Failures in assign always cause the caller to stop retrying. 
#
# The CANRECOVER bit indicates 'recoverability' (no db or physical
29
# state was modified by the time the error occurred). This is relevant
30 31 32 33 34 35 36 37 38 39 40
# to only modify operations (update).
#
my $WRAPPER_SUCCESS		 = 0x00;
my $WRAPPER_FAILED		 = 0x01;	# Failed (Add other values)
my  $WRAPPER_FAILED_CANRECOVER   = 0x40;        # Can recover from update
my  $WRAPPER_FAILED_FATALLY      = 0x80;	# Do not use this.
# Set this once we modify DB state; forces no recover in fatal().
my $NoRecover = 0;

sub usage ()
{
41
    print STDERR "Usage: $0 [-v] [-u [-f] | -n] [-z] pid eid\n";
42 43 44 45
    print STDERR " -v   - Enables verbose output\n";
    print STDERR " -u   - Enables update mode\n";
    print STDERR " -f   - Fix current resources during update mode\n";
    print STDERR " -n   - Run assign, but do not reserve/modify resources.\n";
46
    print STDERR " -r   - Regression mode.\n";
47 48
    print STDERR " -x   - Turn on the prepass\n";
    print STDERR " -m   - Set the multiplex factor; overrides experiment.\n";
49 50
    print STDERR " -p   - Do a precheck for mapability on an empty testbed - ".
		 "implies -n\n";
51
    print STDERR " -l   - Use rspec v2 instead of the text file format\n";
52
#    print STDERR " -z   - Force new ptopgen\n";
53
    print STDERR " -Z   - Force old ptopgen\n";
54
    print STDERR " -A   - Tell ptopgen all nodes are free; only with -n\n";
55 56
    exit($WRAPPER_FAILED);
}
57
my $optlist    = "dvunfprqczxm:ko:altzZA";
58 59 60 61 62 63
my $verbose    = 0;
my $debug      = 0;
my $fixmode    = 0;
my $updating   = 0;
my $impotent   = 0;
my $precheck   = 0;
64
my $allnodesfree = 0;
Leigh Stoller's avatar
Leigh Stoller committed
65
my $toponly    = 0;
66
my $prepass    = 0;
67
my $alloconly  = 0;
68
my $outfile;
69
my $mfactor;
70
my $regression = 0;
71
my $noassign   = 0;  # Only with regression mode, use previous solution.
72 73
my $noregfree  = 0;  # Only with regression mode, leave physical state at end.
my $usecurrent = 0;  # Only with regression mode, use current solution.
74 75 76
my $quiet      = 0;
my $clear      = 0;
my $warnings   = 0;
77
my $maxrun     = 3;  # Maximum number of times we run assign.
78
my $gotlock    = 0;
79
my $userspec   = 0;
80
my $use_old_ptopgen  = 0;
81
my $vtop;
82 83 84 85 86

#
# Configure variables
#
my $TB		= "@prefix@";
87
my $DBNAME	= "@TBDBNAME@";
88 89 90 91
my $TBOPS       = "@TBOPSEMAIL@";
my $ASSIGN      = "$TB/libexec/assign";
my $WRAPPER2    = "$TB/libexec/assign_wrapper2";
my $PTOPGEN     = "$TB/libexec/ptopgen";
92
my $PTOPGEN_NEW = "$TB/libexec/ptopgen_new";
93
my $VTOPGEN     = "$TB/bin/vtopgen";
94
my $NFREE       = "$TB/bin/nfree";
95
my $XERCES	= "@HAVE_XERCES@";
96 97 98 99 100 101 102 103 104 105

#
# Load the Testbed support stuff. 
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use libtblog;
use libvtop;
use libadminctrl;
106
use User;
107
use EmulabFeatures;
108 109 110 111 112 113

# Protos
sub fatal(@);
sub debug($);
sub chat($);
sub RunAssign($$);
114
sub FinalizeRegression($);
115 116
sub AssignLoop();
sub MapperWrapper();
117
sub PrintSolution();
118 119 120 121 122 123

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
124 125 126 127 128 129 130 131 132 133
$| = 1;

#
# We want warnings to cause assign_wrapper to exit abnormally.
# 
# FIXME: Is this still needed.  "warn" is only used once.  Also this
#  will cause perl internal warnings (such as "Use of uninitialized
#  value ..."  to cause assign_wrapper to fail. -- kevina
#
$SIG{__WARN__} = sub { tbwarn $_[0];$warnings++; };
134 135 136 137 138 139 140 141 142 143 144 145 146

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV < 2) {
    usage();
}
if (defined($options{"v"})) {
147
    TBDebugTimeStampsOn();
148 149
    $verbose++;
}
150 151 152
if (defined($options{"a"})) {
    $alloconly++;
}
153 154 155
if (defined($options{"A"})) {
    $allnodesfree++;
}
156 157 158
if (defined($options{"d"})) {
    $debug++;
}
159 160 161
if (defined($options{"u"})) {
    $updating = 1;
}
Leigh Stoller's avatar
Leigh Stoller committed
162 163 164 165
if (defined($options{"t"})) {
    $toponly = 1;
    $quiet   = 1;
}
166 167 168 169 170 171 172 173 174
if (defined($options{"n"})) {
    $impotent = 1;
}
if (defined($options{"f"})) {
    $fixmode = 1;
}
if (defined($options{"p"})) {
    $precheck = 1;
}
175 176 177
if (defined($options{"x"})) {
    $prepass = 1;
}
178 179 180
if (defined($options{"o"})) {
    $outfile = $options{"o"};
}
181 182 183
if (defined($options{"m"})) {
    $mfactor = $options{"m"};
}
184 185 186 187 188
if (defined($options{"r"})) {
    if ($DBNAME eq "tbdb") {
	fatal("Cannot use regression mode on main DB");
    }
    $regression = 1;
Leigh Stoller's avatar
Leigh Stoller committed
189 190
    $clear      = 1
	if (!defined($options{"t"}));
191
    $fixmode    = 1;
192
    TBDebugTimeStampsOn();
193
    $usecurrent = 1
194
	if (defined($options{"z"}));
195 196
    $noregfree = 1
	if (defined($options{"k"}));
197 198 199 200 201 202 203
}
if (defined($options{"q"})) {
    $quiet = 1;
}
if (defined($options{"c"})) {
    $clear = 1;
}
204
if (defined($options{"l"})) {
205 206 207 208 209
    if ($XERCES) {
	$userspec = 1;
    } else {
	fatal("Rspec v2 support requires that Xerces be installed");
    }
210
}
211 212 213
if (defined($options{"Z"})) {
    $use_old_ptopgen = 1;
}
214
if (defined($options{"z"})) {
215 216
#    $use_old_ptopgen = 0;
#    $PTOPGEN = $PTOPGEN_NEW;
217
}
218 219 220
if ($allnodesfree && !$impotent) {
    fatal("Not allowed to use -A without -n (impotent) option");
}
221 222 223 224 225 226 227

my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    fatal("Could not lookup experiment object $pid,$eid!")
}
228 229 230 231 232 233 234 235 236 237 238
#
# Verify that this person can muck with the experiment.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
}
if (!TBAdmin() &&
    ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
    fatal("You do not have permission to map this experiment!");
}
239

240 241 242
# multiplex_factor default.
$mfactor = $experiment->multiplex_factor()
    if (!defined($mfactor) && defined($experiment->multiplex_factor()));
243 244
# NS file can say to run the prepass.
my $useprepass = $experiment->useprepass();
245 246 247 248 249

my $newassign =
    EmulabFeatures->FeatureEnabled("NewAssign",
				   $this_user,
				   $experiment->GetGroup(), $experiment);
250 251

libvtop::Init($this_user, $experiment->GetGroup(), $experiment);
252
    
253 254 255 256 257 258
#
# These are the flags to the vtop creation code. 
#
my $vtopflags = 0;
$vtopflags |= $libvtop::VTOP_FLAGS_VERBOSE
    if ($verbose);
259 260
$vtopflags |= $libvtop::VTOP_FLAGS_QUIET
    if ($quiet);
261 262 263
$vtopflags |= $libvtop::VTOP_FLAGS_UPDATE
    if ($updating);
$vtopflags |= $libvtop::VTOP_FLAGS_FIXNODES
264
    if ($fixmode || $usecurrent);
265 266
$vtopflags |= $libvtop::VTOP_FLAGS_IMPOTENT
    if ($impotent);
267 268
$vtopflags |= $libvtop::VTOP_FLAGS_ALLOCONLY
    if ($alloconly);
269 270
$vtopflags |= $libvtop::VTOP_FLAGS_REGRESSION
    if ($regression);
271

272 273 274 275 276 277 278 279
MapperWrapper();
if ($regression) {
    if (0) {
	$updating   = 1;
	$fixmode    = 1;
	$clear      = 0;
	$vtopflags |=
	    ($libvtop::VTOP_FLAGS_UPDATE|$libvtop::VTOP_FLAGS_FIXNODES);
280

281 282 283
	MapperWrapper();
    }
    FinalizeRegression(0);
284
}
285 286
PrintSolution()
    if ($outfile);
287
exit(0);
288

289 290
sub MapperWrapper()
{
Leigh Stoller's avatar
Leigh Stoller committed
291 292
    chat("Starting the new and improved mapper wrapper.\n")
	if (!$toponly);
293

Leigh Stoller's avatar
Leigh Stoller committed
294 295 296 297 298
    # Need to do this cause libvtop will add them again.
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents()
	if ($regression);

299
    TBDebugTimeStamp("Create libvtop started");
300
    $vtop = libvtop->Create($experiment, $this_user, $vtopflags);
301 302 303 304
    if (!defined($vtop)) {
	fatal("Could not create vtop structure for $experiment");
    }
    TBDebugTimeStamp("Create libvtop ended");
Leigh Stoller's avatar
Leigh Stoller committed
305

Leigh Stoller's avatar
Leigh Stoller committed
306 307 308 309 310
    TBDebugTimeStamp("vtopgen started");
    $vtop->CreateVtop() == 0
	or fatal("Could not create vtop for $experiment");
    TBDebugTimeStamp("vtopgen finished");
    
311
    if (!$impotent && !$alloconly && !$toponly && ($updating || $clear)) {
312 313 314 315 316 317 318 319 320 321 322 323
	if ($clear) {
	    chat("Freeing reserved nodes ...\n");
	    system("export NORELOAD=1; $NFREE -x -a $pid $eid") == 0
		or fatal("Could not release nodes.");
	}
	chat("Clearing physical state before updating.\n");
	$experiment->RemovePhysicalState();
	exit(0)
	    if ($clear && !$regression);
    }
    AssignLoop();
    return 0;
324
}
325 326 327 328

#
# The assign loop. 
#
329 330 331 332
sub AssignLoop()
{
    my $currentrun     = 1;
    my $canceled       = 0;
333
    my $progress       = 0;
334 335 336
    my $tried_precheck = 0;
    # Admission control counts
    my %admission_control = ();
337

338 339 340 341 342
    # XXX plab hack - only run assign once on plab topologies, since
    # they're easy to map and the physical topology does not change
    # frequently.
    if ($vtop->plabcount() && $vtop->plabcount == $vtop->virtnodecount()) {
	$maxrun = 2;
343 344
    }

345 346
    TBDebugTimeStamp("mapper loop started");
    while (1) {
347
	chat("Mapper loop $currentrun\n");
348

349
	my $prefix = "$pid-$eid-$$";
350 351

	#
352
	# When precheck is on, we only do one run in impotent mode and exit.
353
	#
354 355 356 357 358
	if ($precheck) {
	    $prefix  .= ".empty";
	    $impotent = 1;
	    chat("Trying assign on an empty testbed.\n");
	}
359

360 361 362
	#
	# Serialize with the pool daemon if using shared nodes.
	#
363
	if ((!($impotent || $regression)) && $vtop->sharednodecount()) {
364
	    while (1) {
365 366 367 368
		#
		# Use a countup/countdown counter, so that multiple mappers
		# can run, but not while the pool_daemon is running.
		#
369
		my $lock_result =
370 371 372 373
		    DBQueryFatal("update emulab_locks set value=value+1 ".
				 "where name='pool_daemon' and value>=0");

		$gotlock = $lock_result->affectedrows;
374 375 376 377 378

		last
		    if ($gotlock);
		
		chat("Waiting for pool daemon lock ...\n");
379
		sleep(10);
380 381 382
	    }
	}

383 384 385 386
	#
	# RunAssign returns  0 if successful.
	#           returns -1 if failure, but assign says to stop trying.
	#           returns  1 if failure, but assign says to try again.
387 388
	#           returns  2 if assign succeeds, but no nodes allocated.
	#           returns  3 if assign succeeds, but some nodes allocated.
389 390 391
	#
	my $retval = RunAssign($precheck, $prefix);

392
	if ($gotlock) {
393 394 395
	    DBQueryFatal("update emulab_locks set value=value-1 ".
			 "where name='pool_daemon'");
	    $gotlock = 0;
396 397
	}

398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
	# Success!
	last
	    if ($retval == 0);

	if ($retval < 0 || $regression) {
	    #
	    # Failure in assign.
	    #
	    FinalizeRegression(1)
		if ($regression);

	    fatal({type => 'primary', severity => SEV_ERROR,
		   error => ['unretriable_assign_error']},
		  "Unretriable error. Giving up.");
	}
413
    
414 415 416
	#
	# When precheck is off, we do a precheck run if the first try fails
	# to find a solution. This avoids looping on an unsolvable topology.
417 418 419
	# But, if the reason we are here is cause we could not allocate nodes,
	# then we found a solution, and so trying on an empty testbed is
	# pointless; it will obviously find a solution again.
420
	#
421 422 423
	if (!$precheck && !$tried_precheck && ($retval == 2 || $retval == 3)) {
	    $tried_precheck = 1;
	}
424 425 426 427 428 429 430
	if (!$precheck && !$tried_precheck) {
	    chat("Trying assign on an empty testbed to verify mapability.\n");
	    my $save_impotent = $impotent;
	    $impotent  = 1;
	    my $retval = RunAssign(1, $prefix . ".empty");
	    if ($retval != 0) {
		fatal({type=>'extra', cause=>'user', severity=>SEV_ERROR,
431 432 433 434 435 436 437 438
	           error=>['infeasible_resource_assignment']}, 
		  "This experiment cannot be instantiated on this ".
                  "testbed. You have most likely asked for hardware ".
                  "this testbed does not have, such as nodes of a type ".
                  "it does not contain, or nodes with too many network ".
                  "interfaces.  You will need to modify this experiment ".
                  "before it can be swapped in - re-submitting the ".
                  "experiment as-is will always result in failure.");
439 440 441 442
	    }
	    chat("Assign succeeded on an empty testbed.\n");
	    $impotent = $save_impotent;
	    $tried_precheck = 1;
443
	}
444 445 446 447
	# We try a minimum number of times, cause the node pool is
	# always changing. But once we hit the maxrun, we continue
	# only if progress on the last loop.
	if ($currentrun >= $maxrun && !$progress) {
448 449 450 451
	    fatal({type => 'primary', severity => SEV_ERROR,
		   error => ['reached_assign_run_limit']},
		  "Reached run limit. Giving up.");
	}
452 453 454
	# See if we made progress or not.
	# Keep going if we allocated some nodes.
	$progress = ($retval == 3);
455

456 457 458 459
	chat("Waiting 5 seconds and trying again...\n");
	sleep(5);
	$currentrun++;
    }
460
    GatherAssignStats($pid, $eid, %{ $vtop->exptstats() })
Leigh Stoller's avatar
Leigh Stoller committed
461
	if (! ($impotent || $alloconly || $regression || $toponly));
462 463
    TBDebugTimeStamp("mapper loop finished");
    return 0;
464 465 466 467 468 469 470 471 472
}

#
# The guts of an assign run.
#
sub RunAssign($$)
{
    my ($precheck, $prefix) = @_;

473
    my $typelimitfile = $prefix .".limits";
474 475
    my $ptopfile = $prefix . ".ptop";
    my $vtopfile = $prefix . ".vtop";
476 477 478 479
    if ($userspec) {
	$ptopfile .= ".xml";
	$vtopfile .= ".xml";
    }
480 481
    my $assignexitcode = 0;

Leigh Stoller's avatar
Leigh Stoller committed
482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
    #
    # Now generate a vtop file and dump it to a file.
    #
    if (! open(VTOPFILE, "> $vtopfile")) {
	tberror("Could not open $vtopfile: $!\n");
	return -1;
    }
    my $reslibvtop;
    if ($userspec == 1) {
	$reslibvtop = $vtop->PrintRspec(*VTOPFILE);
    }
    else {
	$reslibvtop = $vtop->PrintTop(*VTOPFILE);
    }
    if ($reslibvtop != 0) {
	tberror("Could not print vtop file for $experiment\n");
	return -1;
    }
    close(VTOPFILE);
501
    system("/bin/cp -fp $vtopfile ${pid}-${eid}.vtop") if ($debug);
Leigh Stoller's avatar
Leigh Stoller committed
502 503 504
    return 0
	if ($toponly);

505 506 507 508 509 510 511 512 513 514 515 516 517 518
    if (! ($impotent || $regression || $alloconly)) {
	if ($experiment->Update({"maximum_nodes" => $vtop->maximum_nodes(),
				 "minimum_nodes" => $vtop->minimum_nodes(),
				 "virtnode_count"=> $vtop->virtnodecount() })){
	    tberror("Could not update min/max/virt nodes for $experiment\n");
	    return -1;
	}
    }
    # New solution each time.
    $vtop->ClearSolution();

    goto skipassign
	if ($vtop->nodecount() == $vtop->genicount());

519 520 521 522 523 524 525 526 527 528
    # Debugging hack for regression mode. Avoid really long assign runs.
    if ($regression && $noassign) {
	if (! -e "assign.log") {
	    chat("No existing assign results file!\n");
	    return -1;
	}
	chat("Using existing assign results file\n");
	goto skiprun;
    }
    
529 530 531 532
    #
    # Do admission control test, and gather the info.
    #
    my %admission_control;
533 534 535 536 537 538
    if (!$regression) {
	if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control)){
	    tberror("Failed admission control checks!\n");
	    return -1;
	}
    }
539

540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555
    #
    # Append this admission control results to ptopgen.
    #
    if (scalar(keys(%admission_control))) {
	open(TYPELIMIT, ">$typelimitfile") or
	    return -1;

	foreach my $typeclass (keys(%admission_control)) {
	    my $count = $admission_control{$typeclass};

	    print TYPELIMIT "$typeclass $count\n";
	}
	close(TYPELIMIT);
    }


556 557 558 559 560 561 562 563 564 565 566
    #
    # Snapshot physical resources.
    #
    # if updating (-u), include any resources that may already be
    # allocated to experiment in the PTOP results.
    #
    my $ptopargs = "-p $pid ";
    $ptopargs   .= "-e $eid "
	if ($updating);
    $ptopargs   .= "-u "
	if ($updating && $experiment->elabinelab());
567 568
    $ptopargs   .= "-m $mfactor "
	if (defined($mfactor));
569 570 571 572 573 574
    $ptopargs   .= "-v "
	if ($vtop->virtnodecount());
    $ptopargs   .= "-r "
	if ($vtop->remotenodecount());
    $ptopargs   .= "-S "
	if ($vtop->simnodecount());
575 576
    $ptopargs   .= "-h "
	if ($vtop->sharednodecount());
577
    $ptopargs	.= "-a "
578
    	if ($precheck || $allnodesfree);
579 580
    $ptopargs	.= "-c " . $experiment->delay_capacity() . " "
    	if (defined($experiment->delay_capacity()));
581 582 583
    if ($userspec == 1) {
	$ptopargs .= "-x -g 2 ";
    }
584 585
    if ($use_old_ptopgen == 1) {
	$ptopargs .= "-Z ";
586
    }
587 588
    $ptopargs .= "-l $typelimitfile"
	if (scalar(keys(%admission_control)));
589 590 591 592 593

    chat("ptopargs: '$ptopargs'\n");
    TBDebugTimeStamp("ptopgen started");
    system("$PTOPGEN $ptopargs > $ptopfile");
    if ($?) {
594 595
	tberror("Failure in ptopgen\n");
	return -1;
596
    }
597
    system("/bin/cp -fp $ptopfile ${pid}-${eid}.ptop") if ($debug);
598 599 600
    TBDebugTimeStamp("ptopgen finished");

    # Run assign
601
    my $cmd  = "assign";
602 603
    $cmd .= "-new"
	if ($newassign);
604
    my $args = "";
605 606 607 608 609 610
    if ($XERCES) {
	$args .= "-f rspec " 
	    if ($userspec == 1);
	$args .= "-f text "
	    if ($userspec == 0);
    }
611
    $args .= "$ptopfile $vtopfile";
612 613
    $args = "-P $args"
	if (!$vtop->sharednodecount());
614 615 616 617
    $args = "-uod -c .75 $args"
	if ($vtop->virtnodecount() || $vtop->simnodecount());
    $args = "-n $args"
    	if ($precheck);
618 619
    $args = "-s 123456 $args"
	if ($regression);
620 621 622
    $args .= " PN=1.0"
	if ($vtop->sharednodecount());
    
623
    # The prepass speeds up assign on big topos with virtual nodes.
624
    if ($prepass || $useprepass) {
625 626 627 628
    	$cmd = "assign_prepass";
    	$args = "-m $mfactor $args"
    	    if (defined($mfactor));
    }
629 630 631 632 633 634
    chat("assign command: '$cmd $args'\n");

    #
    # Fork a child to run assign. Parent spins watching the cancel flag
    # and waiting for assign to finish.
    #
635
    TBDebugTimeStamp("assign started");
636 637 638 639 640 641 642 643 644 645 646 647 648 649 650
    if (my $childpid = fork()) {
	while (1) {
	    sleep(2);

	    if (waitpid($childpid, &WNOHANG) == $childpid) {
		$assignexitcode = $? >> 8;
		last;
	    }

	    # Check cancel flag.
	    if ($experiment->canceled()) {
		if ((my $pgrp = getpgrp($childpid)) > 0) {
		    kill('TERM', -$pgrp);
		    waitpid($childpid, 0);

651 652 653
		    tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
			     error => ['cancel_flag']},
			    "Cancel flag set; aborting assign run!\n");
654 655 656 657 658 659 660 661 662 663 664 665
		    return -1;
		}
		# Loop again to reap child above before exit.
	    }
	}
    }
    else {
	#
	# Change our session so the child can get a killpg without killing
	# the parent. 
	#
        POSIX::setsid();
666
	exec("nice $WRAPPER2 $cmd $args > assign.log 2>&1");
667 668
	die("Could not start assign!\n");
    }
669
    TBDebugTimeStamp("assign finished");
670 671 672

    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
673 674 675
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
	return -1;
    }

    # Check for possible full filesystem ...
    if (-z "assign.log") {
	tbnotice("assign.log is zero length! Stopping ...\n");
	return -1;
    }

    #
    # Saving up assign.log coz each swapin/modify is different and it
    # is nice to have every mapping for debugging and archiving
    # purposes We do not call it .log though, since we do not want it
    # copied out to the user directory every swapin. See Experiment.pm
    #
691 692
    system("/bin/cp -fp assign.log ${prefix}.assign");
    system("/bin/cp -fp assign.log ${pid}-${eid}.assign") if ($debug);
693 694

    if ($assignexitcode) {
695 696 697 698 699 700 701 702 703 704 705
	print "Assign exited with $assignexitcode\n" if ($debug);
	
	system("/bin/cat assign.log");
	#
	# assign returns two positive error codes (that we care about).
	# The distinction between them is somewhat murky. An exitval of
	# 1 means "retryable" while 2 means "unretryable". The former
	# means we can try again, while the later says there is no possible
	# way to map it. We pass this back to the caller so that we know
	# to exit the loop or try again.
	#
706 707 708 709 710 711 712
	return (($assignexitcode == 1) ? 1 : -1);
    }
    #
    # If we were doing the precheck, go ahead and exit now - there is no
    # useful information to parse out
    #
    if ($precheck) {
713
	chat("Precheck succeeded.\n");
714 715
	return 0;
    }
716
  skiprun:
717
    chat("Reading assign results.\n");
718 719 720 721
    if (!open(ASSIGNFP, "assign.log")) {
	print("Could not open assign logfile! $!\n");
	return -1;
    }
722
    TBDebugTimeStamp("ReadSolution started");
723
    if ($vtop->ReadTextSolution(*ASSIGNFP) != 0) {
724 725 726 727
	print("Could not parse assign logfile! $!\n");
	return -1;
    }
    close(ASSIGNFP);
728
    TBDebugTimeStamp("ReadSolution ended");
729
  skipassign:
730
    
731
    if (defined($vtop->genicount())) {
732 733 734 735 736 737 738 739
	TBDebugTimeStamp("Map Geni Resources Started");
	if ($vtop->MapResources() != 0) {
	    print("Could not map external resources! $!\n");
	    return -1;
	}
	TBDebugTimeStamp("Map Geni Resources ended");
    }
    TBDebugTimeStamp("InterpNodes Started");
740 741 742 743
    if ($vtop->InterpNodes() != 0) {
	print("Could not interpret nodes.\n");
	return -1;
    }
744
    TBDebugTimeStamp("InterpNodes ended, AllocNodes Started");
745 746
    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
747 748 749
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
750 751
	return -1;
    }
752
    my $retval = $vtop->AllocNodes();
753 754 755
    return $retval
	if ($retval != 0);

756
    TBDebugTimeStamp("AllocNodes ended, InterpLinks Started");
757 758 759 760
    if ($vtop->InterpLinks() != 0) {
	print("Could not setup links\n");
	return -1;
    }
761
    TBDebugTimeStamp("InterpLinks ended, InitializePhysNodes Started");
762 763 764 765
    if ($vtop->InitializePhysNodes() != 0) {
	print("Could not InitializePhysNodes\n");
	return -1;
    }
766
    TBDebugTimeStamp("InitializePhysNodes ended");
767
    if (! ($impotent || $regression || $alloconly)) {
768 769 770 771 772 773 774
	TBDebugTimeStamp("ReserveSharedBandwidth started");
	if ($experiment->ReserveSharedBandwidth($updating) != 0) {
	    print("Could not reserve shared bandwidth\n");
	    return -1;
	}
	TBDebugTimeStamp("ReserveSharedBandwidth ended");
    }
775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
    return 0;
}

#
# In regression mode we want to save  the physical state and then clear
# the physical resources. 
#
sub FinalizeRegression($)
{
    my ($error) = @_;
    my $cwd;

    chomp($cwd = `/bin/pwd`);
    if (!$error) {
	chat("Saving physical state in regression mode\n");
790 791 792 793
	if (system("/bin/rm -rf $pid-$eid.pstate")) {
	    tberror("Could not clean physical state directory\n");
	    return -1;
	}
794 795
	if ($experiment->BackupPhysicalState("$cwd/$pid-$eid.pstate", 1)
	    != 0) {
796 797 798
	    print STDERR "Could not save physical state!\n";
	    exit(1);
	}
799 800 801 802 803 804 805 806 807 808 809 810
    
	# Generate a vtop file with all resources fixed.
	chat("Generating new vtop file with all resources fixed.\n");
	if (! open(VTOPFILE, "> $pid-$eid.fixed")) {
	    tberror("Could not open $pid-$eid.fixed: $!\n");
	    return -1;
	}
	if ($vtop->PrintTop(*VTOPFILE) != 0) {
	    tberror("Could not print fixed vtop file for $experiment\n");
	    return -1;
	}
	close(VTOPFILE);
811
    }
Leigh Stoller's avatar
Leigh Stoller committed
812 813 814
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents();
    
815 816 817
    return 0
	if ($noregfree);
    
818 819 820 821 822 823 824 825 826 827 828 829 830 831
    chat("Removing physical state in regression mode\n");
    if ($experiment->RemovePhysicalState() != 0) {
	print STDERR "Could not remove physical state!\n";
	exit(1);
    }

    if ($vtop->newreservednodes()) {
	my @newreservednodes = $vtop->newreservednodes();
	system("export NORELOAD=1; $NFREE -x $pid $eid @newreservednodes");
	if ($?) {
	    exit(1);
	}
    }
    return 0;
832
}
833

834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856
#
# Print a solution in rspec format.
#
sub PrintSolution()
{
    my $output = undef;
    
    if ($outfile ne "-") {
	if (! open(OUTFILE, "> $outfile")) {
	    tberror("Could not open $outfile: $!\n");
	    return -1;
	}
	$output = *OUTFILE;
    }
    if ($vtop->PrintSolution($output) != 0) {
	tberror("Could not print solution for $experiment\n");
	return -1;
    }
    close($output)
	if (defined($output));
    return 0;
}

857 858 859 860
# We will come through here no matter how we exit.
END {
    # Watch for getting here cause of a die()/exit() statement someplace.
    my $exitcode = $?;
861

862 863 864 865 866 867 868 869 870
    #
    # Do not want to leave this around, it will lock the pool daemon out.
    #
    if ($gotlock) {
	DBQueryFatal("update emulab_locks set value=value-1 ".
		     "where name='pool_daemon'");
	$gotlock = 0;
    }

871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886
    if ($exitcode && $exitcode != $WRAPPER_FAILED) {
	$exitcode = $WRAPPER_FAILED|$WRAPPER_FAILED_FATALLY;
    }
    
    if ($warnings > 0) {
	tberror("$warnings warnings.\n");

	$exitcode |= $WRAPPER_FAILED;
    }

    # Set recover bit if we are going to fail.
    $exitcode = $exitcode|$WRAPPER_FAILED_CANRECOVER
	if ($exitcode && !$NoRecover);

    # And change the exitcode to be what we want it to be.
    $? = $exitcode;
887 888 889 890
}

sub fatal(@)
{
891 892 893 894 895 896 897 898 899 900 901 902 903
    #
    # Free any newly reserved nodes (in update mode) so that tbswap knows
    # it is safe to recover the experiment. If we bypass this and leave
    # through the END block then NoRecover will still be set and tbswap
    # will know to swap the experiment out. 
    #
    if ($updating) {
	if (defined($vtop)) {
	    my @newreservednodes = $vtop->newreservednodes();
	    my $NoRecover = $vtop->norecover();
	
	    if (@newreservednodes) {
		$NoRecover = 0
Leigh Stoller's avatar
Leigh Stoller committed
904
		    if (system("$NFREE -x $pid $eid @newreservednodes") == 0);
905 906 907 908 909 910 911 912
	    }
	    else {
		# When not updating this is meaningless to tbswap.
		$NoRecover = 0;
	    }
	}
    }
    
913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929
    tberror(@_);

    # We next go to the END block above.
    exit($WRAPPER_FAILED);
}
sub debug($)
{
    if ($verbose) {
	print $_[0];
    }
}
sub chat($)
{
    if (! $quiet) {
	print $_[0];
    }
}