mapper.in 27.8 KB
Newer Older
1 2
#!/usr/bin/perl -w
#
3
# Copyright (c) 2000-2016 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
#
use strict;
use English;
use Getopt::Std;
use POSIX qw(setsid ceil);
use POSIX ":sys_wait_h";

#
# This function as the main assign loop.  It converts the virtual
# topology into a top input including LAN and delay translation.  It
# then snapshots the current testbed physical state and runs assign,
# looping a couple times if assign fails.  When assign successfully
# completes it will interpret the results.  Attempt to match any
# existing portmap entries and then update the delays and vlans table.
#
# XXX Internally created nodes (jailhost,delay,sim) are not treated
#     consistently. Needs more thought.
#
# Return codes: We catch all errors with the END block below, and if
# anyone calls die() (exit value is 255) we add in the CANRECOVER bit.
# Failures in assign always cause the caller to stop retrying. 
#
# The CANRECOVER bit indicates 'recoverability' (no db or physical
46
# state was modified by the time the error occurred). This is relevant
47 48 49 50 51 52 53 54 55 56 57
# to only modify operations (update).
#
my $WRAPPER_SUCCESS		 = 0x00;
my $WRAPPER_FAILED		 = 0x01;	# Failed (Add other values)
my  $WRAPPER_FAILED_CANRECOVER   = 0x40;        # Can recover from update
my  $WRAPPER_FAILED_FATALLY      = 0x80;	# Do not use this.
# Set this once we modify DB state; forces no recover in fatal().
my $NoRecover = 0;

sub usage ()
{
58
    print STDERR "Usage: $0 [-v] [-u [-f] | -n] [-z] pid eid\n";
59 60 61 62
    print STDERR " -v   - Enables verbose output\n";
    print STDERR " -u   - Enables update mode\n";
    print STDERR " -f   - Fix current resources during update mode\n";
    print STDERR " -n   - Run assign, but do not reserve/modify resources.\n";
63
    print STDERR " -r   - Regression mode.\n";
64 65
    print STDERR " -x   - Turn on the prepass\n";
    print STDERR " -m   - Set the multiplex factor; overrides experiment.\n";
66 67
    print STDERR " -p   - Do a precheck for mapability on an empty testbed - ".
		 "implies -n\n";
68
    print STDERR " -l   - Use rspec v2 instead of the text file format\n";
69
#    print STDERR " -z   - Force new ptopgen\n";
70
    print STDERR " -Z   - Force old ptopgen\n";
71
    print STDERR " -A   - Tell ptopgen all nodes are free; only with -n\n";
72 73
    exit($WRAPPER_FAILED);
}
74
my $optlist    = "dvunfprqczxm:ko:altzZACFNL:S:G";
75 76 77
my $verbose    = 0;
my $debug      = 0;
my $fixmode    = 0;
78
my $fixlannodes= 0;
79 80 81
my $updating   = 0;
my $impotent   = 0;
my $precheck   = 0;
82
my $allnodesfree = 0;
Leigh B Stoller's avatar
Leigh B Stoller committed
83
my $toponly    = 0;
84
my $prepass    = 0;
85
my $alloconly  = 0;
86
my $gblinkcheck= 0;
87
my $outfile;
88
my $mfactor;
89
my $packoption;
90
my $regression = 0;
91
my $noassign   = 0;  # Only with regression mode, use previous solution.
92 93
my $noregfree  = 0;  # Only with regression mode, leave physical state at end.
my $usecurrent = 0;  # Only with regression mode, use current solution.
94
my $assignfile;
95 96 97
my $quiet      = 0;
my $clear      = 0;
my $warnings   = 0;
98
my $maxrun     = 3;  # Maximum number of times we run assign.
99
my $gotlock    = 0;
100
my $seriallock = 0;
101
my $userspec   = 0;
102
my $usecontrol = 0;
103
my $use_old_ptopgen  = 0;
104
my $vtop;
105 106 107 108 109

#
# Configure variables
#
my $TB		= "@prefix@";
110
my $MAINSITE    = @TBMAINSITE@;
111
my $DBNAME	= "@TBDBNAME@";
112 113 114 115
my $TBOPS       = "@TBOPSEMAIL@";
my $ASSIGN      = "$TB/libexec/assign";
my $WRAPPER2    = "$TB/libexec/assign_wrapper2";
my $PTOPGEN     = "$TB/libexec/ptopgen";
116
my $PTOPGEN_NEW = "$TB/libexec/ptopgen_new";
117
my $VTOPGEN     = "$TB/bin/vtopgen";
118
my $NFREE       = "$TB/bin/nfree";
119
my $XERCES	= "@HAVE_XERCES@";
120 121 122 123 124 125 126 127 128 129

#
# Load the Testbed support stuff. 
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use libtblog;
use libvtop;
use libadminctrl;
130
use libEmulab;
131
use User;
132
use EmulabFeatures;
133 134 135 136 137 138

# Protos
sub fatal(@);
sub debug($);
sub chat($);
sub RunAssign($$);
139
sub FinalizeRegression($);
140 141
sub AssignLoop();
sub MapperWrapper();
142
sub PrintSolution();
143 144 145 146 147 148

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
149 150 151 152 153 154 155 156 157 158
$| = 1;

#
# We want warnings to cause assign_wrapper to exit abnormally.
# 
# FIXME: Is this still needed.  "warn" is only used once.  Also this
#  will cause perl internal warnings (such as "Use of uninitialized
#  value ..."  to cause assign_wrapper to fail. -- kevina
#
$SIG{__WARN__} = sub { tbwarn $_[0];$warnings++; };
159 160 161 162 163 164 165 166 167 168 169 170 171

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV < 2) {
    usage();
}
if (defined($options{"v"})) {
172
    TBDebugTimeStampsOn();
173 174
    $verbose++;
}
175 176 177
if (defined($options{"a"})) {
    $alloconly++;
}
178 179 180
if (defined($options{"A"})) {
    $allnodesfree++;
}
181 182 183
if (defined($options{"d"})) {
    $debug++;
}
184 185 186
if (defined($options{"u"})) {
    $updating = 1;
}
Leigh B Stoller's avatar
Leigh B Stoller committed
187 188 189 190
if (defined($options{"t"})) {
    $toponly = 1;
    $quiet   = 1;
}
191 192 193
if (defined($options{"n"})) {
    $impotent = 1;
}
194 195 196 197 198 199
if (defined($options{"N"})) {
    $noassign = 1;
}
if (defined($options{"L"})) {
    $assignfile = $options{"L"};
}
200 201 202
if (defined($options{"f"})) {
    $fixmode = 1;
}
203 204 205
if (defined($options{"F"})) {
    $fixlannodes = 1;
}
206 207 208
if (defined($options{"G"})) {
    $gblinkcheck = 1;
}
209 210 211 212 213
if (defined($options{"S"})) {
    $packoption = $options{"S"};
    fatal("Bad -S option; must be pack or balance")
	if (! ($packoption eq "balance" || $packoption eq "pack"));
}
214 215 216
if (defined($options{"p"})) {
    $precheck = 1;
}
217 218 219
if (defined($options{"x"})) {
    $prepass = 1;
}
220 221 222
if (defined($options{"o"})) {
    $outfile = $options{"o"};
}
223 224 225
if (defined($options{"m"})) {
    $mfactor = $options{"m"};
}
226 227 228 229 230
if (defined($options{"r"})) {
    if ($DBNAME eq "tbdb") {
	fatal("Cannot use regression mode on main DB");
    }
    $regression = 1;
Leigh B Stoller's avatar
Leigh B Stoller committed
231 232
    $clear      = 1
	if (!defined($options{"t"}));
233
    $fixmode    = 1;
234
    TBDebugTimeStampsOn();
235
    $usecurrent = 1
236
	if (defined($options{"z"}));
237 238
    $noregfree = 1
	if (defined($options{"k"}));
239 240 241 242 243 244 245
}
if (defined($options{"q"})) {
    $quiet = 1;
}
if (defined($options{"c"})) {
    $clear = 1;
}
246 247 248
if (defined($options{"C"})) {
    $usecontrol = 1;
}
249
if (defined($options{"l"})) {
250 251 252 253 254
    if ($XERCES) {
	$userspec = 1;
    } else {
	fatal("Rspec v2 support requires that Xerces be installed");
    }
255
}
256 257 258
if (defined($options{"Z"})) {
    $use_old_ptopgen = 1;
}
259
if (defined($options{"z"})) {
260 261
#    $use_old_ptopgen = 0;
#    $PTOPGEN = $PTOPGEN_NEW;
262
}
263 264 265
if ($allnodesfree && !$impotent) {
    fatal("Not allowed to use -A without -n (impotent) option");
}
266 267 268 269 270 271 272

my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    fatal("Could not lookup experiment object $pid,$eid!")
}
273 274
my $project = $experiment->GetProject();

275 276 277 278
if ($pid eq "emulab-ops" || $pid eq "testbed") {
    $gblinkcheck++;
}

279 280 281 282 283 284 285 286 287 288 289
#
# Verify that this person can muck with the experiment.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
}
if (!TBAdmin() &&
    ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
    fatal("You do not have permission to map this experiment!");
}
290
my $real_user = User->RealUser();
291

292 293 294
# multiplex_factor default.
$mfactor = $experiment->multiplex_factor()
    if (!defined($mfactor) && defined($experiment->multiplex_factor()));
Leigh B Stoller's avatar
Leigh B Stoller committed
295 296
# NS file can say to run the prepass.
my $useprepass = $experiment->useprepass();
297 298 299 300 301

my $newassign =
    EmulabFeatures->FeatureEnabled("NewAssign",
				   $this_user,
				   $experiment->GetGroup(), $experiment);
302 303 304 305 306 307 308 309 310 311 312 313 314
if (!$usecontrol) {
    $usecontrol =
	EmulabFeatures->FeatureEnabled("ControlNetVlans",
				       $this_user,
				       $experiment->GetGroup(), $experiment);
    if ($usecontrol) {
	chat("Telling ptopgen to use control network vlans\n");
    }
}
if ($usecontrol && $MAINSITE) {
    $debug   = 1;
    $verbose = 1;
}
315

316 317 318 319 320 321 322
# XXX Hacky!
if ($MAINSITE && $TB ne "/usr/testbed") {
    $debug   = 1;
    $verbose = 1;
    $fixlannodes = $fixmode;
}

323
libvtop::Init($this_user, $experiment->GetGroup(), $experiment);
324
    
325 326 327 328 329 330
#
# These are the flags to the vtop creation code. 
#
my $vtopflags = 0;
$vtopflags |= $libvtop::VTOP_FLAGS_VERBOSE
    if ($verbose);
331 332
$vtopflags |= $libvtop::VTOP_FLAGS_QUIET
    if ($quiet);
333 334 335
$vtopflags |= $libvtop::VTOP_FLAGS_UPDATE
    if ($updating);
$vtopflags |= $libvtop::VTOP_FLAGS_FIXNODES
336
    if ($fixmode || $usecurrent);
337 338
$vtopflags |= $libvtop::VTOP_FLAGS_FIXLANNODES
    if ($fixlannodes);
339 340
$vtopflags |= $libvtop::VTOP_FLAGS_IMPOTENT
    if ($impotent);
341 342
$vtopflags |= $libvtop::VTOP_FLAGS_ALLOCONLY
    if ($alloconly);
343 344
$vtopflags |= $libvtop::VTOP_FLAGS_REGRESSION
    if ($regression);
345

346 347 348 349 350 351 352
MapperWrapper();
if ($regression) {
    if (0) {
	$updating   = 1;
	$fixmode    = 1;
	$clear      = 0;
	$vtopflags |=
353 354
	    ($libvtop::VTOP_FLAGS_UPDATE|$libvtop::VTOP_FLAGS_FIXLANNODES|
	     $libvtop::VTOP_FLAGS_FIXNODES);
355

356 357 358
	MapperWrapper();
    }
    FinalizeRegression(0);
359
}
360 361
PrintSolution()
    if ($outfile);
362
exit(0);
363

364 365
sub MapperWrapper()
{
Leigh B Stoller's avatar
Leigh B Stoller committed
366 367
    chat("Starting the new and improved mapper wrapper.\n")
	if (!$toponly);
368

Leigh B Stoller's avatar
Leigh B Stoller committed
369 370 371 372 373
    # Need to do this cause libvtop will add them again.
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents()
	if ($regression);

374
    TBDebugTimeStamp("Create libvtop started");
375
    $vtop = libvtop->Create($experiment, $this_user, $vtopflags, $real_user);
376 377 378 379
    if (!defined($vtop)) {
	fatal("Could not create vtop structure for $experiment");
    }
    TBDebugTimeStamp("Create libvtop ended");
Leigh B Stoller's avatar
Leigh B Stoller committed
380

Leigh B Stoller's avatar
Leigh B Stoller committed
381 382 383 384 385
    TBDebugTimeStamp("vtopgen started");
    $vtop->CreateVtop() == 0
	or fatal("Could not create vtop for $experiment");
    TBDebugTimeStamp("vtopgen finished");
    
386
    if (!$impotent && !$alloconly && !$toponly && ($updating || $clear)) {
387 388 389 390 391 392 393 394 395 396
	if ($clear) {
	    chat("Freeing reserved nodes ...\n");
	    system("export NORELOAD=1; $NFREE -x -a $pid $eid") == 0
		or fatal("Could not release nodes.");
	}
	chat("Clearing physical state before updating.\n");
	$experiment->RemovePhysicalState();
	exit(0)
	    if ($clear && !$regression);
    }
397 398 399 400 401 402
    if (!$toponly && $gblinkcheck) {
	if ($vtop->gblinks() && $vtop->mgblinks()) {
	    fatal("Not allowed to mix <=1Gb and >1Gb links");
	}
    }

403 404
    AssignLoop();
    return 0;
405
}
406 407 408 409

#
# The assign loop. 
#
410 411 412 413
sub AssignLoop()
{
    my $currentrun     = 1;
    my $canceled       = 0;
414
    my $progress       = 0;
415 416 417
    my $tried_precheck = 0;
    # Admission control counts
    my %admission_control = ();
418

419 420 421 422 423
    # XXX plab hack - only run assign once on plab topologies, since
    # they're easy to map and the physical topology does not change
    # frequently.
    if ($vtop->plabcount() && $vtop->plabcount == $vtop->virtnodecount()) {
	$maxrun = 2;
424 425
    }

426 427
    TBDebugTimeStamp("mapper loop started");
    while (1) {
428
	chat("Mapper loop $currentrun\n");
429

430
	my $prefix = "$pid-$eid-$$";
431 432

	#
433
	# When precheck is on, we only do one run in impotent mode and exit.
434
	#
435 436 437 438 439
	if ($precheck) {
	    $prefix  .= ".empty";
	    $impotent = 1;
	    chat("Trying assign on an empty testbed.\n");
	}
440

441 442 443
	#
	# Serialize with the pool daemon if using shared nodes.
	#
444
	if ((!($impotent || $regression)) && $vtop->sharednodecount()) {
445
	    while (1) {
446 447 448 449
		#
		# Use a countup/countdown counter, so that multiple mappers
		# can run, but not while the pool_daemon is running.
		#
450
		my $lock_result =
451 452 453 454
		    DBQueryFatal("update emulab_locks set value=value+1 ".
				 "where name='pool_daemon' and value>=0");

		$gotlock = $lock_result->affectedrows;
455 456 457 458 459

		last
		    if ($gotlock);
		
		chat("Waiting for pool daemon lock ...\n");
460
		sleep(10);
461 462
	    }
	}
463
	# Hack for Kirk.
464 465
	if (!($impotent || $regression) &&
	    ($pid eq "PNWorkshop" || $project->IsNonLocal())) {
466
	    while (1) {
467
		if (libEmulab::EmulabCountLock("mapperlock", 4) == 0) {
468 469 470 471 472 473 474 475
		    $seriallock = 1;
		    last;
		}
		chat("Waiting for mapper lock ...\n");
		sleep(5);
	    }
	}
	
476 477 478 479
	#
	# RunAssign returns  0 if successful.
	#           returns -1 if failure, but assign says to stop trying.
	#           returns  1 if failure, but assign says to try again.
480 481
	#           returns  2 if assign succeeds, but no nodes allocated.
	#           returns  3 if assign succeeds, but some nodes allocated.
482 483 484
	#
	my $retval = RunAssign($precheck, $prefix);

485
	if ($gotlock) {
486 487 488
	    DBQueryFatal("update emulab_locks set value=value-1 ".
			 "where name='pool_daemon'");
	    $gotlock = 0;
489
	}
490
	if ($seriallock) {
491
	    libEmulab::EmulabCountUnlock("mapperlock");
492 493
	    $seriallock = 0;
	}
494

495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
	# Success!
	last
	    if ($retval == 0);

	if ($retval < 0 || $regression) {
	    #
	    # Failure in assign.
	    #
	    FinalizeRegression(1)
		if ($regression);

	    fatal({type => 'primary', severity => SEV_ERROR,
		   error => ['unretriable_assign_error']},
		  "Unretriable error. Giving up.");
	}
510
    
511 512 513
	#
	# When precheck is off, we do a precheck run if the first try fails
	# to find a solution. This avoids looping on an unsolvable topology.
514 515 516
	# But, if the reason we are here is cause we could not allocate nodes,
	# then we found a solution, and so trying on an empty testbed is
	# pointless; it will obviously find a solution again.
517
	#
518 519 520
	if (!$precheck && !$tried_precheck && ($retval == 2 || $retval == 3)) {
	    $tried_precheck = 1;
	}
521 522 523 524 525 526 527
	if (!$precheck && !$tried_precheck) {
	    chat("Trying assign on an empty testbed to verify mapability.\n");
	    my $save_impotent = $impotent;
	    $impotent  = 1;
	    my $retval = RunAssign(1, $prefix . ".empty");
	    if ($retval != 0) {
		fatal({type=>'extra', cause=>'user', severity=>SEV_ERROR,
528 529 530 531 532 533 534 535
	           error=>['infeasible_resource_assignment']}, 
		  "This experiment cannot be instantiated on this ".
                  "testbed. You have most likely asked for hardware ".
                  "this testbed does not have, such as nodes of a type ".
                  "it does not contain, or nodes with too many network ".
                  "interfaces.  You will need to modify this experiment ".
                  "before it can be swapped in - re-submitting the ".
                  "experiment as-is will always result in failure.");
536 537 538 539
	    }
	    chat("Assign succeeded on an empty testbed.\n");
	    $impotent = $save_impotent;
	    $tried_precheck = 1;
540
	}
541 542 543 544
	# We try a minimum number of times, cause the node pool is
	# always changing. But once we hit the maxrun, we continue
	# only if progress on the last loop.
	if ($currentrun >= $maxrun && !$progress) {
545 546 547 548
	    fatal({type => 'primary', severity => SEV_ERROR,
		   error => ['reached_assign_run_limit']},
		  "Reached run limit. Giving up.");
	}
549 550 551
	# See if we made progress or not.
	# Keep going if we allocated some nodes.
	$progress = ($retval == 3);
552

553 554 555 556
	# A little bit of backoff after failure. 
	my $sval = int(rand($currentrun * 3)) + 3;
	chat("Waiting $sval seconds and trying again...\n");
	sleep($sval);
557 558
	$currentrun++;
    }
559
    GatherAssignStats($pid, $eid, %{ $vtop->exptstats() })
Leigh B Stoller's avatar
Leigh B Stoller committed
560
	if (! ($impotent || $alloconly || $regression || $toponly));
561 562
    TBDebugTimeStamp("mapper loop finished");
    return 0;
563 564 565 566 567 568 569 570 571
}

#
# The guts of an assign run.
#
sub RunAssign($$)
{
    my ($precheck, $prefix) = @_;

572
    my $typelimitfile = $prefix .".limits";
573 574
    my $ptopfile = $prefix . ".ptop";
    my $vtopfile = $prefix . ".vtop";
575 576 577 578
    if ($userspec) {
	$ptopfile .= ".xml";
	$vtopfile .= ".xml";
    }
579
    my $assignexitcode = 0;
580
    my $assignlog = "assign.log";
581

Leigh B Stoller's avatar
Leigh B Stoller committed
582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600
    #
    # Now generate a vtop file and dump it to a file.
    #
    if (! open(VTOPFILE, "> $vtopfile")) {
	tberror("Could not open $vtopfile: $!\n");
	return -1;
    }
    my $reslibvtop;
    if ($userspec == 1) {
	$reslibvtop = $vtop->PrintRspec(*VTOPFILE);
    }
    else {
	$reslibvtop = $vtop->PrintTop(*VTOPFILE);
    }
    if ($reslibvtop != 0) {
	tberror("Could not print vtop file for $experiment\n");
	return -1;
    }
    close(VTOPFILE);
601
    system("/bin/cp -fp $vtopfile ${pid}-${eid}.vtop") if ($debug);
Leigh B Stoller's avatar
Leigh B Stoller committed
602 603 604
    return 0
	if ($toponly);

605 606 607 608 609 610 611 612 613 614 615 616 617 618
    if (! ($impotent || $regression || $alloconly)) {
	if ($experiment->Update({"maximum_nodes" => $vtop->maximum_nodes(),
				 "minimum_nodes" => $vtop->minimum_nodes(),
				 "virtnode_count"=> $vtop->virtnodecount() })){
	    tberror("Could not update min/max/virt nodes for $experiment\n");
	    return -1;
	}
    }
    # New solution each time.
    $vtop->ClearSolution();

    goto skipassign
	if ($vtop->nodecount() == $vtop->genicount());

619 620 621 622 623 624
    # Debugging hack.
    if ($regression || $noassign || $assignfile) {
	$assignlog = $assignfile
	    if ($assignfile);
	
	if (! -e $assignlog) {
625 626 627 628 629 630 631
	    chat("No existing assign results file!\n");
	    return -1;
	}
	chat("Using existing assign results file\n");
	goto skiprun;
    }
    
632 633 634 635
    #
    # Do admission control test, and gather the info.
    #
    my %admission_control;
636 637 638 639 640 641
    if (!$regression) {
	if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control)){
	    tberror("Failed admission control checks!\n");
	    return -1;
	}
    }
642

643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658
    #
    # Append this admission control results to ptopgen.
    #
    if (scalar(keys(%admission_control))) {
	open(TYPELIMIT, ">$typelimitfile") or
	    return -1;

	foreach my $typeclass (keys(%admission_control)) {
	    my $count = $admission_control{$typeclass};

	    print TYPELIMIT "$typeclass $count\n";
	}
	close(TYPELIMIT);
    }


659 660 661 662 663 664 665 666 667 668 669
    #
    # Snapshot physical resources.
    #
    # if updating (-u), include any resources that may already be
    # allocated to experiment in the PTOP results.
    #
    my $ptopargs = "-p $pid ";
    $ptopargs   .= "-e $eid "
	if ($updating);
    $ptopargs   .= "-u "
	if ($updating && $experiment->elabinelab());
670 671
    $ptopargs   .= "-m $mfactor "
	if (defined($mfactor));
672 673
    $ptopargs   .= "-C "
	if ($usecontrol);
674 675 676 677 678 679
    $ptopargs   .= "-v "
	if ($vtop->virtnodecount());
    $ptopargs   .= "-r "
	if ($vtop->remotenodecount());
    $ptopargs   .= "-S "
	if ($vtop->simnodecount());
680 681
    $ptopargs   .= "-h "
	if ($vtop->sharednodecount());
682 683
    $ptopargs   .= "-b "
	if ($vtop->bstorecount());
684
    $ptopargs	.= "-a "
685
    	if ($precheck || $allnodesfree);
686 687
    $ptopargs	.= "-c " . $experiment->delay_capacity() . " "
    	if (defined($experiment->delay_capacity()));
688 689 690
    if ($userspec == 1) {
	$ptopargs .= "-x -g 2 ";
    }
691 692
    if ($use_old_ptopgen == 1) {
	$ptopargs .= "-Z ";
693
    }
694 695
    $ptopargs .= "-z "
	if ($project->IsNonLocal() || $vtop->sharednodecount());
696 697 698 699 700 701
    if ($gblinkcheck) {
	if ($vtop->mgblinks() == 0) {
	    $ptopargs .= "-G ";
	}
    }
    $ptopargs .= "-l $typelimitfile "
702
	if (scalar(keys(%admission_control)));
703 704 705 706 707

    chat("ptopargs: '$ptopargs'\n");
    TBDebugTimeStamp("ptopgen started");
    system("$PTOPGEN $ptopargs > $ptopfile");
    if ($?) {
708 709
	tberror("Failure in ptopgen\n");
	return -1;
710
    }
711
    system("/bin/cp -fp $ptopfile ${pid}-${eid}.ptop") if ($debug);
712 713 714
    TBDebugTimeStamp("ptopgen finished");

    # Run assign
715
    my $cmd  = "assign";
716 717
    $cmd .= "-new"
	if ($newassign);
718
    my $args = "";
719 720 721 722 723 724
    if ($XERCES) {
	$args .= "-f rspec " 
	    if ($userspec == 1);
	$args .= "-f text "
	    if ($userspec == 0);
    }
725
    $args .= "$ptopfile $vtopfile";
726
    $args = "-P $args"
727
	if (!$vtop->sharednodecount());
728 729
    $args = "-F $args"
	if (!$updating);
730
    $args = "-uod -c .75 -H 3 $args"
731 732 733
	if ($vtop->virtnodecount() || $vtop->simnodecount());
    $args = "-n $args"
    	if ($precheck);
734 735
    $args = "-S $packoption $args"
    	if (defined($packoption));
736 737
    $args = "-s 123456 $args"
	if ($regression);
738
    $args = "-R $args PN=1.0"
Leigh B Stoller's avatar
Leigh B Stoller committed
739
	if (0 && $vtop->sharednodecount());
740
    
741
    # The prepass speeds up assign on big topos with virtual nodes.
Leigh B Stoller's avatar
Leigh B Stoller committed
742
    if ($prepass || $useprepass) {
743 744 745 746
    	$cmd = "assign_prepass";
    	$args = "-m $mfactor $args"
    	    if (defined($mfactor));
    }
747 748 749 750 751 752
    chat("assign command: '$cmd $args'\n");

    #
    # Fork a child to run assign. Parent spins watching the cancel flag
    # and waiting for assign to finish.
    #
753
    TBDebugTimeStamp("assign started");
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768
    if (my $childpid = fork()) {
	while (1) {
	    sleep(2);

	    if (waitpid($childpid, &WNOHANG) == $childpid) {
		$assignexitcode = $? >> 8;
		last;
	    }

	    # Check cancel flag.
	    if ($experiment->canceled()) {
		if ((my $pgrp = getpgrp($childpid)) > 0) {
		    kill('TERM', -$pgrp);
		    waitpid($childpid, 0);

769 770 771
		    tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
			     error => ['cancel_flag']},
			    "Cancel flag set; aborting assign run!\n");
772 773 774 775 776 777 778 779 780 781 782 783
		    return -1;
		}
		# Loop again to reap child above before exit.
	    }
	}
    }
    else {
	#
	# Change our session so the child can get a killpg without killing
	# the parent. 
	#
        POSIX::setsid();
784
	exec("nice $WRAPPER2 $cmd $args > $assignlog 2>&1");
785 786
	die("Could not start assign!\n");
    }
787
    TBDebugTimeStamp("assign finished");
788 789 790

    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
791 792 793
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
794 795 796 797
	return -1;
    }

    # Check for possible full filesystem ...
798 799
    if (-z $assignlog) {
	tbnotice("$assignlog is zero length! Stopping ...\n");
800 801 802 803 804 805 806 807 808
	return -1;
    }

    #
    # Saving up assign.log coz each swapin/modify is different and it
    # is nice to have every mapping for debugging and archiving
    # purposes We do not call it .log though, since we do not want it
    # copied out to the user directory every swapin. See Experiment.pm
    #
809 810
    system("/bin/cp -fp $assignlog ${prefix}.assign");
    system("/bin/cp -fp $assignlog ${pid}-${eid}.assign") if ($debug);
811 812

    if ($assignexitcode) {
813 814
	print "Assign exited with $assignexitcode\n" if ($debug);
	
815
	system("/bin/cat $assignlog");
816 817 818 819 820 821 822 823
	#
	# assign returns two positive error codes (that we care about).
	# The distinction between them is somewhat murky. An exitval of
	# 1 means "retryable" while 2 means "unretryable". The former
	# means we can try again, while the later says there is no possible
	# way to map it. We pass this back to the caller so that we know
	# to exit the loop or try again.
	#
824 825 826 827 828 829 830
	return (($assignexitcode == 1) ? 1 : -1);
    }
    #
    # If we were doing the precheck, go ahead and exit now - there is no
    # useful information to parse out
    #
    if ($precheck) {
831
	chat("Precheck succeeded.\n");
832 833
	return 0;
    }
834
  skiprun:
835
    chat("Reading assign results.\n");
836
    if (!open(ASSIGNFP, $assignlog)) {
837 838 839
	print("Could not open assign logfile! $!\n");
	return -1;
    }
840
    TBDebugTimeStamp("ReadSolution started");
841
    if ($vtop->ReadTextSolution(*ASSIGNFP) != 0) {
842 843 844 845
	print("Could not parse assign logfile! $!\n");
	return -1;
    }
    close(ASSIGNFP);
846
    TBDebugTimeStamp("ReadSolution ended");
847
  skipassign:
848
    
849
    if (defined($vtop->genicount())) {
850 851 852 853 854 855 856 857
	TBDebugTimeStamp("Map Geni Resources Started");
	if ($vtop->MapResources() != 0) {
	    print("Could not map external resources! $!\n");
	    return -1;
	}
	TBDebugTimeStamp("Map Geni Resources ended");
    }
    TBDebugTimeStamp("InterpNodes Started");
858 859 860 861
    if ($vtop->InterpNodes() != 0) {
	print("Could not interpret nodes.\n");
	return -1;
    }
862
    TBDebugTimeStamp("InterpNodes ended, AllocNodes Started");
863 864
    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
865 866 867
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
868 869
	return -1;
    }
870
    my $retval = $vtop->AllocNodes();
871 872 873
    return $retval
	if ($retval != 0);

874
    TBDebugTimeStamp("AllocNodes ended, InterpLinks Started");
875 876 877 878
    if ($vtop->InterpLinks() != 0) {
	print("Could not setup links\n");
	return -1;
    }
879
    TBDebugTimeStamp("InterpLinks ended, InitializePhysNodes Started");
880 881 882 883
    if ($vtop->InitializePhysNodes() != 0) {
	print("Could not InitializePhysNodes\n");
	return -1;
    }
884
    TBDebugTimeStamp("InitializePhysNodes ended");
885
    if (! ($impotent || $regression || $alloconly)) {
886 887 888 889 890 891 892
	TBDebugTimeStamp("ReserveSharedBandwidth started");
	if ($experiment->ReserveSharedBandwidth($updating) != 0) {
	    print("Could not reserve shared bandwidth\n");
	    return -1;
	}
	TBDebugTimeStamp("ReserveSharedBandwidth ended");
    }
893 894 895 896 897 898 899 900 901 902 903 904 905 906 907
    return 0;
}

#
# In regression mode we want to save  the physical state and then clear
# the physical resources. 
#
sub FinalizeRegression($)
{
    my ($error) = @_;
    my $cwd;

    chomp($cwd = `/bin/pwd`);
    if (!$error) {
	chat("Saving physical state in regression mode\n");
908 909 910 911
	if (system("/bin/rm -rf $pid-$eid.pstate")) {
	    tberror("Could not clean physical state directory\n");
	    return -1;
	}
912 913
	if ($experiment->BackupPhysicalState("$cwd/$pid-$eid.pstate", 1)
	    != 0) {
914 915 916
	    print STDERR "Could not save physical state!\n";
	    exit(1);
	}
917 918 919 920 921 922 923 924 925 926 927 928
    
	# Generate a vtop file with all resources fixed.
	chat("Generating new vtop file with all resources fixed.\n");
	if (! open(VTOPFILE, "> $pid-$eid.fixed")) {
	    tberror("Could not open $pid-$eid.fixed: $!\n");
	    return -1;
	}
	if ($vtop->PrintTop(*VTOPFILE) != 0) {
	    tberror("Could not print fixed vtop file for $experiment\n");
	    return -1;
	}
	close(VTOPFILE);
929
    }
Leigh B Stoller's avatar
Leigh B Stoller committed
930 931 932
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents();
    
933 934 935
    return 0
	if ($noregfree);
    
936 937 938 939 940 941 942 943 944 945 946 947 948 949
    chat("Removing physical state in regression mode\n");
    if ($experiment->RemovePhysicalState() != 0) {
	print STDERR "Could not remove physical state!\n";
	exit(1);
    }

    if ($vtop->newreservednodes()) {
	my @newreservednodes = $vtop->newreservednodes();
	system("export NORELOAD=1; $NFREE -x $pid $eid @newreservednodes");
	if ($?) {
	    exit(1);
	}
    }
    return 0;
950
}
951

952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974
#
# Print a solution in rspec format.
#
sub PrintSolution()
{
    my $output = undef;
    
    if ($outfile ne "-") {
	if (! open(OUTFILE, "> $outfile")) {
	    tberror("Could not open $outfile: $!\n");
	    return -1;
	}
	$output = *OUTFILE;
    }
    if ($vtop->PrintSolution($output) != 0) {
	tberror("Could not print solution for $experiment\n");
	return -1;
    }
    close($output)
	if (defined($output));
    return 0;
}

975 976 977 978
# We will come through here no matter how we exit.
END {
    # Watch for getting here cause of a die()/exit() statement someplace.
    my $exitcode = $?;
979

980 981 982 983 984 985 986 987
    #
    # Do not want to leave this around, it will lock the pool daemon out.
    #
    if ($gotlock) {
	DBQueryFatal("update emulab_locks set value=value-1 ".
		     "where name='pool_daemon'");
	$gotlock = 0;
    }
988
    if ($seriallock) {
989
	libEmulab::EmulabCountUnlock("mapperlock");
990 991
	$seriallock = 0;
    }
992

993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
    if ($exitcode && $exitcode != $WRAPPER_FAILED) {
	$exitcode = $WRAPPER_FAILED|$WRAPPER_FAILED_FATALLY;
    }
    
    if ($warnings > 0) {
	tberror("$warnings warnings.\n");

	$exitcode |= $WRAPPER_FAILED;
    }

    # Set recover bit if we are going to fail.
    $exitcode = $exitcode|$WRAPPER_FAILED_CANRECOVER
	if ($exitcode && !$NoRecover);

    # And change the exitcode to be what we want it to be.
    $? = $exitcode;
1009 1010 1011 1012
}

sub fatal(@)
{
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025
    #
    # Free any newly reserved nodes (in update mode) so that tbswap knows
    # it is safe to recover the experiment. If we bypass this and leave
    # through the END block then NoRecover will still be set and tbswap
    # will know to swap the experiment out. 
    #
    if ($updating) {
	if (defined($vtop)) {
	    my @newreservednodes = $vtop->newreservednodes();
	    my $NoRecover = $vtop->norecover();
	
	    if (@newreservednodes) {
		$NoRecover = 0
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1026
		    if (system("$NFREE -x $pid $eid @newreservednodes") == 0);
1027 1028 1029 1030 1031 1032 1033 1034
	    }
	    else {
		# When not updating this is meaningless to tbswap.
		$NoRecover = 0;
	    }
	}
    }
    
1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051
    tberror(@_);

    # We next go to the END block above.
    exit($WRAPPER_FAILED);
}
sub debug($)
{
    if ($verbose) {
	print $_[0];
    }
}
sub chat($)
{
    if (! $quiet) {
	print $_[0];
    }
}