mapper.in 27.1 KB
Newer Older
1 2
#!/usr/bin/perl -w
#
3
# Copyright (c) 2000-2015 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
#
use strict;
use English;
use Getopt::Std;
use POSIX qw(setsid ceil);
use POSIX ":sys_wait_h";

#
# This function as the main assign loop.  It converts the virtual
# topology into a top input including LAN and delay translation.  It
# then snapshots the current testbed physical state and runs assign,
# looping a couple times if assign fails.  When assign successfully
# completes it will interpret the results.  Attempt to match any
# existing portmap entries and then update the delays and vlans table.
#
# XXX Internally created nodes (jailhost,delay,sim) are not treated
#     consistently. Needs more thought.
#
# Return codes: We catch all errors with the END block below, and if
# anyone calls die() (exit value is 255) we add in the CANRECOVER bit.
# Failures in assign always cause the caller to stop retrying. 
#
# The CANRECOVER bit indicates 'recoverability' (no db or physical
46
# state was modified by the time the error occurred). This is relevant
47 48 49 50 51 52 53 54 55 56 57
# to only modify operations (update).
#
my $WRAPPER_SUCCESS		 = 0x00;
my $WRAPPER_FAILED		 = 0x01;	# Failed (Add other values)
my  $WRAPPER_FAILED_CANRECOVER   = 0x40;        # Can recover from update
my  $WRAPPER_FAILED_FATALLY      = 0x80;	# Do not use this.
# Set this once we modify DB state; forces no recover in fatal().
my $NoRecover = 0;

sub usage ()
{
58
    print STDERR "Usage: $0 [-v] [-u [-f] | -n] [-z] pid eid\n";
59 60 61 62
    print STDERR " -v   - Enables verbose output\n";
    print STDERR " -u   - Enables update mode\n";
    print STDERR " -f   - Fix current resources during update mode\n";
    print STDERR " -n   - Run assign, but do not reserve/modify resources.\n";
63
    print STDERR " -r   - Regression mode.\n";
64 65
    print STDERR " -x   - Turn on the prepass\n";
    print STDERR " -m   - Set the multiplex factor; overrides experiment.\n";
66 67
    print STDERR " -p   - Do a precheck for mapability on an empty testbed - ".
		 "implies -n\n";
68
    print STDERR " -l   - Use rspec v2 instead of the text file format\n";
69
#    print STDERR " -z   - Force new ptopgen\n";
70
    print STDERR " -Z   - Force old ptopgen\n";
71
    print STDERR " -A   - Tell ptopgen all nodes are free; only with -n\n";
72 73
    exit($WRAPPER_FAILED);
}
74
my $optlist    = "dvunfprqczxm:ko:altzZACFNL:";
75 76 77
my $verbose    = 0;
my $debug      = 0;
my $fixmode    = 0;
78
my $fixlannodes= 0;
79 80 81
my $updating   = 0;
my $impotent   = 0;
my $precheck   = 0;
82
my $allnodesfree = 0;
Leigh B Stoller's avatar
Leigh B Stoller committed
83
my $toponly    = 0;
84
my $prepass    = 0;
85
my $alloconly  = 0;
86
my $outfile;
87
my $mfactor;
88
my $regression = 0;
89
my $noassign   = 0;  # Only with regression mode, use previous solution.
90 91
my $noregfree  = 0;  # Only with regression mode, leave physical state at end.
my $usecurrent = 0;  # Only with regression mode, use current solution.
92
my $assignfile;
93 94 95
my $quiet      = 0;
my $clear      = 0;
my $warnings   = 0;
96
my $maxrun     = 3;  # Maximum number of times we run assign.
97
my $gotlock    = 0;
98
my $seriallock = 0;
99
my $userspec   = 0;
100
my $usecontrol = 0;
101
my $use_old_ptopgen  = 0;
102
my $vtop;
103 104 105 106 107

#
# Configure variables
#
my $TB		= "@prefix@";
108
my $MAINSITE    = @TBMAINSITE@;
109
my $DBNAME	= "@TBDBNAME@";
110 111 112 113
my $TBOPS       = "@TBOPSEMAIL@";
my $ASSIGN      = "$TB/libexec/assign";
my $WRAPPER2    = "$TB/libexec/assign_wrapper2";
my $PTOPGEN     = "$TB/libexec/ptopgen";
114
my $PTOPGEN_NEW = "$TB/libexec/ptopgen_new";
115
my $VTOPGEN     = "$TB/bin/vtopgen";
116
my $NFREE       = "$TB/bin/nfree";
117
my $XERCES	= "@HAVE_XERCES@";
118 119 120 121 122 123 124 125 126 127

#
# Load the Testbed support stuff. 
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use libtblog;
use libvtop;
use libadminctrl;
128
use libEmulab;
129
use User;
130
use EmulabFeatures;
131 132 133 134 135 136

# Protos
sub fatal(@);
sub debug($);
sub chat($);
sub RunAssign($$);
137
sub FinalizeRegression($);
138 139
sub AssignLoop();
sub MapperWrapper();
140
sub PrintSolution();
141 142 143 144 145 146

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
147 148 149 150 151 152 153 154 155 156
$| = 1;

#
# We want warnings to cause assign_wrapper to exit abnormally.
# 
# FIXME: Is this still needed.  "warn" is only used once.  Also this
#  will cause perl internal warnings (such as "Use of uninitialized
#  value ..."  to cause assign_wrapper to fail. -- kevina
#
$SIG{__WARN__} = sub { tbwarn $_[0];$warnings++; };
157 158 159 160 161 162 163 164 165 166 167 168 169

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV < 2) {
    usage();
}
if (defined($options{"v"})) {
170
    TBDebugTimeStampsOn();
171 172
    $verbose++;
}
173 174 175
if (defined($options{"a"})) {
    $alloconly++;
}
176 177 178
if (defined($options{"A"})) {
    $allnodesfree++;
}
179 180 181
if (defined($options{"d"})) {
    $debug++;
}
182 183 184
if (defined($options{"u"})) {
    $updating = 1;
}
Leigh B Stoller's avatar
Leigh B Stoller committed
185 186 187 188
if (defined($options{"t"})) {
    $toponly = 1;
    $quiet   = 1;
}
189 190 191
if (defined($options{"n"})) {
    $impotent = 1;
}
192 193 194 195 196 197
if (defined($options{"N"})) {
    $noassign = 1;
}
if (defined($options{"L"})) {
    $assignfile = $options{"L"};
}
198 199 200
if (defined($options{"f"})) {
    $fixmode = 1;
}
201 202 203
if (defined($options{"F"})) {
    $fixlannodes = 1;
}
204 205 206
if (defined($options{"p"})) {
    $precheck = 1;
}
207 208 209
if (defined($options{"x"})) {
    $prepass = 1;
}
210 211 212
if (defined($options{"o"})) {
    $outfile = $options{"o"};
}
213 214 215
if (defined($options{"m"})) {
    $mfactor = $options{"m"};
}
216 217 218 219 220
if (defined($options{"r"})) {
    if ($DBNAME eq "tbdb") {
	fatal("Cannot use regression mode on main DB");
    }
    $regression = 1;
Leigh B Stoller's avatar
Leigh B Stoller committed
221 222
    $clear      = 1
	if (!defined($options{"t"}));
223
    $fixmode    = 1;
224
    TBDebugTimeStampsOn();
225
    $usecurrent = 1
226
	if (defined($options{"z"}));
227 228
    $noregfree = 1
	if (defined($options{"k"}));
229 230 231 232 233 234 235
}
if (defined($options{"q"})) {
    $quiet = 1;
}
if (defined($options{"c"})) {
    $clear = 1;
}
236 237 238
if (defined($options{"C"})) {
    $usecontrol = 1;
}
239
if (defined($options{"l"})) {
240 241 242 243 244
    if ($XERCES) {
	$userspec = 1;
    } else {
	fatal("Rspec v2 support requires that Xerces be installed");
    }
245
}
246 247 248
if (defined($options{"Z"})) {
    $use_old_ptopgen = 1;
}
249
if (defined($options{"z"})) {
250 251
#    $use_old_ptopgen = 0;
#    $PTOPGEN = $PTOPGEN_NEW;
252
}
253 254 255
if ($allnodesfree && !$impotent) {
    fatal("Not allowed to use -A without -n (impotent) option");
}
256 257 258 259 260 261 262

my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    fatal("Could not lookup experiment object $pid,$eid!")
}
263 264
my $project = $experiment->GetProject();

265 266 267 268 269 270 271 272 273 274 275
#
# Verify that this person can muck with the experiment.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
}
if (!TBAdmin() &&
    ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
    fatal("You do not have permission to map this experiment!");
}
276
my $real_user = User->RealUser();
277

278 279 280
# multiplex_factor default.
$mfactor = $experiment->multiplex_factor()
    if (!defined($mfactor) && defined($experiment->multiplex_factor()));
Leigh B Stoller's avatar
Leigh B Stoller committed
281 282
# NS file can say to run the prepass.
my $useprepass = $experiment->useprepass();
283 284 285 286 287

my $newassign =
    EmulabFeatures->FeatureEnabled("NewAssign",
				   $this_user,
				   $experiment->GetGroup(), $experiment);
288 289 290 291 292 293 294 295 296 297 298 299 300
if (!$usecontrol) {
    $usecontrol =
	EmulabFeatures->FeatureEnabled("ControlNetVlans",
				       $this_user,
				       $experiment->GetGroup(), $experiment);
    if ($usecontrol) {
	chat("Telling ptopgen to use control network vlans\n");
    }
}
if ($usecontrol && $MAINSITE) {
    $debug   = 1;
    $verbose = 1;
}
301

302 303 304 305 306 307 308
# XXX Hacky!
if ($MAINSITE && $TB ne "/usr/testbed") {
    $debug   = 1;
    $verbose = 1;
    $fixlannodes = $fixmode;
}

309
libvtop::Init($this_user, $experiment->GetGroup(), $experiment);
310
    
311 312 313 314 315 316
#
# These are the flags to the vtop creation code. 
#
my $vtopflags = 0;
$vtopflags |= $libvtop::VTOP_FLAGS_VERBOSE
    if ($verbose);
317 318
$vtopflags |= $libvtop::VTOP_FLAGS_QUIET
    if ($quiet);
319 320 321
$vtopflags |= $libvtop::VTOP_FLAGS_UPDATE
    if ($updating);
$vtopflags |= $libvtop::VTOP_FLAGS_FIXNODES
322
    if ($fixmode || $usecurrent);
323 324
$vtopflags |= $libvtop::VTOP_FLAGS_FIXLANNODES
    if ($fixlannodes);
325 326
$vtopflags |= $libvtop::VTOP_FLAGS_IMPOTENT
    if ($impotent);
327 328
$vtopflags |= $libvtop::VTOP_FLAGS_ALLOCONLY
    if ($alloconly);
329 330
$vtopflags |= $libvtop::VTOP_FLAGS_REGRESSION
    if ($regression);
331

332 333 334 335 336 337 338
MapperWrapper();
if ($regression) {
    if (0) {
	$updating   = 1;
	$fixmode    = 1;
	$clear      = 0;
	$vtopflags |=
339 340
	    ($libvtop::VTOP_FLAGS_UPDATE|$libvtop::VTOP_FLAGS_FIXLANNODES|
	     $libvtop::VTOP_FLAGS_FIXNODES);
341

342 343 344
	MapperWrapper();
    }
    FinalizeRegression(0);
345
}
346 347
PrintSolution()
    if ($outfile);
348
exit(0);
349

350 351
sub MapperWrapper()
{
Leigh B Stoller's avatar
Leigh B Stoller committed
352 353
    chat("Starting the new and improved mapper wrapper.\n")
	if (!$toponly);
354

Leigh B Stoller's avatar
Leigh B Stoller committed
355 356 357 358 359
    # Need to do this cause libvtop will add them again.
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents()
	if ($regression);

360
    TBDebugTimeStamp("Create libvtop started");
361
    $vtop = libvtop->Create($experiment, $this_user, $vtopflags, $real_user);
362 363 364 365
    if (!defined($vtop)) {
	fatal("Could not create vtop structure for $experiment");
    }
    TBDebugTimeStamp("Create libvtop ended");
Leigh B Stoller's avatar
Leigh B Stoller committed
366

Leigh B Stoller's avatar
Leigh B Stoller committed
367 368 369 370 371
    TBDebugTimeStamp("vtopgen started");
    $vtop->CreateVtop() == 0
	or fatal("Could not create vtop for $experiment");
    TBDebugTimeStamp("vtopgen finished");
    
372
    if (!$impotent && !$alloconly && !$toponly && ($updating || $clear)) {
373 374 375 376 377 378 379 380 381 382 383 384
	if ($clear) {
	    chat("Freeing reserved nodes ...\n");
	    system("export NORELOAD=1; $NFREE -x -a $pid $eid") == 0
		or fatal("Could not release nodes.");
	}
	chat("Clearing physical state before updating.\n");
	$experiment->RemovePhysicalState();
	exit(0)
	    if ($clear && !$regression);
    }
    AssignLoop();
    return 0;
385
}
386 387 388 389

#
# The assign loop. 
#
390 391 392 393
sub AssignLoop()
{
    my $currentrun     = 1;
    my $canceled       = 0;
394
    my $progress       = 0;
395 396 397
    my $tried_precheck = 0;
    # Admission control counts
    my %admission_control = ();
398

399 400 401 402 403
    # XXX plab hack - only run assign once on plab topologies, since
    # they're easy to map and the physical topology does not change
    # frequently.
    if ($vtop->plabcount() && $vtop->plabcount == $vtop->virtnodecount()) {
	$maxrun = 2;
404 405
    }

406 407
    TBDebugTimeStamp("mapper loop started");
    while (1) {
408
	chat("Mapper loop $currentrun\n");
409

410
	my $prefix = "$pid-$eid-$$";
411 412

	#
413
	# When precheck is on, we only do one run in impotent mode and exit.
414
	#
415 416 417 418 419
	if ($precheck) {
	    $prefix  .= ".empty";
	    $impotent = 1;
	    chat("Trying assign on an empty testbed.\n");
	}
420

421 422 423
	#
	# Serialize with the pool daemon if using shared nodes.
	#
424
	if ((!($impotent || $regression)) && $vtop->sharednodecount()) {
425
	    while (1) {
426 427 428 429
		#
		# Use a countup/countdown counter, so that multiple mappers
		# can run, but not while the pool_daemon is running.
		#
430
		my $lock_result =
431 432 433 434
		    DBQueryFatal("update emulab_locks set value=value+1 ".
				 "where name='pool_daemon' and value>=0");

		$gotlock = $lock_result->affectedrows;
435 436 437 438 439

		last
		    if ($gotlock);
		
		chat("Waiting for pool daemon lock ...\n");
440
		sleep(10);
441 442
	    }
	}
443
	# Hack for Kirk.
444 445
	if (!($impotent || $regression) &&
	    ($pid eq "PNWorkshop" || $project->IsNonLocal())) {
446
	    while (1) {
447
		if (libEmulab::EmulabCountLock("mapperlock", 4) == 0) {
448 449 450 451 452 453 454 455
		    $seriallock = 1;
		    last;
		}
		chat("Waiting for mapper lock ...\n");
		sleep(5);
	    }
	}
	
456 457 458 459
	#
	# RunAssign returns  0 if successful.
	#           returns -1 if failure, but assign says to stop trying.
	#           returns  1 if failure, but assign says to try again.
460 461
	#           returns  2 if assign succeeds, but no nodes allocated.
	#           returns  3 if assign succeeds, but some nodes allocated.
462 463 464
	#
	my $retval = RunAssign($precheck, $prefix);

465
	if ($gotlock) {
466 467 468
	    DBQueryFatal("update emulab_locks set value=value-1 ".
			 "where name='pool_daemon'");
	    $gotlock = 0;
469
	}
470
	if ($seriallock) {
471
	    libEmulab::EmulabCountUnlock("mapperlock");
472 473
	    $seriallock = 0;
	}
474

475 476 477 478 479 480 481 482 483 484 485 486 487 488 489
	# Success!
	last
	    if ($retval == 0);

	if ($retval < 0 || $regression) {
	    #
	    # Failure in assign.
	    #
	    FinalizeRegression(1)
		if ($regression);

	    fatal({type => 'primary', severity => SEV_ERROR,
		   error => ['unretriable_assign_error']},
		  "Unretriable error. Giving up.");
	}
490
    
491 492 493
	#
	# When precheck is off, we do a precheck run if the first try fails
	# to find a solution. This avoids looping on an unsolvable topology.
494 495 496
	# But, if the reason we are here is cause we could not allocate nodes,
	# then we found a solution, and so trying on an empty testbed is
	# pointless; it will obviously find a solution again.
497
	#
498 499 500
	if (!$precheck && !$tried_precheck && ($retval == 2 || $retval == 3)) {
	    $tried_precheck = 1;
	}
501 502 503 504 505 506 507
	if (!$precheck && !$tried_precheck) {
	    chat("Trying assign on an empty testbed to verify mapability.\n");
	    my $save_impotent = $impotent;
	    $impotent  = 1;
	    my $retval = RunAssign(1, $prefix . ".empty");
	    if ($retval != 0) {
		fatal({type=>'extra', cause=>'user', severity=>SEV_ERROR,
508 509 510 511 512 513 514 515
	           error=>['infeasible_resource_assignment']}, 
		  "This experiment cannot be instantiated on this ".
                  "testbed. You have most likely asked for hardware ".
                  "this testbed does not have, such as nodes of a type ".
                  "it does not contain, or nodes with too many network ".
                  "interfaces.  You will need to modify this experiment ".
                  "before it can be swapped in - re-submitting the ".
                  "experiment as-is will always result in failure.");
516 517 518 519
	    }
	    chat("Assign succeeded on an empty testbed.\n");
	    $impotent = $save_impotent;
	    $tried_precheck = 1;
520
	}
521 522 523 524
	# We try a minimum number of times, cause the node pool is
	# always changing. But once we hit the maxrun, we continue
	# only if progress on the last loop.
	if ($currentrun >= $maxrun && !$progress) {
525 526 527 528
	    fatal({type => 'primary', severity => SEV_ERROR,
		   error => ['reached_assign_run_limit']},
		  "Reached run limit. Giving up.");
	}
529 530 531
	# See if we made progress or not.
	# Keep going if we allocated some nodes.
	$progress = ($retval == 3);
532

533 534 535 536
	# A little bit of backoff after failure. 
	my $sval = int(rand($currentrun * 3)) + 3;
	chat("Waiting $sval seconds and trying again...\n");
	sleep($sval);
537 538
	$currentrun++;
    }
539
    GatherAssignStats($pid, $eid, %{ $vtop->exptstats() })
Leigh B Stoller's avatar
Leigh B Stoller committed
540
	if (! ($impotent || $alloconly || $regression || $toponly));
541 542
    TBDebugTimeStamp("mapper loop finished");
    return 0;
543 544 545 546 547 548 549 550 551
}

#
# The guts of an assign run.
#
sub RunAssign($$)
{
    my ($precheck, $prefix) = @_;

552
    my $typelimitfile = $prefix .".limits";
553 554
    my $ptopfile = $prefix . ".ptop";
    my $vtopfile = $prefix . ".vtop";
555 556 557 558
    if ($userspec) {
	$ptopfile .= ".xml";
	$vtopfile .= ".xml";
    }
559
    my $assignexitcode = 0;
560
    my $assignlog = "assign.log";
561

Leigh B Stoller's avatar
Leigh B Stoller committed
562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
    #
    # Now generate a vtop file and dump it to a file.
    #
    if (! open(VTOPFILE, "> $vtopfile")) {
	tberror("Could not open $vtopfile: $!\n");
	return -1;
    }
    my $reslibvtop;
    if ($userspec == 1) {
	$reslibvtop = $vtop->PrintRspec(*VTOPFILE);
    }
    else {
	$reslibvtop = $vtop->PrintTop(*VTOPFILE);
    }
    if ($reslibvtop != 0) {
	tberror("Could not print vtop file for $experiment\n");
	return -1;
    }
    close(VTOPFILE);
581
    system("/bin/cp -fp $vtopfile ${pid}-${eid}.vtop") if ($debug);
Leigh B Stoller's avatar
Leigh B Stoller committed
582 583 584
    return 0
	if ($toponly);

585 586 587 588 589 590 591 592 593 594 595 596 597 598
    if (! ($impotent || $regression || $alloconly)) {
	if ($experiment->Update({"maximum_nodes" => $vtop->maximum_nodes(),
				 "minimum_nodes" => $vtop->minimum_nodes(),
				 "virtnode_count"=> $vtop->virtnodecount() })){
	    tberror("Could not update min/max/virt nodes for $experiment\n");
	    return -1;
	}
    }
    # New solution each time.
    $vtop->ClearSolution();

    goto skipassign
	if ($vtop->nodecount() == $vtop->genicount());

599 600 601 602 603 604
    # Debugging hack.
    if ($regression || $noassign || $assignfile) {
	$assignlog = $assignfile
	    if ($assignfile);
	
	if (! -e $assignlog) {
605 606 607 608 609 610 611
	    chat("No existing assign results file!\n");
	    return -1;
	}
	chat("Using existing assign results file\n");
	goto skiprun;
    }
    
612 613 614 615
    #
    # Do admission control test, and gather the info.
    #
    my %admission_control;
616 617 618 619 620 621
    if (!$regression) {
	if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control)){
	    tberror("Failed admission control checks!\n");
	    return -1;
	}
    }
622

623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638
    #
    # Append this admission control results to ptopgen.
    #
    if (scalar(keys(%admission_control))) {
	open(TYPELIMIT, ">$typelimitfile") or
	    return -1;

	foreach my $typeclass (keys(%admission_control)) {
	    my $count = $admission_control{$typeclass};

	    print TYPELIMIT "$typeclass $count\n";
	}
	close(TYPELIMIT);
    }


639 640 641 642 643 644 645 646 647 648 649
    #
    # Snapshot physical resources.
    #
    # if updating (-u), include any resources that may already be
    # allocated to experiment in the PTOP results.
    #
    my $ptopargs = "-p $pid ";
    $ptopargs   .= "-e $eid "
	if ($updating);
    $ptopargs   .= "-u "
	if ($updating && $experiment->elabinelab());
650 651
    $ptopargs   .= "-m $mfactor "
	if (defined($mfactor));
652 653
    $ptopargs   .= "-C "
	if ($usecontrol);
654 655 656 657 658 659
    $ptopargs   .= "-v "
	if ($vtop->virtnodecount());
    $ptopargs   .= "-r "
	if ($vtop->remotenodecount());
    $ptopargs   .= "-S "
	if ($vtop->simnodecount());
660 661
    $ptopargs   .= "-h "
	if ($vtop->sharednodecount());
662 663
    $ptopargs   .= "-b "
	if ($vtop->bstorecount());
664
    $ptopargs	.= "-a "
665
    	if ($precheck || $allnodesfree);
666 667
    $ptopargs	.= "-c " . $experiment->delay_capacity() . " "
    	if (defined($experiment->delay_capacity()));
668 669 670
    if ($userspec == 1) {
	$ptopargs .= "-x -g 2 ";
    }
671 672
    if ($use_old_ptopgen == 1) {
	$ptopargs .= "-Z ";
673
    }
674 675
    $ptopargs .= "-z "
	if ($project->IsNonLocal() || $vtop->sharednodecount());
676 677
    $ptopargs .= "-l $typelimitfile"
	if (scalar(keys(%admission_control)));
678 679 680 681 682

    chat("ptopargs: '$ptopargs'\n");
    TBDebugTimeStamp("ptopgen started");
    system("$PTOPGEN $ptopargs > $ptopfile");
    if ($?) {
683 684
	tberror("Failure in ptopgen\n");
	return -1;
685
    }
686
    system("/bin/cp -fp $ptopfile ${pid}-${eid}.ptop") if ($debug);
687 688 689
    TBDebugTimeStamp("ptopgen finished");

    # Run assign
690
    my $cmd  = "assign";
691 692
    $cmd .= "-new"
	if ($newassign);
693
    my $args = "";
694 695 696 697 698 699
    if ($XERCES) {
	$args .= "-f rspec " 
	    if ($userspec == 1);
	$args .= "-f text "
	    if ($userspec == 0);
    }
700
    $args .= "$ptopfile $vtopfile";
701
    $args = "-P $args"
702
	if (!$vtop->sharednodecount());
703 704
    $args = "-F $args"
	if (!$updating);
705 706 707 708
    $args = "-uod -c .75 $args"
	if ($vtop->virtnodecount() || $vtop->simnodecount());
    $args = "-n $args"
    	if ($precheck);
709 710
    $args = "-s 123456 $args"
	if ($regression);
711
    $args = "-R $args PN=1.0"
Leigh B Stoller's avatar
Leigh B Stoller committed
712
	if (0 && $vtop->sharednodecount());
713
    
714
    # The prepass speeds up assign on big topos with virtual nodes.
Leigh B Stoller's avatar
Leigh B Stoller committed
715
    if ($prepass || $useprepass) {
716 717 718 719
    	$cmd = "assign_prepass";
    	$args = "-m $mfactor $args"
    	    if (defined($mfactor));
    }
720 721 722 723 724 725
    chat("assign command: '$cmd $args'\n");

    #
    # Fork a child to run assign. Parent spins watching the cancel flag
    # and waiting for assign to finish.
    #
726
    TBDebugTimeStamp("assign started");
727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
    if (my $childpid = fork()) {
	while (1) {
	    sleep(2);

	    if (waitpid($childpid, &WNOHANG) == $childpid) {
		$assignexitcode = $? >> 8;
		last;
	    }

	    # Check cancel flag.
	    if ($experiment->canceled()) {
		if ((my $pgrp = getpgrp($childpid)) > 0) {
		    kill('TERM', -$pgrp);
		    waitpid($childpid, 0);

742 743 744
		    tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
			     error => ['cancel_flag']},
			    "Cancel flag set; aborting assign run!\n");
745 746 747 748 749 750 751 752 753 754 755 756
		    return -1;
		}
		# Loop again to reap child above before exit.
	    }
	}
    }
    else {
	#
	# Change our session so the child can get a killpg without killing
	# the parent. 
	#
        POSIX::setsid();
757
	exec("nice $WRAPPER2 $cmd $args > $assignlog 2>&1");
758 759
	die("Could not start assign!\n");
    }
760
    TBDebugTimeStamp("assign finished");
761 762 763

    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
764 765 766
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
767 768 769 770
	return -1;
    }

    # Check for possible full filesystem ...
771 772
    if (-z $assignlog) {
	tbnotice("$assignlog is zero length! Stopping ...\n");
773 774 775 776 777 778 779 780 781
	return -1;
    }

    #
    # Saving up assign.log coz each swapin/modify is different and it
    # is nice to have every mapping for debugging and archiving
    # purposes We do not call it .log though, since we do not want it
    # copied out to the user directory every swapin. See Experiment.pm
    #
782 783
    system("/bin/cp -fp $assignlog ${prefix}.assign");
    system("/bin/cp -fp $assignlog ${pid}-${eid}.assign") if ($debug);
784 785

    if ($assignexitcode) {
786 787
	print "Assign exited with $assignexitcode\n" if ($debug);
	
788
	system("/bin/cat $assignlog");
789 790 791 792 793 794 795 796
	#
	# assign returns two positive error codes (that we care about).
	# The distinction between them is somewhat murky. An exitval of
	# 1 means "retryable" while 2 means "unretryable". The former
	# means we can try again, while the later says there is no possible
	# way to map it. We pass this back to the caller so that we know
	# to exit the loop or try again.
	#
797 798 799 800 801 802 803
	return (($assignexitcode == 1) ? 1 : -1);
    }
    #
    # If we were doing the precheck, go ahead and exit now - there is no
    # useful information to parse out
    #
    if ($precheck) {
804
	chat("Precheck succeeded.\n");
805 806
	return 0;
    }
807
  skiprun:
808
    chat("Reading assign results.\n");
809
    if (!open(ASSIGNFP, $assignlog)) {
810 811 812
	print("Could not open assign logfile! $!\n");
	return -1;
    }
813
    TBDebugTimeStamp("ReadSolution started");
814
    if ($vtop->ReadTextSolution(*ASSIGNFP) != 0) {
815 816 817 818
	print("Could not parse assign logfile! $!\n");
	return -1;
    }
    close(ASSIGNFP);
819
    TBDebugTimeStamp("ReadSolution ended");
820
  skipassign:
821
    
822
    if (defined($vtop->genicount())) {
823 824 825 826 827 828 829 830
	TBDebugTimeStamp("Map Geni Resources Started");
	if ($vtop->MapResources() != 0) {
	    print("Could not map external resources! $!\n");
	    return -1;
	}
	TBDebugTimeStamp("Map Geni Resources ended");
    }
    TBDebugTimeStamp("InterpNodes Started");
831 832 833 834
    if ($vtop->InterpNodes() != 0) {
	print("Could not interpret nodes.\n");
	return -1;
    }
835
    TBDebugTimeStamp("InterpNodes ended, AllocNodes Started");
836 837
    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
838 839 840
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
841 842
	return -1;
    }
843
    my $retval = $vtop->AllocNodes();
844 845 846
    return $retval
	if ($retval != 0);

847
    TBDebugTimeStamp("AllocNodes ended, InterpLinks Started");
848 849 850 851
    if ($vtop->InterpLinks() != 0) {
	print("Could not setup links\n");
	return -1;
    }
852
    TBDebugTimeStamp("InterpLinks ended, InitializePhysNodes Started");
853 854 855 856
    if ($vtop->InitializePhysNodes() != 0) {
	print("Could not InitializePhysNodes\n");
	return -1;
    }
857
    TBDebugTimeStamp("InitializePhysNodes ended");
858
    if (! ($impotent || $regression || $alloconly)) {
859 860 861 862 863 864 865
	TBDebugTimeStamp("ReserveSharedBandwidth started");
	if ($experiment->ReserveSharedBandwidth($updating) != 0) {
	    print("Could not reserve shared bandwidth\n");
	    return -1;
	}
	TBDebugTimeStamp("ReserveSharedBandwidth ended");
    }
866 867 868 869 870 871 872 873 874 875 876 877 878 879 880
    return 0;
}

#
# In regression mode we want to save  the physical state and then clear
# the physical resources. 
#
sub FinalizeRegression($)
{
    my ($error) = @_;
    my $cwd;

    chomp($cwd = `/bin/pwd`);
    if (!$error) {
	chat("Saving physical state in regression mode\n");
881 882 883 884
	if (system("/bin/rm -rf $pid-$eid.pstate")) {
	    tberror("Could not clean physical state directory\n");
	    return -1;
	}
885 886
	if ($experiment->BackupPhysicalState("$cwd/$pid-$eid.pstate", 1)
	    != 0) {
887 888 889
	    print STDERR "Could not save physical state!\n";
	    exit(1);
	}
890 891 892 893 894 895 896 897 898 899 900 901
    
	# Generate a vtop file with all resources fixed.
	chat("Generating new vtop file with all resources fixed.\n");
	if (! open(VTOPFILE, "> $pid-$eid.fixed")) {
	    tberror("Could not open $pid-$eid.fixed: $!\n");
	    return -1;
	}
	if ($vtop->PrintTop(*VTOPFILE) != 0) {
	    tberror("Could not print fixed vtop file for $experiment\n");
	    return -1;
	}
	close(VTOPFILE);
902
    }
Leigh B Stoller's avatar
Leigh B Stoller committed
903 904 905
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents();
    
906 907 908
    return 0
	if ($noregfree);
    
909 910 911 912 913 914 915 916 917 918 919 920 921 922
    chat("Removing physical state in regression mode\n");
    if ($experiment->RemovePhysicalState() != 0) {
	print STDERR "Could not remove physical state!\n";
	exit(1);
    }

    if ($vtop->newreservednodes()) {
	my @newreservednodes = $vtop->newreservednodes();
	system("export NORELOAD=1; $NFREE -x $pid $eid @newreservednodes");
	if ($?) {
	    exit(1);
	}
    }
    return 0;
923
}
924

925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947
#
# Print a solution in rspec format.
#
sub PrintSolution()
{
    my $output = undef;
    
    if ($outfile ne "-") {
	if (! open(OUTFILE, "> $outfile")) {
	    tberror("Could not open $outfile: $!\n");
	    return -1;
	}
	$output = *OUTFILE;
    }
    if ($vtop->PrintSolution($output) != 0) {
	tberror("Could not print solution for $experiment\n");
	return -1;
    }
    close($output)
	if (defined($output));
    return 0;
}

948 949 950 951
# We will come through here no matter how we exit.
END {
    # Watch for getting here cause of a die()/exit() statement someplace.
    my $exitcode = $?;
952

953 954 955 956 957 958 959 960
    #
    # Do not want to leave this around, it will lock the pool daemon out.
    #
    if ($gotlock) {
	DBQueryFatal("update emulab_locks set value=value-1 ".
		     "where name='pool_daemon'");
	$gotlock = 0;
    }
961
    if ($seriallock) {
962
	libEmulab::EmulabCountUnlock("mapperlock");
963 964
	$seriallock = 0;
    }
965

966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981
    if ($exitcode && $exitcode != $WRAPPER_FAILED) {
	$exitcode = $WRAPPER_FAILED|$WRAPPER_FAILED_FATALLY;
    }
    
    if ($warnings > 0) {
	tberror("$warnings warnings.\n");

	$exitcode |= $WRAPPER_FAILED;
    }

    # Set recover bit if we are going to fail.
    $exitcode = $exitcode|$WRAPPER_FAILED_CANRECOVER
	if ($exitcode && !$NoRecover);

    # And change the exitcode to be what we want it to be.
    $? = $exitcode;
982 983 984 985
}

sub fatal(@)
{
986 987 988 989 990 991 992 993 994 995 996 997 998
    #
    # Free any newly reserved nodes (in update mode) so that tbswap knows
    # it is safe to recover the experiment. If we bypass this and leave
    # through the END block then NoRecover will still be set and tbswap
    # will know to swap the experiment out. 
    #
    if ($updating) {
	if (defined($vtop)) {
	    my @newreservednodes = $vtop->newreservednodes();
	    my $NoRecover = $vtop->norecover();
	
	    if (@newreservednodes) {
		$NoRecover = 0
Leigh B. Stoller's avatar
Leigh B. Stoller committed
999
		    if (system("$NFREE -x $pid $eid @newreservednodes") == 0);
1000 1001 1002 1003 1004 1005 1006 1007
	    }
	    else {
		# When not updating this is meaningless to tbswap.
		$NoRecover = 0;
	    }
	}
    }
    
1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024
    tberror(@_);

    # We next go to the END block above.
    exit($WRAPPER_FAILED);
}
sub debug($)
{
    if ($verbose) {
	print $_[0];
    }
}
sub chat($)
{
    if (! $quiet) {
	print $_[0];
    }
}