mapper.in 27.5 KB
Newer Older
1 2
#!/usr/bin/perl -w
#
3
# Copyright (c) 2000-2016 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
#
use strict;
use English;
use Getopt::Std;
use POSIX qw(setsid ceil);
use POSIX ":sys_wait_h";

#
# This function as the main assign loop.  It converts the virtual
# topology into a top input including LAN and delay translation.  It
# then snapshots the current testbed physical state and runs assign,
# looping a couple times if assign fails.  When assign successfully
# completes it will interpret the results.  Attempt to match any
# existing portmap entries and then update the delays and vlans table.
#
# XXX Internally created nodes (jailhost,delay,sim) are not treated
#     consistently. Needs more thought.
#
# Return codes: We catch all errors with the END block below, and if
# anyone calls die() (exit value is 255) we add in the CANRECOVER bit.
# Failures in assign always cause the caller to stop retrying. 
#
# The CANRECOVER bit indicates 'recoverability' (no db or physical
46
# state was modified by the time the error occurred). This is relevant
47 48 49 50 51 52 53 54 55 56 57
# to only modify operations (update).
#
my $WRAPPER_SUCCESS		 = 0x00;
my $WRAPPER_FAILED		 = 0x01;	# Failed (Add other values)
my  $WRAPPER_FAILED_CANRECOVER   = 0x40;        # Can recover from update
my  $WRAPPER_FAILED_FATALLY      = 0x80;	# Do not use this.
# Set this once we modify DB state; forces no recover in fatal().
my $NoRecover = 0;

sub usage ()
{
58
    print STDERR "Usage: $0 [-v] [-u [-f] | -n] [-z] pid eid\n";
59 60 61 62
    print STDERR " -v   - Enables verbose output\n";
    print STDERR " -u   - Enables update mode\n";
    print STDERR " -f   - Fix current resources during update mode\n";
    print STDERR " -n   - Run assign, but do not reserve/modify resources.\n";
63
    print STDERR " -r   - Regression mode.\n";
64 65
    print STDERR " -x   - Turn on the prepass\n";
    print STDERR " -m   - Set the multiplex factor; overrides experiment.\n";
66 67
    print STDERR " -p   - Do a precheck for mapability on an empty testbed - ".
		 "implies -n\n";
68
    print STDERR " -l   - Use rspec v2 instead of the text file format\n";
69
#    print STDERR " -z   - Force new ptopgen\n";
70
    print STDERR " -Z   - Force old ptopgen\n";
71
    print STDERR " -A   - Tell ptopgen all nodes are free; only with -n\n";
72 73
    exit($WRAPPER_FAILED);
}
74
my $optlist    = "dvunfprqczxm:ko:altzZACFNL:S:G";
75 76 77
my $verbose    = 0;
my $debug      = 0;
my $fixmode    = 0;
78
my $fixlannodes= 0;
79 80 81
my $updating   = 0;
my $impotent   = 0;
my $precheck   = 0;
82
my $allnodesfree = 0;
Leigh B Stoller's avatar
Leigh B Stoller committed
83
my $toponly    = 0;
84
my $prepass    = 0;
85
my $alloconly  = 0;
86
my $gblinkcheck= 0;
87
my $outfile;
88
my $mfactor;
89
my $packoption;
90
my $regression = 0;
91
my $noassign   = 0;  # Only with regression mode, use previous solution.
92 93
my $noregfree  = 0;  # Only with regression mode, leave physical state at end.
my $usecurrent = 0;  # Only with regression mode, use current solution.
94
my $assignfile;
95 96 97
my $quiet      = 0;
my $clear      = 0;
my $warnings   = 0;
98
my $maxrun     = 3;  # Maximum number of times we run assign.
99
my $gotlock    = 0;
100
my $seriallock = 0;
101
my $userspec   = 0;
102
my $usecontrol = 0;
103
my $use_old_ptopgen  = 0;
104
my $vtop;
105 106 107 108 109

#
# Configure variables
#
my $TB		= "@prefix@";
110
my $MAINSITE    = @TBMAINSITE@;
111
my $DBNAME	= "@TBDBNAME@";
112 113 114 115
my $TBOPS       = "@TBOPSEMAIL@";
my $ASSIGN      = "$TB/libexec/assign";
my $WRAPPER2    = "$TB/libexec/assign_wrapper2";
my $PTOPGEN     = "$TB/libexec/ptopgen";
116
my $PTOPGEN_NEW = "$TB/libexec/ptopgen_new";
117
my $VTOPGEN     = "$TB/bin/vtopgen";
118
my $NFREE       = "$TB/bin/nfree";
119
my $XERCES	= "@HAVE_XERCES@";
120 121 122 123 124 125 126 127 128 129

#
# Load the Testbed support stuff. 
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use libtblog;
use libvtop;
use libadminctrl;
130
use libEmulab;
131
use User;
132
use EmulabFeatures;
133 134 135 136 137 138

# Protos
sub fatal(@);
sub debug($);
sub chat($);
sub RunAssign($$);
139
sub FinalizeRegression($);
140 141
sub AssignLoop();
sub MapperWrapper();
142
sub PrintSolution();
143 144 145 146 147 148

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
149 150 151 152 153 154 155 156 157 158
$| = 1;

#
# We want warnings to cause assign_wrapper to exit abnormally.
# 
# FIXME: Is this still needed.  "warn" is only used once.  Also this
#  will cause perl internal warnings (such as "Use of uninitialized
#  value ..."  to cause assign_wrapper to fail. -- kevina
#
$SIG{__WARN__} = sub { tbwarn $_[0];$warnings++; };
159 160 161 162 163 164 165 166 167 168 169 170 171

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV < 2) {
    usage();
}
if (defined($options{"v"})) {
172
    TBDebugTimeStampsOn();
173 174
    $verbose++;
}
175 176 177
if (defined($options{"a"})) {
    $alloconly++;
}
178 179 180
if (defined($options{"A"})) {
    $allnodesfree++;
}
181 182 183
if (defined($options{"d"})) {
    $debug++;
}
184 185 186
if (defined($options{"u"})) {
    $updating = 1;
}
Leigh B Stoller's avatar
Leigh B Stoller committed
187 188 189 190
if (defined($options{"t"})) {
    $toponly = 1;
    $quiet   = 1;
}
191 192 193
if (defined($options{"n"})) {
    $impotent = 1;
}
194 195 196 197 198 199
if (defined($options{"N"})) {
    $noassign = 1;
}
if (defined($options{"L"})) {
    $assignfile = $options{"L"};
}
200 201 202
if (defined($options{"f"})) {
    $fixmode = 1;
}
203 204 205
if (defined($options{"F"})) {
    $fixlannodes = 1;
}
206 207 208
if (defined($options{"G"})) {
    $gblinkcheck = 1;
}
209 210 211 212 213
if (defined($options{"S"})) {
    $packoption = $options{"S"};
    fatal("Bad -S option; must be pack or balance")
	if (! ($packoption eq "balance" || $packoption eq "pack"));
}
214 215 216
if (defined($options{"p"})) {
    $precheck = 1;
}
217 218 219
if (defined($options{"x"})) {
    $prepass = 1;
}
220 221 222
if (defined($options{"o"})) {
    $outfile = $options{"o"};
}
223 224 225
if (defined($options{"m"})) {
    $mfactor = $options{"m"};
}
226 227 228 229 230
if (defined($options{"r"})) {
    if ($DBNAME eq "tbdb") {
	fatal("Cannot use regression mode on main DB");
    }
    $regression = 1;
Leigh B Stoller's avatar
Leigh B Stoller committed
231 232
    $clear      = 1
	if (!defined($options{"t"}));
233
    $fixmode    = 1;
234
    TBDebugTimeStampsOn();
235
    $usecurrent = 1
236
	if (defined($options{"z"}));
237 238
    $noregfree = 1
	if (defined($options{"k"}));
239 240 241 242 243 244 245
}
if (defined($options{"q"})) {
    $quiet = 1;
}
if (defined($options{"c"})) {
    $clear = 1;
}
246 247 248
if (defined($options{"C"})) {
    $usecontrol = 1;
}
249
if (defined($options{"l"})) {
250 251 252 253 254
    if ($XERCES) {
	$userspec = 1;
    } else {
	fatal("Rspec v2 support requires that Xerces be installed");
    }
255
}
256 257 258
if (defined($options{"Z"})) {
    $use_old_ptopgen = 1;
}
259
if (defined($options{"z"})) {
260 261
#    $use_old_ptopgen = 0;
#    $PTOPGEN = $PTOPGEN_NEW;
262
}
263 264 265
if ($allnodesfree && !$impotent) {
    fatal("Not allowed to use -A without -n (impotent) option");
}
266 267 268 269 270 271 272

my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    fatal("Could not lookup experiment object $pid,$eid!")
}
273 274
my $project = $experiment->GetProject();

275 276 277 278 279 280 281 282 283 284 285
#
# Verify that this person can muck with the experiment.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
}
if (!TBAdmin() &&
    ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
    fatal("You do not have permission to map this experiment!");
}
286
my $real_user = User->RealUser();
287

288 289 290
# multiplex_factor default.
$mfactor = $experiment->multiplex_factor()
    if (!defined($mfactor) && defined($experiment->multiplex_factor()));
Leigh B Stoller's avatar
Leigh B Stoller committed
291 292
# NS file can say to run the prepass.
my $useprepass = $experiment->useprepass();
293 294 295 296 297

my $newassign =
    EmulabFeatures->FeatureEnabled("NewAssign",
				   $this_user,
				   $experiment->GetGroup(), $experiment);
298 299 300 301 302 303 304 305 306 307 308 309 310
if (!$usecontrol) {
    $usecontrol =
	EmulabFeatures->FeatureEnabled("ControlNetVlans",
				       $this_user,
				       $experiment->GetGroup(), $experiment);
    if ($usecontrol) {
	chat("Telling ptopgen to use control network vlans\n");
    }
}
if ($usecontrol && $MAINSITE) {
    $debug   = 1;
    $verbose = 1;
}
311

312 313 314 315 316 317 318
# XXX Hacky!
if ($MAINSITE && $TB ne "/usr/testbed") {
    $debug   = 1;
    $verbose = 1;
    $fixlannodes = $fixmode;
}

319
libvtop::Init($this_user, $experiment->GetGroup(), $experiment);
320
    
321 322 323 324 325 326
#
# These are the flags to the vtop creation code. 
#
my $vtopflags = 0;
$vtopflags |= $libvtop::VTOP_FLAGS_VERBOSE
    if ($verbose);
327 328
$vtopflags |= $libvtop::VTOP_FLAGS_QUIET
    if ($quiet);
329 330 331
$vtopflags |= $libvtop::VTOP_FLAGS_UPDATE
    if ($updating);
$vtopflags |= $libvtop::VTOP_FLAGS_FIXNODES
332
    if ($fixmode || $usecurrent);
333 334
$vtopflags |= $libvtop::VTOP_FLAGS_FIXLANNODES
    if ($fixlannodes);
335 336
$vtopflags |= $libvtop::VTOP_FLAGS_IMPOTENT
    if ($impotent);
337 338
$vtopflags |= $libvtop::VTOP_FLAGS_ALLOCONLY
    if ($alloconly);
339 340
$vtopflags |= $libvtop::VTOP_FLAGS_REGRESSION
    if ($regression);
341

342 343 344 345 346 347 348
MapperWrapper();
if ($regression) {
    if (0) {
	$updating   = 1;
	$fixmode    = 1;
	$clear      = 0;
	$vtopflags |=
349 350
	    ($libvtop::VTOP_FLAGS_UPDATE|$libvtop::VTOP_FLAGS_FIXLANNODES|
	     $libvtop::VTOP_FLAGS_FIXNODES);
351

352 353 354
	MapperWrapper();
    }
    FinalizeRegression(0);
355
}
356 357
PrintSolution()
    if ($outfile);
358
exit(0);
359

360 361
sub MapperWrapper()
{
Leigh B Stoller's avatar
Leigh B Stoller committed
362 363
    chat("Starting the new and improved mapper wrapper.\n")
	if (!$toponly);
364

Leigh B Stoller's avatar
Leigh B Stoller committed
365 366 367 368 369
    # Need to do this cause libvtop will add them again.
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents()
	if ($regression);

370
    TBDebugTimeStamp("Create libvtop started");
371
    $vtop = libvtop->Create($experiment, $this_user, $vtopflags, $real_user);
372 373 374 375
    if (!defined($vtop)) {
	fatal("Could not create vtop structure for $experiment");
    }
    TBDebugTimeStamp("Create libvtop ended");
Leigh B Stoller's avatar
Leigh B Stoller committed
376

Leigh B Stoller's avatar
Leigh B Stoller committed
377 378 379 380 381
    TBDebugTimeStamp("vtopgen started");
    $vtop->CreateVtop() == 0
	or fatal("Could not create vtop for $experiment");
    TBDebugTimeStamp("vtopgen finished");
    
382
    if (!$impotent && !$alloconly && !$toponly && ($updating || $clear)) {
383 384 385 386 387 388 389 390 391 392 393 394
	if ($clear) {
	    chat("Freeing reserved nodes ...\n");
	    system("export NORELOAD=1; $NFREE -x -a $pid $eid") == 0
		or fatal("Could not release nodes.");
	}
	chat("Clearing physical state before updating.\n");
	$experiment->RemovePhysicalState();
	exit(0)
	    if ($clear && !$regression);
    }
    AssignLoop();
    return 0;
395
}
396 397 398 399

#
# The assign loop. 
#
400 401 402 403
sub AssignLoop()
{
    my $currentrun     = 1;
    my $canceled       = 0;
404
    my $progress       = 0;
405 406 407
    my $tried_precheck = 0;
    # Admission control counts
    my %admission_control = ();
408

409 410 411 412 413
    # XXX plab hack - only run assign once on plab topologies, since
    # they're easy to map and the physical topology does not change
    # frequently.
    if ($vtop->plabcount() && $vtop->plabcount == $vtop->virtnodecount()) {
	$maxrun = 2;
414 415
    }

416 417
    TBDebugTimeStamp("mapper loop started");
    while (1) {
418
	chat("Mapper loop $currentrun\n");
419

420
	my $prefix = "$pid-$eid-$$";
421 422

	#
423
	# When precheck is on, we only do one run in impotent mode and exit.
424
	#
425 426 427 428 429
	if ($precheck) {
	    $prefix  .= ".empty";
	    $impotent = 1;
	    chat("Trying assign on an empty testbed.\n");
	}
430

431 432 433
	#
	# Serialize with the pool daemon if using shared nodes.
	#
434
	if ((!($impotent || $regression)) && $vtop->sharednodecount()) {
435
	    while (1) {
436 437 438 439
		#
		# Use a countup/countdown counter, so that multiple mappers
		# can run, but not while the pool_daemon is running.
		#
440
		my $lock_result =
441 442 443 444
		    DBQueryFatal("update emulab_locks set value=value+1 ".
				 "where name='pool_daemon' and value>=0");

		$gotlock = $lock_result->affectedrows;
445 446 447 448 449

		last
		    if ($gotlock);
		
		chat("Waiting for pool daemon lock ...\n");
450
		sleep(10);
451 452
	    }
	}
453
	# Hack for Kirk.
454 455
	if (!($impotent || $regression) &&
	    ($pid eq "PNWorkshop" || $project->IsNonLocal())) {
456
	    while (1) {
457
		if (libEmulab::EmulabCountLock("mapperlock", 4) == 0) {
458 459 460 461 462 463 464 465
		    $seriallock = 1;
		    last;
		}
		chat("Waiting for mapper lock ...\n");
		sleep(5);
	    }
	}
	
466 467 468 469
	#
	# RunAssign returns  0 if successful.
	#           returns -1 if failure, but assign says to stop trying.
	#           returns  1 if failure, but assign says to try again.
470 471
	#           returns  2 if assign succeeds, but no nodes allocated.
	#           returns  3 if assign succeeds, but some nodes allocated.
472 473 474
	#
	my $retval = RunAssign($precheck, $prefix);

475
	if ($gotlock) {
476 477 478
	    DBQueryFatal("update emulab_locks set value=value-1 ".
			 "where name='pool_daemon'");
	    $gotlock = 0;
479
	}
480
	if ($seriallock) {
481
	    libEmulab::EmulabCountUnlock("mapperlock");
482 483
	    $seriallock = 0;
	}
484

485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
	# Success!
	last
	    if ($retval == 0);

	if ($retval < 0 || $regression) {
	    #
	    # Failure in assign.
	    #
	    FinalizeRegression(1)
		if ($regression);

	    fatal({type => 'primary', severity => SEV_ERROR,
		   error => ['unretriable_assign_error']},
		  "Unretriable error. Giving up.");
	}
500
    
501 502 503
	#
	# When precheck is off, we do a precheck run if the first try fails
	# to find a solution. This avoids looping on an unsolvable topology.
504 505 506
	# But, if the reason we are here is cause we could not allocate nodes,
	# then we found a solution, and so trying on an empty testbed is
	# pointless; it will obviously find a solution again.
507
	#
508 509 510
	if (!$precheck && !$tried_precheck && ($retval == 2 || $retval == 3)) {
	    $tried_precheck = 1;
	}
511 512 513 514 515 516 517
	if (!$precheck && !$tried_precheck) {
	    chat("Trying assign on an empty testbed to verify mapability.\n");
	    my $save_impotent = $impotent;
	    $impotent  = 1;
	    my $retval = RunAssign(1, $prefix . ".empty");
	    if ($retval != 0) {
		fatal({type=>'extra', cause=>'user', severity=>SEV_ERROR,
518 519 520 521 522 523 524 525
	           error=>['infeasible_resource_assignment']}, 
		  "This experiment cannot be instantiated on this ".
                  "testbed. You have most likely asked for hardware ".
                  "this testbed does not have, such as nodes of a type ".
                  "it does not contain, or nodes with too many network ".
                  "interfaces.  You will need to modify this experiment ".
                  "before it can be swapped in - re-submitting the ".
                  "experiment as-is will always result in failure.");
526 527 528 529
	    }
	    chat("Assign succeeded on an empty testbed.\n");
	    $impotent = $save_impotent;
	    $tried_precheck = 1;
530
	}
531 532 533 534
	# We try a minimum number of times, cause the node pool is
	# always changing. But once we hit the maxrun, we continue
	# only if progress on the last loop.
	if ($currentrun >= $maxrun && !$progress) {
535 536 537 538
	    fatal({type => 'primary', severity => SEV_ERROR,
		   error => ['reached_assign_run_limit']},
		  "Reached run limit. Giving up.");
	}
539 540 541
	# See if we made progress or not.
	# Keep going if we allocated some nodes.
	$progress = ($retval == 3);
542

543 544 545 546
	# A little bit of backoff after failure. 
	my $sval = int(rand($currentrun * 3)) + 3;
	chat("Waiting $sval seconds and trying again...\n");
	sleep($sval);
547 548
	$currentrun++;
    }
549
    GatherAssignStats($pid, $eid, %{ $vtop->exptstats() })
Leigh B Stoller's avatar
Leigh B Stoller committed
550
	if (! ($impotent || $alloconly || $regression || $toponly));
551 552
    TBDebugTimeStamp("mapper loop finished");
    return 0;
553 554 555 556 557 558 559 560 561
}

#
# The guts of an assign run.
#
sub RunAssign($$)
{
    my ($precheck, $prefix) = @_;

562
    my $typelimitfile = $prefix .".limits";
563 564
    my $ptopfile = $prefix . ".ptop";
    my $vtopfile = $prefix . ".vtop";
565 566 567 568
    if ($userspec) {
	$ptopfile .= ".xml";
	$vtopfile .= ".xml";
    }
569
    my $assignexitcode = 0;
570
    my $assignlog = "assign.log";
571

Leigh B Stoller's avatar
Leigh B Stoller committed
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590
    #
    # Now generate a vtop file and dump it to a file.
    #
    if (! open(VTOPFILE, "> $vtopfile")) {
	tberror("Could not open $vtopfile: $!\n");
	return -1;
    }
    my $reslibvtop;
    if ($userspec == 1) {
	$reslibvtop = $vtop->PrintRspec(*VTOPFILE);
    }
    else {
	$reslibvtop = $vtop->PrintTop(*VTOPFILE);
    }
    if ($reslibvtop != 0) {
	tberror("Could not print vtop file for $experiment\n");
	return -1;
    }
    close(VTOPFILE);
591
    system("/bin/cp -fp $vtopfile ${pid}-${eid}.vtop") if ($debug);
Leigh B Stoller's avatar
Leigh B Stoller committed
592 593 594
    return 0
	if ($toponly);

595 596 597 598 599 600 601 602 603 604 605 606 607 608
    if (! ($impotent || $regression || $alloconly)) {
	if ($experiment->Update({"maximum_nodes" => $vtop->maximum_nodes(),
				 "minimum_nodes" => $vtop->minimum_nodes(),
				 "virtnode_count"=> $vtop->virtnodecount() })){
	    tberror("Could not update min/max/virt nodes for $experiment\n");
	    return -1;
	}
    }
    # New solution each time.
    $vtop->ClearSolution();

    goto skipassign
	if ($vtop->nodecount() == $vtop->genicount());

609 610 611 612 613 614
    # Debugging hack.
    if ($regression || $noassign || $assignfile) {
	$assignlog = $assignfile
	    if ($assignfile);
	
	if (! -e $assignlog) {
615 616 617 618 619 620 621
	    chat("No existing assign results file!\n");
	    return -1;
	}
	chat("Using existing assign results file\n");
	goto skiprun;
    }
    
622 623 624 625
    #
    # Do admission control test, and gather the info.
    #
    my %admission_control;
626 627 628 629 630 631
    if (!$regression) {
	if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control)){
	    tberror("Failed admission control checks!\n");
	    return -1;
	}
    }
632

633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
    #
    # Append this admission control results to ptopgen.
    #
    if (scalar(keys(%admission_control))) {
	open(TYPELIMIT, ">$typelimitfile") or
	    return -1;

	foreach my $typeclass (keys(%admission_control)) {
	    my $count = $admission_control{$typeclass};

	    print TYPELIMIT "$typeclass $count\n";
	}
	close(TYPELIMIT);
    }


649 650 651 652 653 654 655 656 657 658 659
    #
    # Snapshot physical resources.
    #
    # if updating (-u), include any resources that may already be
    # allocated to experiment in the PTOP results.
    #
    my $ptopargs = "-p $pid ";
    $ptopargs   .= "-e $eid "
	if ($updating);
    $ptopargs   .= "-u "
	if ($updating && $experiment->elabinelab());
660 661
    $ptopargs   .= "-m $mfactor "
	if (defined($mfactor));
662 663
    $ptopargs   .= "-C "
	if ($usecontrol);
664 665 666 667 668 669
    $ptopargs   .= "-v "
	if ($vtop->virtnodecount());
    $ptopargs   .= "-r "
	if ($vtop->remotenodecount());
    $ptopargs   .= "-S "
	if ($vtop->simnodecount());
670 671
    $ptopargs   .= "-h "
	if ($vtop->sharednodecount());
672 673
    $ptopargs   .= "-b "
	if ($vtop->bstorecount());
674
    $ptopargs	.= "-a "
675
    	if ($precheck || $allnodesfree);
676 677
    $ptopargs	.= "-c " . $experiment->delay_capacity() . " "
    	if (defined($experiment->delay_capacity()));
678 679 680
    if ($userspec == 1) {
	$ptopargs .= "-x -g 2 ";
    }
681 682
    if ($use_old_ptopgen == 1) {
	$ptopargs .= "-Z ";
683
    }
684 685
    $ptopargs .= "-z "
	if ($project->IsNonLocal() || $vtop->sharednodecount());
686 687 688 689 690 691
    if ($gblinkcheck) {
	if ($vtop->mgblinks() == 0) {
	    $ptopargs .= "-G ";
	}
    }
    $ptopargs .= "-l $typelimitfile "
692
	if (scalar(keys(%admission_control)));
693 694 695 696 697

    chat("ptopargs: '$ptopargs'\n");
    TBDebugTimeStamp("ptopgen started");
    system("$PTOPGEN $ptopargs > $ptopfile");
    if ($?) {
698 699
	tberror("Failure in ptopgen\n");
	return -1;
700
    }
701
    system("/bin/cp -fp $ptopfile ${pid}-${eid}.ptop") if ($debug);
702 703 704
    TBDebugTimeStamp("ptopgen finished");

    # Run assign
705
    my $cmd  = "assign";
706 707
    $cmd .= "-new"
	if ($newassign);
708
    my $args = "";
709 710 711 712 713 714
    if ($XERCES) {
	$args .= "-f rspec " 
	    if ($userspec == 1);
	$args .= "-f text "
	    if ($userspec == 0);
    }
715
    $args .= "$ptopfile $vtopfile";
716
    $args = "-P $args"
717
	if (!$vtop->sharednodecount());
718 719
    $args = "-F $args"
	if (!$updating);
720
    $args = "-uod -c .75 -H 3 $args"
721 722 723
	if ($vtop->virtnodecount() || $vtop->simnodecount());
    $args = "-n $args"
    	if ($precheck);
724 725
    $args = "-S $packoption $args"
    	if (defined($packoption));
726 727
    $args = "-s 123456 $args"
	if ($regression);
728
    $args = "-R $args PN=1.0"
Leigh B Stoller's avatar
Leigh B Stoller committed
729
	if (0 && $vtop->sharednodecount());
730
    
731
    # The prepass speeds up assign on big topos with virtual nodes.
Leigh B Stoller's avatar
Leigh B Stoller committed
732
    if ($prepass || $useprepass) {
733 734 735 736
    	$cmd = "assign_prepass";
    	$args = "-m $mfactor $args"
    	    if (defined($mfactor));
    }
737 738 739 740 741 742
    chat("assign command: '$cmd $args'\n");

    #
    # Fork a child to run assign. Parent spins watching the cancel flag
    # and waiting for assign to finish.
    #
743
    TBDebugTimeStamp("assign started");
744 745 746 747 748 749 750 751 752 753 754 755 756 757 758
    if (my $childpid = fork()) {
	while (1) {
	    sleep(2);

	    if (waitpid($childpid, &WNOHANG) == $childpid) {
		$assignexitcode = $? >> 8;
		last;
	    }

	    # Check cancel flag.
	    if ($experiment->canceled()) {
		if ((my $pgrp = getpgrp($childpid)) > 0) {
		    kill('TERM', -$pgrp);
		    waitpid($childpid, 0);

759 760 761
		    tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
			     error => ['cancel_flag']},
			    "Cancel flag set; aborting assign run!\n");
762 763 764 765 766 767 768 769 770 771 772 773
		    return -1;
		}
		# Loop again to reap child above before exit.
	    }
	}
    }
    else {
	#
	# Change our session so the child can get a killpg without killing
	# the parent. 
	#
        POSIX::setsid();
774
	exec("nice $WRAPPER2 $cmd $args > $assignlog 2>&1");
775 776
	die("Could not start assign!\n");
    }
777
    TBDebugTimeStamp("assign finished");
778 779 780

    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
781 782 783
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
784 785 786 787
	return -1;
    }

    # Check for possible full filesystem ...
788 789
    if (-z $assignlog) {
	tbnotice("$assignlog is zero length! Stopping ...\n");
790 791 792 793 794 795 796 797 798
	return -1;
    }

    #
    # Saving up assign.log coz each swapin/modify is different and it
    # is nice to have every mapping for debugging and archiving
    # purposes We do not call it .log though, since we do not want it
    # copied out to the user directory every swapin. See Experiment.pm
    #
799 800
    system("/bin/cp -fp $assignlog ${prefix}.assign");
    system("/bin/cp -fp $assignlog ${pid}-${eid}.assign") if ($debug);
801 802

    if ($assignexitcode) {
803 804
	print "Assign exited with $assignexitcode\n" if ($debug);
	
805
	system("/bin/cat $assignlog");
806 807 808 809 810 811 812 813
	#
	# assign returns two positive error codes (that we care about).
	# The distinction between them is somewhat murky. An exitval of
	# 1 means "retryable" while 2 means "unretryable". The former
	# means we can try again, while the later says there is no possible
	# way to map it. We pass this back to the caller so that we know
	# to exit the loop or try again.
	#
814 815 816 817 818 819 820
	return (($assignexitcode == 1) ? 1 : -1);
    }
    #
    # If we were doing the precheck, go ahead and exit now - there is no
    # useful information to parse out
    #
    if ($precheck) {
821
	chat("Precheck succeeded.\n");
822 823
	return 0;
    }
824
  skiprun:
825
    chat("Reading assign results.\n");
826
    if (!open(ASSIGNFP, $assignlog)) {
827 828 829
	print("Could not open assign logfile! $!\n");
	return -1;
    }
830
    TBDebugTimeStamp("ReadSolution started");
831
    if ($vtop->ReadTextSolution(*ASSIGNFP) != 0) {
832 833 834 835
	print("Could not parse assign logfile! $!\n");
	return -1;
    }
    close(ASSIGNFP);
836
    TBDebugTimeStamp("ReadSolution ended");
837
  skipassign:
838
    
839
    if (defined($vtop->genicount())) {
840 841 842 843 844 845 846 847
	TBDebugTimeStamp("Map Geni Resources Started");
	if ($vtop->MapResources() != 0) {
	    print("Could not map external resources! $!\n");
	    return -1;
	}
	TBDebugTimeStamp("Map Geni Resources ended");
    }
    TBDebugTimeStamp("InterpNodes Started");
848 849 850 851
    if ($vtop->InterpNodes() != 0) {
	print("Could not interpret nodes.\n");
	return -1;
    }
852
    TBDebugTimeStamp("InterpNodes ended, AllocNodes Started");
853 854
    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
855 856 857
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
858 859
	return -1;
    }
860
    my $retval = $vtop->AllocNodes();
861 862 863
    return $retval
	if ($retval != 0);

864
    TBDebugTimeStamp("AllocNodes ended, InterpLinks Started");
865 866 867 868
    if ($vtop->InterpLinks() != 0) {
	print("Could not setup links\n");
	return -1;
    }
869
    TBDebugTimeStamp("InterpLinks ended, InitializePhysNodes Started");
870 871 872 873
    if ($vtop->InitializePhysNodes() != 0) {
	print("Could not InitializePhysNodes\n");
	return -1;
    }
874
    TBDebugTimeStamp("InitializePhysNodes ended");
875
    if (! ($impotent || $regression || $alloconly)) {
876 877 878 879 880 881 882
	TBDebugTimeStamp("ReserveSharedBandwidth started");
	if ($experiment->ReserveSharedBandwidth($updating) != 0) {
	    print("Could not reserve shared bandwidth\n");
	    return -1;
	}
	TBDebugTimeStamp("ReserveSharedBandwidth ended");
    }
883 884 885 886 887 888 889 890 891 892 893 894 895 896 897
    return 0;
}

#
# In regression mode we want to save  the physical state and then clear
# the physical resources. 
#
sub FinalizeRegression($)
{
    my ($error) = @_;
    my $cwd;

    chomp($cwd = `/bin/pwd`);
    if (!$error) {
	chat("Saving physical state in regression mode\n");
898 899 900 901
	if (system("/bin/rm -rf $pid-$eid.pstate")) {
	    tberror("Could not clean physical state directory\n");
	    return -1;
	}
902 903
	if ($experiment->BackupPhysicalState("$cwd/$pid-$eid.pstate", 1)
	    != 0) {
904 905 906
	    print STDERR "Could not save physical state!\n";
	    exit(1);
	}
907 908 909 910 911 912 913 914 915 916 917 918
    
	# Generate a vtop file with all resources fixed.
	chat("Generating new vtop file with all resources fixed.\n");
	if (! open(VTOPFILE, "> $pid-$eid.fixed")) {
	    tberror("Could not open $pid-$eid.fixed: $!\n");
	    return -1;
	}
	if ($vtop->PrintTop(*VTOPFILE) != 0) {
	    tberror("Could not print fixed vtop file for $experiment\n");
	    return -1;
	}
	close(VTOPFILE);
919
    }
Leigh B Stoller's avatar
Leigh B Stoller committed
920 921 922
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents();
    
923 924 925
    return 0
	if ($noregfree);
    
926 927 928 929 930 931 932 933 934 935 936 937 938 939
    chat("Removing physical state in regression mode\n");
    if ($experiment->RemovePhysicalState() != 0) {
	print STDERR "Could not remove physical state!\n";
	exit(1);
    }

    if ($vtop->newreservednodes()) {
	my @newreservednodes = $vtop->newreservednodes();
	system("export NORELOAD=1; $NFREE -x $pid $eid @newreservednodes");
	if ($?) {
	    exit(1);
	}
    }
    return 0;
940
}
941

942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964
#
# Print a solution in rspec format.
#
sub PrintSolution()
{
    my $output = undef;
    
    if ($outfile ne "-") {
	if (! open(OUTFILE, "> $outfile")) {
	    tberror("Could not open $outfile: $!\n");
	    return -1;
	}
	$output = *OUTFILE;
    }
    if ($vtop->PrintSolution($output) != 0) {
	tberror("Could not print solution for $experiment\n");
	return -1;
    }
    close($output)
	if (defined($output));
    return 0;
}

965 966 967 968
# We will come through here no matter how we exit.
END {
    # Watch for getting here cause of a die()/exit() statement someplace.
    my $exitcode = $?;
969

970 971 972 973 974 975 976 977
    #
    # Do not want to leave this around, it will lock the pool daemon out.
    #
    if ($gotlock) {
	DBQueryFatal("update emulab_locks set value=value-1 ".
		     "where name='pool_daemon'");
	$gotlock = 0;
    }
978
    if ($seriallock) {
979
	libEmulab::EmulabCountUnlock("mapperlock");
980 981
	$seriallock = 0;
    }
982

983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998
    if ($exitcode && $exitcode != $WRAPPER_FAILED) {
	$exitcode = $WRAPPER_FAILED|$WRAPPER_FAILED_FATALLY;
    }
    
    if ($warnings > 0) {
	tberror("$warnings warnings.\n");

	$exitcode |= $WRAPPER_FAILED;
    }

    # Set recover bit if we are going to fail.
    $exitcode = $exitcode|$WRAPPER_FAILED_CANRECOVER
	if ($exitcode && !$NoRecover);

    # And change the exitcode to be what we want it to be.
    $? = $exitcode;
999 1000 1001 1002
}

sub fatal(@)
{
1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015
    #
    # Free any newly reserved nodes (in update mode) so that tbswap knows
    # it is safe to recover the experiment. If we bypass this and leave
    # through the END block then NoRecover will still be set and tbswap
    # will know to swap the experiment out. 
    #
    if ($updating) {
	if (defined($vtop)) {
	    my @newreservednodes = $vtop->newreservednodes();
	    my $NoRecover = $vtop->norecover();
	
	    if (@newreservednodes) {
		$NoRecover = 0
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1016
		    if (system("$NFREE -x $pid $eid @newreservednodes") == 0);
1017 1018 1019 1020 1021 1022 1023 1024
	    }
	    else {
		# When not updating this is meaningless to tbswap.
		$NoRecover = 0;
	    }
	}
    }
    
1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041
    tberror(@_);

    # We next go to the END block above.
    exit($WRAPPER_FAILED);
}
sub debug($)
{
    if ($verbose) {
	print $_[0];
    }
}
sub chat($)
{
    if (! $quiet) {
	print $_[0];
    }
}