#!/usr/bin/perl -w
#
# Copyright (c) 2000-2017 University of Utah and the Flux Group.
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
#
use strict;
use English;
use Getopt::Std;
use POSIX qw(setsid ceil);
use POSIX ":sys_wait_h";

#
# This functions as the main assign loop.  It converts the virtual
# topology into a top input including LAN and delay translation.  It
# then snapshots the current testbed physical state and runs assign,
# looping a couple times if assign fails.  When assign successfully
# completes it will interpret the results, attempt to match any
# existing portmap entries, and then update the delays and vlans table.
#
# XXX Internally created nodes (jailhost,delay,sim) are not treated
#     consistently. Needs more thought.
#
# Return codes: We catch all errors with the END block below, and if
# anyone calls die() (exit value is 255) we add in the CANRECOVER bit.
# Failures in assign always cause the caller to stop retrying.
#
# The CANRECOVER bit indicates 'recoverability' (no db or physical
# state was modified by the time the error occurred). This is relevant
# only to modify operations (update).
#
# Exit status values: success, and the generic failure bit (other
# values may be added to it).
my ($WRAPPER_SUCCESS, $WRAPPER_FAILED) = (0x00, 0x01);
# CANRECOVER: can recover from update.  FATALLY: do not use this.
my ($WRAPPER_FAILED_CANRECOVER, $WRAPPER_FAILED_FATALLY) = (0x40, 0x80);
# Set this once we modify DB state; forces no recover in fatal().
my $NoRecover = 0;

#
# Print the command line synopsis on STDERR and exit with
# $WRAPPER_FAILED. Does not return.
#
sub usage ()
{
    print STDERR "Usage: $0 [-v] [-u [-f] | -n] [-z] pid eid\n";
    print STDERR " -v   - Enables verbose output\n";
    print STDERR " -u   - Enables update mode\n";
    print STDERR " -f   - Fix current resources during update mode\n";
    print STDERR " -n   - Run assign, but do not reserve/modify resources.\n";
    print STDERR " -r   - Regression mode.\n";
    print STDERR " -x   - Turn on the prepass\n";
    print STDERR " -m   - Set the multiplex factor; overrides experiment.\n";
    print STDERR " -p   - Do a precheck for mapability on an empty testbed - ".
		 "implies -n\n";
    print STDERR " -l   - Use rspec v2 instead of the text file format\n";
#    print STDERR " -z   - Force new ptopgen\n";
    print STDERR " -Z   - Force old ptopgen\n";
    print STDERR " -A   - Tell ptopgen all nodes are free; only with -n\n";
    exit($WRAPPER_FAILED);
}
# Option string handed to getopts(); a trailing colon means the option
# takes an argument.
my $optlist    = "dvunfprqczxm:ko:altzZACFNL:S:G";
# Flags and values set from the command line (see the option parsing below).
my $verbose    = 0;
my $debug      = 0;
my $fixmode    = 0;
my $fixlannodes= 0;
my $updating   = 0;
my $impotent   = 0;
my $precheck   = 0;
my $allnodesfree = 0;
my $toponly    = 0;
my $prepass    = 0;
my $alloconly  = 0;
my $gblinkcheck= 0;
my $outfile;
my $mfactor;
my $packoption;
my $regression = 0;
my $noassign   = 0;  # Only with regression mode, use previous solution.
my $noregfree  = 0;  # Only with regression mode, leave physical state at end.
my $usecurrent = 0;  # Only with regression mode, use current solution.
my $assignfile;
my $quiet      = 0;
my $clear      = 0;
my $warnings   = 0;
my $maxrun     = 3;  # Maximum number of times we run assign.
my $gotlock    = 0;
my $seriallock = 0;
my $userspec   = 0;
my $usecontrol = 0;
my $use_old_ptopgen  = 0;
my $vtop;
my $retval;

#
# Configure variables
#
# The @...@ tokens are placeholders filled in when this .in template is
# configured.
my $TB		= "@prefix@";
my $MAINSITE    = @TBMAINSITE@;
my $DBNAME	= "@TBDBNAME@";
my $TBOPS       = "@TBOPSEMAIL@";
my $ASSIGN      = "$TB/libexec/assign";
my $WRAPPER2    = "$TB/libexec/assign_wrapper2";
my $PTOPGEN     = "$TB/libexec/ptopgen";
my $PTOPGEN_NEW = "$TB/libexec/ptopgen_new";
my $VTOPGEN     = "$TB/bin/vtopgen";
my $NFREE       = "$TB/bin/nfree";
my $XERCES	= "@HAVE_XERCES@";

#
# Load the Testbed support stuff. 
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use libtblog;
use libvtop;
use libadminctrl;
use libEmulab;
use User;
use EmulabFeatures;
use Reservation;

# Protos
# Forward declarations so the subs can be called before their definitions.
sub fatal(@);
sub debug($);
sub chat($);
sub RunAssign($$);
sub FinalizeRegression($);
sub AssignLoop();
sub MapperWrapper(;$);
sub PrintSolution();

# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
$| = 1;

#
# We want warnings to cause assign_wrapper to exit abnormally.
# 
# FIXME: Is this still needed.  "warn" is only used once.  Also this
#  will cause perl internal warnings (such as "Use of uninitialized
#  value ..."  to cause assign_wrapper to fail. -- kevina
#
# Every warning is forwarded to tbwarn and counted in $warnings.
$SIG{__WARN__} = sub { tbwarn $_[0];$warnings++; };

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (@ARGV < 2) {
    usage();
}
if (defined($options{"v"})) {
    TBDebugTimeStampsOn();
    $verbose++;
}
if (defined($options{"a"})) {
    $alloconly++;
}
if (defined($options{"A"})) {
    $allnodesfree++;
}
if (defined($options{"d"})) {
    $debug++;
}
if (defined($options{"u"})) {
    $updating = 1;
}
# -t also turns on quiet mode.
if (defined($options{"t"})) {
    $toponly = 1;
    $quiet   = 1;
}
if (defined($options{"n"})) {
    $impotent = 1;
}
if (defined($options{"N"})) {
    $noassign = 1;
}
if (defined($options{"L"})) {
    $assignfile = $options{"L"};
}
if (defined($options{"f"})) {
    $fixmode = 1;
}
if (defined($options{"F"})) {
    $fixlannodes = 1;
}
if (defined($options{"G"})) {
    $gblinkcheck = 1;
}
if (defined($options{"S"})) {
    $packoption = $options{"S"};
    fatal("Bad -S option; must be pack or balance")
	if (! ($packoption eq "balance" || $packoption eq "pack"));
}
if (defined($options{"p"})) {
    $precheck = 1;
}
if (defined($options{"x"})) {
    $prepass = 1;
}
if (defined($options{"o"})) {
    $outfile = $options{"o"};
}
if (defined($options{"m"})) {
    $mfactor = $options{"m"};
}
#
# Regression mode is refused on the main DB. It implies fixmode and
# timestamps, and (unless -t) clear. -z and -k are only looked at here
# for their regression meaning.
#
if (defined($options{"r"})) {
    if ($DBNAME eq "tbdb") {
	fatal("Cannot use regression mode on main DB");
    }
    $regression = 1;
    $clear      = 1
	if (!defined($options{"t"}));
    $fixmode    = 1;
    TBDebugTimeStampsOn();
    $usecurrent = 1
	if (defined($options{"z"}));
    $noregfree = 1
	if (defined($options{"k"}));
}
if (defined($options{"q"})) {
    $quiet = 1;
}
if (defined($options{"c"})) {
    $clear = 1;
}
if (defined($options{"C"})) {
    $usecontrol = 1;
}
if (defined($options{"l"})) {
    if ($XERCES) {
	$userspec = 1;
    } else {
	fatal("Rspec v2 support requires that Xerces be installed");
    }
}
if (defined($options{"Z"})) {
    $use_old_ptopgen = 1;
}
# -z is currently a no-op outside of regression mode.
if (defined($options{"z"})) {
#    $use_old_ptopgen = 0;
#    $PTOPGEN = $PTOPGEN_NEW;
}
if ($allnodesfree && !$impotent) {
    fatal("Not allowed to use -A without -n (impotent) option");
}

# The two required arguments name the experiment to map.
my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    fatal("Could not lookup experiment object $pid,$eid!");
}
my $project = $experiment->GetProject();

#
# Verify that this person can muck with the experiment.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
}
if (!TBAdmin() &&
    ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
    fatal("You do not have permission to map this experiment!");
}
my $real_user = User->RealUser();

# multiplex_factor default.
$mfactor = $experiment->multiplex_factor()
    if (!defined($mfactor) && defined($experiment->multiplex_factor()));
# Same for the packing strategy; a -S on the command line wins.
$packoption = $experiment->packing_strategy()
    if (!defined($packoption) && defined($experiment->packing_strategy()));
# NS file can say to run the prepass.
my $useprepass = $experiment->useprepass();

my $newassign =
    EmulabFeatures->FeatureEnabled("NewAssign",
				   $this_user,
				   $experiment->GetGroup(), $experiment);
# Without -C, the ControlNetVlans feature can still turn this on.
if (!$usecontrol) {
    $usecontrol =
	EmulabFeatures->FeatureEnabled("ControlNetVlans",
				       $this_user,
				       $experiment->GetGroup(), $experiment);
    if ($usecontrol) {
	chat("Telling ptopgen to use control network vlans\n");
    }
}
if ($usecontrol && $MAINSITE) {
    $debug   = 1;
    $verbose = 1;
}

# XXX Hacky!
if ($MAINSITE && $TB ne "/usr/testbed") {
    $debug   = 1;
    $verbose = 1;
    $fixlannodes = $fixmode;
}

libvtop::Init($this_user, $experiment->GetGroup(), $experiment);

#
# These are the flags to the vtop creation code. 
#
my $vtopflags = 0;
$vtopflags |= $libvtop::VTOP_FLAGS_VERBOSE
    if ($verbose);
$vtopflags |= $libvtop::VTOP_FLAGS_QUIET
    if ($quiet);
$vtopflags |= $libvtop::VTOP_FLAGS_UPDATE
    if ($updating);
$vtopflags |= $libvtop::VTOP_FLAGS_FIXNODES
    if ($fixmode || $usecurrent);
$vtopflags |= $libvtop::VTOP_FLAGS_FIXLANNODES
    if ($fixlannodes);
$vtopflags |= $libvtop::VTOP_FLAGS_IMPOTENT
    if ($impotent);
$vtopflags |= $libvtop::VTOP_FLAGS_ALLOCONLY
    if ($alloconly);
$vtopflags |= $libvtop::VTOP_FLAGS_REGRESSION
    if ($regression);

#
# Run the mapper. On a multi-architecture testbed with untyped nodes, each
# architecture is tried in turn until one maps. MapperWrapper() returns 0
# on success, otherwise a reference to the argument list for fatal().
#
if (IsMultiPCArchitecture()) {
    my @architectures = PCArchitectures();

    $vtop = libvtop->Create($experiment, $this_user, $vtopflags, $real_user);
    if (!defined($vtop)) {
	fatal("Could not create vtop structure for $experiment");
    }
    $vtop->CreateVtop() == 0
	or fatal("Could not create vtop for $experiment");

    #
    # If all nodes are explicitly typed, we run the mapper normally.
    #
    if ($vtop->AllNodesTyped()) {
	$retval = MapperWrapper();
    }
    else {
	my $ordered = 0;

	#
	# If all of the typed nodes are of one type, then try assign with
	# that type first. In other words, set the order in which to try the
	# different types, according to how many nodes of that architecture.
	#
	my %architectures = map { $_ => 0 } @architectures;

	foreach my $typename (keys(%{ $vtop->types() })) {
	    my $type = NodeType->Lookup($typename);

	    # If no architecture defined for this type, then we do not count.
	    if (defined($type->architecture())) {
		$architectures{$type->architecture()} +=
		    $vtop->types()->{$type->type()};
	    }
	    $ordered += $vtop->types()->{$type->type()};
	}
	#
	# If no types specified, and thus no implied ordering, then we
	# fall back to the ordering specified in the sitevar.
	#
	if (!$ordered) {
	    my $string   = GetSiteVar("general/architecture_priority");
	    my @archs    = split(",", $string);
	    my $count    = scalar(@archs);

	    print "Forcing architecture ordering: @archs\n";

	    foreach my $arch (@archs) {
		# Set decreasing count for sort below.
		$architectures{$arch} = $count--;
	    }
	}
	# sort the array, highest count first.
	@architectures = sort { $architectures{$b} <=>
				    $architectures{$a} } keys(%architectures);

	#
	# Run through each architecture.
	#
	foreach my $architecture (@architectures) {
	    $retval = MapperWrapper($architecture);
	    last
		if (!$retval);
	    #
	    # If we managed to reserve any nodes, we have to free them
	    # before moving onto the next architecture. 
	    #
	    if ($vtop->newreservednodes()) {
		my @newreservednodes = $vtop->newreservednodes();
		if (system("$NFREE -x $pid $eid @newreservednodes")) {
		    # Clear this so that we do not try again in fatal();
		    $vtop->clearnewreserved();
		    fatal("Could not release new nodes after $architecture");
		}
		# Kill this, we are done with it now that nodes are released
		# (do not want to release them again). 
		$vtop = undef;
	    }
	}
    }
    if (ref($retval)) {
	fatal(@{ $retval });
    }
}
else {
    # Use the file-level $retval here too, rather than shadowing it.
    $retval = MapperWrapper();
    if (ref($retval)) {
	fatal(@{ $retval });
    }
}
if ($regression) {
    # Disabled: a second mapper pass in update mode with everything fixed.
    if (0) {
	$updating   = 1;
	$fixmode    = 1;
	$clear      = 0;
	$vtopflags |=
	    ($libvtop::VTOP_FLAGS_UPDATE|$libvtop::VTOP_FLAGS_FIXLANNODES|
	     $libvtop::VTOP_FLAGS_FIXNODES);

	MapperWrapper();
    }
    FinalizeRegression(0);
}
PrintSolution()
    if ($outfile);
exit(0);

#
# Build a fresh vtop for the experiment and run the assign loop on it.
#
# $architecture - optional; passed to CreateVtop() so that untyped nodes
#                 are forced to that architecture.
#
# Returns whatever AssignLoop() returns: 0 on success, otherwise a
# reference to the argument list for fatal(). Calls fatal() directly on
# setup errors, and exits 0 after clearing when $clear is set outside of
# regression mode.
#
sub MapperWrapper(;$)
{
    my ($architecture) = @_;

    chat("Starting the new and improved mapper wrapper.\n")
	if (!$toponly);
    chat("Forcing untyped nodes to $architecture\n")
	if (defined($architecture));

    # Need to do this cause libvtop will add them again.
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents()
	if ($regression);

    TBDebugTimeStamp("Create libvtop started");
    $vtop = libvtop->Create($experiment, $this_user, $vtopflags, $real_user);
    if (!defined($vtop)) {
	fatal("Could not create vtop structure for $experiment");
    }
    TBDebugTimeStamp("Create libvtop ended");

    TBDebugTimeStamp("vtopgen started");
    $vtop->CreateVtop($architecture) == 0
	or fatal("Could not create vtop for $experiment");
    TBDebugTimeStamp("vtopgen finished");

    if (!$impotent && !$alloconly && !$toponly && ($updating || $clear)) {
	if ($clear) {
	    chat("Freeing reserved nodes ...\n");
	    system("export NORELOAD=1; $NFREE -x -a $pid $eid") == 0
		or fatal("Could not release nodes.");
	}
	chat("Clearing physical state before updating.\n");
	$experiment->RemovePhysicalState();
	# A plain clear (not regression) is done at this point.
	exit(0)
	    if ($clear && !$regression);
    }
    if (!$toponly && $gblinkcheck) {
	if ($vtop->gblinks() && $vtop->mgblinks()) {
	    fatal("Not allowed to mix <=1Gb and >1Gb links");
	}
    }
    return AssignLoop();
}

#
# The assign loop. 
#
# Calls RunAssign() until it succeeds, reports an unretriable failure, or
# the run limit is reached without progress. The first retriable failure
# triggers one run against an empty testbed to check that the topology
# can be mapped at all.
#
# Returns 0 on success, otherwise a reference to the argument list for
# fatal().
#
sub AssignLoop()
{
    my $currentrun     = 1;
    my $progress       = 0;
    my $tried_precheck = 0;

    # XXX plab hack - only run assign once on plab topologies, since
    # they're easy to map and the physical topology does not change
    # frequently.
    # NOTE(review): the text above says "once" but the limit is set
    # to 2 - confirm which is intended.
    if ($vtop->plabcount() && $vtop->plabcount == $vtop->virtnodecount()) {
	$maxrun = 2;
    }

    TBDebugTimeStamp("mapper loop started");
    while (1) {
	chat("Mapper loop $currentrun\n");

	my $prefix = "$pid-$eid-$$";

	#
	# When precheck is on, we only do one run in impotent mode and exit.
	#
	if ($precheck) {
	    $prefix  .= ".empty";
	    $impotent = 1;
	    chat("Trying assign on an empty testbed.\n");
	}

	#
	# Serialize with the pool daemon if using shared nodes.
	#
	if ((!($impotent || $regression)) && $vtop->sharednodecount()) {
	    while (1) {
		#
		# Use a countup/countdown counter, so that multiple mappers
		# can run, but not while the pool_daemon is running.
		#
		my $lock_result =
		    DBQueryFatal("update emulab_locks set value=value+1 ".
				 "where name='pool_daemon' and value>=0");

		$gotlock = $lock_result->affectedrows;

		last
		    if ($gotlock);

		chat("Waiting for pool daemon lock ...\n");
		sleep(10);
	    }
	}
	# Hack for Kirk.
	if (!($impotent || $regression) &&
	    ($pid eq "CCNC2017Tutorial" || $project->IsNonLocal())) {
	    while (1) {
		if (libEmulab::EmulabCountLock("mapperlock", 3) == 0) {
		    $seriallock = 1;
		    last;
		}
		chat("Waiting for mapper lock ...\n");
		sleep(5);
	    }
	}

	#
	# RunAssign returns  0 if successful.
	#           returns -1 if failure, but assign says to stop trying.
	#           returns  1 if failure, but assign says to try again.
	#           returns  2 if assign succeeds, but no nodes allocated.
	#           returns  3 if assign succeeds, but some nodes allocated.
	#
	my $retval = RunAssign($precheck, $prefix);

	# Drop whatever locks we took above before looking at the result.
	if ($gotlock) {
	    DBQueryFatal("update emulab_locks set value=value-1 ".
			 "where name='pool_daemon'");
	    $gotlock = 0;
	}
	if ($seriallock) {
	    libEmulab::EmulabCountUnlock("mapperlock");
	    $seriallock = 0;
	}

	# Success!
	last
	    if ($retval == 0);

	if ($retval < 0 || $regression) {
	    #
	    # Failure in assign.
	    #
	    FinalizeRegression(1)
		if ($regression);

	    return [{type  => 'primary', severity => SEV_ERROR,
		     error => ['unretriable_assign_error']},
		    "Unretriable error. Giving up."];
	}

	#
	# When precheck is off, we do a precheck run if the first try fails
	# to find a solution. This avoids looping on an unsolvable topology.
	# But, if the reason we are here is cause we could not allocate nodes,
	# then we found a solution, and so trying on an empty testbed is
	# pointless; it will obviously find a solution again.
	#
	if (!$precheck && !$tried_precheck && ($retval == 2 || $retval == 3)) {
	    $tried_precheck = 1;
	}
	if (!$precheck && !$tried_precheck) {
	    chat("Trying assign on an empty testbed to verify mapability.\n");
	    my $save_impotent = $impotent;
	    $impotent  = 1;
	    my $emptyresult = RunAssign(1, $prefix . ".empty");
	    # Undo the forced impotent mode on both outcomes, so the
	    # caller never sees the temporary setting.
	    $impotent = $save_impotent;
	    if ($emptyresult != 0) {
		return [{type=>'extra', cause=>'user', severity=>SEV_ERROR,
			 error=>['infeasible_resource_assignment']}, 
			"This experiment cannot be instantiated on this ".
			"testbed. You have most likely asked for hardware ".
			"this testbed does not have, such as nodes of a type ".
			"it does not contain, or nodes with too many network ".
			"interfaces.  You will need to modify this experiment ".
			"before it can be swapped in - re-submitting the ".
			"experiment as-is will always result in failure."];
	    }
	    chat("Assign succeeded on an empty testbed.\n");
	    $tried_precheck = 1;
	}
	# We try a minimum number of times, cause the node pool is
	# always changing. But once we hit the maxrun, we continue
	# only if progress on the last loop.
	if ($currentrun >= $maxrun && !$progress) {
	    return [{type => 'primary', severity => SEV_ERROR,
		     error => ['reached_assign_run_limit']},
		    "Reached run limit. Giving up."];
	}
	# See if we made progress or not.
	# Keep going if we allocated some nodes.
	$progress = ($retval == 3);

	# A little bit of backoff after failure. 
	my $sval = int(rand($currentrun * 3)) + 3;
	chat("Waiting $sval seconds and trying again...\n");
	sleep($sval);
	$currentrun++;
    }
    GatherAssignStats($pid, $eid, %{ $vtop->exptstats() })
	if (! ($impotent || $alloconly || $regression || $toponly));
    TBDebugTimeStamp("mapper loop finished");
    return 0;
}

#
# The guts of an assign run.
#
# $precheck - when true, ptopgen is told all nodes are free, assign is
#             run with -n, and we return right after assign succeeds.
# $prefix   - filename prefix for the vtop/ptop/limits/assign files.
#
# Returns 0 on success, -1 on a failure that should not be retried, and
# 1 when assign exited with 1 (retryable). A non-zero return from
# $vtop->AllocNodes() is passed through as is (presumably the 2 and 3
# codes that AssignLoop documents - confirm against libvtop).
#
sub RunAssign($$)
{
    my ($precheck, $prefix) = @_;

    my $typelimitfile = $prefix .".limits";
    my $ptopfile = $prefix . ".ptop";
    my $vtopfile = $prefix . ".vtop";
    if ($userspec) {
	$ptopfile .= ".xml";
	$vtopfile .= ".xml";
    }
    my $assignexitcode = 0;
    my $assignlog = "assign.log";

    #
    # Now generate a vtop file and dump it to a file.
    #
    if (! open(VTOPFILE, ">", $vtopfile)) {
	tberror("Could not open $vtopfile: $!\n");
	return -1;
    }
    my $reslibvtop;
    if ($userspec == 1) {
	$reslibvtop = $vtop->PrintRspec(*VTOPFILE);
    }
    else {
	$reslibvtop = $vtop->PrintTop(*VTOPFILE);
    }
    if ($reslibvtop != 0) {
	# Do not leave the handle open on the error path.
	close(VTOPFILE);
	tberror("Could not print vtop file for $experiment\n");
	return -1;
    }
    close(VTOPFILE);
    system("/bin/cp -fp $vtopfile ${pid}-${eid}.vtop") if ($debug);
    return 0
	if ($toponly);

    if (! ($impotent || $regression || $alloconly)) {
	if ($experiment->Update({"maximum_nodes" => $vtop->maximum_nodes(),
				 "minimum_nodes" => $vtop->minimum_nodes(),
				 "virtnode_count"=> $vtop->virtnodecount() })){
	    tberror("Could not update min/max/virt nodes for $experiment\n");
	    return -1;
	}
    }
    # New solution each time.
    $vtop->ClearSolution();

    # Nothing for assign to do when every node is counted by genicount().
    goto skipassign
	if ($vtop->nodecount() == $vtop->genicount());

    # Debugging hack.
    if ($regression || $noassign || $assignfile) {
	$assignlog = $assignfile
	    if ($assignfile);

	if (! -e $assignlog) {
	    chat("No existing assign results file!\n");
	    return -1;
	}
	chat("Using existing assign results file\n");
	goto skiprun;
    }

    #
    # Do admission control test, and gather the info.
    #
    my %admission_control;
    if (!$regression) {
	if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control)){
	    tberror("Failed admission control checks!\n");
	    return -1;
	}
    }

    # Bound the results to avoid future reservation conflicts.
    # For each type, keep the smaller of the two limits.
    Reservation->FlushAll();
    my $reservation_bounds = Reservation->MaxSwapInMap( $experiment );
    foreach my $type ( keys( %$reservation_bounds ) ) {
	if( exists( $admission_control{ $type } ) ) {
	    if( $reservation_bounds->{ $type } < $admission_control{ $type } ) {
		$admission_control{ $type } = $reservation_bounds->{ $type };
	    }
	} else {
	    $admission_control{ $type } = $reservation_bounds->{ $type };
	}
    }

    #
    # Append this admission control results to ptopgen.
    #
    if (scalar(keys(%admission_control))) {
	if (! open(TYPELIMIT, ">", $typelimitfile)) {
	    tberror("Could not open $typelimitfile: $!\n");
	    return -1;
	}

	foreach my $typeclass (keys(%admission_control)) {
	    my $count = $admission_control{$typeclass};

	    print TYPELIMIT "$typeclass $count\n";
	}
	close(TYPELIMIT);
    }

    #
    # Snapshot physical resources.
    #
    # if updating (-u), include any resources that may already be
    # allocated to experiment in the PTOP results.
    #
    my $ptopargs = "-p $pid ";
    $ptopargs   .= "-e $eid "
	if ($updating);
    $ptopargs   .= "-u "
	if ($updating && $experiment->elabinelab());
    $ptopargs   .= "-m $mfactor "
	if (defined($mfactor));
    $ptopargs   .= "-C "
	if ($usecontrol);
    $ptopargs   .= "-v "
	if ($vtop->virtnodecount());
    $ptopargs   .= "-r "
	if ($vtop->remotenodecount());
    $ptopargs   .= "-S "
	if ($vtop->simnodecount());
    $ptopargs   .= "-h "
	if ($vtop->sharednodecount());
    $ptopargs   .= "-b "
	if ($vtop->bstorecount());
    $ptopargs	.= "-a "
	if ($precheck || $allnodesfree);
    $ptopargs	.= "-c " . $experiment->delay_capacity() . " "
	if (defined($experiment->delay_capacity()));
    if ($userspec == 1) {
	$ptopargs .= "-x -g 2 ";
    }
    if ($use_old_ptopgen == 1) {
	$ptopargs .= "-Z ";
    }
    $ptopargs .= "-z "
	if ($project->IsNonLocal() || $vtop->sharednodecount());
    if ($gblinkcheck) {
	if ($vtop->mgblinks() == 0) {
	    $ptopargs .= "-G ";
	}
    }
    $ptopargs .= "-l $typelimitfile "
	if (scalar(keys(%admission_control)));

    chat("ptopargs: '$ptopargs'\n");
    TBDebugTimeStamp("ptopgen started");
    system("$PTOPGEN $ptopargs > $ptopfile");
    if ($?) {
	tberror("Failure in ptopgen\n");
	return -1;
    }
    system("/bin/cp -fp $ptopfile ${pid}-${eid}.ptop") if ($debug);
    TBDebugTimeStamp("ptopgen finished");

    # Run assign
    my $cmd  = "assign";
    $cmd .= "-new"
	if ($newassign);
    my $args = "";
    if ($XERCES) {
	$args .= "-f rspec " 
	    if ($userspec == 1);
	$args .= "-f text "
	    if ($userspec == 0);
    }
    $args .= "$ptopfile $vtopfile";
    # Each option below is prepended, so later ones end up further left.
    $args = "-P $args"
	if (!$vtop->sharednodecount());
    $args = "-F $args"
	if (!$updating);
    $args = "-uod -c .75 -H 3 $args"
	if ($vtop->virtnodecount() || $vtop->simnodecount());
    $args = "-n $args"
	if ($precheck);
    $args = "-S $packoption $args"
	if (defined($packoption));
    $args = "-s 123456 $args"
	if ($regression);
    $args = "-R $args PN=1.0"
	if (0 && $vtop->sharednodecount());

    # The prepass speeds up assign on big topos with virtual nodes.
    if ($prepass || $useprepass) {
	$cmd = "assign_prepass";
	$args = "-m $mfactor $args"
	    if (defined($mfactor));
    }
    chat("assign command: '$cmd $args'\n");

    #
    # Fork a child to run assign. Parent spins watching the cancel flag
    # and waiting for assign to finish.
    #
    TBDebugTimeStamp("assign started");
    my $childpid = fork();
    if (!defined($childpid)) {
	#
	# fork failed. Without this check we would fall into the child
	# branch below and exec assign over the mapper itself.
	#
	tberror("Could not fork a child to run assign: $!\n");
	return -1;
    }
    if ($childpid) {
	while (1) {
	    sleep(2);

	    if (waitpid($childpid, WNOHANG) == $childpid) {
		$assignexitcode = $? >> 8;
		last;
	    }

	    # Check cancel flag.
	    if ($experiment->canceled()) {
		if ((my $pgrp = getpgrp($childpid)) > 0) {
		    kill('TERM', -$pgrp);
		    waitpid($childpid, 0);

		    tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
			     error => ['cancel_flag']},
			    "Cancel flag set; aborting assign run!\n");
		    return -1;
		}
		# Loop again to reap child above before exit.
	    }
	}
    }
    else {
	#
	# Change our session so the child can get a killpg without killing
	# the parent. 
	#
	POSIX::setsid();
	exec("nice $WRAPPER2 $cmd $args > $assignlog 2>&1");
	die("Could not start assign!\n");
    }
    TBDebugTimeStamp("assign finished");

    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
	return -1;
    }

    # Check for possible full filesystem ...
    if (-z $assignlog) {
	tbnotice("$assignlog is zero length! Stopping ...\n");
	return -1;
    }

    #
    # Saving up assign.log coz each swapin/modify is different and it
    # is nice to have every mapping for debugging and archiving
    # purposes We do not call it .log though, since we do not want it
    # copied out to the user directory every swapin. See Experiment.pm
    #
    system("/bin/cp -fp $assignlog ${prefix}.assign");
    system("/bin/cp -fp $assignlog ${pid}-${eid}.assign") if ($debug);

    if ($assignexitcode) {
	print "Assign exited with $assignexitcode\n" if ($debug);

	system("/bin/cat $assignlog");
	#
	# assign returns two positive error codes (that we care about).
	# The distinction between them is somewhat murky. An exitval of
	# 1 means "retryable" while 2 means "unretryable". The former
	# means we can try again, while the later says there is no possible
	# way to map it. We pass this back to the caller so that we know
	# to exit the loop or try again.
	#
	return (($assignexitcode == 1) ? 1 : -1);
    }
    #
    # If we were doing the precheck, go ahead and exit now - there is no
    # useful information to parse out
    #
    if ($precheck) {
	chat("Precheck succeeded.\n");
	return 0;
    }
  skiprun:
    chat("Reading assign results.\n");
    if (!open(ASSIGNFP, "<", $assignlog)) {
	print("Could not open assign logfile! $!\n");
	return -1;
    }
    TBDebugTimeStamp("ReadSolution started");
    if ($vtop->ReadTextSolution(*ASSIGNFP) != 0) {
	print("Could not parse assign logfile! $!\n");
	# Do not leave the handle open on the error path.
	close(ASSIGNFP);
	return -1;
    }
    close(ASSIGNFP);
    TBDebugTimeStamp("ReadSolution ended");
  skipassign:

    if (defined($vtop->genicount())) {
	TBDebugTimeStamp("Map Geni Resources Started");
	if ($vtop->MapResources() != 0) {
	    print("Could not map external resources! $!\n");
	    return -1;
	}
	TBDebugTimeStamp("Map Geni Resources ended");
    }
    TBDebugTimeStamp("InterpNodes Started");
    if ($vtop->InterpNodes() != 0) {
	print("Could not interpret nodes.\n");
	return -1;
    }
    TBDebugTimeStamp("InterpNodes ended, AllocNodes Started");
    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
	return -1;
    }
    # A non-zero result from AllocNodes goes straight back to the caller.
    my $retval = $vtop->AllocNodes();
    return $retval
	if ($retval != 0);

    TBDebugTimeStamp("AllocNodes ended, InterpLinks Started");
    if ($vtop->InterpLinks() != 0) {
	print("Could not setup links\n");
	return -1;
    }
    TBDebugTimeStamp("InterpLinks ended, InitializePhysNodes Started");
    if ($vtop->InitializePhysNodes() != 0) {
	print("Could not InitializePhysNodes\n");
	return -1;
    }
    TBDebugTimeStamp("InitializePhysNodes ended");
    if (! ($impotent || $regression || $alloconly)) {
	TBDebugTimeStamp("ReserveSharedBandwidth started");
	if ($experiment->ReserveSharedBandwidth($updating) != 0) {
	    print("Could not reserve shared bandwidth\n");
	    return -1;
	}
	TBDebugTimeStamp("ReserveSharedBandwidth ended");
    }
    return 0;
}

#
# Wrap up a regression mode run.
#
# When $error is false, the physical state is saved in
# "<cwd>/$pid-$eid.pstate" and a vtop file with all resources fixed is
# written to "$pid-$eid.fixed". In all cases the internal program agents
# are then deleted and, unless $noregfree is set, the physical state is
# removed and any newly reserved nodes are handed back with nfree.
#
# Returns 0 on success, and -1 if the old state directory cannot be
# cleaned or the fixed vtop file cannot be written. A failure to save or
# remove the physical state, or to free the nodes, exits with status 1
# (and so leaves through the END block).
#
sub FinalizeRegression($)
{
    my ($error) = @_;

    if (!$error) {
	my $cwd;
	chomp($cwd = `/bin/pwd`);

	chat("Saving physical state in regression mode\n");
	# List form: no shell is involved, so the names are never reparsed.
	if (system("/bin/rm", "-rf", "$pid-$eid.pstate")) {
	    tberror("Could not clean physical state directory\n");
	    return -1;
	}
	if ($experiment->BackupPhysicalState("$cwd/$pid-$eid.pstate", 1)
	    != 0) {
	    print STDERR "Could not save physical state!\n";
	    exit(1);
	}

	# Generate a vtop file with all resources fixed.
	chat("Generating new vtop file with all resources fixed.\n");
	my $vtopfh;
	if (! open($vtopfh, ">", "$pid-$eid.fixed")) {
	    tberror("Could not open $pid-$eid.fixed: $!\n");
	    return -1;
	}
	if ($vtop->PrintTop($vtopfh) != 0) {
	    # Do not leak the handle on the failure path.
	    close($vtopfh);
	    tberror("Could not print fixed vtop file for $experiment\n");
	    return -1;
	}
	# Buffered write errors only show up when the handle is closed.
	if (! close($vtopfh)) {
	    tberror("Could not close $pid-$eid.fixed: $!\n");
	    return -1;
	}
    }
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents();

    return 0
	if ($noregfree);

    chat("Removing physical state in regression mode\n");
    if ($experiment->RemovePhysicalState() != 0) {
	print STDERR "Could not remove physical state!\n";
	exit(1);
    }

    # Fetch the list once, in list context, the same way fatal() does.
    my @newreservednodes = $vtop->newreservednodes();
    if (@newreservednodes) {
	system("export NORELOAD=1; $NFREE -x $pid $eid @newreservednodes");
	if ($?) {
	    exit(1);
	}
    }
    return 0;
}
1055

1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078
#
# Print a solution in rspec format.
#
# The solution is written to $outfile. When $outfile is "-" no file is
# opened and an undefined handle is passed to $vtop->PrintSolution()
# (presumably it then writes to STDOUT; confirm against libvtop).
#
# Returns 0 on success, -1 if the file cannot be opened, written or
# closed.
#
sub PrintSolution()
{
    my $output = undef;

    if ($outfile ne "-") {
	# Three-arg open with a lexical handle; the name is never parsed
	# for a mode and the handle cannot clash with another OUTFILE.
	if (! open($output, ">", $outfile)) {
	    tberror("Could not open $outfile: $!\n");
	    return -1;
	}
    }
    if ($vtop->PrintSolution($output) != 0) {
	# Do not leak the handle on the failure path.
	close($output)
	    if (defined($output));
	tberror("Could not print solution for $experiment\n");
	return -1;
    }
    # Buffered write errors only show up when the handle is closed.
    if (defined($output) && !close($output)) {
	tberror("Could not close $outfile: $!\n");
	return -1;
    }
    return 0;
}

1079 1080 1081 1082
#
# Exit handler; we get here however the script terminates. It drops any
# locks still held and then rewrites the exit status into the WRAPPER_*
# bits.
#
END {
    # Grab the status first; we may be here because of a die()/exit()
    # someplace, and the value is put back (adjusted) at the bottom.
    my $status = $?;

    #
    # Do not want to leave this around, it will lock the pool daemon out.
    #
    if ($gotlock) {
	DBQueryFatal("update emulab_locks set value=value-1 ".
		     "where name='pool_daemon'");
	$gotlock = 0;
    }
    if ($seriallock) {
	libEmulab::EmulabCountUnlock("mapperlock");
	$seriallock = 0;
    }

    # Any nonzero status other than plain WRAPPER_FAILED becomes fatal.
    $status = ($WRAPPER_FAILED | $WRAPPER_FAILED_FATALLY)
	if ($status && $status != $WRAPPER_FAILED);

    # Outstanding warnings are reported and count as a failure.
    if ($warnings > 0) {
	tberror("$warnings warnings.\n");

	$status |= $WRAPPER_FAILED;
    }

    # Set recover bit if we are going to fail.
    if ($status && !$NoRecover) {
	$status |= $WRAPPER_FAILED_CANRECOVER;
    }

    # And change the exitcode to be what we want it to be.
    $? = $status;
}

#
# Report an error with tberror() and exit with $WRAPPER_FAILED. The
# arguments are passed to tberror() unchanged. Does not return.
#
sub fatal(@)
{
    #
    # Free any newly reserved nodes (in update mode) so that tbswap knows
    # it is safe to recover the experiment. If we bypass this and leave
    # through the END block then NoRecover will still be set and tbswap
    # will know to swap the experiment out.
    #
    if ($updating) {
	if (defined($vtop)) {
	    my @newreservednodes = $vtop->newreservednodes();

	    #
	    # Assign to the file-level $NoRecover, which is the variable the
	    # END block tests. Declaring it with "my" here would create a
	    # private copy and the exit code would never see the result.
	    #
	    $NoRecover = $vtop->norecover();

	    if (@newreservednodes) {
		# Only recoverable if the nodes really were freed.
		$NoRecover = 0
		    if (system("$NFREE -x $pid $eid @newreservednodes") == 0);
	    }
	    else {
		# Nothing was newly reserved, so there is nothing to free.
		$NoRecover = 0;
	    }
	}
    }

    tberror(@_);

    # We next go to the END block above.
    exit($WRAPPER_FAILED);
}
#
# Print the given message, but only when $verbose is set.
#
sub debug($)
{
    my ($message) = @_;

    print($message)
	if ($verbose);
}
#
# Print the given message, unless $quiet is set.
#
sub chat($)
{
    my ($message) = @_;

    print($message)
	if (!$quiet);
}