mapper.in 32.3 KB
Newer Older
1
2
#!/usr/bin/perl -w
#
# Copyright (c) 2000-2017 University of Utah and the Flux Group.
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
#
use strict;
use English;
use Getopt::Std;
Leigh B Stoller's avatar
Leigh B Stoller committed
27
use Data::Dumper;
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
use POSIX qw(setsid ceil);
use POSIX ":sys_wait_h";

#
# This functions as the main assign loop.  It converts the virtual
# topology into a top input including LAN and delay translation.  It
# then snapshots the current testbed physical state and runs assign,
# looping a couple times if assign fails.  When assign successfully
# completes it will interpret the results.  It then attempts to match any
# existing portmap entries and updates the delays and vlans table.
#
# XXX Internally created nodes (jailhost,delay,sim) are not treated
#     consistently. Needs more thought.
#
# Return codes: We catch all errors with the END block below, and if
# anyone calls die() (exit value is 255) we add in the CANRECOVER bit.
# Failures in assign always cause the caller to stop retrying. 
#
# The CANRECOVER bit indicates 'recoverability' (no db or physical
# state was modified by the time the error occurred). This is relevant
# only to modify operations (update).
#
#
# Exit status values. $WRAPPER_FAILED is the base failure code; the
# CANRECOVER and FATALLY values are bits added on top of it.
my $WRAPPER_SUCCESS           = 0x00;
my $WRAPPER_FAILED            = 0x01;  # Failed (Add other values)
my $WRAPPER_FAILED_CANRECOVER = 0x40;  # Can recover from update
my $WRAPPER_FAILED_FATALLY    = 0x80;  # Do not use this.

# Set this once we modify DB state; forces no recover in fatal().
my $NoRecover = 0;

#
# Print the command line synopsis and option summary to STDERR, then
# exit with $WRAPPER_FAILED. Does not return.
#
sub usage ()
{
    print STDERR "Usage: $0 [-v] [-u [-f] | -n] [-z] pid eid\n";
    print STDERR " -v   - Enables verbose output\n";
    print STDERR " -u   - Enables update mode\n";
    print STDERR " -f   - Fix current resources during update mode\n";
    print STDERR " -n   - Run assign, but do not reserve/modify resources.\n";
    print STDERR " -r   - Regression mode.\n";
    print STDERR " -x   - Turn on the prepass\n";
    print STDERR " -m   - Set the multiplex factor; overrides experiment.\n";
    print STDERR " -p   - Do a precheck for mapability on an empty testbed - ".
                 "implies -n\n";
    print STDERR " -l   - Use rspec v2 instead of the text file format\n";
    # Disabled: -z no longer selects a new ptopgen (see option parsing).
#    print STDERR " -z   - Force new ptopgen\n";
    print STDERR " -Z   - Force old ptopgen\n";
    print STDERR " -A   - Tell ptopgen all nodes are free; only with -n\n";
    exit($WRAPPER_FAILED);
}
75
# Option letters accepted by getopts(); a letter followed by ':' takes
# an argument.
my $optlist    = "dvunfprqczxm:ko:altzZACFNL:S:G";

# State set from the command line (the letter is the option that sets it).
my $verbose    = 0;  # -v
my $debug      = 0;  # -d
my $fixmode    = 0;  # -f; also forced on by -r
my $fixlannodes= 0;  # -F
my $updating   = 0;  # -u
my $impotent   = 0;  # -n; also forced on while prechecking
my $precheck   = 0;  # -p
my $allnodesfree = 0;  # -A; rejected unless -n is also given
my $toponly    = 0;  # -t
my $prepass    = 0;  # -x
my $alloconly  = 0;  # -a
my $gblinkcheck= 0;  # -G
my $outfile;         # -o argument
my $mfactor;         # -m argument; else the experiment's multiplex factor
my $packoption;      # -S argument; else the experiment's packing strategy
my $regression = 0;  # -r
my $noassign   = 0;  # Only with regression mode, use previous solution.
my $noregfree  = 0;  # Only with regression mode, leave physical state at end.
my $usecurrent = 0;  # Only with regression mode, use current solution.
my $assignfile;      # -L argument
my $quiet      = 0;  # -q; also set by -t
my $clear      = 0;  # -c; also set by -r unless -t is given
my $warnings   = 0;  # Incremented by the __WARN__ handler.
my $maxrun     = 3;  # Maximum number of times we run assign.
my $gotlock    = 0;  # Nonzero while holding a count on the pool_daemon lock.
my $seriallock = 0;  # Nonzero while holding the "mapperlock" count lock.
my $userspec   = 0;  # -l
my $usecontrol = 0;  # -C, or the ControlNetVlans feature
my $use_old_ptopgen  = 0;  # -Z
my $vtop;            # libvtop object for the current mapping attempt.
my $retval;          # Result of MapperWrapper() on the multi-architecture path.
107
108
109
110
111

#
# Configure variables
#
# The @NAME@ tokens are placeholders in this template (mapper.in).
# NOTE(review): presumably substituted by configure when the script is
# generated - confirm against the build.
my $TB          = "@prefix@";
my $MAINSITE    = @TBMAINSITE@;
my $DBNAME      = "@TBDBNAME@";
my $TBOPS       = "@TBOPSEMAIL@";

# Helper programs, all located under the install prefix.
my $ASSIGN      = "$TB/libexec/assign";
my $WRAPPER2    = "$TB/libexec/assign_wrapper2";
my $PTOPGEN     = "$TB/libexec/ptopgen";
my $PTOPGEN_NEW = "$TB/libexec/ptopgen_new";
my $VTOPGEN     = "$TB/bin/vtopgen";
my $NFREE       = "$TB/bin/nfree";
my $PREDICT     = "$TB/sbin/predict";

# Tested as a boolean before enabling rspec support (-l).
my $XERCES      = "@HAVE_XERCES@";
123
124
125
126
127
128

#
# Load the Testbed support stuff. 
#
use lib "@prefix@/lib";
use libdb;
Leigh B Stoller's avatar
Leigh B Stoller committed
129
use emutil;
130
131
132
133
use libtestbed;
use libtblog;
use libvtop;
use libadminctrl;
134
use libEmulab;
135
use User;
136
use EmulabFeatures;
137
use Reservation;
138
139
140
141
142
143

# Protos
# Forward declarations so the prototyped subs can be called before their
# definitions further down the file.
sub fatal(@);
sub debug($);
sub chat($);
sub RunAssign($$);
sub FinalizeRegression($);
sub AssignLoop();
sub MapperWrapper(;$);
sub PrintSolution();
sub DumpReservationInfo($$);
149
150
151
152
153
154

# un-taint path
# Pin PATH to a fixed list and drop the shell-affecting variables.
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn on autoflush for the currently selected output handle, so output
# is written as it is produced instead of sitting in a buffer.
$| = 1;

#
# We want warnings to cause assign_wrapper to exit abnormally.
# 
# FIXME: Is this still needed?  "warn" is only used once.  Also this
#  will cause perl internal warnings (such as "Use of uninitialized
#  value ...") to cause assign_wrapper to fail. -- kevina
#
# Every warning is forwarded to tbwarn() and counted in $warnings.
#
$SIG{__WARN__} = sub { tbwarn $_[0];$warnings++; };
165
166
167
168
169
170
171
172
173
174
175
176
177

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
# Parse the options into %options, then copy each one into its state
# variable. The order below is kept deliberately: several steps call
# fatal() or TBDebugTimeStampsOn(), so reordering would change which
# error is reported first.
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
# pid and eid are required.
if (@ARGV < 2) {
    usage();
}
if (defined($options{"v"})) {
    TBDebugTimeStampsOn();
    $verbose++;
}
if (defined($options{"a"})) {
    $alloconly++;
}
if (defined($options{"A"})) {
    $allnodesfree++;
}
if (defined($options{"d"})) {
    $debug++;
}
if (defined($options{"u"})) {
    $updating = 1;
}
if (defined($options{"t"})) {
    # Top-only mode is also quiet.
    $toponly = 1;
    $quiet   = 1;
}
if (defined($options{"n"})) {
    $impotent = 1;
}
if (defined($options{"N"})) {
    $noassign = 1;
}
if (defined($options{"L"})) {
    $assignfile = $options{"L"};
}
if (defined($options{"f"})) {
    $fixmode = 1;
}
if (defined($options{"F"})) {
    $fixlannodes = 1;
}
if (defined($options{"G"})) {
    $gblinkcheck = 1;
}
if (defined($options{"S"})) {
    $packoption = $options{"S"};
    fatal("Bad -S option; must be pack or balance")
        if (! ($packoption eq "balance" || $packoption eq "pack"));
}
if (defined($options{"p"})) {
    $precheck = 1;
}
if (defined($options{"x"})) {
    $prepass = 1;
}
if (defined($options{"o"})) {
    $outfile = $options{"o"};
}
if (defined($options{"m"})) {
    $mfactor = $options{"m"};
}
if (defined($options{"r"})) {
    # Regression mode is refused when the database name is "tbdb".
    if ($DBNAME eq "tbdb") {
        fatal("Cannot use regression mode on main DB");
    }
    $regression = 1;
    $clear      = 1
        if (!defined($options{"t"}));
    $fixmode    = 1;
    TBDebugTimeStampsOn();
    # -z and -k are only looked at in combination with -r.
    $usecurrent = 1
        if (defined($options{"z"}));
    $noregfree = 1
        if (defined($options{"k"}));
}
if (defined($options{"q"})) {
    $quiet = 1;
}
if (defined($options{"c"})) {
    $clear = 1;
}
if (defined($options{"C"})) {
    $usecontrol = 1;
}
if (defined($options{"l"})) {
    if ($XERCES) {
        $userspec = 1;
    } else {
        fatal("Rspec v2 support requires that Xerces be installed");
    }
}
if (defined($options{"Z"})) {
    $use_old_ptopgen = 1;
}
if (defined($options{"z"})) {
    # Currently a no-op outside of regression mode; the old behavior is
    # left here, disabled.
#    $use_old_ptopgen = 0;
#    $PTOPGEN = $PTOPGEN_NEW;
}
if ($allnodesfree && !$impotent) {
    fatal("Not allowed to use -A without -n (impotent) option");
}
272
273
274
275
276
277
278

# The two required arguments name the experiment to map.
my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $experiment = Experiment->Lookup($pid, $eid);
if (!defined($experiment)) {
    fatal("Could not lookup experiment object $pid,$eid!");
}
my $project = $experiment->GetProject();

#
# Verify that this person can muck with the experiment.
#
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
}
if (!TBAdmin() &&
    ! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
    fatal("You do not have permission to map this experiment!");
}
my $real_user = User->RealUser();

# multiplex_factor default: the command line (-m) wins over the experiment.
$mfactor = $experiment->multiplex_factor()
    if (!defined($mfactor) && defined($experiment->multiplex_factor()));
# Likewise for the packing strategy (-S). Note that a value taken from
# the experiment is not checked against "pack"/"balance" here.
$packoption = $experiment->packing_strategy()
    if (!defined($packoption) && defined($experiment->packing_strategy()));
# NS file can say to run the prepass.
my $useprepass = $experiment->useprepass();

# Per user/group/experiment feature that selects the "-new" assign binary.
my $newassign =
    EmulabFeatures->FeatureEnabled("NewAssign",
                                   $this_user,
                                   $experiment->GetGroup(), $experiment);
# Control network vlans are used if asked for with -C, or if the feature
# is enabled.
if (!$usecontrol) {
    $usecontrol =
        EmulabFeatures->FeatureEnabled("ControlNetVlans",
                                       $this_user,
                                       $experiment->GetGroup(), $experiment);
    if ($usecontrol) {
        chat("Telling ptopgen to use control network vlans\n");
    }
}
if ($usecontrol && $MAINSITE) {
    $debug   = 1;
    $verbose = 1;
}

# XXX Hacky! On the main site, any install prefix other than
# /usr/testbed gets debug/verbose output, and -F follows -f.
if ($MAINSITE && $TB ne "/usr/testbed") {
    $debug   = 1;
    $verbose = 1;
    $fixlannodes = $fixmode;
}

libvtop::Init($this_user, $experiment->GetGroup(), $experiment);

#
# These are the flags to the vtop creation code. 
#
my $vtopflags = 0;
$vtopflags |= $libvtop::VTOP_FLAGS_VERBOSE
    if ($verbose);
$vtopflags |= $libvtop::VTOP_FLAGS_QUIET
    if ($quiet);
$vtopflags |= $libvtop::VTOP_FLAGS_UPDATE
    if ($updating);
$vtopflags |= $libvtop::VTOP_FLAGS_FIXNODES
    if ($fixmode || $usecurrent);
$vtopflags |= $libvtop::VTOP_FLAGS_FIXLANNODES
    if ($fixlannodes);
$vtopflags |= $libvtop::VTOP_FLAGS_IMPOTENT
    if ($impotent);
$vtopflags |= $libvtop::VTOP_FLAGS_ALLOCONLY
    if ($alloconly);
$vtopflags |= $libvtop::VTOP_FLAGS_REGRESSION
    if ($regression);
349

Leigh B Stoller's avatar
Leigh B Stoller committed
350
#
# Run the mapper. With more than one PC architecture, untyped nodes are
# forced to one architecture at a time until a mapping succeeds;
# otherwise the mapper runs once. MapperWrapper() returns 0 on success or
# an array reference that is handed to fatal().
#
if (IsMultiPCArchitecture()) {
    my @architectures = PCArchitectures();

    # A throwaway vtop, used only to see which node types were requested.
    $vtop = libvtop->Create($experiment, $this_user, $vtopflags, $real_user);
    if (!defined($vtop)) {
        fatal("Could not create vtop structure for $experiment");
    }
    $vtop->CreateVtop() == 0
        or fatal("Could not create vtop for $experiment");

    #
    # If all nodes are explicitly typed, we run the mapper normally.
    #
    if ($vtop->AllNodesTyped()) {
        $retval = MapperWrapper();
    }
    else {
        # Total number of explicitly typed nodes seen below.
        my $ordered = 0;

        #
        # If all of the typed nodes are of one type, then try assign with
        # that type first. In other words, set the order in which to try the
        # different types, according to how many nodes of that architecture.
        #
        my %architectures = map { $_ => 0 } @architectures;

        foreach my $type (keys(%{ $vtop->types() })) {
            # Replace the type name with its NodeType object.
            $type = NodeType->Lookup($type);

            # If no architecture defined for this type, then we do not count.
            if (defined($type->architecture())) {
                $architectures{$type->architecture()} +=
                    $vtop->types()->{$type->type()};
            }
            $ordered += $vtop->types()->{$type->type()};
        }
        #
        # If no types specified, and thus no implied ordering, then we
        # fall back to the ordering specified in the sitevar.
        #
        if (!$ordered) {
            my $string   = GetSiteVar("general/architecture_priority");
            my @archs    = split(",", $string);
            my $count    = scalar(@archs);

            print "Forcing architecture ordering: @archs\n";

            foreach my $arch (@archs) {
                # Set decreasing count for sort below.
                $architectures{$arch} = $count--;
            }
        }
        # sort the array, highest count first.
        @architectures = sort { $architectures{$b} <=>
                                    $architectures{$a} } keys(%architectures);

        #
        # Run through each architecture, stopping at the first success.
        #
        foreach my $architecture (@architectures) {
            $retval = MapperWrapper($architecture);
            last
                if (!$retval);
            #
            # If we managed to reserve any nodes, we have to free them
            # before moving onto the next architecture. 
            #
            if ($vtop->newreservednodes()) {
                my @newreservednodes = $vtop->newreservednodes();
                if (system("$NFREE -x $pid $eid @newreservednodes")) {
                    # Clear this so that we do not try again in fatal();
                    $vtop->clearnewreserved();
                    fatal("Could not release new nodes after $architecture");
                }
                # Kill this, we are done with it now that nodes are released
                # (do not want to release them again). 
                $vtop = undef;
            }
        }
    }
    if (ref($retval)) {
        fatal(@{ $retval });
    }
}
else {
    # Local result; the file-level $retval is only used on the
    # multi-architecture path above.
    my $result = MapperWrapper();
    if (ref($result)) {
        fatal(@{ $result });
    }
}
440
441
442
443
444
445
# Wrap up: finalize a regression run, write out the solution if an
# output file was requested (-o), and exit successfully.
if ($regression) {
    # Disabled second pass in update/fix mode; left in place, never run.
    if (0) {
        $updating   = 1;
        $fixmode    = 1;
        $clear      = 0;
        $vtopflags |=
            ($libvtop::VTOP_FLAGS_UPDATE|$libvtop::VTOP_FLAGS_FIXLANNODES|
             $libvtop::VTOP_FLAGS_FIXNODES);

        MapperWrapper();
    }
    FinalizeRegression(0);
}
PrintSolution()
    if ($outfile);
exit(0);
456

457
#
# Build a fresh vtop for the experiment, optionally clear existing
# state, and run the assign loop.
#
# $architecture (optional) is passed through to CreateVtop(); the
# progress message describes it as the architecture untyped nodes are
# forced to.
#
# Returns whatever AssignLoop() returns. Calls fatal() on setup errors,
# and exits the program directly after a clear (-c) outside regression
# mode.
#
sub MapperWrapper(;$)
{
    my ($architecture) = @_;

    chat("Starting the new and improved mapper wrapper.\n")
        if (!$toponly);
    chat("Forcing untyped nodes to $architecture\n")
        if (defined($architecture));

    # Need to do this cause libvtop will add them again.
    # Must be done before nodes are released.
    $experiment->DeleteInternalProgramAgents()
        if ($regression);

    # This replaces the file-level $vtop used by the rest of the script.
    TBDebugTimeStamp("Create libvtop started");
    $vtop = libvtop->Create($experiment, $this_user, $vtopflags, $real_user);
    if (!defined($vtop)) {
        fatal("Could not create vtop structure for $experiment");
    }
    TBDebugTimeStamp("Create libvtop ended");

    TBDebugTimeStamp("vtopgen started");
    $vtop->CreateVtop($architecture) == 0
        or fatal("Could not create vtop for $experiment");
    TBDebugTimeStamp("vtopgen finished");

    # Only touch existing state when we are really going to map
    # (not -n, -a or -t) and were asked to update or clear.
    if (!$impotent && !$alloconly && !$toponly && ($updating || $clear)) {
        if ($clear) {
            chat("Freeing reserved nodes ...\n");
            system("export NORELOAD=1; $NFREE -x -a $pid $eid") == 0
                or fatal("Could not release nodes.");
        }
        chat("Clearing physical state before updating.\n");
        $experiment->RemovePhysicalState();
        # A plain clear is finished here; regression mode carries on.
        exit(0)
            if ($clear && !$regression);
    }
    if (!$toponly && $gblinkcheck) {
        if ($vtop->gblinks() && $vtop->mgblinks()) {
            fatal("Not allowed to mix <=1Gb and >1Gb links");
        }
    }
    return AssignLoop();
}
501
502
503
504

#
# The assign loop. 
#
505
506
507
508
#
# Run assign (via RunAssign) until it succeeds or we give up.
#
# Returns 0 on success. On failure returns an array reference holding an
# error-description hash followed by a message string; callers expand it
# into the argument list of fatal().
#
# Uses and updates file-level state: $vtop, $impotent, $maxrun, $gotlock
# and $seriallock.
#
sub AssignLoop()
{
    my $currentrun     = 1;
    my $progress       = 0;
    my $tried_precheck = 0;

    # XXX plab hack - lower the run limit to two when plabcount equals
    # virtnodecount, since plab topologies are easy to map and the
    # physical topology does not change frequently.
    if ($vtop->plabcount() && $vtop->plabcount == $vtop->virtnodecount()) {
        $maxrun = 2;
    }

    TBDebugTimeStamp("mapper loop started");
    while (1) {
        chat("Mapper loop $currentrun\n");

        # File name prefix for this process's ptop/vtop/assign files.
        my $prefix = "$pid-$eid-$$";

        #
        # When precheck is on, every run is done in impotent mode, using
        # the ".empty" file prefix.
        #
        if ($precheck) {
            $prefix  .= ".empty";
            $impotent = 1;
            chat("Trying assign on an empty testbed.\n");
        }

        #
        # Serialize with the pool daemon if using shared nodes.
        #
        if ((!($impotent || $regression)) && $vtop->sharednodecount()) {
            while (1) {
                #
                # Use a countup/countdown counter, so that multiple mappers
                # can run, but not while the pool_daemon is running.
                #
                my $lock_result =
                    DBQueryFatal("update emulab_locks set value=value+1 ".
                                 "where name='pool_daemon' and value>=0");

                # One row updated means we got our count on the lock.
                $gotlock = $lock_result->affectedrows;

                last
                    if ($gotlock);

                chat("Waiting for pool daemon lock ...\n");
                sleep(10);
            }
        }
        # Hack for Kirk.
        if (!($impotent || $regression) &&
            ($pid eq "CCNC2017Tutorial" || $project->IsNonLocal())) {
            while (1) {
                if (libEmulab::EmulabCountLock("mapperlock", 3) == 0) {
                    $seriallock = 1;
                    last;
                }
                chat("Waiting for mapper lock ...\n");
                sleep(5);
            }
        }

        #
        # RunAssign returns  0 if successful.
        #           returns -1 if failure, but assign says to stop trying.
        #           returns  1 if failure, but assign says to try again.
        #           returns  2 if assign succeeds, but no nodes allocated.
        #           returns  3 if assign succeeds, but some nodes allocated.
        #
        my $retval = RunAssign($precheck, $prefix);

        # Drop whichever locks were taken above before looking at the result.
        if ($gotlock) {
            DBQueryFatal("update emulab_locks set value=value-1 ".
                         "where name='pool_daemon'");
            $gotlock = 0;
        }
        if ($seriallock) {
            libEmulab::EmulabCountUnlock("mapperlock");
            $seriallock = 0;
        }

        # Success!
        last
            if ($retval == 0);

        # Regression mode never retries.
        if ($retval < 0 || $regression) {
            #
            # Failure in assign.
            #
            FinalizeRegression(1)
                if ($regression);

            return [{type  => 'primary', severity => SEV_ERROR,
                     error => ['unretriable_assign_error']},
                    "Unretriable error. Giving up."];
        }

        #
        # When precheck is off, we do a precheck run if the first try fails
        # to find a solution. This avoids looping on an unsolvable topology.
        # But, if the reason we are here is cause we could not allocate nodes,
        # then we found a solution, and so trying on an empty testbed is
        # pointless; it will obviously find a solution again.
        #
        if (!$precheck && !$tried_precheck && ($retval == 2 || $retval == 3)) {
            $tried_precheck = 1;
        }
        if (!$precheck && !$tried_precheck) {
            chat("Trying assign on an empty testbed to verify mapability.\n");
            my $save_impotent = $impotent;
            $impotent  = 1;
            my $emptyval = RunAssign(1, $prefix . ".empty");
            #
            # Put the real mode back before doing anything else, including
            # the failure return below. The caller may go on to try another
            # architecture after we return an error, and that attempt must
            # not run with impotent mode left over from this check.
            #
            $impotent = $save_impotent;
            if ($emptyval != 0) {
                return [{type=>'extra', cause=>'user', severity=>SEV_ERROR,
                         error=>['infeasible_resource_assignment']}, 
                        "This experiment cannot be instantiated on this ".
                        "testbed. You have most likely asked for hardware ".
                        "this testbed does not have, such as nodes of a type ".
                        "it does not contain, or nodes with too many network ".
                        "interfaces.  You will need to modify this experiment ".
                        "before it can be swapped in - re-submitting the ".
                        "experiment as-is will always result in failure."];
            }
            chat("Assign succeeded on an empty testbed.\n");
            $tried_precheck = 1;
        }
        # We try a minimum number of times, cause the node pool is
        # always changing. But once we hit the maxrun, we continue
        # only if progress on the last loop.
        if ($currentrun >= $maxrun && !$progress) {
            return [{type => 'primary', severity => SEV_ERROR,
                     error => ['reached_assign_run_limit']},
                    "Reached run limit. Giving up."];
        }
        # See if we made progress or not.
        # Keep going if we allocated some nodes.
        $progress = ($retval == 3);

        # A little bit of backoff after failure. 
        my $sval = int(rand($currentrun * 3)) + 3;
        chat("Waiting $sval seconds and trying again...\n");
        sleep($sval);
        $currentrun++;
    }
    # Stats are recorded only for a real mapping run.
    GatherAssignStats($pid, $eid, %{ $vtop->exptstats() })
        if (! ($impotent || $alloconly || $regression || $toponly));
    TBDebugTimeStamp("mapper loop finished");
    return 0;
}

#
# The guts of an assign run.
#
sub RunAssign($$)
{
    my ($precheck, $prefix) = @_;

667
    my $typelimitfile = $prefix .".limits";
668
669
    my $ptopfile = $prefix . ".ptop";
    my $vtopfile = $prefix . ".vtop";
670
671
672
673
    if ($userspec) {
	$ptopfile .= ".xml";
	$vtopfile .= ".xml";
    }
674
    my $assignexitcode = 0;
Leigh B Stoller's avatar
Leigh B Stoller committed
675
676
    my $assignlog  = "assign.log";
    my $resinfolog = "resinfo.log";
677

Leigh B Stoller's avatar
Leigh B Stoller committed
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
    #
    # Now generate a vtop file and dump it to a file.
    #
    if (! open(VTOPFILE, "> $vtopfile")) {
	tberror("Could not open $vtopfile: $!\n");
	return -1;
    }
    my $reslibvtop;
    if ($userspec == 1) {
	$reslibvtop = $vtop->PrintRspec(*VTOPFILE);
    }
    else {
	$reslibvtop = $vtop->PrintTop(*VTOPFILE);
    }
    if ($reslibvtop != 0) {
	tberror("Could not print vtop file for $experiment\n");
	return -1;
    }
    close(VTOPFILE);
697
    system("/bin/cp -fp $vtopfile ${pid}-${eid}.vtop") if ($debug);
Leigh B Stoller's avatar
Leigh B Stoller committed
698
699
700
    return 0
	if ($toponly);

701
702
703
704
705
706
707
708
709
710
711
712
713
714
    if (! ($impotent || $regression || $alloconly)) {
	if ($experiment->Update({"maximum_nodes" => $vtop->maximum_nodes(),
				 "minimum_nodes" => $vtop->minimum_nodes(),
				 "virtnode_count"=> $vtop->virtnodecount() })){
	    tberror("Could not update min/max/virt nodes for $experiment\n");
	    return -1;
	}
    }
    # New solution each time.
    $vtop->ClearSolution();

    goto skipassign
	if ($vtop->nodecount() == $vtop->genicount());

715
716
717
718
719
720
    # Debugging hack.
    if ($regression || $noassign || $assignfile) {
	$assignlog = $assignfile
	    if ($assignfile);
	
	if (! -e $assignlog) {
721
722
723
724
725
726
727
	    chat("No existing assign results file!\n");
	    return -1;
	}
	chat("Using existing assign results file\n");
	goto skiprun;
    }
    
728
729
730
731
    #
    # Do admission control test, and gather the info.
    #
    my %admission_control;
732
733
734
735
736
737
    if (!$regression) {
	if (!TBAdmissionControlCheck(undef, $experiment, \%admission_control)){
	    tberror("Failed admission control checks!\n");
	    return -1;
	}
    }
738

739
    # Bound the results to avoid future reservation conflicts.
740
    Reservation->FlushAll();
Leigh B Stoller's avatar
Leigh B Stoller committed
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
    my $reservation_bounds = Reservation->MaxSwapInMap($experiment);
    # Dump to file.
    DumpReservationInfo($resinfolog, $reservation_bounds);
    foreach my $type (keys(%$reservation_bounds)) {
	if (exists($admission_control{$type})) {
	    if ($reservation_bounds->{$type} < $admission_control{$type}) {
		$admission_control{$type} = $reservation_bounds->{$type};
	    }
	}
	else {
	    $admission_control{$type} = $reservation_bounds->{$type};
	}
	#
	# Look to see if topology specifies nodes with this specific
	# type. If so, we can tell right away that the user is asking for
	# more then is available.
	#
	# This only works for nodes with hard types. Auxtypes and vtypes
	# will still need to go through assign. Might be able to handle
	# auxtypes though.
	#
	if (exists($vtop->types()->{$type})) {
	    my $bound  = $reservation_bounds->{$type};
	    my $wanted = $vtop->types()->{$type};

	    if ($wanted > $bound) {
		print "*** Resource reservation violation: ".
768
		    "$wanted nodes of type $type requested, but only $bound ".
769
		    "available because of existing resource reservations ".
Leigh B Stoller's avatar
Leigh B Stoller committed
770
771
772
		    "to other projects.\n";
		# Unretriable error
		return -1;
773
774
775
776
	    }
	}
    }
    
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
    #
    # Append this admission control results to ptopgen.
    #
    if (scalar(keys(%admission_control))) {
	open(TYPELIMIT, ">$typelimitfile") or
	    return -1;

	foreach my $typeclass (keys(%admission_control)) {
	    my $count = $admission_control{$typeclass};

	    print TYPELIMIT "$typeclass $count\n";
	}
	close(TYPELIMIT);
    }


793
794
795
796
797
798
799
800
801
802
803
    #
    # Snapshot physical resources.
    #
    # if updating (-u), include any resources that may already be
    # allocated to experiment in the PTOP results.
    #
    my $ptopargs = "-p $pid ";
    $ptopargs   .= "-e $eid "
	if ($updating);
    $ptopargs   .= "-u "
	if ($updating && $experiment->elabinelab());
804
805
    $ptopargs   .= "-m $mfactor "
	if (defined($mfactor));
806
807
    $ptopargs   .= "-C "
	if ($usecontrol);
808
809
810
811
812
813
    $ptopargs   .= "-v "
	if ($vtop->virtnodecount());
    $ptopargs   .= "-r "
	if ($vtop->remotenodecount());
    $ptopargs   .= "-S "
	if ($vtop->simnodecount());
814
815
    $ptopargs   .= "-h "
	if ($vtop->sharednodecount());
816
817
    $ptopargs   .= "-b "
	if ($vtop->bstorecount());
818
    $ptopargs	.= "-a "
819
    	if ($precheck || $allnodesfree);
820
821
    $ptopargs	.= "-c " . $experiment->delay_capacity() . " "
    	if (defined($experiment->delay_capacity()));
822
823
824
    if ($userspec == 1) {
	$ptopargs .= "-x -g 2 ";
    }
825
826
    if ($use_old_ptopgen == 1) {
	$ptopargs .= "-Z ";
827
    }
828
829
    $ptopargs .= "-z "
	if ($project->IsNonLocal() || $vtop->sharednodecount());
830
831
832
833
834
835
    if ($gblinkcheck) {
	if ($vtop->mgblinks() == 0) {
	    $ptopargs .= "-G ";
	}
    }
    $ptopargs .= "-l $typelimitfile "
836
	if (scalar(keys(%admission_control)));
837
838
839
840
841

    chat("ptopargs: '$ptopargs'\n");
    TBDebugTimeStamp("ptopgen started");
    system("$PTOPGEN $ptopargs > $ptopfile");
    if ($?) {
842
843
	tberror("Failure in ptopgen\n");
	return -1;
844
    }
845
    system("/bin/cp -fp $ptopfile ${pid}-${eid}.ptop") if ($debug);
846
847
848
    TBDebugTimeStamp("ptopgen finished");

    # Run assign
849
    my $cmd  = "assign";
850
851
    $cmd .= "-new"
	if ($newassign);
852
    my $args = "";
853
854
855
856
857
858
    if ($XERCES) {
	$args .= "-f rspec " 
	    if ($userspec == 1);
	$args .= "-f text "
	    if ($userspec == 0);
    }
859
    $args .= "$ptopfile $vtopfile";
860
    $args = "-P $args"
861
	if (!$vtop->sharednodecount());
862
863
    $args = "-F $args"
	if (!$updating);
864
    $args = "-uod -c .75 -H 3 $args"
865
866
867
	if ($vtop->virtnodecount() || $vtop->simnodecount());
    $args = "-n $args"
    	if ($precheck);
868
869
    $args = "-S $packoption $args"
    	if (defined($packoption));
870
871
    $args = "-s 123456 $args"
	if ($regression);
872
    $args = "-R $args PN=1.0"
Leigh B Stoller's avatar
Leigh B Stoller committed
873
	if (0 && $vtop->sharednodecount());
874
    
875
    # The prepass speeds up assign on big topos with virtual nodes.
Leigh B Stoller's avatar
Leigh B Stoller committed
876
    if ($prepass || $useprepass) {
877
878
879
880
    	$cmd = "assign_prepass";
    	$args = "-m $mfactor $args"
    	    if (defined($mfactor));
    }
881
882
883
884
885
886
    chat("assign command: '$cmd $args'\n");

    #
    # Fork a child to run assign. Parent spins watching the cancel flag
    # and waiting for assign to finish.
    #
887
    TBDebugTimeStamp("assign started");
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
    if (my $childpid = fork()) {
	while (1) {
	    sleep(2);

	    if (waitpid($childpid, &WNOHANG) == $childpid) {
		$assignexitcode = $? >> 8;
		last;
	    }

	    # Check cancel flag.
	    if ($experiment->canceled()) {
		if ((my $pgrp = getpgrp($childpid)) > 0) {
		    kill('TERM', -$pgrp);
		    waitpid($childpid, 0);

903
904
905
		    tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
			     error => ['cancel_flag']},
			    "Cancel flag set; aborting assign run!\n");
906
907
908
909
910
911
912
913
914
915
916
917
		    return -1;
		}
		# Loop again to reap child above before exit.
	    }
	}
    }
    else {
	#
	# Change our session so the child can get a killpg without killing
	# the parent. 
	#
        POSIX::setsid();
918
	exec("nice $WRAPPER2 $cmd $args > $assignlog 2>&1");
919
920
	die("Could not start assign!\n");
    }
921
    TBDebugTimeStamp("assign finished");
922
923
924

    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {
925
926
927
	tberror({cause => 'canceled', severity => SEV_IMMEDIATE,
		 error => ['cancel_flag']},
		"Cancel flag set; aborting assign run!\n");
928
929
930
931
	return -1;
    }

    # Check for possible full filesystem ...
932
933
    if (-z $assignlog) {
	tbnotice("$assignlog is zero length! Stopping ...\n");
934
935
936
937
938
939
940
941
942
	return -1;
    }

    #
    # Saving up assign.log coz each swapin/modify is different and it
    # is nice to have every mapping for debugging and archiving
    # purposes We do not call it .log though, since we do not want it
    # copied out to the user directory every swapin. See Experiment.pm
    #
943
944
    system("/bin/cp -fp $assignlog ${prefix}.assign");
    system("/bin/cp -fp $assignlog ${pid}-${eid}.assign") if ($debug);
Leigh B Stoller's avatar
Leigh B Stoller committed
945
    system("/bin/cp -fp $resinfolog ${prefix}-${resinfolog}");
946
947

    if ($assignexitcode) {
948
949
	print "Assign exited with $assignexitcode\n" if ($debug);
	
950
	system("/bin/cat $assignlog");
Leigh B Stoller's avatar
Leigh B Stoller committed
951
	
952
953
954
955
956
957
958
959
	#
	# assign returns two positive error codes (that we care about).
	# The distinction between them is somewhat murky. An exitval of
	# 1 means "retryable" while 2 means "unretryable". The former
	# means we can try again, while the later says there is no possible
	# way to map it. We pass this back to the caller so that we know
	# to exit the loop or try again.
	#
960
961
962
963
964
965
966
	return (($assignexitcode == 1) ? 1 : -1);
    }
    #
    # If we were doing the precheck, go ahead and exit now - there is no
    # useful information to parse out
    #
    if ($precheck) {
967
	chat("Precheck succeeded.\n");
968
969
	return 0;
    }
970
  skiprun:
971
    chat("Reading assign results.\n");
972
    if (!open(ASSIGNFP, $assignlog)) {
973
974
975
	print("Could not open assign logfile! $!\n");
	return -1;
    }
976
    TBDebugTimeStamp("ReadSolution started");
977
    if ($vtop->ReadTextSolution(*ASSIGNFP) != 0) {
978
979
980
981
	print("Could not parse assign logfile! $!\n");
	return -1;
    }
    close(ASSIGNFP);
982
    TBDebugTimeStamp("ReadSolution ended");
983
  skipassign:
984
    
985
    if (defined($vtop->genicount())) {
986
987
988
989
990
991
992
993
	TBDebugTimeStamp("Map Geni Resources Started");
	if ($vtop->MapResources() != 0) {
	    print("Could not map external resources! $!\n");
	    return -1;
	}
	TBDebugTimeStamp("Map Geni Resources ended");
    }
    TBDebugTimeStamp("InterpNodes Started");
994
995
996
997
    if ($vtop->InterpNodes() != 0) {
	print("Could not interpret nodes.\n");
	return -1;
    }
998
    TBDebugTimeStamp("InterpNodes ended, AllocNodes Started");
999
1000
    # Check cancel flag before continuing. 
    if ($experiment->canceled()) {