assign_wrapper.in 112 KB
Newer Older
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1
#!/usr/bin/perl -w
Leigh B. Stoller's avatar
Leigh B. Stoller committed
2
3
#
# EMULAB-COPYRIGHT
Leigh B. Stoller's avatar
Leigh B. Stoller committed
4
# Copyright (c) 2000-2003 University of Utah and the Flux Group.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
5
6
# All rights reserved.
#
7
8
use English;
use Getopt::Std;
9
10
use POSIX qw(setsid);
use POSIX ":sys_wait_h";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
11

12
#
Leigh B. Stoller's avatar
Leigh B. Stoller committed
13
14
15
16
17
18
# This function as the main assign loop.  It converts the virtual
# topology into a top input including LAN and delay translation.  It
# then snapshots the current testbed physical state and runs assign,
# looping a couple times if assign fails.  When assign successfully
# completes it will interpret the results.  Attempt to match any
# existing portmap entries and then update the delays and vlans table.
Chad Barb's avatar
Chad Barb committed
19
#
20
21
22
23
# XXX: Update does not work with widearea nodes.
#      Internally created nodes (jailhost,delay,sim) are not treated
#        consistently. Needs more thought.
#
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Return codes: -1 is an uncontrolled error (someone called die()). No
# recovery is possible so the caller has to check for this explicitly.
# The CANRECOVER bit indicates 'recoverability' (no db or physical
# state was modified by the time the error occurred). It also is used
# to signal that a retry is possible. 
#
my $WRAPPER_SUCCESS		 = 0x00;
my $WRAPPER_FAILED		 = 0x01;	# Failed (Add other values)
my  $WRAPPER_FAILED_MAXCONCURENT = 0x02;	# max_concurrent violation
my  $WRAPPER_FAILED_BANDWIDTH    = 0x04;	# Bandwidth violation
my  $WRAPPER_FAILED_LINKUSERS    = 0x08;	# Linkusers violation
my  $WRAPPER_FAILED_DESIRES      = 0x10;	# Desires violation
my  $WRAPPER_FAILED_UNASSIGNED   = 0x20;	# Unassigned violation
my  $WRAPPER_FAILED_CANRECOVER   = 0x80;        # Can recover, retry.
# Set this once we modify DB state; forces no recover in fatal().
my $NoRecover = 0;
    
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
sub usage ()
{
    print STDERR "Usage: $0 [-v] [-u | -n] pid eid\n";
    print STDERR " -v   - enables verbose output\n";
    print STDERR " -u   - enables update functionality\n";
    print STDERR " -t   - Create the TOP file and then exit\n";
    print STDERR " -n   - Run assign, but do not reserve/modify resources.\n";
    exit(-1);
}
my $optlist  = "vutn";
my $verbose  = 0;
my $updating = 0;
my $toponly  = 0;
my $impotent = 0;

#
# Configure variables
#
my $TBROOT	  = "@prefix@";
my $DELAYCAPACITY = @DELAYCAPACITY@;
$ENV{'PATH'} = "/usr/bin:$TBROOT/libexec:$TBROOT/sbin:$TBROOT/bin";

#
# Turn off line buffering on output
#
$| = 1;

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
require exitonwarn;

Chad Barb's avatar
Chad Barb committed
76
#
77
78
79
80
81
# assign_wrapper Settings
#
# Maximum delay in ms above which a delay node is needed.
# (Note that the DB represents delays as floating point numbers)
my $delaythresh = 2;
Chad Barb's avatar
Chad Barb committed
82

83
84
# Maximum number of times we run assign.
my $maxrun = 20;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
85

86
87
88
# Use the switch to delay when possible. Currentlythis only works for 10mbit
# links (actually, its turned off cause it does not work; auto handshake).
my $delaywithswitch = 0;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
89

90
91
92
93
94
95
96
97
98
99
100
#
# Some handy constants. Speed in Mbits/sec and Kbits/sec units.
#
# Its probably a good idea to leave portbw (current_speed) in Mbs, since
# those numbers are used at the switch and the interfaces, which really
# only think in Mbps.
#
my $S10Mbs  = 10;
my $S100Mbs = 100;
my $S10Kbs  = 10000;
my $S100Kbs = 100000;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
101

102
#
103
104
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
105
#
106
107
108
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
109
}
110
111
if (@ARGV != 2) {
    usage();
Chad Barb's avatar
Chad Barb committed
112
}
113
if (defined($options{"v"})) {
Chad Barb's avatar
   
Chad Barb committed
114
115
    $verbose = 1;
}
116
if (defined($options{"u"})) {
Chad Barb's avatar
   
Chad Barb committed
117
    $updating = 1;
118
}
119
120
121
122
123
if (defined($options{"t"})) {
    $toponly = 1;
}
if (defined($options{"n"})) {
    $impotent = 1;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
124
}
125
126
127
128
my $pid = $ARGV[0];
my $eid = $ARGV[1];
my $ptopfile = "$pid-$eid-$$.ptop";
my $topfile  = "$eid.top";
129

130
131
132
sub fatal ($$)
{
    my($exitcode, $message) = @_;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
133

134
135
136
137
138
139
140
    print STDERR "*** $0:\n".
	         "    $message\n";

    # Remove recover bit.
    $exitcode = $exitcode & ~$WRAPPER_FAILED_CANRECOVER
	if ($NoRecover);
    
141
142
    exit($exitcode);
}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
143

144
145
sub printdb ($)
{
Leigh B. Stoller's avatar
Leigh B. Stoller committed
146
147
148
    if ($verbose) {
	print $_[0];
    }
149
}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
150

151
print "assign_wrapper improved started\n";
152
153
TBDebugTimeStamp("assign_wrapper started");

Leigh B. Stoller's avatar
Leigh B. Stoller committed
154
#
155
# The main data structures:
Leigh B. Stoller's avatar
Leigh B. Stoller committed
156
#
157
158
159
160
161
# virt_nodes: The virtual nodes, indexed by vname. Each entry is a
# hash reference, initially of just the DB info, but possibly
# augmented as we proceed through assign.  Do not confuse these
# virtual nodes with the other virtual nodes! These are the ones from
# the actual topology, the virt_nodes table in the DB.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
162
#
163
164
my %virt_nodes = ();

Leigh B. Stoller's avatar
Leigh B. Stoller committed
165
#
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# virt_lans: The equivalent of virt_nodes; the virt_lans table in the DB.
# Since there are multiple rows per lan (one for each node), this is a
# multilevel structure. The first slot is another hash, one for each node.
# The rest of the slots store other random things associated with the lan.
# So, looks something like:
#
#   %virt_lans = (link0 => {members    => member0 => { db row ref },
#                                         member1 => { db row ref }}
#                           mustdelay    => 0,
#                           emulated     => 0,
#                           uselinkdelay => 0,
#                           nobwshaping  => 0,
#                           useveth      => 0,
#                           trivok       => 0
#                          }
#                 link1 => ...
#                )
#
my %virt_lans = ();

#
# virt_vtypes: The virt_vtypes table in the DB, indexed by the vtype
# name (user chosen name).
#
my %virt_vtypes = ();

#
# node_types: The node_types table from the DB, indexed by the type name.
#
my %node_types = ();

Leigh B. Stoller's avatar
Leigh B. Stoller committed
197
# 
198
199
200
201
# interface_types: We need this to find out the bandwidths of the devices
# we actually have on the testbed. Index by interface type name.
#
my %interface_types = ();
Leigh B. Stoller's avatar
Leigh B. Stoller committed
202

203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#
# phys_nodes: The equiv of virt_nodes above, except that these are pulled
# from the DB once the physical resources have been allocated. Indexed
# by physname, but there is a pointer from the virt_nodes table entry
# to the the corresponding physnode entry. 
# 
my %phys_nodes = ();

#
# More physical side data structures.
# v2pmap is indexed by virtual and contains the physical node.
my %v2pmap = ();
# p2vmap is indexed by physical and contains one or more virtual nodes.
my %p2vmap = ();
# plinks is indexed by virtual name and contains
#  (pnodeportA,pnodeportB) .  If one is a delay node it is always
#  the second.
my %plinks = ();
# virtnodes is the list of subnodes on physnodes.
my %virtnodes = ();
my %v2vmap = ();

#
# Support for experiment modify. We create v2p and v2v mappings of the
# current topology so we can figure out how its changed after assign
# runs. These correspond to v2pmap and v2vmap mentioned above.
# 
my %reserved_v2pmap  = ();
my %reserved_v2vmap  = ();
232

Leigh B. Stoller's avatar
Leigh B. Stoller committed
233
#
234
235
236
237
238
239
240
241
242
243
244
245
246
# Experiment wide options. See below. They come from the experiments table.
# Defining these will override experiment table setting. 
#
# Set this when forcing linkdelays instead of delay nodes. Set in the NS
# file with a tb-compat directive. The force directive says to set up a
# link delay, even when no delay would otherwise be inserted.
# usewatunnels is also set in the NS, and can be used to turn them off. 
# The multiplex_factor is to override node_types table for virtnode.
my $uselinkdelays;
my $forcelinkdelays;
my $usewatunnels;
my $multiplex_factor;

247
248
249
250
# For admission control. Not well defined yet.
my $cpu_usage;
my $mem_usage;

251
######################################################################
Leigh B. Stoller's avatar
Leigh B. Stoller committed
252

253
254
255
256
257
# ips is indexed by node:port and contains the IP address for the port.
my %ips	      = ();

# memberof is indexed by node:port and holds the lan it is a member of.
my %memberof = ();
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273

# delaylinks stores the actual link delay info, converted from the
# virt delay params above. It is indexed by link name and contains
# [delay,bw,loss] for each direction. 
my %delaylinks = ();

# delaynodes stores the names of delaynodes that we create on the
# fly using delayid. This is useful for doing isdelay? tests.
my %delaynodes = ();
my $delayid    = 0;

# nodedelays and linkdelays are the final (!) representation. Indexed by
# integer id, they store the physical node info and the delay info. 
my %nodedelays = ();
my %linkdelays = ();

274
275
276
277
# Virtual nodes that the user has requested be "fixed" to a specific
# physical node.
my %fixed_nodes     = ();

278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# portbw is indexed by virtual nodeport and contains the bandwidth of
# that port. Note that port bandwidth in the interfaces table is left
# in Mbps units for now. Thats inconsistent, I know. For LANs with
# other bandwidths the port speed will be 100 and a delay node will be
# inserted.
my %portbw = ();

# lannodes is indexed by physical name and is the set of fake lan nodes.
# lan nodes are named lan/<virtual lan>. We need to know these so that
# when they come back from assign, we can ignore them.
my %lannodes = ();

# Node estimates and counts. Avoid running assign if there is no way to
# satisfy the estimates for physical nodes.
my $minimum_nodes;
my $maximum_nodes;
my $remotecount  = 0;
my $virtcount    = 0;
296
my $plabcount    = 0;
297
my $needwanassign= 0;
298

299
300
301
302
303
304
305
306
307
308
309
310
#
# This is for stats gathering. It might duplicate other stuff, but
# thats okay.
#
my %expt_stats = (# pnodes include jailnodes and delaynodes.
		  # We let the wrapper determine pnodes once the
		  # experiment is fully swapped in so that the record
		  # is not "committed" until successful swapin.
		  jailnodes   => 0,
		  vnodes      => 0,
                  # vnodes include wanodes.
		  wanodes     => 0,
311
312
		  # wanodes includes plabnodes.
		  plabnodes   => 0,
313
314
315
316
317
318
319
320
321
322
323
324
325
		  simnodes    => 0,
		  delaynodes  => 0,
		  linkdelays  => 0,
		  links       => 0,
		  walinks     => 0,
		  lans        => 0,
		  shapedlinks => 0,
		  shapedlans  => 0,
		  minlinks    => 100000,
		  # includes emulated links. Maybe thats wrong.
		  maxlinks    => 0,
);

326
327
# XXX NSE hack: List of simulated nodes. All these will go
# into one pc850. Needs to change in distributed nse.
Shashi Guruprasad's avatar
Shashi Guruprasad committed
328
329
my @simnodelist;
my %simnode_iplist = ();
330
331
my %iptonodemap    = ();
my $nsenode_id     = 0;
332
333
# lans that have simulated nodes
my %simnodelans    = ();
Shashi Guruprasad's avatar
Shashi Guruprasad committed
334

335
# Counters for generating IDs.
336
my $virtnode_id  = 0;
337
my $veth_id      = 0;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
338

339
340
######################################################################
# Step 1 - Setup virtual topology
341
#
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# Here we need to read the virtual topology in from the virt_nodes
# and virt_lans table.  We then need to add delay and lan nodes as
# necessary.
#
# Conversion details:
#   Let L be a LAN with N members.
#   If N == 2 
#      Let N1 be node 1
#      Let N2 be node 2
#      If L is delayed
#         Generate delay node D
#         Link N1 to D
#         Link N2 to D
#      Else
#         Link N1 to N2
#   Else
#      Generate lan node A
#      If L is delayed
#        Foreach node N in L
#           Generate delay node DN
#           Link A to DN
#           Link N to DN
#      Else
#        Foreach node N in L
#           Link N to A
#
# Delay node names:
#  delay nodes are named tbdelayXX N > 2
#   and tbsdelayXX for N == 2.
#
########################################################################
Chad Barb's avatar
   
Chad Barb committed
373

374
375
printdb "Generating TOP file.\n";
TBDebugTimeStamp("TOP started");
376

377
378
379
380
381
#
# Load phys info. Interface types, node types, etc. Its any physical stuff
# we need.
#
LoadPhysInfo();
382

383
384
385
386
#
# Load the Experiment info and virt topology.
#
LoadExperiment();
387

388
389
390
391
392
393
#
# If updating, load current experiment resources. We have to be careful
# of how this is merged in with the (new) desired topology. See below.
#
LoadCurrent()
    if ($updating);
Chad Barb's avatar
   
Chad Barb committed
394

395
396
397
398
#
# Check Max Concurrent for OSID violations.
#
CheckMaxConcurrent();
399

400
401
402
403
#
# Create the TOP file.
#
CreateTopFile();
404

405
406
407
408
409
410
TBDebugTimeStamp("TOP finished");

# Stop here ...
if ($toponly) {
    print "Stopping after creating the TOP file, as directed.\n";
    exit(0);
Leigh B. Stoller's avatar
Leigh B. Stoller committed
411
}
412

413

414
415
416
417
418
419
420
421
422
######################################################################
# Step 2 - Assign Loop
# 
# Here we loop up to maxrun times.  In each loop we snapshot the
# current testbed state into a ptop file.  We then run assign.  If
# assign succeeds we attempt to reserve the resources.  If that works
# we're done with step 2 otherwise we loop again.
#
#######################################################################
Leigh B. Stoller's avatar
Leigh B. Stoller committed
423

424
425
my $currentrun = 1;
my $canceled   = 0;
426
427

# XXX plab hack - only run assign once on plab topologies, since they're easy
428
# to map and the physical topology does not change frequently.
429
430
431
if ($plabcount && (keys(%virt_nodes) == $plabcount)) {
    $maxrun = 1;
}
432

433
434
TBDebugTimeStamp("assign_loop started");
while (1) {
435
436
    # Check cancel flag before continuing. 
    TBGetBatchCancelFlag($pid, $eid, \$canceled);
437
438
    fatal($WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER,
	  "Cancel flag set; aborting assign run!")
439
440
	if ($canceled);

441
    print "Assign Run $currentrun\n";
442

443
444
445
446
447
448
449
450
451
452
453
454
455
    #
    # RunAssign returns 0 if successful. Returns -1 if there was an
    # an error in RunAssign that is deemed fatal; do not try anymore.
    # The exitcode is returned in $exitcode. If RunAssign returns a
    # postive value, it means there was a problem, but we can keep
    # trying.
    #
    my $exitcode = 0;
    my $retval   = RunAssign(\$exitcode);

    # Success!
    last
	if ($retval == 0);
Leigh B. Stoller's avatar
Leigh B. Stoller committed
456

457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
    if ($retval < 0) {
	#
	# Failure in assign.
	#
	if ($currentrun >= $maxrun) {
	    fatal($exitcode,
		  "Reached run limit. Giving up.");
	}
	fatal($exitcode,
	      "Unretriable error. Giving up.");
    }
    print "Waiting 5 seconds and trying again...\n";
    sleep(5);
    $currentrun++;
}
TBDebugTimeStamp("assign_loop finished");

#
# Run assign once.
# 
sub RunAssign ($)
{
    my ($retval) = @_;

    # Clear globals for each run.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
482
483
    undef %v2pmap;
    undef %p2vmap;
484
    undef %v2vmap;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
485
    undef %plinks;
486
    undef %virtnodes;
487
488
489

    my %toreserve = ();
    my %subnodes  = ();
Shashi Guruprasad's avatar
Shashi Guruprasad committed
490
    
491
    TBDebugTimeStamp("ptopgen started");
492
    # Snapshot physical resources.
Chad Barb's avatar
   
Chad Barb committed
493
494
495
496
    #
    # if updating (-u), include any resources that may already be
    # allocated to experiment in the PTOP results.
    #
497
498
    my $ptopargs = "-p $pid ";
    $ptopargs   .= "-e $eid "
499
500
501
	if ($updating);
    $ptopargs   .= "-m $multiplex_factor "
	if (defined($multiplex_factor));
502
503
    $ptopargs   .= "-v "
	if ($virtcount);
504
505
    $ptopargs   .= "-r "
	if ($remotecount);
506
    system("ptopgen $ptopargs > $ptopfile");
Chad Barb's avatar
   
Chad Barb committed
507

508
    TBDebugTimeStamp("ptopgen finished");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
509
510

    # Get number of nodes
511
512
    my $numnodes_result = 
	DBQueryFatal("select a.node_id,a.type from" .
Leigh B. Stoller's avatar
Leigh B. Stoller committed
513
514
515
		     " nodes as a left join reserved as b" .
		     " on a.node_id=b.node_id" .
		     " where b.node_id is null" .
516
		     " and a.role='testnode' and a.type!='dnard'");
517
    $numnodes = $numnodes_result->numrows;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
518
519
    
    if ($numnodes < $minimum_nodes) {
520
521
	fatal($WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER,
	      "Insufficient nodes available.");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
522
523
    }

524
    TBDebugTimeStamp("assign started");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
525
    # Run assign
Leigh B. Stoller's avatar
Leigh B. Stoller committed
526
    my $cmdargs = "$ptopfile $topfile";
527
    $cmdargs = "-Pop $cmdargs"
528
	if ($virtcount);
529
    print "assign $cmdargs\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
530

531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
    #
    # Fork a child to run assign. Parent spins watching the cancel flag
    # and waiting for assign to finish.
    #
    if (my $childpid = fork()) {
	while (1) {
	    sleep(2);

	    if (waitpid($childpid, &WNOHANG) == $childpid) {
		$assignexitcode = $? >> 8;
		last;
	    }

	    # Check cancel flag.
	    TBGetBatchCancelFlag($pid, $eid, \$canceled);
	    if ($canceled) {
		if ((my $pgrp = getpgrp($childpid)) > 0) {
		    kill('TERM', -$pgrp);
		    waitpid($childpid, 0);
	    
		    fatal($WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER,
			  "Cancel flag set; aborting assign run!");
		}
		# Loop again to reap child above before exit.
	    }
	}
    }
    else {
	#
	# Change our session so the child can get a killpg without killing
	# the parent. 
	#
        POSIX::setsid();
	exec("assign $cmdargs > assign.log");
	die("*** $0:\n".
	    "    Could not start assign!\n");
    }
Chad Barb's avatar
Chad Barb committed
568

569
570
    # Check cancel flag before continuing. 
    TBGetBatchCancelFlag($pid, $eid, \$canceled);
571
572
    fatal($WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER, 
	  "Cancel flag set; aborting assign run!")
573
574
	if ($canceled);

575
576
577
578
579
580
581
582
    my $violations = 0;
    my $score = -1;

    open(ASSIGNFP, "assign.log") or
	fatal($WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER,
	      "Could not open assign logfile!");

    if ($assignexitcode == 0) {
Chad Barb's avatar
Chad Barb committed
583
584
585
586
587
588
589
590
591
592
593
594
595
	# read output
	# Header
	printdb "Reading assign results.\n";
	while (<ASSIGNFP>) {
	    chop;
	    /No physical nodes of type (.+)$/ && do {
		$score=-2;
		print $_ . "\n";
	    };
	    /^With ([0-9]+) violations$/ && do {
		$violations = $1;
		last;
	    };
596
597
598
599
	    /^[ \t]+BEST SCORE: [ \t]+([0-9]+(\.[0-9]+)?)/ && do {
		$score=$1;
		print $_ . "\n";
	    };
Chad Barb's avatar
Chad Barb committed
600
601
602
	}
	if ($score == -2) {
	    # Type error
603
604
	    fatal($WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER,
		  "Type error. Giving up!");
Chad Barb's avatar
Chad Barb committed
605
606
	}
	printdb "Found score $score, violations $violations.\n";
607
    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
608

609
610
611
612
613
614
615
616
617
    #
    # If we found violations above, or if the score is <0 then figure
    # out what happened, and return the result. I am not sure if this
    # stuff is set when the assign exitcode is 2 (no retry), but we end up
    # in here anyway. Should be harmless.
    #
    if ($violations || $score < 0 || $assignexitcode) {
	# Compute the exitcode and return it. Caller decides what to do.
	my $exitcode = $WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER;
618

619
620
621
622
623
624
625
626
627
628
629
	# spit out up to nodes
	print "ASSIGN FAILED: \n";       
	while (<ASSIGNFP>) {
	    if (/link_users:\s*(\d+)$/) {
		$exitcode |= $WRAPPER_FAILED_LINKUSERS;
	    }
	    elsif (/bandwidth:\s*(\d+)$/) {
		$exitcode |= $WRAPPER_FAILED_BANDWIDTH;
	    }
	    elsif (/unassigned:\s*(\d+)$/) {
		$exitcode |= $WRAPPER_FAILED_UNASSIGNED;
630
	    }
631
632
633
634
635
636
637
638
	    elsif (/desires:\s*(\d+)$/) {
		$exitcode |= $WRAPPER_FAILED_DESIRES;
	    }
	    if (/^Nodes:/) {last;}
	    print "$_";
	}
	close(ASSIGNFP);
	$$retval = $exitcode;
Chad Barb's avatar
   
Chad Barb committed
639

640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
	# Signal retriable to caller, since assign thinks it is.
	return 1
	    if ($assignexitcode == 1);
	return -1;
    }

    #
    # Assign success; parse results.
    # 
    # read nodes section
    while (<ASSIGNFP> !~ /^Nodes:/) {}
    printdb "Nodes:\n";
    while (<ASSIGNFP>) {
	chop;
	/^End Nodes$/ && last;
	@info = split;
	my ($virtual,$physical) = @info[0,1];

	# We don't care about LAN nodes anymore.
	if (defined($lannodes{$virtual})) {
	    next;
	}

	if (physnodeallocated($physical)) {
	    #
	    # Mark node as being reused.
	    #
	    # Look at virtual node being mapped to node;
	    # if it wasn't in the previous map, mark node for reboot.
	    #
	    if (physnodereuse($physical) eq "reboot") {
		# No changes once it goes into reboot.
		;
	    }
	    elsif (virtnodeisvirt($virtual)) {
675
		#
676
677
678
679
680
681
682
683
684
685
686
687
688
689
		# A new virt virtual node on an existing physical node
		# does not force the physnode to be rebooted; we can
		# set up a new virtnode on it without a reboot. If its
		# an existing virtual on the same physnode, then mark
		# both as reused; no need to reboot either. If the 
		# virtnode has moved here from someplace else, no
		# reboot of the physnode either, but obviously the
		# vnode will be released and a new one allocated.  What
		# we cannot determine is if its just a renamed node
		# (which would require a reboot of the the virtual
		# node). 
		# 
		if (!exists($reserved_v2pmap{$virtual})) {
		    physnodesetreuse($physical, "reused");
Chad Barb's avatar
   
Chad Barb committed
690
		}
691
692
		elsif ($reserved_v2pmap{$virtual} eq $physical) {
		    my $reserved = $reserved_v2vmap{$virtual};
693

694
695
		    physnodesetreuse($reserved, "reused");
		    physnodesetreuse($physical, "reused");
696
697
		}
		else {
698
		    physnodesetreuse($physical, "reused");
699
700
701
		}
	    }
	    else {
Chad Barb's avatar
   
Chad Barb committed
702
		#
703
704
705
706
707
708
709
		# If a new virtual node mapped to this physnode (maybe
		# even the luser changed the name of the node), or if an
		# existing virtual node moved to this physnode, must
		# reboot the physnode. Else, the physnode is being
		# reused as is, and no need to mess with it. If the
		# user requested reboot, that will be handled outside
		# of this script.
710
		#
711
712
713
714
715
716
		if (!exists($reserved_v2pmap{$virtual}) ||
		    $reserved_v2pmap{$virtual} ne $physical) {
		    physnodesetreuse($physical, "reboot");
		}
		else {
		    physnodesetreuse($physical, "reused");
717
		}
718
	    }
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
	}
	else {
	    #
	    # This is a new node; we'll have to reserve it. Note that
	    # we do not reserve a widearea physnode when a virtual node
	    # is mapped to it; they are special.
	    #
	    $toreserve{$physical} = 1
		if (!virtnodeisremote($virtual));
	}
	
	if (virtnodeisvirt($virtual)) {
	    #
	    # If mapping a virtual node, then record that, since we need
	    # to allocate the virtnodes on that physnode, later.
	    #
	    if (!defined($virtnodes{$physical})) {
		$virtnodes{$physical} = [];
Shashi Guruprasad's avatar
Shashi Guruprasad committed
737
	    }
738
739
740
741
742
743
744
745
746
747
748
749
750
	    push(@{$virtnodes{$physical}}, $virtual);
	}
	elsif (virtnodeissubnode($virtual)) {
	    #
	    # Need to allocate the parent to. Should be optional?
	    # Save away and deal with once we have all the results.
	    #
	    $subnodes{$virtual} = $physical;
	}
	
	$v2pmap{$virtual} = $physical;
	if( ! defined($p2vmap{$physical}) ) {
	    $p2vmap{$physical} = [];
Leigh B. Stoller's avatar
Leigh B. Stoller committed
751
	}
752
753
754
	push(@{$p2vmap{$physical}}, $virtual);
	printdb "  $virtual $physical\n";
    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
755

756
757
758
759
760
761
762
763
764
    #
    # Process the subnodes. We have to allocate the parent at the same
    # time, lest it get sucked away for some other purpose by another
    # experiment. We might want to push this off into nalloc, but not
    # sure yet.
    #
    for my $virtual (keys(%subnodes)) {
	my $physical = $subnodes{$virtual};
	my $parent;
765

766
	TBPhysNodeID($physical, \$parent);
767

768
	printdb "  Subnode: $virtual $physical $parent\n";
769

770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
	#
	# See if we already have it. Swapmod, retry, or perhaps
	# the parent could be named separately? Or maybe there are
	# several subnodes on the physnode?
	#
	next
	    if (exists($p2vmap{$parent}));

	# Make up a name and add to the list.
	my $newvname = newvname($parent, "phost");

	$v2pmap{$newvname} = $parent;
	$p2vmap{$parent} = [ $newvname ];
	$toreserve{$parent} = 1;
	printdb "  Adding subnode host: $newvname $parent\n";
    }

    # read Edges
    # By convention, in plinks, the delay node is always the second
    # entry.
    while (<ASSIGNFP> !~ /^Edges:/) { }
    printdb "Edges:\n";
    EDGEWHILE: while (<ASSIGNFP>) {
	/^End Edges$/ && last EDGEWHILE;
	@info = split;
	$line = $_;
	$_ = $info[1]; # type
        SWITCH1: {
	    /^intraswitch$/ && do {
		($vlink,$rawA,$rawB) = @info[0,3,5];
		last SWITCH1;
	    };
	    /^interswitch$/ && do {
		($vlink,$rawA,$rawB) = @info[0,3,$#info];
		last SWITCH1;
	    };
	    /^direct$/ && do {
		($vlink,$rawA,$rawB) = @info[0,3,5];
		last SWITCH1;
	    };
	    /^trivial$/ && do {
		# we don't have plinks for trivial links
		$vlink = $info[0];
		$plinks{$vlink} = [];
		next EDGEWHILE;
	    };
	    print "Found garbage: $line\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
817
	}
818
819
820
821
822
823
	$nodeportA = &getnodeport($rawA);
	$nodeportB = &getnodeport($rawB);
	$nodeportA =~ s/\//:/;
	$nodeportB =~ s/\//:/;
	$plinks{$vlink} = [$nodeportA,$nodeportB];
	printdb "  $vlink " . join(" ",@{$plinks{$vlink}}) . "\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
824
825
    }
    close(ASSIGNFP);
826
    TBDebugTimeStamp("assign finished");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
827
828

    # Reserve resources
829
830
831
832
    if ($impotent) {
	print "Skipping physical reservation, as directed.\n";
	return 0;
    }
Chad Barb's avatar
Chad Barb committed
833

834
835
836
837
    TBDebugTimeStamp("reserving started");
    system("nalloc -p $pid $eid " . join(" ", keys(%toreserve)));
    TBDebugTimeStamp("reserving finished");
    my $exitval  = $? >> 8;
Chad Barb's avatar
   
Chad Barb committed
838

839
840
841
842
843
844
845
846
    #
    # If nalloc failed with a fatal error, lets give it up. No retry.
    # 
    if ($exitval < 0) {
	print "Failed to reserve any nodes.\n";
	$$retval = $WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER;
	return -1; 
    }
Chad Barb's avatar
   
Chad Barb committed
847

848
849
850
851
852
853
854
    #
    # Okay, if nalloc got anything, we have to set the norecover bit,
    # since tbswap operates on the principle that any change in the DB
    # means no recover is possible. This can probably me dealt with by
    # deallocating any nodes we allocated in the wrapper before exiting.
    #
    $NoRecover = 1;
Chad Barb's avatar
   
Chad Barb committed
855

856
857
858
859
860
861
862
863
864
865
866
867
868
869
    #
    # Otherwise, all newly allocated nodes MUST go to the INIT_DIRTY
    # allocstate since the user now has control of them. If we eventually
    # fail, nodes not in RES_READY are deallocated (retry/modify). 
    #
    if ($exitval > 0) {
	my @reserved = ExpNodes($pid, $eid);

	# We got only some. Need to figure out which.
	print "Reserved some nodes ($exitval) we needed, but not all.\n";
	
	foreach my $node (@reserved) {
	    if (exists($toreserve{$node})) {
		TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_INIT_DIRTY());
Chad Barb's avatar
   
Chad Barb committed
870
	    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
871
872
	}

873
874
875
876
877
878
879
880
881
882
883
884
	#
	# We check to see if were were able to reserve all the fixed
	# nodes we needed. If we couldn't get the fixed list, then
	# this experiment is unlikely to map in the "near" future, so
	# give up now (no retry).
	#
	foreach my $node (values(%fixed_nodes)) {
	    if (! grep {$_ eq $node} @reserved) {
		printdb "  Could not allocate fixed node $node!\n";
		$$retval = $WRAPPER_FAILED;
		return -1;
	    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
885
	}
886
887
888
889
890
891
892
893
894
895
896
	#
	# Okay, must extend the fixed list with newly allocated nodes
	# so that we can recreate the top file, and try again with a
	# new set.
	#
	foreach my $node (@reserved) {
	    if (exists($toreserve{$node})) {
		foreach my $vname (@{$p2vmap{$node}}) {
		    $fixed_nodes{$vname} = $node;
		}
	    }
897
	}
898
899
900
901
	CreateTopFile();
	$$retval = 0;
	return 1;
    }
Chad Barb's avatar
Chad Barb committed
902

903
904
905
906
907
908
909
910
911
    #
    # We got all the nodes we wanted. All newly allocated nodes MUST
    # go to the INIT_DIRTY allocstate since the user now has control
    # of them.
    #
    print "Successfully reserved all physical nodes we needed.\n";
	
    foreach my $node (keys(%toreserve)) {
	TBSetNodeAllocState($node, TBDB_ALLOCSTATE_RES_INIT_DIRTY());
Leigh B. Stoller's avatar
Leigh B. Stoller committed
912
    }
Chad Barb's avatar
Chad Barb committed
913

914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
    #
    # Release phys and virt nodes no longer needed. They are marked
    # for teardown. They need to be freed by SOMEONE, currently the
    # wrapper (tbswap), since this only happens when in update mode
    # (swapmod).
    #
    foreach my $pnode (keys(%phys_nodes)) {
	my $reuse = physnodereuse($pnode);
		
	if ($reuse eq "unused") {
	    #
	    # Node was used in previous incarnation, but not any more.
	    #
	    TBSetNodeAllocState($pnode, TBDB_ALLOCSTATE_RES_TEARDOWN());
	}
	elsif ($reuse eq "reboot") {
	    #
	    # Node is being reused, but for a different purpose, so
	    # it should be rebooted.
	    #
	    TBSetNodeAllocState($pnode, TBDB_ALLOCSTATE_RES_INIT_DIRTY());
	}
    }
    return 0;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
938
}
939

940
941
942
943
944
945
946
947
948
949
###########################################################################
# Step 2A
#
# We run the wanassigner to allocate remote nodes. We do this after cause
# it takes so long. We run it just once.
#
# wanassign does its own nalloc.
#
###########################################################################

950
#
951
# VIRTNODES HACK: Allocate the remote virtual nodes.
952
#
953
if ($needwanassign) {
954
    my $success  = 0;
955
    my $wanargs  = ($impotent ? "-n" : "");
956

957
958
    print "Running 'wanassign -d $wanargs $pid $eid'\n";
    open(WANFP,"wanassign -d $wanargs $pid $eid 2>&1 | tee wanassign.log |") or
959
960
	fatal($WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER,
	      "Failed to start wanassign: $!");
961
962
963
964
965
966
967

    printdb "Reading wanassign results.\n";
    while (<WANFP>) {
	chop;
	if ($_ =~ /(\S+) mapsto (\S+)/) {
	    $v2vmap{$1} = $2;
	    printdb "  $1 $2\n";
968
	}
969
970
971
972
973
974
	if ($_ =~ /^Success/) {
	    $success = 1;
	}
	# Skip other output. Usually its debugging output.
    }
    close(WANFP) or
975
976
977
978
	fatal($WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER,
	      "wanassign: " .
	      ($? ? "exited with status: $?." :
	            "error closing pipe: $!"));
979

980
    if (!$success) {
981
982
	fatal($WRAPPER_FAILED|$WRAPPER_FAILED_CANRECOVER,
	      "wanassign could not find a solution!");
983
984
985
986
    }
    foreach my $virtual (keys(%v2vmap)) {
	my $physical = $v2vmap{$virtual};
	my $phys_nodeid;
987

988
	TBPhysNodeID($physical, \$phys_nodeid);
989
	    
990
991
992
993
994
	$v2pmap{$virtual} = $phys_nodeid;
	if ( !defined($p2vmap{$phys_nodeid})) {
	    $p2vmap{$phys_nodeid} = [];
	}
	push(@{$p2vmap{$phys_nodeid}}, $virtual);
995

996
997
	# Virtual nodes are always clean. Also prevents errors elsewhere.
	if (!$impotent) {
998
	    TBSetNodeAllocState($physical, TBDB_ALLOCSTATE_RES_INIT_CLEAN());
999
	}
1000
    }
1001
    TBDebugTimeStamp("wanassign finished");
1002
1003
}

Chad Barb's avatar
Chad Barb committed
1004
1005
#
# Recoverability ends.
1006
# All fatal() calls from this point do not have the recoverable '64' bit set.
Chad Barb's avatar
Chad Barb committed
1007
#
1008
$NoRecover = 1;
1009

1010
1011
1012
# VIRTNODES HACK: Local virtnodes have to be mapped now. This is a little
# hokey in that the virtnodes just need to be allocated from the pool that
# is on the real node. We know they are free, but we should go through
1013
# nalloc anyway. If anything fails, no point in retry.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1014
#
1015
1016
foreach my $pnode (keys(%virtnodes)) {
    my @vlist = @{$virtnodes{$pnode}};
1017
    my $numvs = @vlist;
1018
    my @plist = ();
1019
1020
    my @oplist = ();
    my @ovlist = ();
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1021

1022
1023
    # Check cancel flag before continuing. 
    TBGetBatchCancelFlag($pid, $eid, \$canceled);
1024
1025
    fatal($WRAPPER_FAILED,
	  "Cancel flag set; aborting assign run!")
1026
1027
	if ($canceled);

Leigh B. Stoller's avatar
Leigh B. Stoller committed
1028
    #
1029
1030
1031
    # If updating, need to watch for nodes that are already reserved.
    # We save that info in oplist/ovlist, and build a new vlist for
    # avail, of just the nodes we need in this run. 
1032
    #
1033
1034
1035
1036
1037
    if ($updating) {
	my @newvlist = ();
	my @delvlist = ();
	
	foreach my $vnode (@vlist) {
1038
	    if (!defined($reserved_v2vmap{$vnode})) {
1039
1040
1041
1042
		# A new vnode on pnode to allocate.
		push(@newvlist, $vnode);
		next;
	    }
1043
	    if ($reserved_v2pmap{$vnode} ne $pnode) {
1044
		# A vnode moved. Its new to this pnode.
1045
		print "$vnode has moved from $reserved_v2pmap{$vnode} ".
1046
1047
1048
1049
1050
		    "to $pnode!\n";
		
		push(@newvlist, $vnode);
		next;
	    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1051

1052
	    # Push already allocated p/v onto lists for later.
1053
	    push(@oplist, $reserved_v2vmap{$vnode});
1054
1055
1056
1057
1058
	    push(@ovlist, $vnode);
	}
	# These are the new nodes we need to allocate
	@vlist = @newvlist;
	$numvs = scalar(@vlist);
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1059

1060
1061
1062
1063
	if (@oplist) {
	    print "Reusing vnodes @oplist\n"; 
	}
    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1064

1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
    #
    # Still need to allocate some virtnodes?
    #
    if ($numvs) {
	#
	# Run avail to get the list of virtnodes on the phys node. We
	# already know there are enough, since assign knows that.
	#
	print "Asking avail for $numvs vnodes on $pnode\n";
    
	open(AVAIL,"$TBROOT/sbin/avail virtonly=$pnode rand limit=$numvs |")
1076
	    or fatal($WRAPPER_FAILED, "avail failed");
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087

	while (<AVAIL>) {
	    next
		if (! /^\|/);
	    next
		if (/node_id/);

	    if ($_ =~ /^\|([-a-zA-Z0-9]+)\s*\|(\w+)\s*\|(\w+)\s*\|$/) {
		push(@plist, $1);
	    }
	    else {
1088
1089
		fatal($WRAPPER_FAILED,
		      "Bad line from avail: $_");
1090
	    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1091
	}
1092
1093
1094
1095
	close(AVAIL);

	# Sanity check.
	if (scalar(@vlist) != scalar(@plist)) {
1096
1097
	    fatal($WRAPPER_FAILED,
		  "Could not map some virtual nodes on $pnode");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1098
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1099

1100
1101
1102
1103
	#
	# Try to allocate. Note, if this fails we are done for. Okay for now
	# since it is never the case that it should fail!
	#
1104
1105
1106
1107
1108
1109
1110
	if ($impotent) {
	    print "Selected ($pnode) @plist\n";
	    print "Skipping physical reservation, as directed.\n";
	}
	else {
	    print "Reserving ($pnode) @plist ...\n";
	    if (system("nalloc $pid $eid @plist")) {
1111
1112
		fatal($WRAPPER_FAILED,
		      "Failed to reserve @plist (on $pnode)");
1113
	    }
1114
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1115
1116
    }

1117
1118
1119
1120
1121
1122
1123
    if ($updating) {
	#
	# Append the lists we created above, so that we get all of them
	# in the loop below.
	#
	@plist = (@plist, @oplist);
	@vlist = (@vlist, @ovlist);
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1124
    }
1125
    
1126
1127
1128
    while (@plist) {
	my $physical = pop(@plist);
	my $virtual  = pop(@vlist);
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1129

1130
1131
	$v2vmap{$virtual}  = $physical;
	printdb "  Mapping $virtual to $physical on $pnode\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1132

1133
1134
1135
1136
1137
	#
	# New virtual nodes are always clean. Old ones stay in whatever
	# state they were in so that os_setup/vnode_setup know they
	# need to reboot them.
	#
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
	if (!$impotent) {
	    if (!defined($reserved_v2vmap{$virtual})) {
		TBSetNodeAllocState($physical,
				    TBDB_ALLOCSTATE_RES_INIT_CLEAN());
	    }
	    elsif ($reserved_v2vmap{$virtual} ne $physical) {
		# Node has moved! Nuts!
		TBSetNodeAllocState($physical,
				    TBDB_ALLOCSTATE_RES_INIT_DIRTY());
	    }
1148
	}
1149
1150
    }
}
1151

1152
1153
# Check cancel flag before continuing. 
TBGetBatchCancelFlag($pid, $eid, \$canceled);
1154
1155
fatal($WRAPPER_FAILED,
      "Cancel flag set; aborting assign run!")
1156
1157
    if ($canceled);

1158
# Set port range (see below for how we deal with update).
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1159
TBExptSetPortRange();
1160

1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
# Load the current physical resources. This avoids lots of repeated
# queries to the DB.
LoadPhysResources();

#
# For update, wipe old interfaces in DB (normally done by nfree.)
# These will get rebuilt soon. 
#
if ($updating && !$impotent) {
    foreach my $pnode (keys(%phys_nodes)) {
	#
	# Do not need to do this for phys nodes that are to be
	# released, or for virtnodes since they do not have interfaces
	# associated with them directly. This is probably a bad assumption
	# though, and perhaps this entire function should be moved to the
	# library. 
	#
	next
	    if (physnodeisvirtnode($pnode) ||
		physnodereuse($pnode) eq "unused");

1182
	DBQueryFatal("update interfaces set IP='',IPaliases=NULL,mask=NULL " .
1183
1184
		     "where node_id='$pnode' and ".
		     "  role='" . TBDB_IFACEROLE_EXPERIMENT() . "'");
1185
1186
1187
1188
1189
1190

	# Clean the veth_interfaces table for this node too.
	DBQueryFatal("delete from veth_interfaces where node_id='$pnode'");
    }
}

Leigh B. Stoller's avatar
Leigh B. Stoller committed
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
######################################################################
# Step 3 - Convert to vlans, delays, and portmap
# 
# Here we convert the plinks into vlans, delays, and portmap.  We
# convert them first into internal datastructure.  After Step 4
# when we do some port swapping we'll upload the modified versions
# of these structures into the database.
#
# delays is indexed by an internal ID and contains:
#  [pnode, int0, int1, vname, delay, bandwidth, lossrate]
# portmap is indexed by <virtual node>:<virtual port> and contains
#  the physical port.
#
# vlan ids
#  vlan ids are increasing integers in the case of node<->delay connections.
#  In the case of actual LANs either of real node or of delay nodes
#  they are indexed by virtual lan name.
# delay ids
#  delay ids are increasing integers.  We could have used a list of
# delays just as well.  Having it as an array may prove useful for
# future changes however.
######################################################################

$delayid = 0;

printdb "Interpreting results.\n";
1217
TBDebugTimeStamp("interpreting started");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1218
foreach $plink (keys(%plinks)) {
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
    # trivial links do not have physical links, so no delay nodes. But,
    # we *do* use trivial links for intranode links, and thus there could
    # be link delays (ie: two jailed nodes on a link/lan assigned to the
    # same phys node).
    my $trivial = 0;

    if (scalar(@{$plinks{$plink}})) {
	($nodeportA,$nodeportB) = @{$plinks{$plink}};
	($nodeA,$portA) = split(":", $nodeportA);
	($nodeB,$portB) = split(":", $nodeportB);
	printdb "plink $plink - $nodeportA $nodeportB\n";
    }
    else {
	$trivial = 1;
	printdb "plink $plink - trivial\n";
Shashi Guruprasad's avatar
Shashi Guruprasad committed
1234
    }
1235

1236
    if (($lan,$virtA,$virtC) =
1237
	   ($plink =~ m|^linksdelaysrc/(.+)/(.+),(.+)$|)) {
1238
1239
1240
1241
	# trivial links do not have physical links, so no delay nodes.
	if ($trivial) {
	    next;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1242
1243
1244
	# Node has a single entry in lan.
	# Node is nodeportA
	# Delay node is nodeportB
1245
	# Other end of delay node will be given by corresponding plink
1246
1247
	#   linksdelaydst/lan/virtC,virtA where nodeportA will be the other 
	#   node in the virtual LAN and nodeportB will be the other end of the
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1248
	#   delay node.
1249
1250
	($nodeportC,$nodeportD) =
	    @{$plinks{"linksdelaydst/$lan/$virtC,$virtA"}};
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1251
1252
1253
1254
1255
1256
1257
	($nodeC,$portC) = split(":",$nodeportC);
	($nodeD,$portD) = split(":",$nodeportD);
	printdb "LINK delay: other end = $nodeportC $nodeportD\n";

	# assert nodeB == nodeD

	printdb "  VLANS:\n";
1258
1259
	AddVlan("link", "$lan" . "-delaysrc", $nodeportA, $nodeportB);
	AddVlan("link", "$lan" . "-delaydst", $nodeportC, $nodeportD);
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
	
	my ($member0,$delay,$bandwidth,$lossrate,
	    $member1,$rdelay,$rbandwidth,$rlossrate) = @{$delaylinks{$plink}};
	
	$nodedelays{$delayid++} = [$nodeB,$portB,$portD,$lan,
			       $member0,$delay,$bandwidth,$lossrate,
			       $member1,$rdelay,$rbandwidth,$rlossrate];
	printdb "  Delay: \[$nodeB,$portB,$portD,$lan," .
	    "$delay,$bandwidth,$lossrate,$rdelay,$rbandwidth," .
	    "$rlossrate,$nodeportA,$nodeportC\]\n";

	#
	# Setup portmap using virt members in plink name.
	#
	$portmap{$virtA} = $portA;
	$portmap{$virtC} = $portC;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1276
	printdb "  Portmap:\n";
1277
1278
1279
1280
	printdb "    $virtA = $portA\n";
	printdb "    $virtC = $portC\n";
    }
    elsif (($lan,$virtA) = ($plink =~ m|^linkdelaysrc/([^/]+)/(.+)$|)) {
1281
1282
1283
1284
	# trivial links do not have physical links, so no delay nodes.
	if ($trivial) {
	    next;
	}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1285
1286
	# Node may have multiple entries in lan.
	# Delay node is nodeB and portB.
1287
	# Other end of delay node will be given by corresponding plink
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1288
1289
	#  linkdelaydst/lan/node where nodeportA will the LAN node and
	#  nodeportB will be the other end of the delay node.
1290
1291
	
	($nodeportC,$nodeportD) = @{$plinks{"linkdelaydst/$lan/$virtA"}};
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1292
1293
1294
	($nodeC,$portC) = split(":",$nodeportC);
	($nodeD,$portD) = split(":",$nodeportD);
	printdb "LAN delay src: other end = $nodeportC $nodeportD\n";
1295

Leigh B. Stoller's avatar
Leigh B. Stoller committed
1296
	printdb "  VLANS:\n";
1297
	AddVlan("link", $lan . "-delay" . $nodeA, $nodeportA, $nodeportB);
1298
	AddVlan("lan",  $lan, $nodeportD);
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1299
	
1300
1301
1302
1303
1304
1305
1306
	my ($member0,$delay,$bandwidth,$loss,
	    $member1,$rdelay,$rbandwidth,$rloss) = @{$delaylinks{$plink}};
	
	$nodedelays{$delayid++} = [$nodeB,$portB,$portD,$lan,
				   $member0,$delay,$bandwidth,$loss,
				   $member1,$rdelay,$rbandwidth,$rloss];
	printdb "  Delays: \[$nodeB,$portB,$portD,$lan," .
Christopher Alfeld's avatar
Christopher Alfeld committed
1307
	    "$delay,$bandwidth,$loss,$rdelay,$rbandwidth,$rloss," .
1308
	    "$nodeportA,$nodeportC\]\n";
Shashi Guruprasad's avatar
Shashi Guruprasad committed
1309

1310
	$portmap{$virtA} = $portA;
Shashi Guruprasad's avatar
Shashi Guruprasad committed
1311
	printdb "  Portmap:\n";
1312
1313
1314
1315
1316
1317
1318
1319
	printdb "    $virtA = $portA\n";
    }
    elsif (($lan,$virtA,$virtB) = ($plink =~ m|^linksimple/(.+)/(.+),(.+)$|)) {
	#
	# nodeportA and nodeportB are the only two nodes in the LAN.
	# If the link is delayed, its with endpoint delays, not a delay node.
	#
	printdb "  Link:";
1320
1321
1322
1323
1324
1325
1326
1327

	#
	# trivial links do not have physical links, but could be using
	# virtual interfaces on the same node. 
	#
	if (! $trivial) {
	    AddVlan("link", $lan, $nodeportA, $nodeportB);

1328
	    if (virtlanuseveth($lan)) {
1329
1330
1331
		#
		# Create some new veth devices.
		# 
1332
1333
		$portA = NewVethIface($lan, $virtA, $nodeA, $portA);
		$portB = NewVethIface($lan, $virtB, $nodeB, $portB);
1334
1335
1336
1337
1338
1339
1340
	    }
	}
	else {
	    # No phys mapping. We create a veth, but there is no phys mapping
	    # for the port.
	    $nodeA = $v2pmap{(split(":", $virtA))[0]};
	    $nodeB = $v2pmap{(split(":", $virtB))[0]};
1341
1342
	    $portA = NewVethIface($lan, $virtA, $nodeA);
	    $portB = NewVethIface($lan, $virtB, $nodeB);
1343
1344
1345
1346
1347
1348
1349
1350
1351
	}
	#
	# Setup portmap using virt members in plink name.
	#
	$portmap{$virtA} = $portA;
	$portmap{$virtB} = $portB;
	printdb "  Portmap:\n";
	printdb "    $virtA = $portA\n";
	printdb "    $virtB = $portB\n";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1352
	
1353
1354
1355
1356
	if ($delaylinks{$plink}) {
	    my ($member0,$delay,$bandwidth,$loss,
		$member1,$rdelay,$rbandwidth,$rloss) =
		    @{$delaylinks{$plink}};
Shashi Guruprasad's avatar
Shashi Guruprasad committed
1357

1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
	    #
	    # Two entries, one for each side of the duplex link.
	    #
	    $linkdelays{$delayid++} = [$nodeA,$portA,$lan,$member0,
				       $delay,$bandwidth,$loss,
				       undef,undef,undef,0];
	    
	    $linkdelays{$delayid++} = [$nodeB,$portB,$lan,$member1,
				       $rdelay,$rbandwidth,$rloss,
				       undef,undef,undef,0];
	    
	    printdb "  LinkDelay: \[$nodeA,$portA,$nodeB,$portB," .
		"$lan,$delay,$bandwidth,$loss,$rdelay,$rbandwidth,$rloss\]\n";
Shashi Guruprasad's avatar
Shashi Guruprasad committed
1371
	}
1372
1373
    }
    elsif (($lan,$virtA) = ($plink =~ m|^linklan/([^/]+)/(.+)$|)) {
Leigh B. Stoller's avatar
Leigh B. Stoller committed
1374
1375
1376
1377
	# node may be the LAN multiple times.
	# nodeportA is the node.
	# nodeportB is the LAN
	# No delays
1378
	printdb "  LAN:";
1379
1380
1381
1382
1383
1384
1385
1386

	#
	# trivial links do not have physical links, but could be using
	# virtual interfaces on the same node. 
	#
	if (! $trivial) {
	    AddVlan("lan", $lan, $nodeportA);

1387