linktest.pl.in 34.5 KB
Newer Older
1
#!/usr/bin/perl -w -T
2
#
3
# EMULAB-COPYRIGHT
Mike Hibler's avatar
Mike Hibler committed
4
# Copyright (c) 2000-2005 University of Utah and the Flux Group.
5
# All rights reserved.
6
#
7

8
9
use strict;
use Class::Struct;
10
11
use POSIX qw(uname);
use IO::Handle;
12
13
14
15
16
17
18
19
20
21
22
23
24
25
use English;

#
# Linktest test script. This script is set up to run as root on
# experiment nodes. It is invoked by the Linktest daemon after the
# daemon receives a Linktest "START" event. The script runs tests using
# ping, rude/crude (a real-time packet emitter/collector) and pathrate
# to test all links in the experiment. If the results show a 99% chance
# that the experiment is configured incorrectly, an error is logged to
# the experiment directory in tbdata/linktest.  Valid ranges have been
# calibrated ahead of time.
#
sub usage() 
{
Timothy Stack's avatar
   
Timothy Stack committed
26
27
28
29
    print("Usage: linktest.pl\n".
	  " [STARTAT=<test step, 1-4>]\n".
	  " [STOPAT=<test step, 1-4>]\n".
	  " [DEBUG=<debugging level. 1=on, 0=off>]\n");
30
    exit(0);
31
32
}

33
34
35
36
37





38
39
40
##############################################################################
# Constants
##############################################################################
41

42
# path to applications and files
43
use constant LINKTEST_NSPATH => "@LINKTEST_NSPATH@";
44
45
46
47
48
49
50
51
use constant PATH_NICKNAME => "@CLIENT_VARDIR@/boot/nickname";
use constant PATH_KEYFILE => "@CLIENT_VARDIR@/boot/eventkey";
use constant PATH_RUDE => "/usr/local/bin/rude";
use constant PATH_CRUDE => "/usr/local/bin/crude";
use constant PATH_PATHRATE_SND => "/usr/local/bin/pathrate_snd";
use constant PATH_PATHRATE_RCV => "/usr/local/bin/pathrate_rcv";
use constant PATH_EMULAB_SYNC => "@CLIENT_BINDIR@/emulab-sync";
use constant PATH_LTEVENT => "@CLIENT_BINDIR@/ltevent";
Timothy Stack's avatar
   
Timothy Stack committed
52
use constant PATH_TEVC => "@CLIENT_BINDIR@/tevc";
53
use constant RUN_PATH => "@CLIENT_BINDIR@"; # where the linktest-ns runs.
54
use constant EVENTSERVER => "@EVENTSERVER@";
55

56
57
# log files used by tests.
use constant CRUDE_DAT => "/tmp/crude.dat"; # binary data
58
59
use constant RUDE_CFG  => "/tmp/rude.cfg";
use constant PATHRATE_DAT => "/tmp/pathrate.dat";
60

61
# pathrate test limits.
62
use constant LIMIT_BW_HI => 45000000;
63
64
65
use constant LIMIT_BW_LO =>  1000000;
use constant LIMIT_BW_LOSS => 0;

66
67
# Make sure that we dont get bogged down in being too accurate! 
# Make sure the error is a certain significance before we start reporting it.
68
69
70
use constant INSIGNIFICANT_LAT_ERROR_LO => 0.50;  # ms
use constant INSIGNIFICANT_LAT_ERROR_HI => 3.00;  # ms
use constant INSIGNIFICANT_BW_ERROR     => 1.00;  # mb
71
72
73

# latency must be corrected for xmit delay under this speed.
use constant LAT_LOW_BW => 10000000;
74

75
76
77
78
# slow send rate (for bw 256kbps to 1Mbps)
use constant SLOW_BW => 256000;
use constant SLOW_SEND => 400;
use constant FAST_SEND => 1002;
79

80
# misc contstants
81
use constant BSD => "FreeBSD";
82
use constant LINUX => "Linux";
83
use constant RTPROTO_STATIC => "Static";
Timothy Stack's avatar
   
Timothy Stack committed
84
use constant EVENT_COMPLETE => "COMPLETE";
85
use constant EVENT_REPORT => "REPORT";
Timothy Stack's avatar
   
Timothy Stack committed
86
use constant EVENT_LOG => "LOG";
87
use constant PING_SEND_COUNT => 10;
David Anderson's avatar
David Anderson committed
88
use constant SYNC_NAMESPACE => "linktest";
89
90

# test levels
91
92
93
94
use constant TEST_LATENCY => 1; # direct link connectivity & latency
use constant TEST_RT_STATIC => 2;   # prior plus static routing
use constant TEST_LOSS => 3;   # prior plus loss
use constant TEST_BW => 4; # prior plus bandwidth
95

96
# test names
97
98
99
100
use constant NAME_RT_STATIC => "Static Routing";
use constant NAME_LATENCY => "Latency (Round Trip)";
use constant NAME_LOSS => "Loss";
use constant NAME_BW => "Bandwidth";
101

102
# error suffix for logs
103
use constant SUFFIX_ERROR => ".error";
104
use constant DEBUG_ALL => 2; # debug level for all debug info, not just msgs.
105

106
107
# exit codes
use constant EXIT_ABORTED => -1;
Timothy Stack's avatar
   
Timothy Stack committed
108
use constant EXIT_NOT_OK => 1;
109
110
use constant EXIT_OK => 0;

111
112
113
114
115
116
117

# struct for representing a link.
struct ( edge => {
    src => '$',
    dst => '$',
    bw  => '$',
    delay => '$',
118
119
    loss => '$'});

Timothy Stack's avatar
   
Timothy Stack committed
120
121
122
123
124
struct ( host => {
    name => '$',
    visited => '$',
    links => '@'});

125
# fixes emacs colorization woes introduced by above struct definition.
Timothy Stack's avatar
   
Timothy Stack committed
126
# struct ( unused => { foo => '$'});
127

128
129
130
use constant TRUE => 1;
use constant FALSE => 0;

131
132
133
134
##############################################################################
# Globals
##############################################################################

135
my $topology_file;    # location of the topology input file.
136
my $synserv;    # synch server node
137
my $rtproto;    # routing protocol
138
my $hostname;   # this hosts name
139
140
141
142
my $exp_id;     # experiment id
my $proj_id;    # project id
my $gid;        # group id
my $platform;   # name of platform
143
144
145
146
my $startat=1;  # which test to start at
my $stopat=99;  # which test to stop at
my @kill_list;  # PIDs maintained through the life of linktest
                    # which get killed as part of cleanup.
147
148
149
my $debug_level = 0; # enable debug statements
                    # 1 = print debug statements.
                    # 2 = show STDOUT and STDERR
150
my $barr_count;   # used by synserv host, nubmer of hosts -1
151
my $log_file;    # common logfile for information saved over time.
152
153

my @hosts; # hosts: list of text strings containing host names.
154
           # sorted alphabetically
Timothy Stack's avatar
   
Timothy Stack committed
155
my %hostmap;
156
my @links; # links: list of edge structs.
157
           # sorted alphabetically by src . dst
158
                                  
159
160
my $expt_path;  # experiment path (ie, tbdata) set by init.
my $linktest_path;   # log path (ie tbdata/linktest) set by init.
Timothy Stack's avatar
   
Timothy Stack committed
161
162
163
164
165
166
167
168
my $simname = "ns";
my $swapper = "";
my $swapperid = 0;
my $swappergid = 0;
my $token = -1;
my $error_count = 0;
my $stage_error_count = 0;
my $total_error_count = 0;
169
170
171
172

##############################################################################
# Main control
##############################################################################
173

174
175
176
177
178
179
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

$| = 1; #Turn off line buffering on output

180
181
182
# Make sure that files written into the experiment subdir are group writable.
umask(0002);

183
184
185
186
187
188
189
190
191
192
193
194
195
196
#
# Parse command arguments. Since Linktest is run via the event system,
# parse out pairs of <symbol>=<value>.
#
foreach my $arg (@ARGV) {
    if($arg =~ /STOPAT=(\d)/) {
	$stopat=$1;
    }
    if($arg =~ /STARTAT=(\d)/) {
	$startat=$1;
    }
    if($arg =~ /DEBUG=(\d)/) {
	$debug_level=$1;
    }
Timothy Stack's avatar
   
Timothy Stack committed
197
198
199
200
201
202
203
    if($arg =~ /TOKEN=(\d+)/) {
	$token=$1;
    }
    if($arg =~ /SWAPPER=(\w+)/) {
	$swapper=$1;
	(undef,undef,$swapperid,$swappergid) = getpwnam($swapper);
    }
204
205
206
207
208
209
210
211
212
213
214
215
216
}

#
# Parse the nickname file to obtain the host name, 
# experiment ID and the project ID.
#
my $fname = PATH_NICKNAME;
die("Could not locate $fname\n") unless -e $fname;
my @results = &read_file($fname);
($hostname, $exp_id, $proj_id) = split /\./, $results[0];
chomp $hostname;
chomp $exp_id;
chomp $proj_id;
Mike Hibler's avatar
Mike Hibler committed
217
218
219
220
221
222
223
224

# taint check pid/eid
if ($proj_id =~ /([-\w]*)/) {
    $proj_id = $1;
}
if ($exp_id =~ /([-\w]*)/) {
    $exp_id = $1;
}
225
226
227
228
229
230
231
232
$gid = $proj_id;

#
# Set path variables storing the experiment logging path,
# the current ns file and the output file for topology info.
#
$expt_path = "/proj/$proj_id/exp/$exp_id/tbdata";
$linktest_path = "$expt_path/linktest";
Timothy Stack's avatar
   
Timothy Stack committed
233
$topology_file = "@CLIENT_VARDIR@/boot/ltmap";
234
235
236
237
238
239
240
241
242
243
244
245
246
247

#
# Determine location of the customized ns binary for Linktest.
#
($platform) = POSIX::uname();

#
# Parse the syncserver file to find out which node is the sync server.
#
my $ssname = "@CLIENT_VARDIR@/boot/syncserver";
die("Could not locate an emulab-sync server\n") unless -e $ssname;
@results = &read_file($ssname);
($synserv) = split/\./, $results[0];
chomp $synserv;
248

249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#
# If the current node is the special node (arbitrarily the sync
# server node), do some housekeeping and run ns to generate
# the topology input file, which is read by all nodes to obtain
# the experiment topology.
#
if(&is_special_node()) {

    #
    # If the shared path used by Linktest for logging and temporary
    # files already exists, clear its contents for this run.
    #
    if( -e $linktest_path ) {
	die("Path $linktest_path is not a directory\n") 
	    unless -d $linktest_path;

	opendir (DIR,$linktest_path)
	    ||  die("Could not open $linktest_path: $!");
	my @dirfiles = grep (/error$/,readdir(DIR));
	foreach (@dirfiles) {
	    &do_unlink("$linktest_path/$_");
	}
	closedir(DIR);

    } else {
	# 
	# The shared path does not exist, create it.
	#
	mkdir (&check_filename($linktest_path),0777) 
	    || die("Could not create directory $linktest_path: $!");
Timothy Stack's avatar
   
Timothy Stack committed
279
	chown($swapperid, $swappergid, $linktest_path);
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
    }

    #
    # Call get_topo before the other nodes because
    # get_topo is used to initialize @hosts.
    #
    &get_topo($topology_file);

    #
    # Caculate the barrier count using the results of the NS script.
    # Note: This could be gotten from scalar(@hosts), but that
    # requires a successful parse of the NS file, which may fail.
    #
    $barr_count = @hosts;
    $barr_count--;
    
} # end of chores for the special node.

#
# All nodes remove local temporary files remaining from the last
# run, if any.
# 
&do_unlink(CRUDE_DAT);
&do_unlink(PATHRATE_DAT);
&do_unlink(RUDE_CFG);

#
# Start up child processes for receiving RUDE and Pathrate streams.
# These run over the lifetime of Linktest to reduce the number of
# barrier synchronizations and startup delays. Always give the 
# collectors a moment to start up.
#
if(&dotest(TEST_BW)){
    &my_system_initonly(PATH_PATHRATE_SND,"-i","-q");
    sleep(1);
}
if(&dotest(TEST_LOSS)){
    &my_system_initonly(PATH_CRUDE,"-l",CRUDE_DAT);
    sleep(1);
}

#
# Finally, synchronize so that all nodes have completed startup
# tasks. When all nodes reach this barrier, the topology input file
# has been written, local variables are initialized, background
# listeners have been started, and temporary files have been cleared.
#
&barrier;

#
# Beginning of the tests. First, all nodes read the topology.
# Since this can occur at the same time after the barrier,
# have a short randomized delay to avoid hitting NFS too hard.
#
sleep(int(rand(5)));
&get_topo($topology_file) unless &is_special_node();
&debug_top();
337

Timothy Stack's avatar
   
Timothy Stack committed
338
339
340
341
my $msg = "Linktest Starting";
&sim_event(EVENT_LOG,$msg);
&debug("\n$msg\n\n");

342
if(&dotest(TEST_LATENCY)) {
343
344
    my $msg = "Testing Single Hop Connectivity and Latency...";
    &post_event(EVENT_REPORT,$msg);
Timothy Stack's avatar
   
Timothy Stack committed
345
    &sim_event(EVENT_LOG,$msg);
346
    &debug("\n$msg\n\n");
347
    &latency_test;
Timothy Stack's avatar
   
Timothy Stack committed
348
    &report_status;
349
350
}

351
352
353
if(&dotest(TEST_RT_STATIC)
    && defined($rtproto)
    && $rtproto eq RTPROTO_STATIC) {
354
355
    my $msg = "Testing Static Routing...";
    &post_event(EVENT_REPORT,$msg);
Timothy Stack's avatar
   
Timothy Stack committed
356
    &sim_event(EVENT_LOG,$msg);
357
    &debug("\n$msg\n\n");
358
    &static_rt_test; # nodes not covered by 1hop test
Timothy Stack's avatar
   
Timothy Stack committed
359
    &report_status;
360
}
361

362
if(&dotest(TEST_LOSS)) {
363
364
    my $msg = "Testing Loss...";
    &post_event(EVENT_REPORT,$msg);
Timothy Stack's avatar
   
Timothy Stack committed
365
    &sim_event(EVENT_LOG,$msg);
366
    &debug("\n$msg\n\n");
367
    &loss_test; 
Timothy Stack's avatar
   
Timothy Stack committed
368
    &report_status;
369
}
370

371
if(&dotest(TEST_BW)){
372
373
    my $msg = "Testing Bandwidth...";
    &post_event(EVENT_REPORT,$msg);
Timothy Stack's avatar
   
Timothy Stack committed
374
    &sim_event(EVENT_LOG,$msg);
375
    &debug("\n$msg\n\n");
376
    &bw_test;
Timothy Stack's avatar
   
Timothy Stack committed
377
    &report_status;
378
}
379

380
381
&cleanup;

382
383
&barrier();

Timothy Stack's avatar
   
Timothy Stack committed
384
385
386
387
$msg = "Linktest Done";
&sim_event(EVENT_LOG,$msg);
&debug("\n$msg\n\n");

388
389
390
#
# Send an event indicating that Linktest has completed normally.
#
Timothy Stack's avatar
   
Timothy Stack committed
391
&post_event(EVENT_COMPLETE,"ERROR=$total_error_count CTOKEN=$token");
392

393
exit(EXIT_OK);
394

395

396
##############################################################################
397
# Loss Test Functions
398
399
##############################################################################

400
# Writes the configuration file used by RUDE.
401
402
sub write_rude_cfg {
    my ($stream_id, $edge) = @_;
403
404
405
406
407
408
409
410
411
412
    my @contents;
    push @contents, "START NOW\n";
    push @contents, "0000 $stream_id ON 3001 " 
	. $edge->dst 
	    . ":10001 CONSTANT " 
		. &get_loss_sample_size($edge) 
		    . " 20\n";
    push @contents, "1000 $stream_id OFF\n";

    &write_file(RUDE_CFG, @contents);
413
414
415

}

416
417
# Returns the sample size used by the Loss test.
# TODO: why this number? (from my ProbStats book.)
418
419
420
421
422
423
424
425
426
sub get_loss_sample_size {
    my $edge = shift @_;
    if($edge->loss > 0) {
	return &round( 10 / $edge->loss);
    } else {
	return SLOW_SEND; # just in case a slow link with no loss.
    }
}

427
428
429
430


# returns TRUE if the link loss is valid for the linktest loss test.
sub valid_loss {
431
    my $edge = shift @_;
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
    if($edge->bw >= SLOW_BW && $edge->bw < LIMIT_BW_LO) {
	if(&get_loss_sample_size($edge) > SLOW_SEND) {
	    return FALSE;
	} else {
	    return TRUE;
	}
    } elsif( $edge->bw >= LIMIT_BW_LO) {
	# also want an upper limit.
	if(&get_loss_sample_size($edge) > FAST_SEND) {
	    return FALSE;
	} else {
	    return TRUE;
	}
    } else {
	return FALSE;
    }
448
}
449
450
451
452


# This test uses RUDE and CRUDE to send a stream of packets
# in both directions over a duplex link.
453
454
455
456
457
458
459
460
461
462
463
sub loss_test {
    my %analyze;
    my %recv_cnt;
    my $stream_id = 1;
    my @edge_copy = @links;
    while(&has_elems(\@edge_copy)) {
	my ($edge,$other_edge) = &get_twoway_assign(\@edge_copy);
	if(defined($edge) && defined($other_edge)) {
	    if($hostname eq $edge->src) {
		if(valid_loss($edge)) {
		    &write_rude_cfg($stream_id,$edge);
464
		    &my_system(PATH_RUDE,"-s", RUDE_CFG);
465
466
467
468
469
470
471
		    $analyze{$stream_id} = $other_edge;
		} else {
		    &debug("Skipping loss test for " . &print_link($edge) . "\n");
		}
	    } elsif ($hostname eq $other_edge->src) {
		if(valid_loss($other_edge)) {
		    &write_rude_cfg($stream_id,$other_edge);
472
		    &my_system(PATH_RUDE,"-s", RUDE_CFG);
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
		    $analyze{$stream_id} = $edge;
		} else {
		    &debug("Skipping loss test for " . &print_link($other_edge) . "\n");
		}
	    }
	}
	$stream_id++;
	&barrier();
    }

    # wait for any stragglers due to delay-- there is a  race
    # between the barrier sync on the control net and the expt net latency.
    sleep(1);

    # count packets received for each stream.
488
489
490
491
    my @results = &my_tick(PATH_CRUDE,"-d",CRUDE_DAT);
    my $result_count = @results;
    &debug("result_count from crude: $result_count\n");
    foreach (@results) {
492
493
494
495
496
	if(/ID=(\d+) /) {
	    $recv_cnt{$1}++;
	}
    }

497
    # analyze only links for which a stream was received.
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
    foreach my $key (keys %analyze) {
	my $edge = $analyze{$key};
	my $received = $recv_cnt{$key};

	if(!defined($received)) {
	    $received=0;
	    &error (NAME_LOSS,$edge,"No packets received from " . $edge->src);
	} else {
	    # this is a large sample test about proportion p.
	    # this is considered a valid statistical estimate for np >= 10.
	    my $p = 1 - $edge->loss;
	    my $n = &get_loss_sample_size($edge) + 1 ;


	    my $p_hat = $received / $n;
	    my $numerator = $p_hat - $p;
514
	    my $denominator = sqrt( abs( $p * (1 - $p_hat) / $n) );
515
516
517
518

	    if( $edge->loss == 0) {
		if($received < $n) {

519
520
521
		    ####
		    ## TODO:

522
523
524
		    
		    my $errmsg = "Unexpected loss occurred (n=$n, received=$received)\n";
		    &error(NAME_LOSS, $edge, $errmsg);
525
		} # note, no logging of succesful 0-loss. (too common).
526
527
528
529
	    } elsif($denominator == 0) {
		my $errmsg = "No packets were lost (n=$n, plr=" . $edge->loss .")";
		&error(NAME_LOSS, $edge, $errmsg);
	    } else {
530

531
532
533
534
		
		my $z = $numerator / $denominator;
		my $reject_region = 2.58; # alpha = 0.1, normal distro by CLT
		if(abs($z) > $reject_region) {
535
		    my $errmsg = "Probable loss misconfiguration (n:$n, received: $received; expected proportion: $p, measured proportion=$p_hat)";
536
537
538
539
540
541
542
543
		    &error(NAME_LOSS, $edge, $errmsg);
		}
	    }
	}

    }


544
    # wait for completion before next test.
545
546
547
    &barrier();
}

548
549
550
##############################################################################
# Latency Test Functions
##############################################################################
551

552
553
554
# returns whether the link latency is in a valid test range.
sub valid_latency {
    return TRUE;
555

556
}
557

558
559
560
561
562
563
564
565
566
# Pings a node and returns information.
# @param[0] := host to ping
# @param[1] := ttl, 0 for default
# @return: (received_count, avg_latency ms)
sub ping_node {
    my ($host,$ttl) = @_;
    my $count = 0;
    my $avg_latency = 0;
    my $stddev = 0;
567

568
569
570
    my $send_count = PING_SEND_COUNT;
    my $timeout = 1; # 1 second
    my $send_rate = $timeout / $send_count;
571

572
573
574
575
576
577
578
579
580
581
582
583
584
    # set deadline to prevent long waits
    my $cmd;
    if($ttl) {
	if($platform eq BSD) {
	    $cmd = "/sbin/ping -c $send_count -q -i $send_rate -t $timeout -m $ttl $host";
	} elsif($platform eq LINUX) {
	    $cmd = "/bin/ping -c $send_count -q -i $send_rate -w $timeout -t $ttl $host";
	}
    } else {
	if($platform eq BSD) {
	    $cmd = "/sbin/ping -c $send_count -q -i $send_rate -t $timeout $host";
	} elsif($platform eq LINUX) {
	    $cmd = "/bin/ping -c $send_count -q -i $send_rate -w $timeout $host";
585
586
587
	}
    }

588
589
590
591
592
593
594
595
596
597
    # note backticks passes SIGINT to child procs
    my @args = split(/\s+/,$cmd);
    my @results = &my_tick(@args);

    my $reslt_cnt = @results;
    my $result = $results[$reslt_cnt-2];
    if($platform eq BSD && $result =~ /(\d+) packets received/) {
	$count = $1;
    } elsif($platform eq LINUX && $result =~ /(\d+) received/) {
	$count = $1;
598
599
    }

600
601
602
603
604
    if($count) {
	$result = $results[$reslt_cnt-1];
	if($result=~ /\d+\.\d+\/(\d+\.\d+)\/\d+\.\d+\/(\d+\.\d+)/) {
	    $avg_latency = $1;
	    $stddev = $2;
605
606
	}
    }
607
    return ($count, $avg_latency, $stddev);
608
609
}

610
# For directly connected hosts, checks latency using Ping.
611
sub latency_test {
Timothy Stack's avatar
   
Timothy Stack committed
612
    my %waitlist;
613
    my @edge_copy = @links;
614

615
    while(&has_elems(\@edge_copy)) {
616
617
618
	my ($edge,$other_edge) = &get_twoway_assign(\@edge_copy);
	if(defined($edge) && defined($other_edge)) {
	    if($hostname eq $edge->src ) {
619
620
621
622
623
624
625
		# todo: consider ignoring latency if no delay node.
		if(&valid_latency($edge) && &valid_latency($other_edge)
		   ) {
		    my $pid = fork();
		    if(!$pid) {

			# call ping_node with ttl=1
626
627
			my ($result_cnt, $sample_avg, $sample_dev) =
			    &ping_node($edge->dst,1);
628
629
630
631
632
633

			my $n = PING_SEND_COUNT;

			if($result_cnt == 0) {
			    my $errmsg = "No packets were received (n=$n)\n";
			    &error(NAME_LATENCY, $edge, $errmsg);
Timothy Stack's avatar
   
Timothy Stack committed
634
			    exit(EXIT_NOT_OK);
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
			} else {

# facts from analysis in /users/davidand/public/calibrate.
# came from 40 independent swapins (enough for normality assumption)
# (note that data actually is normal at any particular latency point, 
# according to described.lst)

# best fit regression for the error as a function of total latency, according to sas.
# see regression1.lst and regression1.sas
#-0.00523(actual)     0.00003096 fbsd
#-0.00530(actual)     0.00003478 linux
# roughly identical, so use:
#-0.005(actual)

# inherent delay in the system (with a delay node) is
# see described.lst and described.sas
# 0.337737  fbsd
# 0.362282  linux (median was 0.328000)
653
# round to:
654
# 0.333 ms
655
656

# note, this has been measured and is in one of the emulab papers (Shashi)
657
658
659
660
661
662
663
664
665
666
667
668
669
670

# Also, described.lst provides good support for the notion that
# the distribution of latencies is normal. For Fbsd all of the 
# distributions were normal, and most were for Linux. So, use this
# assumption in order to send fewer test packets.
			   

			    # the null hypothesis value, u.
			    my $u = $edge->delay + $other_edge->delay;
			    
			    # the calibration as a function of $u
			    $u += 0.333 - 0.005 * $u / 2;


671
# factor in transport delay at slow network speeds.
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687

			    # transport delay: 64 bytes + 18 bytes eth/crc
			    # to units of ms.
			    if($edge->bw < LAT_LOW_BW) {
				$u += 1000 * (82 * 8) / $edge->bw;
			    }
			    if($other_edge->bw < LAT_LOW_BW) {
				$u += 1000 * (82 * 8) / $other_edge->bw;
			    }
			    
			    my $x_bar = $sample_avg;
			    my $numerator = $x_bar - $u;

			    my $S = $sample_dev;
			    

688
			    my $denominator = $S / sqrt( abs( $n ) );
689
690
691
692

			    if($denominator == 0) {
				my $errmsg = "Invalid sample standard deviation (possible parse problem, please report). (n=$n, u=$u, x_bar=$x_bar, S=$S)";
				&error(NAME_LATENCY, $edge, $errmsg);
Timothy Stack's avatar
   
Timothy Stack committed
693
				exit(EXIT_NOT_OK);
694
695
696
697
			    } else {
				my $z = $numerator / $denominator;
				my $t_reject = 3.250; # alpha = 0.01, df=9

698
699
700
701
702
703
704
705
706
707
708
				if ((abs($z) > $t_reject) &&
				    (($x_bar < $u &&
				      (($u - $x_bar) >=
				       INSIGNIFICANT_LAT_ERROR_LO)) ||
				     ($x_bar > $u &&
				      (($x_bar - $u) >=
				       INSIGNIFICANT_LAT_ERROR_HI)))) {
				    &error(NAME_LATENCY, $edge,
					   "Probable latency ".
					   "misconfiguration (expected=$u, ".
					   "measured mean=$x_bar).");
Timothy Stack's avatar
   
Timothy Stack committed
709
				    exit(EXIT_NOT_OK);
710
711
712
				}
			    }
			}
713
			exit(EXIT_OK);
714
		    } else {
Timothy Stack's avatar
   
Timothy Stack committed
715
			$waitlist{$pid} = 1;
716
		    }
717
		} else {
718
		    &debug("Skipping latency test for " . &print_link($edge) . " to " . &print_link($other_edge) . "\n");
719
720
		}
	    }
721
#
722
723
	}
    }
724

Timothy Stack's avatar
   
Timothy Stack committed
725
    &wait_all(%waitlist);
726
727
    # wait for completion before next test.
    &barrier();
728
729
}

730

731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
##############################################################################
# Bandwidth Test Functions
##############################################################################

# Returns whether the link bandwidth is in a valid test range.
sub valid_bw {
    my $edge = shift @_;
    if($edge->bw >= LIMIT_BW_LO
       && $edge->bw <= LIMIT_BW_HI
       && $edge->loss <= LIMIT_BW_LOSS
       ) {
	return TRUE;
    } else {
	return FALSE;
    }
}


# Checks bandwidth for directly connected links.
750
751
sub bw_test {
    my @analyze_list;
752
    my @edge_copy;
753
    @edge_copy = @links;
754
755


756
757
758

    # all nodes will execute the same reductions on the edge list
    # on their own so that the number of barriers is the same.
759
    while(&has_elems(\@edge_copy)) {
760
	my $edge = &get_assign(\@edge_copy);
761

762
	if(defined($edge) ) {
763
764
765
766
	    if($hostname eq $edge->dst) {
		if (&valid_bw($edge)) {
		    push @analyze_list, $edge;
		    
767
768
769
		    &info("    Starting bandwidth test for " .
			  &print_link($edge) . "\n");
		    
770
		    &my_system(PATH_PATHRATE_RCV, "-Q","-s",$edge->src,"-q","-N", PATHRATE_DAT);
771
772
		} else {
		    &debug("Skipping bandwidth test for " . &print_link($edge) . "\n");
773
774
775
776
777
		    &info("*** Skipping bandwidth test for " .
			  &print_link($edge) . "\n");
		    &info("*** Bandwidth is out of range ".
			  "(" . LIMIT_BW_LO . " <= BW <= " . LIMIT_BW_HI .") ".
			  "or loss is too high (> " . LIMIT_BW_LOSS . ").\n");
778
		}
779
	    } 
780
781
782
783
	}
	&barrier();
    }

784

785
786
    # read the log file.
    if(@analyze_list) {
787
788
	my @results = &read_file(PATHRATE_DAT);
	foreach (@results) {
789
790
791
792
	    my $edge = shift(@analyze_list);
	    my $sender = $edge->src;
	    if(/SNDR=$sender.*CAPL=(\d+\.\d+)Mbps.*CAPH=(\d+\.\d+)Mbps/) {
		my $expected = $edge->bw / 1000000;
793
794
		my $low = $1;
		my $hi = $2;
795
796


797
798
799
800
801
802
803
804
		my $diff;
		if($expected > $2) {
		    $diff = $expected - $2;
		} elsif ($expected < $1) {
		    $diff = $1 - $expected;
		} else {
		    $diff = 0;
		}
805

806
		my $output = sprintf "Receive from " . $edge->src
807
		    . ": $1/$2/%.1f/%.1f\n", $expected, $diff;
808
809
		&debug($output);

810
		if($diff > INSIGNIFICANT_BW_ERROR) {
811
		    &error (NAME_BW, $edge, "Bandwidth estimate $low to $hi Mbps does not include expected bandwidth $expected Mbps");
812
		}
813

814
	    } else {
815
		 die ("Error while parsing " . PATHRATE_DAT . "\n");
816
817
818
	    }
	}
    }
819
820
    # wait for completion before termination so that all errors reported in.
    &barrier();
821
822
}

823
824
825
826
827

##############################################################################
# Static Routing Connectivity Test Functions
##############################################################################

Timothy Stack's avatar
   
Timothy Stack committed
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
# Traverse the links between nodes to figure out which nodes are actually
# reachable.  First parameter is a reference to an array that should be filled
# out with node names.  The second parameter contains the name of the node to
# visit.
sub reachable_nodes {
    my ($nodes_ref, $currnode) = @_;

    $hostmap{$currnode}->visited(1);
    foreach my $edge (@{ $hostmap{$currnode}->links }) {
	my $nextnode;

	if ($edge->src eq $currnode) {
	    $nextnode = $edge->dst;
	} 
	else {
	    $nextnode = $edge->src;
844
	}
Timothy Stack's avatar
   
Timothy Stack committed
845
846
847
	if ($hostmap{$nextnode}->visited == 0) {
	    if (!($currnode eq $hostname)) { # Don't add 1st hop nodes.
		push @{$nodes_ref}, $nextnode;
848
	    }
Timothy Stack's avatar
   
Timothy Stack committed
849
	    &reachable_nodes($nodes_ref, $nextnode);
850
	}
851
    }
Timothy Stack's avatar
   
Timothy Stack committed
852
853
    $hostmap{$currnode}->visited(2);
}
854

Timothy Stack's avatar
   
Timothy Stack committed
855
856
857
858
859
860
861
862
863
# Attempts to reach nodes that are not on a direct link
# with this host. IE, use TTL > 1. Pings are in parallel.
sub static_rt_test {
    my @nodes = ();

    &reachable_nodes(\@nodes, $hostname);
    &debug("Static route test nodes: @nodes\n");

    my %waitlist;
864
    # fork processes to run the pings in parallel.
Timothy Stack's avatar
   
Timothy Stack committed
865
866
867
868
869
870
871
872
873
874
    foreach my $dst (@nodes) {
	my $pid = fork();
	if(!$pid) {
	    my ($recv_cnt,$ignored1, $ignored2) = &ping_node($dst,0);
	    if(!$recv_cnt) {
		my $newEdge = new edge;
		$newEdge->src($hostname);
		$newEdge->dst($dst);
		&error(NAME_RT_STATIC,$newEdge , "$hostname could not ping $dst");
		exit(EXIT_NOT_OK);
875
	    } else {
Timothy Stack's avatar
   
Timothy Stack committed
876
		&debug("Attempting to reach $dst... OK\n");
877
	    }
Timothy Stack's avatar
   
Timothy Stack committed
878
879
880
	    exit(EXIT_OK);
	} else {
	    $waitlist{$pid} = 1;
881
882
	}
    }
Timothy Stack's avatar
   
Timothy Stack committed
883
    &wait_all(%waitlist);
884
885
    # wait for completion before next test.
    &barrier();
886
887
}

888

889
890
891
892
893
894
895
896

##############################################################################
# Utility Functions
##############################################################################

# Convenience to print information about a link.
sub print_link {
    my $edge = shift @_;
897
898
899
900
901
902
    my $str = $edge->src . " to " . $edge->dst;
    if ($edge->bw && $edge->delay && $edge->loss) {
	$str .= " (" . ($edge->bw / 1000000)  . " Mbps, " .
	    $edge->delay . "ms, " . (100 * $edge->loss) . "% loss)";
    }
    return $str;
903
}
904

905
# Handles reading NS output.
906
907
908
sub get_topo {
    my $ns_outfile = shift(@_);

909
910
    my @results = &read_file($ns_outfile);
    foreach (@results) {
911
912
913
914
915

	# load the output from ns.
	# the file format is simple:
	# expr := h <node name>
	#      || l <src node> <dst node> <bw (Mb/s)> <latency (s)> <loss (%)>
916
	if( /^h (\S+)/ ) {
Timothy Stack's avatar
   
Timothy Stack committed
917
918
919
920
921
922
	    push @hosts, $1;
	    my $newHost = new host;
	    $newHost->name($1);
	    $newHost->visited(0);
	    $hostmap{$1} = $newHost;
	} elsif ( /^l (\S+)\s+(\S+)\s+(\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)/) {
923
924
925
926
	    my $newEdge = new edge;
	    $newEdge->src($1);
	    $newEdge->dst($2);
	    $newEdge->bw($3);
927
	    $newEdge->delay($4 * 1000); # units of ms
928
	    $newEdge->loss($5);
929
	    push @links, $newEdge;
Timothy Stack's avatar
   
Timothy Stack committed
930
931
	    push @{ $hostmap{$newEdge->src}->links }, $newEdge;
	    push @{ $hostmap{$newEdge->dst}->links }, $newEdge;
932
933

	# currently recognize only Static routing
Timothy Stack's avatar
   
Timothy Stack committed
934
	} elsif (/^r [Ss]tatic/i) {
935
	    $rtproto = RTPROTO_STATIC;
Timothy Stack's avatar
   
Timothy Stack committed
936
937
938
939
	} elsif (/^s ([-\w\(\)]+)/i) {
	    $simname = $1;
	} else {
	    print "Bad line in map: $_\n";
940
941
942
	}
    }

943
944
945
    # sorted order.
    @hosts = sort { $a cmp $b } @hosts;
    @links = sort { $a->src . $a->dst cmp $b->src . $b->dst } @links;
946
947
948

}

949
950
951
952
953
954
955
# Send an info message.
sub info {
    my($msg) = @_;
    
    &post_event(EVENT_REPORT, "    " . $msg);
    &sim_event(EVENT_LOG, "    " . $msg);
}
956
957
958
959

# prints out the topology read in from the NS file
sub debug_top {
    &debug("nodes:\n");
960
    foreach my $vert (@hosts) {
961
962
963
	&debug( " " . $vert . "\n");
    }
    &debug("links:\n");
964
    foreach my $edge (@links) {
965
966
967
968
969
970
971
	&debug( " " . $edge->src . " " . $edge->dst . " " . $edge->bw
		. " " . $edge->delay . " " . $edge->loss . "\n"
		);
    }
    &debug("routing protocol: $rtproto\n") if defined($rtproto);
}

972
# log to expt problem directory.
973
sub error {
974
975
    my($test,$edge,$msg) = @_;

Timothy Stack's avatar
   
Timothy Stack committed
976
977
    $error_count += 1;

978
    my $output = "$test\n";
979
    $output .= "  Link:  " . &print_link($edge) . "\n";
980
981
    $output .=     "  Error: $msg\n\n";

982
983
984
    &debug($output);    
    &append_file($linktest_path . "/" . $hostname . SUFFIX_ERROR,
		 $output);
985
986
}

Timothy Stack's avatar
   
Timothy Stack committed
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
sub report_status {
    if ($hostname eq $synserv) {
	if ($stage_error_count) {
	    my $msg = "  Some tests had errors!";
	    &post_event(EVENT_REPORT,$msg);
	    &sim_event(EVENT_LOG,$msg);
	    &debug("\n$msg\n");
	}
	else {
	    my $msg = "  All tests were successful!";
	    &post_event(EVENT_REPORT,$msg);
	    &sim_event(EVENT_LOG,$msg);
	    &debug("\n$msg\n");
	}