linktest.pl 75.5 KB
Newer Older
1
#!/usr/bin/perl -w -T
2
#
3
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
5
# All rights reserved.
6
#
7

8 9
use strict;
use Class::Struct;
10
use POSIX qw(uname);
11
use POSIX qw(strftime);
12
use IO::Handle;
13
use English;
14
use Socket;
15

16
my $LINKTEST_VERSION = "1.2";
17

18 19 20
#
# Linktest test script. This script is set up to run as root on
# experiment nodes. It is invoked by the Linktest daemon after the
21 22 23 24 25 26
# daemon receives a Linktest "START" event. The script runs tests
# using ping, rude/crude (a real-time packet emitter/collector) and a
# locally hacked iperf to test all links in the experiment. If the
# results show a 99% chance that the experiment is configured
# incorrectly, an error is logged to the experiment directory in
# tbdata/linktest.  Valid ranges have been calibrated ahead of time.
27 28 29
#
sub usage() 
{
Timothy Stack's avatar
 
Timothy Stack committed
30 31 32
    print("Usage: linktest.pl\n".
	  " [STARTAT=<test step, 1-4>]\n".
	  " [STOPAT=<test step, 1-4>]\n".
Mike Hibler's avatar
Mike Hibler committed
33
	  " [COMPAT=<version or 0>]\n".
34 35 36
	  " [DOARP=<1=yes, 0=no>]\n".
	  " [REPORTONLY=<1=yes, 0=no>]\n".
	  " [NODES=<n1,n2...>\n".
37 38 39 40 41
	  " [DEBUG=<debugging level. 1=on, 0=off>]\n".
	  " [LOGRUN=<command/used/for/logging>]\n".
	  " [LOGDIR=<path/to/log/root>]\n".
	  " [BINDIR=<path/to/binary/files>]\n".
	  " [VARDIR=<path/to/config/files>]\n".
42
	  " [STANDALONE=<1|0>]\n".
43
	  " [EVENTSERVER=<eventserver hostname>]\n");
44
    print("    <test step>: 1=conn/latency, 2=routing, 3=loss, 4=BW\n".
Mike Hibler's avatar
Mike Hibler committed
45
	  "    COMPAT=<version>: remain compatible with version <version> or earlier\n".
46 47
	  "    DOARP=1: run a single-ping pass to create ARP entries\n".
	  "    REPORTONLY=1: report stats only, do not pass judgement\n".
48 49 50 51 52 53
	  "    NODES: comma-separated list of virtnames to run on\n".
	  "    LOGRUN: Command to run for high-level logging. The log message as its only argument.".
	  "    LOGDIR: Path used to store stderr logs of individual test utilities.\n".
	  "    BINDIR: Path must contain emulab-rude emulab-crude emulab-iperf\n".
	  "    VARDIR: Path must contain...\n".
	  "    EVENTSERVER: Hostname of event server\n");
54
    exit(0);
55 56
}

57

58 59 60
##############################################################################
# Constants
##############################################################################
61 62 63

# log files used by tests.
use constant CRUDE_DAT => "/tmp/crude.dat"; # binary data
64
use constant RUDE_CFG  => "/tmp/rude.cfg";
65
use constant IPERF_DAT => "/tmp/iperf.dat";
66

67 68 69
# Packet size for iperf (1470 default).  Reduce to avoid problems with veths
use constant IPERF_PKTSIZE => 1450;

70 71 72
# max time to run iperf
use constant BW_TEST_MAXDURATION => 10;

73
# iperf test limits.
74 75 76 77 78
use constant LIMIT_BW_HI  => 100000000;
use constant LIMIT_BW_MED =>  10000000;
use constant LIMIT_BW_LO  =>   1000000;
use constant LIMIT_BW_MIN =>     64000;
use constant LIMIT_BW_LOSS => 0.20;
79

80 81
# Make sure that we dont get bogged down in being too accurate! 
# Make sure the error is a certain significance before we start reporting it.
82
use constant INSIGNIFICANT_LAT_ERROR_LO => 0.50;  # ms
83
use constant INSIGNIFICANT_LAT_ERROR_HI => 3.50;  # ms
84 85
use constant INSIGNIFICANT_BW_ERROR_HI  => 0.015; # percent.
use constant INSIGNIFICANT_BW_ERROR_LO  => 0.03;  # percent.
Russ Fish's avatar
Russ Fish committed
86
use constant INSIGNIFICANT_BW_ERROR_LO_Windows  => 0.10;  # Lower expectations.
87

88
# slow send rate (for bw from LIMIT_BW_MIN to LIMIT_BW_LO)
89 90 91
use constant SLOW_SEND => 100;
use constant FAST_SEND => 250;
use constant LOSS_TEST_DURATION => 4;	# In seconds.
92

93
# misc contstants
94
use constant BSD => "FreeBSD";
95
use constant LINUX => "Linux";
96
use constant RTPROTO_STATIC => "Static";
97
use constant RTPROTO_SESSION => "Session";
Timothy Stack's avatar
 
Timothy Stack committed
98
use constant EVENT_COMPLETE => "COMPLETE";
99
use constant EVENT_REPORT => "REPORT";
Timothy Stack's avatar
 
Timothy Stack committed
100
use constant EVENT_LOG => "LOG";
101
use constant PING_SEND_COUNT => 10;
David Anderson's avatar
David Anderson committed
102
use constant SYNC_NAMESPACE => "linktest";
103 104 105 106

# crude should have higher priority than rude on the same machine
use constant RUDE_PRI => 3;
use constant CRUDE_PRI => 5;
107 108

# test levels
109 110 111 112
use constant TEST_LATENCY => 1; # direct link connectivity & latency
use constant TEST_RT_STATIC => 2;   # prior plus static routing
use constant TEST_LOSS => 3;   # prior plus loss
use constant TEST_BW => 4; # prior plus bandwidth
113
use constant TEST_UNLINK => 5; # prior plus unconnected interfaces
114

115
# test names
116
use constant NAME_RT_STATIC => "Routing";
117
use constant NAME_LATENCY => "Latency";
118 119
use constant NAME_LOSS => "Loss";
use constant NAME_BW => "Bandwidth";
120
use constant NAME_UNLINK => "Unlink";
121

122
# error suffix for logs
123
use constant SUFFIX_ERROR => ".error";
124
use constant DEBUG_ALL => 2; # debug level for all debug info, not just msgs.
125

126 127
# exit codes
use constant EXIT_ABORTED => -1;
Timothy Stack's avatar
 
Timothy Stack committed
128
use constant EXIT_NOT_OK => 1;
129 130
use constant EXIT_OK => 0;

131 132 133
# Protos
sub TimeStamp();
sub PATH_NICE();
134 135 136

# struct for representing a link.
struct ( edge => {
137
    name => '$',
138
    src => '$',
139
    srcip => '$',
140
    dst => '$',
141
    dstip => '$',
142
    bw  => '$',
143 144
    testbw  => '$',
    bwtime  => '$',
145
    delay => '$',
146
    loss => '$',
147 148 149 150 151
    queuetype => '$',
    mac => '$',
    mpxstyle => '$',
    dstyle => '$',
    symlanignore => '$'});
152

Timothy Stack's avatar
 
Timothy Stack committed
153 154 155
struct ( host => {
    name => '$',
    visited => '$',
156 157 158 159 160 161 162 163 164
    links => '@',
    isvnode => '$',
    pname => '$',
    phost => '$',
    ptype => '$',
    osid => '$',
    os => '$',
    osvers => '$',
    osfeatures => '$'});
Timothy Stack's avatar
 
Timothy Stack committed
165

166
# fixes emacs colorization woes introduced by above struct definition.
Timothy Stack's avatar
 
Timothy Stack committed
167
# struct ( unused => { foo => '$'});
168

169 170 171
use constant TRUE => 1;
use constant FALSE => 0;

172 173 174 175
##############################################################################
# Globals
##############################################################################

176
my $topology_file;    # location of the topology input file.
177
my $ptopology_file;   # location of the physical topology input file.
178
my $synserv;    # synch server node
179
my $rtproto;    # routing protocol
180
my $hostname;   # this hosts name
181 182 183
my $exp_id;     # experiment id
my $proj_id;    # project id
my $platform;   # name of platform
184 185
my $startat=1;  # which test to start at
my $stopat=99;  # which test to stop at
186
my %kill_list;  # PIDS that are running as background procs, needing cleanup.
187
my $debug_level = 0; # enable debug statements
188 189
                     # 1 = print debug statements.
                     # 2 = show STDOUT and STDERR
190 191 192
my $arpit = 1;	# do a single ping to force ARP exchange
my $reportonly = 0; # just report the values seen, no pass/fail judgement
my $printsched = 0; # just print the test schedule
Mike Hibler's avatar
Mike Hibler committed
193
my $compat = 1.1;   # be compatible (wrt. synch) with old versions
194
my $barriers_hit = 1;
195
my $barr_count;   # used by synserv host, nubmer of hosts -1
196
my $log_file;    # common logfile for information saved over time.
197 198

my @hosts; # hosts: list of text strings containing host names.
199
           # sorted alphabetically
Timothy Stack's avatar
 
Timothy Stack committed
200
my %hostmap;
201 202 203
my $numvnodes = 0;
my %vhostmap;
my %linkmembers;
204
my @links; # links: list of edge structs.
205
           # sorted alphabetically by src . dst
206

207
my $linktest_path;   # log path (ie tbdata/linktest) set by init.
Timothy Stack's avatar
 
Timothy Stack committed
208 209 210 211 212 213 214 215
my $simname = "ns";
my $swapper = "";
my $swapperid = 0;
my $swappergid = 0;
my $token = -1;
my $error_count = 0;
my $stage_error_count = 0;
my $total_error_count = 0;
216

217 218 219
my $warn_partial_test = 0;
my $warn_unshaped_links = 0;

220 221 222
my $listener_iperf;
my $listener_crude;

223 224 225
##############################################################################
# Main control
##############################################################################
226

227 228 229 230 231 232
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

$| = 1; #Turn off line buffering on output

233 234 235
# Make sure that files written into the experiment subdir are group writable.
umask(0002);

236 237 238 239 240
# Traditional Emulab defaults that can be overridden but must be set
our $VARDIR = "/var/emulab";
our $BINDIR = "/usr/local/etc/emulab";
our $PROJDIR = "/proj";

241
our $LOGRUN = "";
242
our $LOGDIR = "";
243
our $SHAREDDIR = "";
244
our $EVENTSERVER = "";
245
our $EVENTID = "";
246
our $STANDALONE = 0;
247

248 249 250 251 252 253 254 255 256 257 258
#
# Parse command arguments. Since Linktest is run via the event system,
# parse out pairs of <symbol>=<value>.
#
foreach my $arg (@ARGV) {
    if($arg =~ /STOPAT=(\d)/) {
	$stopat=$1;
    }
    if($arg =~ /STARTAT=(\d)/) {
	$startat=$1;
    }
Mike Hibler's avatar
Mike Hibler committed
259 260 261
    if($arg =~ /COMPAT=(\d(?:\.\d+))/) {
	$compat=$1/1.0;
    }
262 263 264 265 266 267
    if($arg =~ /DOARP=(\d)/) {
	$arpit=$1;
    }
    if($arg =~ /REPORTONLY=(\d)/) {
	$reportonly=$1;
    }
268 269 270
    if($arg =~ /DEBUG=(\d)/) {
	$debug_level=$1;
    }
271 272 273
    if($arg =~ /PRINTSCHED=(\d)/) {
	$printsched=$1;
    }
Timothy Stack's avatar
 
Timothy Stack committed
274 275 276 277 278 279 280
    if($arg =~ /TOKEN=(\d+)/) {
	$token=$1;
    }
    if($arg =~ /SWAPPER=(\w+)/) {
	$swapper=$1;
	(undef,undef,$swapperid,$swappergid) = getpwnam($swapper);
    }
281 282 283
    if($arg =~ /LOGRUN=(.+)/) {
	$LOGRUN = $1;
    }
284 285 286
    if($arg =~ /SHAREDDIR=(.+)/) {
	$SHAREDDIR = $1;
    }
287
    if($arg =~ /LOGDIR=(.+)/) {
288
	$LOGDIR = $1;
289 290 291 292
    }
    if($arg =~ /VARDIR=(.+)/) {
	$VARDIR = $1;
    }
293 294 295
    if($arg =~ /PROJDIR=(.+)/) {
	$PROJDIR = $1;
    }
296 297 298 299 300 301
    if($arg =~ /BINDIR=(.+)/) {
	$BINDIR = $1;
    }
    if($arg =~ /EVENTSERVER=(.+)/) {
	$EVENTSERVER = $1;
    }
302 303 304 305 306
    if ($arg =~ /STANDALONE=(.+)/) {
	if ($1 eq "1") {
	    $STANDALONE = 1;
	}
    }
307 308
}

Mike Hibler's avatar
Mike Hibler committed
309 310 311 312 313 314 315 316
$compat = 99
    if ($compat == 0);

# XXX no arp test in 1.1 and before
if ($compat < 1.2) {
    $arpit = 0;
}

317 318
# path to applications and files
our $PATH_NICKNAME = "$VARDIR/boot/nickname";
319
our $PATH_CONTROL_IF = "$VARDIR/boot/controlif";
320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
our $PATH_KEYFILE = "$VARDIR/boot/eventkey";
our $PATH_RUDE = "$BINDIR/emulab-rude";
our $PATH_CRUDE = "$BINDIR/emulab-crude";
our $PATH_IPERF = "$BINDIR/emulab-iperf";
our $PATH_RCTOPO = "$BINDIR/rc/rc.topomap";
our $PATH_EMULAB_SYNC = "$BINDIR/emulab-sync";
our $PATH_LTEVENT = "$BINDIR/ltevent";
our $PATH_TEVC = "$BINDIR/tevc";
our $RUN_PATH = "$BINDIR"; # where the linktest-ns runs.
our $PATH_SCHEDFILE = "$VARDIR/logs/linktest.sched";
our $PATH_SYNCSERVER = "$VARDIR/boot/syncserver";
our $PATH_TOPOFILE = "$VARDIR/boot/ltmap";
our $PATH_PTOPOFILE = "$VARDIR/boot/ltpmap";

my $schedfile = $PATH_SCHEDFILE;
if ($printsched) {
    open(SCHED, ">$schedfile") or
    die("Could not open schedule log file $schedfile");
} else {
    unlink($schedfile);
}

342 343 344 345
#
# Parse the nickname file to obtain the host name, 
# experiment ID and the project ID.
#
346
my $fname = $PATH_NICKNAME;
347 348 349 350 351 352
if (-r $fname) {
    my $name = `cat $fname`;
    if ($name =~ /([-\@\w]*)\.([-\@\w]*)\.([-\@\w]*)/) {
	$hostname = $1;
	$exp_id = $2;
	$proj_id = $3;
353 354
    } elsif ($name =~ /^([-\@\w]*)$/) {
	$hostname = $1;
355 356 357 358 359
    } else {
	die("Could not parse $fname info\n");
    }
} else {
    die("Could not locate $fname\n");
Mike Hibler's avatar
Mike Hibler committed
360
}
361 362 363 364 365 366 367 368

#
# Now that we know the pid/eid, defaults some values (unless otherwise set).
#
$LOGDIR = "$PROJDIR/$proj_id/exp/$exp_id/tbdata"
    if ($LOGDIR eq "");
$EVENTID = "$proj_id/$exp_id"
    if ($EVENTID eq "");
369 370 371 372 373

#
# Set path variables storing the experiment logging path,
# the current ns file and the output file for topology info.
#
374
$linktest_path = "$LOGDIR/linktest";
375 376
$topology_file = $PATH_TOPOFILE;
$ptopology_file = $PATH_PTOPOFILE;
377 378

#
379 380
# Determine what OS we are.  Used for handling the occasional difference
# in the location of or options to system utilities (e.g., ping).
381 382 383 384
#
($platform) = POSIX::uname();

#
385 386 387 388 389 390
# First, everyone parses the topo files to see who is participating in
# this run.  All instances will be started simultaneuously from the event
# system, so we throw a random sleep in here to keep nodes from pounding
# the NFS server.
#
sleep(int(rand(5)));
391 392 393
if (! $STANDALONE) {
    &my_system($PATH_RCTOPO, "reconfig");
}
394
&get_topo($topology_file, $ptopology_file);
395 396 397 398 399 400
&debug_top();

#
# Finally, see who the master is for synchronization.
# We prefer the synch server node for the experiment, but if that node
# is not participating, we choose the first node on the host list.
401
#
402
$synserv = "";
403
my $ssname = $PATH_SYNCSERVER;
404
if ($ssname) {
405
    my @results = &read_file($ssname);
406 407 408
    ($synserv) = split/\./, $results[0];
    chomp $synserv;
}
409
if (@hosts > 0 && (!$synserv || !exists($hostmap{$synserv}))) {
410 411 412
    $synserv = $hosts[0];
}
&debug("Synch master is $synserv\n");
413

414
#
415 416
# If the current node is the special node, do some housekeeping
# and initialize the barrier count.
417
#
418
if(&is_special_node() || !$SHAREDDIR) {
419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
    #
    # If the shared path used by Linktest for logging and temporary
    # files already exists, clear its contents for this run.
    #
    if( -e $linktest_path ) {
	die("Path $linktest_path is not a directory\n") 
	    unless -d $linktest_path;

	opendir (DIR,$linktest_path)
	    ||  die("Could not open $linktest_path: $!");
	my @dirfiles = grep (/error$/,readdir(DIR));
	foreach (@dirfiles) {
	    &do_unlink("$linktest_path/$_");
	}
	closedir(DIR);

    } else {
	# 
	# The shared path does not exist, create it.
	#
	mkdir (&check_filename($linktest_path),0777) 
	    || die("Could not create directory $linktest_path: $!");
Timothy Stack's avatar
 
Timothy Stack committed
441
	chown($swapperid, $swappergid, $linktest_path);
442 443
    }

444 445 446 447
    if (&is_special_node()) {
	$barr_count = @hosts;
	$barr_count--;
    }
448
}
449

450 451 452 453 454 455 456 457 458 459 460 461 462 463
#
# If there are no links to test, there is nothing to do!
# Do this after we have identified the synch server.
#
if (@links == 0) {
    &debug("No links to test!\n");

    my $msg = "Linktest skipped, no links";
    &sim_event(EVENT_LOG,$msg);
    &debug("\n$msg\n\n");
    &post_event(EVENT_COMPLETE,"ERROR=0 CTOKEN=$token");
    exit(EXIT_OK);
}

464 465 466 467 468 469 470 471 472 473 474 475 476
#
# If it has been determined that we are not a part of the run,
# exit now so we don't screw up the barrier synchs.  Note that post_event
# will only happen if we are the synch server and we can only be the
# synch server if no other node is participating in the run either.
#
if (!exists($hostmap{$hostname})) {
    &debug("$hostname not participating in this linktest run.\n");
    &sim_event(EVENT_LOG,"Linktest skipped, no nodes participating");
    &post_event(EVENT_COMPLETE,"ERROR=0 CTOKEN=$token");
    exit(EXIT_OK);
}

477 478 479 480 481
#
# All nodes remove local temporary files remaining from the last
# run, if any.
# 
&do_unlink(CRUDE_DAT);
482
&do_unlink(IPERF_DAT);
483 484 485
&do_unlink(RUDE_CFG);

#
486
# Start up listeners; they run over the lifetime of Linktest to
487
# reduce the number of barrier synchronizations and startup
488
# delays. Always give the collectors a second to start up.
489
#
490 491 492 493 494 495
my $listeners = 0;
if (&dotest(TEST_BW)) {
    if ($printsched) {
	&schedlog("start iperf listener");
    } else {
	$listener_iperf = &start_listener(PATH_NICE, "-n", "-10",
496
					  $PATH_IPERF,"-s","-f","b","-u",
497 498 499 500 501 502 503 504
					  "-w","200000","-l",IPERF_PKTSIZE);
	$listeners++;
    }
}
if (&dotest(TEST_LOSS)) {
    if ($printsched) {
	&schedlog("start crude listener");
    } else {
505
	$listener_crude = &start_listener($PATH_CRUDE,"-l",CRUDE_DAT,
506 507 508 509 510
					  "-P",CRUDE_PRI);
	$listeners++;
    }
}
if ($listeners) {
511 512 513 514 515 516 517 518 519
    sleep(1);
}

#
# Finally, synchronize so that all nodes have completed startup
# tasks. When all nodes reach this barrier, the topology input file
# has been written, local variables are initialized, background
# listeners have been started, and temporary files have been cleared.
#
520 521 522
if ($printsched) {
    &schedlog("barrier $barriers_hit: post-startup");
}
523 524 525
&barrier;

#
526
# Beginning of the tests.
527
#
528 529 530
my $msg = "Linktest Version $LINKTEST_VERSION";
&sim_event(EVENT_LOG, $msg);
&post_event(EVENT_REPORT, $msg);
Timothy Stack's avatar
 
Timothy Stack committed
531 532
&debug("\n$msg\n\n");

533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550
#
# Print out any warnings that alter the behavior of the run
#
if ($warn_partial_test) {
    my $msg = "*** WARNING: some hosts do not support linktest,".
	      " skipping links between those hosts";
    &sim_event(EVENT_LOG, $msg);
    &post_event(EVENT_REPORT, $msg);
    &debug("\n$msg\n\n");
}
if ($warn_unshaped_links && &dotest(TEST_BW)) {
    my $msg = "*** WARNING: tb-set-noshaping used on one or more links,".
	      " skipping BW tests for those links.";
    &sim_event(EVENT_LOG, $msg);
    &post_event(EVENT_REPORT, $msg);
    &debug("\n$msg\n\n");
}

551 552 553 554 555 556 557
if (defined($rtproto) && $rtproto eq RTPROTO_SESSION) {
    my $msg = "Session routing active; waiting a bit to let routes stabilize";
    &sim_event(EVENT_LOG,$msg);
    &debug("\n$msg\n\n");
    sleep(30);
}

558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
if ($arpit) {
    my $stamp = TimeStamp();
    my $msg   = "Pre-test ARP on all nodes ... $stamp";
    &post_event(EVENT_REPORT,$msg);
    &sim_event(EVENT_LOG,$msg);
    # Ick, this barrier makes sure the above message gets into the log
    # first, so as not to confuse Mike
    if ($printsched) {
	&schedlog("barrier $barriers_hit: pre-arp test");
    }
    &barrier();
    &debug("\n$msg\n\n");
    &force_arp;
}

573
if(&dotest(TEST_LATENCY)) {
574 575
    my $stamp = TimeStamp();
    my $msg   = "Testing Single Hop Connectivity and Latency ... $stamp";
576
    &post_event(EVENT_REPORT,$msg);
Timothy Stack's avatar
 
Timothy Stack committed
577
    &sim_event(EVENT_LOG,$msg);
578 579
    # Ick, this barrier makes sure the above message gets into the log
    # first, so as not to confuse Mike
580 581 582
    if ($printsched) {
	&schedlog("barrier $barriers_hit: pre-latency test");
    }
583
    &barrier();
584
    &debug("\n$msg\n\n");
585
    &latency_test;
586
    &report_status(NAME_LATENCY);
587 588
}

589
if(&dotest(TEST_RT_STATIC)
590
    && defined($rtproto)
591
    && ($rtproto eq RTPROTO_STATIC || $rtproto eq RTPROTO_SESSION)) {
592
    my $msg;
593
    my $stamp = TimeStamp();
594 595 596 597 598

    if ($total_error_count) {
	$msg = "Skipping Routing tests because of previous errors!";
    }
    else {
599
	$msg = "Testing Routing ... $stamp";
600
    }
601
    &post_event(EVENT_REPORT,$msg);
Timothy Stack's avatar
 
Timothy Stack committed
602
    &sim_event(EVENT_LOG,$msg);
603 604
    # Ick, this barrier makes sure the above message gets into the log
    # first, so as not to confuse Mike
605 606 607
    if ($printsched) {
	&schedlog("barrier $barriers_hit: pre-routing test");
    }
608
    &barrier();
609
    &debug("\n$msg\n\n");
610 611
    if (! $total_error_count) {
	&static_rt_test; # nodes not covered by 1hop test
612
	&report_status(NAME_RT_STATIC);
613 614
    }
    else {
615 616 617
	if ($printsched) {
	    &schedlog("barrier $barriers_hit: post-routing test");
	}
618 619
	&barrier();
    }
620
}
621

622
if(&dotest(TEST_LOSS)) {
623 624
    my $stamp = TimeStamp();
    my $msg = "Testing Loss ... $stamp";
625
    &post_event(EVENT_REPORT,$msg);
Timothy Stack's avatar
 
Timothy Stack committed
626
    &sim_event(EVENT_LOG,$msg);
627 628
    # Ick, this barrier makes sure the above message gets into the log
    # first, so as not to confuse Mike
629 630 631
    if ($printsched) {
	&schedlog("barrier $barriers_hit: pre-loss test");
    }
632
    &barrier();
633
    &debug("\n$msg\n\n");
634
    &loss_test; 
635
    &report_status(NAME_LOSS);
636
}
637

638
if(&dotest(TEST_BW)){
639 640
    my $stamp = TimeStamp();
    my $msg = "Testing Bandwidth ... $stamp";
641
    &post_event(EVENT_REPORT,$msg);
Timothy Stack's avatar
 
Timothy Stack committed
642
    &sim_event(EVENT_LOG,$msg);
643
    # Ick, this barrier makes sure the above message gets into the log
644
    # first, so as not to confuse Mike
645 646 647
    if ($printsched) {
	&schedlog("barrier $barriers_hit: pre-bandwidth test");
    }
648
    &barrier();
649
    &debug("\n$msg\n\n");
650
    &bw_test;
651
    &report_status(NAME_BW);
652
}
653

654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
if(&dotest(TEST_UNLINK)) {
    my $stamp = TimeStamp();
    my $msg   = "Testing Unconnected Interfaces ... $stamp";
    &post_event(EVENT_REPORT,$msg);
    &sim_event(EVENT_LOG,$msg);
    # Ick, this barrier makes sure the above message gets into the log
    # first, so as not to confuse Mike
    if ($printsched) {
	&schedlog("barrier $barriers_hit: pre-unlink test");
    }
    &barrier();
    &debug("\n$msg\n\n");
    &unlink_test;
    &report_status(NAME_UNLINK);
}

670 671
&cleanup;

672 673 674
if ($printsched) {
    &schedlog("barrier $barriers_hit: post-test");
}
675 676
&barrier();

Timothy Stack's avatar
 
Timothy Stack committed
677 678 679 680
$msg = "Linktest Done";
&sim_event(EVENT_LOG,$msg);
&debug("\n$msg\n\n");

681 682 683
#
# Send an event indicating that Linktest has completed normally.
#
Timothy Stack's avatar
 
Timothy Stack committed
684
&post_event(EVENT_COMPLETE,"ERROR=$total_error_count CTOKEN=$token");
685

686
exit(EXIT_OK);
687

688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794
##############################################################################
# Unlink Test Functions
##############################################################################

our $control_if = "";
our %interfaces = ();

sub setup_interfaces {
    $control_if = `cat $PATH_CONTROL_IF`;
    chomp($control_if);
    foreach my $link (@{ $hostmap{$hostname}->links }) {
	$interfaces{lc($link->mac)} = 1;
	my $foo = $link->mac;
    }
}

sub get_iflist {
    my $raw = `/sbin/ifconfig -a`;
    my @result = split("\n\n", $raw);
    return \@result;
}

sub gather_stats {
    my @result = ();
    my @iflist = @{ &get_iflist() };
    foreach my $ifline (@iflist) {
	if ($ifline =~ /^(\w+)\W.*RX packets:([0-9]+) /) {
	    if ($1 ne $control_if) {
		push(@result, $2);
	    }
	}
    }
    return \@result;
}

sub check_stats {
    my @first = @{ $_[0] };
    my @second = @{ $_[1] };
    my $result = 0;
    if (scalar(@first) == scalar(@second)) {
	$result = 1;
	for (my $i = 0; $i < scalar(@first); ++$i) {
	    if ($first[$i] != $second[$i]) {
		$result = 0;
		last;
	    }
	}
    }
    return $result;
}

sub arpping {
    my @iflist = @{ &get_iflist() };
    foreach my $ifline (@iflist) {
	if ($ifline =~ /^(\w+)\W.*HWaddr ([0-9a-fA-F:]+)/) {
	    my $ifname = $1;
	    my $mac = lc(join('', split(':', $2)));
	    if (! exists($interfaces{$mac}) && $ifname ne $control_if) {
		my $command
		    = "sudo /sbin/ifconfig $ifname up; ".
		      "sudo /sbin/arping -c 1 -w 1 -I $ifname 10.0.0.1; ".
		      "sudo /sbin/ifconfig $ifname down";
#		print($command."\n");
		system($command);
	    }
	}
    }
}

sub modify_interfaces {
    my ($speed, $duplex) = @_;
    my @iflist = @{ &get_iflist() };
    foreach my $ifline (@iflist) {
	if ($ifline =~ /^(\w+)\W.*HWaddr ([0-9a-fA-F:]+)/) {
	    my $ifname = $1;
	    my $mac = lc(join('', split(':', $2)));
	    if (! exists($interfaces{$mac}) && $ifname ne $control_if) {
		my $command = "sudo /sbin/ethtool -s $ifname speed $speed ".
		              "duplex $duplex autoneg off";
#		print($command."\n");
		system($command);
	    }
	}
    }
}

sub unlink_test {
    my @speeds = ('10', '100', '1000');
    my @duplexes = ('half', 'full');
    &setup_interfaces();
    my $start = &gather_stats();
    &barrier();
    &arpping();
    foreach my $speed (@speeds) {
	foreach my $duplex (@duplexes) {
	    &modify_interfaces($speed, $duplex);
	    &arpping();
	}
    }
    &barrier();
    my $end = &gather_stats();
    if (! &check_stats($start, $end)) {
	&error(NAME_UNLINK, undef,
	       "Some interfaces received packets");
    }
}

795

796
##############################################################################
797
# Loss Test Functions
798 799
##############################################################################

800

801
# Writes the configuration file used by RUDE.
802 803
sub write_rude_cfg {
    my ($stream_id, $edge) = @_;
804 805
    my $sample_size = &get_loss_sample_size($edge);
    my $millis      = LOSS_TEST_DURATION * 1000;
806
    my @contents;
807
    
808 809 810 811 812 813 814 815 816
    #
    # Run for the desired time and then tell rude to not transmit, but
    # wait, for the edge one-way latency time.  This way rude will not
    # exit before the last packet has a chance to arrive at the destination.
    # Delaying here simplifies the barrier synchronization (we don't have
    # to do a sub-second delay in perl).  Note that a final, zero rate
    # MODIFY like we use here only works in our modified version of rude!
    # Stock rude will just exit after the last actual transmission; i.e.,
    # prior to the final one-way latency wait.
817 818 819 820
    #
    # START <when>
    # <start-offset-ms> <flowID> ON <src-port> \
    #    <dst-addr>:<dst-port> CONSTANT <packets per second> <packet size>
821 822
    # <stop-offset-ms> <flowID> MODIFY CONSTANT 0 <XX>
    # <stop-offset-ms+one-way-link-latency> <flowID> OFF
823 824 825 826 827
    #
    # For our purposes, the variables are:
    #	$sample_size	packet rate
    #	$millis		time to run
    #
828
    push @contents, "START NOW\n";
829 830
    # Let bind() choose the src port; any constant port may be already in use.
    push @contents, "0000 $stream_id ON 0 " 
831
	. $edge->dst . "-" . $edge->name
832
	    . ":10001 CONSTANT $sample_size 20\n";
833 834 835 836
    if ($edge->delay) {
	push @contents, "$millis $stream_id MODIFY CONSTANT 0 20\n";
	$millis += int($edge->delay);
    }
837
    push @contents, "$millis $stream_id OFF\n";
838 839

    &write_file(RUDE_CFG, @contents);
840 841 842

}

843 844
# Returns the sample size used by the Loss test.
# TODO: why this number? (from my ProbStats book.)
845 846 847
sub get_loss_sample_size {
    my $edge = shift @_;
    if($edge->loss > 0) {
848
	return &round( 2.5 / $edge->loss);
849
    } else {
850 851
	# just in case a slow link with no loss.
	return SLOW_SEND; 
852 853 854
    }
}

855 856 857 858


# returns TRUE if the link loss is valid for the linktest loss test.
sub valid_loss {
859
    my $edge = shift @_;
860
    if($edge->bw >= LIMIT_BW_MIN && $edge->bw < LIMIT_BW_LO) {
861 862 863 864 865 866 867 868 869 870 871 872 873 874 875
	if(&get_loss_sample_size($edge) > SLOW_SEND) {
	    return FALSE;
	} else {
	    return TRUE;
	}
    } elsif( $edge->bw >= LIMIT_BW_LO) {
	# also want an upper limit.
	if(&get_loss_sample_size($edge) > FAST_SEND) {
	    return FALSE;
	} else {
	    return TRUE;
	}
    } else {
	return FALSE;
    }
876
}
877 878 879 880


# This test uses RUDE and CRUDE to send a stream of packets
# in both directions over a duplex link.
881 882 883 884 885
sub loss_test {
    my %analyze;
    my %recv_cnt;
    my $stream_id = 1;
    my @edge_copy = @links;
886 887 888
    my $trun = 1;
    my $rude_arg = "";

Mike Hibler's avatar
Mike Hibler committed
889 890
    # XXX version 1.1 compatibility; used to start crude here and wait
    if ($compat < 1.2) {
891
	&debug("performing barrier synch for backward compatibility\n");
Mike Hibler's avatar
Mike Hibler committed
892 893 894
	&barrier();
    }

895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910
    #
    # XXX "concession" to vnodes: the stock rude is, well..."rude",
    # when it comes to CPU usage.  It spins between time intervals
    # which is rather painful for vnodes.  So we have a local version
    # which allows sleeping between intervals if the clock resolution
    # is sufficient.  We send 100 pps (10000us) and vnodes have a 1000HZ
    # (1000us) clock, which qualifies as sufficient.
    #
    # So, we add the extra rude option if conditions are met.
    #
    if ($numvnodes && $hostmap{$hostname}->isvnode) {
	my $hz = `/sbin/sysctl kern.clockrate 2>/dev/null`;
	if ($hz =~ /\shz = (\d+),/) {
	    $rude_arg = "-C $1";
	}
    }
911

912
    while(&has_elems(\@edge_copy)) {
913
	my ($edge,$other_edge) = &get_twoway_assign(\@edge_copy, 1);
914 915 916 917
	if(defined($edge) && defined($other_edge)) {
	    if($hostname eq $edge->src) {
		if(valid_loss($edge)) {
		    &write_rude_cfg($stream_id,$edge);
918 919 920 921 922 923 924
		    if ($printsched) {
			&schedlog("rude " . schedprint_link($edge) .
				  " (pps=" .
				  &get_loss_sample_size($edge) .
				  ", time=" .
				  LOSS_TEST_DURATION . "s, psize=20)");
		    } else {
925
			&my_system($PATH_RUDE,"-s", RUDE_CFG, "-P", RUDE_PRI,
926 927 928
				   $rude_arg);
			$analyze{$stream_id} = $other_edge;
		    }
929
		} else {
930 931 932 933
		    if ($printsched) {
			&schedlog("skipping loss test " .
				  schedprint_link($edge));
		    }
934 935 936 937
		    &debug("Skipping loss test for " .
			   &print_link($edge) . "\n");
		    &info("*** Skipping loss test on $hostname for " .
			  &print_link($edge) . "\n");
938 939 940 941
		}
	    } elsif ($hostname eq $other_edge->src) {
		if(valid_loss($other_edge)) {
		    &write_rude_cfg($stream_id,$other_edge);
942 943 944 945 946 947 948
		    if ($printsched) {
			&schedlog("rude " . schedprint_link($other_edge) .
				  " (pps=" .
				  &get_loss_sample_size($edge) .
				  ", time=" .
				  LOSS_TEST_DURATION . "s, psize=20)");
		    } else {
949
			&my_system($PATH_RUDE,"-s", RUDE_CFG, "-P", RUDE_PRI,
950 951 952
				   $rude_arg);
			$analyze{$stream_id} = $edge;
		    }
953
		} else {
954 955 956 957
		    if ($printsched) {
			&schedlog("skipping loss test " .
				  schedprint_link($other_edge));
		    }
958 959 960 961
		    &debug("Skipping loss test for " .
			   &print_link($other_edge) . "\n");
		    &info("*** Skipping loss test on $hostname for " .
			  &print_link($other_edge) . "\n");
962 963 964 965
		}
	    }
	}
	$stream_id++;
966 967 968 969
	if ($printsched) {
	    &schedlog("barrier $barriers_hit: loss: after run $trun");
	    $trun++;
	}
970 971 972 973 974 975 976 977
	&barrier();
    }

    # wait for any stragglers due to delay-- there is a  race
    # between the barrier sync on the control net and the expt net latency.
    sleep(1);

    # count packets received for each stream.
978 979 980 981
    my @results;
    if ($printsched) {
	@results = ();
    } else {
982
	@results = &my_tick($PATH_CRUDE,"-d",CRUDE_DAT);
983
    }
984 985 986
    my $result_count = @results;
    &debug("result_count from crude: $result_count\n");
    foreach (@results) {
987 988 989 990 991
	if(/ID=(\d+) /) {
	    $recv_cnt{$1}++;
	}
    }

992
    # analyze only links for which a stream was received.
993 994
    foreach my $key (keys %analyze) {
	my $edge = $analyze{$key};
995
	my $sent = (&get_loss_sample_size($edge) * LOSS_TEST_DURATION) + 1 ;
996 997
	my $received = $recv_cnt{$key};

998 999 1000 1001
	if ($reportonly) {
	    &info("    Loss result on $hostname for " .
		  &print_edge($edge) .
		  ": sent/recv = $sent/$received\n");
1002
	    next;
1003 1004
	}

1005 1006 1007 1008 1009 1010 1011 1012
	if(!defined($received)) {
	    $received=0;
	    &error (NAME_LOSS,$edge,"No packets received from " . $edge->src);
	} else {
	    # this is a large sample test about proportion p.
	    # this is considered a valid statistical estimate for np >= 10.
	    my $p = 1 - $edge->loss;

1013
	    my $p_hat = $received / $sent;
1014
	    my $numerator = $p_hat - $p;
1015
	    my $denominator = sqrt( abs( $p * (1 - $p_hat) / $sent) );
1016