automanage.pl 13.2 KB
Newer Older
1
#!/usr/bin/perl -w
Dan Gebhardt's avatar
Dan Gebhardt committed
2 3 4 5 6
#
# EMULAB-COPYRIGHT
# Copyright (c) 2006 University of Utah and the Flux Group.
# All rights reserved.
#
7

8
use strict;
9 10
use lib '/usr/testbed/lib';
use libxmlrpc;
11
use libwanetmon;
12 13
use English;
use Getopt::Std;
14 15
use IO::Socket::INET;
use IO::Select;
16

17 18
$| = 1;

19
my ($constrFilename, $expid, $bwdutycycle, $port);
20 21 22 23 24
my $numsites;
my %test_per = (  # defaults
	       "latency" => 300,
	       "bw"      => 0,
	       );
25
$thisManagerID = "automanagerclient";
26 27 28
my %intersitenodes = (); #final list for fully-connected test
my @constrnodes;    #test constrained to these nodes
my %sitenodes;      #hash listing all sites => nodes
29
my $CPUUSAGETHRESHOLD = 10;  #should help prevent flip-flopping between
30
                             #"best" nodes at a site
31 32
                             #TODO: document normal range of values for CPU
my $SITEDIFFTHRESHOLD = 5;   #number of site differences between period
33
                             #calculations that trigger an update
34
my $IPERFDURATION = 5;      #duration in seconds of iperf test
35
my %allnodes;
36
my %deadsites;
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52

# RPC STUFF ##############################################
my $TB         = "/usr/testbed";
my $ELABINELAB = 0;
my $RPCSERVER  = "boss.emulab.net";  #?
my $RPCPORT    = "3069";
#my $RPCCERT    = "/etc/outer_emulab.pem"; #?
my $RPCCERT = "~/.ssl/emulab.pem";
my $MODULE = "node";
my $METHOD = "getlist";
# END RPC STUFF ##########################################


sub usage 
{
	warn "Usage: $0 [-BLP] [-f constraint file] [-l latency test period]".
53 54
	     " [-e pid/eid]".
	     " <bandwidth duty cycle 0-1>".
55
#	     " <number of sites or \"all\">\n".
56 57 58 59 60 61
	     "where: -B = Do not measure bandwidth\n".
	     "       -L = Do not measure latency\n".
	     "       -P = Do not measure latency to nodes not responding to pings\n";
	return 1;
}

62 63 64 65 66 67 68

my %opt = ();
getopts("B:L:P:f:l:e:s:p",\%opt);
#TODO: other options
if( $opt{f}) { $constrFilename = $opt{f}; }
if( $opt{l}) { $test_per{latency} = $opt{l}; }
if ($opt{e}) { $expid = $opt{e}; } else { $expid = "none"; }
69
if ($opt{p}) { $port = $opt{p}; } else{ $port = 5052; }
70

71 72
setcmdport($port);  #set the library's port
setexpid($expid);
73

74 75 76 77 78
if( !defined $ARGV[0] ){
    exit &usage;
}
$bwdutycycle = $ARGV[0];

79
my $lastupdated_numnodes = 0;
80

81 82
my $socket;
my $sel = IO::Select->new();
83

84
#FORWARD DECL'S
85
sub stopnode($);
86
sub outputErrors();
87

88
print "exp = $expid\n";
89
#############################################################################
90 91 92 93
#
# Initialization
#
libxmlrpc::Config({"server"  => $RPCSERVER,
94 95 96 97
		    "verbose" => 0,
#		    "cert"    => $RPCCERT,
		    "portnum" => $RPCPORT});

98
# Stop all nodes in constraint set
99 100 101 102 103 104 105 106 107
if( defined $constrFilename )
{
    open FILE, "< $constrFilename" or die "cannot open file $constrFilename";
    while( <FILE> ){
	chomp;
	if( $_ =~ m/plab/ ){ 
	    push @constrnodes, $_;
	    print "$_\n";
	}
108
    }
109 110
    close FILE;
    foreach my $node (@constrnodes){
111
#    print "stopping $node\n";
112 113
	stopnode($node);
    }
114 115 116
}
#stop all nodes from XML-RPC query
else{
117 118 119 120 121 122
    getnodeinfo();
    foreach my $site (keys %sitenodes){
	foreach my $node (@{$sitenodes{$site}}){
	    stopnode($node);
	}
    }
123 124
}

125 126

###########################################################
127 128 129
#
# Main Loop
#
130
my $f_firsttime = 1;
131 132
while(1)
{
133
#    %deadnodes = ();
134

135
    #update node list
136

137
    getnodeinfo();
138

139
    choosenodes();
140

141 142 143 144 145 146
#    if( $f_firsttime ){
#	print "FIRST TIME UPDATE\n";
#	updateTests();
#    }

    modifytests($f_firsttime);
147

148
#    printchosennodes();
149

Dan Gebhardt's avatar
Dan Gebhardt committed
150
    outputErrors();
151

152
    sleep( 60*5 );
153
    $f_firsttime = 0;
154 155 156 157 158 159 160 161
}


###########################################################

sub getnodeinfo
{
    #retrieve list of nodes
162
    %sitenodes = ();
163 164
    my $rval = libxmlrpc::CallMethod($MODULE, $METHOD,
				     {"class" => "pcplabphys"});
165 166 167
    if( defined $rval ){
	%allnodes = %$rval;
    }else{ return; }
168

169 170 171 172 173 174
    #remove old node-listing file
    my $nodesfilename = "allnodelisting_automanage.txt";
    unlink $nodesfilename or warn "can't delete node-listing file";
    open FILE, "> $nodesfilename"
	or warn "can't open file $nodesfilename\n";

175 176 177 178 179
    #populate sitenodes
    foreach my $node (keys %allnodes){
	my $siteid = $allnodes{$node}{site};
	push @{$sitenodes{$siteid}}, $node;
#       print @{$sitenodes{$siteid}}."\n";
180
	print FILE "$node\n";
181
    }
182
    close FILE;
183 184
}

185 186 187 188 189 190 191
sub printNodeInfo($)
{
    my ($node) = @_;
    foreach my $key (keys %{$allnodes{$node}} ){
	print "\t$key = $allnodes{$node}{$key}\n";
    }
}
192 193 194 195 196 197 198

########################################################
#
# choose a node from each possible site
sub choosenodes
{
    foreach my $site (keys %sitenodes){
199
#	print "site $site\n";
200
	my $bestnode = choosebestnode($site);
201 202 203
#	if( !defined $bestnode ){ print "BESTNODE NOT DEF!!!\n"; }
#	if( $bestnode ne "NONE"){ print "bestnode for $site = $bestnode\n"; }
	if( $bestnode ne "NONE" &&
204 205
	    !defined $intersitenodes{$site} )
	{
206
	    print time()." SECTION 1: adding $bestnode at $site\n";
207 208
	    # ** This section handles when a site is seen for the 1st time

209
	    #set new node to represent this site
210
	    $intersitenodes{$site} = $bestnode;
211 212
	    initNewSiteNode($site,$bestnode)
		if( $f_firsttime == 0 );
213 214 215
	}
	elsif( ("NONE" eq $bestnode) && defined $intersitenodes{$site} )
	{
216
	    print time()." SECTION 2: removing tests to $site / ".
217
		"$intersitenodes{$site} \n";
218
	    # ** This section handles when a site has no nodes available
219

220
	    #no available node at this site, so remove site from hash
221
	    foreach my $srcsite (keys %intersitenodes){
222 223
		stoppairtest( $intersitenodes{$srcsite},
			      $intersitenodes{$site} );
224
	    }
225
	    delete $intersitenodes{$site};
226
	}
227
	elsif( defined $intersitenodes{$site} &&
228 229
	       ( $intersitenodes{$site} ne $bestnode ||
		 getstatus($bestnode) eq "anyscheduled_no"
230
	       )
231
	     )
232
	{
233
	    print time()." SECTION 3: node change/restart at $site from ".
234 235
		"$intersitenodes{$site} to $bestnode\n";
	    # ** This section handles when a "bestnode" at a site changes
236

237 238 239
	    #TODO: This logic should be fixed so the new tests are
	    #  started before old ones are stopped. This may help
	    #  prevent "holes" in the data collection to a site.
240 241
	    #  Stop sigs to other nodes using old "bestnode" value
	    if( defined $intersitenodes{$site} ){
242
		foreach my $srcsite (keys %intersitenodes){
243 244
		    stoppairtest( $intersitenodes{$srcsite},
				  $intersitenodes{$site} );
245
		}
246 247
	    }
	    
248
	    #set new node to represent this site
249
	    $intersitenodes{$site} = $bestnode;
250
	    initNewSiteNode($site);	    
251
	}
252 253 254
    }
}

255 256 257 258 259
sub initNewSiteNode($)
{
    my ($site) = @_;
#    $intersitenodes{$site} = $bestnode;

260 261 262
    # stop any tests remaining on this node.
    stopnode($intersitenodes{$site});

263 264 265 266 267
    # start tests to and from this new site
    foreach my $srcsite (keys %intersitenodes){
	edittest( $intersitenodes{$srcsite},
		  $intersitenodes{$site},
		  $test_per{bw},
268 269 270
		  "bw",
		  0,
		  $thisManagerID);
271 272 273
	edittest( $intersitenodes{$site},
		  $intersitenodes{$srcsite},
		  $test_per{bw},
274 275 276
		  "bw"
		  0,
		  $thisManagerID );
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
	my $r = rand;
	if( $r <= .5 ){
	    edittest( $intersitenodes{$srcsite},
		      $intersitenodes{$site},
		      $test_per{latency},
		      "latency" );
	}else{
	    edittest( $intersitenodes{$site},
		      $intersitenodes{$srcsite},
		      $test_per{latency},
		      "latency" );
	}
    }
}

292

293 294
#
# Re-adjust the test periods of connections based on number of nodes
295
#   Pass a non-zero parameter to force an initialization of all paths
296 297
sub modifytests
{
298 299
    my ($f_forceInit) = @_;

300 301
    my $numsites = scalar(keys %intersitenodes);
    my $bwper = ($numsites - 1) * $IPERFDURATION * 1/$bwdutycycle;
302
    #TODO: ?? dynamically change latency period, too?
303

304 305
    #update connections to use  newly calculated periods
    if( abs($lastupdated_numnodes - $numsites) > $SITEDIFFTHRESHOLD )
306
    {
307 308 309
	if( !$opt{B} ){       
	    $test_per{bw} = $bwper;
	    print "new BW per = $bwper\n";
310 311 312
	    $lastupdated_numnodes = $numsites;
	    updateTests(1,0);   #handles changing number of sites.
	                        # only update bandwidth
313
	}
314
    }
315

316
    if( defined $f_forceInit && $f_forceInit != 0 ){
317
	updateTests(1,1);
318
    }
319 320 321 322 323 324 325 326
}



sub printchosennodes
{
    foreach my $node (values %intersitenodes){
	print "site: ". $allnodes{$node}{site} . " = $node\n";
327 328
#	#TODO:: why does this give an error?
#	print "node = $node\n";
329 330 331 332 333 334 335
    }
}


sub choosebestnode($)
{
    my ($site) = @_;
336

337
    my $bestnode = "NONE";  #default to an error value
338
   
339
    my $flag_siteIncluded = 0;  #set if any node at site is in constraint set
340

341
=pod
342
    print "site: $site ";
343
    foreach my $node ( @{$sitenodes{$site}} ){
344
	print "$node ";
345 346 347 348
    }
    print "\n";
=cut
    foreach my $node ( @{$sitenodes{$site}} ){
349 350 351 352 353 354 355 356
=pod
	if(isnodeinconstrset($node)){
	    print "node $node is in constr set ";
	    if( $allnodes{$node}{free} == 1 ){
		print "and free";
	    }
	    print "\n";
	}
357 358 359 360 361
=cut
        if( isnodeinconstrset($node) ){
	    $flag_siteIncluded = 1;
#	    print "SETTING SITEINCLUDED=1 for $node at $site\n";
	}
362

363
	if( $allnodes{$node}{free} == 1 && isnodeinconstrset($node) ){
364
#	    print "choosebestnode: considering $node\n";
365
#	    print "choosing best node for site $site\n";
366 367
	    #first time thru loop...
	    if( $bestnode eq "NONE" ){
368 369 370 371 372 373
		#set this to be best node if it responds to a command
		
#		if( edittest($node,"NOADDR",0,"bw") == 1 ){
#		    $bestnode = $node;
#		}
		if( getstatus($node) ne "error" ){
374 375
		    $bestnode = $node;
		}
376
	    }else{
377 378 379
		if( ($allnodes{$node}{cpu} < $allnodes{$bestnode}{cpu}
		    - $CPUUSAGETHRESHOLD) &&
		    (edittest($node,"NOADDR",0,"bw") == 1) )
380
		{
381 382 383 384
		    print '$allnodes{$node}{cpu}'.
			"  $allnodes{$node}{cpu}\n";
		    print '$allnodes{best$node}{cpu}'.
			"  $allnodes{$bestnode}{cpu}\n";
385 386 387 388 389
		    $bestnode = $node;
		}
	    }
	}
    }
390 391 392 393 394 395 396 397 398



    if( $bestnode eq "NONE" && $flag_siteIncluded == 1){
#	print "^^^^^adding to deadsites: $site\n";
	$deadsites{$site} = 1;
    }else{
	delete $deadsites{$site};
    }
399 400 401 402 403 404 405 406 407
    return $bestnode;
}


sub isnodeinconstrset($)
{
    my ($node) = @_;
    
    #if constraint set is empty, return true
408
    if( @constrnodes == 0 ){
409 410
	return 1;
    }else{
411 412 413 414 415 416 417
	#check if node exists in contraint set
	foreach my $cnode (@constrnodes){
	    if( $node eq $cnode ){
		return 1;
	    }
	}
	return 0;
418 419 420 421
    }
}


422

423 424
#
# update all nodes with new test periods and destination nodes
425 426
#   Two parameters: first is flag to update bw, second to update latency
sub updateTests($$)
427
{
428
    my ($f_updatebw, $f_updatelat) = @_;
429 430
    print "UPDATING TESTS\n";

431 432 433 434
    my $srcnode;
    my $bw_destnodes;
    my $destnode;
    #init bandwidth
435 436 437 438 439
    if( $f_updatebw ){
	foreach my $srcsite (keys %intersitenodes){
	    $srcnode = $intersitenodes{$srcsite};
	    $bw_destnodes = "";
	    foreach my $destsite (keys %intersitenodes){
440
#	    print "looking at site $destsite\n";
441 442 443 444 445 446 447
		if( defined $intersitenodes{$destsite}) {
		    $destnode = $intersitenodes{$destsite};
		}
		#add destination nodes to this source node, but not "self"
		if( $destnode ne $srcnode ){
		    $bw_destnodes .= " ".$destnode;
		}
448
	    }
449
	    initnode($srcnode, $bw_destnodes, $test_per{bw}, "bw");
450 451 452
	}
    }

453
    #init latency: fully connected, but only one direction each path
454 455 456 457 458 459 460 461 462 463 464 465 466
    if( $f_updatelat ){
	my %initstrs;  #build init strings for each site node
	my @sitekeys = keys %intersitenodes;
	for( my $i = 0; $i < @sitekeys-1; $i++ ){
	    for( my $j = $i+1; $j < @sitekeys; $j++ ){
		my $r = rand;
		if( $r <= .5 ){
		    $initstrs{$intersitenodes{$sitekeys[$i]}} .= 
			"$intersitenodes{$sitekeys[$j]} ";
		}else{
		    $initstrs{$intersitenodes{$sitekeys[$j]}} .= 
			"$intersitenodes{$sitekeys[$i]} ";
		}
467 468
	    }
	}
469 470 471 472 473 474
	# now send the inits to all nodes
	foreach my $srcsite (keys %intersitenodes){
	    $srcnode = $intersitenodes{$srcsite};
	    initnode($srcnode, $initstrs{$srcnode}, 
		     $test_per{latency}, "latency");
	}
475
    }
476 477
}

478 479 480
#
# Stop all tests from a node
#
481

482 483 484 485 486
sub stopnode($)
{
    my ($node) = @_;

    if( isnodeinconstrset($node) ){
487
=pod
488 489
	my %cmd = ( expid    => $expid,
		    cmdtype  => "STOPALL" );
490
	print "stopnode $node called\n";
491
	sendcmd($node,\%cmd);
492 493 494
=cut
        libwanetmon::stopnode($node);
        print "stopnode $node called\n";
495 496
    }
}
497

498 499 500



501

502 503 504 505 506 507 508 509
#
# stops the tests from given source and destination nodes
#
sub stoppairtest($$)
{
    my ($srcnode, $destnode) = @_;
    my $testper = 0;
    my @testtypes = ("latency","bw");
510 511 512

    foreach my $testtype (@testtypes){
	edittest($srcnode, $destnode, 0, $testtype);
513
    }
514
#    print "stopping pair tests from $srcnode to $destnode\n";
515 516 517
}

#
518
# destnodes is space-separated string
519
#
520 521 522
sub initnode($$$$)
{
    my ($node, $destnodes, $testper, $testtype) = @_;
523

524 525
    if( defined $destnodes ){
	print "SENDING INIT: *$node*$destnodes*$testper*$testtype*\n";
526

527 528 529 530 531 532
	my %cmd = ( expid     => $expid,
		    cmdtype   => "INIT",
		    destnodes => $destnodes,
		    testtype  => $testtype,
		    testper   => $testper
		    );
533

534
	sendcmd($node,\%cmd);
535

536 537 538 539
	print "sent initnode to $node\n";
	print "destnodes = $destnodes\n";
	print "testper = $testper\n";
	print "testtype = $testtype\n";
540 541 542 543 544 545
    }
}


sub outputErrors()
{
546 547 548 549 550 551
    if( keys %deadnodes > 0 ){
	print "Nodes not responding:\n";
	foreach my $node (keys %deadnodes){
	    print "$node  ";
	}
	print "\n";
552
    }
553 554 555 556 557 558 559 560 561 562 563 564
    if( keys %deadsites > 0 ){
	print "Sites with no nodes available:\n";
	foreach my $site (keys %deadsites){
	    print "$site  ";
	}
	print "\n";
    }
    print "Sites with nodes available:\n";
    foreach my $site (keys %intersitenodes){
	print "$site ";
    }
    print "\n";
565 566 567
}