automanage.pl 13.9 KB
Newer Older
1
#!/usr/bin/perl -w
Dan Gebhardt's avatar
Dan Gebhardt committed
2 3
#
# Copyright (c) 2006 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
Dan Gebhardt's avatar
Dan Gebhardt committed
23
#
24

25
use strict;
26 27
use lib '/usr/testbed/lib';
use libxmlrpc;
28
use libwanetmon;
29 30
use English;
use Getopt::Std;
31 32
use IO::Socket::INET;
use IO::Select;
33

34 35
$| = 1;

36
my ($constrFilename, $expid, $bwdutycycle, $port);
37 38 39 40 41
my $numsites;
my %test_per = (  # defaults
	       "latency" => 300,
	       "bw"      => 0,
	       );
42
$thisManagerID = "automanagerclient";
43 44 45
my %intersitenodes = (); #final list for fully-connected test
my @constrnodes;    #test constrained to these nodes
my %sitenodes;      #hash listing all sites => nodes
46
my $CPUUSAGETHRESHOLD = 10;  #should help prevent flip-flopping between
47
                             #"best" nodes at a site
48 49
                             #TODO: document normal range of values for CPU
my $SITEDIFFTHRESHOLD = 5;   #number of site differences between period
50
                             #calculations that trigger an update
51
my $IPERFDURATION = 5;      #duration in seconds of iperf test
52
my %allnodes;
53
my %deadsites;
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69

# RPC STUFF ##############################################
my $TB         = "/usr/testbed";
my $ELABINELAB = 0;
my $RPCSERVER  = "boss.emulab.net";  #?
my $RPCPORT    = "3069";
#my $RPCCERT    = "/etc/outer_emulab.pem"; #?
my $RPCCERT = "~/.ssl/emulab.pem";
my $MODULE = "node";
my $METHOD = "getlist";
# END RPC STUFF ##########################################


sub usage 
{
	warn "Usage: $0 [-BLP] [-f constraint file] [-l latency test period]".
70 71
	     " [-e pid/eid]".
	     " <bandwidth duty cycle 0-1>".
72
#	     " <number of sites or \"all\">\n".
73 74 75 76 77 78
	     "where: -B = Do not measure bandwidth\n".
	     "       -L = Do not measure latency\n".
	     "       -P = Do not measure latency to nodes not responding to pings\n";
	return 1;
}

79 80 81 82 83 84 85

my %opt = ();
getopts("B:L:P:f:l:e:s:p",\%opt);
#TODO: other options
if( $opt{f}) { $constrFilename = $opt{f}; }
if( $opt{l}) { $test_per{latency} = $opt{l}; }
if ($opt{e}) { $expid = $opt{e}; } else { $expid = "none"; }
86
if ($opt{p}) { $port = $opt{p}; } else{ $port = 5052; }
87

88 89
setcmdport($port);  #set the library's port
setexpid($expid);
90

91 92 93 94 95
if( !defined $ARGV[0] ){
    exit &usage;
}
$bwdutycycle = $ARGV[0];

96
my $lastupdated_numnodes = 0;
97

98 99
my $socket;
my $sel = IO::Select->new();
100

101
#FORWARD DECL'S
102
sub stopnode($);
103
sub outputErrors();
104

105
print "exp = $expid\n";
106
#############################################################################
107 108 109 110
#
# Initialization
#
libxmlrpc::Config({"server"  => $RPCSERVER,
111 112 113 114
		    "verbose" => 0,
#		    "cert"    => $RPCCERT,
		    "portnum" => $RPCPORT});

115
# Stop all nodes in constraint set
116 117 118 119 120 121 122 123 124
if( defined $constrFilename )
{
    open FILE, "< $constrFilename" or die "cannot open file $constrFilename";
    while( <FILE> ){
	chomp;
	if( $_ =~ m/plab/ ){ 
	    push @constrnodes, $_;
	    print "$_\n";
	}
125
    }
126 127
    close FILE;
    foreach my $node (@constrnodes){
128
#    print "stopping $node\n";
129 130
	stopnode($node);
    }
131 132 133
}
#stop all nodes from XML-RPC query
else{
134 135 136 137 138 139
    getnodeinfo();
    foreach my $site (keys %sitenodes){
	foreach my $node (@{$sitenodes{$site}}){
	    stopnode($node);
	}
    }
140 141
}

142 143

###########################################################
144 145 146
#
# Main Loop
#
147
my $f_firsttime = 1;
148 149
while(1)
{
150
#    %deadnodes = ();
151

152
    #update node list
153

154
    getnodeinfo();
155

156
    choosenodes();
157

158 159 160 161 162 163
#    if( $f_firsttime ){
#	print "FIRST TIME UPDATE\n";
#	updateTests();
#    }

    modifytests($f_firsttime);
164

165
#    printchosennodes();
166

Dan Gebhardt's avatar
Dan Gebhardt committed
167
    outputErrors();
168

169
    sleep( 60*5 );
170
    $f_firsttime = 0;
171 172 173 174 175 176 177 178
}


###########################################################

sub getnodeinfo
{
    #retrieve list of nodes
179
    %sitenodes = ();
180 181
    my $rval = libxmlrpc::CallMethod($MODULE, $METHOD,
				     {"class" => "pcplabphys"});
182 183 184
    if( defined $rval ){
	%allnodes = %$rval;
    }else{ return; }
185

186 187 188 189 190 191
    #remove old node-listing file
    my $nodesfilename = "allnodelisting_automanage.txt";
    unlink $nodesfilename or warn "can't delete node-listing file";
    open FILE, "> $nodesfilename"
	or warn "can't open file $nodesfilename\n";

192 193 194 195 196
    #populate sitenodes
    foreach my $node (keys %allnodes){
	my $siteid = $allnodes{$node}{site};
	push @{$sitenodes{$siteid}}, $node;
#       print @{$sitenodes{$siteid}}."\n";
197
	print FILE "$node\n";
198
    }
199
    close FILE;
200 201
}

202 203 204 205 206 207 208
sub printNodeInfo($)
{
    my ($node) = @_;
    foreach my $key (keys %{$allnodes{$node}} ){
	print "\t$key = $allnodes{$node}{$key}\n";
    }
}
209 210 211 212 213 214 215

########################################################
#
# choose a node from each possible site
sub choosenodes
{
    foreach my $site (keys %sitenodes){
216
#	print "site $site\n";
217
	my $bestnode = choosebestnode($site);
218 219 220
#	if( !defined $bestnode ){ print "BESTNODE NOT DEF!!!\n"; }
#	if( $bestnode ne "NONE"){ print "bestnode for $site = $bestnode\n"; }
	if( $bestnode ne "NONE" &&
221 222
	    !defined $intersitenodes{$site} )
	{
223
	    print time()." SECTION 1: adding $bestnode at $site\n";
224 225
	    # ** This section handles when a site is seen for the 1st time

226
	    #set new node to represent this site
227
	    $intersitenodes{$site} = $bestnode;
228 229
	    initNewSiteNode($site,$bestnode)
		if( $f_firsttime == 0 );
230 231 232
	}
	elsif( ("NONE" eq $bestnode) && defined $intersitenodes{$site} )
	{
233
	    print time()." SECTION 2: removing tests to $site / ".
234
		"$intersitenodes{$site} \n";
235
	    # ** This section handles when a site has no nodes available
236

237
	    #no available node at this site, so remove site from hash
238
	    foreach my $srcsite (keys %intersitenodes){
239 240
		stoppairtest( $intersitenodes{$srcsite},
			      $intersitenodes{$site} );
241
	    }
242
	    delete $intersitenodes{$site};
243
	}
244
	elsif( defined $intersitenodes{$site} &&
245 246
	       ( $intersitenodes{$site} ne $bestnode ||
		 getstatus($bestnode) eq "anyscheduled_no"
247
	       )
248
	     )
249
	{
250
	    print time()." SECTION 3: node change/restart at $site from ".
251 252
		"$intersitenodes{$site} to $bestnode\n";
	    # ** This section handles when a "bestnode" at a site changes
253

254 255 256
	    #TODO: This logic should be fixed so the new tests are
	    #  started before old ones are stopped. This may help
	    #  prevent "holes" in the data collection to a site.
257 258
	    #  Stop sigs to other nodes using old "bestnode" value
	    if( defined $intersitenodes{$site} ){
259
		foreach my $srcsite (keys %intersitenodes){
260 261
		    stoppairtest( $intersitenodes{$srcsite},
				  $intersitenodes{$site} );
262
		}
263 264
	    }
	    
265
	    #set new node to represent this site
266
	    $intersitenodes{$site} = $bestnode;
267
	    initNewSiteNode($site);	    
268
	}
269 270 271
    }
}

272 273 274 275 276
sub initNewSiteNode($)
{
    my ($site) = @_;
#    $intersitenodes{$site} = $bestnode;

277 278 279
    # stop any tests remaining on this node.
    stopnode($intersitenodes{$site});

280 281 282 283 284
    # start tests to and from this new site
    foreach my $srcsite (keys %intersitenodes){
	edittest( $intersitenodes{$srcsite},
		  $intersitenodes{$site},
		  $test_per{bw},
285 286 287
		  "bw",
		  0,
		  $thisManagerID);
288 289 290
	edittest( $intersitenodes{$site},
		  $intersitenodes{$srcsite},
		  $test_per{bw},
291 292 293
		  "bw"
		  0,
		  $thisManagerID );
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
	my $r = rand;
	if( $r <= .5 ){
	    edittest( $intersitenodes{$srcsite},
		      $intersitenodes{$site},
		      $test_per{latency},
		      "latency" );
	}else{
	    edittest( $intersitenodes{$site},
		      $intersitenodes{$srcsite},
		      $test_per{latency},
		      "latency" );
	}
    }
}

309

310 311
#
# Re-adjust the test periods of connections based on number of nodes
312
#   Pass a non-zero parameter to force an initialization of all paths
313 314
sub modifytests
{
315 316
    my ($f_forceInit) = @_;

317 318
    my $numsites = scalar(keys %intersitenodes);
    my $bwper = ($numsites - 1) * $IPERFDURATION * 1/$bwdutycycle;
319
    #TODO: ?? dynamically change latency period, too?
320

321 322
    #update connections to use  newly calculated periods
    if( abs($lastupdated_numnodes - $numsites) > $SITEDIFFTHRESHOLD )
323
    {
324 325 326
	if( !$opt{B} ){       
	    $test_per{bw} = $bwper;
	    print "new BW per = $bwper\n";
327 328 329
	    $lastupdated_numnodes = $numsites;
	    updateTests(1,0);   #handles changing number of sites.
	                        # only update bandwidth
330
	}
331
    }
332

333
    if( defined $f_forceInit && $f_forceInit != 0 ){
334
	updateTests(1,1);
335
    }
336 337 338 339 340 341 342 343
}



sub printchosennodes
{
    foreach my $node (values %intersitenodes){
	print "site: ". $allnodes{$node}{site} . " = $node\n";
344 345
#	#TODO:: why does this give an error?
#	print "node = $node\n";
346 347 348 349 350 351 352
    }
}


sub choosebestnode($)
{
    my ($site) = @_;
353

354
    my $bestnode = "NONE";  #default to an error value
355
   
356
    my $flag_siteIncluded = 0;  #set if any node at site is in constraint set
357

358
=pod
359
    print "site: $site ";
360
    foreach my $node ( @{$sitenodes{$site}} ){
361
	print "$node ";
362 363 364 365
    }
    print "\n";
=cut
    foreach my $node ( @{$sitenodes{$site}} ){
366 367 368 369 370 371 372 373
=pod
	if(isnodeinconstrset($node)){
	    print "node $node is in constr set ";
	    if( $allnodes{$node}{free} == 1 ){
		print "and free";
	    }
	    print "\n";
	}
374 375 376 377 378
=cut
        if( isnodeinconstrset($node) ){
	    $flag_siteIncluded = 1;
#	    print "SETTING SITEINCLUDED=1 for $node at $site\n";
	}
379

380
	if( $allnodes{$node}{free} == 1 && isnodeinconstrset($node) ){
381
#	    print "choosebestnode: considering $node\n";
382
#	    print "choosing best node for site $site\n";
383 384
	    #first time thru loop...
	    if( $bestnode eq "NONE" ){
385 386 387 388 389 390
		#set this to be best node if it responds to a command
		
#		if( edittest($node,"NOADDR",0,"bw") == 1 ){
#		    $bestnode = $node;
#		}
		if( getstatus($node) ne "error" ){
391 392
		    $bestnode = $node;
		}
393
	    }else{
394 395 396
		if( ($allnodes{$node}{cpu} < $allnodes{$bestnode}{cpu}
		    - $CPUUSAGETHRESHOLD) &&
		    (edittest($node,"NOADDR",0,"bw") == 1) )
397
		{
398 399 400 401
		    print '$allnodes{$node}{cpu}'.
			"  $allnodes{$node}{cpu}\n";
		    print '$allnodes{best$node}{cpu}'.
			"  $allnodes{$bestnode}{cpu}\n";
402 403 404 405 406
		    $bestnode = $node;
		}
	    }
	}
    }
407 408 409 410 411 412 413 414 415



    if( $bestnode eq "NONE" && $flag_siteIncluded == 1){
#	print "^^^^^adding to deadsites: $site\n";
	$deadsites{$site} = 1;
    }else{
	delete $deadsites{$site};
    }
416 417 418 419 420 421 422 423 424
    return $bestnode;
}


sub isnodeinconstrset($)
{
    my ($node) = @_;
    
    #if constraint set is empty, return true
425
    if( @constrnodes == 0 ){
426 427
	return 1;
    }else{
428 429 430 431 432 433 434
	#check if node exists in contraint set
	foreach my $cnode (@constrnodes){
	    if( $node eq $cnode ){
		return 1;
	    }
	}
	return 0;
435 436 437 438
    }
}


439

440 441
#
# update all nodes with new test periods and destination nodes
442 443
#   Two parameters: first is flag to update bw, second to update latency
sub updateTests($$)
444
{
445
    my ($f_updatebw, $f_updatelat) = @_;
446 447
    print "UPDATING TESTS\n";

448 449 450 451
    my $srcnode;
    my $bw_destnodes;
    my $destnode;
    #init bandwidth
452 453 454 455 456
    if( $f_updatebw ){
	foreach my $srcsite (keys %intersitenodes){
	    $srcnode = $intersitenodes{$srcsite};
	    $bw_destnodes = "";
	    foreach my $destsite (keys %intersitenodes){
457
#	    print "looking at site $destsite\n";
458 459 460 461 462 463 464
		if( defined $intersitenodes{$destsite}) {
		    $destnode = $intersitenodes{$destsite};
		}
		#add destination nodes to this source node, but not "self"
		if( $destnode ne $srcnode ){
		    $bw_destnodes .= " ".$destnode;
		}
465
	    }
466
	    initnode($srcnode, $bw_destnodes, $test_per{bw}, "bw");
467 468 469
	}
    }

470
    #init latency: fully connected, but only one direction each path
471 472 473 474 475 476 477 478 479 480 481 482 483
    if( $f_updatelat ){
	my %initstrs;  #build init strings for each site node
	my @sitekeys = keys %intersitenodes;
	for( my $i = 0; $i < @sitekeys-1; $i++ ){
	    for( my $j = $i+1; $j < @sitekeys; $j++ ){
		my $r = rand;
		if( $r <= .5 ){
		    $initstrs{$intersitenodes{$sitekeys[$i]}} .= 
			"$intersitenodes{$sitekeys[$j]} ";
		}else{
		    $initstrs{$intersitenodes{$sitekeys[$j]}} .= 
			"$intersitenodes{$sitekeys[$i]} ";
		}
484 485
	    }
	}
486 487 488 489 490 491
	# now send the inits to all nodes
	foreach my $srcsite (keys %intersitenodes){
	    $srcnode = $intersitenodes{$srcsite};
	    initnode($srcnode, $initstrs{$srcnode}, 
		     $test_per{latency}, "latency");
	}
492
    }
493 494
}

495 496 497
#
# Stop all tests from a node
#
498

499 500 501 502 503
sub stopnode($)
{
    my ($node) = @_;

    if( isnodeinconstrset($node) ){
504
=pod
505 506
	my %cmd = ( expid    => $expid,
		    cmdtype  => "STOPALL" );
507
	print "stopnode $node called\n";
508
	sendcmd($node,\%cmd);
509 510 511
=cut
        libwanetmon::stopnode($node);
        print "stopnode $node called\n";
512 513
    }
}
514

515 516 517



518

519 520 521 522 523 524 525 526
#
# stops the tests from given source and destination nodes
#
sub stoppairtest($$)
{
    my ($srcnode, $destnode) = @_;
    my $testper = 0;
    my @testtypes = ("latency","bw");
527 528 529

    foreach my $testtype (@testtypes){
	edittest($srcnode, $destnode, 0, $testtype);
530
    }
531
#    print "stopping pair tests from $srcnode to $destnode\n";
532 533 534
}

#
535
# destnodes is space-separated string
536
#
537 538 539
sub initnode($$$$)
{
    my ($node, $destnodes, $testper, $testtype) = @_;
540

541 542
    if( defined $destnodes ){
	print "SENDING INIT: *$node*$destnodes*$testper*$testtype*\n";
543

544 545 546 547 548 549
	my %cmd = ( expid     => $expid,
		    cmdtype   => "INIT",
		    destnodes => $destnodes,
		    testtype  => $testtype,
		    testper   => $testper
		    );
550

551
	sendcmd($node,\%cmd);
552

553 554 555 556
	print "sent initnode to $node\n";
	print "destnodes = $destnodes\n";
	print "testper = $testper\n";
	print "testtype = $testtype\n";
557 558 559 560 561 562
    }
}


sub outputErrors()
{
563 564 565 566 567 568
    if( keys %deadnodes > 0 ){
	print "Nodes not responding:\n";
	foreach my $node (keys %deadnodes){
	    print "$node  ";
	}
	print "\n";
569
    }
570 571 572 573 574 575 576 577 578 579 580 581
    if( keys %deadsites > 0 ){
	print "Sites with no nodes available:\n";
	foreach my $site (keys %deadsites){
	    print "$site  ";
	}
	print "\n";
    }
    print "Sites with nodes available:\n";
    foreach my $site (keys %intersitenodes){
	print "$site ";
    }
    print "\n";
582 583 584
}