Commit 7d62d520 authored by Dan Gebhardt

First "release" of automanage.

- when a site has no available nodes or its bestnode changes,
  measurements to the site's previous node are stopped from all
  other nodes.

- tests are started to a new node chosen for a site.

- before a site's bestnode is selected, automanage verifies bgmon is
  running correctly on the node

- pseudo-balanced latency initialization is done using randomly
  selected path directions between a node pair.

- other misc. stuff.
parent 26e5f364
...@@ -69,8 +69,9 @@ my $lastupdated_numnodes = 0; ...@@ -69,8 +69,9 @@ my $lastupdated_numnodes = 0;
my $socket; my $socket;
my $sel = IO::Select->new(); my $sel = IO::Select->new();
#FORWARD DECL'S
sub stopnode($); sub stopnode($);
sub outputErrors();
print "exp = $expid\n"; print "exp = $expid\n";
############################################################################# #############################################################################
...@@ -103,15 +104,31 @@ foreach my $node (@constrnodes){ ...@@ -103,15 +104,31 @@ foreach my $node (@constrnodes){
# #
# Main Loop # Main Loop
# #
#my $f_firsttime = 1;
while(1) while(1)
{ {
%deadnodes = ();
#update node list #update node list
# print "getnodeinfo\n";
getnodeinfo(); getnodeinfo();
# sleep(10);
# print "choosenodes\n";
choosenodes(); choosenodes();
# sleep(10);
# print "modifytests\n";
modifytests(); modifytests();
# sleep(10);
# printchosennodes(); # printchosennodes();
outputErrors(); outputErrors();
select(undef, undef, undef, 5.0); # sleep( 10 );
sleep( 60 );
# $f_firsttime = 0;
} }
...@@ -122,17 +139,26 @@ sub getnodeinfo ...@@ -122,17 +139,26 @@ sub getnodeinfo
#retrieve list of nodes #retrieve list of nodes
my $rval = libxmlrpc::CallMethod($MODULE, $METHOD, my $rval = libxmlrpc::CallMethod($MODULE, $METHOD,
{"class" => "pcplabphys"}); {"class" => "pcplabphys"});
%allnodes = %$rval; if( defined $rval ){
%allnodes = %$rval;
}else{ return; }
#populate sitenodes #populate sitenodes
foreach my $node (keys %allnodes){ foreach my $node (keys %allnodes){
my $siteid = $allnodes{$node}{site}; my $siteid = $allnodes{$node}{site};
@{$sitenodes{$siteid}} = ();
push @{$sitenodes{$siteid}}, $node; push @{$sitenodes{$siteid}}, $node;
# print @{$sitenodes{$siteid}}."\n"; # print @{$sitenodes{$siteid}}."\n";
} }
} }
#
# Debugging aid: print every attribute recorded for $node in %allnodes,
# one "\tkey = value" line per attribute, on STDOUT.
#
#   $node - node identifier used as a key into %allnodes
#
# Prints nothing when %allnodes has no hash record for $node.
#
sub printNodeInfo($)
{
    my ($node) = @_;

    # Guard before dereferencing: keys %{ $allnodes{$node} } on a missing
    # entry would autovivify an empty record for $node inside %allnodes.
    return unless defined $node && ref($allnodes{$node}) eq 'HASH';

    my $info = $allnodes{$node};

    # Sorted so repeated dumps of the same node are directly comparable.
    foreach my $key (sort keys %$info){
        # An undefined attribute is printed as empty instead of warning.
        my $value = defined $info->{$key} ? $info->{$key} : '';
        print "\t$key = $value\n";
    }
    return;
}
######################################################## ########################################################
# #
...@@ -142,63 +168,76 @@ sub choosenodes ...@@ -142,63 +168,76 @@ sub choosenodes
{ {
foreach my $site (keys %sitenodes){ foreach my $site (keys %sitenodes){
# print "site $site\n"; # print "site $site\n";
# my $bestnode = "NONE";
my $bestnode = choosebestnode($site); my $bestnode = choosebestnode($site);
if( "NONE" eq $bestnode ){ if( !defined $bestnode ){ print "BESTNODE NOT DEF!!!\n"; }
if( "NONE" ne $bestnode &&
!defined $intersitenodes{$site} )
{
print "SECTION 1: adding $bestnode at $site\n";
# ** This section handles when a site is seen for the 1st time
#set new node to represent this site
$intersitenodes{$site} = $bestnode;
}
elsif( ("NONE" eq $bestnode) && defined $intersitenodes{$site} )
{
print "SECTION 2: removing tests to $site / ".
"$intersitenodes{$site} \n";
# ** This section handles when a site has no nodes available # ** This section handles when a site has no nodes available
#no available node at this site, so remove site from hash #no available node at this site, so remove site from hash
#(done?)TODO: send "stop" signals to all other nodes having this
# site as the destination
foreach my $srcsite (keys %intersitenodes){ foreach my $srcsite (keys %intersitenodes){
if( defined $intersitenodes{$site} ){ stoppairtest( $intersitenodes{$srcsite},
stoppairtest( $intersitenodes{$srcsite}, $intersitenodes{$site} );
$intersitenodes{$site} );
}
} }
delete $intersitenodes{$site}; delete $intersitenodes{$site};
} }
else{ elsif( defined $intersitenodes{$site} &&
if( (!defined $intersitenodes{$site} || $intersitenodes{$site} ne $bestnode
$intersitenodes{$site} ne $bestnode) #&& isnodeinconstrset($bestnode)
#&& isnodeinconstrset($bestnode) )
) {
{ print "SECTION 3: node change at $site from ".
# ** This section handles when a "bestnode" at a site changes "$intersitenodes{$site} to $bestnode\n";
# ** This section handles when a "bestnode" at a site changes
#(done?)TODO
# Stop sigs to other nodes using old "bestnode" value # Stop sigs to other nodes using old "bestnode" value
if( defined $intersitenodes{$site} ){ if( defined $intersitenodes{$site} ){
foreach my $srcsite (keys %intersitenodes){
stoppairtest( $intersitenodes{$srcsite},
$intersitenodes{$site} );
}
}
#set new node to represent this site
$intersitenodes{$site} = $bestnode;
#(done?)TODO: start other nodes using this new "bestnode"
# (This uses the EDIT bgmon command - see bgmon.pl)
foreach my $srcsite (keys %intersitenodes){ foreach my $srcsite (keys %intersitenodes){
edittest( $intersitenodes{$srcsite}, stoppairtest( $intersitenodes{$srcsite},
$intersitenodes{$site}, $intersitenodes{$site} );
$test_per{bw},
"bw" );
} }
#TODO: need to do this smartly... }
=pod
#set new node to represent this site
$intersitenodes{$site} = $bestnode;
foreach my $srcsite (keys %intersitenodes){
edittest( $intersitenodes{$srcsite}, edittest( $intersitenodes{$srcsite},
$intersitenodes{$site}, $intersitenodes{$site},
$test_per{latency}, $test_per{bw},
"latency" ); "bw" );
=cut my $r = rand;
if( $r <= .5 ){
edittest( $intersitenodes{$srcsite},
$intersitenodes{$site},
$test_per{latency},
"latency" );
}else{
edittest( $intersitenodes{$site},
$intersitenodes{$srcsite},
$test_per{latency},
"latency" );
}
} }
}
}
} }
} }
# #
# Re-adjust the test periods of connections based on number of nodes # Re-adjust the test periods of connections based on number of nodes
# #
...@@ -239,6 +278,8 @@ sub choosebestnode($) ...@@ -239,6 +278,8 @@ sub choosebestnode($)
my ($site) = @_; my ($site) = @_;
my $bestnode = "NONE"; #default to an error value my $bestnode = "NONE"; #default to an error value
=pod =pod
print "$site "; print "$site ";
foreach my $node ( @{$sitenodes{$site}} ){ foreach my $node ( @{$sitenodes{$site}} ){
...@@ -255,17 +296,31 @@ sub choosebestnode($) ...@@ -255,17 +296,31 @@ sub choosebestnode($)
} }
print "\n"; print "\n";
} }
=cut =cut
if( $allnodes{$node}{free} == 1 && isnodeinconstrset($node) ) { #this command acts like a bgmon "ping" - used to
#determine if bgmon running correctly
my %cmd = ( expid => $expid,
cmdtype => "EDIT",
dstnode => "NOADDR",
testtype => "bw",
testper => 0 );
if( $allnodes{$node}{free} == 1 &&
isnodeinconstrset($node) )
{
# print "choosing best node for site $site\n"; # print "choosing best node for site $site\n";
#first time thru loop... #first time thru loop...
if( $bestnode eq "NONE" ){ if( $bestnode eq "NONE" ){
#set this to be best node #set this to be best node
$bestnode = $node; $bestnode = $node;
}else{ }else{
if( $allnodes{$node}{cpu} < $allnodes{$bestnode}{cpu} if( ($allnodes{$node}{cpu} < $allnodes{$bestnode}{cpu}
+ $CPUUSAGETHRESHOLD) - $CPUUSAGETHRESHOLD) &&
(edittest($node,"NOADDR",0,"bw") == 1) )
{ {
print "setting new bestnode\n";
print '$allnodes{$node}{cpu}'." $allnodes{$node}{cpu}\n";
print '$allnodes{$bestnode}{cpu}'.
" $allnodes{$bestnode}{cpu}\n";
$bestnode = $node; $bestnode = $node;
} }
} }
...@@ -319,11 +374,29 @@ sub updateTests ...@@ -319,11 +374,29 @@ sub updateTests
} }
} }
initnode($srcnode, $bw_destnodes, $test_per{bw}, "bw"); initnode($srcnode, $bw_destnodes, $test_per{bw}, "bw");
#TODO! Distribute initialization times evenly
} }
#TODO: LATENCY #init latency: fully connected, but only one direction each path
my %initstrs; #build init strings for each site node
my @sitekeys = keys %intersitenodes;
for( my $i = 0; $i < @sitekeys-1; $i++ ){
for( my $j = $i+1; $j < @sitekeys; $j++ ){
my $r = rand;
if( $r <= .5 ){
$initstrs{$intersitenodes{$sitekeys[$i]}} .=
"$intersitenodes{$sitekeys[$j]} ";
}else{
$initstrs{$intersitenodes{$sitekeys[$j]}} .=
"$intersitenodes{$sitekeys[$i]} ";
}
}
}
# now send the inits to all nodes
foreach my $srcsite (keys %intersitenodes){
$srcnode = $intersitenodes{$srcsite};
initnode($srcnode, $initstrs{$srcnode}, $test_per{latency}, "latency");
}
} }
# #
...@@ -336,6 +409,7 @@ sub stopnode($) ...@@ -336,6 +409,7 @@ sub stopnode($)
if( isnodeinconstrset($node) ){ if( isnodeinconstrset($node) ){
my %cmd = ( expid => $expid, my %cmd = ( expid => $expid,
cmdtype => "STOPALL" ); cmdtype => "STOPALL" );
print "stopnode $node called\n";
sendcmd($node,\%cmd); sendcmd($node,\%cmd);
} }
} }
...@@ -360,7 +434,7 @@ sub edittest($$$$) ...@@ -360,7 +434,7 @@ sub edittest($$$$)
testtype => $testtype, testtype => $testtype,
testper => $testper ); testper => $testper );
sendcmd($srcnode,\%cmd); return sendcmd($srcnode,\%cmd);
} }
# #
...@@ -411,7 +485,7 @@ sub sendcmd($$) ...@@ -411,7 +485,7 @@ sub sendcmd($$)
my $sercmd = serialize_hash( \%cmd ); my $sercmd = serialize_hash( \%cmd );
my $f_success = 0; my $f_success = 0;
my $max_tries = 5; my $max_tries = 3;
do{ do{
$socket = IO::Socket::INET->new( PeerPort => $port, $socket = IO::Socket::INET->new( PeerPort => $port,
Proto => 'tcp', Proto => 'tcp',
...@@ -424,7 +498,7 @@ sub sendcmd($$) ...@@ -424,7 +498,7 @@ sub sendcmd($$)
# timeout period? # timeout period?
$sel->add($socket); $sel->add($socket);
my ($ready) = $sel->can_read(1); my ($ready) = $sel->can_read(1);
if( $ready eq $socket ){ if( defined($ready) && $ready eq $socket ){
my $ack = <$ready>; my $ack = <$ready>;
chomp $ack; chomp $ack;
if( $ack eq "ACK" ){ if( $ack eq "ACK" ){
...@@ -447,20 +521,25 @@ sub sendcmd($$) ...@@ -447,20 +521,25 @@ sub sendcmd($$)
if( $f_success == 0 && $max_tries == 0 ){ if( $f_success == 0 && $max_tries == 0 ){
$deadnodes{$node} = 1; $deadnodes{$node} = 1;
print "DID NOT GET ACK from $node for command $sercmd\n";
return -1;
}elsif( $f_success == 1 ){
#success!
return 1;
} }
} }
sub outputErrors() sub outputErrors()
{ {
print "Nodes not responding to Command:\n"; if( keys %deadnodes > 0 ){
foreach my $node (keys %deadnodes){ print "Nodes not responding:\n";
print "$node "; foreach my $node (keys %deadnodes){
print "$node ";
}
print "\n";
} }
print "\n";
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment