Commit 50012a92 authored by Dan Gebhardt's avatar Dan Gebhardt
Browse files

This checkin contains the last versions of the wanetmon project files before the
modifications for "multiple managers" and command queuing. Some files were
in the development stage, and thus may not work as advertised. For "stable"
versions, use the previous release.
parent 5a8b9223
......@@ -17,6 +17,7 @@ my %test_per = ( # defaults
"latency" => 300,
"bw" => 0,
);
# Manager identifier this script passes as the last argument of its
# edittest() calls.
# NOTE(review): assigned without `my`/`our`, unlike the declarations around
# it; this only compiles under `use strict` if the name is declared somewhere
# not visible here -- confirm.
$thisManagerID = "automanagerclient";
my %intersitenodes = (); #final list for fully-connected test
my @constrnodes; #test constrained to these nodes
my %sitenodes; #hash listing all sites => nodes
......@@ -259,11 +260,15 @@ sub initNewSiteNode($)
# Set up the bandwidth test in both directions between the representative
# node of $srcsite and the representative node of $site.
# Argument order follows edittest(): source node, destination node, test
# period, test type, duration, manager id.
# NOTE(review): a duration of 0 presumably selects a 'forever' (non-temporary)
# test -- confirm against the receiving side's EDIT handling.
edittest( $intersitenodes{$srcsite},
          $intersitenodes{$site},
          $test_per{bw},
          "bw",
          0,
          $thisManagerID );
edittest( $intersitenodes{$site},
          $intersitenodes{$srcsite},
          $test_per{bw},
          "bw",      # the comma after this argument was missing (syntax error)
          0,
          $thisManagerID );
my $r = rand;
if( $r <= .5 ){
edittest( $intersitenodes{$srcsite},
......
......@@ -84,9 +84,12 @@ my %cacheLastSentAt; #{indx} -> tstamp of last sent cache result
my $cacheIndxWaitPer = 5; #wait x sec between cache send attempts of same indx
my $iperfduration = 5; #length of each iperf test in seconds
my $iperftimeout = 30; #kill an iperf process lasting longer than this.
my $outDet_maxPastSuc = 12; #consider a path for outage detection if a valid
#result appeared within this many hours in the past
=pod
# percentage of testing period to wait after a test process abnormally exits
# note: 0.1 = 10%
=pod
my %TEST_FAIL_RETRY= (latency => 0.3,
bw => 0.5);
=cut
......@@ -170,7 +173,7 @@ sub handleincomingmsgs()
my $inmsg;
my $cmdHandle;
#check for pending received events
my @ready = $sel->can_read($pollPer); #wait max of 0.1 sec. Don't want to
#have 0 here, or CPU usage goes high
foreach my $handle (@ready){
......@@ -223,133 +226,42 @@ sub handleincomingmsgs()
my $testev = \%{ $testevents{$linkdest}{$testtype} };
print time()." EDIT:\n";
print( "linkdest=$linkdest\n".
"testype =$testtype\n".
"testper=$newtestper\n" );
print "duration=".$sockIn{duration}."\n"
if( defined $sockIn{duration} );
if( editTest($testev, $newtestper, $duration, $managerID) ){
print time()." EDIT:\n";
print( "linkdest=$linkdest\n".
"testype =$testtype\n".
"testper=$newtestper\n" );
print "duration=".$sockIn{duration}."\n"
if( defined $sockIn{duration} );
#
# only edit test if this test is not in an outage state,
# thus testing periods defined outside of user control
#
if( !defined $testev->{outagestate} ){
$testev->{outagestate} = "normal";
# print "initial EDIT, so setting outagestate to normal\n";
}
=pod
# only change test if conditions met...
if( checkTestChangeAllowed( $testev, $managerID ) ){
# TODO: TEST THIS!!!
#
# Smartly handle two overlapping requests, such that
# the highest rate is used for the duration specified
# in the command
# TODO: Add fancier queuing here!
# Specifications:
# - an EDIT with testper=0
# will stop the corresponding 'temp' or 'forever' test
# selected by the value of 'duration'.
# - ** Only a "background" rate (forever) and one
# temporary rate increase are supported.
# - ANY new 'forever' rate will overwrite the previous one
# - A 'forever' faster than existing 'temp' overwrites 'temp'
# and does not recover it if a future 'forever' has a rate
# less than the 'temp' (it removes all trace)
# state descriptions: (Current state) -> (action)
# saved valid|Currently running | New| change|replace
# 'forever' | 0=forever |Edit| period?|saved
# exists | 1=temp) |Type| -> |'forever'?
# 0 0 0 1 0
# 0 1 0 0 1
# 1 1 0 0 1
# 0 0 1 1 1
# 0 1 1 1 0
# 1 1 1 1 0
#TODO!!!
# EDIT above... action after rcving 'forever' while running
# a 'temp' depends on period comparisons between
# old 'forever' new 'forever' and 'temp'
#make sure existing testper is valid
if( !defined $testev->{testper} ){
$testev->{testper} = 0;
}
#make sure limitTime is valid
if( !defined $testev->{limitTime} ){
$testev->{limitTime} = 0;
}
if( $testev->{outagestate} eq "normal" )
{
editTest($testev, $newtestper, $duration, $managerID);
}else{
print time()." PATH CURRENTLY IN OUTAGE DETECTION MODE\n";
if( defined $sockIn{duration} && $sockIn{duration} > 0 ){
# New edit is a 'temp' type
if( $newtestper < $testev->{testper} ||
$testev->{testper} == 0 )
{
# state (xx1->1?) new per checked for faster freq.
if( $testev->{limitTime} == 0 && $newtestper > 0){
# state (x01->11)
#Save existing 'forever' test and use new testper
$testev->{prevPeriod} = $testev->{testper};
$testev->{testper} = $newtestper;
$testev->{limitTime} =
time_all()+$sockIn{duration};
}elsif( $testev->{limitTime} != 0 ){
# state (x11->10)
if( $newtestper == 0 ){
# temp edit is 0 per, so re-start saved
#'forever' test
$testev->{testper} = $testev->{prevPeriod};
$testev->{limitTime} = 0;
}else{
#update period and duration with new command
$testev->{testper} = $newtestper;
$testev->{limitTime} =
time_all()+$sockIn{duration};
}
}
}
#save this newest command in "pending"
if( defined $duration && $duration > 0){
$testev->{pendingtempper} = $newtestper;
$testev->{pendingtempduration} = $duration;
$testev->{pendingtemprcvtime} = time();
}else{
#state (xx0->??)
# New edit is a 'forever' type
if( $testev->{limitTime} == 0 ){
# state (x00->10)
# currently running a forever
$testev->{testper} = $newtestper;
}else{
# state (x10->01)
# currently running a temp
# cases of periods
# 1) new forever is not 0 and < existing temp.
# 2) new forever is 0
# 3) new forever is > existing temp
if( $newtestper != 0 &&
$newtestper < $testev->{testper} )
{
#case 1
$testev->{testper} = $newtestper;
$testev->{limitTime} = 0;
$testev->{prevPeriod} = 0;
}elsif( $newtestper == 0 ){
#case 2
$testev->{prevPeriod} = 0;
}elsif( $newtestper > $testev->{testper} ){
#case 3
$testev->{prevPeriod} = $newtestper;
}
}
$testev->{pendingdurationper} = $newtestper;
}
$testev->{flag_scheduled} = 0;
$testev->{timeOfNextRun} = time_all();
$testev->{managerID} = $managerID;
print time()." EDIT:\n";
print( "linkdest=$linkdest\n".
"testype =$testtype\n".
"testper=$newtestper\n" );
print "duration=".$sockIn{duration}."\n"
if( defined $sockIn{duration} );
}
=cut
print( "linkdest=$linkdest\n".
"testype =$testtype\n".
"testper=$newtestper\n" );
print "duration=".$sockIn{duration}."\n"
if( defined $sockIn{duration} );
}
elsif( $cmdtype eq "INIT" ){
print time()." INIT: ";
......@@ -369,8 +281,28 @@ sub handleincomingmsgs()
my $offset = 0;
foreach my $linkdest (@destnodes){
my $testev = \%{ $testevents{$linkdest}{$testtype} };
editTest($testev, $newtestper, $duration, $managerID, $offset);
$offset += $offsetinc;
#
# only edit test if this test is not in an outage state,
# thus testing periods defined outside of user control
#
if( !defined $testev->{outagestate} ){
$testev->{outagestate} = "normal";
}if( $testev->{outagestate} eq "normal" ){
editTest($testev, $newtestper, $duration,
$managerID, $offset);
$offset += $offsetinc;
}else{
print time()." PATH CURRENTLY IN OUTAGE DETECTION MODE\n";
#save this newest command in "pending"
if( defined $duration && $duration > 0){
$testev->{pendingtempper} = $newtestper;
$testev->{pendingtempduration} = $duration;
$testev->{pendingtemprcvtime} = time();
}else{
$testev->{pendingdurationper} = $newtestper;
}
}
}
print " $testtype $newtestper\n";
}
......@@ -565,6 +497,12 @@ while (1) {
#reset flags
$testev->{"flag_finished"} = 0;
$testev->{"flag_scheduled"} = 0;
#TODO: Outage detection here...
#look at latency to determine outage
if( $testtype eq "latency" ){
updateOutageState( $testev );
}
}
#schedule new tests
......@@ -996,15 +934,18 @@ sub createDBfilename()
#############################################################################
#
# return a warning (bw, for now) if a test has not been run for a while
# after when it is scheduled to be
sub detectHang($)
{
my ($nodeid) = @_;
my $TIMEOUT_NUM_PER = 5;
my $TIMEOUT_NUM_PER = 10;
if(
$testevents{$nodeid}{bw}{flag_scheduled} == 1 &&
if( $testevents{$nodeid}{bw}{flag_scheduled} == 1 &&
time_all() > $testevents{$nodeid}{bw}{timeOfNextRun} +
$testevents{$nodeid}{bw}{testper} * $TIMEOUT_NUM_PER )
$testevents{$nodeid}{bw}{testper} * $TIMEOUT_NUM_PER
&& $testevents{$nodeid}{bw}{testper} >0 )
{
return "bw";
}
......@@ -1030,6 +971,8 @@ sub isMsgValid(\%)
sub checkTestChangeAllowed(\%$)
{
return 1; # always allowed.. for now
=pod
my ($href, $managerID ) = @_;
my %testev = %{$href};
if( !defined %testev ||
......@@ -1049,6 +992,7 @@ sub checkTestChangeAllowed(\%$)
" old managerID=$testev{managerID}\n";
return 0;
}
=cut
}
......@@ -1168,3 +1112,54 @@ sub editTest(\$$$,$)
return 0;
}
}
#
#
#
=pod
state transitions
CurrentState Input NextState
----------------------------------------------
normal gotErr&PrevSucc highFreq
else normal
highFreq <60sec&ERR highFreq
>60sec&ERR medFreq
SUCCESS outageEnd
medFreq <10min&ERR medFreq
>10min&ERR lowFreq
SUCCESS outageEnd
lowFreq ERR lowFreq
SUCCESS outageEnd
outageEnd <120sec&SUCCESS outageEnd
>120sec&SUCCESS normal
ERR highFreq
=cut
#
# Update the outage-detection bookkeeping of one test event.
#
# input params:
#  - reference to the test event hash; the keys read/written here are
#    {outagestate} and {lastValidLatTime}
#
# Returns nothing.
#
# This routine is still a stub: the per-state transitions are not
# implemented yet and only the "last valid result" timestamp is maintained.
#
# NOTE(review): $parseData and $outDet_maxPastSuc are file-level variables
# that are not declared inside this sub; $parseData's declaration is not
# visible in this part of the file -- confirm it holds the latest latency
# result when this sub runs.
# NOTE(review): the (\$) prototype asks for a scalar reference, while the body
# treats its argument as a hash reference. This only works for calls compiled
# before this definition (where the prototype is not applied) -- confirm
# before adding callers below this point.
sub updateOutageState(\$)
{
    my ($testev) = @_;
    my $now = time();

    # A test event without an outage state is treated as "normal" for the
    # comparison below; the stored value itself is not modified here.
    my $curstate = defined $testev->{outagestate}
                       ? $testev->{outagestate}
                       : "normal";

    if( $parseData > 0 ){
        #valid result, so note the time that this was seen
        $testev->{lastValidLatTime} = $now;
    }

    # SWITCH ON outagestate
    # (the former empty "elsif()" was a syntax error; a plain else keeps the
    #  placeholder compilable until the transitions are written)
    if( $curstate eq "normal" ){
        # TODO: transition out of "normal"
    }
    else{
        # TODO: transitions for the remaining outage states
    }

    # $outDet_maxPastSuc is documented at its declaration as a number of
    # hours, whereas time() counts seconds, so convert before comparing.
    if( defined $testev->{lastValidLatTime} &&
        $now < $testev->{lastValidLatTime} + $outDet_maxPastSuc * 3600 )
    {
        #path down and was up recently, so start latency outage
        # TODO: not implemented; note that "path down" is not tested here yet
    }

    return;
}
......@@ -7,23 +7,30 @@ use Exporter;
use vars qw(@ISA @EXPORT);
use IO::Socket::INET;
use IO::Select;
#use lib '/usr/testbed/lib';
#use event;
require Exporter;
@ISA = "Exporter";
@EXPORT = qw (
our @EXPORT = qw (
%deadnodes
%ERRID
deserialize_hash
serialize_hash
sendcmd
sendcmd_evsys
time_all
setcmdport
setexpid
stopnode
stopnode_evsys
edittest
edittest_evsys
killnode
getstatus
);
our @EXPORT_OK = qw(
);
# These errors define specifics of when a measurement value cannot be
......@@ -49,11 +56,13 @@ my $expid;
# Remember the command port in the file-level $port variable.
#  - first argument: the port value to store
sub setcmdport($)
{
    my ($newport) = @_;
    # print "libwanetmon: port=$newport\n";
    $port = $newport;
}
# Remember the experiment id in the file-level $expid variable.
#  - first argument: the experiment id to store
sub setexpid($)
{
    my ($newexpid) = @_;
    # print "libwanetmon: expid=$newexpid\n";
    $expid = $newexpid;
}
......@@ -117,6 +126,7 @@ sub sendcmd($$)
}
my $sercmd = serialize_hash( \%cmd );
# print "sercmd=$sercmd\n";
my $f_success = 0;
my $max_tries = 3;
my $retval;
......@@ -167,14 +177,74 @@ sub sendcmd($$)
}
#
# Send a command as an event-system notification.
#
# input params:
# - name of command (EDIT, INIT, etc..); used as the notification's eventtype
# - reference to a hash of extra strings to add to the event notification
#   (each key/value pair becomes one string attribute)
# - handle to eventsystem "handle"
#
# Returns whatever event::event_notification_free() returns.
# Dies if the address tuple or the notification cannot be allocated, or if
# the notification cannot be sent. Only warns when a single attribute cannot
# be added.
#
# NOTE(review): the `use event;` line in this module's preamble is commented
# out, so the event:: functions presumably have to be loaded by the calling
# script -- confirm.
sub sendcmd_evsys($$$)
{
    my ($cmdname, $hashref, $handle) = @_;

    #
    # This is the evsys command to send
    #
    my $tuple = event::address_tuple_alloc();
    if (!$tuple) { die "Could not allocate an address tuple\n"; }

    %$tuple = ( objtype   => "BGMON",
                objname   => "manager",
                eventtype => $cmdname,
                expt      => "__none",
              );

    my $notification = event::event_notification_alloc($handle,$tuple);
    if (!$notification) { die "Could not allocate notification\n"; }

    # set extra params (read straight from the caller's hash; no copy needed)
    foreach my $name (keys %$hashref){
        if( 0 == event::event_notification_put_string( $handle,
                                                       $notification,
                                                       $name,
                                                       $hashref->{$name} ) )
        { warn "Could not add attribute to notification\n"; }
    }

    #send notification; release it before reporting a failure so that a
    #caller trapping the die does not leak the notification
    my $sent  = event::event_notify($handle, $notification);
    my $freed = event::event_notification_free($handle, $notification);
    if (!$sent) {
        die("could not send test event notification");
    }

    return $freed;
}
# Send a STOPALL command to a node through sendcmd().
#  - first argument: node the command is sent to
#  - second argument: manager id placed in the command
# The command also carries the file-level $expid.
# Returns whatever sendcmd() returns.
sub stopnode($$)
{
    my $target  = shift;
    my $manager = shift;

    my %stopall;
    $stopall{expid}     = $expid;
    $stopall{managerID} = $manager;
    $stopall{cmdtype}   = "STOPALL";

    return sendcmd($target, \%stopall);
}
#
#
# Send a STOPALL command as an event-system notification.
#  - first argument: node name, sent as the "srcnode" attribute
#  - second argument: manager id, sent as the "managerID" attribute
#  - third argument: event system handle, passed on to sendcmd_evsys()
# Returns whatever sendcmd_evsys() returns.
sub stopnode_evsys($$$)
{
    my $target  = shift;
    my $manager = shift;
    my $evsys   = shift;

    my %stopall;
    $stopall{srcnode}   = $target;
    $stopall{managerID} = $manager;
    $stopall{cmdtype}   = "STOPALL";

    return sendcmd_evsys("STOPALL", \%stopall, $evsys);
}
sub killnode($)
{
my ($node) = @_;
......@@ -184,12 +254,10 @@ sub killnode($)
}
sub edittest($$$;$)
sub edittest($$$$$$)
{
my ($srcnode, $destnode, $testper, $testtype, $limitTime) = @_;
if( !defined $limitTime ){
$limitTime = 0;
}
my ($srcnode, $destnode, $testper, $testtype, $duration, $managerID) = @_;
if ($srcnode eq $destnode ){
return -1;
}
......@@ -199,11 +267,32 @@ sub edittest($$$;$)
dstnode => $destnode,
testtype => $testtype,
testper => $testper,
limitTime=> $limitTime);
duration => $duration );
return ${[sendcmd($srcnode,\%cmd)]}[0];
}
# Send an EDIT command for one test as an event-system notification.
#  arguments, in order: source node, destination node, test period,
#  test type, duration, manager id, event system handle
# Returns -1 without sending anything when source and destination are the
# same node; otherwise returns whatever sendcmd_evsys() returns.
sub edittest_evsys($$$$$$$)
{
    my $from     = shift;
    my $to       = shift;
    my $period   = shift;
    my $type     = shift;
    my $length   = shift;
    my $manager  = shift;
    my $evsys    = shift;

    # a node is never tested against itself
    return -1 if $from eq $to;

    my %edit;
    $edit{managerID} = $manager;
    $edit{srcnode}   = $from;
    $edit{dstnode}   = $to;
    $edit{testtype}  = $type;
    $edit{testper}   = $period;
    $edit{duration}  = $length;

    # (the socket-based variant returned the first element of sendcmd()'s
    #  result here instead)
    return sendcmd_evsys("EDIT", \%edit, $evsys);
}
sub getstatus($){
my ($node) = @_;
......@@ -218,4 +307,6 @@ sub getstatus($){
}
1;
......@@ -14,6 +14,8 @@ Parameters/options (more details):
schedule measurements in a fully connected fashion. Default
is to init nodes in a pairwise fashion, in order listed in
<input_file>.
- (-e): project and experiment ids of the experiment running bgmons on
plab. For now, this is tbres/pelabbgmon.
After script starts, it awaits a user command at a console prompt.
The commands fully implemented are "start" and "stop", which send
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment