Commit ce218759 authored by David Johnson

Finish and fix ewma ranking in libplabnodehist.

Also add plabnodehistmetrics, which ranks plab nodes using
libplabnodehist, writes the ranks and metadata to
/share/planetlab/reliable_nodes, and inserts a '+unavail' weight into
node_features for planetlab nodes.
parent cd7aac62
......@@ -16,7 +16,7 @@ SUBDIRS = libdslice etc
SBIN_STUFF = plabslice plabnode plabrenewd plabmetrics plabstats \
plabmonitord plablinkdata plabdist plabhttpd plabdiscover \
plabrenewonce plabnodehistclean
plabrenewonce plabnodehistclean plabnodehistmetrics
LIB_STUFF = libplab.py mod_dslice.py mod_PLC.py mod_PLCNM.py \
mod_PLC4.py sshhttp.py \
......
......@@ -211,7 +211,7 @@ sub rankNodesByUnavail(;$$$) {
$doewma = 0;
}
my %rank = ();
my %fnstats = ();
my %nstats = ();
my $now = $sref->{'__NOW__'};
......@@ -228,6 +228,8 @@ sub rankNodesByUnavail(;$$$) {
# boundary. Of course, if not using ewma, we don't care about
# this kind of split.
#
my %nstats = ();
# keep track of the oldest bin we accumulate if doing ewma
$nstats{'__MAXBIN__'} = 0;
# need to track separately the total length of all failure/success
......@@ -252,10 +254,20 @@ sub rankNodesByUnavail(;$$$) {
$start_slot = int(($now - $seq[0]) / $BIN_SECONDS);
$stop_slot = int(($now - ($seq[0] + $seq[1]))
/ $BIN_SECONDS);
if ($stop_slot > $nstats{'__MAXBIN__'}) {
$nstats{'__MAXBIN__'} = $stop_slot;
if ($debug) {
print STDERR "start_slot=$start_slot; " .
"stop_slot=$stop_slot\n";
}
if ($start_slot > $nstats{'__MAXBIN__'}) {
$nstats{'__MAXBIN__'} = $start_slot;
}
if ($debug) {
print STDERR "maxbin=" . $nstats{'__MAXBIN__'} . "\n";
}
if ($start_slot == $stop_slot) {
if (!defined($nstats{"seqlen_$s"}[$start_slot])) {
$nstats{"seqlen_$s"}[$start_slot] = 0;
......@@ -263,22 +275,39 @@ sub rankNodesByUnavail(;$$$) {
$nstats{"seqlen_$s"}[$start_slot] += $seq[1];
}
else {
if (!defined($nstats{"seqlen_$s"}[$start_slot])) {
$nstats{"seqlen_$s"}[$start_slot] = 0;
# have to fill in all bins between start/stop;
# they may not be adjacent
my $i;
for ($i = $stop_slot; $i <= $start_slot; ++$i) {
if (!defined($nstats{"seqlen_$s"}[$i])) {
$nstats{"seqlen_$s"}[$i] = 0;
}
}
if (!defined($nstats{"seqlen_$s"}[$stop_slot])) {
$nstats{"seqlen_$s"}[$stop_slot] = 0;
for ($i = $stop_slot; $i <= $start_slot; ++$i) {
my $curslotend = $now - ($i + 1) * $BIN_SECONDS;
# if the end of the current slot is prior to
# the start of the sequence, only add in the
# binsize minus the diff between the end of the
# slot and the start of the sequence
if ($curslotend < $seq[0]) {
$nstats{"seqlen_$s"}[$i] += $BIN_SECONDS -
($seq[0] - $curslotend);
}
else {
$nstats{"seqlen_$s"}[$i] += $BIN_SECONDS;
}
}
my $start_slot_time = (($seq[1] + $seq[0]) -
($stop_slot * $BIN_SECONDS)) -
$seq[0];
my $stop_slot_time = $seq[1] - $start_slot_time;
#my $start_slot_time = (($seq[1] + $seq[0]) -
# ($stop_slot * $BIN_SECONDS)) -
# $seq[0];
#my $stop_slot_time = $seq[1] - $start_slot_time;
$nstats{"seqlen_$s"}[$start_slot] +=
$start_slot_time;
$nstats{"seqlen_$s"}[$stop_slot] +=
$stop_slot_time;
#$nstats{"seqlen_$s"}[$start_slot] +=
# $start_slot_time;
#$nstats{"seqlen_$s"}[$stop_slot] +=
# $stop_slot_time;
}
if (!defined($nstats{"seqnum_$s"}[$stop_slot])) {
......@@ -392,7 +421,37 @@ sub rankNodesByUnavail(;$$$) {
#
my $unavail = 0;
if ($doewma) {
;
my $weight = 0.0;
my $i;
my $currentsum = 0;
for ($i = 0; $i < ($nstats{'__MAXBIN__'} + 1); ++$i) {
if ($debug) {
print STDERR "f bin $i =" . $nstats{"seqlen_failure"}[$i] .
"; s bin $i = " . $nstats{"seqlen_success"}[$i] . "\n";
}
my $factor = (1 - $EWMA_ALPHA) ** $i;
my $denom = 0;
if (defined($nstats{"seqlen_failure"}[$i])
&& $nstats{"seqlen_failure"}[$i] > 0) {
$denom += $nstats{"seqlen_failure"}[$i];
}
else {
$nstats{"seqlen_failure"}[$i] = 0;
}
if (defined($nstats{"seqlen_success"}[$i])
&& $nstats{"seqlen_success"}[$i] > 0) {
$denom += $nstats{"seqlen_success"}[$i];
}
if ($denom > 0) {
$currentsum += $factor *
($nstats{"seqlen_failure"}[$i] / $denom);
}
$weight += $factor;
}
$unavail = $currentsum / $weight;
}
else {
$unavail = $nstats{"totseqlen_failure"} /
......@@ -403,17 +462,31 @@ sub rankNodesByUnavail(;$$$) {
print STDERR "$nodeid: unavail = " . $unavail . "\n";
}
my $finalrank = $unavail + $jitter_deduction;
$rank{$nodeid} = $finalrank;
# save off stuff we want to return
$fnstats{$nodeid}{'rank'} = $unavail + $jitter_deduction;
$fnstats{$nodeid}{'jitter_deduction'} = $jitter_deduction;
$fnstats{$nodeid}{'totseqlen_failure'} = $nstats{'totseqlen_failure'};
$fnstats{$nodeid}{'totseqlen_success'} = $nstats{'totseqlen_success'};
$fnstats{$nodeid}{'totseqnum_failure'} = $nstats{'totseqnum_failure'};
$fnstats{$nodeid}{'totseqnum_success'} = $nstats{'totseqnum_success'};
$fnstats{$nodeid}{'jitseqnum_failure'} = $nstats{'jitseqnum_failure'};
$fnstats{$nodeid}{'jitseqnum_success'} = $nstats{'jitseqnum_success'};
}
# finally, sort by rank and return a list of tuples.
my @retval = ();
my $i = 0;
foreach my $n (sort { $rank{$a} <=> $rank{$b} } keys(%rank)) {
foreach my $n (sort { $fnstats{$a}{'rank'} <=> $fnstats{$b}{'rank'} } keys(%fnstats)) {
$retval[$i]{'nodeid'} = $n;
$retval[$i]{'rank'} = $rank{$n};
$retval[$i]{'rank'} = $fnstats{$n}{'rank'};
$retval[$i]{'jitter_deduction'} = $fnstats{$n}{'jitter_deduction'};
$retval[$i]{'totseqlen_failure'} = $fnstats{$n}{'totseqlen_failure'};
$retval[$i]{'totseqlen_success'} = $fnstats{$n}{'totseqlen_success'};
$retval[$i]{'totseqnum_failure'} = $fnstats{$n}{'totseqnum_failure'};
$retval[$i]{'totseqnum_success'} = $fnstats{$n}{'totseqnum_success'};
$retval[$i]{'jitseqnum_failure'} = $fnstats{$n}{'jitseqnum_failure'};
$retval[$i]{'jitseqnum_success'} = $fnstats{$n}{'jitseqnum_success'};
++$i;
}
......
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
# Copyright (c) 2007 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
use Getopt::Std;
#
# Print the one-line usage synopsis on STDOUT and terminate the script
# with exit code -1. Takes no arguments and does not return.
#
sub usage()
{
    my $synopsis = "Usage: plabnodehistmetrics [-n]\n";

    print STDOUT $synopsis;
    exit(-1);
}
# Option letters handed to getopts(), and the flag that the -n option
# turns on. When $impotent is set the script prints the ranking to STDOUT
# and touches neither the output file nor the database.
my $optlist  = "n";
my $impotent = 0;

#
# Refuse to run unless the real uid is root.
#
unless ($UID == 0) {
    print STDERR "You must be root to run this script!\n";
    exit(-1);
}

#
# Configure variables.
# NOTE(review): the at-sign tokens are presumably substituted when the
# script is generated from its template -- confirm against the build.
#
my $TB    = "@prefix@";
my $TBOPS = "@TBOPSEMAIL@";

#
# Untaint the path: pin PATH to a fixed value and drop the environment
# variables named below.
#
$ENV{'PATH'} = '/bin:/usr/bin';
delete $ENV{$_} foreach ('IFS', 'CDPATH', 'ENV', 'BASH_ENV');
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libplabnodehist qw(getNodeHistSequences rankNodesByUnavail sequenceToStr);
# Destination of the ranked node list.
my $OUTFILE = "/share/planetlab/reliable_nodes";

# Be careful not to exit on transient error
$libdb::DBQUERY_MAXTRIES = 30;

#
# Parse command arguments. Once we return from getopts, all that should
# be left are the required arguments -- and this script takes none, so
# anything still sitting in @ARGV is a usage error.
#
# The option hash is a lexical: it is only read in this section, so there
# is no reason to leave it behind as an undeclared package global.
#
my %options = ();
if (!getopts($optlist, \%options) || @ARGV) {
    usage();
}

# -n selects impotent mode.
if (defined($options{"n"})) {
    $impotent = 1;
}
#
# Fetch the plab_nodehist sequence stats and rank the nodes from them.
# The ranking comes back as a reference to a list of per-node hashes;
# it is flattened into @ranklist for the reporting code that follows.
# NOTE(review): the two 1 flags presumably switch on ewma and jitter
# handling -- confirm against libplabnodehist.
#
my $seqref   = getNodeHistSequences();
my $rankref  = rankNodesByUnavail($seqref, 1, 1);
my @ranklist = @{$rankref};
#
# Dump out to the reliable nodes file for local people. In impotent mode
# (-n) the same report goes to STDOUT instead and the file is not touched.
#
# One line per node, tab-separated, in this column order:
#   nodeid, rank, jitter_deduction,
#   totseqlen_success, totseqnum_success, jitseqnum_success,
#   totseqlen_failure, totseqnum_failure, jitseqnum_failure
# rank and jitter_deduction are printed with four decimal places.
#
my $OF;
if ($impotent) {
    # A real glob reference rather than the bareword STDOUT, which only
    # works as a symbolic filehandle name.
    $OF = \*STDOUT;
}
else {
    # Three-argument open: the mode is given separately, so the path is
    # never parsed for one.
    open($OF, '>', $OUTFILE)
        or die "could not open reliable nodes file $OUTFILE: $!";
}

# File-scoped loop counter. The database update loop further down in the
# script uses it as well, so it stays declared here.
my $i = 0;

foreach my $node (@ranklist) {
    print {$OF} join("\t",
                     $node->{'nodeid'},
                     sprintf("%.4f", $node->{'rank'}),
                     sprintf("%.4f", $node->{'jitter_deduction'}),
                     $node->{'totseqlen_success'},
                     $node->{'totseqnum_success'},
                     $node->{'jitseqnum_success'},
                     $node->{'totseqlen_failure'},
                     $node->{'totseqnum_failure'},
                     $node->{'jitseqnum_failure'}) . "\n";
}

# Only close what we opened. Buffered write errors surface at close time,
# so report them instead of silently leaving a short file behind; this is
# a warning rather than a die so the database update still runs.
if (!$impotent) {
    close($OF)
        or warn "could not close reliable nodes file $OUTFILE: $!";
}
#
# Now dump stats into the db as a feature of "unavailability"; nothing is
# written in impotent mode.
# Each node's weight is its rank, capped at 0.99 so it's valid and proper
# for assign.
# Also note that we use the unavailability rank augmented with a jitter
# increase (i.e., if the node is jittery, we increase its unavailability
# score).
#
unless ($impotent) {
    foreach my $entry (@ranklist) {
        my $nodeid = $entry->{'nodeid'};
        my $weight = $entry->{'rank'};

        # Clamp to the largest weight we are willing to store.
        $weight = 0.99
            if ($weight > 0.99);

        DBQueryWarn("replace into node_features ".
                    " (node_id, feature, weight) ".
                    " values ('$nodeid', '+unavail', $weight)");
    }
}

exit(0);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment