Commit ead90a69 authored by Timothy Stack's avatar Timothy Stack
Browse files

Many tweaks to the feedback stuff and more comments. Update
webfeedback to talk to the newer version of canaryd. Add feedback
"estimate" stuff so that, if we have no data to work with (because of
an overloaded node), we can still make some sort of "reasonable"
guesstimate on every iteration.
parent 62b5e3a8
......@@ -13,7 +13,20 @@
#
BEGIN {
OVERLOAD_MAX = 3; # XXX Make this a command line argument.
# The overload "threshold", so, this many overload signals in a row means
# the node is overloaded. The 2.25 here requires three CPU/swapout/etc
# related overloads or a few timing related overloads.
OVERLOAD_MAX = 2.25; # XXX Make this a command line argument.
# The maximum allowed time between canaryd reports, otherwise we consider
# it an overload condition.
TIME_OVERLOAD = 1.1;
# The maximum number of pageouts allowed. XXX I don't know the semantics...
SWAPOUT_OVERLOAD = 2;
DISK_OVERLOAD = 50;
error = 0;
if( ARGC < 2 )
{
......@@ -34,18 +47,6 @@ BEGIN {
error = -1;
exit error;
}
printf("# -*- TCL -*-\n");
printf("# Automatically generated feedback file.\n");
printf("#\n");
for( lpc = 1; lpc < ARGC; lpc++ )
{
printf("# ARGV[%d]: %s\n", lpc, ARGV[lpc]);
}
printf("#\n");
printf("# Generated at: %s\n", strftime());
printf("#\n\n");
printf("# BEGIN Node/LAN\n");
}
#
......@@ -64,13 +65,14 @@ BEGIN {
#
# vers=3 mis=1084907941 lave=0.0000000000,0.0000000000,0.0000000000 abits=0x5 page=0,0 ints=1134,693,82 cpu=0,1,98 iface=00:02:b3:3f:7a:20,18804,3352,14047137,457522 iface=00:03:47:73:a2:42,9079,125118,3428409,60045802 iface=00:00:00:00:00:00,0,0,0,0 iface=00:00:0a:01:08:03,2846,125780,385828,58014417 iface=00:00:0a:01:12:03,125782,2843,58015462,385822 iface=00:00:0a:01:0d:02,6285,152,2896485,19985 iface=00:00:0a:01:12:02,2845,125781,385780,58015462 vnode=dslclient-11.testdssvm.tbres.emulab.net,0.0,1.8 vnode=server.testdssvm.tbres.emulab.net,2.1,2.5 vnode=corerouter.testdssvm.tbres.emulab.net,0.0,1.2
#
/vers=3/ {
/vers=2/ {
line_count[ARGIND] += 1;
if( (vnode_count > 1) &&
(line_count[ARGIND - 1] > 0) &&
(line_count[ARGIND - 1] < 3) )
{
# There are not enough lines to deduce anything other than overload.
# There are not enough canaryd lines to deduce anything other than
# overload.
for( vnode_name in line_vnode_names )
{
alerts[vnode_name] = 1;
......@@ -80,12 +82,17 @@ BEGIN {
alerts[mac] = 1;
}
}
vnode_count = 0;
total_vnode_cpu = 0;
# Initialize some "locals"
vnode_count = 0; # The number of vnodes on this pnode.
total_vnode_cpu = 0; # The total CPU used by vnodes
total_vnode_pps = 0; # The total number of packets per second (in and out)
time_diff_s = 0;
ovld = 0;
delete line_vnode_names;
delete line_macs;
# Loop through the key/value pairs on the canaryd line.
for( lpc = 2; lpc <= NF; lpc++ )
{
# Determine the field type and
......@@ -93,6 +100,8 @@ BEGIN {
# ... handle it.
if( field[1] == "stamp" )
{
# Timestamp, mostly just care about how far off we are from the last
# period.
split(field[2], data, /,/);
if( last_stamp_s[ARGIND] )
{
......@@ -107,22 +116,55 @@ BEGIN {
last_stamp_s[ARGIND] = data[1];
last_stamp_us[ARGIND] = data[2];
# Assume overload if the time diff is relatively large.
if( time_diff_s > 1.25 )
# Assume some overload if the diff from the last period is relatively
# large.
if( time_diff_s > TIME_OVERLOAD )
{
ovld = 1;
# Signal multiple overload events for each missed time interval.
overload[ARGIND] += (time_diff_s - 1.0);
printf("# tstmp ovld (%f): %s\n", (time_diff_s - 1.0), $0);
}
}
else if( field[1] == "ovld" )
{
if( !ovld )
ovld = field[2];
if( field[2] == 1 )
overload[ARGIND] += 1;
}
else if( field[1] == "cpu" )
{
# Total CPU time and
total_cpu = field[2];
# ... signal overload if its too high.
if( total_cpu >= 99 )
overload[ARGIND] += 1;
}
else if( field[1] == "intr" )
{
# Record interrupt load so we can try to assign it to node members
# later on.
intr = field[2];
}
else if( field[1] == "sys" )
{
# Record system CPU load so we can try to assign it to node members
# later on.
sys_cpu = field[2];
}
else if( field[1] == "mem" )
{
split(field[2], data, /,/);
if( data[2] > SWAPOUT_OVERLOAD )
{
# Swapping out too much...
overload[ARGIND] += 1;
}
}
else if( field[1] == "disk" )
{
# Disk statistic from the canaryd line; count one overload signal when it
# exceeds DISK_OVERLOAD. The value is in field[2], as in every other
# key/value branch of this loop. The previous test read "fields[2]", an
# array that is not assigned in the visible script, so it compared an
# empty value against the threshold and could never fire.
if( field[2] > DISK_OVERLOAD )
{
overload[ARGIND] += 1;
}
}
else if( field[1] == "iface" )
{
......@@ -175,6 +217,12 @@ BEGIN {
in_pkts = in_pkts / time_diff_s;
out_bw = out_bw / time_diff_s;
out_pkts = out_pkts / time_diff_s;
# Record these values so we can do something with them and the
# interrupt load numbers later.
total_vnode_pps += in_pkts + out_pkts;
line_pps[link_mac] = in_pkts + out_pkts;
# Find the maximum of bandwidth/packets of the data seen so far.
if( in_pkts > links_mac_pkts[link_mac] )
{
......@@ -192,6 +240,8 @@ BEGIN {
{
links_mac_bw[link_mac] = out_bw;
}
# We've seen this MAC address...
line_macs[link_mac] = 1;
}
else if( field[1] == "vnode" )
......@@ -217,44 +267,61 @@ BEGIN {
}
else if( vnode_count == 1 )
{
vnodes_cpu[vnode_name,FNR] = total_cpu;
if( total_cpu >= 99 )
{
vnodes_cpu[vnode_name,FNR] = total_cpu;
}
else
{
vnodes_cpu[vnode_name,FNR] += intr;
}
}
else
{
# Check if slothd signalled overload for this period.
if( ovld || (total_cpu >= 99) )
# We're in overload for this period, but do not signal an alert until
# we see OVERLOAD_MAX consecutive indicators.
if( overload[ARGIND] >= OVERLOAD_MAX )
{
# We're in overload for this period, but do not signal an alert until
# we see OVERLOAD_MAX consecutive indicators.
if( overload[ARGIND] >= OVERLOAD_MAX )
printf("# overload! %f %f\n", last_overload[ARGIND], overload[ARGIND]);
for( vnode_name in line_vnode_names )
{
for( vnode_name in line_vnode_names )
{
alerts[vnode_name] = 1;
}
for( mac in line_macs )
{
alerts[mac] = 1;
}
alerts[vnode_name] = 1;
}
else
for( mac in line_macs )
{
overload[ARGIND] += 1;
alerts[mac] = 1;
}
}
else
else if( last_overload[ARGIND] == overload[ARGIND] )
{
# We're not in overload, so we clear it, and
overload[ARGIND] = 0;
# ... add any unaccounted for CPU to _all_ of the vnodes. Kind of a
# hack, but, nothing else to do here except be conservative.
diff = total_cpu - total_vnode_cpu;
printf("warning: unaccounted for CPU %f\n", diff) > /dev/stderr;
for( vnode_name in line_vnode_names )
}
else
{
printf("# more overload %f %f\n", last_overload[ARGIND], overload[ARGIND]);
}
if( total_vnode_pps > 0 )
{
# Try to guess how much interrupt load can be attributed to a vnode
# based on the number of packets sent/received.
ipp = intr / total_vnode_pps;
printf("# ipp %f = %f / %f\n", ipp, intr, total_vnode_pps);
for( mac in line_macs )
{
vnodes_cpu[vnode_name,FNR] += diff;
if( mac in mac2node )
{
nipp = ipp * line_pps[mac];
vnodes_cpu[mac2node[mac],FNR] += nipp;
printf("# adding %s %s %f -> %f\n",
mac2node[mac],
mac2link[mac],
nipp,
vnodes_cpu[mac2node[mac],FNR]);
}
}
}
last_overload[ARGIND] = overload[ARGIND];
}
}
......@@ -273,27 +340,8 @@ BEGIN {
# set Reservations(link,node0,kbps) 123235.00
#
/^[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]]\:[[:xdigit:]][[:xdigit:]] [[:alnum:]\-]* [[:alnum:]\-]*$/ {
if( $1 in links_mac )
{
printf("set Reservations(%s,%s,kbps) %f\n",
$3,
$2,
(links_mac_bw[$1] * 8) / 1000.0);
printf("set Reservations(%s,%s,pps) %f\n",
$3,
$2,
links_mac_pkts[$1]);
links_name[$3] = 1;
if( links_mac_bw[$1] > links_bw[$3] )
links_bw[$3] = links_mac_bw[$1];
if( links_mac_pkts[$1] > links_pkts[$3] )
links_pkts[$3] = links_mac_pkts[$1];
if( $1 in alerts )
{
printf("set Alerts(%s,%s) 1\n", $3, $2);
printf("set Alerts(%s) 1\n", $3);
}
}
mac2node[$1] = $2;
mac2link[$1] = $3;
}
#
......@@ -306,6 +354,56 @@ END {
exit error;
}
if( (vnode_count > 1) &&
(line_count[ARGIND - 1] > 0) &&
(line_count[ARGIND - 1] < 3) )
{
# There are not enough lines to deduce anything other than overload.
for( vnode_name in line_vnode_names )
{
alerts[vnode_name] = 1;
}
for( mac in line_macs )
{
alerts[mac] = 1;
}
}
printf("# -*- TCL -*-\n");
printf("# Automatically generated feedback file.\n");
printf("#\n");
for( lpc = 1; lpc < ARGC; lpc++ )
{
printf("# ARGV[%d]: %s\n", lpc, ARGV[lpc]);
}
printf("#\n");
printf("# Generated at: %s\n", strftime());
printf("#\n\n");
printf("# BEGIN Node/LAN\n");
for( mac in mac2node )
{
printf("set Reservations(%s,%s,kbps) %f # %s\n",
mac2link[mac],
mac2node[mac],
(links_mac_bw[mac] * 8) / 1000.0,
mac);
printf("set Reservations(%s,%s,pps) %f\n",
mac2link[mac],
mac2node[mac],
links_mac_pkts[mac]);
links_name[mac2link[mac]] = 1;
if( links_mac_bw[mac] > links_bw[mac2link[mac]] )
links_bw[mac2link[mac]] = links_mac_bw[mac];
if( links_mac_pkts[mac] > links_pkts[mac2link[mac]] )
links_pkts[mac2link[mac]] = links_mac_pkts[mac];
if( mac in alerts )
{
printf("set Alerts(%s,%s) 1\n", mac2link[mac], mac2node[mac]);
printf("set Alerts(%s) 1\n", mac2link[mac]);
}
}
printf("# END Node/LAN\n\n");
printf("# BEGIN Nodes\n");
......
......@@ -62,6 +62,7 @@ delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use event;
#
# Parse command arguments. Once we return from getopts, all that should
......@@ -170,27 +171,47 @@ if ($mode eq "record") {
}
print STDOUT "Starting tracers...\n";
# Just need to send a START event to all of the canaryds.
my $handle = event_register_withkeyfile("elvin://boss",
0,
"/proj/$pid/exp/$eid/tbdata/eventkey");
if (!$handle) { die "Unable to register with event system\n"; }
my $tuple = address_tuple_alloc();
if (!$tuple) { die "Could not allocate an address tuple\n"; }
# XXX Need to update DB and what not to say "CANARYD" instead of "SLOTHD"
%$tuple = ( objtype => "SLOTHD",
eventtype => "START",
expt => "$pid/$eid",
host => "*",
site => "*",
group => "*",
objname => "slothd");
my $notification = event_notification_alloc($handle,$tuple);
if (!$notification) { die "Could not allocate notification\n"; }
# Tell canaryd how long it should record data for.
event_notification_put_string($handle, $notification, "ARGS",
"DURATION=$duration");
if (!event_notify($handle, $notification)) {
die("could not send test event notification");
}
event_notification_free($handle, $notification);
if (event_unregister($handle) == 0) {
die("could not unregister with event system");
}
# Get the list of virtual hosts for the virtual nodes.
my $query_result =
DBQueryFatal("select node_id,vname from reserved ".
"where pid='$pid' and eid='$eid' and erole='virthost'");
# Iterate through the virthosts starting up slothd in high-resolution
# tracing mode.
while (my ($node_id,$vname) = $query_result->fetchrow_array()) {
my $cmd;
$cmd = "rm -f /var/run/slothd.pid";
if (!$fake) {
system("/usr/local/bin/sudo $TB/bin/sshtb -host $node_id \"$cmd\"");
}
$cmd = "/proj/tbres/kwebb/evslothd -e -i 1 -t ${duration}";
if (!$fake) {
system("/usr/local/bin/sudo $TB/bin/sshtb -host $node_id \"$cmd\"");
}
}
# Sleep for the duration of the run, then
sleep($duration + 3);
......@@ -207,10 +228,8 @@ if ($mode eq "record") {
$cmd = "/usr/local/bin/rsync -az ".
"--rsh=\"/usr/local/bin/sudo sshtb -host \" ".
"${node_id}:/var/emulab/logs/ ${expdir}/logs/${vname}/";
$vhost_logs .= " ${expdir}/logs/${vname}/slothd.log";
#if (!$fake) {
system($cmd);
#}
$vhost_logs .= " ${expdir}/logs/${vname}/canaryd.log";
system($cmd);
}
......@@ -305,7 +324,7 @@ if ($mode eq "record") {
# Start digest-slothd with all of the slothd logs, tee its output to the
# feedback file, and
open(DIGESTER,
"| $digest_slothd ${vhost_logs} - ".
"| $digest_slothd - ${vhost_logs} ".
" | tee ${expdir}/tbdata/feedback_data.tcl") or
fatal("Could not run digest-slothd!");
......
......@@ -329,6 +329,25 @@ Simulator instproc run {} {
close $file
}
# Write out the feedback "estimate" file.
var_import ::TBCOMPAT::EstimatedReservations;
if {! [file isdirectory $expdir]} {
# Experiment directory does not exist, so we cannot write the file...
} elseif {[array size EstimatedReservations] > 0} {
set file [open "$expdir/tbdata/feedback_estimate.tcl" w]
puts $file "# -*- TCL -*-"
puts $file "# Automatically generated feedback estimated file."
puts $file "#"
puts $file "# Generated at: [clock format [clock seconds]]"
puts $file "#"
puts $file ""
foreach res [array names EstimatedReservations] {
puts $file "set EstimatedReservations($res) $EstimatedReservations($res)"
}
close $file
}
# If we are running in impotent mode we stop here
if {$impotent == 1 && $passmode == 0} {return}
......
......@@ -135,6 +135,13 @@ namespace eval TBCOMPAT {
# to be read in during future evaluations of the NS file.
variable BootstrapReservations
# Table of vnodes/vlinks that were located on an overloaded pnode.
variable Alerts
# Table of "estimated" reservations. Basically, it's our memory of previous
# guesses for vnodes that have 0% CPU usage on an overloaded pnode.
variable EstimatedReservations
# The experiment directory, this is where the feedback related files will
# be read from and dumped to. XXX Hacky
set expdir "/proj/${::GLOBALS::pid}/exp/${::GLOBALS::eid}/"
......@@ -155,6 +162,10 @@ namespace eval TBCOMPAT {
if {[file exists "${expdir}/tbdata/bootstrap_data.tcl"]} {
source "${expdir}/tbdata/bootstrap_data.tcl"
}
# Get any estimated feedback data from a previous run.
if {[file exists "${expdir}/tbdata/feedback_estimate.tcl"]} {
source "${expdir}/tbdata/feedback_estimate.tcl"
}
#
# Configure the default reservations for an object based on an optional
......@@ -195,6 +206,27 @@ namespace eval TBCOMPAT {
return $retval
}
#
# Produce an estimate of a vnode's resource usage. If a guess was already
# made in the previous iteration, double that value. Otherwise, we just
# assume 10%.
#
# @param object The object for which to produce the estimate.
# @param rtype The resource type: cpupercent, rampercent
# @return The estimated resource usage.
#
proc feedback-estimate {object rtype} {
    var_import ::TBCOMPAT::EstimatedReservations

    # Test for the element with "info exists" instead of "array get".
    # "array get" treats its second argument as a glob pattern, so an object
    # name containing *, ?, [ or \ could match a different element (or fail
    # to match its own).
    if {[info exists EstimatedReservations($object,$rtype)]} {
        # An estimate is already recorded for this object/resource: double
        # it. The expression is braced so the stored value is substituted
        # exactly once by expr.
        set retval [expr {$EstimatedReservations($object,$rtype) * 2}]
    } else {
        # No estimate recorded yet: start from a fixed 10%.
        set retval 10.0; # XXX get from DB
    }

    # Record the new estimate in the table before handing it back.
    set EstimatedReservations($object,$rtype) $retval
    return $retval
}
#
# Record bootstrap feedback data for a resource class. This function
# should be called for every member of a resource class so that the one
......@@ -1069,8 +1101,8 @@ proc tb-feedback-vnode {vnode hardware args} {
# ... set computed default values, and
if {[::TBCOMPAT::feedback-defaults $vnode $(-rclass)] == 0} {
# No feedback exists yet, so we assume 100%.
set Reservations($vnode,cpupercent) 100.0
set Reservations($vnode,rampercent) 100.0
set Reservations($vnode,cpupercent) 92.0
set Reservations($vnode,rampercent) 80.0
tbx-log " Initializing node, $vnode, to one-to-one."
}
......@@ -1093,12 +1125,27 @@ proc tb-feedback-vnode {vnode hardware args} {
return
}
if {([array get Alerts $vnode] != "") && [set Alerts($vnode)] > 0} {
tbx-log "Alert for $vnode"
set desired_reservation \
[expr $desired_reservation * $(-alertscale)]; # XXX
# The pnode was overloaded, need to adjust the reservation in a
# more radical fashion.
tbx-log "Alert for $vnode $desired_reservation"
if {$desired_reservation < 0.1} {
# No good data to work with, make an estimate.
set desired_reservation [::TBCOMPAT::feedback-estimate \
$vnode $reservation_type]
} else {
# Some data, try applying the alert scale value.
set desired_reservation \
[expr $desired_reservation * $(-alertscale)]; # XXX
}
}
if {$desired_reservation > 100.0} {
set desired_reservation 100.0
if {$reservation_type == "cpupercent"} {
if {$desired_reservation > 92.0} {
set desired_reservation 92.0
}
} else {
if {$desired_reservation > 80.0} {
set desired_reservation 80.0
}
}
tbx-log " $reservation_type: ${desired_reservation}"
# Finally, tell assign about our desire.
......@@ -1173,6 +1220,8 @@ proc tb-feedback-vlan {vnode lan args} {
}
if {([array get Alerts $lan,$vnode] != "") &&
[set Alerts($lan,$vnode)] > 0} {
# The pnode was overloaded, need to adjust the reservation in a
# more radical fashion.
tbx-log "Alert for $lan, $vnode"
set desired_reservation \
[expr $desired_reservation * $(-alertscale)]; # XXX
......
......@@ -388,8 +388,8 @@ foreach $node (keys(%nodes)) {
# Add CPU and RAM information
push @features, "?+cpu:$cpu_speed";
push @features, "?+ram:$ram";
push @features, "?+cpupercent:100";
push @features, "?+rampercent:100";
push @features, "?+cpupercent:92"; # XXX Hack
push @features, "?+rampercent:80"; # XXX Hack
}
# Add features
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment