Commit 495f6803 authored by Leigh B. Stoller's avatar Leigh B. Stoller
Browse files

Big update to the stats gathering code ...

This change attempts to make the stats gathering code more reliable by
not relying on the testbed_stats records to reconstruct usage
statistics.  The main source of errors and total confusion in the
current stats code is that testbed_stats includes all the errors and
transitions, from which I have to reconstruct what happened in order
to determine usage by a project or user.

The new stats code still generates the testbed_stats code, but actual
usage is recorded as it happens, in the experiment_resources table, as
swapins, swapouts, and swapmods occur. Its also much faster to compute
the data for the tables in the web interface, not having to scan a
zillion testbed_stats records in php.

There is a time consuming update to the records that takes place with
a lot of tables locked.
parent cf64f716
This diff is collapsed.
......@@ -788,6 +788,60 @@ sub MemberList($$;$$)
return 0;
}
#
# Update the aggregate stats.
#
sub UpdateStats($$$$$)
{
my ($self, $mode, $duration, $pnodes, $vnodes) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $gid_idx = $self->gid_idx();
DBQueryWarn("update group_stats ".
"set expt${mode}_count=expt${mode}_count+1, ".
" expt${mode}_last=now(), ".
" allexpt_duration=allexpt_duration+${duration}, ".
" allexpt_vnodes=allexpt_vnodes+${vnodes}, ".
" allexpt_pnodes=allexpt_pnodes+${pnodes}, ".
" allexpt_vnode_duration=".
" allexpt_vnode_duration+($vnodes * ${duration}), ".
" allexpt_pnode_duration=".
" allexpt_pnode_duration+($pnodes * ${duration}) ".
"where gid_idx='$gid_idx'");
if ($mode eq TBDB_STATS_SWAPIN || $mode eq TBDB_STATS_START) {
DBQueryWarn("update groups set ".
" expt_last=now(),expt_count=expt_count+1 ".
"where gid_idx='$gid_idx'");
}
$self->Refresh();
return 0;
}
#
# Bump last activity
#
sub BumpActivity($)
{
my ($self) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $gid_idx = $self->gid_idx();
DBQueryWarn("update group_stats set last_activity=now() ".
"where gid_idx='$gid_idx'");
return 0;
}
############################################################################
package Group::MemberShip;
......@@ -1046,3 +1100,4 @@ sub ModifyTrust($$)
# _Always_ make sure that this 1 is at the end of the file...
1;
......@@ -563,5 +563,60 @@ sub GroupList($$)
return 0;
}
#
# Update the aggregate stats.
#
sub UpdateStats($$$$$)
{
my ($self, $mode, $duration, $pnodes, $vnodes) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $pid_idx = $self->pid_idx();
DBQueryWarn("update project_stats ".
"set expt${mode}_count=expt${mode}_count+1, ".
" expt${mode}_last=now(), ".
" allexpt_duration=allexpt_duration+${duration}, ".
" allexpt_vnodes=allexpt_vnodes+${vnodes}, ".
" allexpt_pnodes=allexpt_pnodes+${pnodes}, ".
" allexpt_vnode_duration=".
" allexpt_vnode_duration+($vnodes * ${duration}), ".
" allexpt_pnode_duration=".
" allexpt_pnode_duration+($pnodes * ${duration}) ".
"where pid_idx='$pid_idx'");
if ($mode eq TBDB_STATS_SWAPIN || $mode eq TBDB_STATS_START) {
DBQueryWarn("update projects set ".
" expt_last=now(),expt_count=expt_count+1 ".
"where pid_idx='$pid_idx'");
}
$self->Refresh();
return 0;
}
#
# Bump last activity
#
sub BumpActivity($)
{
my ($self) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $pid_idx = $self->pid_idx();
DBQueryWarn("update project_stats set last_activity=now() ".
"where pid_idx='$pid_idx'");
return 0;
}
# _Always_ make sure that this 1 is at the end of the file...
1;
......@@ -24,6 +24,7 @@ use overload ('""' => 'Stringify');
use Project;
use vars qw($NEWUSER_FLAGS_PROJLEADER $NEWUSER_FLAGS_WIKIONLY
$NEWUSER_FLAGS_WEBONLY $NEWUSER_FLAGS_ARCHIVED
$NEWUSER_FLAGS_NOUUID
$USERSTATUS_ACTIVE $USERSTATUS_FROZEN
$USERSTATUS_UNAPPROVED $USERSTATUS_UNVERIFIED
$USERSTATUS_NEWUSER $USERSTATUS_ARCHIVED
......@@ -48,6 +49,7 @@ $NEWUSER_FLAGS_PROJLEADER = 0x01;
$NEWUSER_FLAGS_WIKIONLY = 0x02;
$NEWUSER_FLAGS_WEBONLY = 0x04;
$NEWUSER_FLAGS_ARCHIVED = 0x08;
$NEWUSER_FLAGS_NOUUID = 0x80;
# Status values.
$USERSTATUS_ACTIVE = "active";
......@@ -60,6 +62,7 @@ $USERSTATUS_ARCHIVED = "archived";
# Why, why, why?
@EXPORT_OK = qw($NEWUSER_FLAGS_PROJLEADER $NEWUSER_FLAGS_WIKIONLY
$NEWUSER_FLAGS_WEBONLY $NEWUSER_FLAGS_ARCHIVED
$NEWUSER_FLAGS_NOUUID
$USERSTATUS_ACTIVE $USERSTATUS_FROZEN
$USERSTATUS_UNAPPROVED $USERSTATUS_UNVERIFIED
$USERSTATUS_NEWUSER $USERSTATUS_ARCHIVED);
......@@ -263,11 +266,13 @@ sub LookupByEmail($$)
sub Create($$$$)
{
my ($class, $uid, $flags, $argref) = @_;
my $uuid;
my $isleader = ($flags & $NEWUSER_FLAGS_PROJLEADER ? 1 : 0);
my $wikionly = ($flags & $NEWUSER_FLAGS_WIKIONLY ? 1 : 0);
my $webonly = ($flags & $NEWUSER_FLAGS_WEBONLY ? 1 : 0);
my $archived = ($flags & $NEWUSER_FLAGS_ARCHIVED ? 1 : 0);
my $nouuid = ($flags & $NEWUSER_FLAGS_NOUUID ? 1 : 0);
#
# If no uid, we need to generate a unique one for the user.
......@@ -406,11 +411,14 @@ sub Create($$$$)
# And a verification key.
my $verify_key = TBGenSecretKey();
# And a UUID (universally unique identifier).
my $uuid = NewUUID();
if (!defined($uuid)) {
print "*** WARNING: Could not generate a UUID!\n";
return undef;
if (! $nouuid) {
# And a UUID (universally unique identifier).
$uuid = NewUUID();
if (!defined($uuid)) {
print "*** WARNING: Could not generate a UUID!\n";
return undef;
}
push(@insert_data, "uid_uuid='$uuid'");
}
# Now tack on other stuff we need.
......@@ -424,7 +432,6 @@ sub Create($$$$)
push(@insert_data, "unix_uid=$unix_uid");
push(@insert_data, "mailman_password='$mailman_password'");
push(@insert_data, "verify_key='$verify_key'");
push(@insert_data, "uid_uuid='$uuid'");
push(@insert_data, "uid_idx='$uid_idx'");
push(@insert_data, "uid='$uid'");
......@@ -448,8 +455,14 @@ sub Create($$$$)
DBQueryWarn("insert into users set " . join(",", @insert_data))
or return undef;
if (! DBQueryWarn("insert into user_stats (uid, uid_idx, uid_uuid) ".
"VALUES ('$uid', $uid_idx, '$uuid')")) {
# And the stats record.
@insert_data = ();
push(@insert_data, "uid_idx='$uid_idx'");
push(@insert_data, "uid='$uid'");
push(@insert_data, "uid_uuid='$uuid'")
if (!$nouuid);
if (! DBQueryWarn("insert into user_stats set ".join(",", @insert_data))) {
DBQueryFatal("delete from users where uid_idx='$uid_idx'");
return undef;
}
......@@ -789,6 +802,54 @@ sub FlipTo($$)
return 0;
}
#
# Update aggregate stats.
#
sub UpdateStats($$$$$)
{
my ($self, $mode, $duration, $pnodes, $vnodes) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $uid_idx = $self->uid_idx();
DBQueryWarn("update user_stats ".
"set expt${mode}_count=expt${mode}_count+1, ".
" expt${mode}_last=now(), ".
" allexpt_duration=allexpt_duration+${duration}, ".
" allexpt_vnodes=allexpt_vnodes+${vnodes}, ".
" allexpt_pnodes=allexpt_pnodes+${pnodes}, ".
" allexpt_vnode_duration=".
" allexpt_vnode_duration+($vnodes * ${duration}), ".
" allexpt_pnode_duration=".
" allexpt_pnode_duration+($pnodes * ${duration}) ".
"where uid_idx='$uid_idx'");
$self->Refresh();
return 0;
}
#
# Bump last activity
#
sub BumpActivity($)
{
my ($self) = @_;
# Must be a real reference.
return -1
if (! ref($self));
my $uid_idx = $self->uid_idx();
DBQueryWarn("update user_stats set last_activity=now() ".
"where uid_idx='$uid_idx'");
return 0;
}
# _Always_ make sure that this 1 is at the end of the file...
1;
#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2006 University of Utah and the Flux Group.
# Copyright (c) 2000-2007 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
......@@ -297,6 +297,37 @@ if ($query_result->numrows) {
print "\n";
}
#
# Look for inconsistent resource records.
#
if (! ($query_result =
DBQueryWarn("select e.idx,r.idx,e.pid,e.eid,".
" e.expt_head_uid,UNIX_TIMESTAMP(r.swapin_time) ".
" from experiment_resources as r ".
"left join experiments as e on e.idx=r.exptidx ".
" where e.state='swapped' and swapin_time!=0 and ".
" swapout_time=0 and swapmod_time=0 and pnodes>0"))){
fatal("Error accessing the database.");
}
if ($query_result->numrows) {
print "\n";
print "----------------------------------------------------------------\n";
print "\n";
print "Experiments with inconsistent experiment_resource records:\n";
printf("%-12s %-12s %-8s %-12s %-22s %s\n",
"PID", "EID", "IDX", "Creator", "Started", "RIDX");
print "------------ ------------ -------- ------------ ------------ ".
"---------------------- -------\n";
while (my ($exptidx,$idx,$pid,$eid,$creator,$swapin_time) =
$query_result->fetchrow()) {
printf("%-12s %-12s %-8s %-12s %-22s %s\n",
$pid, $eid, $exptidx, $creator, $swapin_time, $idx);
}
}
#
# Age any login entries that have timed out.
#
......
......@@ -202,7 +202,7 @@ use vars qw(@ISA @EXPORT);
TBSiteVarExists TBGetSiteVar TBSetSiteVar
TBActivityReport GatherSwapStats GatherAssignStats
TBActivityReport GatherAssignStats
TBAvailablePCs
TBDB_IFACEROLE_CONTROL TBDB_IFACEROLE_EXPERIMENT
......@@ -4961,364 +4961,6 @@ sub TBAvailablePCs(;$)
return $count;
}
#
# Gather Swap stats.
#
# usage: GatherExptStats(char *pid, char *eid, char *uid,
# char *mode, int code, int flags)
# Mode is one of preload, start, in, out, modify, end.
#
sub GatherSwapStats($$$$$;$)
{
my ($pid, $eid, $uid, $mode, $ecode, $flags) = @_;
my ($pnodes,$vnodes,$duration);
my $batchctrl = 0;
# Optional argument to modify the stats gathering.
$flags = 0
if (!defined($flags));
#
# If this is a start time marker, then just record the time in a global
# variable and return. This is cheezy, but the interface I'm providing
# allows for fancier stuff later if desired.
#
if ($flags & TBDB_STATS_FLAGS_START) {
$TBDB_STATS_STARTCLOCK = time();
return;
}
my $session = tblog_session();
$session = 'NULL' unless defined $session;
local $DBQUERY_MAXTRIES = 0;
my $query_result =
DBQueryWarn("select e.state,e.gid,e.idx,s.rsrcidx,s.lastrsrc, ".
" e.expt_swap_uid,s.swapin_count,e.batchmode ".
" from experiments as e ".
"left join experiment_stats as s on e.idx=s.exptidx ".
"where e.pid='$pid' and e.eid='$eid'");
if (!$query_result || !$query_result->numrows) {
return;
}
my ($curstate, $gid, $exptidx, $rsrcidx, $lastrsrc, $lastswapuid,
$swapin_count, $batchmode) = $query_result->fetchrow_array;
# This overrides when doing a swapmod.
$lastswapuid = $TBDB_STATS_SAVEDSWAPUID
if (defined($TBDB_STATS_SAVEDSWAPUID));
# This happens at first swapin.
$lastswapuid = $uid
if (!defined($lastswapuid) || $lastswapuid eq "");
# Need these for user_stats below. Eventually do major rewrite!
my $thisuser = User->LookupByUid($uid);
my $lastuser = User->LookupByUid($lastswapuid);
my $this_uid_idx = $thisuser->uid_idx();
my $last_uid_idx = $thisuser->uid_idx();
#
# A non-zero ecode indicates error. If op is a preload/swapin/start/modify
# then we do not want to gather anymore stats beyond the error code
# since the results are not well defined. swapout,terminate
# errors do normal processing.
#
if ($ecode) {
DBQueryWarn("update experiment_stats set ".
" swap_errors=swap_errors+1, ".
" swap_exitcode=$ecode, ".
" last_error=$session ".
"where pid='$pid' and eid='$eid' and exptidx=$exptidx");
#
# What about SWAPOUT errors? Well, 99.9 percent of the time,
# the nodes are going to be released even if the swapout fails.
# Since that is what we really care about wrt resource consumption,
# we let swapout errors fall through to the stats code below.
# I could probably check for this case (nodes actually released)
# but not bothering to do so.
#
if ($mode eq TBDB_STATS_START ||
$mode eq TBDB_STATS_PRELOAD) {
goto logit;
}
#
# If a modify or swapin failed, we need to revert back to the old
# resource record since the current one is bogus.
#
if ($mode eq TBDB_STATS_SWAPMODIFY || $mode eq TBDB_STATS_SWAPIN) {
if (defined($lastrsrc)) {
DBQueryWarn("update experiment_stats set ".
" rsrcidx=$lastrsrc,lastrsrc=NULL ".
"where pid='$pid' and eid='$eid' and ".
" exptidx=$exptidx");
DBQueryWarn("delete from experiment_resources ".
"where idx=$rsrcidx");
$rsrcidx = $lastrsrc;
}
goto logit;
}
}
#
# Termination is easy; just one field to update.
#
if ($mode eq TBDB_STATS_TERMINATE) {
DBQueryWarn("update experiment_stats ".
"set destroyed=now() ".
"where pid='$pid' and eid='$eid' and exptidx=$exptidx");
}
#
# Pre-modify. Need to generate a new resource record. If the experiment
# fails to modify, we revert back to the old resource record later.
#
if (($mode eq TBDB_STATS_SWAPMODIFY &&
$flags & TBDB_STATS_FLAGS_PREMODIFY) ||
($mode eq TBDB_STATS_SWAPIN &&
$flags & TBDB_STATS_FLAGS_PRESWAPIN)) {
# This check to catch preloaded experiments, which have an
# unused resources record.
if (($mode eq TBDB_STATS_SWAPMODIFY) ||
(($mode eq TBDB_STATS_SWAPIN) and $swapin_count)) {
$query_result =
DBQueryWarn("insert into experiment_resources ".
" (idx, tstamp, exptidx, lastidx) ".
"values (0, now(), $exptidx, $rsrcidx)");
if (! $query_result ||
! $query_result->insertid) {
print STDERR
"*** WARNING $0:\n".
" Failed to insert a new resource record for ".
"$pid/$eid\n";
}
$lastrsrc = $rsrcidx;
$rsrcidx = $query_result->insertid;
DBQueryWarn("update experiment_stats set ".
" rsrcidx=$rsrcidx,lastrsrc=$lastrsrc ".
"where pid='$pid' and eid='$eid' and ".
" exptidx=$exptidx");
#
# In SWAPIN, copy over the thumbnail. This is temporary; I think
# the thumbnail is going to end up going someplace else.
#
if ($mode eq TBDB_STATS_SWAPIN) {
$query_result =
DBQueryWarn("select thumbnail from experiment_resources ".
"where idx=$lastrsrc");
if ($query_result &&
$query_result->numrows) {
my ($thumbdata) = $query_result->fetchrow_array();
TBExptSetThumbNail($pid, $eid, $thumbdata);
}
}
}
#
# When doing a (pre)swapmod, save off the previous swap uid
# so we can do accounting later. Might be the same as the current
# uid of course.
#
if ($mode eq TBDB_STATS_SWAPMODIFY) {
$TBDB_STATS_SAVEDSWAPUID = $lastswapuid;
}
return;
}
#
# On a swapout/modify complete, update the duration counters. We
# want to update the aggregates too below, so get the numbers we
# need for that first. Modify is a bit of a complication since we
# want to charge for the experiment as it *was* until this point,
# since the number of nodes has changed. So, each modify changes
# the time we use to determine the resource usage; the start time is
# the greater of the swapin time or the last modify time.
#
$pnodes = 0;
$vnodes = 0;
$duration = 0;
if ($mode eq TBDB_STATS_SWAPOUT ||
($mode eq TBDB_STATS_SWAPMODIFY && $curstate eq EXPTSTATE_ACTIVE &&
($flags & TBDB_STATS_FLAGS_PREMODIFY) == 0)) {
$query_result =
DBQueryWarn("select r.pnodes,r.vnodes, ".
" IF(s.swapmod_last is not NULL and ".
" UNIX_TIMESTAMP(s.swapmod_last) > ".
" UNIX_TIMESTAMP(s.swapin_last), ".
" UNIX_TIMESTAMP(now()) - ".
" UNIX_TIMESTAMP(s.swapmod_last), ".
" UNIX_TIMESTAMP(now()) - ".
" UNIX_TIMESTAMP(s.swapin_last)) ".
" from experiment_stats as s ".
"left join experiment_resources as r on ".
" r.idx=s.rsrcidx ".
"where s.exptidx=$exptidx");
if ($query_result && $query_result->numrows) {
($pnodes,$vnodes,$duration) = $query_result->fetchrow_array;
# Might happen if swapin stats got losts.
$duration = 0
if (! defined($duration));
}
#
# Increment idleswap indicator, but only valid on swapout.
#
if ($flags & TBDB_STATS_FLAGS_IDLESWAP) {
DBQueryWarn("update experiment_stats ".
"set idle_swaps=idle_swaps+1 ".
"where pid='$pid' and eid='$eid' and ".
" exptidx=$exptidx");
}
}
#
# Per project/group/user aggregates.
#
if ($mode eq TBDB_STATS_PRELOAD ||
$mode eq TBDB_STATS_START ||
$mode eq TBDB_STATS_SWAPOUT ||
$mode eq TBDB_STATS_SWAPIN ||
$mode eq TBDB_STATS_SWAPMODIFY) {
DBQueryWarn("update project_stats ".
"set expt${mode}_count=expt${mode}_count+1, ".
" expt${mode}_last=now(), ".
" allexpt_duration=allexpt_duration+${duration}, ".
" allexpt_vnodes=allexpt_vnodes+${vnodes}, ".
" allexpt_pnodes=allexpt_pnodes+${pnodes}, ".
" allexpt_vnode_duration=".
" allexpt_vnode_duration+($vnodes * ${duration}), ".
" allexpt_pnode_duration=".
" allexpt_pnode_duration+($pnodes * ${duration}) ".
"where pid='$pid'");
DBQueryWarn("update group_stats ".
"set expt${mode}_count=expt${mode}_count+1, ".
" expt${mode}_last=now(), ".
" allexpt_duration=allexpt_duration+${duration}, ".
" allexpt_vnodes=allexpt_vnodes+${vnodes}, ".
" allexpt_pnodes=allexpt_pnodes+${pnodes}, ".
" allexpt_vnode_duration=".
" allexpt_vnode_duration+($vnodes * ${duration}), ".
" allexpt_pnode_duration=".
" allexpt_pnode_duration+($pnodes * ${duration}) ".
"where pid='$pid' and gid='$gid'");
DBQueryWarn("update user_stats ".
"set expt${mode}_count=expt${mode}_count+1, ".
" expt${mode}_last=now(), ".
" allexpt_duration=allexpt_duration+${duration}, ".
" allexpt_vnodes=allexpt_vnodes+${vnodes}, ".
" allexpt_pnodes=allexpt_pnodes+${pnodes}, ".
" allexpt_vnode_duration=".
" allexpt_vnode_duration+($vnodes * ${duration}), ".
" allexpt_pnode_duration=".
" allexpt_pnode_duration+($pnodes * ${duration}) ".
"where uid_idx=".
((($mode eq TBDB_STATS_SWAPOUT) ||
($mode eq TBDB_STATS_SWAPMODIFY)) ?
"'$last_uid_idx'" : "'$this_uid_idx'"));
#
# Project/group aggregate is a little more convenient to work with
# in some places.
#
if ($mode eq TBDB_STATS_SWAPIN ||
$mode eq TBDB_STATS_START) {
DBQueryWarn("update projects set ".
" expt_last=now(),expt_count=expt_count+1 ".
"where pid='$pid'");
DBQueryWarn("update groups set ".
" expt_last=now(),expt_count=expt_count+1 ".
"where pid='$pid' and gid='$gid'");
}
#
# Update the per-experiment record.
# Note that we map start into swapin.
#
if ($mode eq TBDB_STATS_SWAPOUT ||
$mode eq TBDB_STATS_SWAPIN ||
$mode eq TBDB_STATS_START ||
$mode eq TBDB_STATS_SWAPMODIFY) {
my $tmp = $mode;
if ($mode eq TBDB_STATS_START) {
$tmp = TBDB_STATS_SWAPIN;
}
DBQueryWarn("update experiment_stats ".
"set ${tmp}_count=${tmp}_count+1, ".
" ${tmp}_last=now(), ".
" swapin_duration=swapin_duration+${duration}, ".
" swap_exitcode=0, ".
" last_error=NULL ".
"where pid='$pid' and eid='$eid' and ".