Commit 62b5e3a8 authored by Timothy Stack's avatar Timothy Stack

Updated canaryd, ended up starting fresh and pulling things in, rather

than jamming more stuff into the old one.  Most of the code came from
the previous version of canaryd, the cpu broker (process accounting),
and the janosvm (network interface accounting).  It's missing some
features of the old one, but those can be incorporated without too
much trouble.

Changes:

  Designed to permanently run on the pnodes:  it waits for START events
    before it begins recording.  However, I haven't done the work
    necessary to have it always start up on the pnodes.

  No more exec'ing: process stuff is taken from "/proc", and network
    interface stats are pulled from getifaddrs(3).

  Fixed some minor bugs: a typo caused the real-time priority to not
    be set; setitimer is now used instead of sleep to get more accurate
    spacing between samples.
parent 9a237b4a
......@@ -32,8 +32,8 @@ all: $(CDPROGS) client
include ${TESTBED_SRCDIR}/GNUmakerules
canaryd: canaryd.o auxfuncs.o version.o
$(CC) $(CFLAGS) $(LDFLAGS) -static -o $@ canaryd.o auxfuncs.o version.o $(LIBS) -ldevstat
canaryd: canaryd.o childProcess.o networkInterface.o canarydEvents.o auxfuncs.o version.o
$(CC) $(CFLAGS) $(LDFLAGS) -static -o $@ canaryd.o childProcess.o networkInterface.o canarydEvents.o auxfuncs.o version.o $(LIBS) -ldevstat
alertlistener: alertlistener.o
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ alertlistener.o $(LIBS)
......@@ -41,7 +41,7 @@ alertlistener: alertlistener.o
$(TBLIB):
gmake -C $(OBJDIR)/lib
version.c: canaryd.c canaryd.h auxfuncs.c auxfuncs.h alertlistener.c
version.c: canaryd.c childProcess.h childProcess.c networkInterface.h networkInterface.c canarydEvents.h canarydEvents.c auxfuncs.c auxfuncs.h alertlistener.c
echo >$@ "char build_info[] = \"Built `date +%d-%b-%Y` by `id -nu`@`hostname | sed 's/\..*//'`:`pwd`\";"
client: canaryd
......
#include <sys/param.h>
#include <sys/wait.h>
#include <sys/dkstat.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
......@@ -28,11 +30,62 @@ char **specified_devices;
devstat_select_mode select_mode;
struct statinfo cur, last;
extern void lerror(const char* msgstr);
static int getswapouts(void);
static int cpu_state(int which);
static long percentages(int cnt, int *out, register long *new,
register long *old, long *diffs);
#define MAXLINELEN 256
/**
 * Run a program and hand every line of its standard output to a callback.
 *
 * The program is exec'ed in a forked child whose stdout is redirected into a
 * pipe.  The parent reads the pipe with fgets(), so lines longer than
 * MAXLINELEN - 1 bytes are delivered to the callback in several pieces.
 *
 * XXX change to combine last return value of procfunc with exec'ed process'
 * exit status & write macros for access.
 *
 * @param prog NULL-terminated argument vector; prog[0] is resolved by
 * execvp().
 * @param procfunc Called as procfunc(line, data) for each line read.  A
 * negative return value stops the reading early.
 * @param data Opaque pointer passed through to procfunc.
 * @return The negative value returned by procfunc if it stopped the reading,
 * -1 if the pipe, fork, stream or wait failed or the child did not exit
 * normally, otherwise the child's exit status.
 */
int procpipe(char *const prog[], int (procfunc)(char*,void*), void* data) {
    int fdes[2], retcode, cpid, status = 0;
    char buf[MAXLINELEN];
    FILE *in;

    if ((retcode = pipe(fdes)) < 0) {
        lerror("Couldn't alloc pipe");
        return retcode;
    }

    switch ((cpid = fork())) {
    case 0:
        close(fdes[0]);
        dup2(fdes[1], STDOUT_FILENO);
        /* The original write end is redundant once it is stdout. */
        if (fdes[1] != STDOUT_FILENO) {
            close(fdes[1]);
        }
        execvp(prog[0], prog);
        /* execvp() only returns on failure. */
        lerror("Couldn't exec program");
        /*
         * Use _exit() so the child does not flush stdio buffers or run
         * atexit handlers it inherited from the parent.
         */
        _exit(1);

    case -1:
        lerror("Error forking child process");
        close(fdes[0]);
        close(fdes[1]);
        retcode = -1;
        break;

    default:
        close(fdes[1]);
        if ((in = fdopen(fdes[0], "r")) == NULL) {
            lerror("Couldn't open pipe stream");
            close(fdes[0]);
            retcode = -1;
        }
        else {
            /* fgets() returns NULL on both end-of-file and error. */
            while (fgets(buf, sizeof(buf), in) != NULL) {
                if ((retcode = procfunc(buf, data)) < 0) break;
            }
            fclose(in);
        }
        /* Reap exactly the child we forked, not whichever exits first. */
        if (waitpid(cpid, &status, 0) != cpid) {
            retcode = -1;
        }
        else if (retcode > -1) {
            /* A child killed by a signal has no meaningful exit status. */
            retcode = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
        }
        break;
    } /* switch ((cpid = fork())) */

    return retcode;
}
int
getnumcpus(void)
{
......@@ -75,12 +128,6 @@ getmempages(void)
return (int)(total / pagesize);
}
/*
 * Report how busy the CPU is, derived from the idle figure returned by
 * cpu_state(CP_IDLE).
 *
 * NOTE(review): the arithmetic implies cpu_state() reports tenths of a
 * percent (0..1000), which would make the result a whole percentage --
 * confirm against percentages().
 */
int
getcpubusy(void)
{
    const int idle = cpu_state(CP_IDLE);
    const int busy = 1000 - idle;

    return busy / 10;
}
int *
getmembusy(unsigned totalpages)
{
......@@ -279,25 +326,23 @@ getdiskbusy(void)
* Use the constants in <sys/dkstat.h>
* CP_USER=0, CP_NICE=1, CP_SYS=2, CP_INTR=3, CP_IDLE=4
*/
static
int cpu_state(int which) {
int *getcpustates() {
static long cp_time[CPUSTATES];
static long cp_old[CPUSTATES];
static long cp_diff[CPUSTATES];
static int cpu_states[CPUSTATES];
static long tot;
size_t len = sizeof(cp_time);
/* Copy the last cp_time into cp_old */
memcpy(&cp_old, &cp_time, CPUSTATES*sizeof(long));
/* puts kern.cp_time array into cp_time */
if (sysctlbyname("kern.cp_time", &cp_time, &len, NULL, 0) == -1 || !len)
return 0;
/* Use percentages function lifted from top(1) to figure percentages */
tot = percentages(CPUSTATES, cpu_states, cp_time, cp_old, cp_diff);
percentages(CPUSTATES, cpu_states, cp_time, cp_old, cp_diff);
return cpu_states[which];
return cpu_states;
}
/*
......
......@@ -4,7 +4,10 @@ int getcpuspeed(void);
int getmempages(void);
char **getdrivedata(char**);
int getcpubusy(void);
int *getmembusy(unsigned pages);
int *getnetmembusy(void);
int getdiskbusy(void);
int *getcpustates();
int procpipe(char *const prog[], int (procfunc)(char*,void*), void* data);
This diff is collapsed.
/*
* canarydEvents.c
*
* Copyright (c) 2004 The University of Utah and the Flux Group.
* All rights reserved.
*
* This file is licensed under the terms of the GNU Public License.
* See the file "license.terms" for restrictions on redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*/
/**
* @file canarydEvents.c
*
* Implementation file for the event related stuff for canaryd.
*/
#include <ctype.h>
#include "auxfuncs.h"
#include "canarydEvents.h"
struct ceCanarydEventsData canaryd_events_data;
/**
* The list of events we care about: start, stop, reset, report.
*/
static const char *CANARYD_EVENTS = (TBDB_EVENTTYPE_START ", "
TBDB_EVENTTYPE_STOP ", "
TBDB_EVENTTYPE_RESET ", "
TBDB_EVENTTYPE_REPORT);
/**
 * Grab the PID/EID from a "tmcc status" invocation.
 *
 * <p>Expected input: ALLOCATED=pid/eid ...
 *
 * The value runs from just after "ALLOCATED=" up to the next whitespace
 * character or the end of the string; buf is modified in place to terminate
 * it.  canaryd_events_data.ced_PidEid is always overwritten: with a
 * strdup()'ed copy of the value on success, or with "" on failure.
 *
 * @param buf The output of tmcc.
 * @param data Unused.
 * @return Zero on success, -1 if the key was not found or the copy could not
 * be allocated.
 */
static int ceGrabPidEid(char *buf, void *data)
{
    static const char key[] = "ALLOCATED=";
    char *value = "";
    char *start, *end, *copy;
    int retval = -1;

    (void)data;

    if( (start = strstr(buf, key)) != NULL )
    {
        /* Skip the key wherever it was found, not at a fixed offset. */
        start += sizeof(key) - 1;

        /*
         * Stop at the terminating NUL as well as at whitespace, isspace('\0')
         * is false so the NUL has to be tested for explicitly.
         */
        end = start;
        while( (*end != '\0') && !isspace((unsigned char)*end) )
        {
            end++;
        }
        *end = '\0';

        if( (copy = strdup(start)) != NULL )
        {
            value = copy;
            retval = 0;
        }
    }
    canaryd_events_data.ced_PidEid = value;
    return retval;
}
/*
 * Connect canaryd to the event system: find out which experiment this node
 * belongs to by running "tmcc status", register with the event server and
 * subscribe to the events named in CANARYD_EVENTS.
 *
 * Returns 1 when every step succeeded and 0 otherwise.  The address tuple is
 * handed to address_tuple_free() on every path.
 */
int ceInitCanarydEvents(const char *event_server)
{
    char *status_cmd[] = {"tmcc", "status", NULL};
    address_tuple_t tuple;
    int ok = 0;

    /* Allocate the tuple that describes what we want from the event system. */
    tuple = address_tuple_alloc();
    if( tuple == NULL )
    {
        goto done;
    }

    /* Determine the pid/eid for this node; any non-zero result is a failure. */
    if( procpipe(status_cmd, &ceGrabPidEid, NULL) != 0 )
    {
        goto done;
    }

    /* Describe the subscription. */
    tuple->host = ADDRESSTUPLE_ANY;
    tuple->site = ADDRESSTUPLE_ANY;
    tuple->group = ADDRESSTUPLE_ANY;
    tuple->expt = (char *)canaryd_events_data.ced_PidEid;
    tuple->objtype = "SLOTHD"; /* XXX This needs to be updated in the DB */
    tuple->objname = ADDRESSTUPLE_ANY;
    tuple->eventtype = (char *)CANARYD_EVENTS;

    /* Get a handle to the event system. */
    canaryd_events_data.ced_Handle =
        event_register_withkeyfile((char *)event_server, 0, EVENTKEYFILE);
    if( canaryd_events_data.ced_Handle == NULL )
    {
        goto done;
    }

    /* Subscribe to the canaryd events. */
    if( !event_subscribe(canaryd_events_data.ced_Handle,
                         ceEventCallback,
                         tuple,
                         NULL) )
    {
        goto done;
    }

    ok = 1;

done:
    address_tuple_free(tuple);
    return( ok );
}
/*
* canarydEvents.h
*
* Copyright (c) 2004 The University of Utah and the Flux Group.
* All rights reserved.
*
* This file is licensed under the terms of the GNU Public License.
* See the file "license.terms" for restrictions on redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*/
/**
* @file canarydEvents.h
*
* Header file for the event related stuff for canaryd.
*/
#ifndef CANARYD_EVENTS_H
#define CANARYD_EVENTS_H
#include "event.h"
#include "tbdefs.h"
/**
* Initialize the canaryd connection to the Emulab event system.
*
* @param event_server An "elvin://" URL for the server.
* @return True if the initialization was successful, false otherwise.
*/
int ceInitCanarydEvents(const char *event_server);
/**
* The path to the event secret key file.
*/
#define EVENTKEYFILE "/var/emulab/boot/eventkey"
/*
 * Global data for the canaryd-related event stuff.
 *
 * ced_PidEid - The pid/eid of the experiment.  For example, "tbres/ftest".
 * ced_Handle - The handle to the event system.
 */
struct ceCanarydEventsData {
    const char *ced_PidEid;
    event_handle_t ced_Handle;
};

/*
 * The single instance is defined in canarydEvents.c.  It is only declared
 * here so that every file including this header refers to that one object
 * instead of emitting its own definition.
 */
extern struct ceCanarydEventsData canaryd_events_data;
/**
* Callback for individual events.
*
* @param handle The event handle the event was received on.
* @param notification The event notification itself.
* @param data ...
*/
extern void ceEventCallback(event_handle_t handle,
event_notification_t notification,
void *data);
#endif
/*
* childProcess.c
*
* Copyright (c) 2003, 2004 The University of Utah and the Flux Group.
* All rights reserved.
*
* This file is licensed under the terms of the GNU Public License.
* See the file "license.terms" for restrictions on redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*/
/**
* @file childProcess.c
*
* Implementation file for the child process accounting functions.
*/
#include "config.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/param.h>
#include "childProcess.h"
/**
* The file name format for the process' memory map and statistics in '/proc'.
*/
#if defined(__FreeBSD__)
static char *PROC_STAT_FORMAT = "/proc/%d/status";
static char *PROC_MAP_FORMAT = "/proc/%d/map";
#endif
/**
* Global data for child processes.
*/
struct cpChildProcessData child_process_data;
/*
 * Empty every bucket of the process hash table and mark the global data as
 * initialized.  Always returns 1.
 */
int cpInitChildProcessData(void)
{
    struct cpChildProcess **slot = child_process_data.cpd_Table;
    struct cpChildProcess **const end = slot + CPD_TABLE_SIZE;

    while( slot < end )
    {
        *slot++ = NULL;
    }
    child_process_data.cpd_Flags |= CPDF_INITIALIZED;
    return( 1 );
}
void cpKillChildProcessData(void)
{
/* XXX implement me */
}
/*
 * Advance the current generation number, then unlink and delete every
 * cpChildProcess in the hash table whose cp_Generation differs from the new
 * value.
 */
void cpCollectChildProcesses(void)
{
    unsigned long generation;
    int bucket;

    child_process_data.cpd_CurrentGeneration += 1;
    generation = child_process_data.cpd_CurrentGeneration;

    for( bucket = 0; bucket < CPD_TABLE_SIZE; bucket++ )
    {
        /* 'link' always addresses the pointer that leads to the entry. */
        struct cpChildProcess **link = &child_process_data.cpd_Table[bucket];

        while( *link != NULL )
        {
            struct cpChildProcess *entry = *link;

            if( entry->cp_Generation == generation )
            {
                link = &entry->cp_Next;
            }
            else
            {
                /* Splice the stale entry out of the chain before freeing. */
                *link = entry->cp_Next;
                cpDeleteChildProcess(entry);
            }
        }
    }
}
/**
 * Look up a vnode by name on the global vnode list.
 *
 * @param name The name of the vnode, compared with strcmp().
 * @return The first cpVNode whose name matches, or NULL if there is none.
 */
struct cpVNode *cpFindVNode(const char *name)
{
    struct cpVNode *node;

    for( node = child_process_data.cpd_VNodes;
         node != NULL;
         node = node->vn_Next )
    {
        if( strcmp(node->vn_Name, name) == 0 )
        {
            return( node );
        }
    }
    return( NULL );
}
/*
 * Look up the tracking object for a process.  The bucket is chosen by
 * child_pid modulo CPD_TABLE_SIZE and its chain is searched for an entry
 * with a matching cp_PID.
 *
 * Returns the matching cpChildProcess, or NULL if the PID is not tracked.
 */
struct cpChildProcess *cpFindChildProcess(pid_t child_pid)
{
    int bucket = child_pid % CPD_TABLE_SIZE;
    struct cpChildProcess *entry;

    for( entry = child_process_data.cpd_Table[bucket];
         entry != NULL;
         entry = entry->cp_Next )
    {
        if( entry->cp_PID == child_pid )
        {
            return( entry );
        }
    }
    return( NULL );
}
/*
 * Allocate a tracking object for the given process, record which vnode (if
 * any) it runs in and insert it into the process hash table.
 *
 * The object and its two "/proc" file names live in a single allocation, so
 * cpDeleteChildProcess() releases all of them at once.  The vnode is taken
 * from the fifteenth whitespace-separated field of the first line of the
 * process' status file; "-" means the process is not in a vnode.  If that
 * file cannot be read or parsed, or a new cpVNode cannot be allocated,
 * cp_VNode is left NULL.
 *
 * Returns the new object, or NULL if it could not be allocated.
 */
struct cpChildProcess *cpCreateChildProcess(pid_t child_pid)
{
    /* Room for each format string plus 16 characters of PID and a NUL. */
    const size_t stat_size = strlen(PROC_STAT_FORMAT) + 16 + 1;
    const size_t map_size = strlen(PROC_MAP_FORMAT) + 16 + 1;
    struct cpChildProcess *retval;
    char *names;
    FILE *file;
    int hash;

    /* Allocate the structure and the /proc file names in one chunk. */
    retval = calloc(1, sizeof(struct cpChildProcess) + stat_size + map_size);
    if( retval == NULL )
    {
        return( NULL );
    }

    retval->cp_PID = child_pid;

    /*
     * Generate the "status" and "map" file names.  Each gets its own fixed
     * slice of the trailing storage, so neither can overlap the other or run
     * off the end of the allocation.
     */
    names = (char *)(retval + 1);
    snprintf(names, stat_size, PROC_STAT_FORMAT, (int)child_pid);
    retval->cp_StatusFileName = names;
    snprintf(names + stat_size, map_size, PROC_MAP_FORMAT, (int)child_pid);
    retval->cp_MapFileName = names + stat_size;

    /* Make the process part of the current generation. */
    retval->cp_Generation = child_process_data.cpd_CurrentGeneration;

    /* Check the status file to see if the process is in a vnode. */
    if( (file = fopen(retval->cp_StatusFileName, "r")) != NULL )
    {
        char buffer[1024];
        /* As large as the line buffer, so no field can overflow it. */
        char vname[sizeof(buffer)];
        int in_vnode = 0;

        if( (fgets(buffer, sizeof(buffer), file) != NULL) &&
            (sscanf(buffer,
                    "%*s %*u %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s "
                    "%s",
                    vname) == 1) &&
            (strcmp(vname, "-") != 0) )
        {
            in_vnode = 1;
        }
        fclose(file);

        if( in_vnode && ((retval->cp_VNode = cpFindVNode(vname)) == NULL) )
        {
            struct cpVNode *vn;

            /* In a new vnode, the name is stored right behind the object. */
            if( (vn = calloc(1,
                             sizeof(struct cpVNode) +
                             strlen(vname) + 1)) != NULL )
            {
                strcpy((char *)(vn + 1), vname);
                vn->vn_Name = (const char *)(vn + 1);
                vn->vn_Next = child_process_data.cpd_VNodes;
                child_process_data.cpd_VNodes = vn;
            }
            retval->cp_VNode = vn;
        }
    }

    /* Add the process to the hash table. */
    hash = child_pid % CPD_TABLE_SIZE;
    retval->cp_Next = child_process_data.cpd_Table[hash];
    child_process_data.cpd_Table[hash] = retval;

    return( retval );
}
/*
 * Release a cpChildProcess object.  Passing NULL is allowed and does
 * nothing.  The caller is responsible for unlinking the object from the hash
 * table beforehand.
 */
void cpDeleteChildProcess(struct cpChildProcess *cp)
{
    /* free() ignores NULL, so no separate test is required. */
    free(cp);
}
#if defined(__FreeBSD__)
/*
 * Sample the CPU and memory usage of one process from its "/proc" files.
 *
 * CPU: the user and system times on the first line of the status file are
 * summed; the difference from the sum recorded at the previous sample is the
 * usage for this interval.  It is added to the owning vnode's vn_Usage (when
 * the process is in a vnode) and returned.
 *
 * Memory: the sizes of the counted regions in the first 16K of the map file
 * are summed, stored in cp_MemoryUsage and added to the owning vnode's
 * vn_MemoryUsage (when the process is in a vnode).
 *
 * A file that cannot be opened, read or parsed contributes nothing.
 *
 * Returns the CPU time used since the previous sample, in microseconds.
 */
unsigned long long cpSampleUsage(struct cpChildProcess *cp)
{
    unsigned long long retval = 0;
    FILE *file;

    /* Get CPU statistics first, then */
    if( (file = fopen(cp->cp_StatusFileName, "r")) != NULL )
    {
        /* Read into longs, the timeval field types need not match "%ld". */
        long user_sec = 0, user_usec = 0, sys_sec = 0, sys_usec = 0;
        char buffer[1024];

        if( (fgets(buffer, sizeof(buffer), file) != NULL) &&
            (sscanf(buffer,
                    "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d %ld,%ld %ld,%ld",
                    &user_sec,
                    &user_usec,
                    &sys_sec,
                    &sys_usec) == 4) )
        {
            struct timeval utime, stime, accum, usage;

            memset(&utime, 0, sizeof(struct timeval));
            memset(&stime, 0, sizeof(struct timeval));
            memset(&accum, 0, sizeof(struct timeval));
            utime.tv_sec = user_sec;
            utime.tv_usec = user_usec;
            stime.tv_sec = sys_sec;
            stime.tv_usec = sys_usec;

            timeradd(&utime, &stime, &accum);
            timersub(&accum, &cp->cp_LastUsage, &usage);
            cp->cp_LastUsage = accum;

            retval += ((unsigned long long)usage.tv_sec * 1000000ULL) +
                (unsigned long long)usage.tv_usec;
            /* cp_VNode is NULL for processes that are not in a vnode. */
            if( cp->cp_VNode != NULL )
            {
                cp->cp_VNode->vn_Usage += retval;
            }
        }
        fclose(file);
    }

    /* ... do the memory stats. */
    if( (file = fopen(cp->cp_MapFileName, "r")) != NULL )
    {
        unsigned long long total_memory = 0;
        char buffer[16384];
        char *line, *next_line;
        size_t rc;

        /* Leave room for a terminator so strchr() stays inside the data. */
        rc = fread(buffer, 1, sizeof(buffer) - 1, file);
        buffer[rc] = '\0';

        /* Only complete lines are parsed, a cut-off last line is ignored. */
        for( line = buffer;
             (next_line = strchr(line, '\n')) != NULL;
             line = next_line + 1 )
        {
            int resident, private_resident, ref_count, shadow_count;
            unsigned int flags;
            void *start, *end, *obj;
            char type[32];

            *next_line = '\0';
            if( sscanf(line,
                       "%p %p %d %d %p %*s %d %d %x %*s %*s %31s",
                       &start,
                       &end,
                       &resident,
                       &private_resident,
                       &obj,
                       &ref_count,
                       &shadow_count,
                       &flags,
                       type) != 9 )
            {
                continue;
            }

            /* XXX Not sure which ones to count. */
            if( ((strcmp(type, "default") == 0) && (ref_count <= 5)) ||
                ((strcmp(type, "vnode") == 0) && (ref_count <= 2)) )
            {
                total_memory += ((char *)end) - ((char *)start);
            }
        }

        cp->cp_MemoryUsage = total_memory;
        if( cp->cp_VNode != NULL )
        {
            cp->cp_VNode->vn_MemoryUsage += total_memory;
        }
        fclose(file);
    }

    return( retval );
}
#endif
/*
* childProcess.h
*
* Copyright (c) 2003, 2004 The University of Utah and the Flux Group.
* All rights reserved.
*
* This file is licensed under the terms of the GNU Public License.
* See the file "license.terms" for restrictions on redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*/
/**
* @file childProcess.h
*
* Header file for the child process accounting functions.
*
* NOTE: Most of this was taken from the CPU Broker and tweaked to suit the
* testbed's needs.
*/
#ifndef _child_process_h
#define _child_process_h
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/types.h>
/*
 * A cpVNode records the resource usage of a particular vnode.
 *
 * vn_Next - Link to next object in the list.
 * vn_Name - The name of the VNode (e.g. node.eid.pid.emulab.net).
 *   cpCreateChildProcess() stores the string in the same allocation as the
 *   cpVNode itself, directly behind the structure.
 * vn_Usage - CPU usage in microseconds.  cpSampleUsage() adds each sampled
 *   process' usage to it; nothing in childProcess.c resets it.
 * vn_MemoryUsage - Memory usage in bytes.  cpSampleUsage() adds each sampled
 *   process' total to it; nothing in childProcess.c resets it.
 */
struct cpVNode {
struct cpVNode *vn_Next;
const char *vn_Name;
unsigned long long vn_Usage;
unsigned long long vn_MemoryUsage;
};
/*
 * A cpChildProcess is used to track and record the resource usage of a child
 * process.
 *
 * cp_Next - Link to next object in the same hash table bucket.
 * cp_PID - The process ID of the child.
 * cp_VNode - NULL or a reference to the VNode this process is in.
 * cp_StatusFileName - The "status" file name in "/proc" for this process.
 * cp_MapFileName - The "map" file name in "/proc" for this process.
 *   Both names are stored in the same allocation as the structure.
 * cp_Generation - Generation number, used to garbage collect processes that
 *   do not exist any more.
 * cp_MemoryUsage - The memory used by this process in bytes, overwritten by
 *   cpSampleUsage() at every sample.
 * cp_LastUsage - The summed user and system time recorded at the last
 *   sample.  Used to compute the difference in usage between the current and
 *   last sample time.
 *
 * NOTE(review): struct timeval needs <sys/time.h>; the visible part of this
 * header only includes <sys/types.h>, so includers presumably have to pull it
 * in first -- confirm.
 */
struct cpChildProcess {
struct cpChildProcess *cp_Next;
pid_t cp_PID;
struct cpVNode *cp_VNode;
const char *cp_StatusFileName;
const char *cp_MapFileName;
unsigned long cp_Generation;
unsigned long long cp_MemoryUsage;
#if defined(linux) || defined(__FreeBSD__)
struct timeval cp_LastUsage;
#else
#error "Implement me"
#endif
};
/*
 * Bit numbers for the cpChildProcessData flags; each CPDF_ flag below is one
 * shifted left by the corresponding CPDB_ value.
 *
 * CPDB_INITIALIZED - Bit number of CPDF_INITIALIZED.
 */
enum {
CPDB_INITIALIZED,
};
/*
 * Flags for the cpChildProcessData structure.
 *
 * CPDF_INITIALIZED - The global data is initialized.  Set by
 *   cpInitChildProcessData().
 */
enum {
CPDF_INITIALIZED = (1L << CPDB_INITIALIZED),
};
#define CPD_TABLE_SIZE 31
/*
 * Global data for tracking child processes.
 *
 * cpd_Flags - Holds the CPDF_ flags.
 * cpd_CurrentGeneration - The current generation number, this will be compared
 *   against each process' generation number to decide when to garbage collect
 *   cpChildProcess objects.
 * cpd_VNodes - The list of detected vnodes.
 * cpd_Table - Hash table of processes, hashes are based on the PID.
 */
struct cpChildProcessData {
    unsigned long cpd_Flags;
    unsigned long cpd_CurrentGeneration;
    struct cpVNode *cpd_VNodes;
    struct cpChildProcess *cpd_Table[CPD_TABLE_SIZE];
};

/*
 * The single instance is defined in childProcess.c.  It is only declared
 * here so that every file including this header refers to that one object
 * instead of emitting its own definition.
 */
extern struct cpChildProcessData child_process_data;
/**
* Initialize the internal accounting data structures.
*
* @return True on success, false otherwise.
*/
int cpInitChildProcessData(void);
/**
* Deinitialize the internal accounting data structures.
*/
void cpKillChildProcessData(void);