Commit 62b5e3a8 authored by Timothy Stack

Updated canaryd. I ended up starting fresh and pulling things in, rather
than jamming more stuff into the old one.  Most of the code came from
the previous version of canaryd, the CPU broker (process accounting),
and the janosvm (network interface accounting).  It's missing some
features of the old one, but those can be incorporated without too
much trouble.

Changes:

  Designed to run permanently on the pnodes:  it waits for START events
    before it begins recording.  However, I haven't done the work
    necessary to have it always start up on the pnodes.

  No more exec'ing: process stuff is taken from "/proc", and network
    interface stats are pulled from getifaddrs(3).

  Fixed some minor bugs: a typo caused the real-time priority not to
    be set; setitimer is now used instead of sleep to get more accurate
    spacing between samples.
parent 9a237b4a
......@@ -32,8 +32,8 @@ all: $(CDPROGS) client
include ${TESTBED_SRCDIR}/GNUmakerules
canaryd: canaryd.o auxfuncs.o version.o
$(CC) $(CFLAGS) $(LDFLAGS) -static -o $@ canaryd.o auxfuncs.o version.o $(LIBS) -ldevstat
canaryd: canaryd.o childProcess.o networkInterface.o canarydEvents.o auxfuncs.o version.o
$(CC) $(CFLAGS) $(LDFLAGS) -static -o $@ canaryd.o childProcess.o networkInterface.o canarydEvents.o auxfuncs.o version.o $(LIBS) -ldevstat
alertlistener: alertlistener.o
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ alertlistener.o $(LIBS)
......@@ -41,7 +41,7 @@ alertlistener: alertlistener.o
$(TBLIB):
gmake -C $(OBJDIR)/lib
version.c: canaryd.c canaryd.h auxfuncs.c auxfuncs.h alertlistener.c
version.c: canaryd.c childProcess.h childProcess.c networkInterface.h networkInterface.c canarydEvents.h canarydEvents.c auxfuncs.c auxfuncs.h alertlistener.c
echo >$@ "char build_info[] = \"Built `date +%d-%b-%Y` by `id -nu`@`hostname | sed 's/\..*//'`:`pwd`\";"
client: canaryd
......
#include <sys/param.h>
#include <sys/wait.h>
#include <sys/dkstat.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
......@@ -28,11 +30,62 @@ char **specified_devices;
devstat_select_mode select_mode;
struct statinfo cur, last;
extern void lerror(const char* msgstr);
static int getswapouts(void);
static int cpu_state(int which);
static long percentages(int cnt, int *out, register long *new,
register long *old, long *diffs);
#define MAXLINELEN 256
/**
 * Run an external program and hand each line of its standard output to a
 * callback.
 *
 * The program is started with fork()/execvp() with its stdout redirected into
 * a pipe.  The parent reads the pipe with fgets(), so the callback receives at
 * most MAXLINELEN - 1 characters per call (longer lines arrive in pieces).
 *
 * XXX change to combine last return value of procfunc with exec'ed process'
 *     exit status & write macros for access.
 *
 * @param prog     NULL-terminated argument vector; prog[0] is resolved by
 *                 execvp().
 * @param procfunc Called once per chunk read from the child.  A negative
 *                 return value stops reading.
 * @param data     Opaque pointer passed through to procfunc unchanged.
 * @return -1 if the pipe, fork, stream or wait could not be set up, or if the
 *         child did not terminate normally; the negative value returned by
 *         procfunc if it aborted the read loop; otherwise the child's exit
 *         status.
 */
int procpipe(char *const prog[], int (procfunc)(char*,void*), void* data) {
    int fdes[2];
    int retcode = 0;
    int status = 0;
    pid_t cpid;
    char buf[MAXLINELEN];
    FILE *in;

    if (pipe(fdes) < 0) {
        lerror("Couldn't alloc pipe");
        return -1;
    }

    cpid = fork();
    if (cpid < 0) {
        lerror("Error forking child process");
        close(fdes[0]);
        close(fdes[1]);
        return -1;
    }

    if (cpid == 0) {
        /* Child: stdout becomes the write end of the pipe. */
        close(fdes[0]);
        if (dup2(fdes[1], STDOUT_FILENO) < 0) {
            lerror("Couldn't redirect stdout to pipe");
            _exit(1);
        }
        if (fdes[1] != STDOUT_FILENO) {
            close(fdes[1]);
        }
        execvp(prog[0], prog);
        /* execvp() only returns on failure. */
        lerror("Couldn't exec program");
        /*
         * _exit(), not exit(): the child must not flush stdio buffers or
         * run atexit handlers inherited from the parent.
         */
        _exit(1);
    }

    /* Parent: read the child's output line by line. */
    close(fdes[1]);
    in = fdopen(fdes[0], "r");
    if (in == NULL) {
        lerror("Couldn't open pipe stream");
        close(fdes[0]);
        retcode = -1;
    }
    else {
        while (fgets(buf, sizeof(buf), in) != NULL) {
            retcode = procfunc(buf, data);
            if (retcode < 0) {
                break;
            }
        }
        fclose(in);
    }

    /*
     * Reap exactly the child we started; a plain wait() could collect some
     * unrelated child and report its status instead.
     */
    if (waitpid(cpid, &status, 0) < 0) {
        lerror("Couldn't reap child process");
        return -1;
    }

    if (retcode < 0) {
        return retcode;
    }

    /* WEXITSTATUS is only meaningful when the child exited normally. */
    return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
int
getnumcpus(void)
{
......@@ -75,12 +128,6 @@ getmempages(void)
return (int)(total / pagesize);
}
/*
 * Report how busy the CPU is, derived from the idle figure returned by
 * cpu_state(CP_IDLE).
 *
 * NOTE(review): the arithmetic implies cpu_state() reports values on a
 * 0..1000 scale (tenths of a percent), making the result a 0..100
 * percentage -- confirm against cpu_state()/percentages().
 */
int
getcpubusy(void)
{
    const int idle = cpu_state(CP_IDLE);
    const int busy = 1000 - idle;

    return busy / 10;
}
int *
getmembusy(unsigned totalpages)
{
......@@ -279,25 +326,23 @@ getdiskbusy(void)
* Use the constants in <sys/dkstat.h>
* CP_USER=0, CP_NICE=1, CP_SYS=2, CP_INTR=3, CP_IDLE=4
*/
static
int cpu_state(int which) {
int *getcpustates() {
static long cp_time[CPUSTATES];
static long cp_old[CPUSTATES];
static long cp_diff[CPUSTATES];
static int cpu_states[CPUSTATES];
static long tot;
size_t len = sizeof(cp_time);
/* Copy the last cp_time into cp_old */
memcpy(&cp_old, &cp_time, CPUSTATES*sizeof(long));
/* puts kern.cp_time array into cp_time */
if (sysctlbyname("kern.cp_time", &cp_time, &len, NULL, 0) == -1 || !len)
return 0;
/* Use percentages function lifted from top(1) to figure percentages */
tot = percentages(CPUSTATES, cpu_states, cp_time, cp_old, cp_diff);
percentages(CPUSTATES, cpu_states, cp_time, cp_old, cp_diff);
return cpu_states[which];
return cpu_states;
}
/*
......
......@@ -4,7 +4,10 @@ int getcpuspeed(void);
int getmempages(void);
char **getdrivedata(char**);
int getcpubusy(void);
int *getmembusy(unsigned pages);
int *getnetmembusy(void);
int getdiskbusy(void);
int *getcpustates();
int procpipe(char *const prog[], int (procfunc)(char*,void*), void* data);
/*
* EMULAB-COPYRIGHT
* Copyright (c) 2000-2004 University of Utah and the Flux Group.
* canaryd.c
*
* Copyright (c) 2004 The University of Utah and the Flux Group.
* All rights reserved.
*
* This file is licensed under the terms of the GNU Public License.
* See the file "license.terms" for restrictions on redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*/
#include "canaryd.h"
#include "config.h"
CANARYD_OPTS *opts;
CANARYD_PARAMS *parms;
/**
 * Log an error, with the description of the current errno appended, to both
 * syslog (LOG_ERR) and stderr.
 *
 * The caller's errno is left unchanged.
 *
 * @param msgstr The message to log; nothing is logged when it is NULL.
 */
void lerror(const char* msgstr) {
    /*
     * Snapshot errno first: syslog() itself may change it, which would make
     * the stderr line describe a different error than the "%m" in syslog.
     */
    const int saved_errno = errno;

    if (msgstr == NULL) {
        return;
    }

    syslog(LOG_ERR, "%s: %m", msgstr);
    fprintf(stderr, "canaryd: %s: %s\n", msgstr, strerror(saved_errno));

    /* Let the caller keep inspecting the error that triggered the report. */
    errno = saved_errno;
}
/*
 * Log a warning to syslog (LOG_WARNING) and to stderr.
 * A NULL message is ignored.
 */
void lwarn(const char* msgstr) {
    if (!msgstr) {
        return;
    }

    syslog(LOG_WARNING, "%s", msgstr);
    fprintf(stderr, "canaryd: %s\n", msgstr);
}
/*
 * Log a notice to syslog (LOG_NOTICE) and to stdout.
 * A NULL message is ignored.
 */
void lnotice(const char *msgstr) {
    if (!msgstr) {
        return;
    }

    syslog(LOG_NOTICE, "%s", msgstr);
    printf("canaryd: %s\n", msgstr);
}
/*
 * Handler for signals the daemon cannot recover from: logs the signal
 * number, removes the PID file, reaps every remaining child and exits using
 * the signal number as the exit status.
 *
 * NOTE(review): lerror() uses syslog()/fprintf(), which are not
 * async-signal-safe -- confirm this is acceptable for a last-gasp handler.
 */
void sigunkhandler(int signum) {
    char text[50];
    int child_status;

    snprintf(text, sizeof(text), "Unhandled signal: %d. Exiting.", signum);
    lerror(text);
    unlink(PIDFILE);

    /* Collect children until wait() reports there is nothing left. */
    for (;;) {
        if (wait(&child_status) == -1) {
            break;
        }
    }

    exit(signum);
}
/*
 * Signal handler that only raises the global "dolast" flag in parms; the
 * signal number itself is not used.
 */
void siginthandler(int signum) {
    (void)signum;

    parms->dolast = 1;
}
/*
 * SIGALRM handler: announces that the run length has expired and raises the
 * global "dolast" flag in parms.  The signal number itself is not used.
 */
void sigalrmhandler(int signum) {
    (void)signum;

    lnotice("Run length has expired; exiting.");
    parms->dolast = 1;
}
/*
 * Print the command-line help text to stdout and terminate the process with
 * exit status 0.  This function does not return.
 */
void usage(void) {
    /* Synopsis. */
    printf("Usage:\tcanaryd -h\n"
           "\tcanaryd [-o] [-d] [-i <interval>] [-p <port>] [-s <server>]\n"
           "\t [-l] [-t <runlen>] [-r <interval>]\n");

    /* Per-option descriptions. */
    printf("\t -h\t\t This message.\n"
           "\t -o\t\t Run once (collect a single report).\n"
           "\t -d\t\t Debug mode; do not fork into background.\n"
           "\t -i <interval>\t Regular run interval, in seconds.\n"
           "\t -p <port>\t Send on port <port>\n"
           "\t -s <server>\t Send data to <server>\n");
    printf("\t -l\t\t Run in logging mode. Will log to: " DEF_CDLOG "\n"
           "\t -t <runlen>\t Run for <runlen> seconds.\n"
           "\t -r <interval>\t Report overload at most every <interval> seconds \n");

    exit(0);
}
int main(int argc, char **argv) {
int exitcode = -1;
u_int span;
time_t curtime;
int rnd_off = 0;
static CANARYD_OPTS mopts;
static CANARYD_PARAMS mparms;
static CANARYD_PACKET mpkt;
CANARYD_PACKET *pkt;
extern char build_info[];
/* pre-init */
bzero(&mopts, sizeof(CANARYD_OPTS));
bzero(&mparms, sizeof(CANARYD_PARAMS));
bzero(&mpkt, sizeof(CANARYD_PACKET));
opts = &mopts;
parms = &mparms;
pkt = &mpkt;
if (parse_args(argc, argv) < 0) {
fprintf(stderr, "Error processing arguments.\n");
return exitcode;
}
/**
* @file canaryd.c
*
* Implementation file for the main bits of canaryd.
*/
if (init_canaryd() < 0) {
lerror("Problem initializing, bailing out.");
do_exit(exitcode);
}
#include <config.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <fcntl.h>
#include <syslog.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/dkstat.h>
#include <devstat.h>
#include <sys/stat.h>
#include <sys/rtprio.h>
#include <dirent.h>
#include <sys/socket.h>
#include <net/if.h>
#include <ifaddrs.h>
#include <unistd.h>
#include "auxfuncs.h"
#include "canarydEvents.h"
#include "childProcess.h"
#include "networkInterface.h"
/**
* The PATH to use when canaryd executes another program.
*/
#define CANARYD_PATH_ENV "/bin:/usr/bin:/sbin:/usr/sbin:" CLIENT_BINDIR
get_vmem_stats(pkt); /* XXX: doesn't belong here.. */
exitcode = 0;
lnotice("Canaryd started");
lnotice(build_info);
/**
* The path to this program's PID file.
*/
#define PIDFILE "/var/run/canaryd.pid"
for (;;) {
curtime = time(0);
/**
* The canaryd log file path.
*/
#define CANARYD_LOG "/var/emulab/logs/canaryd.log"
/* Collect current machine stats */
mparms.cnt++;
get_load(pkt);
get_packet_counts(pkt);
get_vnode_stats(pkt);
get_vmem_stats(pkt);
check_overload(pkt);
if (pkt->overload && parms->numvnodes/2 > 1) {
// (curtime >= mparms.lastolrpt + mopts.ol_rep_interval)) {
send_ol_event(pkt);
mparms.lastolrpt = curtime;
}
/**
* Version information.
*
* @see version.c
*/
extern char build_info[];
/* Poll the event system */
event_poll(mparms.ev_handle);
/*
 * Bit numbers for the canaryd flags; each CDF_ mask is built as
 * (1L << CDB_x) from the matching entry here.
 */
enum {
    CDB_LOOPING = 0,
    CDB_TRACE_IMMEDIATELY = 1,
    CDB_TRACING = 2,
    CDB_DEBUG = 3
};
/*
* Time to send a packet?
* Yes, if:
* 1) We've been idle, and now we see activity (aggressive mode)
* 2) Its been over <reg_interval> seconds since the last report
*/
if ((curtime >= mparms.lastrpt + mopts.interval) ||
parms->dolast) {
if (send_pkt(pkt)) {
mparms.lastrpt = curtime;
}
if (parms->dolast) {
break;
}
}
/**
 * Bit masks stored in the canaryd_data.cd_Flags variable, one per CDB_ bit
 * number.
 */
enum {
    /** Continue looping, waiting for events. */
    CDF_LOOPING = 1L << CDB_LOOPING,

    /** Start tracing immediately, do not wait for START event. */
    CDF_TRACE_IMMEDIATELY = 1L << CDB_TRACE_IMMEDIATELY,

    /** Generate trace data. */
    CDF_TRACING = 1L << CDB_TRACING,

    /** Debugging mode. */
    CDF_DEBUG = 1L << CDB_DEBUG,
};
if (mopts.once) {
break;
}
/*
* Figure out, based on run count, and activity, how long
* to sleep.
*/
if (mopts.log) {
span = mopts.interval;
}
/* randomly offset the first packet to avoid packet implosion. */
else if (mparms.cnt == 1) {
rnd_off = (rand() / (float) RAND_MAX) *
(OFFSET_FRACTION*mopts.interval);
rnd_off = (rnd_off > 0) ? rnd_off : 0;
span = mopts.interval - rnd_off;
}
else {
span = mopts.interval;
}
if (opts->debug) {
printf("About to sleep for %u seconds.\n", span);
fflush(stdout);
}
if (parms->dolast) {
continue;
/*
 * Global data for canaryd.
 *
 * cd_Flags - Holds the CDF_ flags.
 * cd_IntervalTime - The time to wait between samples.
 * cd_CurrentTime - The current time of day.
 * cd_StopTime - NOTE(review): not described by the original comment;
 *               presumably the time at which tracing should stop -- confirm.
 * cd_OutputFile - The output file for the trace.
 */
struct {
int cd_Flags;
struct itimerval cd_IntervalTime;
struct timeval cd_CurrentTime;
struct timeval cd_StopTime;
FILE *cd_OutputFile;
} canaryd_data;
/**
* Log an error.
*
* @param msgstr The message to log.
*/
void lerror(const char* msgstr)
{
if( msgstr != NULL )
{
syslog(LOG_ERR, "%s: %m", msgstr);
fprintf(stderr, "canaryd: %s: %s\n", msgstr, strerror(errno));
}
sleep(span);
}
return exitcode; /* NOTREACHED */
}
int parse_args(int argc, char **argv) {
char ch;
/* setup defaults. */
opts->once = 0;
opts->interval = DEF_INTVL;
opts->debug = 0;
opts->port = CANARYD_DEF_PORT;
opts->servname = BOSSNODE;
opts->log = 0;
opts->run_len = 0;
opts->ol_rep_interval = DEF_OLREPINTVL;
while ((ch = getopt(argc, argv, "oi:g:dp:s:lt:r:h")) != -1) {
switch (ch) {
case 'o': /* run once */
opts->once = 1;
break;
case 'i':
if ((opts->interval = atol(optarg)) < MIN_INTVL) {
lwarn("Warning! Tnterval set too low, defaulting.");
opts->interval = MIN_INTVL;
}
break;
case 'd':
lnotice("Debug mode requested; staying in foreground.");
opts->debug = 1;
break;
case 'p':
opts->port = (u_short)atoi(optarg);
break;
case 's':
if (optarg && *optarg) {
opts->servname = strdup(optarg);
}
else {
lwarn("Invalid server name, default used.");
}
break;
case 'l':
lnotice("Logging mode enabled");
opts->log = 1;
break;
case 't':
opts->run_len = strtoul(optarg, NULL, 0);
break;
case 'r':
opts->ol_rep_interval = strtoul(optarg, NULL, 0);
break;
case 'h':
default:
usage();
return -1;
break;
/**
 * Log a warning to syslog (LOG_WARNING) and to stderr.
 *
 * @param msgstr The message to log; a NULL message is ignored.
 */
void lwarn(const char* msgstr)
{
    if (msgstr == NULL) {
        return;
    }

    syslog(LOG_WARNING, "%s", msgstr);
    fprintf(stderr, "canaryd: %s\n", msgstr);
}
return 0;
}
int init_canaryd(void) {
int pfd, i;
char pidbuf[10];
char servbuf[MAXHNAMELEN];
struct hostent *hent;
char *ciprog[] = {"control_interface", NULL};
char *stprog[] = {"tmcc", "status", NULL};
struct rtprio myprio = {RTP_PRIO_REALTIME, 0};
char *canaryd_evts = TBDB_EVENTTYPE_START ", " TBDB_EVENTTYPE_STOP ", "
TBDB_EVENTTYPE_RESET ", " TBDB_EVENTTYPE_REPORT;
address_tuple_t tuple;
char *driveargv[] = {"ad0", NULL};
/* init internal vars */
parms->dolast = 0; /* init send-last-report-before-exiting variable */
parms->cnt = 0; /* daemon iter count */
parms->lastrpt = 0; /* the last time a report pkt was sent */
parms->startup = time(NULL); /* Make sure we don't report < invocation */
parms->pideid = "";
parms->ev_server = "boss.emulab.net"; /* XXX */
parms->ol_detect = 0;
parms->send_ev_report = 0;
parms->numvnodes = 0;
/* Event arg items */
for (i = 0; i < NUMRANGES; ++i) {
parms->olr[i].min = evargitems[i].defmin;
parms->olr[i].max = evargitems[i].defmax;
}
/* Setup signals */
signal(SIGTERM, siginthandler);
signal(SIGINT, siginthandler);
signal(SIGQUIT, siginthandler);
signal(SIGALRM, sigalrmhandler);
signal(SIGHUP, SIG_IGN);
signal(SIGPIPE, SIG_IGN);
signal(SIGBUS, sigunkhandler);
signal(SIGSEGV, sigunkhandler);
signal(SIGFPE, sigunkhandler);
signal(SIGILL, sigunkhandler);
signal(SIGSYS, sigunkhandler);
/* Set the priority to realtime */
if (rtprio(RTP_PRIO_REALTIME, getpid(), &myprio) < 0) {
lerror("Couldn't set RT priority!");
}
/* Setup logging facil. */
openlog("canaryd", LOG_NDELAY, LOG_TESTBED);
/* Setup path */
if (setenv("PATH", CANARYD_PATH_ENV, 1) < 0) {
lerror("Couldn't set path env");
}
/* Seed the random number generator */
srand(time(NULL));
/* Grab control net iface */
if (procpipe(ciprog, &grab_cifname, parms)) {
lwarn("Failed to get control net iface name");
}
/* Get the expt pid/eid. */
if (procpipe(stprog, &grab_pideid, parms)) {
lwarn("Failed to get pid/eid");
}
/* Open the logfile, if necessary */
if (opts->log) {
if ((parms->log = fopen(DEF_CDLOG, "w")) == NULL) {
lerror("Failed to open log file");
return -1;
}
}
/************************************************************
* REGISTER WITH EVENT SYSTEM *
************************************************************/
/*
* Find the local monitor node
*/
#ifdef COMMENT /* XXX: fixup to use expt-specific mon node's elvind */
if ((ssfile = fopen(SYNCSERVFILE, "r")) == NULL) {
lerror("Failed to open sync server ident file");
return -1;
}
if ((fgets(servbuf, sizeof(servbuf), ssfile) == NULL) ||
! *servbuf) {
lerror("No sync server name found in file!");
fclose(ssfile);
return -1;
}
if (servbuf[strlen(servbuf)-1] == '\n') {
servbuf[strlen(servbuf)-1] = '\0';
}
parms->ev_server = strdup(servbuf);
fclose(ssfile);
#endif
/*
* Convert server/port to elvin thing.
*
* XXX This elvin string stuff should be moved down a layer.
*/
if (parms->ev_server) {
snprintf(servbuf, sizeof(servbuf), "elvin://%s",
parms->ev_server);
/* free(parms->ev_server); */
parms->ev_server = strdup(servbuf);
}
/*
* Construct an address tuple for subscribing to events for
* this node.
*/
tuple = address_tuple_alloc();
if (tuple == NULL) {
lerror("could not allocate an address tuple");
return -1;
}
printf("pid/eid: %s\n", parms->pideid);
/*
* Ask for canaryd agent events
*/
tuple->host = ADDRESSTUPLE_ANY;
tuple->site = ADDRESSTUPLE_ANY;
tuple->group = ADDRESSTUPLE_ANY;
tuple->expt = parms->pideid;
tuple->objtype = TBDB_OBJECTTYPE_CANARYD;
tuple->objname = ADDRESSTUPLE_ANY;
tuple->eventtype = canaryd_evts;
/*
* Register with the event system.
*/
parms->ev_handle = event_register_withkeyfile(parms->ev_server, 0,
EVENTKEYFILE);
if (parms->ev_handle == NULL) {
lerror("could not register with event system");
return -1;
}
/*
* Subscribe to the event we specified above.
*/
if (! event_subscribe(parms->ev_handle, ev_callback, tuple, NULL)) {
lerror("could not subscribe to event");
return -1;
}
address_tuple_free(tuple); /* XXX: keep around for sending evts. */
#ifdef __linux__
/* Open socket for SIOCGHWADDR ioctl (to get mac addresses) */
parms->ifd = socket(PF_INET, SOCK_DGRAM, 0);
#endif
/* Setup "vmstat" stuff */
getdrivedata(driveargv);
/* prepare UDP connection to server */
if ((parms->sd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
lerror("Could not alloc socket");
return -1;
}
if (!(hent = gethostbyname(opts->servname))) {
lerror("Can't resolve server hostname"); /* XXX use herror */
return -1;
}
bzero(&parms->servaddr, sizeof(struct sockaddr_in));
parms->servaddr.sin_family = AF_INET;
parms->servaddr.sin_port = htons(opts->port);
bcopy(hent->h_addr_list[0], &parms->servaddr.sin_addr.s_addr,
sizeof(struct in_addr));
if (connect(parms->sd, (struct sockaddr*)&parms->servaddr,
sizeof(struct sockaddr_in)) < 0) {
lerror("Couldn't connect to server");
return -1;
}
/* Daemonize, unless in debug, or once-only mode. */
if (!opts->debug && !opts->once) {
if (daemon(0,0) < 0) {
lerror("Couldn't daemonize");
return -1;
}
/* Try to get lock. If can't, then bail out. */
if ((pfd = open(PIDFILE, O_EXCL | O_CREAT | O_RDWR)) < 0) {
lerror("Can't create lock file.");
return -1;
/**
* Log a notice.
*
* @param msgstr The message to log.
*/
void lnotice(const char *msgstr)
{
if( msgstr != NULL )
{
syslog(LOG_NOTICE, "%s", msgstr);
printf("canaryd: %s\n", msgstr);
}
fchmod(pfd, S_IRUSR | S_IRGRP | S_IROTH);
sprintf(pidbuf, "%d", getpid());
write(pfd, pidbuf, strlen(pidbuf));
close(pfd);
}
/* Setup run length alarm, if needed. */
if (opts->run_len) {
alarm(opts->run_len);
}
return 0;
}
void do_exit(int exval) {