From 636aaa2b8bdaea28284d128949119ee09612f5ee Mon Sep 17 00:00:00 2001
From: Timothy Stack <stack@flux.utah.edu>
Date: Mon, 25 Oct 2004 19:25:06 +0000
Subject: [PATCH] Changes to the "auto nice daemon" so it can work better in
 Emulab.

  * sensors/and/GNUmakefile.in: Emulab-specific make file.  Updated to
    work with a build tree separate from the source and gave it a new
    version number. Files are installed under "/usr/testbed/" on ops.

  * sensors/and/Makefile: Add a warning that this is not the real
    makefile for us.

  * sensors/and/and-OpenBSD.c: Update to work with FreeBSD and add
    support for reporting process start time.

  * sensors/and/and-emulab.conf.in: Emulab-specific configuration,
    similar to the standard one, except it sends mail to tbops when it
    does something.

  * sensors/and/and-emulab.priorities: Emulab-specific priorities
    database. It excludes daemon pseudo users and the event-scheduler,
    otherwise, niceness levels apply to everyone.

  * sensors/and/and.8.man: Add the pid file to the 'FILES' section.

  * sensors/and/and.c: Added support for running a command when a
    niceness level change occurs.  Also writes out the pid file when
    not running in test mode.

  * sensors/and/and.conf.5.man: Add docs for the level commands.

  * sensors/and/and.h: Add start time and child CPU time to the
    and_procent struct.

  * sensors/and/and.startup: Changed to use "test" instead of bash
    syntax and the pid file is now used instead of killall.  Also
    added a "reconfig" option that HUPs the daemon.
---
 sensors/and/GNUmakefile.in        | 217 ++++++++++++++++++++++++++++++
 sensors/and/Makefile              |  11 +-
 sensors/and/and-OpenBSD.c         |  28 ++--
 sensors/and/and-emulab.conf.in    |  59 ++++++++
 sensors/and/and-emulab.priorities |  66 +++++++++
 sensors/and/and.8.man             |   6 +
 sensors/and/and.c                 | 214 +++++++++++++++++++++++++++--
 sensors/and/and.conf.5.man        |  54 ++++++++
 sensors/and/and.h                 |   6 +-
 sensors/and/and.startup           |  34 +++--
 10 files changed, 665 insertions(+), 30 deletions(-)
 create mode 100644 sensors/and/GNUmakefile.in
 create mode 100644 sensors/and/and-emulab.conf.in
 create mode 100644 sensors/and/and-emulab.priorities

diff --git a/sensors/and/GNUmakefile.in b/sensors/and/GNUmakefile.in
new file mode 100644
index 0000000000..292a40b1a8
--- /dev/null
+++ b/sensors/and/GNUmakefile.in
@@ -0,0 +1,217 @@
+#
+# EMULAB-COPYRIGHT
+# Copyright (c) 2004 University of Utah and the Flux Group.
+# All rights reserved.
+#
+# This is the Emulab specific makefile.
+#
+
+#
+# Makefile for auto nice daemon
+#
+# 1999-2004 Patrick Schemitz <schemitz@users.sourceforge.net>
+# http://and.sourceforge.net/
+#
+
+SRCDIR          = @srcdir@
+TESTBED_SRCDIR  = @top_srcdir@
+OBJDIR          = ../..
+SUBDIR          = sensors/and
+
+include $(OBJDIR)/Makeconf
+
+all: and doc and-emulab.conf
+
+include $(TESTBED_SRCDIR)/GNUmakerules
+
+#
+# Init script.
+#
+INITSCRIPT=and.init
+
+#
+# Install to the ops directory.
+#
+PREFIX=$(INSTALL_DIR)/opsdir
+INSTALL_ETC=$(PREFIX)/etc
+INSTALL_INITD=$(PREFIX)/etc/rc.d
+INSTALL_SBIN=$(PREFIX)/sbin
+INSTALL_MAN=$(PREFIX)/man
+
+#
+# Version and date
+#
+VERSION=1.2.1-emulab
+DATE="25 Oct 2004"
+
+#
+# Man pages
+#
+MANPAGES=and.8 and.conf.5 and.priorities.5
+
+#
+# Determine architecture from uname(1).  Simply expanded (:=) so the
+# shell runs once, not on every reference to $(ARCH).
+#
+ARCH := $(shell uname)
+
+#
+# Architecture-dependent settings: extra libraries needed to link the
+# platform backend.  The OpenBSD/FreeBSD backend links against libkvm;
+# Linux and OSF1 need nothing extra.  On any other platform LIBS is
+# left untouched.
+#
+ifeq ($(ARCH),Linux)
+  LIBS =
+else
+ifeq ($(ARCH),OSF1)
+  LIBS =
+else
+ifneq ($(filter OpenBSD FreeBSD,$(ARCH)),)
+  LIBS = -lkvm
+endif
+endif
+endif
+
+
+#
+# Build the auto-nice daemon.
+#
+and: and.o $(INITSCRIPT) and-$(ARCH).o
+	$(LD) and.o and-$(ARCH).o -o and $(LIBS)
+
+
+#
+# Independent part: configuration management, priority database.
+#
+# XXX Emulab note: We cannot use INSTALL_ETC for the paths since those may
+# not exist on ops.
+#
+and.o: and.c and.h
+	$(CC) -DDEFAULT_INTERVAL=60 -DDEFAULT_NICE=0 \
+	  -DDEFAULT_CONFIG_FILE=\"$(INSTALL_DIR)/etc/and.conf\" \
+	  -DDEFAULT_DATABASE_FILE=\"$(INSTALL_DIR)/etc/and.priorities\" \
+	  -DAND_VERSION=\"$(VERSION)\" -DAND_DATE=\"$(DATE)\" -c $<
+
+
+#
+# Unix variant specific stuff.  Each rule builds and-$(ARCH).o for one
+# uname(1) value.  Several platforms share a backend source file, so the
+# output is always named explicitly with "-o $@".
+#
+and-Linux.o: and-Linux.c and.h
+	$(CC) -c $< -o $@
+
+and-OpenBSD.o: and-OpenBSD.c and.h
+	$(CC) -c $< -o $@
+
+and-FreeBSD.o: and-OpenBSD.c and.h
+	$(CC) -c $< -o $@
+
+and-OSF1.o: and-OSF1.c and.h
+	$(CC) -c $< -o $@
+
+and-IRIX.o: and-OSF1.c and.h
+	$(CC) -c $< -o $@
+
+and-IRIX64.o: and-OSF1.c and.h
+	$(CC) -c $< -o $@
+
+and-SunOS.o: and-OSF1.c and.h
+	$(CC) -c $< -o $@
+
+#
+# Create script for SysV init
+#
+and.init: and.startup
+	sed s:INSTALL_SBIN:$(INSTALL_SBINDIR):g < $< > $@
+	chmod +x and.init
+
+
+#
+# Man pages: stamp the version and date into the .man templates.
+#
+doc:	$(MANPAGES)
+
+and.8:	and.8.man
+	sed -e s/__VERSION__/$(VERSION)/g \
+	    -e s/__DATE__/$(DATE)/g \
+	    < $< > $@
+
+and.conf.5:	and.conf.5.man
+	sed -e s/__VERSION__/$(VERSION)/g \
+	    -e s/__DATE__/$(DATE)/g \
+	    < $< > $@
+
+and.priorities.5:	and.priorities.5.man
+	sed -e s/__VERSION__/$(VERSION)/g \
+	    -e s/__DATE__/$(DATE)/g \
+	    < $< > $@
+
+#
+# Nothing to do for client install.
+#
+client client-install:
+
+#
+# Install and under $(PREFIX)/bin etc.
+#
+install: all
+	strip and
+#-mkdir $(PREFIX)
+	-mkdir -p $(DESTDIR)$(INSTALL_SBIN)
+	-mkdir -p $(DESTDIR)$(INSTALL_ETC)
+	-mkdir -p $(DESTDIR)$(INSTALL_INITD)
+	-mkdir -p $(DESTDIR)$(INSTALL_MAN)/man5
+	-mkdir -p $(DESTDIR)$(INSTALL_MAN)/man8
+	$(INSTALL) -m 0755 and $(DESTDIR)$(INSTALL_SBIN)
+	$(INSTALL) -m 0644 and-emulab.conf $(DESTDIR)$(INSTALL_ETC)/and.conf
+	$(INSTALL) -m 0644 $(SRCDIR)/and-emulab.priorities \
+		$(DESTDIR)$(INSTALL_ETC)/and.priorities
+ifneq (${INITSCRIPT},)
+ifneq (${INSTALL_INITD},)
+	@echo "Installing SysV script in $(DESTDIR)$(INSTALL_INITD)"
+	$(INSTALL) -m 0755 $(INITSCRIPT) $(DESTDIR)$(INSTALL_INITD)/and
+else
+	@echo "Installing SysV script in $(DESTDIR)$(INSTALL_SBIN)"
+	$(INSTALL) -m 0755 $(INITSCRIPT) $(DESTDIR)$(INSTALL_SBIN)
+	@echo "Installing SysV init.d finder in $(DESTDIR)$(INSTALL_SBIN)"
+	$(INSTALL) -m 0755 and-find-init.d $(DESTDIR)$(INSTALL_SBIN)
+endif
+endif
+	$(INSTALL) -m 0644 and.8 $(DESTDIR)$(INSTALL_MAN)/man8
+	$(INSTALL) -m 0644 and.conf.5 $(DESTDIR)$(INSTALL_MAN)/man5
+	$(INSTALL) -m 0644 and.priorities.5 $(DESTDIR)$(INSTALL_MAN)/man5
+
+simpleinstall: and and.init
+	strip and
+	mkdir -p $(DESTDIR)$(INSTALL_SBIN) $(DESTDIR)$(INSTALL_ETC)
+	mkdir -p $(DESTDIR)$(INSTALL_INITD)
+	mkdir -p $(DESTDIR)$(INSTALL_MAN)/man5 $(DESTDIR)$(INSTALL_MAN)/man8
+	cp and $(DESTDIR)$(INSTALL_SBIN)
+	test -e $(DESTDIR)$(INSTALL_ETC)/and.conf || \
+	   cp and.conf $(DESTDIR)$(INSTALL_ETC)
+	test -e $(DESTDIR)$(INSTALL_ETC)/and.priorities || \
+	   cp and.priorities $(DESTDIR)$(INSTALL_ETC)
+ifneq (${INITSCRIPT},) # on SysV only
+	cp $(INITSCRIPT) $(DESTDIR)$(INSTALL_INITD)/and
+endif
+	cp and.8 $(DESTDIR)$(INSTALL_MAN)/man8
+	cp and.conf.5 $(DESTDIR)$(INSTALL_MAN)/man5
+	cp and.priorities.5 $(DESTDIR)$(INSTALL_MAN)/man5
+
+uninstall:
+	rm -f $(DESTDIR)$(INSTALL_SBIN)/and
+	rm -f $(DESTDIR)$(INSTALL_INITD)/and
+	rm -f $(DESTDIR)$(INSTALL_ETC)/and.conf
+	rm -f $(DESTDIR)$(INSTALL_ETC)/and.priorities
+	rm -f $(DESTDIR)$(INSTALL_MAN)/man8/and.8
+	rm -f $(DESTDIR)$(INSTALL_MAN)/man5/and.conf.5
+	rm -f $(DESTDIR)$(INSTALL_MAN)/man5/and.priorities.5
+
+
+#
+# Clean up generated files.
+#
+clean:
+	rm -f *.o and and.init $(MANPAGES)
diff --git a/sensors/and/Makefile b/sensors/and/Makefile
index 5bf4f78052..fcdc16a2d5 100644
--- a/sensors/and/Makefile
+++ b/sensors/and/Makefile
@@ -6,6 +6,15 @@
 #
 
 
+#
+# EMULAB PEOPLE LOOK HERE!
+#
+# This file is DEAD, edit the GNUmakefile.in if you want to change something.
+#
+# EMULAB PEOPLE LOOK HERE!
+#
+
+
 #######################################################################
 # Edit here to adapt to your system!                                  #
 #######################################################################
@@ -214,7 +223,7 @@ and.priorities.5:	and.priorities.5.man
 #
 # Install and under $(PREFIX)/bin etc.
 #
-install: and $(INITSCRIPT)
+install: and $(INITSCRIPT) doc
 	strip and
 #-mkdir $(PREFIX)
 	-mkdir -p $(DESTDIR)$(INSTALL_SBIN)
diff --git a/sensors/and/and-OpenBSD.c b/sensors/and/and-OpenBSD.c
index fb4565041a..c26d65a6e2 100644
--- a/sensors/and/and-OpenBSD.c
+++ b/sensors/and/and-OpenBSD.c
@@ -1,7 +1,7 @@
 /*
 
     AND auto nice daemon - renice programs according to their CPU usage.
-    Copyright (C) 1999-2001 Patrick Schemitz <schemitz@users.sourceforge.net>
+    Copyright (C) 1999-2004 Patrick Schemitz <schemitz@users.sourceforge.net>
     http://and.sourceforge.net/
 
     This program is free software; you can redistribute it and/or modify
@@ -94,21 +94,31 @@ struct and_procent *openbsd_getnext ()
   strncpy(openbsd_proc.command,openbsd_pt[openbsd_next].kp_proc.p_comm,1023);
   openbsd_proc.command[1023] = 0;
   openbsd_proc.pid = openbsd_pt[openbsd_next].kp_proc.p_pid;
-  openbsd_proc.ppid = openbsd_pt[openbsd_next].kp_proc.p_ppid; /* FIXME that correct? */
-  openbsd_proc.nice = openbsd_pt[openbsd_next].kp_proc.p_nice-20;
+  /* ppid and nice are filled in per platform below. */
   openbsd_proc.uid = openbsd_pt[openbsd_next].kp_eproc.e_pcred.p_ruid;
   openbsd_proc.gid = openbsd_pt[openbsd_next].kp_eproc.e_pcred.p_rgid;
+#if defined(__FreeBSD__)
+  openbsd_proc.ppid = openbsd_pt[openbsd_next].kp_eproc.e_ppid;
+  openbsd_proc.nice = openbsd_pt[openbsd_next].kp_proc.p_nice;
+  openbsd_proc.stime =
+    openbsd_pt[openbsd_next].kp_eproc.e_stats.p_start.tv_sec;
+  openbsd_proc.utime =
+    openbsd_pt[openbsd_next].kp_proc.p_runtime / (1000 * 1000);
+  openbsd_proc.ctime =
+    openbsd_pt[openbsd_next].kp_eproc.e_stats.p_cru.ru_utime.tv_sec +
+    openbsd_pt[openbsd_next].kp_eproc.e_stats.p_cru.ru_stime.tv_sec;
+#else
   /* Adapted from top(1) port, as found in the misc@openbsd.org archive */
+  openbsd_proc.ppid = openbsd_pt[openbsd_next].kp_proc.p_ppid; /* FIXME that correct? */
+  openbsd_proc.nice = openbsd_pt[openbsd_next].kp_proc.p_nice-20;
   openbsd_proc.utime = (openbsd_pt[openbsd_next].kp_proc.p_uticks +
 			openbsd_pt[openbsd_next].kp_proc.p_sticks +
 			openbsd_pt[openbsd_next].kp_proc.p_iticks)
     / openbsd_hz;
-  /*
-    printf("%-20s  %5i  %3i  %i\n",openbsd_proc.command,openbsd_proc.pid,
-    openbsd_proc.nice,openbsd_proc.utime);
-  */
-  and_printf(3, "OpenBSD: process %s pid: %d ppid: %d\n", 
-             openbsd_proc.command, openbsd_proc.pid, openbsd_proc.ppid);
+#endif
+  and_printf(3, "OpenBSD: process %s pid: %d ppid: %d cpu_secs: %d\n",
+             openbsd_proc.command, openbsd_proc.pid, openbsd_proc.ppid,
+	     openbsd_proc.utime);
   openbsd_next++;
   return &openbsd_proc;
 }
diff --git a/sensors/and/and-emulab.conf.in b/sensors/and/and-emulab.conf.in
new file mode 100644
index 0000000000..6d32fea1cb
--- /dev/null
+++ b/sensors/and/and-emulab.conf.in
@@ -0,0 +1,59 @@
+#
+# Emulab-ops configuration file for the auto nice daemon, /etc/and.conf
+#
+# Comments must have the # in the *first* column!
+#
+# Read and.conf(5) for details.
+#
+# 1999, 2000, 2004 Patrick Schemitz, schemitz@users.sourceforge.net
+# 
+
+#
+# Nice level for jobs that are not in and.priorities.
+# 0 = do not renice.
+#
+defaultnice 0
+
+#
+# Time interval between renice cycles, in seconds. Default is
+# 60 seconds.
+#
+interval 120
+
+#
+# Ranges for the nice levels. Jobs with less than lv1time seconds
+# CPU time are not reniced; jobs between lv1time and lv2time seconds
+# are reniced to the first level in and.priorities; jobs between
+# lv2time and lv3time seconds to the second level; jobs with more
+# than lv3time seconds are reniced to the third level.
+#
+lv1time 90
+lv2time 1800
+lv2cmd mail -s "@THISHOMEBASE@: CPU Hog - $AND_COMMAND (30 min.)" @TBOPSEMAIL@
+lv3time 7200
+lv3cmd mail -s "@THISHOMEBASE@: CPU Hog - $AND_COMMAND (2 hrs.)" @TBOPSEMAIL@
+
+#
+# Strategy for picking the right priority entry for a user/group/job
+# triple. The strategy is a permutation of "cpgu": "c"ommand, "p"arent,
+# "g"roup, "u"ser. The order specifies the affinity of the priority lookup
+# method. "cug" means an exact match of the command has priority
+# over an exact match of the user or group. See the documentation
+# for more details.
+#
+affinity cpug
+
+#
+# Minimum user/group id to be even considered for renicing. Processes
+# with lower user/group id are ignored. This does not affect root
+# (user id 0), which is never, ever reniced.
+#
+minuid 1
+mingid 1
+
+# NOTE: Emulab user IDs start at 10000, so if you want to exempt the
+# core developers, comment out the above lines and uncomment the ones
+# below.
+#
+# minuid 10000
+# mingid 6000
diff --git a/sensors/and/and-emulab.priorities b/sensors/and/and-emulab.priorities
new file mode 100644
index 0000000000..217f627481
--- /dev/null
+++ b/sensors/and/and-emulab.priorities
@@ -0,0 +1,66 @@
+#
+# Sample priority database for the auto-nice daemon, /etc/and.priorities
+#
+# Comments must have the # in the _first_ column!
+#
+# File format:
+#    user group job parent nice1 nice2 nice3
+# - user: user name or user id or * for all
+# - group: group name or group id or * for all
+# - job: executable (without path; may be a regexp) or * for all
+# - parent: keyword "parent=" or "ancestor=" followed by the 
+#   executable (without path; may be a regexp), or * for all
+# - nice1, nice2, nice3: nice levels for CPU usage ranges.
+#
+# At least one of user or group must be an asterisk *.
+#
+# After /etc/and.conf:lv1time seconds matching jobs are niced to nice1,
+# after /etc/and.conf:lv2time to nice2 and after /etc/and.conf:lv3time
+# to nice3.
+#
+# Read and.priorities(5) for details.
+#
+# 1999, 2000, 2004 Patrick Schemitz, schemitz@users.sourceforge.net
+#
+
+#
+# Philosophy:
+#
+# Knock everyone except for the well known daemons, ops is not a
+# general purpose machine, that is what the experimental nodes are for.
+# If you make any changes, note that a perceptive user might rename
+# his jobs' executables to obtain higher privilege. So the values
+# must take that into account.
+#
+
+#
+# Default entry -- moderate renicing at first, drop them further down
+# after that.
+#
+*	*	*       *	5	15	19
+
+#
+# daemon entry -- for the portmapper and such.
+#
+daemon	*	*	*	0	0	0
+
+#
+# The main elvind server.
+#
+nobody	*	elvind	*	0	0	0
+
+#
+# Sendmail stuff
+#
+smmsp	*	*	*	0	0	0
+
+#
+# The event scheduler, not sure about this, but for long running
+# experiments the scheduler ends up accumulating a lot of time.
+#
+# Note that we only do this for the mainline one, development
+# versions are not exempt.
+#
+*	*	event-sched	*	0	0	0
+
+*	*	sshd		*	0	5	15
diff --git a/sensors/and/and.8.man b/sensors/and/and.8.man
index 418177615b..508a7effff 100644
--- a/sensors/and/and.8.man
+++ b/sensors/and/and.8.man
@@ -129,6 +129,12 @@ Both files have their own manual pages.
 Contains logging and status information for debugging purposes. 
 Used in test mode only.
 
+.TP 0.5i
+.B /var/run/and.pid
+Process identifier file.  Stores the PID for the currently running
+.I and
+process.  This file is not written in test mode.
+
 .SH "SEE ALSO"
 .BR and.conf (5),
 .BR and.priorities (5),
diff --git a/sensors/and/and.c b/sensors/and/and.c
index 54933b785a..049cbe2272 100644
--- a/sensors/and/and.c
+++ b/sensors/and/and.c
@@ -45,9 +45,15 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/types.h>
+#include <sys/wait.h>
 #include <signal.h>
 #include <regex.h>
 #include <values.h>
+#include <errno.h>
+#include <pwd.h>
+#include <grp.h>
+#include <fcntl.h>
+#include <sys/stat.h>
 
 #define DEBUG 0
 
@@ -60,6 +66,11 @@
 #ifdef __GNUC__
 int vsnprintf (char *str, size_t n, const char *format, va_list ap);
 #define HAVE_VSNPRINTF
+int snprintf (char *str, size_t n, const char *format, ...);
+#define HAVE_SNPRINTF
+#define and_snprintf snprintf
+#else
+#define and_snprintf sprintf
 #endif
 
 #include "and.h"
@@ -86,6 +97,8 @@ int vsnprintf (char *str, size_t n, const char *format, va_list ap);
 #define DEFAULT_DATABASE_FILE "/etc/and.priorities"
 #endif
 
+#define AND_PIDFILE "/var/run/and.pid"
+
 #ifndef AND_VERSION
 #define AND_VERSION "1.0.7 or above (not compiled in)"
 #endif
@@ -165,10 +178,12 @@ struct {
   bool lock_interval;
   unsigned interval;
   unsigned time_mark [3];
+  char *cmd[3];
   char affinity [5];
   int weight [PRI_N];
   int min_uid;
   int min_gid;
+  char *cmd_user;
 } and_config;
 
 
@@ -191,6 +206,10 @@ void set_defaults (int argc, char **argv)
   and_config.min_gid = 0;
   gethostname(and_config.hostname,511);
   and_config.hostname[511] = 0;
+  and_config.cmd[0] = "";
+  and_config.cmd[1] = "";
+  and_config.cmd[2] = "";
+  and_config.cmd_user = "nobody";
 }
 
 
@@ -286,8 +305,11 @@ void print_config ()
 	     "default nicelevel:     %2i\n"
 	     "interval     [sec]:   %3u\n"
 	     "level 0 from [sec]:   %3u\n"
+	     "level 0 cmd:          %s\n"
 	     "level 1 from [sec]:   %3u\n"
+	     "level 1 cmd:          %s\n"
 	     "level 2 from [sec]:   %3u\n"
+	     "level 2 cmd:          %s\n"
              "minimum uid:          %i\n"
              "minimum gid:          %i\n"
 	     "affinity:             %s\n"
@@ -299,8 +321,9 @@ void print_config ()
 	     (and_config.test?"just checkin'":"I'm serious."),
 	     and_config.verbose,
 	     and_config.nice_default, and_config.interval,
-	     and_config.time_mark[0], and_config.time_mark[1],
-	     and_config.time_mark[2], 
+	     and_config.time_mark[0], and_config.cmd[0],
+	     and_config.time_mark[1], and_config.cmd[1],
+	     and_config.time_mark[2], and_config.cmd[2],
              and_config.min_uid, and_config.min_gid,
              and_config.affinity,
 	     and_config.weight[PRI_U], and_config.weight[PRI_G],
@@ -603,6 +626,10 @@ void read_config ()
 	and_printf(0,"Configuration file line %i has invalid value for lv1time: %s.\n",
 		   line, value);
       }
+    } else if (strcmp(param,"lv1cmd")==0) {
+      if (buffer[strlen(buffer) - 1] == '\n')
+	buffer[strlen(buffer) - 1] = '\0';
+      and_config.cmd[0] = strdup(&buffer[strlen(param) + 1]);
     } else if (strcmp(param,"lv2time")==0) {
       if (uval < UINT_MAX)
 	and_config.time_mark[1] = uval;
@@ -611,6 +638,10 @@ void read_config ()
 	and_printf(0,"Configuration file line %i has invalid value for lv2time: %s.\n",
 		   line, value);
       }
+    } else if (strcmp(param,"lv2cmd")==0) {
+      if (buffer[strlen(buffer) - 1] == '\n')
+	buffer[strlen(buffer) - 1] = '\0';
+      and_config.cmd[1] = strdup(&buffer[strlen(param) + 1]);
     } else if (strcmp(param,"lv3time")==0) {
       if (uval < UINT_MAX)
 	and_config.time_mark[2] = uval;
@@ -619,6 +650,18 @@ void read_config ()
 	and_printf(0,"Configuration file line %i has invalid value for lv3time: %s.\n",
 		   line, value);
       }
+    } else if (strcmp(param,"lv3cmd")==0) {
+      if (buffer[strlen(buffer) - 1] == '\n')
+	buffer[strlen(buffer) - 1] = '\0';
+      and_config.cmd[2] = strdup(&buffer[strlen(param) + 1]);
+    } else if (strcmp(param,"cmduser")==0) {
+      if (strlen(value) > 0)
+	and_config.cmd_user = strdup(value); /* keep our own copy, like lvNcmd */
+      else {
+	++bad;
+	and_printf(0,"Configuration file line %i has empty value for cmduser.\n",
+		   line);
+      }
     } else if (strcmp(param,"affinity")==0) {
       bad_f = -1;
       u = g = c = p = 0;
@@ -679,11 +722,12 @@ void read_config ()
 
 /* Compute new nice level for given command/uid/gid/utime */
 
-int and_getnice (int uid, int gid, char *command, struct and_procent *parent, unsigned cpu_seconds)
+int and_getnice (int uid, int gid, char *command, struct and_procent *parent, unsigned cpu_seconds, char **cmd_out)
 {
   int i, level, entry, exact = -1, last;
   struct and_procent *par;
   int exactness [PRI_MAXENTRIES];
+  *cmd_out = NULL;
   if (!command) {
     and_printf(0,"Process without command string encountered. Aborting.\n");
     abort();
@@ -769,13 +813,130 @@ int and_getnice (int uid, int gid, char *command, struct and_procent *parent, un
   while (level >= 0 && and_config.time_mark[level] > cpu_seconds) {
     --level;
   }
-  and_printf(2,"command=%s (%i,%i,%s) hit on entry=%i, exactness=%i, level=%i.\n",
+  and_printf(2,"command=%s (%i,%i,%s) hit on entry=%i, exactness=%i, level=%i, cs=%i.\n",
              command, uid, gid, (parent!=NULL?parent->command:"(orphan)"), 
-             entry, exact, level);
+             entry, exact, level, cpu_seconds);
+  *cmd_out = (level >= 0) ? and_config.cmd[level] : NULL;
   return (level >= 0 ? and_db.entry[entry].nl[level] : 0);
 }
 
 
+/*
+ * Run level command `cmd' for `current', which was just reniced to
+ * `newnice' (or signalled with -newnice when negative).  If running as
+ * root, a forked child drops to and_config.cmd_user and pipes a report
+ * into popen(cmd).  Returns 0 once forked, -1 if fork() fails.
+ */
+int and_exec (char *cmd, struct and_procent *current, int newnice)
+{
+  int rc, retval = -1;
+
+  assert(cmd != NULL);
+  assert(strlen(cmd) > 0);
+  assert(newnice != 0);
+
+  switch (rc = fork()) {
+  case 0:
+    /* child */
+    if (getuid() == 0) {
+      char *user_name = "(unknown)", *group_name = "(unknown)";
+      int exit_value = EXIT_SUCCESS;
+      struct passwd *pw;
+      struct group *grp;
+      char buffer[2048];
+      FILE *file;
+
+      setenv("HOME", "", 1);
+      setenv("USER", and_config.cmd_user, 1);
+      if ((pw = getpwnam(and_config.cmd_user)) == NULL) {
+	syslog(LOG_ERR,"unknown command user: %s",and_config.cmd_user);
+	exit(1);
+      }
+
+      if (setgid(pw->pw_gid) ||
+	  initgroups(and_config.cmd_user, pw->pw_gid) ||
+	  setuid(pw->pw_uid)) {
+	syslog(LOG_ERR,"unable to drop privileges: %s",strerror(errno));
+	exit(1);
+      }
+
+      setenv("AND_HOST", and_config.hostname, 1);
+      if ((pw = getpwuid(current->uid)) != NULL)
+	user_name = pw->pw_name;
+      if ((grp = getgrgid(current->gid)) != NULL)
+	group_name = grp->gr_name;
+      sprintf(buffer, "%d", current->pid);
+      setenv("AND_PID", buffer, 1);
+      sprintf(buffer, "%d", current->ppid);
+      setenv("AND_PPID", buffer, 1);
+      setenv("AND_USER", user_name, 1);
+      setenv("AND_GROUP", group_name, 1);
+      setenv("AND_COMMAND", current->command, 1);
+
+      if ((file = popen(cmd, "w")) != NULL) {
+	char time_string[256];
+	time_t current_time;
+
+	current_time = time(NULL);
+	strftime(time_string, sizeof(time_string),
+		 "%H:%M",
+		 localtime(&current_time));
+
+	and_snprintf(buffer, sizeof(buffer),
+		     "\n"
+		     "[This is an automated message from the auto nice daemon (AND)]\n"
+		     "\n");
+	buffer[sizeof(buffer) - 1] = '\0';
+	fwrite(buffer, 1, strlen(buffer), file);
+	and_snprintf(buffer, sizeof(buffer),
+		     "A CPU hog has been detected at %s hours on %s:\n"
+		     "\n"
+		     "  pid\t\t%d\n"
+		     "  ppid\t\t%d\n"
+		     "  uid\t\t%d %s\n"
+		     "  gid\t\t%d %s\n"
+		     "  old nice\t%d\n"
+		     "  CPU seconds\t%d\n"
+		     "  command\t%s\n"
+		     "  start time\t%s"
+		     "\n"
+		     "Action taken:\n"
+		     "  %s %d\n",
+		     time_string,
+		     and_config.hostname,
+		     current->pid,
+		     current->ppid,
+		     current->uid, user_name,
+		     current->gid, group_name,
+		     current->nice,
+		     current->utime,
+		     current->command,
+		     ctime(&current->stime),
+		     (newnice > 0) ? "Changed nice to" : "Signalled process with",
+		     (newnice > 0) ? newnice : -newnice);
+	buffer[sizeof(buffer) - 1] = '\0';
+	fwrite(buffer, 1, strlen(buffer), file);
+	if (pclose(file) == -1) {
+	  syslog(LOG_ERR,"error while executing: %s",cmd);
+	  exit_value = EXIT_FAILURE;
+	}
+      } else {
+	exit_value = EXIT_FAILURE;
+      }
+      exit(exit_value);
+    }
+    /* Never let the child fall back into the daemon's main loop. */
+    exit(EXIT_FAILURE);
+  case -1:
+    break;
+  default:
+    /* parent */
+    retval = 0;
+    break;
+  }
+  return (retval);
+}
+
 
 /**********************************************************************
 
@@ -812,10 +973,14 @@ struct and_procent* and_find_proc (struct and_procent *head, int ppid)
 void and_loop ()
 {
   struct and_procent *head, *current, *new, *proc;
+  int childstatus;
   int newnice;
   int njobs = 0;
   assert(and_getfirst != NULL);
   assert(and_getnext != NULL);
+  while (wait3(&childstatus,WNOHANG,NULL) > 0) {
+    // reaped child...
+  }
   head = NULL;
   current = NULL;
   proc = and_getfirst();
@@ -843,9 +1008,10 @@ void and_loop ()
   }
   current = head;
   while (current != NULL) {
+    char *cmd;
     njobs++;
     newnice = and_getnice(current->uid,current->gid,current->command,
-                          current->parent,current->utime);
+                          current->parent,current->utime,&cmd);
     if (current->uid != 0) {
       if (newnice) {
 	if (newnice > 0) {
@@ -858,6 +1024,9 @@ void and_loop ()
 			 current->command);
 	      setpriority(PRIO_PROCESS,current->pid,newnice);
 	    }
+	    if ((cmd != NULL) && (strlen(cmd) > 0)) {
+		and_exec(cmd, current, newnice);
+	    }
 	  }
 	} else {
 	  if (and_config.test)
@@ -868,6 +1037,9 @@ void and_loop ()
                        current->command);
 	    kill(current->pid,-newnice);
 	  }
+	  if ((cmd != NULL) && (strlen(cmd) > 0)) {
+	      and_exec(cmd, current, newnice);
+	  }
 	}
       }
     }
@@ -945,6 +1117,7 @@ void and_getopt (int argc, char** argv)
 
 
 static int g_reload_conf;
+static bool g_looping = true;
 
 
 void and_trigger_readconf (int sig)
@@ -952,6 +1125,10 @@ void and_trigger_readconf (int sig)
   g_reload_conf = (sig == SIGHUP);
 }
 
+void and_trigger_stoplooping (int sig)
+{
+  g_looping = false;
+}
 
 void and_readconf ()
 {
@@ -961,15 +1138,17 @@ void and_readconf ()
   g_reload_conf = 0;
 }
 
-
 void and_worker ()
 {
   read_config();
   read_priorities();
+  signal(SIGTERM,and_trigger_stoplooping);
+  signal(SIGINT,and_trigger_stoplooping);
+  signal(SIGQUIT,and_trigger_stoplooping);
   signal(SIGHUP,and_trigger_readconf);
   and_printf(0,"AND ready.\n");
   g_reload_conf = 0;
-  while (1) {
+  while (g_looping) {
     if (g_reload_conf) {
       and_readconf();
     }
@@ -986,7 +1165,24 @@ int and_main (int argc, char** argv)
   if (and_config.test) {
     and_worker();
   } else {
-    if (fork() == 0) and_worker();
+    if (daemon(0,0) < 0) {
+      perror("Unable to daemonize");
+    }
+    else {
+      char pidbuf[32];
+      int pfd;
+      /* O_CREAT requires a mode argument; match the fchmod() below. */
+      if ((pfd = open(AND_PIDFILE, O_EXCL | O_CREAT | O_WRONLY, 0444)) < 0) {
+	and_printf(0,"Could not create pid file: %s\n",AND_PIDFILE);
+	exit(1);
+      }
+      fchmod(pfd, S_IRUSR | S_IRGRP | S_IROTH);
+      sprintf(pidbuf,"%d",getpid());
+      write(pfd,pidbuf,strlen(pidbuf));
+      close(pfd);
+      and_worker();
+      unlink(AND_PIDFILE);
+    }
   }
   return 0;
 }
diff --git a/sensors/and/and.conf.5.man b/sensors/and/and.conf.5.man
index ac57573488..bbd6bcc007 100644
--- a/sensors/and/and.conf.5.man
+++ b/sensors/and/and.conf.5.man
@@ -90,6 +90,56 @@ Defaults are
 .I 3600
 seconds.
 
+.TP 0.5i
+.B lv1cmd, lv2cmd, lv3cmd
+Commands for the nice levels.  Jobs that reach a new level, and have their
+priorities changed, will cause the corresponding command to be executed.  The
+commands are executed using
+.B popen(3)
+and an informational message describing the offending job is piped in.  In
+addition, the following environment variables will be set, so you can reference
+them in the given command-line:
+
+.RS
+.TP 0.5i
+.B AND_HOST
+The host where
+.I and
+is running.
+
+.TP 0.5i
+.B AND_PID
+The process ID of the offending job.
+
+.TP 0.5i
+.B AND_PPID
+The parent process ID of the offending job.
+
+.TP 0.5i
+.B AND_USER
+The user name the offending job is running with.
+
+.TP 0.5i
+.B AND_GROUP
+The group name the offending job is running with.
+
+.TP 0.5i
+.B AND_COMMAND
+The command name of the offending job.
+.RE
+
+.RS
+Normally, the command is executed as "nobody", or the user specified by the
+.I cmduser
+setting.  While this environment is limiting, it is enough to send mail or
+perform some other simple communication.
+.RE
+
+.TP 0.5i
+.B cmduser
+The user name to use when running level commands.  The default value for
+this is "nobody", which should be sufficient for most uses.
+
 .TP 0.5i
 .B minuid, mingid
 Minimum user id and group id to be considered for renicing. 
@@ -161,6 +211,10 @@ lv2time 1800
 .br
 lv3time 3600
 .br
+# Get someone involved if they reach level 3
+.br
+lv3cmd mail -s "CPU Hog - $AND_COMMAND" bofh@foobar.com
+.br
 # Hosts foo, bar, baz are terminals and must
 .br
 # be more responsive, so earlier renice.
diff --git a/sensors/and/and.h b/sensors/and/and.h
index 7b1108f462..c6b0298b35 100644
--- a/sensors/and/and.h
+++ b/sensors/and/and.h
@@ -46,10 +46,12 @@ struct and_procent {
   int uid;
   int gid;
   int nice;
-  unsigned utime;
+  time_t stime;			// process start time in seconds.
+  unsigned utime;		// CPU time in seconds.
+  unsigned ctime;		// reaped child CPU time in seconds.
   char command [1024];
   /* to be filled by and.c: */
-  struct and_procent *parent; 
+  struct and_procent *parent;
   struct and_procent *next;
 };
 
diff --git a/sensors/and/and.startup b/sensors/and/and.startup
index ea4d968b51..ba77aaddaa 100644
--- a/sensors/and/and.startup
+++ b/sensors/and/and.startup
@@ -15,14 +15,15 @@
 #
 
 AND_FLAGS=""
-test -r /etc/rc.config && . /etc/rc.config
+
+# test -r /etc/rc.config && . /etc/rc.config
 
 case "$1" in
     start)
 	echo -n "Starting auto nice daemon:"
-	INSTALL_SBIN/and $AND_FLAGS >&/dev/null
-	ps axuw | grep -v grep | grep INSTALL_SBIN/and >/dev/null
-	if [ $? = 0 ]; then
+	INSTALL_SBIN/and $AND_FLAGS > /dev/null 2>&1
+	ps axuw | grep -v grep | grep INSTALL_SBIN/and > /dev/null
+	if test $? -eq 0; then
 	    echo " done"
 	    exit 0
 	else
@@ -32,8 +33,23 @@ case "$1" in
 	;;
     stop)
 	echo -n "Shutting down auto nice daemon:"
-        uname | grep OSF1 >/dev/null || killall INSTALL_SBIN/and
-	echo " done"
+	# Only signal when the pid file is readable and the daemon is running.
+	if test -r "/var/run/and.pid" && ps axuw | grep -v grep | grep INSTALL_SBIN/and > /dev/null; then
+	    kill `cat /var/run/and.pid`
+	    echo " done"
+	else
+	    echo " already stopped"
+	fi
+	exit 0
+	;;
+    reconfig)
+	echo -n "Reconfiguring auto nice daemon:"
+	if test -r "/var/run/and.pid"; then
+	    kill -HUP `cat /var/run/and.pid`
+	    echo " done"
+	else
+	    echo " no process"
+	fi
 	exit 0
 	;;
     restart)
@@ -42,8 +58,8 @@ case "$1" in
 	;;
     status)
 	echo -n "Checking for auto nice daemon: "
-	ps axuw | grep -v grep | grep INSTALL_SBIN/and >/dev/null
-	if [ $? = 0 ]; then
+	ps axuw | grep -v grep | grep INSTALL_SBIN/and > /dev/null
+	if test $? -eq 0; then
 	    echo "running"
 	    exit 0
 	else
@@ -52,6 +68,6 @@ case "$1" in
 	fi
 	;;
     *)
-	echo "Usage: $0 {start|stop|status|restart}"
+	echo "Usage: $0 {start|stop|status|reconfig|restart}"
 	exit 1
 esac
-- 
GitLab