Commit 159076bf authored by Leigh B. Stoller

Some "improvements" to linktest ...

* The linktest daemon (the one that runs on the nodes) no longer talks
  to boss directly, but instead contacts the local elvind; rc.linktest
  is changed to reflect that.

* A bunch of signal handler changes to run_linktest.pl; do not rely on
  events to stop linktest when it is running on boss; when the user
  kills a running linktest make sure all the processes are killed
  explicitly.

* New wrapper script (linktest_control) for use on boss, specifically
  when being called from the web interface. This script handles the DB
  part (getting linktest_level and linktest_pid), making sure that
  only one linktest is running at a time (on boss) and resetting the
  pid in the DB as needed. The -k option kills a running linktest, and
  is invoked from the web interface when the user wants to kill one in
  progress. This gets the pid from the DB and sends it a TERM signal,
  which sends a TERM to the run_linktest.pl script, which sends a TERM
  to the ltevent helper app.

  Note that this wrapper is also suitable for the XMLRPC interface,
  although I have not added it there yet.
parent 4cb78154
......@@ -15,6 +15,7 @@ DAEMON = linktest
LTEVENT = ltevent
SCRIPT = linktest.pl
SCRIPT_RUN = run_linktest.pl
SCRIPT_CONTROL = linktest_control
SCRIPT_TBCOMPAT = tb_compat.tcl
SCRIPT_NSTB_COMPAT = nstb_compat.tcl
......@@ -22,7 +23,7 @@ SYSTEM := $(shell uname -s)
include $(OBJDIR)/Makeconf
all: binaries $(SCRIPT) $(SCRIPT_RUN) weblinktest
all: binaries $(SCRIPT) $(SCRIPT_RUN) weblinktest linktest_control
include $(TESTBED_SRCDIR)/GNUmakerules
......@@ -81,6 +82,7 @@ install:
$(INSTALL_PROGRAM) $(LOCAL_BINDIR)/$(LTEVENT) $(INSTALL_LIBEXECDIR)
$(INSTALL_PROGRAM) weblinktest $(INSTALL_LIBEXECDIR)
$(INSTALL_PROGRAM) $(SCRIPT_RUN) $(INSTALL_BINDIR)
$(INSTALL_PROGRAM) $(SCRIPT_CONTROL) $(INSTALL_SBINDIR)
$(INSTALL_DATA) $(SRCDIR)/linktest.html $(INSTALL_WWWDIR)/doc
control-install: install
......
#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
use Getopt::Std;
use POSIX;
#
# Run the linktest code from experiment swapin; this script serves as
# a wrapper for run_linktest.pl. We store the pid in the DB, and allow
# for linktest cancelation from the webpage. This script is only run on
# boss; otherwise the user is running run_linktest.pl directly on ops or
# on a node, and can terminate linktest directly (via ^C or SIGTERM).
#
#
# Print the command line synopsis to stdout and terminate the script
# with exit(-1). Does not return.
#
sub usage()
{
    my @synopsis = (
        "Usage: linktest_control [-d] [-k | -l <level>] [-o <file>] <pid> <eid>\n",
        "-l   - Run linktest at a specific level; defaults to DB value.\n",
        "-k   - Kill a currently running linktest.\n",
        "-o   - Specify output file for linktest results.\n",
        "-d   - Turn on debugging output.\n",
    );

    # Emit the help text as one string, exactly as a single print would.
    print(join("", @synopsis));
    exit(-1);
}
# Option spec handed to getopts(): -d and -k are flags, -l and -o take a value.
my $optlist = "dkl:o:";
my $debug = 0;		# Set to 1 when -d is given.
my $cancel = 0;		# Set to 1 when -k is given (kill a running linktest).
my $level;		# Value of -l; when undefined the DB value is used instead.
my $output;		# Value of -o; only passed to run_linktest.pl when defined.
my $child_pid; # Child run_linktest process.
#
# Configure variables
#
# NOTE(review): the @...@ tokens look like build-time substitution
# placeholders; keep the double quotes exactly as they are -- confirm
# against the build rules before touching them.
#
my $TB = "@prefix@";		# Install root; used to locate bin/run_linktest.pl.
my $TBOPS = "@TBOPSEMAIL@";	# Recipient of the failure mail sent via SENDMAIL().
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
#
# The script runs with -T, so give it a fixed, trusted PATH and drop the
# shell-related environment variables before any command is run.
#
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
$ENV{PATH} = join(':', qw(/bin /usr/bin /usr/local/bin));
#
# Enable autoflush on the currently selected output handle, so output
# is written immediately instead of being buffered.
#
$OUTPUT_AUTOFLUSH = 1;
#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments (<pid> <eid>).
#
# Sets $debug (-d), $cancel (-k), $level (-l) and $output (-o). The values
# of -l and -o are untainted by matching them against a strict pattern and
# reassigning from the capture; anything else is fatal.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"d"})) {
    $debug = 1;
}
if (defined($options{"k"})) {
    $cancel = 1;
}
if (defined($options{"l"})) {
    $level = $options{"l"};

    # Require at least one digit. An empty value (-l "") must not be
    # accepted, since $level is later passed to run_linktest.pl as the
    # argument of its own -l option.
    if ($level =~ /^(\d+)$/) {
        $level = $1;
    }
    else {
        die("Bad data in level: $level.");
    }
}
if (defined($options{"o"})) {
    $output = $options{"o"};

    # Note different taint check (allow /).
    if ($output =~ /^([-\w\.\/]+)$/) {
        $output = $1;
    }
    else {
        die("Bad data in output file: $output\n");
    }
}

# Exactly two positional arguments must remain: <pid> and <eid>.
if (@ARGV != 2) {
    usage();
}
#
# Pick up the two positional arguments and untaint them: each must consist
# solely of word characters, '-' and '@'. The value is reassigned from the
# regex capture; anything else is fatal.
#
my ($pid, $eid) = @ARGV[0, 1];

die("Bad data in pid: $pid.")
    unless ($pid =~ /^([-\@\w]+)$/);
$pid = $1;

die("Bad data in eid: $eid.")
    unless ($eid =~ /^([-\@\w]+)$/);
$eid = $1;
#
# Look up the linktest state of the experiment and decide what to do:
#
#  * no pid recorded:     -k is an error; otherwise carry on and start one.
#  * recorded pid is gone: clear the stale pid from the DB; with -k there is
#                          nothing left to do, otherwise carry on.
#  * recorded pid exists:  with -k send it a TERM and exit; without -k
#                          refuse to start a second linktest.
#
my $query_result =
    DBQueryFatal("select linktest_level,linktest_pid ".
                 "from experiments where eid='$eid' and pid='$pid'");

die("*** $0:\n".
    "    No such experiment $pid/$eid exists!\n")
    unless ($query_result->numrows);

my ($linktest_level, $linktest_pid) = $query_result->fetchrow_array();

if (!$linktest_pid) {
    # Undefined or zero: no linktest is recorded for this experiment.
    die("*** $0:\n".
        "    Linktest is not running on experiment $pid/$eid!\n")
        if ($cancel);
}
elsif (! kill(0, $linktest_pid) && ($ERRNO == ESRCH)) {
    # Signal 0 only probes the process; ESRCH means it no longer exists,
    # so the pid in the DB is stale.
    DBQueryFatal("update experiments set linktest_pid=0 ".
                 "where pid='$pid' and eid='$eid'");

    if ($cancel) {
        print("Linktest has already exited on experiment $pid/$eid!\n");
        exit(0);
    }
    print("Clearing stale linktest pid from DB for $pid/$eid!\n");
}
elsif ($cancel) {
    # The recorded process is still there; ask it to terminate.
    unless (kill('TERM', $linktest_pid)) {
        SENDMAIL($TBOPS,
                 "Failed to stop linktest daemon for $pid/$eid",
                 "Could not kill(TERM) process $linktest_pid: $? $!");

        die("*** $0:\n".
            "    Failed to stop linktest daemon for $pid/$eid!\n");
    }
    exit(0);
}
else {
    die("*** $0:\n".
        "    Linktest is already running on experiment $pid/$eid!\n");
}
#
# Okay, lets run linktest. First set up a handler so that we can catch
# a termination signal and kill ourselves off.
#
sub cleanup()
{
    # Reset the linktest pid recorded for this experiment back to 0, so
    # the DB no longer shows a linktest in progress for $pid/$eid.
    my $clear_pid_query =
        "update experiments set linktest_pid=0 ".
        "where pid='$pid' and eid='$eid'";

    DBQueryFatal($clear_pid_query);
}
sub handler($)
{
    # Installed for TERM and INT. Ignore both from here on so the
    # shutdown sequence below is not re-entered.
    @SIG{'TERM', 'INT'} = ('IGNORE', 'IGNORE');

    # Pass the termination on to the run_linktest child (if there is
    # one) and reap it before touching the DB.
    if (defined($child_pid)) {
        kill('TERM', $child_pid);
        waitpid($child_pid, 0);
        undef($child_pid);
    }

    # Clear the pid from the DB, then bail out with a message.
    cleanup();

    die("*** $0:\n".
        "    Linktest has been canceled on experiment $pid/$eid!\n");
}
#
# Fork a child to run the actual linktest script. The parent just waits
# for child to exit, or to be signaled to terminate the child.
#
# fork() returns undef on failure (never a negative number), so test for
# definedness; otherwise a failed fork would fall through to the exec
# below and replace this process without recording a pid in the DB.
#
$child_pid = fork();
if (!defined($child_pid)) {
    die("*** $0:\n".
        "    Linktest could not fork a new process for $pid/$eid!\n");
}
if ($child_pid) {
    #
    # Parent. Catch TERM/INT so that a cancel request kills the child
    # and clears the DB state, then record our own pid in the DB.
    #
    $SIG{TERM} = \&handler;
    $SIG{INT}  = \&handler;

    DBQueryFatal("update experiments set linktest_pid=$$ ".
                 "where pid='$pid' and eid='$eid'");

    waitpid($child_pid, 0);

    # Save the wait status before cleanup() can disturb $?.
    my $status = $?;
    cleanup();

    # The low 7 bits hold the signal that killed the child, if any.
    # Report that as a failure instead of the 0 that ($? >> 8) would give.
    if ($status & 127) {
        exit(128 + ($status & 127));
    }
    exit($status >> 8);
}
#
# Child execs run_linktest. The level given with -l wins over the one
# stored in the DB; -o is only passed along when it was given.
#
exec "$TB/bin/run_linktest.pl",
    ("-d", $debug, "-v"),
    (defined($output) ? ("-o", $output) : ()),
    ("-l", (defined($level) ? $level : $linktest_level), "-e", "$pid/$eid");
die("*** $0:\n".
    "    Could not exec run_linktest.pl: $!\n");
......@@ -36,6 +36,7 @@ my $port;
my $pid;
my $eid;
my $logfile;
my $child_pid;
my $startAt = 1; # default start level
my $stopAt = 4 ; # default stop level
......@@ -231,6 +232,33 @@ print "Quick termination requested.\n"
print "Debug mode requested.\n"
if ($debug);
sub handler($)
{
    # Name of the signal that got us here.
    my $signame = $_[0];

    # Ignore further INT/TERM so the shutdown below is not re-entered.
    @SIG{'INT', 'TERM'} = ('IGNORE', 'IGNORE');

    # NOTE(review): the reason for the 2 second pause is not evident
    # from this fragment -- confirm before changing it.
    sleep(2);

    # Called with the &name; form on purpose: the current @_ is passed
    # along unchanged.
    &kill_linktest_run;

    # Terminate and reap the forked child, if there is one.
    if (defined($child_pid)) {
        kill('TERM', $child_pid);
        waitpid($child_pid, 0);
        undef($child_pid);
    }

    # ALRM means the timeout fired; anything else is a kill request.
    my $reason = ($signame eq 'ALRM')
        ? "Linktest timer has expired, aborting the run.\n"
        : "Linktest KILL event has been sent, aborting the run.\n";
    print $reason
        if ($verbose);

    exit(&analyze);
}
#
# Now that linktest has started, wait for events to be reported
# by ltevent. It will print out the event followed by args,
......@@ -238,45 +266,42 @@ print "Debug mode requested.\n"
#
$args = starter();
$args .= " -w";
if(my $pid =fork) {
if (($child_pid = fork())) {
#
# Install signal handlers to wait for a kill or a timeout.
# If the process is killed, kill Linktest!
#
$SIG{INT} = sub {
&kill_linktest_run;
print "Linktest KILL event has been sent, aborting the run.\n"
if ($verbose);
exit(&analyze);
};
$SIG{INT} = \&handler;
$SIG{TERM} = \&handler;
#
# Set timeout behavior if requested.
#
if($timeout) {
$SIG{ALRM} = sub {
&kill_linktest_run;
print "Timeout expired.\n"
if ($verbose);
exit(&analyze);
};
if ($timeout) {
$SIG{ALRM} = \&handler;
alarm($timeout);
}
waitpid($pid,0);
waitpid($child_pid, 0);
alarm 0;
exit(&analyze());
}
else {
my $ltpid;
} else {
#
# Open child process to read in the output from ltevent,
# and just print out the return values for feedback.
#
my $ltpid = open(LTC, "$args |");
$SIG{TERM} = sub {
if (defined($ltpid)) {
kill('TERM', $ltpid);
waitpid($ltpid, 0);
exit(0);
}
};
$ltpid = open(LTC, "$args |");
if (! $ltpid) {
die("*** $0:\n".
" Error running '$args'\n");
......
......@@ -20,7 +20,7 @@ my $TB = "@prefix@";
#
# Run the real thing, and never return.
#
exec "$TB/bin/run_linktest.pl", @ARGV;
exec "$TB/sbin/linktest_control", @ARGV;
die("*** $0:\n".
" Could not exec program: $!");
......@@ -59,7 +59,6 @@ if ($action eq "stop") {
#
# Need the keyfile. We talk to the local elvind, not the one on boss.
#
my $boss = tmccbossname();
my $keyfile = TMEVENTKEY();
#
......@@ -70,5 +69,6 @@ my ($pid, $eid, $vname) = check_nickname();
#
# Start the daemon.
#
system("linktest -e $pid/$eid -s $boss -l $logfile -i $pidfile -k $keyfile");
system("linktest -e $pid/$eid ".
"-s localhost -l $logfile -i $pidfile -k $keyfile");
# $? holds the raw wait status left by system(); the command's exit code
# is in the high byte. Passing the raw status to exit() would truncate any
# normal non-zero exit to 0, so shift by 8. A status with any of its low
# eight bits set (killed by a signal, or -1 when the command could not be
# run at all) is reported as a plain failure.
exit(($? & 0xff) ? 1 : ($? >> 8));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment