linktest_control.in 8.04 KB
Newer Older
1 2 3
#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2000-2011 University of Utah and the Flux Group.
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# All rights reserved.
#
use English;
use Getopt::Std;
use POSIX;

#
# Run the linktest code from experiment swapin; this script serves as
# a wrapper for run_linktest.pl. We store the pid in the DB, and allow
# for linktest cancelation from the webpage. This script is only run on
# boss; otherwise the user is running run_linktest.pl directly on ops or
# on a node, and can terminate linktest directly (via ^C or SIGTERM).
#
sub usage()
{
20 21
    print("Usage: linktest_control [-d] [-t timeout] [-k | -l <level>] ".
	  "[-o <file>] <pid> <eid>\n".
22 23
	  "-l   - Run linktest at a specific level; defaults to DB value.\n".
	  "-k   - Kill a currently running linktest.\n".
24
	  "-t   - Specify timeout in seconds.\n".
25
	  "-o   - Specify output file for linktest results.\n".
Leigh B. Stoller's avatar
Leigh B. Stoller committed
26
	  "-m   - Send email to swapper if linktest fails.\n".
27
	  "-r   - Report results only, don't flag errors.\n".
28 29 30
	  "-d   - Turn on debugging output.\n");
    exit(-1);
}
31
my $optlist  = "dkl:o:t:mfr";
32
my $debug    = 2;
33
my $cancel   = 0;
Leigh B. Stoller's avatar
Leigh B. Stoller committed
34
my $sendmail = 0;
35
my $forcerun = 0;
36
my $reportonly = 0;
37
my $timeout;
38 39 40 41 42 43 44 45 46
my $level;
my $output;
my $child_pid;			# Child run_linktest process.

#
# Configure variables
#
my $TB		= "@prefix@";
my $TBOPS       = "@TBOPSEMAIL@";
47
my $CONTROL	= "@USERNODE@";
Leigh B. Stoller's avatar
Leigh B. Stoller committed
48
my $TBBASE	= "@TBBASE@";
49
my $SSH		= "$TB/bin/sshtb";
50

51 52 53 54
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

55 56 57 58 59 60
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
61 62
use User;
use Experiment;
63 64 65 66 67 68

#
# Turn off line buffering on output
#
$| = 1; 

69 70 71 72 73
if ($EUID != 0) {
    die("*** $0:\n".
	"    Must be root! Maybe its a development version?\n");
}

74 75 76 77 78 79 80 81 82
#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"d"})) {
83
    $debug = 2;
84
}
Leigh B. Stoller's avatar
Leigh B. Stoller committed
85 86 87
if (defined($options{"m"})) {
    $sendmail = 1;
}
88 89 90
if (defined($options{"r"})) {
    $reportonly = 1;
}
91 92 93
if (defined($options{"f"})) {
    $forcerun = 1;
}
94 95 96
if (defined($options{"k"})) {
    $cancel = 1;
}
97 98 99 100 101 102 103 104 105 106
if (defined($options{"t"})) {
    $timeout = $options{"t"};

    if ($timeout =~ /^(\d*)$/) {
	$timeout = $1;
    }
    else {
	die("Bad data in timeout: $timeout");
    }
}
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
if (defined($options{"l"})) {
    $level = $options{"l"};

    if ($level =~ /^(\d*)$/) {
	$level = $1;
    }
    else {
	die("Bad data in level: $level.");
    }
}
if (defined($options{"o"})) {
    $output = $options{"o"};

    # Note different taint check (allow /).
    if ($output =~ /^([-\w\.\/]+)$/) {
	$output = $1;
    }
    else {
	die("Bad data in output file: $output\n");
    }
}
if (@ARGV != 2) {
    usage();
}
131 132 133 134
# Slowly convert to using Experiment module.
my $experiment = Experiment->Lookup($ARGV[0], $ARGV[1]);
if (!defined($experiment)) {
    tbdie("Could not lookup experiment object!")
135
}
136 137
my $pid = $experiment->pid();
my $eid = $experiment->eid();
138

139 140 141
#
# Check state. Only in the active state
#
142 143 144
if ($experiment->state() ne EXPTSTATE_ACTIVE &&
    $experiment->state() ne EXPTSTATE_ACTIVATING &&
    $experiment->state() ne EXPTSTATE_MODIFY_RESWAP) {
145
    die("*** $0:\n".
Timothy Stack's avatar
 
Timothy Stack committed
146
	"    Experiment $pid/$eid must be active!\n");
147 148
}

Leigh B. Stoller's avatar
Leigh B. Stoller committed
149
#
150
# Verify user and get his DB uid and other info for later.
Leigh B. Stoller's avatar
Leigh B. Stoller committed
151
#
152 153 154
my $this_user = User->ThisUser();
if (! defined($this_user)) {
    tbdie("You ($UID) do not exist!");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
155
}
156 157 158
my $dbuid      = $this_user->uid();
my $user_name  = $this_user->name();
my $user_email = $this_user->email();
Leigh B. Stoller's avatar
Leigh B. Stoller committed
159

160 161 162 163
#
# Check permission. Only people with permission to destroy the experiment
# can do this.
#
164
if (! $experiment->AccessCheck($this_user, TB_EXPT_DESTROY)) {
165 166 167 168 169
    die("*** $0:\n".
	"    You do not have permission to start/stop linktest for ".
	"$pid/$eid!\n");
}

170 171 172
# Need the unix_gidname info to pass to ops.
my $unix_gidname = $experiment->GetGroup()->unix_name();
my $errlog       = $experiment->UserDir() . "/logs/linktest.log";
173
my $project      = $experiment->GetProject();
Mike Hibler's avatar
Mike Hibler committed
174
my $unix_pidname = $project->unix_name();
175

176 177 178
#
# Lets see if there is a linktest running already.
#
179
my $linktest_pid = $experiment->linktest_pid();
180 181
if (defined($linktest_pid) && $linktest_pid) {
    if (! kill(0, $linktest_pid) && ($ERRNO == ESRCH)) {
182
	$experiment->Update({'linktest_pid' => 0});
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
	if ($cancel) {
	    print("Linktest has already exited on experiment $pid/$eid!\n");
	    exit(0);
	}
	else {
	    print("Clearing stale linktest pid from DB for $pid/$eid!\n");
	}
    }
    elsif (!$cancel) {
	die("*** $0:\n".
	    "    Linktest is already running on experiment $pid/$eid!\n");
    }
    else {
	if (! kill('TERM', $linktest_pid)) {
	    SENDMAIL($TBOPS,
		     "Failed to stop linktest daemon for $pid/$eid",
		     "Could not kill(TERM) process $linktest_pid: $? $!");

	    die("*** $0:\n".
		"    Failed to stop linktest daemon for $pid/$eid!\n");
	}
	exit(0);
    }
}
elsif ($cancel) {
    die("*** $0:\n".
	"    Linktest is not running on experiment $pid/$eid!\n")
}

212
my @hosed = ();
213
$experiment->LinkTestCapable(\@hosed);
214 215 216 217 218 219 220 221 222
if (@hosed > 0) {
    print STDERR
	"*** Nodes running an OSID that does not support linktest:\n";
    print STDERR
	"***   ", join(' ', @hosed), "\n";
    if (!$forcerun) {
	die("*** $0:\n".
	    "    Aborting linktest since not all nodes support it!\n");
    }
223 224
}

225 226 227 228 229 230
#
# Okay, lets run linktest. First set up a handler so that we can catch
# a termination signal and kill ourselves off. 
#
sub cleanup()
{
231 232
    $experiment->Update({'linktest_pid' => 0})
	if (defined($experiment));
233 234 235 236 237 238 239 240
}

sub handler($)
{
    $SIG{TERM} = 'IGNORE';
    $SIG{INT}  = 'IGNORE';

    if (defined($child_pid)) {
241
	kill('HUP', $child_pid);
242 243 244 245 246 247 248 249
	waitpid($child_pid, 0);
	undef($child_pid);
    }
    cleanup();
    die("*** $0:\n".
	"    Linktest has been canceled on experiment $pid/$eid!\n")
}

Leigh B. Stoller's avatar
Leigh B. Stoller committed
250 251
sub notify()
{
252
    my $lev = (defined($level) ? $level : $experiment->linktest_level());
253 254 255
    my $logname = undef;
    my $isopen;
    my $filespec = "";
256
    my $errbody = "";
257

258
    $experiment->GetLogFile(\$logname, \$isopen);
259 260 261
    if (defined($logname)) {
	$filespec = "\n\nfile://$logname\n";
    }
Leigh B. Stoller's avatar
Leigh B. Stoller committed
262
    
263 264 265 266
    if (-e $errlog) {
	$errbody = `/bin/cat $errlog`;
    }

267
    SENDMAIL($TBOPS,
Leigh B. Stoller's avatar
Leigh B. Stoller committed
268 269 270 271
	     "Linktest Failure: $pid/$eid",
	     "Failure in linktest (level $lev); ".
	     "returned non-zero status.\n".
	     "Activity log file at:\n\n".
272
	     "    $TBBASE/spewlogfile.php3?pid=${pid}&eid=${eid}\n".
273 274
	     "$filespec\n".
	     "\n".
275
	     $errbody,
276
	     "$user_name <$user_email>");
Leigh B. Stoller's avatar
Leigh B. Stoller committed
277 278
}

279 280 281 282
#
# Fork a child to run the actual linktest script. The parent just waits
# for child to exit, or to be signaled to terminate the child.
#
283 284 285 286 287 288 289
if (defined($output)) {
    $child_pid = TBBackGround($output);
}
else {
    $child_pid = fork();
}
if ($child_pid < 0) {
290
    die("*** $0:\n".
291
	"    Linktest could not fork a new process for $pid/$eid!\n");
292 293 294 295 296 297 298 299 300
}

if ($child_pid) {
    #
    # Parent. 
    #
    $SIG{TERM} = \&handler;
    $SIG{INT}  = \&handler;

301
    $experiment->Update({'linktest_pid' => $$});
302 303 304
    
    waitpid($child_pid, 0);
    my $exitval = $? >> 8;
305 306 307
    if (-e $errlog) {
	system("/bin/cat $errlog");
    }
308
    cleanup();
Leigh B. Stoller's avatar
Leigh B. Stoller committed
309 310
    notify()
	if ($exitval && $sendmail);
311 312 313 314
    exit($exitval);
}

#
315 316 317
# Child execs an ssh to users.emulab.net where linktest is actually run.
# We tell ssh to allocate a tty so that we can kill it with TERM and have
# everything die off properly.
318
#
319 320
# XXX: "-t -t" to ssh is so that kill -TERM carries across to ops. 
#
321
my @cmdargs = ("$SSH", "-t", "-t", "-F", "/dev/null",
322
	       "-host", $CONTROL, "exec", "$TB/sbin/linktest.proxy");
323
push(@cmdargs, ("-d", "$debug"))
324
    if ($debug);
325
push(@cmdargs, ("-t", "$timeout"))
326
    if (defined($timeout));
327
push(@cmdargs, "-l");
328
push(@cmdargs, (defined($level) ? $level : $experiment->linktest_level()));
329
push(@cmdargs, ("-g", $unix_gidname));
330
push(@cmdargs, ("-p", $unix_pidname));
331 332
push(@cmdargs, ("-u", $dbuid));
push(@cmdargs, ("-e", "$pid/$eid"));
333
push(@cmdargs, ("-o", $errlog));
334 335
push(@cmdargs, "-r")
    if ($reportonly);
336

337
print "Running '@cmdargs'\n"
338
    if ($debug);
339

340 341
# For sshtb
$UID=0;
342
exec(@cmdargs);
343 344
die("*** $0:\n".
    "    Could not exec run_linktest.pl\n");