#!/usr/bin/perl -wT
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2006 University of Utah and the Flux Group.
# All rights reserved.
#
# Strictures first, then the core modules used below: English for $EUID,
# Getopt::Std for getopts(), POSIX for strftime().
use strict;
use English;
use Getopt::Std;
use POSIX;

#
# Wrapper for running the linktest daemon. This script is currently
# set up so it can run on either ops or from an experimental node.
#
sub usage()
{
    # Print the command line synopsis on STDOUT and exit with status 1.
    # This never returns to the caller.
    my @synopsis = (
        "Usage: run_linktest.pl ",
        "[-q] [-d level] [-t timeout] [-v] [-s server] [-p port] [-k keyfile] [-l level] [-o logfile] -e pid/eid\n",
        "Use -q for quick termination mode, which skips the Bandwidth test\n",
        "Use -v for verbose feedback messages\n",
        "Use -t <time> to set a timeout in seconds\n",
    );

    print join("", @synopsis);
    exit(1);
}

# Option letters accepted by getopts(); a trailing colon means the option
# takes a value.
my $optlist = "vqd:s:p:k:e:L:l:o:t:";

# Option values that have a default.
my ($debug, $verbose, $timeout) = (0, 0, 0);
my ($startAt, $stopAt)          = (1, 4);   # default start and stop levels

# Option values with no default; they stay undefined until set.
my ($server, $keyfile, $port, $pid, $eid, $logfile);

# Process id of the forked ltevent watcher, once it exists.
my $child_pid;

# Local goo. The @...@ tokens are filled in when the script is configured.
my $TB         = "@prefix@";
my $TMCC       = "@CLIENT_BINDIR@/tmcc";
my $LTEVENT    = "@CLIENT_BINDIR@/ltevent";
my $LOGHOLE    = "$TB/bin/loghole";
my $LTEVENTOPS = "$TB/libexec/ltevent";

# Event names exchanged with linktest.
my $STOPEVENT     = "STOP";     # XXX Left in here for backwards compat.
my $COMPLETEEVENT = "COMPLETE";
my $KILLEVENT     = "KILL";
my $REPORTEVENT   = "REPORT";

#
# This script should be run as a real person!
#
die("*** $0:\n".
    "    This script should not be run as root!\n")
    if ($EUID == 0);

#
# Un-taint the path: drop the environment variables that influence how a
# shell runs commands, then pin PATH to a fixed list of directories.
#
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';

# Flush STDOUT after every write instead of buffering it.
$| = 1;

#
# Make sure log files get created so project members can delete them!
#
umask(0002);

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    print "error: cannot parse options\n";
    usage();
}
if (@ARGV) {
    print "error: extra arguments\n";
    usage();
}

#
# Check one user supplied value against a pattern and hand back the
# untainted text.
#
#   $what    - name used in the error message ("debug", "port", ...)
#   $value   - the raw value to check
#   $pattern - compiled regex whose first capture group is the accepted text
#
# Returns the text captured by the first group. Dies with a "Bad data"
# message when the value does not match.
#
sub untaint_opt
{
    my ($what, $value, $pattern) = @_;

    if ($value =~ $pattern) {
        return $1;
    }
    die("*** $0:\n".
        "    Bad data in $what: $value\n");
}

if (defined($options{"d"})) {
    $debug = untaint_opt("debug", $options{"d"}, qr/^([\w]+)$/);
}
if (defined($options{"v"})) {
    $verbose = 1;
}
if (defined($options{"t"})) {
    # The timeout is handed to alarm(), so it must be a whole number of
    # seconds. Anything else would turn into 0 there and silently switch
    # the timeout off.
    $timeout = untaint_opt("timeout", $options{"t"}, qr/^(\d+)$/);
}
if (defined($options{"L"})) {
    $startAt = untaint_opt("start level", $options{"L"}, qr/^(\d)$/);
}
if (defined($options{"l"})) {
    $stopAt = untaint_opt("level", $options{"l"}, qr/^(\d)$/);
}
if (defined($options{"q"})) {
    # ignore if via -l they are already in quick mode.
    if ($stopAt > 3) {
        $stopAt = 3;
    }
}
if (defined($options{"s"})) {
    $server = untaint_opt("server", $options{"s"}, qr/^([-\w\.]+)$/);
}
if (defined($options{"k"})) {
    $keyfile = untaint_opt("keyfile", $options{"k"}, qr/^([-\w\.\/]+)$/);
}
if (defined($options{"o"})) {
    $logfile = untaint_opt("logfile", $options{"o"}, qr/^([-\w\.\/]+)$/);
}
if (defined($options{"p"})) {
    $port = untaint_opt("port", $options{"p"}, qr/^(\d+)$/);
}
if (defined($options{"e"})) {
    ($pid,$eid) = split(/\//, $options{"e"});
}
else {
    usage();
}

#
# A -e value without a slash leaves one or both halves undefined; reject
# that here instead of matching a pattern against an undefined value.
#
if (!defined($pid) || !defined($eid)) {
    print "error: -e argument must look like pid/eid\n";
    usage();
}

#
# Untaint args.
#
$pid = untaint_opt("pid", $pid, qr/^([-\w]+)$/);
$eid = untaint_opt("eid", $eid, qr/^([-\@\w]+)$/);

#
# Default to the standard event server.
#
$server = "@EVENTSERVER@"
    if (!defined($server));

#
# These days, must use a keyfile!
#
$keyfile = "/proj/$pid/exp/$eid/tbdata/eventkey"
    if (!defined($keyfile));

# Path to linktest data.
my $linktest_path = join("", "/proj/", $pid, "/exp/", $eid, "/tbdata/linktest");

#
# Send the startup event: the common ltevent command line followed by the
# event name and its arguments.
#
my $args = starter();
$args .= " -x START";
$args .= " STARTAT=$startAt STOPAT=$stopAt";
if ($debug) {
    $args .= " DEBUG=$debug";
}

system($args);
die("*** $0:\n".
    "    Error running '$args'\n")
    if ($? != 0);

# Tell the user what was started and in which mode.
print "Starting linktest at " . &TBTimeStamp() . "\n";
if (defined($options{"q"})) {
    print "Quick termination requested.\n";
}
if ($debug) {
    print "Debug mode requested.\n";
}

#
# Signal handler the parent process installs for INT, TERM, HUP and ALRM.
# It tells linktest on the nodes to stop, terminates and reaps the forked
# ltevent watcher, reports why the run ended, and exits with status 1.
#
#   $signame - name of the signal being handled; 'ALRM' means the -t timer
#              expired, anything else is treated as a user abort.
#
# Never returns.
#
sub handler($)
{
    my ($signame) = @_;

    # Do not let a second interrupt restart the cleanup.
    $SIG{INT}  = 'IGNORE';
    $SIG{TERM} = 'IGNORE';
    $SIG{HUP}  = 'IGNORE';

    sleep(2);

    #
    # kill_linktest_run() dies when the KILL event cannot be sent. Catch
    # that so the watcher child below is still terminated and reaped
    # rather than being left running after we are gone.
    #
    eval {
        kill_linktest_run();
    };
    warn($@)
        if ($@);

    if (defined($child_pid)) {
        kill('TERM', $child_pid);
        waitpid($child_pid, 0);
        undef($child_pid);
    }

    if ($signame eq 'ALRM') {
        print "*** Linktest timer has expired, aborting the run.\n";
        # Called with & because analyze() is defined later with a prototype.
        &analyze(1);
    }
    else {
        print "*** Linktest has been aborted\n";
        run_loghole();
    }
    exit(1);
}

#
# Now that linktest has started, wait for events to be reported
# by ltevent. It will print out the event followed by args,
# which are informational. The events sent are KILL, STOP, COMPLETE and REPORT.
#
$args = starter();
$args .= " -w";

#
# fork() returns undef on failure. Without this check the failure would be
# treated like the child branch and the watcher would run in this process
# with no parent to time it out or analyze the results.
#
$child_pid = fork();
if (!defined($child_pid)) {
    die("*** $0:\n".
        "    Could not fork: $!\n");
}

if ($child_pid) {
    #
    # Parent: wait for the watcher child to finish.
    #
    my $exitval;

    #
    # Install signal handlers to wait for a kill or a timeout.
    # If the process is killed, kill Linktest!
    #
    $SIG{INT}  = \&handler;
    $SIG{TERM} = \&handler;
    $SIG{HUP}  = \&handler;

    #
    # Set timeout behavior if requested.
    #
    if ($timeout) {
        $SIG{ALRM} = \&handler;
        alarm($timeout);
    }
    waitpid($child_pid, 0);
    $exitval = $?;
    alarm 0;

    if ($exitval) {
        # The watcher reported a failure; fetch the logs and pass on its
        # exit code.
        # NOTE(review): if the child was ended by a signal the shifted
        # value is 0, so this exits 0 - confirm that is intended.
        run_loghole();
        exit($exitval >> 8);
    }
    # Called with & because analyze() is defined later with a prototype.
    exit(&analyze(0));
}
else {
    #
    # Child: run ltevent and relay what it reports.
    #
    my $ltpid;
    my $exitval = 0;

    #
    # If we are told to terminate, take ltevent down with us. Before
    # ltevent has been started this handler does nothing.
    #
    $SIG{TERM} = sub {
        if (defined($ltpid)) {
            kill('TERM', $ltpid);
            waitpid($ltpid, 0);
            exit(0);
        }
    };

    #
    # Open child process to read in the output from ltevent,
    # and just print out the return values for feedback.
    #
    my $ltc;
    $ltpid = open($ltc, "-|", $args);
    if (! $ltpid) {
        die("*** $0:\n".
            "    Error running '$args'\n");
    }
    while (my $line = <$ltc>) {
        chomp($line);

        # Each line is an event name optionally followed by its arguments.
        if ($line =~ /(\w+)\s?(.*)/) {
            my $eventtype = $1;
            my $eventargs = $2;

            if (($eventtype eq $STOPEVENT) ||
                ($eventtype eq $COMPLETEEVENT)) {
                print "Linktest completed at " . &TBTimeStamp() . "\n"
                    if ($verbose);
                last;
            }
            elsif ($eventtype eq $KILLEVENT) {
                print("Linktest has been cancelled due to a timeout ".
                      "or unrecoverable error.\n");
                $exitval = 1;
                last;
            }
            else {
                #
                # Print out report messages if in verbose mode.
                #
                print $eventargs . "\n"
                    if ($verbose);
            }
        }
        else {
            # parse error, exit.
            print "error parsing: " . $line . "\n";
            $exitval = -1;
            last;
        }
    }
    kill('TERM', $ltpid);
    close($ltc);
    exit($exitval);
}

#
# Spits out the results from the Linktest path,
# with a return code that indicates whether errors were found
# by Linktest on the nodes.
#
#   $timedout - true when the run was cut short by the -t timer. In that
#               case the logs are not downloaded and, when a logfile was
#               requested, it starts with a note about the timeout.
#
# The *.fatal and *.error files found in $linktest_path are copied to
# $logfile when one was given with -o, otherwise they are printed on
# STDOUT between banner lines.
#
# Returns the number of *.fatal / *.error files found (0 means no errors).
# Dies when the linktest directory, the logfile or a report file cannot be
# opened.
#
sub analyze($) {
    my ($timedout) = @_;

    opendir(my $dirh, $linktest_path)
        or die("*** $0:\n".
               "    Cannot open $linktest_path\n");
    my @dir_contents = grep(/\.fatal$|\.error$/, readdir($dirh));
    closedir($dirh);

    # Start from a clean logfile on every run.
    unlink($logfile)
        if (defined($logfile));

    # Nothing to report: no error files and the run was not cut short.
    return 0
        if (! (scalar(@dir_contents) || $timedout));

    run_loghole()
        if (! $timedout);

    if (!defined($logfile)) {
        print "*************************************************************";
        print "****\n";
        print "***************** Linktest Error Reports ********************";
        print "****\n\n";
    }

    if ($timedout && defined($logfile)) {
        my $msg = "Linktest timer expired, run was aborted\n".
                  "Gathering results generated before the timer expired\n".
                  "\n";

        # Written directly rather than through a shell "echo"; the extra
        # newline is the one echo used to append.
        open(my $log, ">", $logfile)
            or die "Could not open $logfile for write: $!";
        print $log $msg . "\n";
        close($log);
    }

    foreach my $file (@dir_contents) {
        # Hmm, need to taint check the filenames. Ick.
        if ($file =~ /^([-\w\.\/]+)$/) {
            $file = $1;
        }
        else {
            die("*** $0:\n".
                "    Bad data in filename: $file\n");
        }
        my $report = "$linktest_path/$file";

        if (defined($logfile)) {
            #
            # "or" instead of "||" here: "||" binds to the file name
            # argument, so a failed open would never reach the die.
            #
            open(my $log, ">>", $logfile)
                or die "Could not open $logfile for append: $!";
            open(my $trace, "<", $report)
                or die "Could not open $file for read: $!";
            while (my $line = <$trace>) {
                print $log $line;
            }
            close($trace);
            close($log);
        }
        else {
            # List form, so the path is never interpreted by a shell.
            system("/bin/cat", $report);
        }
    }

    if (!defined($logfile)) {
        print "*************************************************************";
        print "****\n";
    }
    return scalar(@dir_contents);
}

#
# Initial part of command string to ltevent.
#
# Uses the copy under $TB/libexec when that one is executable and the
# client copy otherwise, then appends the server, experiment and, when
# they are set, the port and keyfile options. Returns the command string.
#
sub starter {
    my $binary = (-x $LTEVENTOPS) ? $LTEVENTOPS : $LTEVENT;

    my @pieces = ($binary, "-s $server", "-e $pid/$eid");
    push(@pieces, "-p $port")
        if (defined($port));
    push(@pieces, "-k $keyfile")
        if (defined($keyfile));

    return join(" ", @pieces);
}

#
# Sub to kill off linktest on the nodes: send the KILL event through
# ltevent and die if that command reports a failure.
#
sub kill_linktest_run {
    my $killcmd = starter() . " -x $KILLEVENT";

    system($killcmd);
    return
        if ($? == 0);

    die("*** $0:\n".
        "    Error running '$killcmd'\n");
}

#
# Download the logs into the experiment's tbdata/ltlogs directory using
# loghole. The directory is removed and recreated first. None of the
# commands is checked for failure.
#
sub run_loghole {
    my $ltlogs = "/proj/$pid/exp/$eid/tbdata/ltlogs";

    print "Downloading logs...\n";

    # Start with an empty, group writable directory.
    foreach my $prep ("rm -rf $ltlogs",
                      "mkdir -p $ltlogs",
                      "chmod 775 $ltlogs") {
        system($prep);
    }

    # loghole's own output is kept next to the logs it fetches.
    system("$LOGHOLE -e $pid/$eid sync -n -l $ltlogs -r /var/emulab/logs ".
           "> $ltlogs/loghole.out 2>&1");
}

# Return the current local time of day formatted as HH:MM:SS.
sub TBTimeStamp {
    my @now = localtime();

    return POSIX::strftime("%H:%M:%S", @now);
}