eventsys.proxy.in 7.02 KB
Newer Older
1
2
3
4
#!/usr/bin/perl -w

#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2006 University of Utah and the Flux Group.
# All rights reserved.
#

use English;
use Getopt::Std;
use Errno;
use POSIX ":sys_wait_h";
    
#
# A wrapper for controlling from boss the event scheduler running on ops.
# This wrapper runs on ops.
#
# The -u option names the user to run as, since we get invoked by a
# root ssh from boss.
#
sub usage()
{
    # Print the command line synopsis and exit with a failure status;
    # this never returns. -d, -a and -t are optional (they are only
    # tested with defined() after getopts); the other options and the
    # action are required.
    print "Usage: eventsys.proxy [-d] [-a] -u user -g gid -e pid/eid ".
	"-k keyfile -l logfile [-t record_file] start|stop|replay\n";
    exit(-1);
}
28
# Option letters accepted by getopts; a trailing ':' means the option
# takes an argument.
my $optlist = "u:e:k:dl:g:t:a";
my $debug   = 0;	# set by -d
my $runagent= 0;	# set by -a
my $user;		# -u
my $pid;		# first half of -e pid/eid
my $eid;		# second half of -e pid/eid
my $gid;		# -g
my $keyfile;		# -k
my $logfile;		# -l
my $recordfile;		# -t (optional)
my $action;		# the single positional argument

#
# Configure variables
#
my $TB       = "@prefix@";
my $TBOPS    = "@TBOPSEMAIL@";
my $CONTROL  = "@USERNODE@";
my $sched    = "$TB/sbin/event-sched";
my $agent    = "$TB/sbin/program-agent";
my $PIDDIR   = "/var/run/emulab/evsched";
my $PIDFILE;		# computed from pid/eid once the options are parsed
my $EXPDIR;		# computed from pid/eid once the options are parsed
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77

#
# Flush output after every write instead of buffering it
#
$| = 1;

#
# Untaint the path, and drop the variables a shell might act on
#
$ENV{'PATH'} = "/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin";
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};

#
# Refuse to run unless the real uid is root: the script has to read and
# write a pid file that cannot be accessed by the user.
#
die("*** $0:\n".
    "    Must be root to run this script!\n")
    unless ($UID == 0);

#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libtestbed;

78
79
80
# Protos: forward declaration so that calls to StartProgram(), which is
# defined at the end of the file, are compiled with its ($@) prototype
# already known.
sub StartProgram($@);

81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
# Exactly one positional argument, the action, must remain.
if (@ARGV != 1) {
    usage();
}
if (! defined($options{"u"}) ||
    ! defined($options{"e"}) ||
    ! defined($options{"g"}) ||
    ! defined($options{"l"}) ||
    ! defined($options{"k"})) {
    usage();
}

if (defined($options{"d"})) {
    $debug = 1;
}
if (defined($options{"a"})) {
    $runagent = 1;
}
if (defined($options{"t"})) {
    $recordfile = $options{"t"};
}

$logfile    = $options{"l"};
$keyfile    = $options{"k"};
$user       = $options{"u"};
$gid        = $options{"g"};
$action     = $ARGV[0];

# Only the documented actions are accepted. Without this check any other
# word (a typo, say) would skip the stop/replay handling and go on to
# start a scheduler.
if ($action ne "start" && $action ne "stop" && $action ne "replay") {
    usage();
}

# Split and untaint pid/eid. Both halves must be non-empty: they are used
# to build the pid file name and the experiment directory, and are
# interpolated into shell commands later on.
if ($options{"e"} =~ /^([-\w]+)\/([-\w]+)$/) {
    $pid = $1;
    $eid = $2;
}
else {
    usage();
}
$PIDFILE = "$PIDDIR/${pid}_${eid}.pid";
$EXPDIR  = PROJROOT() . "/$pid/exp/$eid";
125
126
127
128
129

#
# Deal with stop and replay.
#
# If a pid file exists, ask the experiment to run its swapout and teardown
# events, then send TERM to the process recorded in the pid file. "stop"
# exits here; "replay" falls through and starts a fresh scheduler below.
#
if ($action eq "stop" || $action eq "replay") {
    if (-e $PIDFILE) {
	# Send any swapout events and wait, at most three seconds, for them to
	# complete.
	system("$TB/bin/tevc -w -t 3 -e ${pid}/${eid} now __ns_swapout run");

	# Send any teardown events and wait, at most five seconds, for them to
	# complete.
	system("$TB/bin/tevc -w -t 5 -e ${pid}/${eid} now __ns_teardown run");

	my $epid = `cat $PIDFILE`;
	# untaint. Require at least one digit and a positive value: an empty
	# or zero pid would make the kill() calls below address our own
	# process group rather than the recorded process.
	if ($epid =~ /^(\d+)$/ && $1 > 0) {
	    $epid = $1;
	}
	else {
	    die("*** $0:\n".
		"    Bad data in pid: $epid!\n");
	}
	unlink($PIDFILE);

	# Signal 0 only probes for the process. Go ahead with the TERM
	# unless the probe failed with ESRCH (no such process).
	if (kill(0, $epid) || ! $!{ESRCH}) {
	    if (! kill('TERM', $epid)) {
		die("*** $0:\n".
		    "Failed to stop event system for $pid/$eid! - $! $epid\n");
	    }
	}
    }
    
    if ($action eq "stop") {
	exit(0);
    }
    # replay continues below, but give exiting scheduler a chance to react!
    sleep(1);
}

#
# The pid files live in $PIDDIR; create it if it is not there yet.
#
unless (-d $PIDDIR) {
    system("mkdir -p -m 775 $PIDDIR") == 0
	or die("*** $0:\n".
	       "    Could not mkdir $PIDDIR\n");
}

#
# Look up the numeric ids needed to flip to the user before running the
# event scheduler. The user is put into both the project group and the
# experiment subgroup, so both groups are resolved here. A failed lookup
# returns an empty list, which leaves the third field undefined.
#
my $unix_uid = (getpwnam($user))[2];
die("*** $0:\n".
    "    No such user $user\n")
    if (!defined($unix_uid));

my $unix_ggid = (getgrnam($gid))[2];
die("*** $0:\n".
    "    No such group $gid\n")
    if (!defined($unix_ggid));

my $unix_pgid = (getgrnam($pid))[2];
die("*** $0:\n".
    "    No such group $pid\n")
    if (!defined($unix_pgid));

191
#
# Make sure the directory that will hold the logfile exists. The directory
# name is computed in-process and mkdir is run in list form (no shell), so
# a logfile path containing whitespace or shell metacharacters cannot be
# misinterpreted. This code still runs as root.
#
require File::Basename;
my $LOGDIR = File::Basename::dirname($logfile);
if (! -d $LOGDIR) {
    if (system("mkdir", "-p", "-m", "775", "--", $LOGDIR)) {
	die("*** $0:\n".
	    "    Could not mkdir $LOGDIR\n");
    }
}

200
#
# Create a child whose output is directed into the logfile. Parent waits
# a moment and then exits.
#
if (-e $logfile) {
    # Keep one generation of the previous log. List form, so the path is
    # passed to mv as-is rather than through a shell.
    system("mv", "-f", "--", $logfile, "${logfile}.old");
}

if (my $childpid = TBBackGround($logfile)) {
    #
    # Delay a moment, and then look for an exit status. This is intended
    # to catch startup problems.
    #
    sleep(2);

    # Non-blocking: a false return means the child has not exited.
    my $reaped = waitpid($childpid, WNOHANG);
    if ($reaped) {
	my $status = $?;
	unlink($PIDFILE);

	# Show whatever the child managed to log before it went away.
	system("cat", "--", $logfile)
	    if (-s $logfile);
	
	die("*** $0:\n".
	    "    Failed to start event system for $pid/$eid: $reaped $status!\n");
    }
    exit(0);
}

#
# Write out a pid file prior to flipping; the user is not granted
# access to this pid file.
#
# The file is written directly instead of through "echo ... > file", so no
# shell is involved and the reason for a failure can be reported. The
# content is our own pid followed by a newline, as before.
#
my $pidfh;
if (! (open($pidfh, ">", $PIDFILE) &&
       print($pidfh "$PID\n") &&
       close($pidfh))) {
    die("*** $0:\n".
	"    Could not create $PIDFILE: $!\n");
}

237
238
239
240
241
#
# We will have two subprocesses.
#
# Both pids are file-scoped because the TERM handler reads them and the
# wait loop clears each one when the corresponding child is reaped.
# $agentpid stays undefined when the program agent is not started.
#
my $schedpid;
my $agentpid;
242

243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#
# Catch TERM to kill off the scheduler and the agent. The death is picked
# up in the loop below.
#
sub handler ($) {
    my $signame = shift;

    # From here on further TERMs are ignored.
    $SIG{TERM} = 'IGNORE';

    print "Caught a TERM; killing the scheduler and agent\n";

    # Scheduler first, then agent; a child that was never started or has
    # already been reaped has an undefined pid and is skipped.
    foreach my $childpid ($schedpid, $agentpid) {
	kill('TERM', $childpid)
	    if (defined($childpid));
    }
    sleep(1);
}
$SIG{TERM} = \&handler;

#
# Set the command lines for the programs
#
my @sched_command_options = ();

push(@sched_command_options, "-d")
    if ($debug);
# Test for a usable value rather than for truth, so that a record file
# literally named "0" is still passed along.
push(@sched_command_options, ("-t", $recordfile))
    if (defined($recordfile) && $recordfile ne "");
push(@sched_command_options, ("-s", "localhost", "-k", $keyfile, $pid, $eid));

my @agent_command_options = ("-u", $user, "-d", "-e", "$pid/$eid",
			     "-k", $keyfile,
			     "-v", "ops",
			     "-c", "$EXPDIR/tbdata/program_agents",
			     "-f", "$EXPDIR/tbdata/environment",
			     "-l", "$EXPDIR/logs/progagent.debug",
			     "-o", "$EXPDIR/ops");
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345

#
# We want to catch these programs if they exit abnormally.
#
$schedpid = StartProgram($sched, @sched_command_options);
$agentpid = StartProgram($agent, @agent_command_options)
    if ($runagent);

#
# Reap children until none are left, sending mail for each one that exits
# with a non-zero status.
#
while (1) {
    my $waitpid    = wait();
    # Capture the status right away, before anything else can change $?.
    my $exitstatus = $?;
    my $which;

    # No more children.
    last
	if ($waitpid < 0);

    # $schedpid is undef once the scheduler has been reaped, so guard the
    # numeric comparison; any other child is taken to be the agent.
    if (defined($schedpid) && $waitpid == $schedpid) {
	$schedpid = undef;
	$which    = "Event Scheduler";
    }
    else {
	$agentpid = undef;
	$which    = "Program Agent";
    }

    #
    # Send mail about abnormal exit.
    # 
    if ($exitstatus) {
	SENDMAIL($user,
		 "$which for $pid/$eid died on $CONTROL",
		 "$which exited with status: $exitstatus",
		 "$user",
		 "CC: $TBOPS");
    }

    last
	if (! (defined($schedpid) || defined($agentpid)));
}
exit(0);

#
# Fork a child that gives up root and execs a program.
#
# Arguments: $command   - path of the program to run
#            @arguments - its command line arguments
# Returns:   the child's pid, in the parent only. The child never returns:
#            it either becomes $command or dies.
# Dies:      in the parent if the fork fails; in the child if the switch
#            to the user's uid did not take effect or the exec fails.
#
sub StartProgram($@)
{
    my ($command, @arguments) = @_;

    my $mypid = fork();
    # fork returns undef on failure. That must not be mistaken for the 0
    # seen by the child, or the parent itself would go on to drop its
    # privileges and exec the program.
    if (!defined($mypid)) {
	die("*** $0:\n".
	    "    Could not fork to run $command: $!\n");
    }
    if ($mypid) {
	return $mypid;
    }
    # Child from here on. Pause for 0.2 seconds before continuing.
    select(undef, undef, undef, 0.2);

    # Flip to user and never go back. Groups are changed first, while we
    # are still root. For $EGID the first number is the effective gid and
    # the remaining numbers become the supplementary group list.
    $GID            = $unix_ggid;
    $EGID           = "$unix_ggid $unix_ggid $unix_pgid";
    $EUID = $UID    = $unix_uid;

    # Assigning to the uid variables does not raise an error on failure,
    # so check that the change took effect rather than exec as root.
    if ($UID != $unix_uid || $EUID != $unix_uid) {
	die("*** $0:\n".
	    "    Could not flip to user $user!\n");
    }
    $ENV{'USER'}    = $user;
    $ENV{'LOGNAME'} = $user;

    print "$command @arguments\n";

    exec $command, @arguments;
    
    die("*** $0:\n".
	"    Could not exec $command: $!\n");
}
346