ch_daemon.in 6.24 KB
Newer Older
1
2
3
#!/usr/bin/perl -w
#
# GENIPUBLIC-COPYRIGHT
4
# Copyright (c) 2008-2011 University of Utah and the Flux Group.
5
6
7
8
9
10
11
12
13
14
15
16
# All rights reserved.
#
use strict;
use English;
use Getopt::Std;

#
# Look for things that need to be expired and resources released. This is
# incredibly primitive at the moment.
#
sub usage()
{
    # Show the command-line synopsis on stdout and exit with status 1.
    my $synopsis = "Usage: ch_daemon [-s] [-d] [-n]\n";

    print $synopsis;
    exit(1);
}
20
21
# Option letters handed to getopts().
my $optlist   = "dns";

# Command-line flags; every one starts out off and is switched on when
# its letter is given: -s => $oneshot, -d => $debug, -n => $impotent.
my ($oneshot, $debug, $impotent) = (0, 0, 0);

#
# Configure variables
#
# Installation prefix and mail addresses.
my $TB            = "@prefix@";
my $TBOPS         = "@TBOPSEMAIL@";
my $TBLOGS        = "@TBLOGSEMAIL@";

# ProtoGENI settings. The last two are substituted unquoted and are
# tested for truth before the daemon does any work.
my $PGENIDOMAIN   = "@PROTOGENI_DOMAIN@";
my $PGENISUPPORT  = @PROTOGENI_SUPPORT@;
my $PGENICLRHOUSE = @PROTOGENI_CLEARINGHOUSE@;

# Files and directories used by this daemon.
my $ADDIR          = "$TB/www/protogeni/advertisements";
my $LOGFILE        = "$TB/log/ch_daemon.log";
my $EMULAB_PEMFILE = "$TB/etc/genich.pem";

# Both intervals are in seconds: the main loop sleeps for
# $SLEEP_INTERVAL between passes and probes once per $PROBE_INTERVAL.
my $SLEEP_INTERVAL = 60;
my $PROBE_INTERVAL = (24 * 60 * 60);

# Un-taint: drop shell-related variables and pin the search path.
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin';

# Forward declarations for subs defined later in this file.
sub fatal($);
sub notify($);

#
# Seconds left until the next probe. Starting at zero makes the first
# pass through the main loop probe immediately.
#
my $probe_countdown = 0;

# Flush output after every write.
$| = 1;

fatal("Must be root to run this script\n")
    if ($UID != 0);

#
# Exit if not a protogeni site.
#
65
# Both ProtoGENI support and the clearinghouse must be configured;
# otherwise there is nothing to do and we leave quietly.
exit(0)
    unless ($PGENISUPPORT && $PGENICLRHOUSE);

#
# Check args early so we get the right DB.
#
my %options = ();

# An unknown option letter gets the usage message (which exits).
usage()
    if (! getopts($optlist, \%options));

# Each recognized letter switches on its flag.
$debug    = 1 if (defined($options{"d"}));
$impotent = 1 if (defined($options{"n"}));
$oneshot  = 1 if (defined($options{"s"}));
85
86
87
88
89
90
91
92
93
94
95
96
97
98

# Do this early so that we talk to the right DB.
use vars qw($GENI_DBNAME);
$GENI_DBNAME = "geni-ch";

# Load the Testbed support stuff.
use lib "@prefix@/lib";
require GeniDB;
require GeniUtil;
require GeniCertificate;
require GeniAuthority;
require libGeni;
use libtestbed;
use emutil;
99
use libEmulab;
100
101
102
103
104
105
106
107
108
109
110
111
112
113
use POSIX qw(strftime ceil);

#
# So we know who/what we are acting as.
#
# Load the certificate from $EMULAB_PEMFILE; without it we cannot go on,
# so fatal() is called (it mails $TBOPS and dies).
my $certificate = GeniCertificate->LoadFromFile($EMULAB_PEMFILE);
if (!defined($certificate)) {
    fatal("Could not get uuid/urn from $EMULAB_PEMFILE\n");
}
# Presumably makes this certificate the identity used for later XML-RPC
# calls -- confirm against Genixmlrpc.
Genixmlrpc->SetContext(Genixmlrpc->Context($certificate));
# NOTE(review): the timeout unit is not visible here; presumably seconds.
Genixmlrpc->SetTimeout(10);
# Published through the environment; the readers of these variables are
# not visible in this file.
$ENV{'MYUUID'} = $certificate->uuid();
$ENV{'MYURN'} = "urn:publicid:IDN+@OURDOMAIN@+authority+ch";

114
115
116
117
118
119
120
121
122
123
124
125
# Daemon bookkeeping is skipped entirely in one-shot mode.
unless ($oneshot) {
    fatal("Not starting another ch daemon!")
	if (CheckDaemonRunning("ch_daemon"));

    # Go to ground, unless debugging. A true return from TBBackGround()
    # means this process should exit -- presumably it is the parent of
    # a fork whose child logs to $LOGFILE; confirm in the library.
    if (!$debug && TBBackGround($LOGFILE)) {
	exit(0);
    }

    fatal("Could not mark daemon as running!")
	if (MarkDaemonRunning("ch_daemon"));
}
#
# Setup a signal handler for newsyslog.
#
sub handler()
{
    # Reopen the log file with effective uid 0, then put back whatever
    # effective uid was in force when we were called.
    my $prior_euid = $EUID;

    $EUID = 0;
    ReOpenLog($LOGFILE);
    $EUID = $prior_euid;
}
# The HUP handler is installed only when running as a regular daemon,
# i.e. neither in debug nor in one-shot mode.
if (!$debug && !$oneshot) {
    $SIG{HUP} = \&handler;
}

GeniUtil::FlipToGeniUser();

# Do not wait long.
Genixmlrpc->SetTimeout(10);

#
# Probe CMs
#
sub ProbeCMs()
{
    #
    # Walk all authorities of type "cm": ask each enabled one for its
    # version, fetch its advertisement and cache it as $ADDIR/<urn>,
    # then rewrite $ADDIR/list.txt with the URNs that were cached and
    # mail an Up/Down/Disabled summary. Takes no arguments. Calls
    # fatal() if the authority list cannot be read from the DB.
    #
    my @authorities = ();
    my $up          = "Up:\n";
    my $down        = "Down:\n";
    my $disabled    = "Disabled:\n";
    my %ads         = ();

    if (GeniAuthority->ListAll(\@authorities)) {
	fatal("Could not get authorities list from the DB");
    }
    foreach my $authority (@authorities) {
	my $urn = $authority->urn();

	next
	    if ($authority->type() ne "cm");

	if ($authority->disabled()) {
	    $disabled .= "$urn\n";
	    next;
	}

	print "Contacting $urn\n";
	my $version = $authority->Version();
	if (!defined($version)) {
	    print "Error getting version from $urn\n";
	    $down .= "$urn\n";
	    next;
	}
	print "  Running version $version\n";
	$up .= "$urn, version $version\n";

	#
	# Discover resources and cache; for the map client.
	# Returns a string of xml.
	#
	my $resources = $authority->DiscoverResources(undef, 0);
	# Errors will go to the log.
	next
	    if (!defined($resources));

	#
	# Three-argument open with a lexical handle: the mode can never
	# be taken from the file name, which is built from the URN.
	#
	if (open(my $out, ">", "$ADDIR/$urn")) {
	    print $out $resources;
	    print $out "\n";
	    close($out);

	    # Save the name so we can write the text file at the end.
	    $ads{$urn} = $urn;
	}
	else {
	    notify("Could not open $ADDIR/$urn for output\n");
	    next;
	}
    }

    if (open(my $map, ">", "$ADDIR/list.txt")) {
	foreach my $urn (keys(%ads)) {
	    print $map "$urn\n";
	}
	close($map);
    }
    else {
	#
	# No "next" here. We are outside the loop, so it would either
	# be a fatal error (one-shot mode) or restart the caller's main
	# loop without sleeping. Report the problem and still send the
	# status summary below.
	#
	notify("Could not open $ADDIR/list.txt for output\n");
    }

    SENDMAIL("geni-dev-utah\@flux.utah.edu",
	     "ClearingHouse Daemon Message",
	     "This is an automated message from the ClearingHouse Daemon\n\n".
	     $up . "\n" . $down . "\n" . $disabled . "\n", $TBOPS);
}

220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#
# Look for slices that have expired.  In theory this shouldn't be
# necessary, since SAs should tell the CH when they unregister expired
# slices and nobody should care about stale slice entries at the CH
# anyway, but in practice it's helpful to expire stale CH records
# because (a) we don't want to collect junk indefinitely from misbehaving
# SAs and (b) old versions of the SA treated CH registration failure
# as fatal and so wouldn't be able to reuse slice names if a stale
# entry somehow survived at the CH.
#
sub ExpireSlices()
{
    # Removes slice rows whose expiration time has passed and which are
    # not marked shutdown.
    my $stale_slices_query =
	"delete from geni_slices ".
	"where UNIX_TIMESTAMP(now()) > ".
	"      UNIX_TIMESTAMP(expires) and ".
	"      shutdown is null";

    # Only the database is touched here; cached copies of the deleted
    # entries are left for the main loop's cache flush. In impotent
    # mode (-n) nothing is deleted.
    GeniDB::DBQueryWarn($stale_slices_query)
	if (!$impotent);
}

244
245
246
247
248
249
# One-shot mode: a single probe and expiry pass, then exit.
if ($oneshot) {
    ProbeCMs();
    ExpireSlices();
    exit(0);
}

#
# Daemon mode: wake up every $SLEEP_INTERVAL seconds, and probe/expire
# whenever the countdown runs out.
#
while (1) {
    # While NoLogins() is true do nothing; check again in five seconds.
    if (NoLogins()) {
	sleep(5);
	next;
    }

    # %Y is the full four-digit year; no hard-coded century.
    print "Running at ".
	POSIX::strftime("%Y-%m-%d %H:%M:%S", localtime()) . "\n";

    $probe_countdown -= $SLEEP_INTERVAL;
    if ($probe_countdown <= 0) {
	ProbeCMs();
	ExpireSlices();
	$probe_countdown = $PROBE_INTERVAL;
    }

    # Be certain stale info is gone.
    GeniUtil::FlushCaches();

    sleep($SLEEP_INTERVAL);
}
exit(0);

sub fatal($)
{
    # Mail the message to $TBOPS, clear the daemon-running mark (unless
    # in one-shot mode), and die with the message. Does not return.
    my $msg = shift;

    SENDMAIL($TBOPS, "ProtoGENI CH daemon died", $msg, $TBOPS);

    # NOTE(review): fatal() can be reached before MarkDaemonRunning()
    # has been called (e.g. when another daemon is already running);
    # confirm MarkDaemonStopped() is harmless in that case.
    unless ($oneshot) {
	MarkDaemonStopped("ch_daemon");
    }

    die("*** $0:\n".
	"    $msg\n");
}

sub notify($)
{
    # Non-fatal report: echo the message to stdout and mail it to $TBOPS.
    my $msg = shift;

    print $msg . "\n";
    SENDMAIL($TBOPS, "ClearingHouse Daemon Message", $msg, $TBOPS);
}