pool_daemon.in 12 KB
Newer Older
1 2 3
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
4
# Copyright (c) 2009, 2010 University of Utah and the Flux Group.
5 6 7 8 9 10
# All rights reserved.
#
use strict;
use English;
use Getopt::Std;
use POSIX qw(tmpnam);
11
use Data::Dumper;
12 13 14 15 16 17

#
# Manage the pool of shared nodes.
#
sub usage()
{
18
    print STDOUT "Usage: pool_daemon [-d] [nsfile]\n" .
19 20 21
	"Use the -d option to prevent daemonization\n";
    exit(-1);
}
22
my $optlist = "dne1fs";
23 24
my $debug    = 0;
my $impotent = 0;
25 26
my $killme   = 0;
my $nofree   = 1;
27 28
my $gotlock  = 0;
my $mailsent = 0;
29
my $oneshot  = 0;
30 31
my $swapin   = 0;
my $force    = 0;
32
my $nsfile;
33 34 35 36 37 38 39

#
# Configure variables
#
my $TB       = "@prefix@";
my $TBOPS    = "@TBOPSEMAIL@";
my $BOSSNODE = "@BOSSNODE@";
40
my $logfile  = "$TB/log/poollog";
41
my $SWAPEXP  = "$TB/bin/swapexp";
42
my $tmpfile  = "/tmp/pool-$$.nsfile";
43 44 45 46 47 48 49 50 51 52 53

# Testbed Support library
use lib "@prefix@/lib";
use libdb;
use libtblog;
use event;
use libtestbed;
use NodeType;
use Experiment;
use User;
use OSinfo;
54
use Image;
55

56 57 58 59 60 61 62 63
#
# This should run as root.
#
if ($UID != 0) {
    die("*** $0:\n".
	"    Only root can run this script!\n");
}

64 65 66 67 68 69 70 71 72
# We use tblog to determine why swapexp failed.
tblog_stop_capture();

# Locals
my $EVhandle;

# Prototypes.
sub SetupEventHandler();
sub fatal($);
73
sub cleanup();
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
sub notify($);
		      
#
# Turn off line buffering on output (dots ...).
#
$| = 1;

#
# Untaint the path
# 
$ENV{'PATH'} = "/bin:/usr/bin:";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"d"})) {
96
    $debug = 1;
97
}
98 99 100
if (defined($options{"1"})) {
    $oneshot = 1;
}
101
if (defined($options{"n"})) {
102 103
    $impotent = 1;
}
104 105 106
if (defined($options{"f"})) {
    $force = 1;
}
107 108
if (defined($options{"e"})) {
    $nofree = 1;
109
}
110 111 112 113 114 115 116 117
if (defined($options{"s"})) {
    $swapin = 1;
}
# Temporary disable until I feel more confident.
if (!$force) {
    print "Pool daemon is currently disabled. Exiting.\n";
    exit(0);
}
118 119 120 121 122 123
usage()
    if (@ARGV > 1);
$nsfile = $ARGV[0]
    if (@ARGV == 1);
$oneshot = 1
    if (defined($nsfile));
124

125 126 127
if (!$impotent && CheckDaemonRunning("pool_daemon")) {
    fatal("Not starting another pool daemon!");
}
128
# Go to ground.
129
if (! ($oneshot || $debug || $impotent)) {
130
    if (TBBackGround($logfile)) {
131 132 133
	exit(0);
    }
}
134
if (!($impotent || $oneshot) && MarkDaemonRunning("pool_daemon")) {
135 136 137 138 139 140 141 142 143 144
    fatal("Could not mark daemon as running!");
}
#
# Setup a signal handler for newsyslog.
#
sub handler()
{
    ReOpenLog($logfile);
}
$SIG{HUP} = \&handler
145
    if (!($oneshot || $debug || $impotent));
146

147 148 149 150 151 152
print "Pool Daemon starting... pid $$, at ".`date`;

#
# Grab the shared node experiment. This should come from a sitevar.
# Or perhaps we want to manage multiple shared pools?
#
153 154 155 156
my $experiment =
    Experiment->Lookup(TBOPSPID(), "shared-nodes") ||
    Experiment->Lookup(TBOPSPID(), "shared-node");
    
157 158 159 160 161 162
if (!defined($experiment)) {
    fatal("Could not lookup shared node experiment. Exiting ...");
}
my $pid = $experiment->pid();
my $eid = $experiment->eid();

163 164 165 166 167 168 169 170 171
#
# If the experiment is in the new state, then set it to swapped and exit.
# If in the swapped state, also exit since the local Emulab is not using
# shared nodes yet.
#
if ($experiment->state() eq EXPTSTATE_NEW()) {
    $experiment->SetState(EXPTSTATE_SWAPPED());
}

172 173 174 175 176 177 178 179
#
# We need this user for running swapexp below.
#
my $elabman = User->Lookup("elabman");
if (!defined($elabman)) {
    fatal("Could not lookup elabman user. Exiting ...");
}

180 181 182 183 184 185
#
# We need to have this image available.
#
my $image = Image->Lookup(TBOPSPID(), "FEDORA8-OVZ-STD");
if (!defined($image)) {
    print STDERR "Pool Daemon exiting since there is suitable image\n";
186
    cleanup();
187 188 189
    exit(0);
}

190 191 192 193 194 195 196 197 198 199 200 201 202
#
# And handler for TERM since we really do not want this to be
# interrupted. Just set a flag that will cause it to exit at
# the next loop.
#
sub sigterm()
{
    print "Got a TERM signal; arranging to exit soon\n";
    $killme = 1;
}
$SIG{TERM} = \&sigterm;

while (!$killme) {
203
    my $disabled;
204
    my $didsomething = 0;
205
    
206 207
    print "Pool Daemon running at ".`date`;

208 209
    if (! TBGetSiteVar("web/nologins", \$disabled) || $disabled) {
	print "  Skipping this loop cause of nologins\n";
210
	goto loop;
211 212
    }

213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
    #
    # Serialize this part with the mapper.
    #
    if (!$impotent) {
	my $tries = 0;
	
	while (1) {
	    #
	    # Use a countup/countdown counter, so that multiple mappers
	    # can run, but not while the pool_daemon is running.
	    #
	    my $lock_result =
		DBQueryWarn("update emulab_locks set value=-1 ".
			    "where name='pool_daemon' and value=0");
	    fatal("DB Error going for lock")
		if (!defined($lock_result));

	    $gotlock = $lock_result->affectedrows;

	    last
		if ($gotlock);

	    if ($tries++ > 100) {
		notify("Cannot get the lock after a really long time");
		$tries = 0;
	    }
239
	    print "Waiting for pool daemon lock ...\n";
240 241 242 243
	    sleep(10);
	}
    }
    
244 245 246
    Node->FlushAll();
    $experiment->Refresh() == 0
	or fatal("Could not reload $experiment");
247

248
    if ($experiment->state() eq EXPTSTATE_SWAPPED() && !$swapin) {
249 250 251
	print "Skipping this loop cause the experiment is swapped\n";
	goto loop;
    }
252 253 254 255 256 257
    if (defined($nsfile)) {
	print "Copying $nsfile to $tmpfile\n";
	system("/bin/cp -f $nsfile $tmpfile") == 0
	    or fatal("Could not copy nsfile");
	goto skipns;
    }
258
    
259 260 261 262 263 264 265
    my @nodelist = $experiment->NodeList();
    my %inuse    = ();
    my %tofree   = ();
    my $loaded   = 0;
    my $unloaded = 0;
    my $newcount = 0;

266 267 268 269 270
    # Sitevars to control poolsize. Reloas each loop.
    my $maxpoolsize  = TBGetSiteVar("general/maxpoolsize");
    my $minpoolsize  = TBGetSiteVar("general/minpoolsize");
    my $poolnodetype = TBGetSiteVar("general/poolnodetype");

271 272 273 274 275 276 277 278 279
    #
    # Sanity check the nodetype to make sure it has been set.
    #
    my $typeinfo = NodeType->Lookup("pc", $poolnodetype);
    if (!defined($typeinfo)) {
	fatal("$poolnodetype is not a valid pc type. Please set the ".
	      "'general/poolnodetype' site variable");
    }

280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
    #
    # Look to see how each of the nodes is packed. This is
    # advisory; we will not know for sure until tables locked
    # in nfree and we can count the number of vnodes on it.
    #
    foreach my $node (@nodelist) {
	my $reservation = $node->Reservation();
	# Node released somewhere else. 
	next
	    if (!defined($reservation));
	next
	    if (!$experiment->SameExperiment($reservation));

	my $maxsharecount = $node->MaxShareCount();
	# Transient error?
	next
	    if ($maxsharecount < 0);

	# Look for nodes with nothing on them.
	my $vnodecount = $node->HasVirtualNodes();
	# Transient error?
	next
	    if ($vnodecount < 0);

304
	if ($vnodecount == 0) {
305 306
	    print "$node no longer has virtual nodes on it.\n";
	    # Free the node unless we would go below the minpoolsize.
307 308
	    if (!$nofree &&
		scalar(@nodelist) - scalar(keys(%tofree)) > $minpoolsize) {
309 310 311
		print "  Adding to free list.\n";
		$tofree{$node->node_id()} = $node;
	    }
312 313 314 315
	    next;
	}
	# Count up loaded vs. unloaded nodes.
	my $factor = $maxsharecount / $vnodecount;
316
	print "$node load factor is $factor\n";
317 318 319 320 321 322 323 324 325
	if ($factor < 0.5) {
	    $unloaded++;
	}
	else {
	    $loaded++;
	}
	$inuse{$node->vname()} = $node;
    }
    #
326
    # Try to keep at least one unloaded machine available, but stay under max.
327
    #
Leigh B. Stoller's avatar
Leigh B. Stoller committed
328
    if (scalar(@nodelist) - $loaded < 1) {
329 330 331 332 333
	if (keys(%tofree)) {
	    # rescue one from the free list.
	    my $key = (keys(%tofree))[0];
	    delete($tofree{$key});
	}
334
 	elsif (scalar(@nodelist) < $maxpoolsize) {
335 336 337
	    $newcount++;
	}
    }
338 339 340 341 342
    if (!$debug) {
	if (! (keys(%tofree) || $newcount)) {
	    exit(0)
		if ($impotent);
	    goto loop;
343 344
	    # This counts as doing something.
	    $didsomething = 1;
345
	}
346 347 348 349 350 351 352 353 354 355
    }

    #
    # Generate a new NS file. Be nice to not have to this, but not
    # having an NS file could confuse things for the web interface.
    #
    print "Generating a new NS file in $tmpfile\n";
    
    if (!open(NS, ">$tmpfile")) {
	notify("Could not create $tmpfile");
356
	goto loop;
357 358 359 360 361 362 363 364 365 366
    }
    print NS "# Auto generated by the pool daemon\n\n";
    print NS "source tb_compat.tcl\n";
    print NS "set ns [new Simulator]\n";

    foreach my $node (@nodelist) {
	next
	    if (exists($tofree{$node->node_id}));

	my $vname  = $node->vname();
367
	my $nodeid = $node->node_id();
368 369 370 371 372 373 374
	my $osid   = $node->def_boot_osid();
	my $osinfo = OSinfo->Lookup($osid);
	my $osname = $osinfo->osname();

	print NS "set $vname [\$ns node]\n";
	print NS "tb-set-node-os \$$vname $osname\n";
	print NS "tb-set-node-sharingmode \$$vname \"shared_local\"\n";
375
	print NS "tb-fix-node \$$vname $nodeid\n";
376 377 378 379 380 381 382 383 384 385
    }
    while ($newcount) {
	my $id    = 1;
	my $vname = "vhost${id}";

	while (exists($inuse{$vname})) {
	    $id++;
	    $vname = "vhost${id}";
	}
	print NS "set $vname [\$ns node]\n";
386
	print NS "tb-set-node-os \$$vname " . $image->imagename() . "\n";
387
	print NS "tb-set-node-sharingmode \$$vname \"shared_local\"\n";
388 389
	print NS "tb-set-hardware \$$vname $poolnodetype\n"
	    if (defined($poolnodetype) && $poolnodetype ne "");
390 391 392 393 394 395

	$newcount--;
    }
    print NS "\$ns rtproto Static\n";
    print NS "\$ns run\n";
    close(NS);
396
  skipns:
397 398
    chmod(0775, $tmpfile);

399
    last
400
	if ($impotent || $killme);
401

402 403 404
    # Must do this each time before fork.
    tblog_new_session();

405 406 407 408 409 410 411 412 413 414 415 416 417 418
    #
    # Start a swapmod. 
    #
    my $childpid = fork();
    if ($childpid) {
	print "Starting a swap modify. Child is $childpid.\n";
	
	#
	# Wait for the swapmod to complete.
	#
	waitpid($childpid, 0);
	my $exitval = $?;
	print "Swap modify done at " . `date`;
	$experiment->LockDown(1);
419
	$didsomething = 1;
420
	if ($exitval) {
421
	    my $error_data = tblog_lookup_error();
422 423 424

	    if ($error_data->{'cause'} eq "temp") {
		print "Temporary resource shortage; try again later\n";
425
		goto loop;
426
	    }
427 428 429 430 431 432 433 434 435 436 437
	    fatal("swapmod failed");
	}
    }
    else {
	if ($elabman->FlipTo($experiment->unix_gid())) {
	    fatal("Could not flipto $elabman");
	}
	$experiment->LockDown(0);	
	exec("$SWAPEXP -q -w -n -s modify $pid $eid $tmpfile");
	die("Could not exec $SWAPEXP\n");
    }
438
  loop:
439 440 441 442 443 444 445 446 447
    if ($gotlock) {
	my $lock_result =
	    DBQueryWarn("update emulab_locks set value=0 ".
			"where name='pool_daemon'");
	fatal("DB Error releasing lock")
	    if (!defined($lock_result));
	
	$gotlock = 0;
    }
448
    last
449
	if ($oneshot && $didsomething);
450 451 452
    
    # Use a long period; we do not want the pool to change too fast.
    sleep(120);
453
}
454 455
cleanup();
exit(0);
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502

#
# Subscribe to experiment state change events.
#
sub SetupEventHandler()
{
    my $port = @BOSSEVENTPORT@;
    my $URL  = "elvin://localhost:$port";
    
    # Connect to the event system, and subscribe the the events we want
    $EVhandle = event_register($URL, 0);
    
    if (!$EVhandle) {
	fatal("Unable to register with event system");
    }

    my $tuple = address_tuple_alloc();
    if (!$tuple) {
	fatal("Could not allocate an address tuple");
    }

    %$tuple = ( objtype   => libdb::TBDB_TBEVENT_CONTROL(),
		objname   => "pool_daemon",
		host      => $BOSSNODE,
	      );
    
    if (!event_subscribe($EVhandle, \&EventHandler, $tuple)) {
	fatal("Could not subscribe to events");
    }
}

#
# Callback for above.
#
sub EventHandler($$$) {
    my ($handle,$notification,undef) = @_;
    
    my $objname   = event_notification_get_objname($handle,$notification);
    my $eventtype = event_notification_get_eventtype($handle,$notification);

    print "$objname, $eventtype\n";
}

sub fatal($)
{
    my ($msg) = @_;

503 504 505
    SENDMAIL($TBOPS, "Pool Daemon Died", $msg, $TBOPS)
	if (!($impotent || $oneshot));
    
506
    $mailsent = 1;
507
    cleanup();
508 509 510 511 512 513 514 515 516 517
    die($msg);
}

sub notify($)
{
    my ($msg) = @_;

    print "$msg\n";
    SENDMAIL($TBOPS, "Pool Daemon Message", $msg, $TBOPS);
}
518 519 520

sub cleanup()
{
521 522 523 524 525 526
    if ($gotlock) {
	DBQueryWarn("update emulab_locks set value=0 ".
		    "where name='pool_daemon'");
	$gotlock = 0;
    }

527
    MarkDaemonStopped("pool_daemon")
528
	if (!($impotent || $oneshot));
529
}
530 531 532 533

END {
    my $exitcode = $?;

534
    if ($exitcode && !($mailsent || $impotent || $oneshot)) {
535 536 537 538 539 540 541
	SENDMAIL($TBOPS, "Pool Daemon Died",
		 "Please look at $logfile", $TBOPS);
    }
    cleanup();

    $? = $exitcode;
}