pool_daemon.in 12.6 KB
Newer Older
1 2
#!/usr/bin/perl -w
#
3
# Copyright (c) 2009-2016 University of Utah and the Flux Group.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# 
# {{{EMULAB-LICENSE
# 
# This file is part of the Emulab network testbed software.
# 
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
# 
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
# License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
# 
# }}}
23 24 25 26 27
#
use strict;
use English;
use Getopt::Std;
use POSIX qw(tmpnam);
28
use Data::Dumper;
29 30 31 32 33 34

#
# Manage the pool of shared nodes.
#
sub usage()
{
35
    print STDOUT "Usage: pool_daemon [-d] [nsfile]\n" .
36 37 38
	"Use the -d option to prevent daemonization\n";
    exit(-1);
}
39
my $optlist = "dne1fs";
40 41
my $debug    = 0;
my $impotent = 0;
42 43
my $killme   = 0;
my $nofree   = 1;
44 45
my $gotlock  = 0;
my $mailsent = 0;
46
my $oneshot  = 0;
47 48
my $swapin   = 0;
my $force    = 0;
49
my $nsfile;
50 51 52 53 54 55 56

#
# Configure variables
#
my $TB       = "@prefix@";
my $TBOPS    = "@TBOPSEMAIL@";
my $BOSSNODE = "@BOSSNODE@";
57
my $logfile  = "$TB/log/poollog";
58
my $SWAPEXP  = "$TB/bin/swapexp";
59
my $tmpfile  = "/tmp/pool-$$.nsfile";
60 61 62 63 64 65 66 67 68 69

# Testbed Support library
use lib "@prefix@/lib";
use libdb;
use libtblog;
use event;
use libtestbed;
use NodeType;
use Experiment;
use User;
70
use OSImage;
71

72 73 74 75 76 77 78 79
#
# This should run as root.
#
if ($UID != 0) {
    die("*** $0:\n".
	"    Only root can run this script!\n");
}

80 81 82 83 84 85 86 87 88
# We use tblog to determine why swapexp failed.
tblog_stop_capture();

# Locals
my $EVhandle;

# Prototypes.
sub SetupEventHandler();
sub fatal($);
89
sub cleanup();
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
sub notify($);
		      
#
# Turn off line buffering on output (dots ...).
#
$| = 1;

#
# Untaint the path
# 
$ENV{'PATH'} = "/bin:/usr/bin:";
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
my %options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"d"})) {
112
    $debug = 1;
113
}
114 115 116
if (defined($options{"1"})) {
    $oneshot = 1;
}
117
if (defined($options{"n"})) {
118 119
    $impotent = 1;
}
120 121 122
if (defined($options{"f"})) {
    $force = 1;
}
123 124
if (defined($options{"e"})) {
    $nofree = 1;
125
}
126 127 128 129 130 131 132 133
if (defined($options{"s"})) {
    $swapin = 1;
}
# Temporary disable until I feel more confident.
if (!$force) {
    print "Pool daemon is currently disabled. Exiting.\n";
    exit(0);
}
134 135 136 137 138 139
usage()
    if (@ARGV > 1);
$nsfile = $ARGV[0]
    if (@ARGV == 1);
$oneshot = 1
    if (defined($nsfile));
140

141 142 143
if (!$impotent && CheckDaemonRunning("pool_daemon")) {
    fatal("Not starting another pool daemon!");
}
144
# Go to ground.
145
if (! ($oneshot || $debug || $impotent)) {
146
    if (TBBackGround($logfile)) {
147 148 149
	exit(0);
    }
}
150
if (!($impotent || $oneshot) && MarkDaemonRunning("pool_daemon")) {
151 152 153 154 155 156 157 158 159 160
    fatal("Could not mark daemon as running!");
}
#
# Setup a signal handler for newsyslog.
#
sub handler()
{
    ReOpenLog($logfile);
}
$SIG{HUP} = \&handler
161
    if (!($oneshot || $debug || $impotent));
162

163 164 165 166 167 168
print "Pool Daemon starting... pid $$, at ".`date`;

#
# Grab the shared node experiment. This should come from a sitevar.
# Or perhaps we want to manage multiple shared pools?
#
169 170 171 172
my $experiment =
    Experiment->Lookup(TBOPSPID(), "shared-nodes") ||
    Experiment->Lookup(TBOPSPID(), "shared-node");
    
173 174 175 176 177 178
if (!defined($experiment)) {
    fatal("Could not lookup shared node experiment. Exiting ...");
}
my $pid = $experiment->pid();
my $eid = $experiment->eid();

179 180 181 182 183 184 185 186 187
#
# If the experiment is in the new state, then set it to swapped and exit.
# If in the swapped state, also exit since the local Emulab is not using
# shared nodes yet.
#
if ($experiment->state() eq EXPTSTATE_NEW()) {
    $experiment->SetState(EXPTSTATE_SWAPPED());
}

188 189 190 191 192 193 194 195
#
# We need this user for running swapexp below.
#
my $elabman = User->Lookup("elabman");
if (!defined($elabman)) {
    fatal("Could not lookup elabman user. Exiting ...");
}

196 197 198
#
# We need to have this image available.
#
199
my $image = OSImage->Lookup(TBOPSPID(), "FEDORA8-OVZ-STD");
200 201
if (!defined($image)) {
    print STDERR "Pool Daemon exiting since there is suitable image\n";
202
    cleanup();
203 204 205
    exit(0);
}

206 207 208 209 210 211 212 213 214 215 216 217 218
#
# And handler for TERM since we really do not want this to be
# interrupted. Just set a flag that will cause it to exit at
# the next loop.
#
sub sigterm()
{
    print "Got a TERM signal; arranging to exit soon\n";
    $killme = 1;
}
$SIG{TERM} = \&sigterm;

while (!$killme) {
219
    my $disabled;
220
    my $didsomething = 0;
221
    
222 223
    print "Pool Daemon running at ".`date`;

224 225
    if (! TBGetSiteVar("web/nologins", \$disabled) || $disabled) {
	print "  Skipping this loop cause of nologins\n";
226
	goto loop;
227 228
    }

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
    #
    # Serialize this part with the mapper.
    #
    if (!$impotent) {
	my $tries = 0;
	
	while (1) {
	    #
	    # Use a countup/countdown counter, so that multiple mappers
	    # can run, but not while the pool_daemon is running.
	    #
	    my $lock_result =
		DBQueryWarn("update emulab_locks set value=-1 ".
			    "where name='pool_daemon' and value=0");
	    fatal("DB Error going for lock")
		if (!defined($lock_result));

	    $gotlock = $lock_result->affectedrows;

	    last
		if ($gotlock);

	    if ($tries++ > 100) {
		notify("Cannot get the lock after a really long time");
		$tries = 0;
	    }
255
	    print "Waiting for pool daemon lock ...\n";
256 257 258 259
	    sleep(10);
	}
    }
    
260 261 262
    Node->FlushAll();
    $experiment->Refresh() == 0
	or fatal("Could not reload $experiment");
263

264
    if ($experiment->state() eq EXPTSTATE_SWAPPED() && !$swapin) {
265 266 267
	print "Skipping this loop cause the experiment is swapped\n";
	goto loop;
    }
268 269 270 271 272 273
    if (defined($nsfile)) {
	print "Copying $nsfile to $tmpfile\n";
	system("/bin/cp -f $nsfile $tmpfile") == 0
	    or fatal("Could not copy nsfile");
	goto skipns;
    }
274
    
275 276 277 278 279 280 281
    my @nodelist = $experiment->NodeList();
    my %inuse    = ();
    my %tofree   = ();
    my $loaded   = 0;
    my $unloaded = 0;
    my $newcount = 0;

282 283 284 285 286
    # Sitevars to control poolsize. Reloas each loop.
    my $maxpoolsize  = TBGetSiteVar("general/maxpoolsize");
    my $minpoolsize  = TBGetSiteVar("general/minpoolsize");
    my $poolnodetype = TBGetSiteVar("general/poolnodetype");

287 288 289 290 291 292 293 294 295
    #
    # Sanity check the nodetype to make sure it has been set.
    #
    my $typeinfo = NodeType->Lookup("pc", $poolnodetype);
    if (!defined($typeinfo)) {
	fatal("$poolnodetype is not a valid pc type. Please set the ".
	      "'general/poolnodetype' site variable");
    }

296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
    #
    # Look to see how each of the nodes is packed. This is
    # advisory; we will not know for sure until tables locked
    # in nfree and we can count the number of vnodes on it.
    #
    foreach my $node (@nodelist) {
	my $reservation = $node->Reservation();
	# Node released somewhere else. 
	next
	    if (!defined($reservation));
	next
	    if (!$experiment->SameExperiment($reservation));

	my $maxsharecount = $node->MaxShareCount();
	# Transient error?
	next
	    if ($maxsharecount < 0);

	# Look for nodes with nothing on them.
	my $vnodecount = $node->HasVirtualNodes();
	# Transient error?
	next
	    if ($vnodecount < 0);

320
	if ($vnodecount == 0) {
321 322
	    print "$node no longer has virtual nodes on it.\n";
	    # Free the node unless we would go below the minpoolsize.
323 324
	    if (!$nofree &&
		scalar(@nodelist) - scalar(keys(%tofree)) > $minpoolsize) {
325 326 327
		print "  Adding to free list.\n";
		$tofree{$node->node_id()} = $node;
	    }
328 329 330 331
	    next;
	}
	# Count up loaded vs. unloaded nodes.
	my $factor = $maxsharecount / $vnodecount;
332
	print "$node load factor is $factor\n";
333 334 335 336 337 338 339 340 341
	if ($factor < 0.5) {
	    $unloaded++;
	}
	else {
	    $loaded++;
	}
	$inuse{$node->vname()} = $node;
    }
    #
342
    # Try to keep at least one unloaded machine available, but stay under max.
343
    #
Leigh Stoller's avatar
Leigh Stoller committed
344
    if (scalar(@nodelist) - $loaded < 1) {
345 346 347 348 349
	if (keys(%tofree)) {
	    # rescue one from the free list.
	    my $key = (keys(%tofree))[0];
	    delete($tofree{$key});
	}
350
 	elsif (scalar(@nodelist) < $maxpoolsize) {
351 352 353
	    $newcount++;
	}
    }
354 355 356 357 358
    if (!$debug) {
	if (! (keys(%tofree) || $newcount)) {
	    exit(0)
		if ($impotent);
	    goto loop;
359 360
	    # This counts as doing something.
	    $didsomething = 1;
361
	}
362 363 364 365 366 367 368 369 370 371
    }

    #
    # Generate a new NS file. Be nice to not have to this, but not
    # having an NS file could confuse things for the web interface.
    #
    print "Generating a new NS file in $tmpfile\n";
    
    if (!open(NS, ">$tmpfile")) {
	notify("Could not create $tmpfile");
372
	goto loop;
373 374 375 376 377 378 379 380 381 382
    }
    print NS "# Auto generated by the pool daemon\n\n";
    print NS "source tb_compat.tcl\n";
    print NS "set ns [new Simulator]\n";

    foreach my $node (@nodelist) {
	next
	    if (exists($tofree{$node->node_id}));

	my $vname  = $node->vname();
383
	my $nodeid = $node->node_id();
384
	my $osid   = $node->def_boot_osid();
385 386
	my $osimage= OSImage->Lookup($osid);
	my $osname = $osimage->imagename();
387 388 389 390

	print NS "set $vname [\$ns node]\n";
	print NS "tb-set-node-os \$$vname $osname\n";
	print NS "tb-set-node-sharingmode \$$vname \"shared_local\"\n";
391
	print NS "tb-fix-node \$$vname $nodeid\n";
392 393 394 395 396 397 398 399 400 401
    }
    while ($newcount) {
	my $id    = 1;
	my $vname = "vhost${id}";

	while (exists($inuse{$vname})) {
	    $id++;
	    $vname = "vhost${id}";
	}
	print NS "set $vname [\$ns node]\n";
402
	print NS "tb-set-node-os \$$vname " . $image->imagename() . "\n";
403
	print NS "tb-set-node-sharingmode \$$vname \"shared_local\"\n";
404 405
	print NS "tb-set-hardware \$$vname $poolnodetype\n"
	    if (defined($poolnodetype) && $poolnodetype ne "");
406 407 408 409 410 411

	$newcount--;
    }
    print NS "\$ns rtproto Static\n";
    print NS "\$ns run\n";
    close(NS);
412
  skipns:
413 414
    chmod(0775, $tmpfile);

415
    last
416
	if ($impotent || $killme);
417

418 419 420
    # Must do this each time before fork.
    tblog_new_session();

421 422 423 424 425 426 427 428 429 430 431 432 433 434
    #
    # Start a swapmod. 
    #
    my $childpid = fork();
    if ($childpid) {
	print "Starting a swap modify. Child is $childpid.\n";
	
	#
	# Wait for the swapmod to complete.
	#
	waitpid($childpid, 0);
	my $exitval = $?;
	print "Swap modify done at " . `date`;
	$experiment->LockDown(1);
435
	$didsomething = 1;
436
	if ($exitval) {
437
	    my $error_data = tblog_lookup_error();
438 439 440

	    if ($error_data->{'cause'} eq "temp") {
		print "Temporary resource shortage; try again later\n";
441
		goto loop;
442
	    }
443 444 445 446 447 448 449 450 451 452 453
	    fatal("swapmod failed");
	}
    }
    else {
	if ($elabman->FlipTo($experiment->unix_gid())) {
	    fatal("Could not flipto $elabman");
	}
	$experiment->LockDown(0);	
	exec("$SWAPEXP -q -w -n -s modify $pid $eid $tmpfile");
	die("Could not exec $SWAPEXP\n");
    }
454
  loop:
455 456 457 458 459 460 461 462 463
    if ($gotlock) {
	my $lock_result =
	    DBQueryWarn("update emulab_locks set value=0 ".
			"where name='pool_daemon'");
	fatal("DB Error releasing lock")
	    if (!defined($lock_result));
	
	$gotlock = 0;
    }
464
    last
465
	if ($oneshot && $didsomething);
466 467 468
    
    # Use a long period; we do not want the pool to change too fast.
    sleep(120);
469
}
470 471
cleanup();
exit(0);
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518

#
# Subscribe to experiment state change events.
#
sub SetupEventHandler()
{
    my $port = @BOSSEVENTPORT@;
    my $URL  = "elvin://localhost:$port";
    
    # Connect to the event system, and subscribe the the events we want
    $EVhandle = event_register($URL, 0);
    
    if (!$EVhandle) {
	fatal("Unable to register with event system");
    }

    my $tuple = address_tuple_alloc();
    if (!$tuple) {
	fatal("Could not allocate an address tuple");
    }

    %$tuple = ( objtype   => libdb::TBDB_TBEVENT_CONTROL(),
		objname   => "pool_daemon",
		host      => $BOSSNODE,
	      );
    
    if (!event_subscribe($EVhandle, \&EventHandler, $tuple)) {
	fatal("Could not subscribe to events");
    }
}

#
# Callback for above.
#
sub EventHandler($$$) {
    my ($handle,$notification,undef) = @_;
    
    my $objname   = event_notification_get_objname($handle,$notification);
    my $eventtype = event_notification_get_eventtype($handle,$notification);

    print "$objname, $eventtype\n";
}

sub fatal($)
{
    my ($msg) = @_;

519 520 521
    SENDMAIL($TBOPS, "Pool Daemon Died", $msg, $TBOPS)
	if (!($impotent || $oneshot));
    
522
    $mailsent = 1;
523
    cleanup();
524 525 526 527 528 529 530 531 532 533
    die($msg);
}

sub notify($)
{
    my ($msg) = @_;

    print "$msg\n";
    SENDMAIL($TBOPS, "Pool Daemon Message", $msg, $TBOPS);
}
534 535 536

sub cleanup()
{
537 538 539 540 541 542
    if ($gotlock) {
	DBQueryWarn("update emulab_locks set value=0 ".
		    "where name='pool_daemon'");
	$gotlock = 0;
    }

543
    MarkDaemonStopped("pool_daemon")
544
	if (!($impotent || $oneshot));
545
}
546 547 548 549

END {
    my $exitcode = $?;

550
    if ($exitcode && !($mailsent || $impotent || $oneshot)) {
551 552 553 554 555 556 557
	SENDMAIL($TBOPS, "Pool Daemon Died",
		 "Please look at $logfile", $TBOPS);
    }
    cleanup();

    $? = $exitcode;
}