Commit 8f8b8b94 authored by Leigh B Stoller
Browse files

Merge branch 'master' of git-public.flux.utah.edu:/flux/git/emulab-devel

parents 4f83dfac 9d45ca7e
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2009 University of Utah and the Flux Group.
# Copyright (c) 2000-2010 University of Utah and the Flux Group.
# All rights reserved.
#
use strict;
......@@ -284,6 +284,8 @@ sub AssignLoop()
TBDebugTimeStamp("mapper loop started");
while (1) {
my $gotlock = 0;
chat("Assign run $currentrun\n");
my $prefix = ($debug || $regression ? "$pid-$eid" : "$pid-$eid-$$");
......@@ -297,6 +299,28 @@ sub AssignLoop()
chat("Trying assign on an empty testbed.\n");
}
#
# Serialize with the pool daemon if using shared nodes.
# XXX When using shared nodes, only one can proceed at a
# time through assignment. This is okay for now since few
# experiments are using shared nodes. Eventually needs to be
# a barrier.
#
if ((!($impotent || $regression)) && $vtop->sharednodecount()) {
while (1) {
my $lock_result =
DBQueryWarn("select get_lock('pool_daemon', 10)");
fatal("DB Error tring to get pool_daemon lock")
if (!defined($lock_result));
($gotlock) = $lock_result->fetchrow_array();
last
if ($gotlock);
chat("Waiting for pool daemon lock ...\n");
}
}
#
# RunAssign returns 0 if successful.
# returns -1 if failure, but assign says to stop trying.
......@@ -305,6 +329,11 @@ sub AssignLoop()
#
my $retval = RunAssign($precheck, $prefix);
if ($gotlock) {
DBQueryWarn("select release_lock('pool_daemon')")
or fatal("Could not release the pool lock");
}
# Success!
last
if ($retval == 0);
......
......@@ -153,6 +153,13 @@ while (my ($node_id,$pid,$eid,$vname,$erole) = $db_result->fetchrow_array()) {
($erole eq "boss" || $erole eq "boss+router")) {
push @{$cnames{$node_id}}, "www.$eid.$pid";
}
#
# Special case for inner elab ops; add CNAME for event-server.
#
if (defined($erole) &&
($erole eq "ops" || $erole eq "ops+fs")) {
push @{$cnames{$node_id}}, "event-server.$eid.$pid";
}
}
......
......@@ -19,10 +19,11 @@ sub usage()
"Use the -d option to prevent daemonization\n";
exit(-1);
}
my $optlist = "dn";
my $optlist = "dne";
my $debug = 0;
my $impotent = 0;
my $startup = 0;
my $killme = 0;
my $nofree = 1;
#
# This should run as root.
......@@ -88,10 +89,13 @@ if (@ARGV != 0) {
usage();
}
if (defined($options{"d"})) {
$debug = $options{"d"};
$debug = 1;
}
if (defined($options{"n"})) {
$impotent = $options{"n"};
$impotent = 1;
}
if (defined($options{"e"})) {
$nofree = 1;
}
if (!$impotent && CheckDaemonRunning("pool_daemon")) {
......@@ -164,21 +168,26 @@ if (!defined($image)) {
exit(0);
}
while (1) {
#
# Add a handler for TERM since we really do not want this to be
# interrupted. Just set a flag that will cause it to exit at
# the next loop.
#
sub sigterm()
{
    # Log that the signal arrived, then raise the exit flag. The main
    # loop checks $killme at the top of each pass, so work in progress
    # is not cut short here.
    print("Got a TERM signal; arranging to exit soon\n");
    $killme = 1;
}
$SIG{TERM} = \&sigterm;
while (!$killme) {
my $disabled;
# Use a long period; we do not want the pool to change too fast.
if (!$startup) {
$startup++;
}
else {
sleep(120);
}
print "Pool Daemon running at ".`date`;
if (! TBGetSiteVar("web/nologins", \$disabled) || $disabled) {
print " Skipping this loop cause of nologins\n";
next;
goto loop;
}
Node->FlushAll();
......@@ -196,6 +205,24 @@ while (1) {
my $minpoolsize = TBGetSiteVar("general/minpoolsize");
my $poolnodetype = TBGetSiteVar("general/poolnodetype");
#
# Serialize this part with the mapper.
#
if (!$impotent) {
while (1) {
my $lock_result =
DBQueryWarn("select get_lock('pool_daemon', 5)");
fatal("DB Error tring to get pool_daemon lock")
if (!defined($lock_result));
my ($gotlock) = $lock_result->fetchrow_array();
last
if ($gotlock);
print "Waiting for pool daemon lock ...\n";
}
}
#
# Look to see how each of the nodes is packed. This is
# advisory; we will not know for sure until tables locked
......@@ -220,7 +247,7 @@ while (1) {
next
if ($vnodecount < 0);
if ($vnodecount == 0) {
if ($vnodecount == 0 && !$nofree) {
print "$node no longer has virtual nodes on it.\n";
# Free the node unless we would go below the minpoolsize.
if (scalar(@nodelist) - scalar(keys(%tofree)) > $minpoolsize) {
......@@ -231,6 +258,7 @@ while (1) {
}
# Count up loaded vs. unloaded nodes.
my $factor = $maxsharecount / $vnodecount;
print "$node load factor is $factor\n";
if ($factor < 0.5) {
$unloaded++;
}
......@@ -248,14 +276,14 @@ while (1) {
my $key = (keys(%tofree))[0];
delete($tofree{$key});
}
elsif (scalar(@nodelist) < $maxpoolsize) {
elsif (scalar(@nodelist) < $maxpoolsize) {
$newcount++;
}
}
if (! (keys(%tofree) || $newcount)) {
exit(0)
if ($impotent);
next;
goto loop;
}
#
......@@ -268,7 +296,7 @@ while (1) {
if (!open(NS, ">$tmpfile")) {
notify("Could not create $tmpfile");
next;
goto loop;
}
print NS "# Auto generated by the pool daemon\n\n";
print NS "source tb_compat.tcl\n";
......@@ -311,7 +339,7 @@ while (1) {
chmod(0775, $tmpfile);
exit(0)
if ($impotent);
if ($impotent || $killme);
# Must do this each time before fork.
tblog_new_session();
......@@ -335,7 +363,7 @@ while (1) {
if ($error_data->{'cause'} eq "temp") {
print "Temporary resource shortage; try again later\n";
next;
goto loop;
}
fatal("swapmod failed");
}
......@@ -348,6 +376,12 @@ while (1) {
exec("$SWAPEXP -q -w -n -s modify $pid $eid $tmpfile");
die("Could not exec $SWAPEXP\n");
}
loop:
DBQueryWarn("select release_lock('pool_daemon')")
or fatal("Could not release the pool lock");
# Use a long period; we do not want the pool to change too fast.
sleep(120);
}
#
......
......@@ -199,6 +199,7 @@ if (!defined($experiment)) {
}
my $special = ($pid eq "testbed" || $pid eq "tbres" ||
$pid eq "emulab-ops" || $pid eq "utahstud");
my $newsetup = ($pid eq "testbed");
#
# Print starting message.
......@@ -1285,8 +1286,10 @@ sub doSwapin($) {
#
print "Resetting OS and rebooting.\n";
TBDebugTimeStamp("launching os_setup");
if (!($os_setup_pid = fork())) {
exec("os_setup $pid $eid") or return 1;
if (!($os_setup_pid = fork())) {
my $oscmd = ($newsetup ? "os_setup_new" : "os_setup");
exec("$oscmd $pid $eid") or return 1;
} elsif ($os_setup_pid == -1) {
tberror "Fork failed.";
return 1;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment