Commit 53fc7653 authored by Mike Hibler's avatar Mike Hibler

Merge remote branch 'central/master'

parents e19b2a93 8da2e96a
......@@ -18,60 +18,6 @@ Note that some instructions may have steps that need to occur at a few
different points in the install process - these are marked with the
earliest time one of the steps needs to occur.
20100507: After install
Minor change to apache.conf, so you need to install that and
restart apache.
boss> cd /your/obj/tree/apache
boss> gmake
To be safe, you should backup your existing config file, and
then install the new one.
boss> cp /usr/local/etc/apache/httpd.conf ~/httpd.conf
boss> sudo gmake install
And restart apache if it's currently running.
boss> sudo /usr/local/etc/rc.d/apache.sh restart
20100108: After install:
Change to how stated does its logging. In /etc/newsyslog.conf,
find the line for stated, and append this to the end of that line.
/var/run/stated.pid 31
then restart stated:
sudo kill -USR1 `cat /var/run/stated.pid`
20091228: After install:
New daemon to better manage the transition of nodes into
hwdown after a failure. In /etc/newsyslog.conf, add this line:
/usr/testbed/log/checknodes.log 640 14 300 * Z /var/run/checknodes.pid
Then start this daemon:
boss> sudo /usr/testbed/sbin/checknodes_daemon
Also install new rc.d files on boss.
Note that this is not done as part of toplevel install.
cd /your/object/tree/obj/rc.d
sudo gmake install
20091103: After install:
The structure of /usr/testbed/expinfo has changed. With the
testbed "shutdown" (web interface locked out), run the
following script:
boss> sudo /usr/testbed/sbin/fixexpinfo
20091102: After install:
Some utilities may make use of location information on
......
......@@ -335,8 +335,13 @@ sub Create($$$$;$)
# Insert the sshkey if we got one.
if (defined($keys)) {
foreach my $keyref (@{ $keys }) {
my $safe_key = DBQuoteSpecial($keyref->{'key'});
my $safe_type = DBQuoteSpecial($keyref->{'type'});
my $key = $keyref->{'key'};
my $type = $keyref->{'type'};
chomp($key);
chomp($type);
my $safe_key = DBQuoteSpecial($key);
my $safe_type = DBQuoteSpecial($type);
DBQueryWarn("replace into geni_userkeys set ".
" uuid=$safe_uuid, created=now(), ".
......@@ -380,8 +385,13 @@ sub Modify($$$$)
if (!DBQueryWarn("delete from geni_userkeys where uuid='$uuid'"));
foreach my $keyref (@{ $keys }) {
my $safe_key = DBQuoteSpecial($keyref->{'key'});
my $safe_type = DBQuoteSpecial($keyref->{'type'});
my $key = $keyref->{'key'};
my $type = $keyref->{'type'};
chomp($key);
chomp($type);
my $safe_key = DBQuoteSpecial($key);
my $safe_type = DBQuoteSpecial($type);
DBQueryWarn("replace into geni_userkeys set ".
" uuid='$uuid', created=now(), ".
......
......@@ -68,6 +68,7 @@ my $quiet = 0;
my $clear = 0;
my $warnings = 0;
my $maxrun = 3; # Maximum number of times we run assign.
my $gotlock = 0;
my $vtop;
#
......@@ -284,8 +285,6 @@ sub AssignLoop()
TBDebugTimeStamp("mapper loop started");
while (1) {
my $gotlock = 0;
chat("Assign run $currentrun\n");
my $prefix = ($debug || $regression ? "$pid-$eid" : "$pid-$eid-$$");
......@@ -301,23 +300,24 @@ sub AssignLoop()
#
# Serialize with the pool daemon if using shared nodes.
# XXX When using shared nodes, only one can proceed at a
# time through assignment. This is okay for now since few
# experiments are using shared nodes. Eventually needs to be
# a barrier.
#
if (0 && (!($impotent || $regression)) && $vtop->sharednodecount()) {
if ((!($impotent || $regression)) && $vtop->sharednodecount()) {
while (1) {
#
# Use a countup/countdown counter, so that multiple mappers
# can run, but not while the pool_daemon is running.
#
my $lock_result =
DBQueryWarn("select get_lock('pool_daemon', 10)");
fatal("DB Error tring to get pool_daemon lock")
if (!defined($lock_result));
DBQueryFatal("update emulab_locks set value=value+1 ".
"where name='pool_daemon' and value>=0");
$gotlock = $lock_result->affectedrows;
($gotlock) = $lock_result->fetchrow_array();
last
if ($gotlock);
chat("Waiting for pool daemon lock ...\n");
sleep(10);
}
}
......@@ -330,8 +330,9 @@ sub AssignLoop()
my $retval = RunAssign($precheck, $prefix);
if ($gotlock) {
DBQueryWarn("select release_lock('pool_daemon')")
or fatal("Could not release the pool lock");
DBQueryFatal("update emulab_locks set value=value-1 ".
"where name='pool_daemon'");
$gotlock = 0;
}
# Success!
......@@ -742,6 +743,15 @@ END {
# Watch for getting here cause of a die()/exit() statement someplace.
my $exitcode = $?;
#
# Do not want to leave this around, it will lock the pool daemon out.
#
if ($gotlock) {
DBQueryFatal("update emulab_locks set value=value-1 ".
"where name='pool_daemon'");
$gotlock = 0;
}
if ($exitcode && $exitcode != $WRAPPER_FAILED) {
$exitcode = $WRAPPER_FAILED|$WRAPPER_FAILED_FATALLY;
}
......
......@@ -24,14 +24,8 @@ my $debug = 0;
my $impotent = 0;
my $killme = 0;
my $nofree = 1;
#
# This should run as root.
#
if ($UID != 0) {
die("*** $0:\n".
" Only root can run this script!\n");
}
my $gotlock = 0;
my $mailsent = 0;
#
# Configure variables
......@@ -54,6 +48,14 @@ use User;
use OSinfo;
use Image;
#
# This should run as root.
#
if ($UID != 0) {
die("*** $0:\n".
" Only root can run this script!\n");
}
# We use tblog to determine why swapexp failed.
tblog_stop_capture();
......@@ -144,11 +146,6 @@ my $eid = $experiment->eid();
if ($experiment->state() eq EXPTSTATE_NEW()) {
$experiment->SetState(EXPTSTATE_SWAPPED());
}
if ($experiment->state() eq EXPTSTATE_SWAPPED()) {
print STDERR "Pool Daemon exiting since the experiment is swapped\n";
cleanup();
exit(0);
}
#
# We need this user for running swapexp below.
......@@ -190,9 +187,46 @@ while (!$killme) {
goto loop;
}
#
# Serialize this part with the mapper.
#
if (!$impotent) {
my $tries = 0;
while (1) {
#
# Use a countup/countdown counter, so that multiple mappers
# can run, but not while the pool_daemon is running.
#
my $lock_result =
DBQueryWarn("update emulab_locks set value=-1 ".
"where name='pool_daemon' and value=0");
fatal("DB Error going for lock")
if (!defined($lock_result));
$gotlock = $lock_result->affectedrows;
last
if ($gotlock);
if ($tries++ > 100) {
notify("Cannot get the lock after a really long time");
$tries = 0;
}
chat("Waiting for pool daemon lock ...\n");
sleep(10);
}
}
Node->FlushAll();
$experiment->Refresh() == 0
or fatal("Could not reload $experiment");
if ($experiment->state() eq EXPTSTATE_SWAPPED()) {
print "Skipping this loop cause the experiment is swapped\n";
goto loop;
}
my @nodelist = $experiment->NodeList();
my %inuse = ();
my %tofree = ();
......@@ -205,24 +239,6 @@ while (!$killme) {
my $minpoolsize = TBGetSiteVar("general/minpoolsize");
my $poolnodetype = TBGetSiteVar("general/poolnodetype");
#
# Serialize this part with the mapper.
#
if (!$impotent) {
while (1) {
my $lock_result =
DBQueryWarn("select get_lock('pool_daemon', 5)");
fatal("DB Error tring to get pool_daemon lock")
if (!defined($lock_result));
my ($gotlock) = $lock_result->fetchrow_array();
last
if ($gotlock);
print "Waiting for pool daemon lock ...\n";
}
}
#
# Look to see how each of the nodes is packed. This is
# advisory; we will not know for sure until tables locked
......@@ -247,10 +263,11 @@ while (!$killme) {
next
if ($vnodecount < 0);
if ($vnodecount == 0 && !$nofree) {
if ($vnodecount == 0) {
print "$node no longer has virtual nodes on it.\n";
# Free the node unless we would go below the minpoolsize.
if (scalar(@nodelist) - scalar(keys(%tofree)) > $minpoolsize) {
if (!$nofree &&
scalar(@nodelist) - scalar(keys(%tofree)) > $minpoolsize) {
print " Adding to free list.\n";
$tofree{$node->node_id()} = $node;
}
......@@ -280,10 +297,12 @@ while (!$killme) {
$newcount++;
}
}
if (! (keys(%tofree) || $newcount)) {
exit(0)
if ($impotent);
goto loop;
if (!$debug) {
if (! (keys(%tofree) || $newcount)) {
exit(0)
if ($impotent);
goto loop;
}
}
#
......@@ -338,7 +357,7 @@ while (!$killme) {
close(NS);
chmod(0775, $tmpfile);
exit(0)
last
if ($impotent || $killme);
# Must do this each time before fork.
......@@ -377,12 +396,21 @@ while (!$killme) {
die("Could not exec $SWAPEXP\n");
}
loop:
DBQueryWarn("select release_lock('pool_daemon')")
or fatal("Could not release the pool lock");
if ($gotlock) {
my $lock_result =
DBQueryWarn("update emulab_locks set value=0 ".
"where name='pool_daemon'");
fatal("DB Error releasing lock")
if (!defined($lock_result));
$gotlock = 0;
}
# Use a long period; we do not want the pool to change too fast.
sleep(120);
}
cleanup();
exit(0);
#
# Subscribe to experiment state change events.
......@@ -431,6 +459,7 @@ sub fatal($)
my ($msg) = @_;
SENDMAIL($TBOPS, "Pool Daemon Died", $msg, $TBOPS);
$mailsent = 1;
cleanup();
die($msg);
}
......@@ -445,6 +474,24 @@ sub notify($)
#
# Common shutdown path: give back the pool_daemon lock if this process
# still holds it, then mark the daemon as stopped (skipped when running
# in impotent mode).
#
sub cleanup()
{
    # $gotlock is set once the emulab_locks row has been claimed; reset
    # the row to 0 so it is not left looking like the lock is still held.
    if ($gotlock) {
        my $release_query = "update emulab_locks set value=0 ".
            "where name='pool_daemon'";

        # Best effort: the query result is deliberately not checked here.
        DBQueryWarn($release_query);
        $gotlock = 0;
    }

    unless ($impotent) {
        MarkDaemonStopped("pool_daemon");
    }
}
#
# Exit hook. On a non-zero exit where no failure mail has gone out yet,
# mail a pointer to the logfile; then run cleanup() and put the original
# exit status back into $?.
#
END {
    # Capture the status up front; it is restored below after the
    # mail/cleanup calls (presumably because those can overwrite $?).
    my $saved_status = $?;

    SENDMAIL($TBOPS, "Pool Daemon Died",
	     "Please look at $logfile", $TBOPS)
	if ($saved_status && !$mailsent);

    cleanup();
    $? = $saved_status;
}
......@@ -658,7 +658,7 @@ sub createVlan($$$) {
my $RetVal = $self->hammer($closure, "$id: creation");
if (!defined($RetVal)) { return 0; }
print " Creating VLAN $vlan_id as VLAN #$vlan_number on " .
"$self->{NAME} ... ";
"$self->{NAME} ...\n";
# You'd think you'd be able to add IgmpSnoopEnable to the above as one
......
......@@ -29,6 +29,7 @@ my $SFSUSERS = "/etc/sfs/sfs_users";
my $SFSHOSTKEY = "/etc/sfs/sfs_host_key";
my $VARACCTDIR = "/var/account";
my $IFTAB = "/etc/iftab";
my $ANACRON = "/usr/sbin/anacron";
#
# Dead wood in $BINDIR
......@@ -68,6 +69,7 @@ if (getopts("N", \%options)) {
$noumount = 1;
}
}
my $isvm = ((-e "$ETCDIR/genvmtype") ? 1 : 0);
#
# First clean up the node as it would be if free.
......@@ -90,7 +92,7 @@ foreach my $dbfile (@DBFILES) {
# This allows us to clean up /users and /proj. We only do the cleanup if
# if unmount succeeds and even then, we do it in a safe way.
#
if (!$noumount && -e "/share/freebsd") {
if (!($noumount || $isvm) && -e "/share/freebsd") {
print "Unmounting NFS filesystems ...\n";
if (!system("umount -at nfs")) {
#
......@@ -113,19 +115,22 @@ if (!$noumount && -e "/share/freebsd") {
# to send mail will hang and eventually timeout. This makes things take a
# lot longer, so we hack and temporarily move sendmail while we run anacron!
#
if (-x "/usr/sbin/sendmail") {
my $didrename = 0;
print "Running cron jobs (might see failed attempts to mail output).\n";
if (rename("/usr/sbin/sendmail", "/usr/sbin/_sendmail")) {
$didrename = 1;
}
system("anacron -d -f -s -n");
if ($didrename) {
rename("/usr/sbin/_sendmail", "/usr/sbin/sendmail");
if (-x "$ANACRON") {
if (-x "/usr/sbin/sendmail") {
my $didrename = 0;
print "Running cron jobs ".
"(might see failed attempts to mail output).\n";
if (rename("/usr/sbin/sendmail", "/usr/sbin/_sendmail")) {
$didrename = 1;
}
system("$ANACRON -d -f -s -n");
if ($didrename) {
rename("/usr/sbin/_sendmail", "/usr/sbin/sendmail");
}
} else {
print "Running cron jobs.\n";
system("$ANACRON -d -f -s -n");
}
} else {
print "Running cron jobs.\n";
system("anacron -d -f -s -n");
}
#
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment