Commit d08b5e41 authored by Leigh Stoller's avatar Leigh Stoller

Make Frisbee.Redux live:

* Add appropriate goo to os/GNUMakefile so that Frisbee daemon is
  built and installed.

* Rework the frisbee launcher slightly. Aside from little changes
  (send email to tbops when frisbeed dies, new cmdline syntax to
  frisbeed), allow for frisbeed to exit gracefully after a period of
  inactivity (no client requests for 30 minutes, at present). In order
  to prevent a race condition with a new client being added (and
  rebooted) and frisbeed terminating before the client gets started,
  add a load_busy indicator to the images table (next to load_address
  slot) and set that to one each time frisbeelauncher is invoked.
  When frisbeed exits, test and clear that bit atomically (lock
  tables) and go around another time (restart frisbeed for another 30
  minute period).

* Rework waitmode in os_load. Wait for all of the nodes to finish at
  once, and track which nodes never finish. Retry those nodes again by
  rebooting. The number of retries is configurable in the script, and
  is currently set to one. This should take care of some PXE boot
  related problems, although obviously not all.

* Got rid of -w option to os_load and made waitmode the default. The
  -s option can be used to start a reload, but not to wait for it to
  complete.

* Minor changes to sched_reload and reload_daemon; pass in -s option
  to os_load.
parent 11db77e6
......@@ -8,7 +8,7 @@ SUBDIR = os
include $(OBJDIR)/Makeconf
SUBDIRS = imagezip
SUBDIRS = imagezip frisbee.redux
all: $(SUBDIRS) split-image.sh
......@@ -17,8 +17,12 @@ include $(TESTBED_SRCDIR)/GNUmakerules
imagezip:
@$(MAKE) -C imagezip all
frisbee.redux:
@$(MAKE) -C frisbee.redux all
install: $(INSTALL_SBINDIR)/split-image.sh
@$(MAKE) -C imagezip install
@$(MAKE) -C frisbee.redux install
control-install:
@$(MAKE) -C imagezip install
......@@ -33,6 +37,7 @@ clean: subdir-clean
subdir-clean:
@$(MAKE) -C imagezip clean
@$(MAKE) -C frisbee.redux clean
distclean: subdir-distclean
......
......@@ -5,6 +5,12 @@ use Sys::Syslog;
# Configure variables
my $TB = "@prefix@";
my $TBOPS = "@TBOPSEMAIL@";
#
# Turn off line buffering on output
#
$| = 1;
use lib "@prefix@/lib";
use libdb;
......@@ -17,24 +23,36 @@ my $BASEPORT = "3564";
my $LOGFILE = "$TB/log/frisbeelauncher";
# Process command line options
getopts('nd',\%opt);
getopts('d',\%opt);
if (@ARGV != 1) {
exit &usage();
}
$imageid = shift @ARGV;
# Grab the filename to give to frisbee
my $filename = &get_filename($imageid);
# Make sure that the user can read the image file or bomb out right now.
if (! -r $filename) {
die("You do not have permission to read the image file for\n".
"imageid $imageid: $filename\n");
}
#
# Need to lock the tables here, since we are going to mess with the
# busy indicator.
#
&lock_tables;
# Try to discover if some other process is handling this address
$address = &get_address($imageid);
if ($address) {
&debug("A server (address $address) is already running for image $imageid\n");
if ($address && &keepbusy($imageid)) {
&unlock_tables;
&debug("A server ($address) is already running for image $imageid\n");
exit (0);
}
# We're going to pick an address from these tables, so we need to lock
# the tables to avoid race conditions
&lock_tables;
# Pick an address: Die if unsuccessful, set address and unlock if successful
$address = &pick_address;
&debug("Picked address $address\n");
......@@ -47,14 +65,6 @@ if (!$address) {
&set_address($imageid,$address);
&unlock_tables;
# Grab the filename to give to frisbee
my $filename = &get_filename($imageid);
# Make sure that the user can read the image file or bomb out right now.
if (! -r $filename) {
die "You don't have permission to read the image file for imageid $imageid: $filename\n";
}
# Run in the background
if (TBBackGround($LOGFILE)) {
exit(0);
......@@ -66,33 +76,55 @@ $SIG{HUP} = $SIG{INT} = $SIG{TERM} = \&cleanup;
# Now, we actually launch Frisbee
while (1) {
#
# Each time the server exits, test the busy bit to see if
# it should keep going. This has to be done with tables locked
# since another caller is going to bump it.
#
&lock_tables();
if (! &testbusy($imageid)) {
last;
}
&unlock_tables();
if ($child_pid = fork()) {
# Wait for child to exit
wait();
if (!$?) {
# Proccess exited normally, so we can exit
&debug("Frisbee exited normally\n");
last;
} else {
print STDERR "$$: Frisbee died abnormally, with return " .
"value $? - restarting\n";
waitpid($child_pid, 0);
if ($?) {
SENDMAIL($TBOPS, "TESTBED: Frisbeed Failed!",
"Imageid: $imageid\n".
"Address: $address\n\n".
"Process $child_pid exited with value $?.\n".
"Please look at the syslog for frisbeed!\n\n".
"NOTE: Another frisbeed will not start!\n");
#
# Dump early. This will leave the address
# in the DB, so that another one will not start
# until the matter is resolved by someone.
#
exit(1);
}
} else {
}
else {
# Child branch
# The database format for address is host:port - however, we need
# to give them as seperate arguments to frisbeed
$address =~ s/:/ /g;
if (!exec("$FRISBEED $filename $address")) {
# Hmmm. We'll exit(0), so that the parent doesn't try to
# restart us
print STDERR "$$: Unable to exec $FRISBEED\n";
exit(0);
# The database format for address is host:port - however,
# we need to give them as separate arguments to frisbeed.
if ($address =~ /(.*):(.*)/) {
my $addr = $1;
my $port = $2;
if (!exec("$FRISBEED -m $addr -p $port $filename")) {
die("$$: Unable to exec $FRISBEED\n");
}
}
die("$$: Bad address format: $address.\n");
}
}
&clear_address;
&unlock_tables();
exit(0);
######################################################################
......@@ -101,8 +133,7 @@ exit(0);
# Print out a usage message.
#
# Takes no arguments and prints to the currently selected output handle.
# There is no explicit return, so the sub yields the value of its last
# print; the caller in this file hands that value straight to exit().
#
# NOTE(review): this view shows both the "[-n] [-d]" usage text and the
# "[-d]"-only text one after the other - confirm which pair of lines is
# the current one before relying on the output.
sub usage {
print "Usage: $0 [-n] [-d] IMAGEID\n";
print "-n: Don't kill server when idle\n";
print "Usage: $0 [-d] IMAGEID\n";
print "-d: Print debugging output\n";
}
......@@ -115,10 +146,10 @@ sub debug {
# Grab the address for the passed-in imageid
sub get_address {
my ($imageid) = @_;
my $image_query = "SELECT imageid,load_address FROM images WHERE " .
"imageid='$imageid'";
my $sth = DBQueryFatal($image_query);
my $sth =
DBQueryFatal("SELECT imageid,load_address ".
"FROM images WHERE imageid='$imageid'");
my @row = $sth->fetchrow;
if (!@row) {
......@@ -196,9 +227,39 @@ sub pick_address {
# Pass in an imageid, and an address
#
# Stores $address in the load_address column of the images row whose
# imageid matches. The second statement writes the same load_address and
# additionally sets load_busy=1. Both go through DBQueryFatal.
#
# NOTE(review): two UPDATE statements for the same row appear here; the
# first looks like the earlier form of the second - confirm whether both
# are meant to run.
sub set_address {
my ($imageid,$address) = @_;
my $address_update = "UPDATE images SET load_address='$address' " .
"WHERE imageid='$imageid'";
DBQueryFatal($address_update);
DBQueryFatal("UPDATE images SET load_address='$address',load_busy=1 " .
"WHERE imageid='$imageid'");
}
# Bump the busy indicator to keep the frisbeed going.
#
# Takes the imageid whose row in the images table should be marked busy.
# load_busy is raised to at least 1; GREATEST() leaves any larger value
# that is already stored untouched. Always returns 1, so the result can
# be used directly in a boolean test by the caller.
#
# Declared without a prototype: "($imageid)" is not a valid Perl
# prototype (prototypes may only contain sigil characters) and triggers
# an "Illegal character in prototype" warning. The call in this file uses
# the "&keepbusy(...)" form, which ignores prototypes anyway.
sub keepbusy {
    my ($imageid) = @_;

    DBQueryFatal("UPDATE images SET load_busy=GREATEST(load_busy,1) " .
                 "WHERE imageid='$imageid'");
    return 1;
}
# Test the busy indicator, and set to zero.
#
# Takes the imageid to look up in the images table. Returns 0 when no row
# exists for that imageid; otherwise returns the load_busy value that was
# read. When that value is true, the row's load_busy is reset to 0 before
# returning, so a later call reports "not busy" unless the indicator was
# bumped again in the meantime.
#
# The read and the reset are two separate statements. The launcher loop in
# this file calls lock_tables before invoking this sub for that reason.
#
# Declared without a prototype: "($imageid)" is not a valid Perl
# prototype (prototypes may only contain sigil characters) and triggers
# an "Illegal character in prototype" warning. The call in this file uses
# the "&testbusy(...)" form, which ignores prototypes anyway.
sub testbusy {
    my ($imageid) = @_;

    my $query_result =
        DBQueryFatal("select load_busy from images ".
                     "WHERE imageid='$imageid'");
    my @row = $query_result->fetchrow;

    # No such image row: report "not busy".
    if (!@row) {
        return 0;
    }

    # Consume the indicator so the next test starts from zero.
    if ($row[0]) {
        DBQueryFatal("UPDATE images SET load_busy=0 ".
                     "WHERE imageid='$imageid'");
    }
    return $row[0];
}
# Kill off our child process, if started, and clear out registered address
......@@ -212,11 +273,11 @@ sub cleanup {
exit(1);
}
# Clear out the address registered to this process
# Clear out the address (and pid) registered to this process
sub clear_address {
&debug("Clearing out registered load_address\n");
# Now, clear out the load_address we had set up
my $address_clear = "UPDATE images SET load_address='' " .
my $address_clear = "UPDATE images SET load_address='',load_busy=0 " .
"WHERE imageid='$imageid'";
DBQueryFatal($address_clear);
}
......@@ -14,18 +14,19 @@ use Getopt::Std;
#
sub usage()
{
print STDOUT "Usage: os_load [-s | -w] [-r | -n] [-i <imageid>] ".
"<node> [node ...]\n".
print STDOUT
"Usage: os_load [-s] [-r | -n] [-i <imageid>] <node> [node ...]\n".
" os_load [-s] [-r | -n] [-i <imageid>] -e pid,eid\n".
" os_load -l\n".
"Use -i to specify an imageid. Use node default otherwise.\n".
"Use -s to setup reload only, but do not issue a reboot.\n".
"Use -w to block waiting for nodes to finish reloading.\n".
" (-s and -w are mutually exclusive)\n".
"Use -s to start reload, but do not wait for it to complete.\n".
"Use -e to reboot all the nodes in an experiment\n" .
"Use -l to get a list of images you are permitted to load.\n" .
"Use -r to use Frisbee to reload disks.\n" .
"Use -n to use netdisk to reload disks.\n";
exit(-1);
}
my $optlist = "sldwrni:";
my $optlist = "sldrni:e:";
#
# Configure variables
......@@ -36,12 +37,17 @@ my $TBOPS = "@TBOPSEMAIL@";
my $BOSSADDR = "@BOSSNODE@";
my $USERADDR = "@USERNODE@";
#
# Max number of simultaneous loads. Will be better with Frisbee.
#
my $MAXLOADS = 2;
#
# Max number of retries (per node) before it's deemed fatal. This allows
# for the occasional pxeboot failure.
#
my $MAXRETRIES = 1;
#
# Load the Testbed support stuff.
#
......@@ -61,12 +67,13 @@ my $usedefault = 1;
my $imageid;
my %imageid_row;
my @nodes = ();
my %retries = ();
my $mereuser = 0;
my $setuponly = 0;
my $waitmode = 0;
my $waitmode = 1;
my $failures = 0;
my $startwait = 0;
my $type = TB_DEFAULT_RELOADTYPE;
my $cmdline = "";
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';
......@@ -86,14 +93,8 @@ if (defined($options{"l"})) {
dolisting();
exit(0);
}
if (@ARGV < 1) {
usage();
}
if (defined($options{"s"})) {
$setuponly = 1;
}
if (defined($options{"w"})) {
$waitmode = 1;
$waitmode = 0;
}
if (defined($options{"r"})) {
if (defined($options{"n"})) {
......@@ -104,9 +105,6 @@ if (defined($options{"r"})) {
if (defined($options{"n"})) {
$type = TB_RELOADTYPE_NETDISK;
}
if ($waitmode && $setuponly) {
usage();
}
if (defined($options{"i"})) {
$imageid = $options{"i"};
$usedefault = 0;
......@@ -118,19 +116,40 @@ if (defined($options{"i"})) {
die("*** Bad $imageid name.\n");
}
}
#
# Untaint nodes.
#
foreach my $node ( @ARGV ) {
if ($node =~ /^([-\@\w]+)$/) {
$node = $1;
if (defined($options{"e"})) {
if (@ARGV) {
usage();
}
my $pideid = $options{"e"};
if ($pideid =~ /([-\w]*),([-\w]*)/) {
if (! (@nodes = ExpNodes($1, $2))) {
die("*** $0:\n".
" There are no nodes in $1/$2!\n");
}
}
else {
die("*** Bad node name: $node.\n");
die("Invalid argument to -e option: $pideid\n");
usage();
}
}
else {
if (! @ARGV) {
usage();
}
#
# Untaint nodes.
#
foreach my $node ( @ARGV ) {
if ($node =~ /^([-\@\w]+)$/) {
$node = $1;
}
else {
die("*** Bad node name: $node.\n");
}
push(@nodes, $node);
push(@nodes, $node);
}
}
#
......@@ -205,7 +224,6 @@ foreach my $node (@nodes) {
# For now, all testbed default images come from paper and all pid specific
# images come from plastic:/proj.
#
my $cmdline = "";
if (defined($imageid_row{'pid'})) {
if (! ($imagepath =~ /^\/proj\//)) {
die("*** $0:\n".
......@@ -271,27 +289,14 @@ foreach my $node (@nodes) {
print STDOUT "Setting up reload for $node\n";
if (!$TESTMODE) {
if ($type eq TB_RELOADTYPE_FRISBEE) {
DBQueryFatal("update nodes set ".
"next_pxe_boot_path='$FRISBEEPATH'" .
"where node_id='$node'");
system "$FRISBEELAUNCHER $imageid" and
die "*** Unable to launch frisbee daemon\n";
} elsif ($type eq TB_RELOADTYPE_NETDISK) {
DBQueryFatal("update nodes set ".
"next_boot_osid='$NETDISKOSID',".
"next_boot_cmd_line='$cmdline' ".
"where node_id='$node'");
} else {
die "Unknown reload type ($type)\n";
}
SetupReload($node);
}
}
#
# Exit if not doing an actual reload.
#
if ($setuponly || $TESTMODE) {
if ($TESTMODE) {
print STDOUT "OS Reload (Setup/Testmode) Done!\n";
exit 0;
}
......@@ -311,8 +316,16 @@ if (! $waitmode) {
# Okay, in waitmode we do a couple at a time and wait for them to come
# back alive before proceeding to the next one.
#
# The retry vector is initialized to the number of retries we allow per
# node, after which it's a fatal error.
#
foreach my $node (@nodes) {
$retries{$node} = $MAXRETRIES;
}
while (@nodes) {
my @batch = ();
my @batch = ();
my @failed = ();
my $i;
#
......@@ -328,27 +341,141 @@ while (@nodes) {
@nodes = ();
}
print "Issuing reload/reboot for @batch and then waiting ...\n";
print "Issuing reboot for @batch and then waiting ...\n";
system("$nodereboot @batch");
$failures = $? >> 8;
if ($failures) {
if ($?) {
print "Reboot failed for (some of) @batch. Quitting!\n";
exit $failures;
exit ($? >> 8);
}
#
# Now wait for them.
#
$startwait = time;
foreach my $node (@batch) {
if (WaitTillReloadDone($node) == 0) {
print "$node appears have reloaded okay.\n";
next;
@failed = WaitTillReloadDone(@batch);
while (@failed) {
my $node = shift(@failed);
if ($retries{$node}) {
print "*** Trying $node again (resetting/rebooting) ...\n";
push(@nodes, $node);
# Possible race with reboot?
SetupReload($node);
# Retry until count hits zero.
$retries{$node} -= 1;
}
else {
print "*** $node failed too many times. Skipping!\n";
$failures++;
}
}
}
print "OS Reload Done! There were $failures failures!\n";
exit($failures);
#
# Wait for a reload to finish. We do this in a rather kludgey manner,
# by waiting for bootinfo to clear the DB state: next_pxe_boot_path when
# the reload type is Frisbee, next_boot_osid otherwise.
#
# Takes the list of node ids to watch and polls all of them together.
# Returns the list of nodes that were still not clear once more than
# $maxwait seconds had passed; an empty list means every node finished.
#
# Elapsed time is measured against the file-level $startwait, which the
# caller in this file sets before calling. $type, $dbg and $TBOPS are
# likewise file-level variables.
#
sub WaitTillReloadDone {
my (@nodes) = @_;
# Per-node flag: 0 while still being polled, 1 once finished or given up on.
my %done = ();
# Array in scalar context: number of nodes still outstanding.
my $count = @nodes;
# Nodes that exceeded the time limit; this is the return value.
my @failed = ();
# NOTE(review): $node is not declared at this point in the sub, and this
# message is printed unconditionally before any waiting happens. It looks
# like a leftover from an earlier per-node loop - confirm it belongs here.
print "$node may be down.\n".
"Please contact $TBOPS for assistance.\n";
#
# Seven minutes seems like a long time to wait, but it ain't!
#
my $maxwait = (60 * 7);
foreach my $node ( @nodes ) {
$done{$node} = 0;
}
print STDERR "Waiting for @nodes to finish reloading\n" if $dbg;
#
# Start a counter going, relative to the time we rebooted the first
# node.
#
my $waittime = 0;
# Highest whole-minute mark already reported. It is shared by all nodes,
# so each new minute produces one "Still waiting" line, for whichever
# unfinished node is examined first after the boundary.
my $minutes = 0;
# One pass over every unfinished node, then sleep, until none remain.
while ($count) {
foreach my $node ( @nodes ) {
if (! $done{$node}) {
my ($query_result, @row);
# Pick the column that signals "reload still pending" for this type.
if ($type eq TB_RELOADTYPE_FRISBEE) {
$query_result =
DBQueryFatal("SELECT next_pxe_boot_path FROM nodes ".
"where node_id='$node'");
}
else {
$query_result =
DBQueryFatal("SELECT next_boot_osid FROM nodes ".
"where node_id='$node'");
}
@row = $query_result->fetchrow_array();
# A false value (or no row at all) is treated as "reload finished".
if (! $row[0]) {
print STDERR "$node alive and well\n" if $dbg;
$count--;
$done{$node} = 1;
next;
}
$waittime = time - $startwait;
# Past the limit: stop polling this node and report it as failed.
if ($waittime > $maxwait) {
my $t = (int ($waittime / 60));
print "*** $node appears wedged; ".
"its been $t minutes since it was rebooted.\n";
$count--;
$done{$node} = 1;
push(@failed, $node);
next;
}
# Progress report each time a new whole minute has elapsed.
if (int($waittime / 60) > $minutes) {
$minutes = int($waittime / 60);
print "Still waiting for $node - ".
"its been $minutes minute(s)\n";
}
}
}
# Pause between polling passes. This also runs once after the last node
# completes, because $count is only re-tested at the top of the loop.
sleep(5);
}
return @failed;
}
#
# Setup a reload for one node. Note that type, imageid, and cmdline are
# global, as are the FRISBEEPATH, FRISBEELAUNCHER and NETDISKOSID settings
# interpolated below.
#
# Takes the node id. For a Frisbee reload it points the node's
# next_pxe_boot_path at $FRISBEEPATH and then runs the frisbee launcher for
# the global $imageid, dying if the launcher exits non-zero. For a netdisk
# reload it sets next_boot_osid and next_boot_cmd_line instead. Any other
# reload type is a fatal error. The return value is not meaningful.
#
sub SetupReload($)
{
    my ($node) = @_;

    if ($type eq TB_RELOADTYPE_FRISBEE) {
        # The space after the quoted path keeps the WHERE keyword from
        # being glued directly onto the closing quote of the literal.
        DBQueryFatal("update nodes set ".
                     "next_pxe_boot_path='$FRISBEEPATH' " .
                     "where node_id='$node'");

        # system() returns non-zero on failure, which trips the die.
        system "$FRISBEELAUNCHER $imageid" and
            die "*** Unable to launch frisbee daemon\n";
    }
    elsif ($type eq TB_RELOADTYPE_NETDISK) {
        DBQueryFatal("update nodes set ".
                     "next_boot_osid='$NETDISKOSID',".
                     "next_boot_cmd_line='$cmdline' ".
                     "where node_id='$node'");
    }
    else {
        die "*** Unknown reload type ($type)\n";
    }
}
......@@ -381,58 +508,3 @@ sub dolisting()
printf "%-20s %s\n", $id, $desc;
}
}
#
# Wait for a reload to finish. We do this in a rather kludgey manner,
# by waiting for bootinfo to clear the DB state: next_pxe_boot_path for a
# Frisbee reload, next_boot_osid for a netdisk reload.
#
# Takes a single node id. Returns 0 once the polled column is false, or 1
# when more than $maxwait seconds have elapsed since the file-level
# $startwait without that happening.
#
sub WaitTillReloadDone {
my ($pc) = @_;
print STDERR "Waiting for $pc to finish reloading\n" if $dbg;
#
# Seven minutes seems like a long time to wait, but it ain't!
#
my $maxwait = (60 * 7);
#
# Start a counter going, relative to the time we rebooted the first
# node.
#
my $waittime = 0;
# Highest whole-minute mark already reported in a progress message.
my $minutes = 0;
# Poll every five seconds until the node is done or the limit is hit.
while (1) {
my ($query_result, @row);
# NOTE(review): there is no else branch, so for any other $type the
# $query_result stays undefined and the fetchrow_array call below would
# fail - confirm callers only reach here with one of these two types.
if ($type eq TB_RELOADTYPE_FRISBEE) {
$query_result =
DBQueryFatal("SELECT next_pxe_boot_path FROM nodes ".
"where node_id='$pc'");
} elsif ($type eq TB_RELOADTYPE_NETDISK) {
$query_result =
DBQueryFatal("SELECT next_boot_osid FROM nodes ".
"where node_id='$pc'");
}
@row = $query_result->fetchrow_array();
# A false value (or no row at all) is treated as "reload finished".
if (! $row[0]) {
print STDERR "$pc alive and well\n" if $dbg;
return 0;
}
$waittime = time - $startwait;
# Past the limit: give up on this node.
if ($waittime > $maxwait) {
print "$pc appears unresponsive; its been ",
(int ($waittime / 60))," minutes since reload started.\n";
return 1;
}
# Progress report each time a new whole minute has elapsed.
if (int($waittime / 60) > $minutes) {
$minutes = int($waittime / 60);
print "Still waiting for $pc - its been $minutes minute(s)\n";
}
sleep(5);
}
}
......@@ -50,7 +50,7 @@ my $RELOADPID = NODERELOADING_PID;
my $RELOADEID = NODERELOADING_EID;
my $PENDINGEID = NODERELOADPENDING_EID;
my $os_load = "$TB/bin/os_load";
my $os_load = "$TB/bin/os_load -s";
my $sched_reload= "$TB/sbin/sched_reload";
my $reboot = "$TB/bin/node_reboot";
my $logfile = "$TB/log/reloadlog";
......
......@@ -43,7 +43,7 @@ my $RELOADPID = NODERELOADING_PID;
my $RELOADEID = NODERELOADING_EID;
my $PENDINGEID = NODERELOADPENDING_EID;
my $osload = "$TB/bin/os_load";
my $osload = "$TB/bin/os_load -s";
my $nalloc = "$TB/bin/nalloc";
my $name = "";
my $error = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment