Commit 263e2cb3 authored by Leigh Stoller's avatar Leigh Stoller

Some changes for Mike and Firewalled ElabInElab experiments. I need

all of the nodes to boot up normally before I can turn them into an
inner elab (later, after os_setup). That cannot happen with the
firewall rules in place. So, when an experiment is firewalled, reorder
the boot/wait list and wait for the firewall node first. Once that
hits ISUP, run the elabinelab code with the -f option so that it can do what
it needs, which right now means an ssh over to the firewall node to
temporarily disable all the rules.

We still need to deal with teardown though.
parent 834351f3
......@@ -16,13 +16,14 @@ use Getopt::Std;
sub usage()
{
print STDOUT "Usage: elabinelab [-d] [-g] pid eid\n";
print STDOUT " elabinelab [-d] [-k] pid eid\n";
print STDOUT " elabinelab [-d] [-k | -f] pid eid\n";
exit(-1);
}
my $optlist = "dgk";
my $optlist = "dgkf";
my $debug = 1;
my $killmode = 0;
my $fwboot = 0;
my $dbgooonly= 0;
#
......@@ -100,6 +101,9 @@ if (defined($options{"d"})) {
if (defined($options{"k"})) {
$killmode = 1;
}
if (defined($options{"f"})) {
$fwboot = 1;
}
if (! @ARGV) {
usage();
}
......@@ -156,6 +160,23 @@ exit(0)
my $firewall;
my $firewalled = TBExptFirewall($pid, $eid, \$firewall);
#
# Presetup; turn off firewall.
#
# This branch runs only when the script was invoked with -f ($fwboot).
# It never falls through to the rest of the script: every path below
# ends in exit() or die().
#
if ($fwboot) {
# Nothing to do for an experiment without a firewall.
exit(0)
if (!$firewalled);
print "Turning off firewall rules on $firewall\n";
# Become root before running $SSH.
$UID = 0;
# Install ipfw rule number 1 on the firewall node, allowing all traffic.
system("$SSH -host $firewall ipfw add 1 allow all from any to any");
# Any non-zero wait status from the ssh command is fatal.
if ($?) {
die("*** $0:\n".
" Error turning off firewall rules ($firewall)!\n");
}
exit(0);
}
#
# If we are going to start an inner experiment, grab the stuff we need
# from the DB and save it.
......@@ -226,21 +247,13 @@ exit(0)
if ($dbgooonly);
#
# For SSH below
# For SSH and SCP below
#
$UID = 0;
#
# If firewalled, turn off the firewall during the setup.
#
if ($firewalled) {
print "Turning off firewall rules on $firewall\n";
system("$SSH -host $firewall ipfw add 1 allow all from any to any");
if ($?) {
die("*** $0:\n".
" Error turning off firewall rules ($firewall)!\n");
}
}
# The firewall should be off at this point; called from os_setup with -f.
#
#
# This is temporary. I think I will switch this over to grabbing the latest
......@@ -406,7 +419,12 @@ if (defined($elabinelab_eid)) {
}
#
# Turn the firewall back on.
# Turn the firewall back on.
#
# XXX If this fails, we have to do something much stronger! We do not want
# nodes coming up and starting something if the firewall is not active.
# Maybe hit the panic button from here (turning off the control network).
#
#
if ($firewalled) {
print "Turning firewall back on\n";
......
......@@ -2,7 +2,7 @@
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
# Copyright (c) 2000-2005 University of Utah and the Flux Group.
# All rights reserved.
#
use English;
......@@ -65,6 +65,7 @@ TBDebugTimeStampsOn();
my $vnode_setup = "$TB/sbin/vnode_setup";
my $osselect = "$TB/bin/os_select";
my $nodereboot = "$TB/bin/node_reboot";
my $elab_setup = "$TB/sbin/elabinelab";
my $dbg = 0;
my $failed = 0;
my $noretry = 0;
......@@ -164,6 +165,22 @@ my $user_email_to = "$user_name <$user_email>";
TBDebugTimeStamp("os_setup started");
#
# See if the experiment is firewalled
#
my $firewall;
my $firewalled = TBExptFirewall($pid, $eid, \$firewall);
#
# Ditto ElabinElab.
#
my $elabinelab;
if (! TBExptIsElabInElab($pid, $eid, \$elabinelab)) {
die("*** $0:\n".
" Could not get elabinelab status for experiment $pid/$eid\n");
}
#
# Get the set of nodes, as well as the nodes table information for them.
#
......@@ -616,10 +633,25 @@ TBDebugTimeStamp("rebooting/reloading finished");
# above? So that they enter a reasonably known state before we try to tear
# things down. Otherwise we could end up power cycling nodes a lot more often.
# This should probably be handled in other ways, say via stated or the alloc
# state machine.
# state machine.
#
my @nodelist = keys(%nodes);
#
# Firewall stuff. When the experiment has a firewall we must wait for
# that node before any other, so put it at the head of the list and
# keep the remaining nodes in their existing order.
#
if ($firewalled) {
    @nodelist = ($firewall, grep { $_ ne $firewall } @nodelist);
}
#
# Now let's wait for them to come back alive. Set up a retry list though
# so that we can give each node at least one second chance. Avoids pointless
......@@ -652,10 +684,9 @@ while ( @nodelist ) {
if (!TBNodeStateWait($node, $wstart, (60*7), \$actual_state,
(TBDB_NODESTATE_TBFAILED, TBDB_NODESTATE_ISUP))) {
print "state is $actual_state\n";
if ($actual_state eq TBDB_NODESTATE_TBFAILED) {
print "*** WARNING: $node reported a TBFAILED event; not retrying\n";
print "*** WARNING: $node reported a TBFAILED event; ".
"not retrying\n";
$retries{$node} = 0;
goto tbfailed;
}
......@@ -663,6 +694,17 @@ while ( @nodelist ) {
SetNodeBootStatus($node, NODEBOOTSTATUS_OKAY);
TBSetNodeAllocState( $node, TBDB_ALLOCSTATE_RES_READY() );
$nodeAllocStates{$node} = TBDB_ALLOCSTATE_RES_READY();
#
# Firewall has booted.
#
if ($firewalled && $node eq $firewall) {
if (!FirewallBoot()) {
print "*** WARNING: Firewall Boot Setup failed!\n";
$failed++;
$noretry = 1;
}
}
next;
}
......@@ -1051,3 +1093,28 @@ sub ForkCmd($) {
system($cmd);
exit($? >> 8);
}
#
# A firewall has booted up. Might need to do something.
#
# Uses the file-level $elabinelab, $elab_setup, $pid and $eid.
# Returns 1 on success (or when there is nothing to do) and 0 when the
# helper program could not be run or exited with a non-zero status.
#
sub FirewallBoot()
{
    #
    # The only case that currently matters is if the experiment is
    # elabinelab. In this case we want to turn off the firewall so
    # the nodes can boot/reload normally. Later, after we set up the
    # inner elab, we turn the firewall back on.
    #
    return 1
	if (!$elabinelab);

    #
    # We use the elabinelab program to do this, since it knows what it
    # might want to do (and helpfully, is setuid so it can ssh over).
    #
    # List-form system() is used so that $pid and $eid are handed to the
    # program as literal arguments and are never parsed by a shell.
    #
    system($elab_setup, "-f", $pid, $eid);

    # A wait status of -1 means the program could not be started at all.
    if ($? == -1) {
	print "*** WARNING: Could not run $elab_setup: $!\n";
	return 0;
    }
    return 0
	if ($?);
    return 1;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment