Commit dd3b8989 authored by Leigh B. Stoller's avatar Leigh B. Stoller

* After rebooting the inner nodes, ssh into the inner boss and run a
  utility script to wait for them to reboot and reach PXEWAIT. This
  indicates the inner Emulab is really ready.
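
  Roughly, the wait boils down to this call from the outer boss (a
  condensed sketch; $SSH, $bossnode, etc. are the script's own variables,
  and the flags/timeout are just what the diff below currently uses):

    $EUID = $UID = 0;
    # Run the utility script on the inner boss (flags as in the diff below).
    system("$SSH -host $bossnode /usr/testbed/sbin/node_statewait -t 180 -a");
    print STDERR "Error waiting for inner nodes to join!\n"
        if ($?);
    $UID = $SAVEUID;
    $EUID = $UID;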

* When an inner experiment is defined (elabinelab_eid in the experiments
  table), fire that experiment off by sshing into the inner boss. I am
  currently doing this with -w (wait mode), but eventually it will need
  to be done asynchronously for experiments in which the control net is
  turned off. Also, the experiment is not actually swapped in yet, since
  multicast and frisbee are still broken inside.
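
  Condensed from the diff below, the fire-off amounts to copying the NS
  file over and running batchexp on the inner boss ($SSH, $bossnode,
  $dbuid, $nsfilename, etc. are the script's variables):

    # Copy the NS file to the inner boss, then swap the experiment in.
    system("cat /tmp/$$.ns | $SSH -host $bossnode '(cat > $nsfilename)'");
    # -w is the wait mode mentioned above.
    system("$SSH -host $bossnode 'sudo -u $dbuid /usr/testbed/bin/batchexp ".
           " -q -i -w -f -S \"ElabInElab Experiment\" ".
           " -L \"ElabInElab ElabInElab\" -E \"ElabInElab Experiment\" ".
           " -p $pid -e $elabinelab_eid $nsfilename'");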

* Add -k mode for cleaning up. The intent of this is to avoid power
  cycling all the nodes, since the outer Emulab cannot reboot or ipod
  them. It goes like this (a condensed sketch follows the list):

  * Clear the inner_elab_role for the experiment's nodes in the reserved
    table.

  * Clear def_boot_osid, next_boot_osid, and temp_boot_osid for the
    nodes. This is bogus because os_select whines about doing this, but
    the point is to make sure that all nodes go into PXEWAIT when they
    reboot. We could have them go into the MFS, but that's bound to
    cause problems if the inner Emulab has a lot of nodes (remember, we
    cannot trust what is on disk). This needs more thought.

  * Regen and restart the outer dhcpd. The nodes will become part of the
    outer Emulab on their next boot cycle.

  * SSH into the inner boss and kill the inner dhcpd so that there will
    not be any DHCP responses on the inner control network.

  * SSH into the inner boss and have it reboot all of the inner nodes.

  * Wait for the nodes to reach PXEWAIT.

  The above needs more thought wrt firewalled experiments and an isolated
  control network.
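
  Condensed sketch of the above (see TearDownEmulab() in the diff below;
  variables are the script's own):

    # Clear the role slot so the reload daemon leaves the nodes alone.
    DBQueryFatal("update reserved set inner_elab_role=NULL ".
                 "where pid='$pid' and eid='$eid'");
    # Clear the boot osids so the nodes drop into PXEWAIT on reboot.
    DBQueryFatal("update nodes set ".
                 " def_boot_osid='',next_boot_osid='',temp_boot_osid='' ".
                 "where " . join(" or ", map("node_id='$_'", @expnodes)));
    system("$makeconf -i -r");            # regen/restart outer dhcpd
    system("$SSH -host $bossnode /usr/local/etc/rc.d/2.dhcpd.sh stop");
    system("$SSH -host $bossnode $wap $nodereboot -b @expnodes");
    system("$nodewait @expnodes");        # wait for nodes to reach PXEWAIT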

* Kill off some old MFS copy code, since we now get those files directly
  from the website.
parent f336fe42
@@ -16,11 +16,13 @@ use Getopt::Std;
sub usage()
{
    print STDOUT "Usage: elabinelab [-d] [-g] pid eid\n";
    print STDOUT " elabinelab [-d] [-k] pid eid\n";
    exit(-1);
}
my $optlist = "dg";
my $optlist = "dgk";
my $debug = 1;
my $killmode = 0;
my $dbgooonly= 0;
#
@@ -33,6 +35,14 @@ my $TBOPSPID = TBOPSPID();
my $SSH = "$TB/bin/sshtb";
my $nodereboot = "$TB/bin/node_reboot";
my $makeconf = "$TB/sbin/dhcpd_makeconf";
my $nodewait = "$TB/sbin/node_statewait";
# Locals
my $elabinelab;
my $elabinelab_eid;
# Protos
sub TearDownEmulab();
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:/usr/site/bin';
@@ -56,6 +66,9 @@ use lib "@prefix@/lib";
use libdb;
use libtestbed;
# Be careful not to exit on transient error
$libdb::DBQUERY_MAXTRIES = 30;
# Locals
my $PROJROOT = PROJROOT();
my $SAVEUID = $UID;
@@ -67,6 +80,8 @@ my @expnodes = ();
my $dbuid;
my $user_name;
my $user_email;
my $query_result;
my $exptinfo;
#
# Parse command arguments. Once we return from getopts, all that should
@@ -82,6 +97,9 @@ if (defined($options{"g"})) {
if (defined($options{"d"})) {
$debug = 1;
}
if (defined($options{"k"})) {
$killmode = 1;
}
if (! @ARGV) {
usage();
}
@@ -121,8 +139,81 @@ if (! UserDBInfo($dbuid, \$user_name, \$user_email)) {
}
TBDebugTimeStampsOn();
CopyMFS()
    if (0);
#
# Get elabinelab status to make sure, and to see if we need to fire off
# an experiment inside once its setup.
#
if (! TBExptIsElabInElab($pid, $eid, \$elabinelab, \$elabinelab_eid)) {
    die("*** $0:\n".
        " Could not get elabinelab status for experiment $pid/$eid\n");
}
exit(0)
    if (!$elabinelab);
#
# If we are going to start an inner experiment, grab the stuff we need
# from the DB and save it.
#
if (defined($elabinelab_eid)) {
    $query_result =
        DBQueryFatal("select nsfile from nsfiles ".
                     "where pid='$pid' and eid='$elabinelab_eid'");
    die("*** $0:\n".
        " No such experiment in DB for $pid/$elabinelab_eid\n")
        if (!$query_result->numrows);
    my ($nsfile) = $query_result->fetchrow_array();
    die("*** $0:\n".
        " No nsfile in DB for $pid/$elabinelab_eid\n")
        if (!defined($nsfile) || $nsfile eq "");
    $query_result =
        DBQueryFatal("select * from experiments ".
                     "where pid='$pid' and eid='$elabinelab_eid'");
    die("*** $0:\n".
        " No such experiment in DB for $pid/$elabinelab_eid\n")
        if (!$query_result->numrows);
    $exptinfo = $query_result->fetchrow_hashref();
    $exptinfo->{"nsfile"} = $nsfile;
}
#
# Get the role for each node.
#
$query_result =
    DBQueryFatal("select r.node_id,r.inner_elab_role from reserved as r ".
                 "where r.pid='$pid' and r.eid='$eid'");
while (my ($node_id,$role) = $query_result->fetchrow_array()) {
    # Like, the firewall node.
    next
        if (!defined($role));
    $noderoles{$node_id} = $role;
    $bossnode = $node_id
        if ($role eq 'boss');
    $opsnode = $node_id
        if ($role eq 'ops');
    push(@expnodes, $node_id)
        if ($role eq 'node');
}
#
# Tear down an inner emulab.
#
if ($killmode) {
    exit(TearDownEmulab());
}
if (1) {
    #
    # Get elabinelab info. If this is a container for an actual experiment,
    # then need to fire off the experiment once the inner emulab is ready to
    # go.
    #
    TBDebugTimeStamp("Dumping DB state");
    DumpDBGoo();
    exit(0)
@@ -156,7 +247,7 @@ if ($?) {
$UID = $SAVEUID;
SENDMAIL("$user_name <$user_email>",
         "ElabInElab Failure: $pid/$eid",
         "Error build the ops node ($opsnode)",
         "Error building the ops node ($opsnode)",
         $TBOPS,
         "Cc: $TBOPS",
         ("/tmp/opsnode.$$"));
@@ -172,7 +263,7 @@ if ($?) {
$UID = $SAVEUID;
SENDMAIL("$user_name <$user_email>",
         "ElabInElab Failure: $pid/$eid",
         "Error build the boss node ($bossnode)",
         "Error building the boss node ($bossnode)",
         $TBOPS,
         "Cc: $TBOPS",
         ("/tmp/bossnode.$$"));
@@ -190,7 +281,8 @@ SENDMAIL("$user_name <$user_email>",
"Cc: $TBOPS",
("/tmp/opsnode.$$", "/tmp/bossnode.$$"));
system("rm -f /tmp/opsnode.$$ /tmp/bossnode.$$");
$EUID = $UID = $SAVEUID;
$UID = $SAVEUID;
$EUID = $UID;
#
# Restart DHCPD before going into os_setup, since DHCPD must be ready
@@ -204,6 +296,7 @@ if ($?) {
}
# Reboot ops and wait for it to come back.
print "Rebooting opsnode ($opsnode).\n";
TBDebugTimeStamp("Rebooting opsnode");
system("$nodereboot -w $opsnode");
if ($?) {
@@ -211,6 +304,7 @@ if ($?) {
" Error rebooting the opsnode ($opsnode)!\n");
}
# Reboot boss and wait for it to come back.
print "Rebooting bossnode ($bossnode).\n";
TBDebugTimeStamp("Rebooting bossnode");
system("$nodereboot -w $bossnode");
if ($?) {
@@ -221,34 +315,76 @@ if ($?) {
# Reboot the experimental nodes. They will come up inside the inner elab.
# DO NOT WAIT! They are not going to report ISUP from this point on.
if (@expnodes) {
    print "Rebooting inner experimental nodes.\n";
    TBDebugTimeStamp("Rebooting experimental nodes");
    system("$nodereboot @expnodes");
    if ($?) {
        die("*** $0:\n".
            " Error rebooting the expnodes (@expnodes)!\n");
    }
    #
    # Instead, we ssh into the node and use a utility script to determine
    # when the nodes have rebooted and are in PXEWAIT (part of the inner elab).
    #
    $EUID = 0;
    $UID = 0;
    print "Waiting for nodes to reboot and join the inner emulab.\n";
    TBDebugTimeStamp("Waiting for inner nodes to reboot");
    system("$SSH -host $bossnode /usr/testbed/sbin/node_statewait -t 180 -a");
    if ($?) {
        print STDERR "*** $0:\n".
            " Error waiting for inner nodes to join!\n";
        exit(($debug ? 0 : -1));
    }
    $UID = $SAVEUID;
    $EUID = $UID;
}
}
TBDebugTimeStamp("ElabInElab setup done");
exit(0);
#
# Copy the necessary tftpboot files into the project directory so that
# the inner elab boss can get them later when it sets up.
#
sub CopyMFS()
{
    system("tar cf - -C /tftpboot ".
           " pxeboot.emu freebsd frisbee freebsd.newnode | ".
           " gzip | $SSH -1 -F /dev/null -host $CONTROL ".
           " '(cat > /$PROJROOT/$pid/exp/$eid/tftpboot.tar.gz)'");
# Fire off inner elab experiment.
#
if (defined($elabinelab_eid)) {
    # Formatted to make batchexp happy.
    my $nsfilename = "/tmp/$pid-$elabinelab_eid-$$.nsfile";
    #
    # Write NS file to temp file so we can send it over.
    #
    open(NS, "> /tmp/$$.ns")
        or die("*** $0:\n".
               " Could not write ns code to tmp file!\n");
    print NS $exptinfo->{"nsfile"};
    print NS "\n";
    close(NS);
    #
    # Copy the file over.
    #
    $UID = 0;
    system("cat /tmp/$$.ns | $SSH -host $bossnode '(cat > $nsfilename)'");
    if ($?) {
        die("*** $0:\n".
            " Could not create tftpboot.tar.gz\n");
            " Could not copy ns code to inner boss ($bossnode)!\n");
    }
    return 0;
    #
    # Now run batchexp on the node as the user.
    #
    system("$SSH -host $bossnode 'sudo -u $dbuid /usr/testbed/bin/batchexp ".
           " -q -i -w -f -S \"ElabInElab Experiment\" ".
           " -L \"ElabInElab ElabInElab\" -E \"ElabInElab Experiment\" ".
           " -p $pid -e $elabinelab_eid $nsfilename'");
    $UID = $SAVEUID;
    unlink("/tmp/$$.ns");
}
TBDebugTimeStamp("ElabInElab setup done");
exit(0);
#
# Dump parts of the DB that are needed for inner elab to run. The idea
# is to create a set of files named by the table name. Note that mysqld
@@ -270,28 +406,6 @@ sub DumpDBGoo()
die("*** $0:\n".
" Could not chmod $statedir\n");
#
# Get the role for each node.
#
my $query_result =
DBQueryFatal("select r.node_id,v.inner_elab_role from reserved as r ".
"left join virt_nodes as v on v.vname=r.vname and ".
" v.pid=r.pid and v.eid=r.eid ".
"where r.pid='$pid' and r.eid='$eid'");
while (my ($node_id,$role) = $query_result->fetchrow_array()) {
# Like, the firewall node.
next
if (!defined($role));
$noderoles{$node_id} = $role;
$bossnode = $node_id
if ($role eq 'boss');
$opsnode = $node_id
if ($role eq 'ops');
push(@expnodes, $node_id)
if ($role eq 'node');
}
#
# These tables are dumped completely.
#
@@ -432,7 +546,7 @@ sub DumpDBGoo()
    # to resolve incorrectly to an inner control IP, which will not work
    # from the pxeboot kernel since it uses the outer control network.
    # Just remove the host spec; pxeboot will do the right thing.
    $query_result =
    my $query_result =
        DBQueryFatal("select osid,path from temp_os_info ".
                     "where path like '%:%'");
@@ -580,6 +694,102 @@ sub DumpDBGoo()
return 0;
}
#
# Tear down an inner Emulab as cleanly as possible to avoid power cycling nodes.
#
sub TearDownEmulab()
{
    my $tbdir = "/usr/testbed";
    my $wap = "$tbdir/sbin/withadminprivs";
    my $nodereboot = "$tbdir/bin/node_reboot";
    #
    # We want to rebuild the DHCPD file so that when we reboot the inner nodes
    # they come back to the outer emulab. We cannot just free the nodes, cause
    # then the reload daemon might beat us to it, and end up power cycling the
    # nodes, and that would be bad. So, munge the DB and clear the "role" slot
    # for inner nodes.
    #
    DBQueryFatal("update reserved set inner_elab_role=NULL ".
                 "where pid='$pid' and eid='$eid'");
    #
    # XXX Failure at this point will leave things in an inconsistent state
    # cause we have just munged the reserved table. Since we were trying
    # to swap out the experiment, I think this will be okay. Wait and see.
    #
    return 0
        if (!defined($bossnode));
    #
    # Now regen the DHCPD file.
    #
    $EUID = $UID;
    print "Regenerating DHCPD config file and restarting daemon.\n";
    system("$makeconf -i -r");
    if ($?) {
        die("*** $0:\n".
            " Failed to reconfig/restart DHCPD.\n");
    }
    $EUID = 0;
    #
    # When the nodes reboot, we want them to do something reasonable. We
    # have no idea what is loaded on the disk, so they should go into an
    # MFS and wait, but then a bunch of nodes will all try to load the big
    # MFS at once, and that could wreak havoc. So, clear the boot osids
    # so they go into PXEWAIT. I could use os_select, but clearing all the
    # OSIDs for a node is apparently a bad thing and generates warnings and
    # emails. Why is that? So just clear the DB state until I figure out
    # why that is.
    #
    DBQueryFatal("update nodes set ".
                 " def_boot_osid='',next_boot_osid='',temp_boot_osid='' ".
                 "where " . join(" or ", map("node_id='$_'", @expnodes)));
    #
    # SSH in and kill the inner DHCPD daemon so that it does not reply
    # to rebooting nodes along the inner control network.
    #
    $UID = 0;
    print "Killing DHCPD on inner boss ($bossnode)\n";
    system("$SSH -host $bossnode /usr/local/etc/rc.d/2.dhcpd.sh stop");
    if ($?) {
        die("*** $0:\n".
            " Could not stop DHCPD on inner bossnode ($bossnode)!\n");
    }
    #
    # Now we ask inner boss to reboot all of the testnodes. Maybe need an
    # option to node_reboot, but for now just pass them on the command line.
    #
    print "Asking inner boss ($bossnode) to reboot inner nodes\n";
    system("$SSH -host $bossnode $wap $nodereboot -b @expnodes");
    if ($?) {
        #
        # This error is non-fatal; Outer boss will just resort to power cycle.
        #
        print STDERR "*** $0:\n".
            " Could not reboot some inner nodes!\n".
            " Continuing anyway; outer boss will use power cycle.\n";
    }
    #
    # Now we wait for them to reach PXEWAIT. Again, use our utility script
    # instead of stated stuff.
    #
    $EUID = $UID;
    print "Waiting for inner nodes to reach PXEWAIT\n";
    system("$nodewait @expnodes");
    if ($?) {
        #
        # This error is non-fatal; Outer boss will just resort to power cycle.
        #
        print STDERR "*** $0:\n".
            " Some machines did not reboot properly!\n".
            " Continuing anyway; outer boss will use power cycle.\n";
    }
    return 0;
}