All new accounts created on Gitlab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

Commit 6feda7d3 authored by Mike Hibler

Changes for speeding up elabinelab server setup.

Boss/ops/fs: reboot them together after setup rather than serially.

Nodes: leave them in PXEWAIT throughout the setup, until after boss has
been rebooted.  At that point we send them the new bootinfo RESTART command
telling pxeboot to re-DHCP and use the new info obtained (next-server) to
contact a potentially new boss node.  This is a quick way to switch a node
in PXEWAIT from talking to the outer boss to talking to the inner one.

A significant number of rinky-dink changes were needed to do this, primarily
adding a new state, PXELIMBO, where nodes can be sent to sit until they are
restarted.  It turns out, just putting them in an existing state such as
PXEWAKEUP or SHUTDOWN wouldn't work, as they tend to time out or otherwise
reboot.
parent 3e34e8fe
...@@ -247,6 +247,7 @@ if (1) { ...@@ -247,6 +247,7 @@ if (1) {
# Now symlink all the alternate boots to pxeboot.emu # Now symlink all the alternate boots to pxeboot.emu
# XXX we assume everything is at the top level of /tftpboot right now. # XXX we assume everything is at the top level of /tftpboot right now.
# #
$EUID = 0;
foreach my $boot (@bogoboots) { foreach my $boot (@bogoboots) {
if ($boot =~ /^\/tftpboot\/([^\/]+)$/) { if ($boot =~ /^\/tftpboot\/([^\/]+)$/) {
$boot = $1; $boot = $1;
...@@ -259,6 +260,7 @@ if (1) { ...@@ -259,6 +260,7 @@ if (1) {
} }
} }
} }
$EUID = $UID;
# #
# Remake the dhcpd.conf file to reflect any pxeboot change. # Remake the dhcpd.conf file to reflect any pxeboot change.
......
...@@ -102,7 +102,7 @@ use vars qw(@ISA @EXPORT); ...@@ -102,7 +102,7 @@ use vars qw(@ISA @EXPORT);
TBDB_NODESTATE_RELOADDONE TBDB_NODESTATE_RELOADDONE_V2 TBDB_NODESTATE_RELOADDONE TBDB_NODESTATE_RELOADDONE_V2
TBDB_NODESTATE_UNKNOWN TBDB_NODESTATE_UNKNOWN
TBDB_NODESTATE_PXEWAIT TBDB_NODESTATE_PXEWAKEUP TBDB_NODESTATE_PXEWAIT TBDB_NODESTATE_PXEWAKEUP
TBDB_NODESTATE_PXEFAILED TBDB_NODESTATE_PXEFAILED TBDB_NODESTATE_PXELIMBO
TBDB_NODESTATE_PXEBOOTING TBDB_NODESTATE_ALWAYSUP TBDB_NODESTATE_PXEBOOTING TBDB_NODESTATE_ALWAYSUP
TBDB_NODESTATE_MFSSETUP TBDB_NODESTATE_TBFAILED TBDB_NODESTATE_MFSSETUP TBDB_NODESTATE_TBFAILED
TBDB_NODESTATE_POWEROFF TBDB_NODESTATE_POWEROFF
...@@ -485,6 +485,7 @@ sub TBDB_NODESTATE_RELOADDONE() { "RELOADDONE"; } ...@@ -485,6 +485,7 @@ sub TBDB_NODESTATE_RELOADDONE() { "RELOADDONE"; }
sub TBDB_NODESTATE_RELOADDONE_V2(){ "RELOADDONEV2"; } sub TBDB_NODESTATE_RELOADDONE_V2(){ "RELOADDONEV2"; }
sub TBDB_NODESTATE_UNKNOWN() { "UNKNOWN"; }; sub TBDB_NODESTATE_UNKNOWN() { "UNKNOWN"; };
sub TBDB_NODESTATE_PXEWAIT() { "PXEWAIT"; } sub TBDB_NODESTATE_PXEWAIT() { "PXEWAIT"; }
sub TBDB_NODESTATE_PXELIMBO() { "PXELIMBO"; }
sub TBDB_NODESTATE_PXEWAKEUP() { "PXEWAKEUP"; } sub TBDB_NODESTATE_PXEWAKEUP() { "PXEWAKEUP"; }
sub TBDB_NODESTATE_PXEFAILED() { "PXEFAILED"; } sub TBDB_NODESTATE_PXEFAILED() { "PXEFAILED"; }
sub TBDB_NODESTATE_PXEBOOTING() { "PXEBOOTING"; } sub TBDB_NODESTATE_PXEBOOTING() { "PXEBOOTING"; }
...@@ -2748,13 +2749,15 @@ sub TBNodeStateWait ($$$$@) { ...@@ -2748,13 +2749,15 @@ sub TBNodeStateWait ($$$$@) {
$waittime = time - $waitstart; $waittime = time - $waitstart;
if ($waittime > $maxwait) { if ($waittime > $maxwait) {
$minutes = int($waittime / 60); $minutes = int($waittime / 60);
print "*** Giving up on $pc - it's been $minutes minute(s).\n"; print "*** Giving up on $pc ($state) - ",
"it's been $minutes minute(s).\n";
TBNodeConsoleTail($pc, *STDOUT); TBNodeConsoleTail($pc, *STDOUT);
return 1; return 1;
} }
if (int($waittime / 60) > $minutes) { if (int($waittime / 60) > $minutes) {
$minutes = int($waittime / 60); $minutes = int($waittime / 60);
print "Still waiting for $pc - it's been $minutes minute(s).\n"; print "Still waiting for $pc ($state) - ",
"it's been $minutes minute(s).\n";
} }
sleep(1); sleep(1);
} }
......
/* /*
* EMULAB-COPYRIGHT * EMULAB-COPYRIGHT
* Copyright (c) 2000-2004, 2006, 2007 University of Utah and the Flux Group. * Copyright (c) 2000-2010 University of Utah and the Flux Group.
* All rights reserved. * All rights reserved.
*/ */
...@@ -32,7 +32,8 @@ usage() ...@@ -32,7 +32,8 @@ usage()
"options:\n" "options:\n"
"-d - Turn on debugging\n" "-d - Turn on debugging\n"
"-q - Tell node to query bootinfo again\n" "-q - Tell node to query bootinfo again\n"
"-r - Tell node to reboot\n", "-r - Tell node to reboot\n"
"-R - Tell node to restart (re-DHCP to change server)\n",
progname); progname);
exit(-1); exit(-1);
} }
...@@ -40,7 +41,8 @@ usage() ...@@ -40,7 +41,8 @@ usage()
int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
int sock, err, c, reboot = 0, query = 0; int sock, err, c;
int reboot = 0, restart = 0, query = 0;
struct sockaddr_in name, target; struct sockaddr_in name, target;
boot_info_t boot_info; boot_info_t boot_info;
boot_what_t *boot_whatp = (boot_what_t *) &boot_info.data; boot_what_t *boot_whatp = (boot_what_t *) &boot_info.data;
...@@ -49,7 +51,7 @@ main(int argc, char **argv) ...@@ -49,7 +51,7 @@ main(int argc, char **argv)
progname = argv[0]; progname = argv[0];
while ((c = getopt(argc, argv, "dhvrq")) != -1) { while ((c = getopt(argc, argv, "dhvrRq")) != -1) {
switch (c) { switch (c) {
case 'd': case 'd':
debug++; debug++;
...@@ -57,6 +59,9 @@ main(int argc, char **argv) ...@@ -57,6 +59,9 @@ main(int argc, char **argv)
case 'r': case 'r':
reboot++; reboot++;
break; break;
case 'R':
restart++;
break;
case 'q': case 'q':
query++; query++;
break; break;
...@@ -75,7 +80,7 @@ main(int argc, char **argv) ...@@ -75,7 +80,7 @@ main(int argc, char **argv)
if (!argc) if (!argc)
usage(); usage();
if (query && reboot) if (query && (reboot || restart))
usage(); usage();
if (debug) if (debug)
...@@ -131,8 +136,9 @@ main(int argc, char **argv) ...@@ -131,8 +136,9 @@ main(int argc, char **argv)
bzero(&boot_info, sizeof(boot_info)); bzero(&boot_info, sizeof(boot_info));
boot_info.version = BIVERSION_CURRENT; boot_info.version = BIVERSION_CURRENT;
if (reboot) { if (reboot || restart) {
boot_whatp->type = BIBOOTWHAT_TYPE_REBOOT; boot_whatp->type = reboot ?
BIBOOTWHAT_TYPE_REBOOT : BIBOOTWHAT_TYPE_RESTART;
#ifdef EVENTSYS #ifdef EVENTSYS
bievent_send(target.sin_addr, (void *) NULL, bievent_send(target.sin_addr, (void *) NULL,
TBDB_NODESTATE_SHUTDOWN); TBDB_NODESTATE_SHUTDOWN);
......
/* /*
* EMULAB-COPYRIGHT * EMULAB-COPYRIGHT
* Copyright (c) 2000-2008 University of Utah and the Flux Group. * Copyright (c) 2000-2010 University of Utah and the Flux Group.
* All rights reserved. * All rights reserved.
* *
* Derived from boot/bootwhat.h in the OSKit. * Derived from boot/bootwhat.h in the OSKit.
...@@ -101,6 +101,7 @@ typedef struct boot_what { ...@@ -101,6 +101,7 @@ typedef struct boot_what {
#define BIBOOTWHAT_TYPE_REBOOT 5 /* Reboot */ #define BIBOOTWHAT_TYPE_REBOOT 5 /* Reboot */
#define BIBOOTWHAT_TYPE_AUTO 6 /* Do a bootinfo query */ #define BIBOOTWHAT_TYPE_AUTO 6 /* Do a bootinfo query */
#define BIBOOTWHAT_TYPE_MFS 7 /* Boot an MFS from server:/path */ #define BIBOOTWHAT_TYPE_MFS 7 /* Boot an MFS from server:/path */
#define BIBOOTWHAT_TYPE_RESTART 8 /* Restart ourselves without reset */
/* Flags */ /* Flags */
#define BIBOOTWHAT_FLAGS_CMDLINE 0x01 /* Kernel to boot */ #define BIBOOTWHAT_FLAGS_CMDLINE 0x01 /* Kernel to boot */
......
...@@ -425,6 +425,7 @@ REPLACE INTO state_transitions VALUES ('NORMALv1','ISUP','PXEBOOTING','KernelCha ...@@ -425,6 +425,7 @@ REPLACE INTO state_transitions VALUES ('NORMALv1','ISUP','PXEBOOTING','KernelCha
REPLACE INTO state_transitions VALUES ('NODEALLOC','FREE_CLEAN','RES_INIT_CLEAN','Reserve'); REPLACE INTO state_transitions VALUES ('NODEALLOC','FREE_CLEAN','RES_INIT_CLEAN','Reserve');
REPLACE INTO state_transitions VALUES ('PXEKERNEL','PXEWAIT','PXEBOOTING','Retry'); REPLACE INTO state_transitions VALUES ('PXEKERNEL','PXEWAIT','PXEBOOTING','Retry');
REPLACE INTO state_transitions VALUES ('PXEKERNEL','PXEBOOTING','PXEWAIT','Free'); REPLACE INTO state_transitions VALUES ('PXEKERNEL','PXEBOOTING','PXEWAIT','Free');
REPLACE INTO state_transitions VALUES ('PXEKERNEL','PXELIMBO','PXEBOOTING','Bootinfo-Restart');
REPLACE INTO state_transitions VALUES ('BATCHSTATE','ACTIVATING','SWAPPED','NonBatch'); REPLACE INTO state_transitions VALUES ('BATCHSTATE','ACTIVATING','SWAPPED','NonBatch');
REPLACE INTO state_transitions VALUES ('NORMAL','ISUP','SHUTDOWN','Reboot'); REPLACE INTO state_transitions VALUES ('NORMAL','ISUP','SHUTDOWN','Reboot');
REPLACE INTO state_transitions VALUES ('NORMAL','REBOOTING','SHUTDOWN','Reboot'); REPLACE INTO state_transitions VALUES ('NORMAL','REBOOTING','SHUTDOWN','Reboot');
......
#
# Add a PXEKERNEL-mode state transition (PXELIMBO -> PXEBOOTING, labelled
# 'Bootinfo-Restart') to support fast transition of nodes from outer
# Emulab to inner Emulabs.
#
use strict;
use libdb;
sub DoUpdate($$$)
{
    # All three arguments are accepted but none is consulted below; the
    # statement is issued through DBQueryFatal() (presumably supplied by
    # libdb -- confirm) rather than through $dbhandle.
    my ($dbhandle, $dbname, $version) = @_;

    # The single row to install: in op mode PXEKERNEL, a node in state
    # PXELIMBO may move to PXEBOOTING, labelled 'Bootinfo-Restart'.
    my $row = "('PXEKERNEL','PXELIMBO','PXEBOOTING','Bootinfo-Restart')";

    # NOTE: the two spaces after VALUES are intentional; they keep the
    # statement text identical to the previously concatenated form.
    my $statement = "REPLACE INTO state_transitions VALUES  " . $row;

    DBQueryFatal($statement);

    # Report success to the caller.
    return 0;
}
1;
...@@ -183,6 +183,7 @@ while (1) { ...@@ -183,6 +183,7 @@ while (1) {
"left join node_types as t on t.type=n.type ". "left join node_types as t on t.type=n.type ".
"where (n.eventstate!='". TBDB_NODESTATE_ISUP ."' and ". "where (n.eventstate!='". TBDB_NODESTATE_ISUP ."' and ".
" n.eventstate!='". TBDB_NODESTATE_PXEWAIT ."' and ". " n.eventstate!='". TBDB_NODESTATE_PXEWAIT ."' and ".
" n.eventstate!='". TBDB_NODESTATE_PXELIMBO ."' and ".
" n.eventstate!='". TBDB_NODESTATE_ALWAYSUP ."' and ". " n.eventstate!='". TBDB_NODESTATE_ALWAYSUP ."' and ".
" n.eventstate!='". TBDB_NODESTATE_POWEROFF ."') and ". " n.eventstate!='". TBDB_NODESTATE_POWEROFF ."') and ".
" r.pid is null and n.role='testnode' and ". " r.pid is null and n.role='testnode' and ".
......
...@@ -14,6 +14,7 @@ use Getopt::Std; ...@@ -14,6 +14,7 @@ use Getopt::Std;
use lib "@prefix@/lib"; use lib "@prefix@/lib";
use libdb; use libdb;
use libtestbed; use libtestbed;
use libtblog;
use Experiment; use Experiment;
use User; use User;
use Lan; use Lan;
...@@ -29,7 +30,7 @@ sub usage() ...@@ -29,7 +30,7 @@ sub usage()
exit(-1); exit(-1);
} }
my $optlist = "dgkfur"; my $optlist = "dgkfurP";
my $debug = 1; my $debug = 1;
my $verbose = 0; my $verbose = 0;
my $killmode = 0; my $killmode = 0;
...@@ -38,6 +39,10 @@ my $dbgooonly= 0; ...@@ -38,6 +39,10 @@ my $dbgooonly= 0;
my $update = 0; my $update = 0;
my $remove = 0; my $remove = 0;
# XXX experimental speed hacks
my $inparallel = 1;
my $restartnodes = 1;
sub DumpDBGoo(); sub DumpDBGoo();
# #
...@@ -51,6 +56,7 @@ my $TBOPSPID = TBOPSPID(); ...@@ -51,6 +56,7 @@ my $TBOPSPID = TBOPSPID();
my $SSH = "$TB/bin/sshtb"; my $SSH = "$TB/bin/sshtb";
my $SCP = "/usr/bin/scp"; my $SCP = "/usr/bin/scp";
my $nodereboot = "$TB/bin/node_reboot"; my $nodereboot = "$TB/bin/node_reboot";
my $noderestart = "$TB/sbin/bootinfosend -R";
my $makeconf = "$TB/sbin/dhcpd_makeconf"; my $makeconf = "$TB/sbin/dhcpd_makeconf";
my $nodewait = "$TB/sbin/node_statewait"; my $nodewait = "$TB/sbin/node_statewait";
my $snmpit = "$TB/bin/snmpit"; my $snmpit = "$TB/bin/snmpit";
...@@ -117,6 +123,9 @@ if (defined($options{"u"})) { ...@@ -117,6 +123,9 @@ if (defined($options{"u"})) {
if (defined($options{"r"})) { if (defined($options{"r"})) {
$remove = 1; $remove = 1;
} }
if (defined($options{"P"})) {
$inparallel = 1;
}
if (! @ARGV) { if (! @ARGV) {
usage(); usage();
} }
...@@ -278,6 +287,7 @@ $UID = 0; ...@@ -278,6 +287,7 @@ $UID = 0;
# version from the web server. # version from the web server.
# #
# XXX ugh, copy over a newer mkextrafs.pl as well (one that supports -2). # XXX ugh, copy over a newer mkextrafs.pl as well (one that supports -2).
# XXX ughII, we only copy over a FreeBSD version, this will break a Linux boss.
# #
my $mkelab = "$TB/etc/rc.mkelab"; my $mkelab = "$TB/etc/rc.mkelab";
if (-e "$expdir/rc.mkelab") { if (-e "$expdir/rc.mkelab") {
...@@ -386,37 +396,94 @@ if ($?) { ...@@ -386,37 +396,94 @@ if ($?) {
" Failed to reconfig/restart DHCPD.\n"); " Failed to reconfig/restart DHCPD.\n");
} }
if (defined($fsnode)) { if ($inparallel) {
# Reboot fs and wait for it to come back. my $nodes = "$bossnode $opsnode";
print "Rebooting fsnode ($fsnode).\n"; $nodes .= " $fsnode"
TBDebugTimeStamp("Rebooting fsnode"); if (defined($fsnode));
system("$nodereboot -w $fsnode"); print "Rebooting servers ($nodes).\n";
TBDebugTimeStamp("Rebooting servers");
system("$nodereboot -w $nodes");
if ($?) { if ($?) {
die("*** $0:\n". die("*** $0:\n".
" Error rebooting the fsnode ($fsnode)!\n"); " Error rebooting the servers ($nodes)!\n");
}
} else {
if (defined($fsnode)) {
# Reboot fs and wait for it to come back.
print "Rebooting fsnode ($fsnode).\n";
TBDebugTimeStamp("Rebooting fsnode");
system("$nodereboot -w $fsnode");
if ($?) {
die("*** $0:\n".
" Error rebooting the fsnode ($fsnode)!\n");
}
}
# Reboot ops and wait for it to come back.
print "Rebooting opsnode ($opsnode).\n";
TBDebugTimeStamp("Rebooting opsnode");
system("$nodereboot -w $opsnode");
if ($?) {
die("*** $0:\n".
" Error rebooting the opsnode ($opsnode)!\n");
}
# Reboot boss and wait for it to come back.
print "Rebooting bossnode ($bossnode).\n";
TBDebugTimeStamp("Rebooting bossnode");
system("$nodereboot -w $bossnode");
if ($?) {
die("*** $0:\n".
" Error rebooting the bossnode ($bossnode)!\n");
} }
}
# Reboot ops and wait for it to come back.
print "Rebooting opsnode ($opsnode).\n";
TBDebugTimeStamp("Rebooting opsnode");
system("$nodereboot -w $opsnode");
if ($?) {
die("*** $0:\n".
" Error rebooting the opsnode ($opsnode)!\n");
}
# Reboot boss and wait for it to come back.
print "Rebooting bossnode ($bossnode).\n";
TBDebugTimeStamp("Rebooting bossnode");
system("$nodereboot -w $bossnode");
if ($?) {
die("*** $0:\n".
" Error rebooting the bossnode ($bossnode)!\n");
} }
$EUID = 0; $EUID = 0;
# Reboot the experimental nodes. They will come up inside the inner elab. # Reboot the experimental nodes. They will come up inside the inner elab.
# DO NOT WAIT! They are not going to report ISUP from this point on. # DO NOT WAIT! They are not going to report ISUP from this point on.
if (@expnodes) { if (@expnodes) {
#
# First we try the magic pxeboot restart.
# The nodes should still be in PXEWAIT, so we send them a restart
# to make them re-DHCP. This should get them quickly reparented to
# the inner boss.
#
# If this doesn't work, we fall back on rebooting the nodes.
#
if ($restartnodes) {
TBDebugTimeStamp("Redirecting experimental nodes to inner boss");
my $stat = 0;
# Run as real user again.
$EUID = $UID;
foreach my $node (@expnodes) {
$stat = system("$noderestart $node");
last if ($stat);
}
$EUID = 0;
if ($stat) {
tbwarn("Node restart failed ($stat), falling back to reboot.");
goto rebootnodes;
}
#
# Ssh into inner boss and use a utility script to determine
# when the nodes have reported in and are in PXEWAIT (part of the
# inner elab). Note the short timeout, since this operation should
# be virtually instantaneous.
#
print "Waiting for nodes to restart and join the inner emulab.\n";
TBDebugTimeStamp("Waiting for inner nodes to restart");
$UID = 0;
$stat = system("$SSH -host $bossnode ".
"/usr/testbed/sbin/node_statewait -t 15 -a");
$UID = $SAVEUID;
if ($stat) {
tbwarn("Error ($stat) waiting for nodes to restart, falling back to reboot.");
goto rebootnodes;
}
goto restartworked;
}
rebootnodes:
print "Rebooting inner experimental nodes.\n"; print "Rebooting inner experimental nodes.\n";
TBDebugTimeStamp("Rebooting experimental nodes"); TBDebugTimeStamp("Rebooting experimental nodes");
# Run as real user again. # Run as real user again.
...@@ -445,6 +512,7 @@ if (@expnodes) { ...@@ -445,6 +512,7 @@ if (@expnodes) {
} }
$UID = $SAVEUID; $UID = $SAVEUID;
restartworked:
# #
# To avoid confusion later (with swapmod, which wants them to be ISUP), # To avoid confusion later (with swapmod, which wants them to be ISUP),
# and so the web interface does not show the nodes as down, set the # and so the web interface does not show the nodes as down, set the
...@@ -652,6 +720,16 @@ sub DumpDBGoo() ...@@ -652,6 +720,16 @@ sub DumpDBGoo()
# Clear any node reservations on the inside # Clear any node reservations on the inside
DBQueryFatal("update temp_${table} set ". DBQueryFatal("update temp_${table} set ".
" reserved_pid=null where reserved_pid is not null"); " reserved_pid=null where reserved_pid is not null");
# Put the inner nodes into "limbo" so they DTRT when restarted
if ($restartnodes) {
DBQueryFatal("update temp_${table} set".
" op_mode='PXEKERNEL',next_op_mode='',".
" eventstate='". TBDB_NODESTATE_PXELIMBO . "',".
" temp_boot_osid=NULL,next_boot_osid=NULL,".
" osid=NULL".
" where role='testnode'");
}
} }
DBQueryWarn("select * from temp_$table ". DBQueryWarn("select * from temp_$table ".
......
#!/usr/bin/perl -wT #!/usr/bin/perl -wT
# #
# EMULAB-COPYRIGHT # EMULAB-COPYRIGHT
# Copyright (c) 2000-2009 University of Utah and the Flux Group. # Copyright (c) 2000-2010 University of Utah and the Flux Group.
# All rights reserved. # All rights reserved.
# #
# node reboot library. Basically the backend to the node_reboot script, but # node reboot library. Basically the backend to the node_reboot script, but
...@@ -538,6 +538,7 @@ sub RebootNode { ...@@ -538,6 +538,7 @@ sub RebootNode {
return -2; return -2;
} }
if ($nodestate eq TBDB_NODESTATE_PXEWAIT() || if ($nodestate eq TBDB_NODESTATE_PXEWAIT() ||
$nodestate eq TBDB_NODESTATE_PXELIMBO() ||
$nodestate eq TBDB_NODESTATE_PXEWAKEUP()) { $nodestate eq TBDB_NODESTATE_PXEWAKEUP()) {
# #
# In killmode, we do not want to bother with sending a wakeup event. # In killmode, we do not want to bother with sending a wakeup event.
......
...@@ -82,6 +82,7 @@ my %vnode2pnode = (); ...@@ -82,6 +82,7 @@ my %vnode2pnode = ();
my %pnodevcount = (); my %pnodevcount = ();
my %plabvnodes = (); my %plabvnodes = ();
my %geninodes = (); my %geninodes = ();
my %einenodes = ();
my %osids = (); my %osids = ();
my %osmap = (); my %osmap = ();
my %canfail = (); my %canfail = ();
...@@ -225,7 +226,8 @@ if (TBExptPlabInElabPLC($pid, $eid, \$plcnode)) { ...@@ -225,7 +226,8 @@ if (TBExptPlabInElabPLC($pid, $eid, \$plcnode)) {
# Get the set of nodes, as well as the nodes table information for them. # Get the set of nodes, as well as the nodes table information for them.
# #
my $db_result = my $db_result =
DBQueryFatal("select n.*,l.pid,r.vname,r.sharing_mode from reserved as r ". DBQueryFatal("select n.*,l.pid,r.vname,r.sharing_mode,r.inner_elab_role ".
"from reserved as r ".
"left join nodes as n on n.node_id=r.node_id ". "left join nodes as n on n.node_id=r.node_id ".
"left join last_reservation as l on n.node_id=l.node_id ". "left join last_reservation as l on n.node_id=l.node_id ".
"where r.pid='$pid' and r.eid='$eid'"); "where r.pid='$pid' and r.eid='$eid'");
...@@ -248,6 +250,8 @@ while (my %row = $db_result->fetchhash()) { ...@@ -248,6 +250,8 @@ while (my %row = $db_result->fetchhash()) {
my $virtnode = $typeinfo->isvirtnode(); my $virtnode = $typeinfo->isvirtnode();
my $sharednode = defined($row{'sharing_mode'}) my $sharednode = defined($row{'sharing_mode'})
&& $row{'sharing_mode'} eq 'using_shared_local'; && $row{'sharing_mode'} eq 'using_shared_local';
my $iseinenode= $elabinelab && defined($row{'inner_elab_role'})
&& $row{'inner_elab_role'} eq 'node';
my $isremote = $typeinfo->isremotenode(); my $isremote = $typeinfo->isremotenode();
my $isgeninode= $typeinfo->isfednode(); my $isgeninode= $typeinfo->isfednode();
my $imageable = $typeinfo->imageable(); my $imageable = $typeinfo->imageable();
...@@ -282,6 +286,11 @@ while (my %row = $db_result->fetchhash()) { ...@@ -282,6 +286,11 @@ while (my %row = $db_result->fetchhash()) {
next; next;
} }
} }
elsif ($iseinenode) {
print "Will skip reload/reboot of inner elab node $node.\n";
$einenodes{$node} = 1;
next;
}
elsif ($subnode && !$imageable) { elsif ($subnode && !$imageable) {
print "Will skip subnode $node ISUP wait.\n"; print "Will skip subnode $node ISUP wait.\n";
} }
...@@ -530,6 +539,19 @@ while (my %row = $db_result->fetchhash()) { ...@@ -530,6 +539,19 @@ while (my %row = $db_result->fetchhash()) {
if $dbg; if $dbg;
} }
#
# XXX Inner elab nodes should never report in to us.
# If they do, make sure they wind up in PXEWAIT.
#
if (keys(%einenodes)) {
DBQueryFatal("update nodes set ".
" def_boot_osid=NULL,".
" next_boot_osid=NULL,".
" temp_boot_osid=NULL ".
"where node_id in (".
join(",", map("'$_'", keys %einenodes)). ")");
}
@all_nodes = (keys %nodes, keys %vnodes); @all_nodes = (keys %nodes, keys %vnodes);
# #
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# #
# EMULAB-COPYRIGHT # EMULAB-COPYRIGHT
# Copyright (c) 2000-2009 University of Utah and the Flux Group. # Copyright (c) 2000-2010 University of Utah and the Flux Group.
# All rights reserved. # All rights reserved.
# #
...@@ -245,7 +245,7 @@ if ($showmap && $state eq EXPTSTATE_ACTIVE) { ...@@ -245,7 +245,7 @@ if ($showmap && $state eq EXPTSTATE_ACTIVE) {
foreach $v (sort keys(%v2pmap)) { foreach $v (sort keys(%v2pmap)) {
my $p = $v2pmap{$v}->{PNODE}; my $p = $v2pmap{$v}->{PNODE};
my $t = $v2pmap{$v}->{TYPE}; my $t = $v2pmap{$v}->{TYPE};
my $o = $v2pmap{$v}->{OSID}; my $o = $v2pmap{$v}->{OSID} ? $v2pmap{$v}->{OSID} : "<NONE>";
my $m = $v2pmap{$v}->{VVP}; my $m = $v2pmap{$v}->{VVP};
printf "%-15s %-12s %-15s %s", $v, $t, $o, $p; printf "%-15s %-12s %-15s %s", $v, $t, $o, $p;
if ($m ne $p) { if ($m ne $p) {
......
...@@ -1422,12 +1422,17 @@ sub doSwapin($) { ...@@ -1422,12 +1422,17 @@ sub doSwapin($) {
# #
# Do linktest if user requested it at swapin. # Do linktest if user requested it at swapin.
# #
# We don't run linktest on elabinelabs since there may be no network
# (i.e., in the "single control network" case) and inner nodes are
# going to be sitting in PXEWAIT.
#
my $query_result = my $query_result =
DBQueryFatal("select linktest_level,linktest_pid from experiments ". DBQueryFatal("select linktest_level,linktest_pid from experiments ".
"where pid='$pid' and eid='$eid'"); "where pid='$pid' and eid='$eid'");
my ($linktest_level,$linktest_pid) = $query_result->fetchrow_array(); my ($linktest_level,$linktest_pid) = $query_result->fetchrow_array();
if ($linktest_level && ($type == REAL || $type == MODIFY)) { if (!$elabinelab && $linktest_level &&
($type == REAL || $type == MODIFY)) {
if ($linktest_pid) { if ($linktest_pid) {
tbwarn "Linktest is already running! $linktest_pid"; tbwarn "Linktest is already running! $linktest_pid";
}