Commit f36bcfab authored by Mike Hibler's avatar Mike Hibler

Support dynamically created NFS-root filesystems for admin MFS.

Significant hackery involved. Similar to exports_setup, there is a boss-side
script and an ops-side script to handle creation and destruction of the ZFS
clones that are used for the NFS filesystem. The rest was all about when to
invoke said scripts.

Creation is easy, we just do a clone whenever the TBAdminMfsSelect is called
to "turn on" node admin mode. Destruction is not so simple. If we destroyed
the clone on the corresponding TBAdminMfsSelect "off" call, then we could
yank the filesystem out from under the node if it was still running in the
MFS (e.g., "node_admin -n off node"). While that would probably be okay in
most uses, where at worst we would have to apod or power cycle the node, we
try to do better. TBAdminMfsSelect "off" instead just renames the clone
(to "<nodeid>-DEAD") so that it stays available if the node is running on
it at the time, but ensures that it will not get accidentally used by any
future boot. We check for, and destroy, any previous versions for a node
every time we invoke the nfsmfs_setup code for that node. We also destroy
live or dead clones whenever we call nfree. This ensures that all MFSes
get cleaned up at experiment swapout time.
parent 1f0cdf67
#!/usr/bin/perl -w
#
# Copyright (c) 2000-2013 University of Utah and the Flux Group.
# Copyright (c) 2000-2014 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -171,7 +171,7 @@ use vars qw(@ISA @EXPORT);
TBDB_RSRVROLE_SIMHOST TBDB_RSRVROLE_STORAGEHOST
TBDB_EXPT_WORKDIR
TB_OSID_MBKERNEL
TB_OSID_MBKERNEL TB_OSID_PATH_NFS
TB_OSID_FREEBSD_MFS TB_OSID_FRISBEE_MFS
TBDB_TBCONTROL_PXERESET TBDB_TBCONTROL_RESET
TBDB_TBCONTROL_RELOADDONE TBDB_TBCONTROL_RELOADDONE_V2
......@@ -377,6 +377,9 @@ sub TB_OSID_VERSLEN() { 12; }
# Magic OSID constants
sub TB_OSID_MBKERNEL() { "_KERNEL_"; } # multiboot kernel OSID
# Magic OSID path
sub TB_OSID_PATH_NFS() { "fs:/nfsroot" };
# Magic MFS constants
sub TB_OSID_FREEBSD_MFS() { "FREEBSD-MFS" };
sub TB_OSID_FRISBEE_MFS() { "FRISBEE-MFS" };
......
......@@ -1539,6 +1539,36 @@ sub default_osid($;$) {
return NodeTypeInfo($self)->default_osid($stuff);
}
#
# Return the OSID of the admin MFS for this node.
#
# Lookup order: the per-node "adminmfs_osid" attribute, then the node-type
# attribute of the same name (each is used only when the lookup returns 0
# and yields a defined value), and finally the OSID of the
# TB_OSID_FREEBSD_MFS() image looked up under TBOPSPID().
#
# Returns undef if the fallback image cannot be found.
#
sub adminmfs_osid($;$) {
    my ($self, $stuff) = @_;	# $stuff is accepted but not used here
    my $val = undef;

    if (NodeAttribute($self, "adminmfs_osid", \$val) == 0 &&
	defined($val)) {
	return $val;
    }
    if (NodeTypeAttribute($self, "adminmfs_osid", \$val) == 0 &&
	defined($val)) {
	return $val;
    }

    # Fall back to the default MFS; do not dereference a failed lookup.
    my $osinfo = OSinfo->Lookup(TBOPSPID(), TB_OSID_FREEBSD_MFS());
    return undef
	if (!defined($osinfo));
    return $osinfo->osid();
}
#
# Return the OSID of the disk-loading MFS for this node.
#
# Lookup order: the per-node "diskloadmfs_osid" attribute, then the
# node-type attribute of the same name (each is used only when the lookup
# returns 0 and yields a defined value), and finally the OSID of the
# TB_OSID_FRISBEE_MFS() image looked up under TBOPSPID().
#
# Returns undef if the fallback image cannot be found.
#
sub diskloadmfs_osid($;$) {
    my ($self, $stuff) = @_;	# $stuff is accepted but not used here
    my $val = undef;

    if (NodeAttribute($self, "diskloadmfs_osid", \$val) == 0 &&
	defined($val)) {
	return $val;
    }
    if (NodeTypeAttribute($self, "diskloadmfs_osid", \$val) == 0 &&
	defined($val)) {
	return $val;
    }

    # Fall back to the default MFS; do not dereference a failed lookup.
    my $osinfo = OSinfo->Lookup(TBOPSPID(), TB_OSID_FRISBEE_MFS());
    return undef
	if (!defined($osinfo));
    return $osinfo->osid();
}
sub default_imageid($;$) {
my ($self,$stuff) = @_;
my $val = undef;
......
......@@ -566,6 +566,15 @@ sub IsGeneric($)
return ((defined($version) && $version ne "") ? 0 : 1);
}
#
# Is this OS an NFS-based MFS?
#
# Returns 1 when mfs() is true and path() equals TB_OSID_PATH_NFS().
# Returns 0 otherwise, including when not invoked on an object reference.
#
sub IsNfsMfs($)
{
    my ($self) = @_;

    return 0
	if (!ref($self));
    return 0
	if (!$self->mfs());

    # Guard the string compare so an undefined path does not warn under -w.
    my $path = $self->path();
    return (defined($path) && $path eq TB_OSID_PATH_NFS()) ? 1 : 0;
}
#
# Boot command line. The caller supplies the default in $pref.
#
......
......@@ -1523,21 +1523,10 @@ sub TBNodeAdminOSID($)
my ($nodeid) = @_;
my $node = LocalNodeLookup($nodeid);
return 0
if (!defined($node));
my $mfs;
if ($node->NodeAttribute("adminmfs_osid", \$mfs) || !defined($mfs)) {
return 0
if ($node->NodeTypeAttribute("adminmfs_osid", \$mfs) != 0);
if ($node) {
return $node->adminmfs_osid();
}
$mfs = TB_OSID_FREEBSD_MFS()
if (!defined($mfs));
return $mfs;
return 0;
}
sub TBNodeDiskloadOSID($)
......@@ -1545,21 +1534,10 @@ sub TBNodeDiskloadOSID($)
my ($nodeid) = @_;
my $node = LocalNodeLookup($nodeid);
return 0
if (!defined($node));
my $mfs;
if ($node->NodeAttribute("diskloadmfs_osid", \$mfs) || !defined($mfs)) {
return 0
if ($node->NodeTypeAttribute("diskloadmfs_osid", \$mfs) != 0);
if ($node) {
return $node->diskloadmfs_osid();
}
$mfs = TB_OSID_FREEBSD_MFS()
if (!defined($mfs));
return $mfs;
return 0;
}
#
......
......@@ -75,6 +75,7 @@ my $osselect = "$TB/bin/os_select";
my $nodereboot = "$TB/bin/node_reboot";
my $makeconf = "$TB/sbin/dhcpd_makeconf";
my $snmpit = "$TB/bin/snmpit";
my $dynmfssetup = "$TB/sbin/nfsmfs_setup";
my $reloadpid = "emulab-ops";
my $pendingeid = "reloadpending";
my $rppendingeid = "repositionpending";
......@@ -85,6 +86,7 @@ my $lockedeid = NFREELOCKED_EID();
my @nodes;
my @freed_nodes = ();
my @dynanodes = ();
my @dynmfsnodes = ();
my $error = 0;
my %mustzero = ();
my $mustmakeconf = 0;
......@@ -462,6 +464,13 @@ foreach my $node (@freed_nodes) {
$mustclean = $clean;
}
#
# Remember if node has a dynamic (aka, NFS) MFS
#
if (OSinfo->Lookup($node->adminmfs_osid())->IsNfsMfs()) {
push(@dynmfsnodes, $node_id);
}
#
# If the node is virtual, release the shared resources it had
# reserved on the physical node.
......@@ -689,6 +698,15 @@ if (@dynanodes) {
Node::DeleteVnodes(@nodeids);
}
#
# XXX destroy any dynamic (aka NFS-based) MFSes belonging to the nodes
# being freed. Failure is reported as a warning only.
#
if (@dynmfsnodes) {
    my $status = system("$dynmfssetup -Df @dynmfsnodes");
    if ($status != 0) {
	print STDERR "*** WARNING: could not remove MFSes for @dynmfsnodes!\n";
    }
}
######################################################################
# Step 3 - Set up console for freed nodes.
#
......
......@@ -64,7 +64,8 @@ SBIN_STUFF = resetvlans console_setup.proxy sched_reload named_setup \
nfstrace plabinelab smbpasswd_setup smbpasswd_setup.proxy \
rmproj snmpit.proxynew snmpit.proxyv2 pool_daemon \
checknodes_daemon snmpit.proxyv3 image_setup tcpp \
arplockdown bscontrol reportboot
arplockdown bscontrol reportboot \
nfsmfs_setup nfsmfs_setup.proxy
ifeq ($(ISMAINSITE),1)
SBIN_STUFF += repos_daemon
......@@ -78,7 +79,7 @@ CTRLSBIN_STUFF = console_setup.proxy sfskey_update.proxy \
CTRLBIN_STUFF = fetchtar.proxy
FSBIN_STUFF = exports_setup.proxy smbpasswd_setup.proxy
FSBIN_STUFF = exports_setup.proxy smbpasswd_setup.proxy nfsmfs_setup.proxy
WEB_SBIN_SCRIPTS= webrmproj webpanic webrmgroup \
webmkgroup websetgroups webmkproj \
......@@ -122,7 +123,7 @@ SETUID_SBIN_SCRIPTS = mkproj rmgroup mkgroup frisbeehelper \
rmuser idleswap named_setup exports_setup \
sfskey_update setgroups newnode_reboot vnode_setup \
elabinelab nfstrace rmproj arplockdown \
bscontrol
bscontrol nfsmfs_setup
SETUID_LIBX_SCRIPTS = console_setup spewrpmtar_verify
SETUID_SUEXEC_SCRIPTS= spewlogfile
......
#!/usr/bin/perl -wT
#
# Copyright (c) 2005-2010 University of Utah and the Flux Group.
# Copyright (c) 2005-2014 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -38,7 +38,8 @@ use Exporter;
use vars qw(@ISA @EXPORT);
@ISA = "Exporter";
@EXPORT = qw ( TBAdminMfsBoot TBAdminMfsSelect TBAdminMfsRunCmd );
@EXPORT = qw ( TBAdminMfsBoot TBAdminMfsSelect TBAdminMfsRunCmd
TBAdminMfsCreate TBAdminMfsDestroy);
# Must come after package declaration!
use libdb;
......@@ -51,6 +52,8 @@ use StateWait;
sub TBAdminMfsBoot($$@);
sub TBAdminMfsSelect($$@);
sub TBAdminMfsRunCmd($$@);
sub TBAdminMfsCreate($$@);
sub TBAdminMfsDestroy($$@);
# Configure variables
my $TB = "@prefix@";
......@@ -61,6 +64,7 @@ my $ELABINELAB = @ELABINELAB@;
my $nodereboot = "$TB/bin/node_reboot";
my $power = "$TB/bin/power";
my $osselect = "$TB/bin/os_select";
my $nfsmfssetup = "$TB/sbin/nfsmfs_setup";
#
# The number of nodes we will power on at a time and the time to wait
......@@ -339,6 +343,15 @@ sub TBAdminMfsSelect($$@)
}
}
my %cargs;
$cargs{'name'} = $me;
my @cfailed = ();
if (TBAdminMfsCreate(\%cargs, \@cfailed, @good)) {
print STDERR "*** $me:\n".
" Failed to create NFS FS for @cfailed; ".
"continuing, but those nodes won't boot!\n";
}
# clear partition boots for successful nodes
if ($only && system("$osselect -c @good")) {
print STDERR "*** $me:\n".
......@@ -356,6 +369,20 @@ sub TBAdminMfsSelect($$@)
return 1;
}
@good = @nodes;
#
# XXX note that this is a "soft" destroy.
# We will only rename the existing MFS so that if the node
# is currently running on it, it will continue to function.
#
my %cargs;
$cargs{'name'} = $me;
my @cfailed = ();
if (TBAdminMfsDestroy(\%cargs, \@cfailed, @good)) {
print STDERR "*** $me:\n".
" Failed to destroy NFS FS for @cfailed; ".
"continuing, but these FSes will need to be cleaned up!\n";
}
}
#
......@@ -571,3 +598,119 @@ sub TBAdminMfsRunCmd($$@)
print STDOUT "All nodes have completed their command.\n";
return 0;
}
#
# TBAdminMfsCreate(\%args, \@failed, @nodes)
#
# For the given list of nodes, find those that need their MFS created
# dynamically and do so.
#
# Arguments passed via the $args hashref:
#
#   'name'   string identifying the caller for error messages
#            (accepted for interface compatibility; this routine prints
#            nothing itself and leaves reporting to the caller)
#   'force'  1 to force re-creation of the MFS if it already exists,
#            0 will just reuse any existing MFS.
#
# Returns zero if we successfully created all MFSes and non-zero otherwise.
# If the $failed ref is defined, it is an arrayref in which we return the
# list of nodes that failed. The setup script reports only one overall
# status, so on failure every NFS-MFS node passed to it is listed.
#
# Note: NO PERMISSION CHECKING IS DONE. It is assumed that the caller
# has performed all the necessary checks.
#
sub TBAdminMfsCreate($$@)
{
    my ($args, $failedref, @nodes) = @_;

    return 0
	if (@nodes == 0);

    my $force = exists($args->{'force'}) ? $args->{'force'} : 0;

    #
    # Find all the nodes that require a dynamic MFS
    #
    require OSinfo;
    my @nfsnodes = ();
    for my $node (@nodes) {
	my $osobj = OSinfo->Lookup(TBNodeAdminOSID($node));
	push(@nfsnodes, $node)
	    if ($osobj && $osobj->IsNfsMfs());
    }

    #
    # If any nodes are using an NFS-based MFS, set them up.
    # The command is run in list form so that node names are handed to
    # the script as discrete arguments and never pass through a shell.
    #
    if (@nfsnodes > 0) {
	my @cmd = ($nfsmfssetup);
	push(@cmd, "-f")
	    if ($force);
	if (system(@cmd, @nfsnodes)) {
	    @$failedref = @nfsnodes
		if (defined($failedref));
	    return 1;
	}
    }

    return 0;
}
#
# TBAdminMfsDestroy(\%args, \@failed, @nodes)
#
# For the given list of nodes, find those that use a dynamic MFS
# and destroy it.
#
# Arguments passed via the $args hashref:
#
#   'name'   string identifying the caller for error messages
#            (accepted for interface compatibility; this routine prints
#            nothing itself and leaves reporting to the caller)
#   'force'  1 to force actual destruction of the MFS, screwing
#            the node if it is currently running on it.
#            0 will just rename the existing MFS instead,
#            with the old copy destroyed on next creation.
#
# Returns zero if we successfully destroyed all MFSes and non-zero otherwise.
# If the $failed ref is defined, it is an arrayref in which we return the
# list of nodes that failed. The setup script reports only one overall
# status, so on failure every NFS-MFS node passed to it is listed.
#
# Note: NO PERMISSION CHECKING IS DONE. It is assumed that the caller
# has performed all the necessary checks.
#
sub TBAdminMfsDestroy($$@)
{
    my ($args, $failedref, @nodes) = @_;

    return 0
	if (@nodes == 0);

    my $force = exists($args->{'force'}) ? $args->{'force'} : 0;

    #
    # Find all the nodes that use a dynamic MFS
    #
    require OSinfo;
    my @nfsnodes = ();
    for my $node (@nodes) {
	my $osobj = OSinfo->Lookup(TBNodeAdminOSID($node));
	push(@nfsnodes, $node)
	    if ($osobj && $osobj->IsNfsMfs());
    }

    #
    # If any nodes are using an NFS-based MFS, tear them down.
    # The command is run in list form so that node names are handed to
    # the script as discrete arguments and never pass through a shell.
    #
    if (@nfsnodes > 0) {
	my @cmd = ($nfsmfssetup, "-D");
	push(@cmd, "-f")
	    if ($force);
	if (system(@cmd, @nfsnodes)) {
	    @$failedref = @nfsnodes
		if (defined($failedref));
	    return 1;
	}
    }

    return 0;
}
#!/usr/bin/perl -wT
#
# Copyright (c) 2000-2014 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
# This file is part of the Emulab network testbed software.
#
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this file. If not, see <http://www.gnu.org/licenses/>.
#
# }}}
#
use English;
use Getopt::Std;
#
# Create an NFS-based root filesystem for a node to act as the admin MFS.
# We only do this for nodes with the magic admin MFS OSID set unless force
# is specified.
#
#
# Print the command line synopsis on STDERR and exit with status -1.
#
sub usage()
{
    my @lines = (
	"Usage: nfsmfs_setup [-D] [-Ndfnq] node ...\n",
	"switches and arguments:\n",
	"-D - destroy rather than setup the MFS\n",
	"-N - don't check DB to see if node uses an NFS MFS, just do it\n",
	"-d - debug; enabled additional debugging\n",
	"-f - force (re)creation of MFS even if it exists\n",
	"-q - be quiet\n",
	"-n - don't do anything, just say what would happen\n",
	"node ... - list of nodes for which to create/destroy MFSes\n",
    );

    print(STDERR join("", @lines));
    exit(-1);
}
# Switches accepted by getopts(). This must list every switch that usage()
# documents and that the option checks consult; -N was missing, so getopts
# rejected it and the "nocheck" mode could never be enabled.
my $optlist = "DNdfnq";
my $impotent = 0;	# -n: say what would happen, do not do it
my $debug = 0;		# -d: extra debugging
my $destroy = 0;	# -D: destroy rather than set up
my $nocheck = 0;	# -N: skip the DB check for an NFS MFS
my $force = 0;		# -f: force (re)creation
my $quiet = 0;		# -q: suppress informational messages
#
# Configure variables
#
my $TB = "@prefix@";
my $TBOPS = "@TBOPSEMAIL@";
my $TESTMODE = @TESTMODE@;
my $FSNODE = "@FSNODE@";
my $WITHZFS = @WITHZFS@;
my $SSH = "$TB/bin/sshtb -n -l root -host $FSNODE";
my $PROG = "$TB/sbin/nfsmfs_setup.proxy";
#
# We don't want to run this script unless it's the real version.
#
if ($EUID != 0) {
die("*** $0:\n".
" Must be root! Maybe its a development version?\n");
}
#
# Only implemented with ZFS cloning.
#
if ($WITHZFS == 0) {
die("*** $0:\n".
" Only implemented with ZFS FS node right now.\n");
}
# un-taint path
$ENV{'PATH'} = '/bin:/usr/bin:/usr/sbin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
#
# Turn off line buffering on output
#
$| = 1;
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use Data::Dumper;
use Node;
use OSinfo;
#
# Check args.
#
# Parse the switches; a bad switch or a missing node list prints the usage
# message (usage() does not return).
my %options = ();
usage()
    if (!getopts($optlist, \%options));

$impotent = 1 if (defined($options{"n"}));
$debug    = 1 if (defined($options{"d"}));
$force    = 1 if (defined($options{"f"}));
$nocheck  = 1 if (defined($options{"N"}));
$quiet    = 1 if (defined($options{"q"}));
$destroy  = 1 if (defined($options{"D"}));

if (@ARGV == 0) {
    usage();
}
#
# Only one instance of this script may run at a time, so take the
# "nfsmfs" script lock (skipped in test mode).
#
if (!$TESTMODE) {
    my $locked = TBScriptLock("nfsmfs", 1);

    if ($locked != TBSCRIPTLOCK_OKAY()) {
	# An "ignore" result is a clean exit; anything else is fatal.
	if ($locked == TBSCRIPTLOCK_IGNORE) {
	    exit(0);
	}
	fatal("Could not get the lock after a long time!\n");
    }
}
#
# Build the list of nodes to operate on: untaint each name, make sure
# the node exists and, unless -N was given, keep only the nodes whose
# admin MFS is NFS-based.
#
my @nodes;
foreach my $nodeid (@ARGV) {
    if ($nodeid =~ /^([-\w]+)$/) {
	$nodeid = $1;
    } else {
	print STDERR "Skipping bogus node id '$nodeid'\n"
	    if (!$quiet);
	next;
    }

    my $nodeobj = Node->Lookup($nodeid);
    if (!$nodeobj) {
	print STDERR "Skipping bogus node '$nodeid'\n"
	    if (!$quiet);
	next;
    }

    if (!$nocheck) {
	# A failed OS lookup means we cannot tell; skip, do not crash.
	my $osinfo = OSinfo->Lookup($nodeobj->adminmfs_osid());
	if (!$osinfo) {
	    print STDERR "Skipping node '$nodeid': no admin MFS OS found\n"
		if (!$quiet);
	    next;
	}
	next
	    if (!$osinfo->IsNfsMfs());
    }
    push @nodes, $nodeid;
}

if (@nodes == 0) {
    print STDERR "Found no nodes that require NFS MFS.\n"
	if (!$quiet);
    # Release the script lock (taken only when not in test mode)
    # before leaving early.
    TBScriptUnlock()
	if (!$TESTMODE);
    exit(0);
}
#
# Run the proxy script on the fileserver with the matching switches,
# then drop the script lock. Nothing is done in test mode.
#
unless ($TESTMODE) {
    $UID = 0;

    my $opts = "";
    $opts = "-D"
	if ($destroy);
    if ($debug) {
	$opts .= " -d";
    }
    if ($force) {
	$opts .= " -f";
    }
    if ($impotent) {
	$opts .= " -n";
    }

    my $cmd = "$SSH $PROG $opts @nodes";

    # With -n we announce the command; the proxy is still invoked and
    # is passed -n itself.
    print STDERR "Would do: $cmd\n"
	if ($impotent);

    if (system($cmd) != 0) {
	fatal("Failed: $cmd: $?");
    }

    #
    # Release the lock!
    #
    TBScriptUnlock();
}
exit(0);
#
# Fatal error handler: release the script lock (unless in test mode),
# mail the message to $TBOPS (unless debugging), then die with it.
#
sub fatal {
    # Lexical, not local(): the message must not leak into a package global.
    my ($msg) = @_;

    TBScriptUnlock()
	if (!$TESTMODE);

    SENDMAIL($TBOPS, "NFS MFS setup: ", $msg)
	if (!$debug);

    die($msg);
}
#!/usr/bin/perl -wT
#
# Copyright (c) 2000-2014 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
# This file is part of the Emulab network testbed software.
#
# This file is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This file is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
# License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this file. If not, see <http://www.gnu.org/licenses/>.
#
# }}}
#
use English;
use Errno;
use Fcntl ':flock';
use Getopt::Std;
#
# Create a per-node NFS filesystem to act as the admin MFS.
# Currently we can only do this on ZFS by cloning a filesystem snapshot.
#
# usage: nfsmfs_setup [-Ddfn] node_id ...
#
# Parse the switches up front and refuse to continue on an unknown one,
# rather than silently ignoring what the caller asked for.
my %opts = ();
if (!getopts('Ddfn', \%opts)) {
    print STDERR "usage: nfsmfs_setup [-D] node_id ...\n";
    exit(1);
}
my $destroy = 0;	# -D: destroy rather than create
my $debug = 0;		# -d: debugging
my $doit = 1;		# cleared by -n
my $force = 0;		# -f: force
#
# Configure variables
#
my $TBOPS = "@TBOPSEMAIL@";
my $ZFSROOT = "@ZFS_ROOT@";
my $MOUNTPOINT = "/nfsroot";
my $ZFS = "/sbin/zfs";
# XXX this should be constructed per node type based on info passed in
my $GOLDEN = "$ZFSROOT$MOUNTPOINT/m400\@current";
my $etcdir;
my $exports;
my $exportsnew;
my $exportsback;
my $exportshead;
my $exportstail;
my $pidfile;
my $daemon;
# un-taint path
$ENV{'PATH'} = '/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
# Turn off line buffering on output
$| = 1;
#
# Testbed Support libraries
#
use lib "@prefix@/lib";
use libtestbed;
#
# Can only be done by root. We don't want users setting up or destroying
# these filesystems.
#
# Refuse to run unless we are root and the zfs binary is executable.
die("Must be root!")
    if ($UID != 0);
fatal("Can only be used with ZFS right now!")
    if (! -x "$ZFS");

# Turn the parsed switches into flags; -n clears $doit.
$destroy = 1 if (defined($opts{'D'}));
$debug   = 1 if (defined($opts{'d'}));
$force   = 1 if (defined($opts{'f'}));
$doit    = 0 if (defined($opts{'n'}));

# At least one node id is required.
unless (@ARGV >= 1) {
    print STDERR "usage: nfsmfs_setup [-D] node_id ...\n";
    exit(1);
}
#
# Make sure the golden filesystem exists (not needed when destroying).
# Ask ZFS about the snapshot by name instead of grepping the full listing:
# an unanchored grep treats the name as a regex and also accepts any
# snapshot whose name merely contains it.
#
if (!$destroy &&
    system("$ZFS list -o name -t snapshot $GOLDEN >/dev/null")) {
    fatal("ZFS snapshot '$GOLDEN' does not exist");
}
#
# Get a list of existing ZFS-based mount points.
# %mfs maps the name below $MOUNTPOINT to its ZFS dataset name.
#
# The listing is filtered here rather than through grep so that $?
# reflects the zfs command itself; with a trailing grep, "no matching
# lines" was reported as a zfs failure.
#
my %mfs = ();
my @mounts = `$ZFS list -o mountpoint,name 2>/dev/null`;
if ($?) {
    fatal("zfs list failed!?");
}
foreach my $line (@mounts) {
    if ($line =~ /^\Q$MOUNTPOINT\E\/(\S+)\s+(\S+)/) {
	$mfs{$1} = $2;
    }
}
#
# For each node, see if we need to do something and do it!
#
my @failed = ();
foreach my $nodeid (@ARGV) {
my $cmd;
#
# Untaint.
#
if ($nodeid =~ /^([-\w]+)$/) {
$nodeid = $1;
} else {
next;
}
#
# First, do cleanup.
# Get rid of any old version of the MFS for this node.
#
my $onodeid = "$nodeid-DEAD";
if (exists($mfs{$onodeid})) {
$cmd = "$ZFS destroy $ZFSROOT$MOUNTPOINT/$onodeid";