Commit 0b90ef4f authored by Mike Hibler
Browse files

Make "reset" operation work for blockstores.

Reset on local/remote blockstores ensures that there is no blockstore related
state left in the root filesystem (e.g., mounts in /etc/fstab, iSCSI config,
LVM/ZFS state). It does this in such a way that upon reboot, all the necessary
state is recreated.

What this means is that you should now be able to take an image of a node
that uses blockstores and have that image actually work on another node!
Previously, there could/would be leftover blockstore turds that would make
the new image fail to boot.

Of course, this won't work until the standard images are remade and will
then only work for those images or images derived from them.
parent 391c5224
#!/usr/bin/perl -w
#
# Copyright (c) 2004-2015 University of Utah and the Flux Group.
# Copyright (c) 2004-2016 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -118,27 +118,7 @@ SWITCH: for ($action) {
last SWITCH;
};
/^reset$/i && do {
#
# XXX this should be docleanup(3), but we have not implemented
# the nasty temporary removal of LVM/ZFS/etc state from the
# imageable filesystems for local blockstores.
#
# So for now we just do a shutdown. This will do the right thing
# for SAN-based blockstores which only use traditional filesystem
# mounts (i.e., no volume manager) and thus have no local disk
# state (as SAN mounts do not appear in /etc/fstab).
#
# It is also the right thing for the local SYSVOL case in which
# you want the blockstore to be included in the image.
#
# It is not correct for other blockstores, where you will get
# partial state in the image (e.g., in /etc/lvm or /etc/zfs).
#
# The moral is: if you are going to do a custom image, you should
# only use persistent SAN-based blockstores or create a SYSVOL local
# blockstore and take a whole-disk image!
#
docleanup(0);
docleanup(3);
last SWITCH;
};
# XXX non-standard, for debugging
......@@ -524,7 +504,21 @@ sub docleanup($)
fatal("Could not retrieve stashed storage config!");
}
if ($doteardown) {
#
# XXX the $dolocal is a quick hack to ensure that when we are
# doing a reconfig/reset ala:
# rc.storageremote reset
# rc.storagelocal reset
# that we don't remove the OLDCONFIG until the end. Otherwise
# we would not reconfig/reset the local blockstores because there
# would be no config when we reached rc.storagelocal! This relies
# heavily on the ordering of the two scripts in rc.config and the
# fact that we reverse that list of scripts when we shutdown.
# We use this hack below as well where we remove STORAGEMAP and
# DISKINFO. Note that the storage state will be inconsistent between
# removing the remote and local storage, but we can live with that.
#
if ($doteardown && $dolocal) {
unlink($OLDCONFIG);
}
}
......@@ -540,8 +534,16 @@ sub docleanup($)
}
}
if ($doteardown == 2) {
print "Forcing teardown of storage, ignore errors...\n";
my $msg = "";
if ($dolocal && !$doremote) {
$msg = "local ";
} elsif (!$dolocal && $doremote) {
$msg = "remote ";
}
if ($doteardown == 1 || $doteardown == 2) {
print "Forcing teardown of ${msg}storage, volatile blockstores WILL BE DESTROYED...\n";
} elsif ($doteardown == 3) {
print "Clearing ${msg}storage config from root filesystem, volatile blockstores will be preserved...\n";
}
my @cmds = ();
......@@ -578,7 +580,10 @@ sub docleanup($)
system("/bin/kill `cat $bsapidfile`");
}
unlink($STORAGEMAP, $DISKINFO);
# XXX see XXX comment above.
if ($dolocal) {
unlink($STORAGEMAP, $DISKINFO);
}
}
#
......@@ -755,7 +760,9 @@ sub process($$$$)
print "unmounted " . $href->{'MOUNTPOINT'} .
($doteardown ? " and " : " ");
}
if ($doteardown) {
if ($doteardown == 3) {
print "deactivated $dev";
} elsif ($doteardown) {
print "destroyed $dev";
}
print "\n";
......
#!/usr/bin/perl -wT
#
# Copyright (c) 2013-2015 University of Utah and the Flux Group.
# Copyright (c) 2013-2016 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -453,6 +453,27 @@ sub get_zpool_datasets($)
return @dsets;
}
#
# Return a list of names of active (mounted) datasets in the given pool.
# Returns an empty list if there is no such pool or no datasets.
#
sub get_zpool_active_datasets($)
{
    my ($pool) = @_;
    my @dsets = ();

    #
    # Ask zfs for the "mounted" property of everything under the pool and
    # collect the names whose value is "yes". Each output line is expected
    # to look like "<name> <value>". stderr is discarded, so a failing
    # command (e.g., no such pool) simply produces no matching lines.
    #
    # A lexical handle and loop variable are used so that we do not
    # clobber the global FD handle or the caller's $_.
    #
    if (open(my $fd, "-|",
	     "$ZFS get -Hr -o name,value mounted $pool 2>/dev/null")) {
	while (my $line = <$fd>) {
	    #
	    # \Q..\E makes the pool name match literally; without it any
	    # regex metacharacter in the name (e.g., ".") would be treated
	    # as pattern syntax. Only "<pool>/<something>" names are
	    # accepted, so the pool's own top-level entry is never returned.
	    #
	    if ($line =~ /^(\Q$pool\E\/\S+)\s+yes/) {
		push(@dsets, $1);
	    }
	}
	close($fd);
    }

    # In scalar context callers get the number of mounted datasets.
    return @dsets;
}
#
# Returns 1 if the volume manager has been initialized.
# For ZFS this means that the "emulab" zpool exists.
......@@ -466,6 +487,15 @@ sub is_lvm_initialized($)
if (mysystem("$ZPOOL list emulab >/dev/null 2>&1") == 0) {
return 1;
}
#
# zpool may not exist if we are rebooting after a "reset";
# in that case the pool will have been exported.
# Try an import to bring it back.
#
if (mysystem("$ZPOOL import emulab >/dev/null 2>&1") == 0) {
return 1;
}
} else {
if (get_vinum_drives() > 0) {
return 1;
......@@ -625,6 +655,7 @@ sub get_diskinfo($)
#
if ($usezfs) {
my @vdevs = get_zpool_vdevs("emulab");
foreach my $dev (@vdevs) {
if (exists($geominfo{$dev}) && $geominfo{$dev}{'type'} eq "DISK") {
$geominfo{$dev}{'inuse'} = 1;
......@@ -692,9 +723,9 @@ sub get_diskinfo($)
# See if this is a filesystem type we can deal with.
# If so, return the type suitable for use by fsck and mount.
#
sub get_fstype($$;$)
sub get_fstype($$;$$)
{
my ($href,$dev,$rwref) = @_;
my ($href,$dev,$rwref,$silent) = @_;
my $type = "";
#
......@@ -767,18 +798,70 @@ sub get_fstype($$;$)
if ($rwref) {
$$rwref = 0;
}
if ($FBSD_VERSION < 10) {
return undef;
if ($FBSD_VERSION >= 10) {
return "ext2fs";
}
return "ext2fs";
}
if (!$silent) {
my $lv = $href->{'VOLNAME'};
if ($type) {
warn("*** $lv: unsupported FS ($type) on $dev\n");
} else {
warn("*** $lv: unknown or no FS on $dev\n");
}
}
if ($rwref) {
$$rwref = 0;
}
return undef;
}
#
# Check that the device for a blockstore has a valid filesystem by
# fscking it. If $fixit is zero, run the fsck RO just to report if
# the filesystem is there and consistent, otherwise attempt to fix it.
# $redir is a redirect string for output of the fsck command.
#
# Returns 1 if all is well, 0 otherwise.
#
sub checkfs($$$)
{
    my ($href, $fixit, $redir) = @_;
    my $volname = $href->{'VOLNAME'};
    my $device  = $href->{'LVDEV'};

    # Nothing to check unless get_fstype recognizes what is on the device.
    my $fstype = get_fstype($href, $device);
    return 0
	if (!$fstype);

    #
    # Only let fsck repair things ("-p") when the caller asked for a fix
    # and the blockstore is not exported read-only; otherwise run it in
    # report-only mode ("-n").
    #
    my $checkonly = (!$fixit || $href->{'PERMS'} eq "RO");
    my $fopt = $checkonly ? "-n" : "-p";

    #
    # Pick the fsck binary. EXT filesystems use the EXT fsck directly
    # when it is installed, since the FBSD 10.x era port does not install
    # everything correctly for use by "fsck -t ext2fs". Everything else
    # goes through the stock fsck with an explicit type.
    #
    my $fsck;
    if ($fstype eq "ext2fs" && -x "$EXT_FSCK") {
	$fsck = $EXT_FSCK;
    } else {
	$fsck = $BSD_FSCK;
	$fopt .= " -t $fstype";
    }

    # mysystem returns non-zero (true) when the command fails.
    my $failed = mysystem("$fsck $fopt $device $redir");
    if (!$failed) {
	return 1;
    }

    warn("*** $volname: fsck of $device failed\n");
    return 0;
}
#
# Handle one-time operations.
# Return a cookie (object) with current state of storage subsystem.
......@@ -1132,34 +1215,26 @@ sub os_check_storage_element($$)
#
my $mpoint = $href->{'MOUNTPOINT'};
if ($mpoint) {
my $line = `$MOUNT | grep '^/dev/$dev on '`;
if (!$line) {
my $mopt = "";
my $fopt = "-p";
my $mdev = $href->{'LVDEV'};
my $mopt = "";
my $line = `$MOUNT | grep '^$mdev on '`;
if (!$line) {
# determine the filesystem type
my $rw = 0;
my $fstype = get_fstype($href, "/dev/$dev", \$rw);
my $fstype = get_fstype($href, $mdev, \$rw);
if (!$fstype) {
if (exists($href->{'FSTYPE'})) {
warn("*** $bsid: unsupported FS (".
$href->{'FSTYPE'}.
") on /dev/$dev\n");
} else {
warn("*** $bsid: unknown FS on /dev/$dev\n");
}
return -1;
}
# check for RO export and adjust options accordingly
if ($href->{'PERMS'} eq "RO") {
$mopt = "-o ro";
$fopt = "-n";
}
# OS only supports RO mounting, right now we just fail
elsif ($rw == 0) {
warn("*** $bsid: OS only supports RO mounting of ".
$href->{'FSTYPE'}. " FSes\n");
"$fstype FSes\n");
return -1;
}
......@@ -1169,35 +1244,28 @@ sub os_check_storage_element($$)
return -1;
}
#
# fsck it in case of an abrupt shutdown.
#
# Note that we invoke EXT fsck directly as the FBSD 10.x
# era port does not install everything correctly for use
# by "fsck -t ext2fs".
#
my $FSCK = $BSD_FSCK;
if ($fstype eq "ext2fs" && -x "$EXT_FSCK") {
$FSCK = $EXT_FSCK;
} else {
$fopt .= " -t $fstype";
}
if (mysystem("$FSCK $fopt /dev/$dev $redir")) {
warn("*** $bsid: fsck of /dev/$dev failed\n");
# fsck the filesystem in case of an abrupt shutdown.
if (!checkfs($href, 1, $redir)) {
return -1;
}
if (mysystem("$MOUNT $mopt -t $fstype /dev/$dev $mpoint $redir")) {
warn("*** $bsid: could not mount /dev/$dev on $mpoint\n");
# and mount it
if (mysystem("$MOUNT $mopt -t $fstype $mdev $mpoint $redir")) {
warn("*** $bsid: could not mount $mdev on $mpoint\n");
return -1;
}
}
elsif ($line !~ /^\/dev\/$dev on (\S+) / || $1 ne $mpoint) {
elsif ($line !~ /^${mdev} on (\S+) / || $1 ne $mpoint) {
warn("*** $bsid: mounted on $1, should be on $mpoint\n");
return -1;
}
}
# XXX set the fstype for reporting
if (!exists($href->{'FSTYPE'})) {
get_fstype($href, "/dev/$dev");
}
return 1;
}
......@@ -1329,24 +1397,65 @@ sub os_check_storage_slice($$)
return -1;
}
$href->{'FSTYPE'} = "zfs";
} else {
my $line = `$MOUNT | grep '^/dev/$mdev on '`;
if (!$line && mysystem("$MOUNT $mpoint")) {
goto done;
}
my $line = `$MOUNT | grep '^/dev/$mdev on '`;
if (!$line) {
#
# See if the mount exists in /etc/fstab.
#
# XXX Right now if it does not, it might be because we
# removed it prior to creating an image. So we make some
# additional sanity checks (right now fsck'ing the alleged FS)
# and if it passes, re-add the mount line.
#
$line = `grep '^/dev/$mdev\[\[:space:\]\]' /etc/fstab`;
if (!$line) {
warn("*** $lv: mount of /dev/$mdev missing from fstab; sanity checking and re-adding\n");
my $fstype = get_fstype($href, "/dev/$mdev");
if (!$fstype) {
return -1;
}
# XXX sanity check, is there a recognized FS on the dev?
# XXX checkfs needs LVDEV set
$href->{'LVDEV'} = "/dev/$mdev";
if (!checkfs($href, 0, "")) {
undef $href->{'LVDEV'};
return -1;
}
undef $href->{'LVDEV'};
if (!open(FD, ">>/etc/fstab")) {
warn("*** $lv: could not add mount to /etc/fstab\n");
return -1;
}
print FD "# /dev/$mdev added by $BINDIR/rc/rc.storage\n";
print FD "/dev/$mdev\t$mpoint\t$fstype\trw\t2\t2\n";
close(FD);
}
if (mysystem("$MOUNT $mpoint")) {
warn("*** $lv: is not mounted, should be on $mpoint\n");
return -1;
}
if ($line && ($line !~ /^\/dev\/$mdev on (\S+) / || $1 ne $mpoint)) {
} else {
if ($line !~ /^\/dev\/$mdev on (\S+) / || $1 ne $mpoint) {
warn("*** $lv: mounted on $1, should be on $mpoint\n");
return -1;
}
$href->{'FSTYPE'} = "ufs";
}
$href->{'FSTYPE'} = "ufs";
}
if ($devtype ne "ZFS") {
$mdev = "/dev/$mdev";
}
$href->{'LVDEV'} = "$mdev";
done:
# XXX set the fstype for reporting
if (!exists($href->{'FSTYPE'})) {
get_fstype($href, $mdev);
}
$href->{'LVDEV'} = $mdev;
return 1;
}
......@@ -1403,46 +1512,27 @@ sub os_create_storage($$)
#
if ($href->{'CLASS'} eq "SAN" && $href->{'PROTO'} eq "iSCSI" &&
$href->{'PERSIST'} != 0) {
# determine the filesystem type
# check for the easy errors first, before time consuming fsck
my $rw = 0;
$fstype = get_fstype($href, $mdev, \$rw);
if (!$fstype) {
if (exists($href->{'FSTYPE'})) {
warn("*** $lv: unsupported FS (".
$href->{'FSTYPE'}.
") on $mdev\n");
} else {
warn("*** $lv: unknown FS on $mdev\n");
}
return 0;
}
# check for RO export and adjust options accordingly
# check for RO export and adjust mount options accordingly
if ($href->{'PERMS'} eq "RO") {
$mopt = "-o ro";
$fopt = "-n";
}
# OS only supports RO mounting, right now we just fail
elsif ($rw == 0) {
warn("*** $lv: OS only supports RO mounting of ".
$href->{'FSTYPE'}. " FSes\n");
"$fstype FSes\n");
return 0;
}
#
# Note that we invoke EXT fsck directly as the FBSD 10.x
# era port does not install everything correctly for use
# by "fsck -t ext2fs".
#
my $FSCK = $BSD_FSCK;
if ($fstype eq "ext2fs" && -x "$EXT_FSCK") {
$FSCK = $EXT_FSCK;
} else {
$fopt .= " -t $fstype";
}
if (mysystem("$FSCK $fopt $mdev $redir")) {
warn("*** $lv: fsck ($fstype) of persistent store $mdev failed\n");
# finally do the fsck, fixing errors if possible
if (!checkfs($href, 1, $redir)) {
return 0;
}
}
......@@ -1451,9 +1541,21 @@ sub os_create_storage($$)
#
else {
if (mysystem("$MKFS -b $UFSBS $mdev $redir")) {
#
# XXX hmm...apparently the iSCSI device node can appear
# before it is ready for I/O, so we can get here before the
# device is really ready. So wait a second and try again.
#
if ($href->{'CLASS'} eq "SAN" && $href->{'PROTO'} eq "iSCSI") {
sleep(1);
if (mysystem("$MKFS -b $UFSBS $mdev $redir") == 0) {
goto isok;
}
}
warn("*** $lv: could not create FS$logmsg\n");
return 0;
}
isok:
$href->{'FSTYPE'} = "ufs";
}
......@@ -2058,6 +2160,9 @@ sub os_remove_storage_element($$$)
#
# teardown==0 means we are rebooting: unmount and shutdown gvinum
# teardown==1 means we are reconfiguring and will be destroying everything
# teardown==2 means the same as 1 but we ignore errors and always plow ahead
# teardown==3 means we are taking an image and we recoverably remove
# blockstore state from the root filesystem.
#
sub os_remove_storage_slice($$$)
{
......@@ -2135,6 +2240,42 @@ sub os_remove_storage_slice($$$)
}
}
#
# Teardown for imaging (3).
# Here we just want to clear blockstore related state from the
# root filesystem so that we get a clean image:
#
# For SYSVOL, there is nothing further to do. If they are taking
# a full disk image, then the blockstore will be included in the
# image unless the imagezip call explicitly ignores the partition.
# We can live with this as full images are discouraged now.
#
# For NONSYSVOL, aka a ZFS zpool on the extra disks, we try
# exporting the pool once all the blockstores have been unmounted.
#
# For ANY, we treat it the same as NONSYSVOL. There is a potential
# problem that a full disk image would include sda4 which would
# appear as part of an incomplete zpool when the image is loaded
# elsewhere. This will cause lots of warnings, but again, we don't
# care so much about full images.
#
if ($teardown == 3) {
if (get_zpool_active_datasets("emulab") == 0 &&
mysystem("$ZPOOL export emulab $redir")) {
warn("*** $lv: could not export zpool 'emulab'\n");
}
#
# If we are the one that enabled ZFS, disable it
#
if (!mysystem("grep -q '^# zfs_enable added by.*rc.storage' /etc/rc.conf")) {
if (mysystem("sed -i -e '/^# zfs_enable added by.*rc.storage/,+1d' /etc/rc.conf")) {
warn("*** $lv: could not remove zfs_enable from /etc/rc.conf\n");
}
}
return 1;
}
#
# Remove LV
#
......
......@@ -481,18 +481,24 @@ sub get_diskinfo()
# We only do this if we know there are volume groups, else lvs will fail.
#
if ($gotvgs &&
open(FD, "lvs -o vg_name,lv_name,lv_size --units m --noheadings|")) {
open(FD, "lvs -o vg_name,lv_name,lv_size,lv_attr --units m --noheadings|")) {
while (<FD>) {
if (/^\s+(\S+)\s+(\S+)\s+(\d+)\.\d+m$/) {
if (/^\s+(\S+)\s+(\S+)\s+(\d+)\.\d+m\s+([-a-zA-Z]{9})$/) {
my $vg = $1;
my $lv = $2;
my $size = $3;
my $attrs = $4;
my $dev = "$vg/$lv";
$geominfo{$dev}{'level'} = 2;
$geominfo{$dev}{'type'} = "LVM";
$geominfo{$dev}{'size'} = $size;
$geominfo{$dev}{'inuse'} = 1;
if ($attrs =~ /^....a....$/) {
$geominfo{$dev}{'active'} = 1;
} else {
$geominfo{$dev}{'active'} = 0;
}
}
}
close(FD);
......@@ -505,9 +511,9 @@ sub get_diskinfo()
# See if this is a filesystem type we can deal with.
# If so, return the type suitable for use by fsck and mount.
#
sub get_fstype($$;$)
sub get_fstype($$;$$)
{
my ($href,$dev,$rwref) = @_;
my ($href,$dev,$rwref,$silent) = @_;
my $type = "";
#
......@@ -548,12 +554,55 @@ sub get_fstype($$;$)
return "ufs";
}
if (!$silent) {
my $lv = $href->{'VOLNAME'};
if ($type) {
warn("*** $lv: unsupported FS ($type) on $dev\n");
} else {
warn("*** $lv: unknown or no FS on $dev\n");
}
}
if ($rwref) {
$$rwref = 0;
}
return undef;
}
#
# Check that the device for a blockstore has a valid filesystem by
# fscking it. If $fixit is zero, run the fsck RO just to report if
# the filesystem is there and consistent, otherwise attempt to fix it.
# $redir is a redirect string for output of the fsck command.
#
# Returns 1 if all is well, 0 otherwise.
#
sub checkfs($$$)
{
my ($href,$fixit,$redir) = @_;
my $lv = $href->{'VOLNAME'};
my $mdev = $href->{'LVDEV'};
# determine the filesystem type
my $fstype = get_fstype($href, $mdev);
if (!$fstype) {
return 0;
}
my $fopt = "-p";
if (!$fixit || $href->{'PERMS'} eq "RO") {
$fopt = "-n";
}
# XXX cannot fsck ufs, right now we just pretend everything is okay
if ($fstype ne "ufs" &&
mysystem("$FSCK $fopt $mdev $redir")) {