Commit d746f6ca authored by Mike Hibler's avatar Mike Hibler

Linux+LVM clientside for node-local storage.

Also, refactored the OS specific stuff into a new liblocstorage.pm
which is included by rc.storage. Note that there is no OS-independent
libstorage.pm, at least for now.
parent c1d21b9a
......@@ -58,6 +58,7 @@ my $STORAGEMAP = "$BOOTDIR/storagemap";
#
use libsetup;
use liblocsetup;
use liblocstorage;
use libtmcc;
use librc;
......@@ -238,6 +239,14 @@ sub process($$$;$)
my ($so,$href,$dosetup,$doteardown) = @_;
my $class = $href->{'CLASS'};
#
# XXX get rid of any trailing slashes on the mountpoint so it
# doesn't cause grief for the OS-dependent backend.
#
if (exists($href->{'MOUNTPOINT'})) {
$href->{'MOUNTPOINT'} =~ s#/+$##;
}
if ($href->{'CMD'} eq "ELEMENT") {
# look up the host name and convert to IP
if (exists($href->{'HOSTID'})) {
......@@ -281,7 +290,7 @@ sub process($$$;$)
warn("*** Unknown storage slice class '$class'\n");
return 0;
}
if ($href->{'BSID'} !~ /^ALL_(SPACE|SYSVOL|NONSYSVOL)$/) {
if ($href->{'BSID'} !~ /^(ANY|SYSVOL|NONSYSVOL)$/) {
warn("*** Unknown storage slice bsid '".$href->{'BSID'}."'\n");
return 0;
}
......@@ -329,6 +338,7 @@ sub process($$$;$)
}
print "\n";
} else {
print " Deconfiguring '" . $href->{'VOLNAME'} . "'...\n";
if (!os_remove_storage($so, $href, $doteardown)) {
warn("*** Could not remove storage device '" .
$href->{'VOLNAME'} . "'\n");
......@@ -358,6 +368,7 @@ sub process($$$;$)
# If setting up, do it. Otherwise there is nothing to do.
#
if ($dosetup) {
print " Configuring '" . $href->{'VOLNAME'} . "'...\n";
if (!os_create_storage($so, $href)) {
warn("*** Could not create storage device '" .
$href->{'VOLNAME'} . "'\n");
......
......@@ -3400,11 +3400,11 @@ sub getarpinfo($;$)
# IDX :=
# \d+ -- monotonically increasing number indicating order of operations
# BSID :=
# (ALL_SPACE|ALL_SYSVOL|ALL_NONSYSVOL) -- i.e. where to take space from
# "ALL_SPACE" will take from any disk, possibly from multiple disks via
# (ANY|SYSVOL|NONSYSVOL) -- i.e. where to take space from
# "ANY" will take from any disk, possibly from multiple disks via
# use of a logical volume manager
# "ALL_SYSVOL" will take from any remaining space on the boot disk
# "ALL_NONSYSVOL" will take from any space on any non-boot disk, possibly
# "SYSVOL" will take from any remaining space on the boot disk
# "NONSYSVOL" will take from any space on any non-boot disk, possibly
# from multiple disks via a LVM.
# VOLNAME :=
# string -- Emulab name for the element
......
#
# Copyright (c) 2000-2012 University of Utah and the Flux Group.
# Copyright (c) 2000-2013 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -239,6 +239,7 @@ script-install: dir-install $(SCRIPTS)
$(INSTALL) -m 755 $(SRCDIR)/reboot_prepare $(BINDIR)/reboot_prepare
$(INSTALL) -m 755 $(SRCDIR)/ixpboot $(BINDIR)/ixpboot
$(INSTALL) -m 755 $(SRCDIR)/liblocsetup.pm $(BINDIR)/liblocsetup.pm
$(INSTALL) -m 755 $(SRCDIR)/liblocstorage.pm $(BINDIR)/liblocstorage.pm
$(INSTALL) -m 755 $(SRCDIR)/libvnode.pm $(BINDIR)/libvnode.pm
$(INSTALL) -m 755 $(SRCDIR)/rc.delayagent $(BINDIR)/rc/rc.delayagent
$(INSTALL) -m 755 $(SRCDIR)/rc.healthd $(BINDIR)/rc/rc.healthd
......
......@@ -1395,1212 +1395,4 @@ sub os_setstaticarp($$)
return 0;
}
#
# To find the block stores exported from a target portal:
#
# iscontrol -d -t <storage-host>
#
# To use a remote iSCSI target, the info has to be in /etc/iscsi.conf:
#
# <bsid> {
# initiatorname = <our hostname>
# targetname = <iqn>
# targetaddress = <storage-host>
# }
#
# To login to a remote iSCSI target:
#
# iscontrol -c /etc/iscsi.conf -n <bsid>
#
# The session ID for the resulting session can be determined from the
# sysctl net.iscsi_initiator info:
#
# net.iscsi_initiator.<session>.targetname: <iqn>
# net.iscsi_initiator.<session>.targeaddress: <storage-host-IP>
#
# To stop a session (logout) you must first determine its pid from
# the net.iscsi_initiator info:
#
# net.iscsi_initiator.<session>.pid: <pid>
#
# and then send it a HUP:
#
# kill -HUP <pid>
#
# Once a blockstore is added, it will appear as a /dev/da? device.
# I have not found a straight-forward way to map session to device.
# What we do now is to use the session ID to match up info from
# "camcontrol identify da<N> -v". camcontrol will return output like:
#
# (pass3:iscsi0:0:0:0): ATAPI_IDENTIFY. ACB: ...
# ...
#
# where N in "iscsiN" will be the session.
#
#
# Map an iSCSI session number to the local disk device name (e.g., "da3").
#
# $session -- numeric session ID ("N" in "iscsiN" in camcontrol output)
#
# Returns the device name without the "/dev/" prefix, or undef if no
# /dev/da<N> device reports that session or if $session is undefined.
#
sub iscsi_to_dev($)
{
    my ($session) = @_;

    #
    # An undefined session would compare numerically equal to session 0
    # below and could hand back the wrong device, so reject it up front.
    #
    if (!defined($session)) {
	return undef;
    }

    #
    # XXX this is a total hack
    #
    foreach my $path (glob("/dev/da*")) {
	# only whole disks (da<N>), not slices or partitions
	next
	    if ($path !~ m#^/dev/(da\d+)$#);
	my $dev = $1;

	my $out = `camcontrol identify $dev -v 2>&1`;

	# /m so the "(passN:iscsiM:" line need not be the very first line
	if ($out =~ /^\(pass\d+:iscsi(\d+):/m && $1 == $session) {
	    return $dev;
	}
    }

    return undef;
}
#
# Map a disk serial number to the local disk device name (e.g., "da1").
#
# $sn -- serial number string as reported by "smartctl -i"
#
# Returns the device name without the "/dev/" prefix, or undef if the
# serial number is empty/undefined, smartctl is not executable, or no
# /dev/da<N> device reports that serial number.
#
sub serial_to_dev($)
{
    my ($sn) = @_;

    #
    # An empty serial number would match the first disk that reports any
    # serial number at all, so treat it as "not found".
    #
    if (!defined($sn) || $sn eq "") {
	return undef;
    }

    #
    # XXX this is a total hack
    #
    if (! -x "$SMARTCTL") {
	return undef;
    }

    foreach my $path (glob("/dev/da*")) {
	# only whole disks (da<N>), not slices or partitions
	next
	    if ($path !~ m#^/dev/(da\d+)$#);
	my $dev = $1;

	my $out = `$SMARTCTL -i /dev/$dev 2>&1 | grep 'Serial Number'`;

	#
	# Match the serial number literally (\Q..\E) and as a complete
	# token, so "ABC1" does not match a disk whose serial is "ABC12"
	# and regex metacharacters in the serial are not interpreted.
	#
	if ($out =~ /^Serial Number:\s+\Q$sn\E(?:\s|\z)/) {
	    return $dev;
	}
    }

    return undef;
}
#
# Find the iSCSI initiator session associated with a target name.
#
# $uuid -- target name (IQN) to look for in the net.iscsi_initiator sysctls
#
# Returns the session number of the first session whose targetname is
# exactly $uuid, or undef if there is none or $uuid is empty/undefined.
#
sub uuid_to_session($)
{
    my ($uuid) = @_;

    # an empty pattern would match the first session listed
    if (!defined($uuid) || $uuid eq "") {
	return undef;
    }

    my @lines = `sysctl net.iscsi_initiator 2>&1`;
    foreach my $line (@lines) {
	#
	# Quote the name: IQNs contain '.' which is a regex metacharacter.
	# Anchor the end so one name cannot match as a prefix of another.
	#
	if ($line =~ /net\.iscsi_initiator\.(\d+)\.targetname: \Q$uuid\E\s*$/) {
	    return $1;
	}
    }

    return undef;
}
#
# Find the pid recorded for the iSCSI session serving a target name.
#
# $uuid -- target name (IQN) to look for in the net.iscsi_initiator sysctls
#
# Returns the value of net.iscsi_initiator.<session>.pid for the first
# session whose targetname is exactly $uuid, or undef if there is no such
# session, no pid is listed for it, or $uuid is empty/undefined.
#
sub uuid_to_daemonpid($)
{
    my ($uuid) = @_;

    # an empty pattern would match the first session listed
    if (!defined($uuid) || $uuid eq "") {
	return undef;
    }

    my $session;
    my %pid_of = ();

    #
    # Collect the matching session and every session's pid in one pass so
    # the answer does not depend on the order of the sysctl output lines.
    #
    my @lines = `sysctl net.iscsi_initiator 2>&1`;
    foreach my $line (@lines) {
	# literal, whole-value match of the target name (see uuid_to_session)
	if ($line =~ /net\.iscsi_initiator\.(\d+)\.targetname: \Q$uuid\E\s*$/) {
	    $session = int($1)
		if (!defined($session));
	    next;
	}
	if ($line =~ /net\.iscsi_initiator\.(\d+)\.pid: (\d+)/) {
	    $pid_of{int($1)} = $2;
	}
    }

    if (defined($session) && exists($pid_of{$session})) {
	return $pid_of{$session};
    }

    return undef;
}
#
# Determine the boot disk (the "system volume") from the mounted root
# filesystem.
#
# Returns the disk name (e.g., "da0") when the root filesystem is mounted
# from /dev/<disk>s1a; returns undef otherwise.
#
sub get_bootdisk()
{
    # the line of mount output describing "/"
    my $rootline = `$MOUNT | grep ' on / '`;

    return undef
	unless ($rootline);

    # strip the "s1a" slice/partition suffix to get the bare disk name
    if ($rootline =~ /^\/dev\/(\S+)s1a on \//) {
	return $1;
    }

    return undef;
}
#
# We are relying on GEOM, so use its kernel info to tell us what
# the disks are.
#
# Returns a reference to a hash keyed by GEOM provider name (e.g., "da0",
# "da0s1"). Each entry is a hash with:
#   'level' -- nesting depth (0 for a disk)
#   'type'  -- "DISK", "PART" or "VINUM" on the FBSD8 path; whatever type
#              string the kernel reports on the conftxt path
#   'size'  -- size in MiB (converted from bytes, truncated)
#   'inuse' -- DISK entries only: 1 if the disk has a level 1 partition
#              named <disk>s<N>, 0 otherwise
#
# Returns undef if a needed "geom ... list" command cannot be started.
#
sub get_geominfo()
{
my %geominfo = ();
# One line per GEOM provider; empty output selects the fallback path below.
my @lines = `sysctl -n kern.geom.conftxt`;
chomp(@lines);
if (@lines > 0) {
# FBSD9 and above.
foreach (@lines) {
next if ($_ eq "");
# Fields as used below: [0] level, [1] type, [2] name, [3] size in
# bytes, [4] sector size.
my @vals = split /\s/;
# assume 2k sector size means a CD drive
if ($vals[0] == 0 && $vals[1] eq "DISK" && $vals[4] == 2048) {
next;
}
my $dev = $vals[2];
$geominfo{$dev}{'level'} = $vals[0];
$geominfo{$dev}{'type'} = $vals[1];
# size is in bytes, convert to MiB
$geominfo{$dev}{'size'} = int($vals[3] / 1024 / 1024);
# Only disks carry an 'inuse' flag; it is set for real further below.
if ($vals[1] eq "DISK") {
$geominfo{$dev}{'inuse'} = 0;
}
}
} else {
# FBSD8: no sysctl, have to parse geom output
my ($curdev,$curpart,$skipping);
# first find all the disks
if (!open(FD, "geom disk list|")) {
warn("*** get_geominfo: could not execute geom command\n");
return undef;
}
while (<FD>) {
# A numbered "Name:" line starts a new disk entry.
if (/^\d+\.\s+Name:\s+(\S+)$/) {
$curdev = $1;
$geominfo{$curdev}{'level'} = 0;
$geominfo{$curdev}{'type'} = "DISK";
$geominfo{$curdev}{'inuse'} = 0;
next;
}
# The size is only taken from a Mediasize line that immediately follows
# the Name line; any other line clears $curdev (see below).
if (/\sMediasize:\s+(\d+)\s/) {
if ($curdev) {
$geominfo{$curdev}{'size'} = int($1 / 1024 / 1024);
$curdev = undef;
}
next;
}
$curdev = undef;
}
close(FD);
# now find all the partitions on those disks
if (!open(FD, "geom part list|")) {
warn("*** get_geominfo: could not execute geom command\n");
return undef;
}
# $skipping is a small state machine:
#   1 = ignoring lines until a "Geom name:" for a disk we know about
#   2 = inside such a geom, waiting for its "Providers:" section
#   3 = inside the Providers section, recording partitions
$skipping = 1;
$curdev = $curpart = undef;
while (<FD>) {
if (/^Geom name:\s+(\S+)/) {
$curdev = $1;
if (exists($geominfo{$curdev})) {
$skipping = 2;
}
next;
}
next if ($skipping < 2);
if (/^Providers:/) {
$skipping = 3;
next;
}
next if ($skipping < 3);
if (/^\d+\.\s+Name:\s+(\S+)$/) {
$curpart = $1;
# a partition is one level below the geom that contains it
$geominfo{$curpart}{'level'} = $geominfo{$curdev}{'level'} + 1;
$geominfo{$curpart}{'type'} = "PART";
next;
}
if (/\sMediasize:\s+(\d+)\s/) {
$geominfo{$curpart}{'size'} = int($1 / 1024 / 1024);
next;
}
# "Consumers:" ends the Providers section; go back to ignoring lines.
if (/^Consumers:/) {
$skipping = 1;
next;
}
}
close(FD);
# and finally, vinums
if (!open(FD, "geom vinum list|")) {
warn("*** get_geominfo: could not execute geom command\n");
return undef;
}
$curpart = undef;
# Here $skipping is: 1 = outside a Providers section, 2 = inside one.
$skipping = 1;
while (<FD>) {
if (/^Providers:/) {
$skipping = 2;
next;
}
next if ($skipping < 2);
if (/^\d+\.\s+Name:\s+(\S+)$/) {
$curpart = $1;
# vinum providers are always recorded at a fixed level of 2
$geominfo{$curpart}{'level'} = 2;
$geominfo{$curpart}{'type'} = "VINUM";
next;
}
if (/\sMediasize:\s+(\d+)\s/) {
$geominfo{$curpart}{'size'} = int($1 / 1024 / 1024);
next;
}
if (/^Consumers:/) {
$skipping = 1;
next;
}
}
close(FD);
}
#
# Make a pass through and mark disks that are in use where "in use"
# means "has a partition".
#
# Only level 1 PART entries named <disk>s<N> are considered, and only
# when <disk> itself has an entry in the table.
#
foreach my $dev (keys %geominfo) {
if ($geominfo{$dev}{'type'} eq "PART" &&
$geominfo{$dev}{'level'} == 1 &&
$dev =~ /^(.*)s\d+$/) {
if (exists($geominfo{$1})) {
$geominfo{$1}{'inuse'} = 1;
}
}
}
return \%geominfo;
}
#
# Handle one-time operations.
# Return a cookie (object) with current state of storage subsystem.
#
# $lref -- reference to a list of storage config hashes. The CMD, BSID,
#          CLASS and PROTO members of each are examined here.
#
# Returns a reference to a hash with 'INITIALIZED' set to 1 and, when any
# local slices were requested, 'BOOTDISK' (boot disk name) and 'GEOMINFO'
# (result of get_geominfo). Returns undef on any failure, after printing
# a warning.
#
sub os_init_storage($)
{
    my ($lref) = @_;

    my $gotlocal = 0;
    my $gotslice = 0;
    my $gotiscsi = 0;
    my $needavol = 0;
    my $needall = 0;
    my %so = ();

    #
    # Classify the requests so we know which subsystems to initialize.
    #
    foreach my $href (@{$lref}) {
	if ($href->{'CMD'} eq "SLICE") {
	    $gotslice++;
	    if ($href->{'BSID'} eq "SYSVOL" ||
		$href->{'BSID'} eq "NONSYSVOL") {
		$needavol = 1;
	    } elsif ($href->{'BSID'} eq "ANY") {
		$needall = 1;
	    }
	}
	if ($href->{'CLASS'} eq "local") {
	    $gotlocal++;
	} elsif (defined($href->{'PROTO'}) && $href->{'PROTO'} eq "iSCSI") {
	    $gotiscsi++;
	}
    }

    # check for local storage incompatibility
    if ($needall && $needavol) {
	warn("*** storage: Incompatible local volumes.\n");
	return undef;
    }

    # initialize volume manager if needed for local slices
    if ($gotlocal && $gotslice) {
	#
	# gvinum: put module load in /boot/loader.conf so that /etc/fstab
	# mounts will work.
	#
	if (mysystem("grep -q 'geom_vinum_load=\"YES\"' /boot/loader.conf")) {
	    my $lconf;
	    if (!open($lconf, ">>", "/boot/loader.conf")) {
		warn("*** storage: could not enable gvinum in /boot/loader.conf\n");
		return undef;
	    }
	    print $lconf "# added by $BINDIR/rc/rc.storage\n";
	    print $lconf "geom_vinum_load=\"YES\"\n";
	    # buffered write errors only show up at close time
	    if (!close($lconf)) {
		warn("*** storage: could not enable gvinum in /boot/loader.conf\n");
		return undef;
	    }

	    # and do a one-time start
	    mysystem("gvinum start");
	}

	#
	# Grab the bootdisk and current GEOM state.
	# Either lookup can fail (undef); report that directly rather than
	# falling into the "not in use" check with undefined values.
	#
	my $bdisk = get_bootdisk();
	if (!defined($bdisk)) {
	    warn("*** storage: could not determine the boot disk\n");
	    return undef;
	}
	my $ginfo = get_geominfo();
	if (!defined($ginfo)) {
	    warn("*** storage: could not get GEOM information\n");
	    return undef;
	}
	if (!exists($ginfo->{$bdisk}) || !$ginfo->{$bdisk}->{'inuse'}) {
	    warn("*** storage: bootdisk '$bdisk' marked as not in use!?\n");
	    return undef;
	}
	$so{'BOOTDISK'} = $bdisk;
	$so{'GEOMINFO'} = $ginfo;

	# Disabled debugging aid: dump what we found and bail out.
	if (0) {
	    print STDERR "BOOTDISK='$bdisk'\nGEOMINFO=\n";
	    foreach my $dev (keys %$ginfo) {
		my $type = $ginfo->{$dev}->{'type'};
		my $lev = $ginfo->{$dev}->{'level'};
		my $size = $ginfo->{$dev}->{'size'};
		print STDERR "name=$dev, type=$type, level=$lev, size=$size\n";
	    }
	    return undef;
	}
    }

    if ($gotiscsi) {
	my $redir = ">/dev/null 2>&1";

	if (! -x "$ISCSI") {
	    warn("*** storage: $ISCSI does not exist, cannot continue\n");
	    return undef;
	}

	#
	# XXX load initiator driver
	# (only attempt the kldload when kldstat does not already list it)
	#
	if (mysystem("kldstat | grep -q iscsi_initiator") &&
	    mysystem("kldload iscsi_initiator.ko $redir")) {
	    warn("*** storage: Could not load iscsi_initiator kernel module\n");
	    return undef;
	}
    }

    $so{'INITIALIZED'} = 1;
    return \%so;
}
#
# os_check_storage(sobject,confighash)
#
# Check whether the storage unit described by confighash is present and
# correctly configured. The work is handed to the ELEMENT or SLICE
# specific checker according to the 'CMD' member of confighash.
#
# Returns 0 if the unit does not exist, 1 if it exists and is correct,
# and -1 otherwise (including when 'CMD' is neither ELEMENT nor SLICE).
#
# Side-effect: Creates the hash member $href->{'LNAME'} with the /dev
# name of the storage unit.
#
sub os_check_storage($$)
{
    my ($so,$href) = @_;

    return os_check_storage_element($so,$href)
	if ($href->{'CMD'} eq "ELEMENT");

    return os_check_storage_slice($so,$href)
	if ($href->{'CMD'} eq "SLICE");

    # unrecognized command
    return -1;
}
sub os_check_storage_element($$)
{
my ($so,$href) = @_;
my $CANDISCOVER = 0;
#my $redir = "";
my $redir = ">/dev/null 2>&1";
#
# iSCSI:
# make sure iscsi_initiator kernel module is loaded
# make sure the IQN exists
# make sure there is an entry in /etc/iscsi.conf.
#
if ($href->{'CLASS'} eq "SAN" && $href->{'PROTO'} eq "iSCSI") {
my $hostip = $href->{'HOSTIP'};
my $uuid = $href->{'UUID'};
my $bsid = $href->{'VOLNAME'};
my @lines;
my $cmd;
#
# See if the block store exists on the indicated server.
# If not, something is very wrong, return -1.
#
# Note that the server may not support discovery. If not, we don't
# do it since it is only a sanity check anyway.
#
if ($CANDISCOVER) {
@lines = `$ISCSI -d -t $hostip 2>&1`;
if ($? != 0) {
warn("*** could not find exported iSCSI block stores\n");
return -1;
}
my $taddr = "";
for (my $i = 0; $i < scalar(@lines); $i++) {
# found target, look at next
if ($lines[$i] =~ /^TargetName=$uuid/ &&
$lines[$i+1] =~ /^TargetAddress=($hostip.*)/) {
$taddr = $1;
last;
}
}
if (!$taddr) {
warn("*** could not find iSCSI block store '$uuid'\n");
return -1;
}
}
#
# See if it is in the config file.
# If not, we have not done the one-time initialization, return 0.
#
if (! -r "$ISCSICNF" || mysystem("grep -q '$uuid' $ISCSICNF")) {
return 0;
}
#
# XXX hmm...FreeBSD does not have an /etc/rc.d script for starting
# up iscontrol instances. So we have to do it everytime right now.
#
# First, check and see if there is a session active for this
# blockstore. If not we must start one.
#
my $session = uuid_to_session($uuid);
if (!defined($session)) {
if (mysystem("$ISCSI -c $ISCSICNF -n $bsid $redir")) {
warn("*** $bsid: could not create iSCSI session\n");
return -1;
}
sleep(1);
$session = uuid_to_session($uuid);
}
#
# Figure out the device name from the session and report all is good.
#
my $dev = iscsi_to_dev($session);
if (defined($dev)) {
$href->{'LNAME'} = $dev;
return 1;
}
#
# Otherwise, we are in some indeterminite state, return -1.
#
warn("*** $bsid: found iSCSI session but could not determine local device\n");
return -1;
}
#
# local disk:
# make sure disk exists
#
if ($href->{'CLASS'} eq "local") {
my $bsid = $href->{'VOLNAME'};
my $sn = $href->{'UUID'};
my $dev = serial_to_dev($sn);
if (defined($dev)) {
$href->{'LNAME'} = $dev;
return 1;
}
# for physical disks, there is no way to "create" it so return error