Commit fbe5f38f authored by Mike Hibler's avatar Mike Hibler

Semi-hack to ensure that Wisconsin nodes don't include their SSDs in blockstore-related VGs.

Right now, you have to decide globally and in advance, what disk types
are going to be included in blockstore pools. Then you set the sitevar
accordingly and then set the DB sysvol/nonsysvol/any node_type_features
to reflect the amount of storage available on just drives of that type.

This value is passed to clients via the otherwise unused PROTO field
of the blockstore line (when CMD=SLICE and CLASS=local), so this change
is backward compatible (OS images with older client code will ignore it
and just give you blockstores including all the devices).

So at Wisconsin, I set storage/local/disktypes to "HDD-only" and tweak
the node_type_attributes '?+disk_any' and '?+disk_nonsysvol' to not
include the space for the 1 or 2 SSD drives in each machine. tmcd maps
the "HDD-only" setting to a PROTO value the client already accepts
(PROTO=SATA), and the client sees that and does not include any SSD
devices among the eligible devices from which to create the VG.

The hope is that ultimately, we could get rid of the sitevar and use the
PROTO field to select, per-blockstore, its type (only HDD, only SSD).
But that will require additional per node (type) assign features
differentiating the amount of each type available.
parent 74470747
......@@ -661,6 +661,9 @@ sub process($$$$)
warn("*** Unknown storage slice bsid '".$href->{'BSID'}."'\n");
return 0;
}
if (!exists($href->{'PROTO'})) {
$href->{'PROTO'} = "local";
}
if (exists($href->{'DATASET'}) && $dosetup) {
#
# We are going to load from a dataset image via frisbee.
......
......@@ -85,6 +85,7 @@ my $SGDISK = "/sbin/sgdisk";
my $GDISK = "/sbin/gdisk";
my $PPROBE = "/sbin/partprobe";
my $FRISBEE = "/usr/local/bin/frisbee";
my $HDPARM = "/sbin/hdparm";
#
#
......@@ -237,6 +238,87 @@ sub serial_to_dev($$)
return undef;
}
#
# Determine if a disk is "SSD" or "HDD".
#
# $dev is the bare device name (e.g., "sda" or "nvme0n1"), without the
# "/dev/" prefix. Returns the string "SSD" or "HDD".
#
# Detection order:
#   1. NVMe device names are assumed to be SSDs.
#   2. "hdparm -I" output is scanned for a "Solid State Device" line.
#   3. "smartctl -i" output is scanned for "Rotation Rate", falling back
#      on the string "SSD" appearing in the device model.
# If none of these is conclusive, the disk is assumed to be an HDD.
#
sub get_disktype($)
{
    my ($dev) = @_;

    #
    # Assume NVMe is SSD.
    # Older hdparm and smartctl don't seem to handle NVMe
    #
    if ($dev =~ /^nvme\d+n\d+/) {
        return "SSD";
    }

    #
    # Try hdparm first since it is a standard utility
    #
    if (-x "$HDPARM") {
        if (open(my $hfd, "$HDPARM -I /dev/$dev 2>/dev/null |")) {
            my $isssd = 0;
            while (my $line = <$hfd>) {
                chomp($line);
                if ($line =~ /:\s+solid state device$/i) {
                    $isssd = 1;
                    last;
                }
            }
            my $ok = close($hfd);

            # A positive match is conclusive. Check it before the close
            # status, since bailing out of the read loop early can make
            # hdparm exit abnormally (broken pipe).
            if ($isssd) {
                return "SSD";
            }

            #
            # Only trust a negative answer if hdparm actually succeeded.
            # If it failed (e.g., a device that does not support the
            # identify command), fall through and let smartctl have a go
            # rather than blindly declaring the disk an HDD.
            #
            if ($ok) {
                return "HDD";
            }
        }
    }

    #
    # Try using "smartctl -i"
    #
    if (-x "$SMARTCTL") {
        # "2>&1" folds stderr into the pipe we are reading from.
        if (open(my $sfd, "$SMARTCTL -i /dev/$dev 2>&1 |")) {
            my $isssd = -1;    # -1 == unknown, 0 == HDD, 1 == SSD
            my $model = "";
            while (my $line = <$sfd>) {
                chomp($line);
                if ($line =~ /^rotation rate:\s+(\S.*)/i) {
                    if ($1 =~ /solid state device/i) {
                        $isssd = 1;
                    } else {
                        $isssd = 0;
                    }
                    last;
                }
                # XXX if we don't find rotation rate, we will fall back on this
                if ($line =~ /^device model:\s+(\S.*)/i) {
                    $model = $1;
                    next;
                }
            }
            close($sfd);
            if ($isssd >= 0) {
                return ($isssd ? "SSD" : "HDD");
            }

            #
            # XXX older versions of smartctl (e.g., in CentOS 6-ish)
            # don't return "Rotation Rate". This is a fall-back hack as
            # we know that at least Intel SSDs have SSD in the model name.
            #
            if ($model =~ /SSD/) {
                return "SSD";
            }
        }
    }

    # Assume it is a spinning disk.
    return "HDD";
}
#
# Return the name (e.g., "sda") of the boot disk, aka the "system volume".
#
......@@ -443,6 +525,7 @@ sub get_diskinfo()
$geominfo{$dev}{'size'} = int($size / 1024);
$geominfo{$dev}{'inuse'} = 0;
$geominfo{$dev}{'ptabtype'} = get_ptabtype($dev);
$geominfo{$dev}{'disktype'} = get_disktype($dev);
}
}
}
......@@ -700,7 +783,7 @@ sub os_init_storage($)
} elsif ($href->{'CMD'} eq "SLICE") {
$gotslice++;
if ($href->{'BSID'} eq "SYSVOL" ||
$href->{'BSID'} eq "ONSYSVOL") {
$href->{'BSID'} eq "NONSYSVOL") {
$needavol = 1;
} elsif ($href->{'BSID'} eq "ANY") {
$needall = 1;
......@@ -823,7 +906,8 @@ sub os_show_storage($)
my $inuse = sprintf("%X", $dinfo->{$dev}->{'inuse'});
print STDERR " name=$dev, type=$type, level=$lev, size=$size, inuse=$inuse";
if ($type eq "DISK") {
print STDERR ", pttype=", $dinfo->{$dev}->{'ptabtype'};
print STDERR ", disktype=", $dinfo->{$dev}->{'disktype'},
", pttype=", $dinfo->{$dev}->{'ptabtype'};
}
elsif ($type eq "LVM") {
print STDERR ", active=", $dinfo->{$dev}->{'active'};
......@@ -1049,17 +1133,13 @@ sub os_check_storage_slice($$)
# local storage:
# if BSID==SYSVOL:
# see if 4th part of boot disk exists (eg: da0s4) and
# is of type freebsd
# is of type linux
# else if BSID==NONSYSVOL:
# see if there is a concat volume with appropriate name
# see if there is a logical volume with appropriate name
# else if BSID==ANY:
# see if there is a concat volume with appropriate name
# see if there is a logical volume with appropriate name
# if there is a mountpoint, see if it exists in /etc/fstab
#
# List all volumes:
# gvinum lv
#
#
if ($href->{'CLASS'} eq "local") {
my $lv = $href->{'VOLNAME'};
my ($dev, $devtype, $rdev);
......@@ -1566,6 +1646,17 @@ sub os_create_storage_slice($$$)
my @devs = ();
my $dev;
#
# Determine if we should use SSDs in the construction
# of the volume group.
#
my $disktype = "";
if ($href->{'PROTO'} eq "SATA") {
$disktype = "HDD";
} elsif ($href->{'PROTO'} eq "NVMe") {
$disktype = "SSD";
}
if ($bsid eq "ANY") {
my $pchr = "";
if ($bdisk =~ /^nvme/) {
......@@ -1603,14 +1694,19 @@ sub os_create_storage_slice($$$)
push(@devs, "/dev/$dev");
}
elsif (exists($ginfo->{$dev}) &&
$ginfo->{$dev}->{'inuse'} == 0) {
$ginfo->{$dev}->{'inuse'} == 0 &&
(!$disktype ||
$ginfo->{$bdisk}->{'disktype'} eq $disktype)) {
push(@devs, "/dev/$dev");
}
}
skipp4:
foreach $dev (keys %$ginfo) {
if ($ginfo->{$dev}->{'type'} eq "DISK" &&
$ginfo->{$dev}->{'inuse'} == 0) {
$ginfo->{$dev}->{'inuse'} == 0 &&
(!$disktype ||
$ginfo->{$dev}->{'disktype'} eq $disktype)) {
push(@devs, "/dev/$dev");
}
}
......
......@@ -289,7 +289,7 @@ typedef struct {
static int iptonodeid(struct in_addr, tmcdreq_t *, char*);
static int checkdbredirect(tmcdreq_t *);
static int sendstoreconf(int sock, int tcp, tmcdreq_t *reqp, char *bscmd,
char *vname, int dopersist);
char *vname, int dopersist, char *localproto);
static int get_imagestrings(tmcdreq_t *reqp, imstrings_t *imstrings);
#ifdef EVENTSYS
......@@ -4482,7 +4482,7 @@ COMMAND_PROTOTYPE(dostorageconfig)
char buf[MYBUFSIZE];
char *bufp, *ebufp = &buf[sizeof(buf)];
char *mynodeid;
char *vname, *bsid, *hostid;
char *vname, *bsid, *hostid, *localproto;
int rv;
int volsize, bsidx, cmdidx = 1;
int nrows, nrows2, nattrs;
......@@ -4538,7 +4538,7 @@ COMMAND_PROTOTYPE(dostorageconfig)
OUTPUT(buf, sizeof(buf),
"CMD=EXPORT IDX=%d VOLNAME=%s",
cmdidx++, vname);
rv = sendstoreconf(sock, tcp, reqp, buf, vname, 0);
rv = sendstoreconf(sock, tcp, reqp, buf, vname, 0, NULL);
mysql_free_result(res);
return rv;
......@@ -4633,6 +4633,39 @@ COMMAND_PROTOTYPE(dostorageconfig)
}
mysql_free_result(res);
/*
* XXX short term hack
*
* Currently, we are not using the PROTO field for local
* blockstores. So we use it to convey to the user whether
* NONSYSVOL and ANY storage pools should be composed of
* HDD-only, SSD-only, or all storage types. We query the
* sitevar "storage/local/disktypes" for this info.
*
* So right now we decide in advance what types of storage
* the pools should include, set the sitevar, and also
* set the values of the existing DB nonsysvol/any features
* to include only that storage.
*
* Ultimately, we could get rid of the sitevar and use the
* PROTO field to select, per-blockstore, its type. But that
* will require additional per node (type) assign features
* differentiating the amount of each type available.
*/
localproto = NULL;
res = mydb_query("select value,defaultvalue from sitevariables "
"where name='storage/local/disktypes'", 2);
if (res) {
if ((int)mysql_num_rows(res) > 0) {
row = mysql_fetch_row(res);
if (row[0] && row[0][0])
localproto = strdup(row[0]);
else if (row[1] && row[1][0])
localproto = strdup(row[1]);
}
mysql_free_result(res);
}
/*
* Send across local blockstore volumes (slices). These don't
* show up in the reserved table, existing entirely in the
......@@ -4644,11 +4677,12 @@ COMMAND_PROTOTYPE(dostorageconfig)
"where exptidx=%d and "
"fixed='%s'",
2, reqp->exptidx, reqp->nickname);
if (!res) {
error("STORAGECONFIG: %s: DB Error getting virt_blockstore "
"info.\n",
mynodeid);
if (localproto)
free(localproto);
return 1;
}
......@@ -4660,9 +4694,11 @@ COMMAND_PROTOTYPE(dostorageconfig)
OUTPUT(buf, sizeof(buf),
"CMD=SLICE IDX=%d VOLNAME=%s VOLSIZE=%d",
cmdidx++, vname, volsize);
sendstoreconf(sock, tcp, reqp, buf, vname, 0);
sendstoreconf(sock, tcp, reqp, buf, vname, 0, localproto);
}
mysql_free_result(res);
if (localproto)
free(localproto);
/*
* Now to send the remote elements (a.k.a SAN disks). Figuring
......@@ -4733,7 +4769,7 @@ COMMAND_PROTOTYPE(dostorageconfig)
OUTPUT(buf, sizeof(buf),
"CMD=ELEMENT IDX=%d HOSTID=%s VOLNAME=%s VOLSIZE=%d",
cmdidx++, hostid, vname, volsize);
sendstoreconf(sock, tcp, reqp, buf, vname, 1);
sendstoreconf(sock, tcp, reqp, buf, vname, 1, NULL);
}
mysql_free_result(res);
......@@ -4744,7 +4780,7 @@ COMMAND_PROTOTYPE(dostorageconfig)
/* Helper function for "dostorageconfig" */
static int
sendstoreconf(int sock, int tcp, tmcdreq_t *reqp, char *bscmd, char *vname,
int dopersist)
int dopersist, char *localproto)
{
MYSQL_RES *res, *res2;
MYSQL_ROW row, row2;
......@@ -4890,6 +4926,28 @@ sendstoreconf(int sock, int tcp, tmcdreq_t *reqp, char *bscmd, char *vname,
"%s CLASS=%s BSID=%s",
bscmd, class, placement);
/*
* If there is a global local storage type, we pass that
* along (see the "short term hack" comment above in
* dostorageconfig).
*
* XXX Since the clientside has a fixed set of values it will
* accept for PROTO, we map these as:
*
* Any => PROTO="local"
* SSD-only => PROTO="NVMe"
* HDD-only => PROTO=<any other> (e.g., "SATA", "PATA")
*/
if (strlen(protocol) == 0 && localproto != NULL) {
if (strcasecmp(localproto, "any") == 0) {
protocol = "local";
} else if (strcasecmp(localproto, "ssd-only") == 0) {
protocol = "NVMe";
} else if (strcasecmp(localproto, "hdd-only") == 0) {
protocol = "SATA";
}
}
/* Add the protocol to the buffer, if present.*/
if (strlen(protocol)) {
bufp += OUTPUT(bufp, ebufp-bufp, " PROTO=%s",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment