From 037086d7d049f212f783f120aadad636da6e2c39 Mon Sep 17 00:00:00 2001 From: Mike Hibler <hibler@cs.utah.edu> Date: Tue, 8 Dec 2020 16:36:32 -0700 Subject: [PATCH] Fixups for occasional image backed dataset capture failures. The so-called "storage map" in the blockstore code was losing information due to the way it was being constructed. This required a hack to compensate, which in turn had some failure modes. And the whole thing would fail if you tried to capture a FreeBSD blockstore (I wonder why no one ever noticed?) And, because I am so good, I even fixed a bug that was not there when I started! --- clientside/os/create-image | 25 +++++- clientside/os/create-versioned-image | 22 ++++- clientside/tmcc/common/config/rc.storage | 109 ++++++++++++++++++----- 3 files changed, 129 insertions(+), 27 deletions(-) diff --git a/clientside/os/create-image b/clientside/os/create-image index fd04ed56a6..ca05c1861b 100755 --- a/clientside/os/create-image +++ b/clientside/os/create-image @@ -1,7 +1,7 @@ #!/usr/bin/perl -w # -# Copyright (c) 2000-2019 University of Utah and the Flux Group. +# Copyright (c) 2000-2020 University of Utah and the Flux Group. # # {{{EMULAB-LICENSE # @@ -197,12 +197,18 @@ if (defined($bsname)) { die("Could not find storage configuration for $bsname\n"); } # + # Cannot handle FreeBSD non-SYSVOL (non-UFS) filesystems right now. + # + if ($^O ne "linux" && $bsref->{'BSID'} ne "SYSVOL") { + die("Cannot image non-UFS filesystems\n"); + } + # # The storage map tells us the device info. # open(MAP, TMSTORAGEMAP()) or die("Could not open the storage map!\n"); while (<MAP>) { - if ($_ =~ /^([-\w]+)\s+([-\w\.\/]+)\s+([-\w\.\/]+)\s*/) { + if ($_ =~ /^([-\w]+)\s+([-\w\.\/]+)(?:\s+([-\w\.\/]+))?/) { if ($1 eq $bsname) { $device = $2; # @@ -210,8 +216,21 @@ if (defined($bsname)) { # which seems wrong since now I have to figure it out # in order to check to see if its mounted. # + # XXX this is due to a bug in the storagemap code that + # has since been fixed, but we check it just in case. + # + # XXX even if the map does have the /dev/emulab/... name, + # we need to use the /dev/mapper/... name instead since + # that is what shows up with "mount" and "mount" is what + # is used in os_ismounted below. + # if ($bsref->{'BSID'} ne "SYSVOL") { - $device = "/dev/mapper/emulab-${bsname}"; + print STDERR + "WARNING: malformed storage map entry ignored!\n" + if (!defined($3)); + # XXX don't forget to double down on the dashes. + (my $rdev = $bsname) =~ s/-/--/g; + $device = "/dev/mapper/emulab-$rdev"; } $bsref->{'DEVICE'} = $device; } diff --git a/clientside/os/create-versioned-image b/clientside/os/create-versioned-image index 66af6c1781..721acbf3e4 100644 --- a/clientside/os/create-versioned-image +++ b/clientside/os/create-versioned-image @@ -243,12 +243,19 @@ sub map_bsname($) goto bad; } # + # Cannot handle FreeBSD non-SYSVOL (non-UFS) filesystems right now. + # + if ($^O ne "linux" && $bsref->{'BSID'} ne "SYSVOL") { + print STDERR "Cannot image non-UFS filesystems\n"; + goto bad; + } + # # The storage map tells us the device info. # open(MAP, TMSTORAGEMAP()) or die("Could not open the storage map!\n"); while (<MAP>) { - if ($_ =~ /^([-\w]+)\s+([-\w\.\/]+)\s+([-\w\.\/]+)\s*/) { + if ($_ =~ /^([-\w]+)\s+([-\w\.\/]+)(?:\s+([-\w\.\/]+))?/) { if ($1 eq $bsname) { $device = $2; # @@ -256,10 +263,21 @@ sub map_bsname($) # which seems wrong since now I have to figure it out # in order to check to see if its mounted. # + # XXX this is due to a bug in the storagemap code that + # has since been fixed, but we check it just in case. + # + # XXX even if the map does have the /dev/emulab/... name, + # we need to use the /dev/mapper/... name instead since + # that is what shows up with "mount" and "mount" is what + # is used in os_ismounted below. + # if ($bsref->{'BSID'} ne "SYSVOL") { + print STDERR + "WARNING: malformed storage map entry ignored!\n" + if (!defined($3)); # XXX don't forget to double down on the dashes. (my $rdev = $bsname) =~ s/-/--/g; - $device = "/dev/mapper/emulab-$rdev"; + $device = "/dev/mapper/emulab-$rdev"; } $bsref->{'DEVICE'} = $device; } diff --git a/clientside/tmcc/common/config/rc.storage b/clientside/tmcc/common/config/rc.storage index 5f33cdf91c..f3df2dac7c 100644 --- a/clientside/tmcc/common/config/rc.storage +++ b/clientside/tmcc/common/config/rc.storage @@ -143,6 +143,63 @@ SWITCH: for ($action) { } exit(0); +sub writemap($;$$) +{ + my ($btype, $cmdp, $dinfo) = @_; + + if ($btype eq "full") { + if (open(MAP, ">$STORAGEMAP")) { + if (open(PMAP, "<$STORAGEMAP.local")) { + while (<PMAP>) { + print MAP "$_"; + } + close(PMAP); + } + if (open(PMAP, "<$STORAGEMAP.remote")) { + while (<PMAP>) { + print MAP "$_"; + } + close(PMAP); + } + close(MAP); + } else { + warn("*** Could not create storage map: $STORAGEMAP\n"); + } + return; + } + + if (open(MAP, ">$STORAGEMAP.$btype")) { + my @cmds = (); + foreach my $href (@$cmdp) { + if ($btype eq "local" && $href->{'CLASS'} eq "local") { + push(@cmds, $href); + } elsif ($btype eq "remote" && $href->{'CLASS'} ne "local") { + push(@cmds, $href); + } + } + foreach my $cmd (@cmds) { + print MAP $cmd->{'VOLNAME'}; + if (exists($cmd->{'LVDEV'})) { + print MAP " " . $cmd->{'LVDEV'}; + } + if (exists($cmd->{'MOUNTPOINT'})) { + print MAP " " . $cmd->{'MOUNTPOINT'}; + my $dev = $cmd->{'LVDEV'}; + if ($dev) { + $dev =~ s/^\/dev\///; + if ($dinfo && exists($dinfo->{$dev})) { + $dinfo->{$dev}->{'mountpoint'} = $cmd->{'MOUNTPOINT'}; + } + } + } + print MAP "\n"; + } + close(MAP); + } else { + warn("*** Could not create storage map: $STORAGEMAP.$btype\n"); + } +} + # # Boot Action. # @@ -318,30 +375,33 @@ sub doboot() # # Stash mapping of block stores to local names for the convenience - # of the user. The storage map always gets all blockstores. + # of the user. + # + # We build the storage map in pieces, $STORAGEMAP.{local,remote} and + # then concatonate the pieces to form the full map. + # + # We used to just write all the blockstores to $STORAGEMAP even when + # invoked for either just local or remote. The problem is that we do + # not process all blockstores when invoked with -L or -R, we only + # process those of the given type. However, it is the processing step + # that sets LVDEV for local blockstores. So if we execute remote + # blockstore setup after local setup (the normal order), then it would + # overwrite $STORAGEMAP with info for local blockstores that did not + # have LVDEV set. This, coupled with a broken RE in blockstore (dataset) + # image capture, would lead to failure when capturing image-backed + # datasets on some occasions. + # + # The hope is that this convoluted procedure will allow you to run + # the type specific rc file even after boot time and have the resulting + # STORAGEMAP make sense. # - if (open(MAP, ">$STORAGEMAP")) { - foreach my $cmd (@allcmds) { - print MAP $cmd->{'VOLNAME'}; - if (exists($cmd->{'LVDEV'})) { - print MAP " " . $cmd->{'LVDEV'}; - } - if (exists($cmd->{'MOUNTPOINT'})) { - print MAP " " . $cmd->{'MOUNTPOINT'}; - my $dev = $cmd->{'LVDEV'}; - if ($dev) { - $dev =~ s/^\/dev\///; - if (exists($dinfo->{$dev})) { - $dinfo->{$dev}->{'mountpoint'} = $cmd->{'MOUNTPOINT'}; - } - } - } - print MAP "\n"; - } - close(MAP); - } else { - warn("*** Could not create storage map: $STORAGEMAP\n"); + if ($dolocal) { + writemap("local", \@allcmds, $dinfo); + } + if ($doremote) { + writemap("remote", \@allcmds, $dinfo); } + writemap("full"); # # Since we went to a lot of trouble to compute it, also dump @@ -591,8 +651,13 @@ sub docleanup($) # XXX see XXX comment above. if ($dolocal) { + unlink("$STORAGEMAP.local"); unlink($STORAGEMAP, $DISKINFO); } + if ($doremote) { + unlink("$STORAGEMAP.remote"); + } + writemap("full"); } # -- GitLab