Commit 4ec701e7 authored by Mike Hibler's avatar Mike Hibler

1. Beef up "admin mode" support.

* Add libadminmfs.pm with routines for entering and exiting the admin
  MFS and for executing commands in it.  Node admin and firewall swapout
  (see below) now use this; the image creation process does not yet.

* Add swapout time hooks for running an admin mode process, likely to
  be used to collect swapout time state.  Currently controlled globally
  by two new sitevars.

* Modified node_admin to use the library and added a "-c <command>"
  option to have nodes go into admin mode and run a command.  I don't
  really expect this to be useful, it was just a testing vehicle for
  the library.

2. Improved the swapout process for firewalled experiments.  Largely
   just generalized what we already did for panic'ed experiments.
   At swapout, firewalled nodes are:

   - powered off
   - set to boot into admin mode and run a disk zapper
   - powered on

  The swapout process then waits for all nodes to successfully complete
  disk zapage, at which point the nodes are nfree'ed as usual.  Any
  failure of the above process marks the experiment as panic'ed (to
  ensure that we are involved in cleanup) and sends mail to testbed-ops
  describing the state of the nodes.

3. Added the aforementioned disk zapper, a little C program in the MFS
   which zeroes out the MBR and partition boot blocks (but not the MBR
   partition table or FS superblocks).  This is added insurance that if
   a node somehow gets diverted after being nfree'd but before getting
   the disk reloaded (e.g., goes to hwdown), we cannot accidentally
   boot from the disk.  This program gets installed in the admin MFS.

4. Related to firewalls, modified swapin to use the new documented
   "snmpit -N" to get the firewall VLAN number rather than parsing the
   output that was a side-effect of VLAN creation.
parent 919128a0
......@@ -2224,7 +2224,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
os/imagezip/shd/GNUmakefile \
os/frisbee.redux/GNUmakefile os/growdisk/GNUmakefile \
os/syncd/GNUmakefile os/dijkstra/GNUmakefile \
os/genhostsfile/GNUmakefile \
os/genhostsfile/GNUmakefile os/zapdisk/GNUmakefile \
pxe/GNUmakefile pxe/bootinfo.restart \
security/GNUmakefile security/lastlog_daemon \
sensors/GNUmakefile sensors/slothd/GNUmakefile \
......@@ -2271,7 +2271,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/portstats tbsetup/vnode_setup tbsetup/staticroutes \
tbsetup/console_setup.proxy tbsetup/exports_setup.proxy \
tbsetup/checkports tbsetup/webnodereboot tbsetup/libaudit.pm \
tbsetup/libreboot.pm tbsetup/libosload.pm \
tbsetup/libreboot.pm tbsetup/libosload.pm tbsetup/libadminmfs.pm \
tbsetup/sfskey_update tbsetup/sfskey_update.proxy \
tbsetup/idleswap tbsetup/webidleswap tbsetup/switchmac \
tbsetup/newnode_reboot tbsetup/webnodeattributes \
......
......@@ -662,7 +662,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
os/imagezip/shd/GNUmakefile \
os/frisbee.redux/GNUmakefile os/growdisk/GNUmakefile \
os/syncd/GNUmakefile os/dijkstra/GNUmakefile \
os/genhostsfile/GNUmakefile \
os/genhostsfile/GNUmakefile os/zapdisk/GNUmakefile \
pxe/GNUmakefile pxe/bootinfo.restart \
security/GNUmakefile security/lastlog_daemon \
sensors/GNUmakefile sensors/slothd/GNUmakefile \
......@@ -709,7 +709,7 @@ outfiles="$outfiles Makeconf GNUmakefile \
tbsetup/portstats tbsetup/vnode_setup tbsetup/staticroutes \
tbsetup/console_setup.proxy tbsetup/exports_setup.proxy \
tbsetup/checkports tbsetup/webnodereboot tbsetup/libaudit.pm \
tbsetup/libreboot.pm tbsetup/libosload.pm \
tbsetup/libreboot.pm tbsetup/libosload.pm tbsetup/libadminmfs.pm \
tbsetup/sfskey_update tbsetup/sfskey_update.proxy \
tbsetup/idleswap tbsetup/webidleswap tbsetup/switchmac \
tbsetup/newnode_reboot tbsetup/webnodeattributes \
......
......@@ -172,8 +172,9 @@ use vars qw(@ISA @EXPORT);
TBExptDestroy TBIPtoNodeID TBNodeBootReset TBNodeStateWait
TBLeaderMailList ExpGroup TBExptSetSwapUID TBExptSetThumbNail
TBNodeAllocCheck TBPlabNodeUsername MarkPhysNodeDown TBExptIsElabInElab
TBExptFirewall TBNodeFirewall TBSetExptFirewallVlan
TBClearExptFirewallVlan TBNodeConsoleTail
TBExptFirewall TBNodeFirewall TBExptFirewallAndPort
TBSetExptFirewallVlan TBClearExptFirewallVlan
TBNodeConsoleTail TBExptGetSwapoutAction
TBNodeSubNodes
TBNodeAdminOSID TBNodeDiskloadOSID
......@@ -4004,6 +4005,32 @@ sub TBExptFirewall ($$;$$$) {
return 1;
}
#
# Get the firewall node name and port number for an experiment;
# e.g., for use in an snmpit call.
# Return 1 if successful, 0 on error.
#
sub TBExptFirewallAndPort($$$$) {
    my ($pid, $eid, $fwnodep, $fwportp) = @_;

    #
    # Arguments:
    #   $pid, $eid  - project and experiment ID of the experiment
    #   $fwnodep    - scalar ref; receives the firewall node name
    #   $fwportp    - scalar ref; receives the value of wires.card1
    #
    # Neither output reference is written unless both lookups succeed.
    #

    # First find the experiment's firewall node.
    my $fwnode;
    return 0
	unless (TBExptFirewall($pid, $eid, \$fwnode));

    # Then find the 'Control' type wire that starts at that node.
    my $wires = DBQueryWarn("select card1 from wires ".
			    "where node_id1='$fwnode' AND type='Control'");
    return 0
	unless ($wires && $wires->numrows);

    # Only the first matching row is consulted.
    my ($card) = $wires->fetchrow_array();

    $$fwnodep = $fwnode;
    $$fwportp = $card;
    return 1;
}
#
# Set the firewall VLAN number for an experiment.
#
......@@ -4155,6 +4182,33 @@ sub TBExptGetPanicBit($$$) {
return 1;
}
#
# See if there is an admin MFS swapout action associated with the experiment.
# For now we just look at a globally defined action via sitevar.
#
# Returns 1 if there is a swapout action (with $ref hash filled in),
# 0 otherwise.
#
sub TBExptGetSwapoutAction($$$) {
    my ($pid, $eid, $ref) = @_;

    #
    # $pid and $eid are accepted but not consulted yet; the action comes
    # entirely from site variables.  $ref is a hash ref that is always
    # overwritten: emptied when there is no action, otherwise given the
    # keys 'command' and 'isfatal'.
    #
    my $command;

    if (!TBGetSiteVar("swap/swapout_command", \$command)) {
	# Someday maybe check for a per-experiment setting
	%$ref = ();
	return 0;
    }

    #
    # A failure of the command is treated as fatal unless the failaction
    # sitevar is available and says something other than "fail".
    #
    my $failaction;
    my $isfatal =
	TBGetSiteVar("swap/swapout_command_failaction", \$failaction)
	    ? ($failaction eq "fail") : 1;

    %$ref = ('command' => $command, 'isfatal' => $isfatal);
    return 1;
}
#
# Issue a DB query. Argument is a string. Returns the actual query object, so
# it is up to the caller to test it. I would not for one moment view this
......
......@@ -56,10 +56,12 @@ endif
mfs:
$(MAKE) -C imagezip client
$(MAKE) -C zapdisk mfs
mfs-install: mfs
$(INSTALL_PROGRAM) $(SRCDIR)/create-image $(LBINDIR)/create-image
$(MAKE) -C imagezip client-install
$(MAKE) -C zapdisk mfs-install
frisbee-mfs:
$(MAKE) -C frisbee.redux client
......
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2005 University of Utah and the Flux Group.
# All rights reserved.
#
# Locations substituted in by configure, plus this directory's position
# relative to the top of the object tree.
SRCDIR = @srcdir@
TESTBED_SRCDIR = @top_srcdir@
OBJDIR = ../..
SUBDIR = os/zapdisk
# Install destinations: CBINDIR receives the zapdisk binary, LBINDIR the
# diskzap front-end script (see mfs-install below).
CBINDIR = $(DESTDIR)$(CLIENT_BINDIR)
LBINDIR = $(DESTDIR)/usr/local/bin
include $(OBJDIR)/Makeconf
# Nothing is built by the default target; the real work is under "mfs".
all:
include $(TESTBED_SRCDIR)/GNUmakerules
# Link statically (-static) and keep debug info (-g) in the object.
CFLAGS = -O -g -static
# Link zapdisk, keep an unstripped copy as zapdisk.debug, then strip
# the binary itself.
zapdisk: zapdisk.o
$(CC) $(CFLAGS) zapdisk.o -o zapdisk
cp zapdisk zapdisk.debug
strip zapdisk
# Nothing is installed by the regular install target either.
install:
# MFS build: just the zapdisk binary.
mfs: zapdisk
# MFS install: the binary (mode 700, stripped again by -s) and the diskzap
# script from the source directory.
# NOTE(review): the install line uses zapdisk$(EXE) while the link rule
# produces plain "zapdisk"; presumably EXE is empty wherever this is
# built -- confirm.
mfs-install: mfs
$(INSTALL_PROGRAM) -m 700 -s zapdisk$(EXE) $(CBINDIR)/zapdisk$(EXE)
$(INSTALL_PROGRAM) -m 700 $(SRCDIR)/diskzap $(LBINDIR)/diskzap
# Remove objects and both copies of the binary.
clean:
rm -f *.o zapdisk zapdisk.debug
#!/bin/sh
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2005 University of Utah and the Flux Group.
# All rights reserved.
#
# Front-end script to run the bootblock zapper.
#
# This is run on nodes that were behind a firewall and presumed tainted.
# It prevents them from ever booting from the disk by zeroing the MBR and
# partition boot blocks.
#
args="-BvZ" # the real deal
#args="-Bv" # fakin it

# Pick up BINDIR from the Emulab paths file when there is one, otherwise
# fall back to a fixed location.
if [ -r /etc/emulab/paths.sh ]; then
    . /etc/emulab/paths.sh
else
    BINDIR=/etc/testbed
fi

if [ ! -x "$BINDIR/zapdisk" ]; then
    echo "$BINDIR/zapdisk not found!"
    # A negative operand to "exit" is not valid POSIX sh and is rejected
    # by some shells.  255 is the status that shells accepting -1 report.
    exit 255
fi

#
# XXX We really should not be using a heuristic to figure this out.
# We should get the info from Emulab central.
#
# With no arguments, zap every disk whose probe message in dmesg matches
# one of the known driver prefixes.
# NOTE(review): if nothing matches, the loop below runs zero times and the
# script exits 0 without having zapped anything -- confirm that is intended.
#
if [ $# -eq 0 ]; then
    set -- `dmesg | egrep '(ad|da|ar|amrd)[0-9]: [0-9]+MB' | \
	sed -e 's/^\([a-z][^:]*\):.*/\1/'`
fi

# Exit status is the number of disks for which zapdisk failed.  Disks
# without a readable device node are reported but not counted.
status=0
for disk in "$@"; do
    echo -n "Zapping bootblocks for $disk..."
    if [ -r "/dev/$disk" ]; then
	# $args is deliberately unquoted so it splits into separate options.
	if "$BINDIR/zapdisk" $args "/dev/$disk"; then
	    echo "OK"
	else
	    echo "FAILED!"
	    status=`expr $status + 1`
	fi
    else
	echo "SKIPPED!"
    fi
done
exit $status
/*
* EMULAB-COPYRIGHT
* Copyright (c) 2005 University of Utah and the Flux Group.
* All rights reserved.
*/
/*
* Another little utility that groks DOS partitions and neuters boot blocks
* and/or superblocks.
*
* XXX should be combined with dostype.c.
*/
#include <stdlib.h>
#include <limits.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#ifndef __CYGWIN__
#include <sys/types.h>
#include <sys/ioctl.h>
#include <inttypes.h>
#if __FreeBSD__ >= 5
#include <sys/diskmbr.h>
#endif
#endif
/*
* For superblocks we wipe the first 8192 bytes,
* For boot blocks just the first 512.
*/
/* Bytes zeroed at the start of a partition in superblock (-S) mode. */
#define SB_ZAPSIZE 8192
/* Bytes zeroed at the start of a partition in boot block (-B) only mode. */
#define BB_ZAPSIZE 512
/* Size of the zero buffer; must be at least the larger of the two above. */
#define MAX_ZAPSIZE 8192
/* Value the "magic" field of the label must hold for readmbr to accept it. */
#define BOOT_MAGIC 0xAA55
/* Not referenced by the code visible in this file. */
#define DOSBBSECTOR 0
/* Byte offset of the partition table within the first sector. */
#define DOSPARTOFF 446
/* Number of entries in the partition table. */
#define NDOSPART 4
/*
 * One partition table entry.  Only dp_start and dp_size are consulted
 * by this program.
 * NOTE(review): the multi-byte fields are used exactly as read from the
 * disk, so this presumably assumes a little-endian host -- confirm.
 */
struct dospart {
uint8_t dp_flag; /* bootstrap flags */
uint8_t dp_shd; /* starting head */
uint8_t dp_ssect; /* starting sector */
uint8_t dp_scyl; /* starting cylinder */
uint8_t dp_typ; /* partition type */
uint8_t dp_ehd; /* end head */
uint8_t dp_esect; /* end sector */
uint8_t dp_ecyl; /* end cylinder */
uint32_t dp_start; /* absolute starting sector number */
uint32_t dp_size; /* partition size in sectors */
};
/*
 * In-memory image of the first sector.  Disk I/O starts at pad2 (not at
 * the start of the struct) and runs for DOSLABELSIZE bytes, so pad2,
 * parts and magic must be laid out back to back with no padding.
 * With a 2-byte short, "align" puts pad2 at offset 2 and therefore parts
 * at offset 448.
 */
struct doslabel {
int8_t align[sizeof(short)]; /* Force alignment */
int8_t pad2[DOSPARTOFF];
struct dospart parts[NDOSPART];
uint16_t magic;
};
/* Number of bytes transferred to/from disk, starting at doslabel.pad2. */
#define DOSLABELSIZE \
(DOSPARTOFF + NDOSPART*sizeof(struct dospart) + sizeof(uint16_t))
/* -v: print what is being done. */
static int verbose = 0;
/* -p: partition to operate on; 0 means all partitions (and the MBR). */
static int pnum = 0;
/* -B: zap boot blocks. */
static int bootblocks = 0;
/* -S: zap superblocks (selects the larger zap size). */
static int superblocks = 0;
/* -Z: actually write; without it the program only reports. */
static int doit = 0;
/* Zero-filled buffer written over each partition start. */
static char zapdata[MAX_ZAPSIZE];
/* Number of bytes of zapdata to write; chosen in main. */
static int zapsize;
/* Each of these returns 0 on success and non-zero on failure. */
int readmbr(char *dev);
int zappart(char *dev, int pnum);
int zapmbr(char *disk);
/*
 * Print the command line synopsis on stderr and exit with status 1.
 * Does not return.
 *
 * The text lists every option accepted by the getopt string in main
 * ("p:vBSZ"), including -v and the fact that -p takes 0 for "all".
 */
static void
usage(void)
{
	fprintf(stderr, "usage: "
		"zapdisk [-vZ] [-p <pnum>] -B|-S <diskdev>\n"
		"  -p <pnum>   operate only on the given partition"
		" (1-4, 0 means all)\n"
		"  -v          be verbose about what is being done\n"
		"  -B          zap MBR and partition boot programs\n"
		"  -S          zap possible superblocks in all partitions\n"
		"  -Z          really do the zap and don't just talk about it\n"
		"  <diskdev>   disk special file to operate on\n");
	exit(1);
}
int
main(int argc, char **argv)
{
char *disk;
int ch, i;
int errors = 0;
while ((ch = getopt(argc, argv, "p:vBSZ")) != -1)
switch(ch) {
case 'Z':
doit++;
break;
case 'p':
pnum = atoi(optarg);
break;
case 'v':
verbose++;
break;
case 'B':
bootblocks++;
break;
case 'S':
superblocks++;
break;
case '?':
default:
usage();
}
argc -= optind;
argv += optind;
if (argc < 1)
usage();
if (!bootblocks && !superblocks) {
fprintf(stderr, "Must specify either -B or -S\n");
usage();
}
disk = argv[0];
if (pnum < 0 || pnum > 4) {
fprintf(stderr, "Invalid partition number %d\n", pnum);
exit(1);
}
#ifdef __CYGWIN__
fprintf(stderr, "Does't work under Windows yet\n");
exit(1);
#else
if (readmbr(disk)) {
fprintf(stderr, "zapdisk only works on disks with DOS MBR\n");
exit(1);
}
/*
* We are assuming that writing zeros provides proper zap-age
*/
zapsize = superblocks ? SB_ZAPSIZE : BB_ZAPSIZE;
memset(zapdata, 0, zapsize);
for (i = 1; i <= 4; i++)
if (pnum == 0 || i == pnum)
if (zappart(disk, i))
errors++;
if (pnum == 0 && bootblocks)
if (zapmbr(disk))
errors++;
exit(errors);
#endif
}
static struct doslabel doslabel;
/*
 * Read the first sector of "dev" into the file-scope doslabel and check
 * its magic number.
 *
 * Returns 0 if a full label with the expected magic was read, 1 otherwise
 * (after printing a diagnostic on stderr).  The descriptor is closed on
 * every path, including success.
 *
 * NOTE(review): the magic is compared in host byte order as read from the
 * disk; presumably only little-endian hosts are targeted -- confirm.
 */
int
readmbr(char *dev)
{
	int fd, cc;
	int rv = 1;

	fd = open(dev, O_RDONLY);
	if (fd < 0) {
		perror(dev);
		return 1;
	}

	/*
	 * The read starts at pad2, not at the start of the struct; the
	 * leading "align" member exists only to position the fields.
	 */
	if (lseek(fd, (off_t)0, SEEK_SET) < 0)
		perror("Could not seek to DOS label");
	else if ((cc = read(fd, doslabel.pad2, DOSLABELSIZE)) < 0)
		perror("Could not read DOS label");
	else if (cc != (int)DOSLABELSIZE)
		fprintf(stderr, "Could not get the entire DOS label\n");
	else if (doslabel.magic != BOOT_MAGIC)
		fprintf(stderr, "Wrong magic number in DOS partition table\n");
	else
		rv = 0;

	close(fd);
	return rv;
}
/*
* Zap the bootblock/superblock in a partition.
*/
int
zappart(char *dev, int pnum)
{
int fd, cc;
struct dospart *pinfo = &doslabel.parts[pnum-1];
if (verbose)
printf("part%d: start=%d, size=%d\n",
pnum, pinfo->dp_start, pinfo->dp_size);
if (pinfo->dp_start == 0 || pinfo->dp_size == 0) {
if (verbose || !doit)
printf("part%d: empty, skipped\n", pnum);
return 0;
}
fd = open(dev, O_RDWR);
if (fd < 0) {
perror(dev);
return 1;
}
if (lseek(fd, (off_t)pinfo->dp_start * 512, SEEK_SET) < 0) {
perror("Could not seek to partition start");
close(fd);
return 1;
}
if (!doit) {
printf("part%d: would zero %d bytes at sector %d\n",
pnum, zapsize, pinfo->dp_start);
cc = zapsize;
} else {
if (verbose)
printf("part%d: zeroing %d bytes at sector %d\n",
pnum, zapsize, pinfo->dp_start);
cc = write(fd, zapdata, zapsize);
}
if (cc != zapsize) {
perror("Could not zap partition block");
close(fd);
return 1;
}
close(fd);
return 0;
}
/*
 * All manner of dark magic is required to write the MBR on various OSes.
 *
 * Zeroes the boot code area (doslabel.pad2) of the in-memory label and,
 * when -Z was given, writes the label back to sector 0 of "disk".  The
 * partition table and magic are written back as readmbr loaded them.
 * Without -Z only a message is printed.
 *
 * Returns 0 on success, 1 if the label could not be written.  If no
 * usable descriptor can be opened at all, the program exits with
 * status 1, as it always has.
 *
 * Every descriptor opened here is closed exactly once before returning.
 */
int
zapmbr(char *disk)
{
	int fd, fdw = -1;
	int cc;
	int rv = 0;

	/*
	 * For the MBR we just zero out the 400+ bytes before the
	 * partition table.
	 */
	memset(doslabel.pad2, 0, sizeof(doslabel.pad2));

	fd = open(disk, O_RDWR);
#ifdef DIOCSMBR
	/*
	 * Deal with FreeBSD5 funkyness for writing the MBR.  You have to use
	 * an ioctl on the disk to do it.  But, you apparently cannot perform
	 * the ioctl on the "whole disk" device, you have to do it on a slice
	 * device.  So we try opening slice devices until we get one.
	 *
	 * This code was derived from fdisk.
	 */
	if (fd < 0 && errno == EPERM) {
		/* the disk itself must at least be readable */
		int rfd = open(disk, O_RDONLY);

		if (rfd >= 0) {
			char sstr[64];
			int p;

			for (p = 1; p <= 4; p++) {
				snprintf(sstr, sizeof sstr, "%ss%d", disk, p);
				fdw = open(sstr, O_RDWR);
				if (fdw >= 0)
					break;
			}
			close(rfd);
		}
	}
#endif
	/* At most one of fd and fdw is open from here on. */
	if (fd < 0 && fdw < 0) {
		perror(disk);
		exit(1);
	}

	if (!doit) {
		/* sizeof yields a size_t; cast it to match %d */
		printf("mbr: would zero %d bytes at sector 0\n",
		       (int)sizeof(doslabel.pad2));
	} else {
		if (verbose)
			printf("mbr: zeroing %d bytes at sector 0\n",
			       (int)sizeof(doslabel.pad2));

		if (fd >= 0) {
			/* Plain write of the whole label, starting at pad2. */
			if (lseek(fd, (off_t)0, SEEK_SET) < 0) {
				perror("Could not seek to DOS label");
				rv = 1;
			} else {
				cc = write(fd, doslabel.pad2, DOSLABELSIZE);
				if (cc < 0) {
					perror("Could not write DOS label");
					rv = 1;
				} else if (cc != (int)DOSLABELSIZE) {
					/* errno is not set by a short write */
					fprintf(stderr,
						"Could not write DOS label: "
						"short write\n");
					rv = 1;
				}
			}
		}
#ifdef DIOCSMBR
		else if (ioctl(fdw, DIOCSMBR, doslabel.pad2) < 0) {
			perror("Could not write DOS label");
			rv = 1;
		}
#endif
	}

	if (fd >= 0)
		close(fd);
	if (fdw >= 0)
		close(fdw);
	return rv;
}
......@@ -54,8 +54,8 @@ LIB_STUFF = libtbsetup.pm exitonwarn.pm libtestbed.pm snmpit_intel.pm \
snmpit_foundry.pm snmpit_stack.pm snmpit_remote.pm \
snmpit_nortel.pm \
libaudit.pm libreboot.pm libosload.pm libtestbed.py \
power_mail.pm power_whol.pm \
libtblog.pm
libadminmfs.pm libtblog.pm \
power_mail.pm power_whol.pm
#
# Force dependencies on the scripts so that they will be rerun through
......
This diff is collapsed.
......@@ -2,7 +2,7 @@
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2004 University of Utah and the Flux Group.
# Copyright (c) 2000-2005 University of Utah and the Flux Group.
# All rights reserved.
#
......@@ -120,9 +120,7 @@ else {
#
# See if the experiment is firewalled. Error if not.
#
my $firewall;
my $firewalled = TBExptFirewall($pid, $eid, \$firewall);
my $firewalled = TBExptFirewall($pid, $eid);
if (!$firewalled) {
die("*** $0:\n".
" Experiment $pid/$eid is not firewalled!\n");
......@@ -217,14 +215,13 @@ if (! UserDBInfo($expt_head_login, \$expt_head_name, \$expt_head_email)) {
$expt_head_email = $TBOPS;
}
$query_result =
DBQueryFatal("select card1 from wires ".
"where node_id1='$firewall' AND type='Control'");
if (!$query_result->numrows) {
#
# Get firewall node and port info
#
my ($firewall, $port);
if (!TBExptFirewallAndPort($pid, $eid, \$firewall, \$port)) {
fatal("Could not determine firewall port for $pid/$eid!");
}
my ($port) = $query_result->fetchrow_array();
#
# Call snmpit.
......
This diff is collapsed.
......@@ -14,15 +14,18 @@ sub usage()
{
print STDOUT "Usage: node_admin [-h] [-n | -w] <on | off> [node ....]\n";
print STDOUT " node_admin [-h] [-n | -w] -e pid,eid <on | off>\n";
print STDOUT "-h This message\n";
print STDOUT "-n Do not reboot node\n";
print STDOUT "-w Wait for node to come back up if rebooted\n";
print STDOUT "-e Operate on all nodes in an experiment\n";
print STDOUT "-h This message\n";
print STDOUT "-n Do not reboot node\n";
print STDOUT "-w Wait for node to come back up if rebooted\n";
print STDOUT "-e Operate on all nodes in an experiment\n";
print STDOUT "-c cmd Run command in MFS and wait for completion\n".
" (-n and -w apply after the command is run).\n";
exit(-1);
}
my $optlist = "hnwe:";
my $optlist = "hnwe:c:";
my $waitmode = 0;
my $reboot = 1;
my $runcmd = "";
#
# Configure variables
......@@ -35,7 +38,7 @@ my $TB = "@prefix@";
use lib "@prefix@/lib";
use libdb;
use libtestbed;
use StateWait;
use libadminmfs;
#
# Turn off line buffering on output
......@@ -73,11 +76,17 @@ if (defined($options{"w"})) {
$waitmode = 1;
}
if (!@ARGV) {
usage();
}
my $onoff;
my $onoff = shift(@ARGV);
if (defined($options{"c"})) {
$runcmd = $options{"c"};
$onoff = "on";
} else {
if (!@ARGV) {
usage();
}
$onoff = shift(@ARGV);
}
if ($onoff ne "on" && $onoff ne "off") {
usage();
......@@ -160,87 +169,63 @@ else {
}
}
# Switcheroo the osids on the nodes.
if ($onoff eq "on") {
my %adminosid = ();
for my $node (@nodes) {
my $osid = TBNodeAdminOSID($node);
push @{$adminosid{$osid}}, $node;
}
for my $osid (keys %adminosid) {
my @n = @{$adminosid{$osid}};
system("$osselect -t $osid @n") and
my @bad;
my %args;
if ($runcmd ne "") {
#
# Reboot into admin mode and run the command
#
%args = ();
$args{'name'} = $0;
$args{'command'} = $runcmd;
if (TBAdminMfsRunCmd(\%args, undef, @nodes)) {
die("*** $0:\n".
" Failed to set temp boot to $osid for some of: @n\n");
" Failed to run '$runcmd' on some of @nodes!\n");
}
}
else {
system("$osselect -c -t @nodes") and
die("*** $0:\n".
" Failed to clear temp boot for some nodes!\n");
}
# Is this needed anymore?
DBQueryFatal("update nodes set startupcmd='', startstatus='none' ".
"where " . join(" or ", map("node_id='$_'", @nodes)));
#
# Turn admin mode back off and optionally reboot back to the old OS
#
%args = ();
$args{'name'} = $0;
$args{'on'} = 0;
$args{'clearall'} = 0;
if (TBAdminMfsSelect(\%args, \@bad, @nodes)) {
die("*** $0:\n".
" Could not turn admin mode off for @bad!\n");
}
#
# Reboot nodes
#
if ($reboot) {
if ($waitmode) {
$StateWait::debug = 0;
#
# Initialize the statewait library.
#
my @states = ();
my @finished = ();
my @failed = ();
#
# Only wait for MFSSETUP when going into the MFS. When coming out
# of MFS, just wait for generic ISUP.
#
push(@states, TBDB_NODESTATE_MFSSETUP())
if ($onoff eq "on");
push(@states, TBDB_NODESTATE_ISUP());
if (initStateWait(\@states, @nodes)) {
if ($reboot) {
%args = ();
$args{'name'} = $0;
$args{'on'} = 0;
$args{'reboot'} = $reboot;
$args{'wait'} = $waitmode;
if (TBAdminMfsBoot(\%args, \@bad, @nodes)) {
die("*** $0:\n".
" Failed to initialize the statewait library!\n");
" Did not properly reboot @bad after command!\n");
}
}
exit(0);
}
# Reboot nodes *after* setting up statewait above.
if (system("$nodereboot @nodes")) {
$args{'name'} = $0;
$args{'on'} = ($onoff eq "on");
$args{'clearall'} = 0;
if (TBAdminMfsSelect(\%args, \@bad, @nodes)) {
die("*** $0:\n".
" Could not turn admin mode $onoff for @bad!\n");
}