Commit e468cc49 authored by David Johnson

Fix a nasty docker/mkvnode.pl race inspired by bootvnodes/vnodesetup.

This is probably true for Xen too, but in some cases, the
vnodesetup early-release hackwaitandexit timeout of 30 seconds
causes a race condition.  Normally, the first node sets up
significant network state, and sometimes flips MAC addresses
around from interface to interface -- OR puts a physical interface
into a bridge, then changes the bridge's MAC address.  There is a
short window of time where both the bridge and the new member
interface share a MAC address -- and if the tmcc ifconfig assembly
process for vnodes following the first vnode resolves
the wrong device's MAC address and uses that to flesh out the
ifconfig info, the vnodesetup will be in a world of hurt (i.e., you
might see an attempt to make a vlan device out of a vlan device).
The chance of this happening is minuscule, but I've seen it.

So, at least for docker for now, we protect the first vnode against
the 30-second timeout in vnodesetup hackwaitandexit, and we wait for the
actual running file to be written, or error.

This is probably applicable to any linux mkvnode.pl path, but I suppose
it would have been another hundred thousand vnode creates before I saw
it again.
parent a9827417
#!/usr/bin/perl -w
#
# Copyright (c) 2000-2016 University of Utah and the Flux Group.
# Copyright (c) 2000-2016, 2018 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -68,7 +68,7 @@ my $action;
# Prototypes
sub prebootvnodes($$);
sub postbootvnodes($$);
sub bootvnode($$$);
sub bootvnode($$$;$);
#
# Parse command arguments. Once we return from getopts, all that should be
......@@ -198,8 +198,29 @@ if (defined($action) && !$reconfig) {
push(@vnfiles, $1);
}
}
#
# This is probably true for Xen too, but in some cases, the
# vnodesetup early-release hackwaitandexit timeout of 30 seconds
# causes a race condition. Normally, the first node sets up
# significant network state, and sometimes flips MAC addresses
# around from interface to interface -- OR puts a physical interface
# into a bridge, then changes the bridge's MAC address. There is a
# short window of time where both the bridge and the new member
# interface share a MAC address -- and if the tmcc ifconfig resolves
# the wrong device's MAC address and uses that to flesh out the
# ifconfig info, the vnodesetup will be in a world of hurt. The
# chance of this happening is minuscule, but I've seen it.
#
# So, at least for docker for now, we protect the first vnode against
# the 30-second timeout in vnodesetup hackwaitandexit.
#
my $vht;
if (GENVNODETYPE() eq 'docker') {
$vht = 0;
}
foreach my $file (sort byvnode @vnfiles) {
bootvnode($file, $action, (-e "$vndir/$file/fakejail" ? 0 : 1));
bootvnode($file, $action, (-e "$vndir/$file/fakejail" ? 0 : 1),$vht);
$vht = undef;
}
exit(0);
}
......@@ -281,9 +302,14 @@ exit(0)
prebootvnodes(\%curvnodelist, \%newvnodelist)
if (!$fakejails);
# $vht is the optional fourth argument to bootvnode(), which turns a defined
# value into a "-F <value>" option. For docker it starts at 0 and is reset
# to undef after the first iteration, so only the first vnode (in byvnode
# sort order) gets "-F 0"; every later vnode is booted without -F.
my $vht;
if (GENVNODETYPE() eq 'docker') {
$vht = 0;
}
foreach my $vnode (sort byvnode keys(%newvnodelist)) {
# Blocks until mostly setup.
bootvnode($vnode, "boot", $newvnodelist{$vnode});
bootvnode($vnode, "boot", $newvnodelist{$vnode},$vht);
# Clear the override so it applies to the first vnode only.
$vht = undef;
}
postbootvnodes(\%curvnodelist, \%newvnodelist)
......@@ -380,9 +406,9 @@ sub postbootvnodes($$)
#
# Helper function to boot/kill/halt/reboot a specific vnode.
#
sub bootvnode($$$)
sub bootvnode($$$;$)
{
my ($vnode, $action, $jailed) = @_;
my ($vnode, $action, $jailed, $vnodesetup_hackwaitandexit_timeout) = @_;
my $opt;
my $act;
my $extrawait = $waittime;
......@@ -408,6 +434,9 @@ sub bootvnode($$$)
#$extrawait = 20 if (GENVNODETYPE() eq "xen");
}
$opt .= ($jailed ? " -jVt" : " -i");
if (defined($vnodesetup_hackwaitandexit_timeout)) {
$opt .= " -F $vnodesetup_hackwaitandexit_timeout";
}
print "$act vnode $vnode with options '$opt' at " .
libsetup::TBTimeStamp() . "\n";
......
#!/usr/bin/perl -wT
#
# Copyright (c) 2000-2014 University of Utah and the Flux Group.
# Copyright (c) 2000-2014, 2018 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -31,7 +31,7 @@ use POSIX ":sys_wait_h";
#
# Prototypes
#
sub hackwaitandexit($);
sub hackwaitandexit($;$);
# Drag in path stuff so we can find emulab stuff.
BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }
......@@ -49,12 +49,14 @@ sub usage()
" -i creates a fake virtual node.\n".
"\n".
"Use -b when starting the virtual node at boot time.\n".
"Use -F <timeout> to change the hackwaitandexit timeout;\n".
" < 1 means no timeout.\n".
"Use -r when rebooting the virtual node.\n".
"Use -h when halting the virtual node.\n".
"Use -k when killing the virtual node (removes filesystems).\n";
exit(1);
}
my $optlist = "kbdjsVrhptie";
my $optlist = "kbdjsVrhptieF:";
# Locals
my $killit = 0;
......@@ -73,6 +75,7 @@ my $leavejail = 0;
my $timestamps = 0;
my $jailpid;
my $cleanupstate = "SHUTDOWN";
my $hackwaitandexit_timeout = 30;
#
# Turn off line buffering on output
......@@ -172,6 +175,14 @@ if (defined($options{"p"})) {
if (defined($options{"i"})) {
$fakevnode = 1;
}
# -F <timeout>: override the hackwaitandexit timeout (initialized to 30).
# Any value below 1 is normalized to 0, which hackwaitandexit() treats as
# "no timeout" (it loops forever when its count is < 1). Any other value
# is truncated to an integer number of one-second polls.
# NOTE(review): the value is compared numerically without validation, so a
# non-numeric argument would presumably warn under -w and be treated as 0.
if (defined($options{"F"})) {
if ($options{"F"} < 1) {
$hackwaitandexit_timeout = 0;
}
else {
$hackwaitandexit_timeout = int($options{"F"});
}
}
if (@ARGV != 1) {
usage();
}
......@@ -319,7 +330,7 @@ if (!$debug && !$interactive && (my $cpid = TBBackGround($logname))) {
# setup first. This whole approach is wildly hacky.
#
if ($dojail) {
hackwaitandexit($cpid);
hackwaitandexit($cpid,$hackwaitandexit_timeout);
}
exit(0);
}
......@@ -704,7 +715,7 @@ sub rebootvnode() {
return -1;
}
hackwaitandexit(0);
hackwaitandexit(0,$hackwaitandexit_timeout);
return 0;
}
......@@ -941,12 +952,18 @@ sub removeconfdir($)
# for setup failure from the direct child, so we can tell the caller.
# Otherwise, need to use the normal wait path (timeout or TBFAILED).
#
sub hackwaitandexit($)
sub hackwaitandexit($;$)
{
my $cpid = shift();
my ($cpid,$count) = @_;
my $now = time();
my $goofy;
my $count = 30;
if (!defined($count)) {
$count = 30;
}
my $forever = 0;
if ($count < 1) {
$forever = 1;
}
# The first case is for our own (non-plab) vservers.
if (-e "/vservers") {
......@@ -963,7 +980,7 @@ sub hackwaitandexit($)
$goofy = CONFDIR() . "/root/var/run/emulab-watchdog.pid";
}
while ($count--) {
while ($forever || $count--) {
sleep(1);
if (-e $goofy) {
my ($mtime,$ctime) = (stat($goofy))[8,9];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment