Commit 40528b13 authored by Elijah Grubb

Merge remote-tracking branch 'origin/master' into entrypoint-bug-fix

parents a8061efe e468cc49
#!/usr/bin/perl -w
#
# Copyright (c) 2000-2016 University of Utah and the Flux Group.
# Copyright (c) 2000-2016, 2018 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -68,7 +68,7 @@ my $action;
# Prototypes
sub prebootvnodes($$);
sub postbootvnodes($$);
sub bootvnode($$$);
sub bootvnode($$$;$);
#
# Parse command arguments. Once we return from getopts, all that should be
......@@ -198,8 +198,29 @@ if (defined($action) && !$reconfig) {
push(@vnfiles, $1);
}
}
#
# This is probably true for Xen too, but in some cases, the
# vnodesetup early-release hackwaitandexit timeout of 30 seconds
# causes a race condition. Normally, the first node sets up
# significant network state, and sometimes flips MAC addresses
# around from interface to interface -- OR puts a physical interface
# into a bridge, then changes the bridge's MAC address. There is a
# short window of time where both the bridge and the new member
# interface share a MAC address -- and if the tmcc ifconfig resolves
# the wrong device's MAC address and uses that to flesh out the
# ifconfig info, the vnodesetup will be in a world of hurt. The
# chance of this happening is minuscule, but I've seen it.
#
# So, at least for docker for now, we protect the first vnode against
# the 30-second timeout in vnodesetup hackwaitandexit.
#
my $vht;
if (GENVNODETYPE() eq 'docker') {
$vht = 0;
}
foreach my $file (sort byvnode @vnfiles) {
bootvnode($file, $action, (-e "$vndir/$file/fakejail" ? 0 : 1));
bootvnode($file, $action, (-e "$vndir/$file/fakejail" ? 0 : 1),$vht);
$vht = undef;
}
exit(0);
}
......@@ -281,9 +302,14 @@ exit(0)
prebootvnodes(\%curvnodelist, \%newvnodelist)
if (!$fakejails);
my $vht;
if (GENVNODETYPE() eq 'docker') {
$vht = 0;
}
foreach my $vnode (sort byvnode keys(%newvnodelist)) {
# Blocks until mostly setup.
bootvnode($vnode, "boot", $newvnodelist{$vnode});
bootvnode($vnode, "boot", $newvnodelist{$vnode},$vht);
$vht = undef;
}
postbootvnodes(\%curvnodelist, \%newvnodelist)
......@@ -380,9 +406,9 @@ sub postbootvnodes($$)
#
# Helper function to boot/kill/halt/reboot a specific vnode.
#
sub bootvnode($$$)
sub bootvnode($$$;$)
{
my ($vnode, $action, $jailed) = @_;
my ($vnode, $action, $jailed, $vnodesetup_hackwaitandexit_timeout) = @_;
my $opt;
my $act;
my $extrawait = $waittime;
......@@ -408,6 +434,9 @@ sub bootvnode($$$)
#$extrawait = 20 if (GENVNODETYPE() eq "xen");
}
$opt .= ($jailed ? " -jVt" : " -i");
if (defined($vnodesetup_hackwaitandexit_timeout)) {
$opt .= " -F $vnodesetup_hackwaitandexit_timeout";
}
print "$act vnode $vnode with options '$opt' at " .
libsetup::TBTimeStamp() . "\n";
......
#!/usr/bin/perl -wT
#
# Copyright (c) 2000-2014 University of Utah and the Flux Group.
# Copyright (c) 2000-2014, 2018 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -31,7 +31,7 @@ use POSIX ":sys_wait_h";
#
# Prototypes
#
sub hackwaitandexit($);
sub hackwaitandexit($;$);
# Drag in path stuff so we can find emulab stuff.
BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }
......@@ -49,12 +49,14 @@ sub usage()
" -i creates a fake virtual node.\n".
"\n".
"Use -b when starting the virtual node at boot time.\n".
"Use -F <timeout> to change the hackwaitandexit timeout;\n".
" < 1 means no timeout.\n".
"Use -r when rebooting the virtual node.\n".
"Use -h when halting the virtual node.\n".
"Use -k when killing the virtual node (removes filesystems).\n";
exit(1);
}
my $optlist = "kbdjsVrhptie";
my $optlist = "kbdjsVrhptieF:";
# Locals
my $killit = 0;
......@@ -73,6 +75,7 @@ my $leavejail = 0;
my $timestamps = 0;
my $jailpid;
my $cleanupstate = "SHUTDOWN";
my $hackwaitandexit_timeout = 30;
#
# Turn off line buffering on output
......@@ -172,6 +175,14 @@ if (defined($options{"p"})) {
if (defined($options{"i"})) {
$fakevnode = 1;
}
if (defined($options{"F"})) {
if ($options{"F"} < 1) {
$hackwaitandexit_timeout = 0;
}
else {
$hackwaitandexit_timeout = int($options{"F"});
}
}
if (@ARGV != 1) {
usage();
}
......@@ -319,7 +330,7 @@ if (!$debug && !$interactive && (my $cpid = TBBackGround($logname))) {
# setup first. This whole approach is wildly hacky.
#
if ($dojail) {
hackwaitandexit($cpid);
hackwaitandexit($cpid,$hackwaitandexit_timeout);
}
exit(0);
}
......@@ -704,7 +715,7 @@ sub rebootvnode() {
return -1;
}
hackwaitandexit(0);
hackwaitandexit(0,$hackwaitandexit_timeout);
return 0;
}
......@@ -941,12 +952,18 @@ sub removeconfdir($)
# for setup failure from the direct child, so we can tell the caller.
# Otherwise, need to use the normal wait path (timeout or TBFAILED).
#
sub hackwaitandexit($)
sub hackwaitandexit($;$)
{
my $cpid = shift();
my ($cpid,$count) = @_;
my $now = time();
my $goofy;
my $count = 30;
if (!defined($count)) {
$count = 30;
}
my $forever = 0;
if ($count < 1) {
$forever = 1;
}
# The first case is for our own (non-plab) vservers.
if (-e "/vservers") {
......@@ -963,7 +980,7 @@ sub hackwaitandexit($)
$goofy = CONFDIR() . "/root/var/run/emulab-watchdog.pid";
}
while ($count--) {
while ($forever || $count--) {
sleep(1);
if (-e $goofy) {
my ($mtime,$ctime) = (stat($goofy))[8,9];
......
......@@ -3240,24 +3240,29 @@ sub vnodePreConfigControlNetwork($$$$$$$$$$$$)
warn("could not find bossip anywhere; aborting!");
return -1;
}
my $retries = 30;
my $retries = 4;
my @addrs = ();
my $uname = "users";
while ($retries > 0) {
(undef,undef,undef,undef,@addrs) = gethostbyname("users");
(undef,undef,undef,undef,@addrs) = gethostbyname($uname);
if ($? || @addrs == 0) {
warn("could not resolve users.$bossdomain; retrying!");
sleep(2);
warn("could not resolve $uname; retrying!");
sleep(4);
}
else {
last;
}
$uname = "users.$shortdomain";
$retries -= 1;
}
my $ops_ip;
if (@addrs == 0) {
warn("could not resolve users.$bossdomain; aborting!");
return -1;
warn("could not resolve users.$bossdomain; sending name to iptables!");
$ops_ip = "users";
}
else {
$ops_ip = inet_ntoa($addrs[0]);
}
my $ops_ip = inet_ntoa($addrs[0]);
my $local_tmcd_port = $TMCD_PORT + $vmid;
#
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment