Commit 96794781 authored by David Johnson's avatar David Johnson

Clientside Docker vnode support.

See clientside/tmcc/linux/docker/README.md for design notes.
See clientside/tmcc/linux/docker/dockerfiles/README.md for a description
of how we automatically Emulabize existing Docker images.

Also, this mostly fits within the existing vnodesetup path, but I did modify
mkvnode.pl to allow the libvnode backend to provide a vnodePoll wait
loop instead of the builtin vnodeState loop.
parent 3fe30c60
......@@ -70,7 +70,7 @@ use librc;
# Not all clients support this.
#
exit(0)
if (MFS() || REMOTE() || JAILED() || INXENVM() || PLAB());
if (MFS() || REMOTE() || JAILED() || INXENVM() || INDOCKERVM() || PLAB());
# Protos.
sub doboot();
......
......@@ -67,7 +67,7 @@ my $pathname = $HOSTSFILE; # Default path from liblocsetup.
# Not all clients support this.
#
exit(0)
if (MFS() || (REMOTE() && !(REMOTEDED() || PLAB() || JAILED())));
if (MFS() || (REMOTE() && !(REMOTEDED() || PLAB() || JAILED())) || INDOCKERVM());
# Protos.
sub doboot();
......
#!/usr/bin/perl -wT
#
# Copyright (c) 2008-2014 University of Utah and the Flux Group.
# Copyright (c) 2008-2014, 2017 University of Utah and the Flux Group.
#
# {{{EMULAB-LICENSE
#
......@@ -30,6 +30,7 @@ use Exporter;
VNODE_STATUS_INIT VNODE_STATUS_STOPPING VNODE_STATUS_UNKNOWN
VNODE_STATUS_MOUNTED
VNODE_PATH
VNODE_POLL_ERROR VNODE_POLL_STOP VNODE_POLL_CONTINUE
findVirtControlNet
);
......@@ -42,8 +43,16 @@ sub VNODE_STATUS_MOUNTED() { return "mounted"; }
sub VNODE_STATUS_BOOTING() { return "booting"; }
sub VNODE_STATUS_INIT() { return "init"; }
sub VNODE_STATUS_STOPPING(){ return "stopping"; }
sub VNODE_STATUS_PAUSED(){ return "paused"; }
sub VNODE_STATUS_UNKNOWN() { return "unknown"; }
#
# Valid constants that can be returned by vnodePoll.
#
# Polling failed; the mkvnode poll loop logs the condition and cleans up.
sub VNODE_POLL_ERROR()
{
    return -1;
}

# The backend is done polling; the mkvnode poll loop exits and cleans up.
sub VNODE_POLL_STOP()
{
    return 1;
}

# Nothing to act on; the mkvnode poll loop calls vnodePoll again.
sub VNODE_POLL_CONTINUE()
{
    return 0;
}
# VM path stuff
my $VMPATH = "$VARDIR/vminfo";
sub VNODE_PATH(;$) {
......
......@@ -62,7 +62,7 @@ use Exporter;
TMGATEDCONFIG TMSYNCSERVER TMKEYHASH TMNODEID TMNODEUUID TMEVENTKEY
TMCREATOR TMSWAPPER TMFWCONFIG TMGENVNODECONFIG
TMSTORAGEMAP TMDISKINFO TMEXTRAFS
INXENVM INVZVM
INXENVM INVZVM INDOCKERVM
);
# Must come after package declaration!
......@@ -562,6 +562,7 @@ sub setFSRVTYPE($) {
#
# True when we are inside a generic vnode whose type is "xen".
# When not inside a generic vnode, the (false) $ingenvnode value itself
# is returned, exactly as the short-circuit form would.
sub INXENVM()
{
    if (!$ingenvnode) {
	return $ingenvnode;
    }
    return (GENVNODETYPE() eq "xen");
}

# True when we are inside a generic vnode whose type is "openvz".
sub INVZVM()
{
    if (!$ingenvnode) {
	return $ingenvnode;
    }
    return (GENVNODETYPE() eq "openvz");
}

# True when we are inside a generic vnode whose type is "docker".
sub INDOCKERVM()
{
    if (!$ingenvnode) {
	return $ingenvnode;
    }
    return (GENVNODETYPE() eq "docker");
}
#
# Reset to a moderately clean state.
......
......@@ -84,10 +84,14 @@ use libvnode;
# Helpers
sub MyFatal($);
sub hasLibOp($);
sub safeLibOp($$$;@);
sub CleanupVM();
sub TearDownStaleVM();
sub StoreState();
sub ReadState();
sub BackendVnodePoll();
sub DefaultVnodePoll();
# Locals
my $CTRLIPFILE = "/var/emulab/boot/myip";
......@@ -223,7 +227,7 @@ foreach my $type (@nodetypes) {
}
$libops{$type}{'init'}->();
# need to do this for each type encountered.
# need to do this for each type encountered.
TBDebugTimeStampWithDate("starting $type rootPreConfig()");
$libops{$type}{'rootPreConfig'}->($BOSSIP);
TBDebugTimeStampWithDate("finished $type rootPreConfig()");
......@@ -758,7 +762,14 @@ if (defined(VNCONFIG('SSHDPORT')) && VNCONFIG('SSHDPORT') ne "" &&
# it running in its new context. Still, lets protect it with a timer
# since it might get hung up inside and we do not want to get stuck here.
#
my $needschildmon;
if (!$ISXENVM) {
$needschildmon = 1;
}
else {
$needschildmon = 0;
}
if ($needschildmon) {
my $childpid = fork();
if ($childpid) {
my $timedout = 0;
......@@ -791,12 +802,23 @@ if (!$ISXENVM) {
print STDERR "*** ERROR: vnodeBoot failed\n";
exit(1);
}
# NB: store the state, so that vnodeBoot too has writable $private!
if (StoreState()) {
MyFatal("Could not store container state to disk");
}
exit(0);
}
}
elsif (safeLibOp('vnodeBoot', 1, 1)) {
MyFatal("$vnodeid container startup failed.");
}
if ($needschildmon) {
# NB: before continuing, read the state stored in the child above
# after vnodeBoot!
if (ReadState()) {
MyFatal("Could not read container state from disk after vnodeBoot");
}
}
if (safeLibOp('vnodePostConfig', 1, 1)) {
MyFatal("vnodePostConfig failed");
}
......@@ -818,61 +840,132 @@ if (StoreState()) {
mysystem("touch $RUNNING_FILE");
$running = 1;
#
# Poll as desired by the backend. See comments below for
# BackendVnodePoll() and DefaultVnodePoll().
#
if (hasLibOp("vnodePoll")) {
BackendVnodePoll();
}
else {
DefaultVnodePoll();
}
exit(CleanupVM());
#
# Invoke the backend to poll the vnode for status changes that mkvnode
# should/must respond to. This means that honoring the
# vnodesetup/mkvnode semantics is now in the hands of the backend, if it
# wants. For instance, the backend can choose to allow this mkvnode
# monitor to continue waiting even if the vnode is stopped for long
# periods of time.
#
# More recently, other backends (Docker) require that we catch VM state
# transitions more frequently than the default polling loop
# (DefaultVnodePoll) allows. Note the special case in that loop: a
# 15-second recheck to see whether a Xen VM was rebooted from the inside
# and ended up restarting successfully. To handle these kinds of special
# cases, it's no problem to allow backends to control the loop; if we
# are interrupted via signal, and are supposed to be cleaning = 1 or
# whatever, we just don't call vnodePoll again (and just call vnodeState
# a final couple times), as in the original loop. As long as backends
# don't override our signal handlers, we're good to follow the original
# semantics of vnodesetup/mkvnode. We can also modify the semantics
# slightly, e.g. to allow the mkvnode monitor to hang around even if the
# vnode is down (like if the user manually invokes `docker stop`).
#
#
# Run the backend-provided vnodePoll operation in a loop until it tells
# us to stop, reports an error, or dies.
#
# Each iteration calls $libops{$vmtype}{'vnodePoll'} with the vnode id,
# vm id, vnode config, the backend's private state, and references to
# two scalars ($status, $event) that the backend may fill in; those are
# only used for logging here.
#
# The backend's return value is interpreted as:
#   VNODE_POLL_STOP      leave the loop (caller then cleans up)
#   VNODE_POLL_ERROR     log and leave the loop (caller then cleans up)
#   anything else        log and poll again
# An undefined return value is treated like VNODE_POLL_ERROR rather than
# being numerically compared (which would warn and look like CONTINUE).
#
# If the backend dies, fatal() is called with the exception text.
#
sub BackendVnodePoll()
{
    while (1) {
	my ($status,$event) = ('','');
	my $ret = eval {
	    $libops{$vmtype}{'vnodePoll'}->($vnodeid, $vmid,
					    \%vnconfig, $vnstate->{'private'},
					    \$status,\$event);
	};
	# Capture $@ immediately; any later eval would clobber it.
	my $err = $@;
	if ($err) {
	    fatal("*** ERROR: vnodePoll: $err\n");
	    # NOTE(review): fatal() presumably does not return; this is
	    # kept only as a fallback in case it does.
	    return (-1,$err);
	}

	# The backend writes these through the references we handed it;
	# make sure they are safe to interpolate into log messages.
	$status = ''
	    if (!defined($status));
	$event = ''
	    if (!defined($event));

	if (!defined($ret)) {
	    TBDebugTimeStamp("vnodePoll returned no value; cleaning up!".
			     " status=$status, event=$event");
	    last;
	}
	elsif ($ret == libgenvnode::VNODE_POLL_STOP()) {
	    TBDebugTimeStamp("vnodePoll told us to stop polling; cleaning up!");
	    last;
	}
	elsif ($ret == libgenvnode::VNODE_POLL_ERROR()) {
	    # $err is always empty here (a non-empty $err was handled
	    # above), so report only what the backend gave us.
	    TBDebugTimeStamp("vnodePoll errored; cleaning up!".
			     " status=$status, event=$event");
	    last;
	}
	else {
	    TBDebugTimeStamp("vnodePoll told us to continue polling;".
			     " status=$status, event=$event");
	}
    }
}
#
# The default polling implementation.
#
# This loop is to catch when the container stops. We used to run a sleep
# inside and wait for it to exit, but that is not portable across the
# backends, and the return value did not indicate how it exited. So, lets
# just loop, asking for the status every few seconds.
# just loop, asking for the status every few seconds.
#
# XXX Turn off debugging during this loop to keep the log file from growing.
#
TBDebugTimeStampsOff()
if ($debug);
sub DefaultVnodePoll()
{
# XXX Turn off debugging during this loop to keep the log file from
# growing.
TBDebugTimeStampsOff()
if ($debug);
while (1) {
sleep(5);
while (1) {
sleep(5);
#
# If the container exits, either it rebooted from the inside or
# the physical node is rebooting, or we are actively trying to kill
# it cause our parent (vnodesetup) told us to. In all cases, we just
# exit and let the parent decide what to do.
#
my ($ret,$err) = safeLibOp('vnodeState', 0, 0);
if ($err) {
fatal("*** ERROR: vnodeState: $err\n");
}
if ($ret ne VNODE_STATUS_RUNNING()) {
print "Container is no longer running.\n";
if (!$cleaning) {
#
# Rebooted from inside, but not cause we told it to, so
# leave intact.
#
# But before we fold, lets wait a moment and check again
# since in XEN, the user can type reboot, which causes the
# domain to disappear for a while. We do not want to be
# fooled by that. Halt is another issue; if the user halts
# from inside the container it is never coming back and the
# user has screwed himself. Need to restart from the frontend.
#
sleep(15);
($ret,$err) = safeLibOp('vnodeState', 0, 0);
if ($err) {
fatal("*** ERROR: vnodeState: $err\n");
}
if ($ret eq VNODE_STATUS_RUNNING()) {
print "Container has restarted itself.\n";
next;
#
# If the container exits, either it rebooted from the inside or
# the physical node is rebooting, or we are actively trying to kill
# it cause our parent (vnodesetup) told us to. In all cases, we just
# exit and let the parent decide what to do.
#
my ($ret,$err) = safeLibOp('vnodeState', 0, 0);
if ($err) {
fatal("*** ERROR: vnodeState: $err\n");
}
if ($ret ne VNODE_STATUS_RUNNING()) {
print "Container is no longer running.\n";
if (!$cleaning) {
#
# Rebooted from inside, but not cause we told it to, so
# leave intact.
#
# But before we fold, lets wait a moment and check again
# since in XEN, the user can type reboot, which causes the
# domain to disappear for a while. We do not want to be
# fooled by that. Halt is another issue; if the user halts
# from inside the container it is never coming back and the
# user has screwed himself. Need to restart from the frontend.
#
sleep(15);
($ret,$err) = safeLibOp('vnodeState', 0, 0);
if ($err) {
fatal("*** ERROR: vnodeState: $err\n");
}
if ($ret eq VNODE_STATUS_RUNNING()) {
print "Container has restarted itself.\n";
next;
}
$leaveme = $LEAVEME_REBOOT;
}
$leaveme = $LEAVEME_REBOOT;
last;
}
last;
}
TBDebugTimeStampsOn()
if ($debug);
}
TBDebugTimeStampsOn()
if ($debug);
exit(CleanupVM());
#
# Teardown a container. This should not be used if the mkvnode process
......@@ -1062,6 +1155,15 @@ sub MyFatal($)
#
# Helpers:
#
#
# Report whether the backend for the current $vmtype provides the named
# operation.  Returns 1 if an entry for it exists in %libops and is
# defined, 0 otherwise.
#
sub hasLibOp($) {
    my $opname = shift;

    my $handler = (exists($libops{$vmtype}{$opname})
		   ? $libops{$vmtype}{$opname} : undef);

    return (defined($handler) ? 1 : 0);
}
sub safeLibOp($$$;@) {
my ($op,$autolog,$autoerr,@args) = @_;
......@@ -1128,3 +1230,18 @@ sub StoreState()
}
return 0;
}
#
# Reload $vnstate from the on-disk state file ($VNDIR/vnode.state).
#
# Returns 0 on success and -1 on failure.  On failure the in-memory
# $vnstate is left untouched and a message is printed to STDERR.
#
# Storable::retrieve() reports failures in two ways: it dies on most
# errors, but returns undef (without dying) when an I/O error occurs
# while reading.  Both are treated as failure here so that we never
# replace $vnstate with undef and then claim success.
#
sub ReadState()
{
    my $statefile = "$VNDIR/vnode.state";

    # Read the state from disk.
    print "Reading state from disk ...\n"
	if ($debug);

    my $newstate = eval { Storable::retrieve($statefile); };
    if ($@) {
	print STDERR "$@";
	return -1;
    }
    if (!defined($newstate)) {
	print STDERR "*** ERROR: could not retrieve state from $statefile\n";
	return -1;
    }

    $vnstate = $newstate;
    return 0;
}
......@@ -336,7 +336,7 @@ sub doboot()
#
# This stuff is run regardless of reservation status.
#
if (-x "$RCDIR/rc.ipod" && ! WINDOWS()) {
if (-x "$RCDIR/rc.ipod" && ! WINDOWS() && ! INDOCKERVM()) {
print("Setting up Ping of Death\n");
# This is allowed to fail by default; ipod might not be supported.
if (!exists($manifest{'rc.ipod'})
......
......@@ -461,3 +461,24 @@ openvz-guest-pack: $(OPENVZGUEST_TEMPLATE)
@if [ -e "$(OPENVZGUEST)" ]; then \
cp -fp $(OPENVZGUEST_TEMPLATE) $(OPENVZGUEST); \
fi
docker-install: dir-install
$(INSTALL) -m 755 $(SRCDIR)/../common/vnodesetup $(BINDIR)/
$(INSTALL) -m 755 $(SRCDIR)/../common/mkvnode.pl $(BINDIR)/
$(INSTALL) -m 755 $(SRCDIR)/../common/libutil.pm $(BINDIR)/
$(INSTALL) -m 755 $(SRCDIR)/../common/bootvnodes $(BINDIR)/
$(INSTALL) -m 755 $(SRCDIR)/libvnode.pm $(BINDIR)/
$(INSTALL) -m 755 $(SRCDIR)/docker/libvnode_docker.pm $(BINDIR)/
$(INSTALL) -m 755 $(SRCDIR)/vnodectl $(BINDIR)/
echo "docker" > $(ETCDIR)/genvmtype
$(INSTALL) -m 755 -o root -g $(DIRGROUP) -d $(ETCDIR)/docker
$(INSTALL) -m 755 -o root -g $(DIRGROUP) -d $(ETCDIR)/docker/scripts
$(INSTALL) -m 755 -o root -g $(DIRGROUP) -d $(ETCDIR)/docker/container-utils
$(INSTALL) -m 755 -o root -g $(DIRGROUP) -d $(ETCDIR)/docker/dockerfiles
rsync -a --delete $(SRCDIR)/docker/scripts/ $(ETCDIR)/docker/scripts/
rsync -a --delete $(SRCDIR)/docker/container-utils/ $(ETCDIR)/docker/container-utils/
rsync -a --delete $(SRCDIR)/docker/dockerfiles/ $(ETCDIR)/docker/dockerfiles/
# $(INSTALL) -m 755 $(SRCDIR)/docker/analyze.sh $(ETCDIR)/docker/
# $(INSTALL) -m 755 $(SRCDIR)/docker/analyze-image.sh $(ETCDIR)/docker/
docker-guest-install:
This diff is collapsed.
#!/bin/sh
#
# This is a simple script that prints out properties of a given Docker
# image. We attempt to "Emulabize" Docker images so that they can run
# more fully on an Emulab testbed, if the user allows. Docker images
# usually either run something very specific (i.e. a daemon like httpd;
# a one-shot SSG like jekyll), or nothing specific (i.e. bash). To run
# in a testbed context, a Docker container is almost always going to
# need to stay up for longer periods, support interactive
# experimentation, etc. This means the container needs a real init;
# sshd; and syslog at minimum.
#
# Anyway, we cannot learn everything we need to know about the image
# from docker inspect. So we instantiate the image in a dummy container
# and examine the FS. We are going to use the image anyway, so this
# work is not wasted, and this method is faster and easier than
# flattening and exporting the image to a tarball and unpacking it, or
# other alternatives.
#
# We look specifically for:
#
# * an OS we support (Centos/Fedora/RHEL; Ubuntu; Debian); or a
# packaging mechanism we support (apt-get, yum, dnf).
#
# * existence of an init we support (we support systemd on dedicated
# nodes, but not shared; we support runit; we support upstart).
#
# * existence and enablement of sshd (we will configure openssh to
# suit our purposes, but not any other kind of sshd).
#
#
# Find the distro, release number, etc.
#
if [ -r /etc/lsb-release ]; then
dist=`(. /etc/lsb-release; echo $DISTRIB_ID | tr '[A-Z]' '[a-z]')`
rel=`(. /etc/lsb-release; echo $DISTRIB_RELEASE)`
major=`echo $rel | cut -d. -f1`
minor=`echo $rel | cut -d. -f2`
fi
if [ -z "$dist" -a -r /etc/os-release ]; then
dist=`(. /etc/os-release ; echo $ID | tr '[A-Z]' '[a-z]')`
rel=`(. /etc/os-release ; echo $VERSION_ID)`
major=$rel
minor=''
fi
# Fall back to /etc/redhat-release when nothing above identified the
# distro.  Each grep/sed pair pulls the release number for one family;
# a later match overrides an earlier one.
if [ -z "$dist" -a -r /etc/redhat-release ]; then
    trel=`grep 'Red Hat' /etc/redhat-release | sed -e 's/Red Hat Linux release \([0-9]\(\.[0-9]\)\?\).*/\1/'`
    if [ -n "$trel" ]; then
	dist="redhat"
	rel=$trel
    fi
    trel=`grep 'Fedora' /etc/redhat-release | sed -e 's/Fedora .*release \([0-9.]\+\).*/\1/'`
    if [ -n "$trel" ]; then
	dist="fedora"
	rel=$trel
    fi
    trel=`grep 'CentOS' /etc/redhat-release | sed -e 's/CentOS .*release \([0-9.]\+\).*/\1/'`
    if [ -n "$trel" ]; then
	dist="centos"
	rel=$trel
    fi
    # $dist was empty on entry, so a non-empty $dist means one of the
    # patterns above matched and $rel holds that match.  Key off $dist
    # rather than $trel: $trel only reflects the last (CentOS) grep, so
    # testing it would leave major unset for Red Hat and Fedora.
    if [ -n "$dist" ]; then
	major=$rel
	minor=''
    fi
fi
if [ -r /etc/centos-release ]; then
trel=`grep 'CentOS' /etc/centos-release | sed -e 's/CentOS .*release \([0-9.]\+\).*/\1/'`
dist="centos"
rel=$trel
major=`echo $rel | sed -nre 's/^([0-9]+).*$/\1/p'`
minor=`echo $rel | sed -nre 's/^[0-9]+\.([0-9]+).*$/\1/p'`
fi
if [ -n "$dist" -a -z "$tag" ]; then
tag="${dist}${major}"
if [ -n "$minor" ]; then
mintag="${tag}-${minor}"
else
mintag=''
fi
fi
#
# Find the package manager.
#
pkgtool=''
if [ -n "$dist" ]; then
case $dist in
fedora|centos)
if [ -z "$pkgtool" -a -f /usr/bin/dnf ]; then
pkgtool=/usr/bin/dnf
fi
if [ -z "$pkgtool" -a -f /usr/bin/yum ]; then
pkgtool=/usr/bin/yum
fi
if [ -z "$pkgtool" ]; then
pkgtool=`which dnf`
if [ ! $? -eq 0 ]; then
pkgtool=`which yum`
fi
fi
if [ -n "$pkgtool" ]; then
if [ -f /usr/bin/rpm ]; then
basepkgtool=/usr/bin/rpm
else
basepkgtool=`which rpm`
fi
basepkgtype='rpm'
fi
;;
ubuntu|debian)
if [ -z "$pkgtool" -a -f /usr/bin/apt-get ]; then
pkgtool=/usr/bin/apt-get
else
pkgtool=`which apt-get`
fi
if [ -f /usr/bin/dpkg ]; then
basepkgtool=/usr/bin/dpkg
else
basepkgtool=`which dpkg`
fi
if [ -n "$pkgtool" ]; then
basepkgtype='deb'
fi
;;
*)
if [ -z "$pkgtool" ]; then
if [ -f /usr/bin/apt-get ]; then
pkgtool=/usr/bin/apt-get
else
pkgtool=`which apt-get`
fi
if [ -n "$pkgtool" ]; then
if [ -f /usr/bin/dpkg ]; then
basepkgtool=/usr/bin/dpkg
else
basepkgtool=`which dpkg`
fi
if [ -n "$pkgtool" ]; then
basepkgtype='deb'
fi
fi
fi
if [ -z "$pkgtool" ]; then
if [ -f /usr/bin/dnf ]; then
pkgtool=/usr/bin/dnf
fi
if [ -z "$pkgtool" -a -f /usr/bin/yum ]; then
pkgtool=/usr/bin/yum
fi
if [ -z "$pkgtool" ]; then
pkgtool=`which dnf`
if [ ! $? -eq 0 ]; then
pkgtool=`which yum`
fi
fi
if [ -n "$pkgtool" ]; then
if [ -f /usr/bin/rpm ]; then
basepkgtool=/usr/bin/rpm
else
basepkgtool=`which rpm`
fi
basepkgtype='rpm'
fi
fi
;;
esac
fi
#
# Figure out the init.
#
if [ -e /sbin/init ]; then
initpath=`readlink -f /sbin/init`
initprog=`echo $initpath | sed -rne 's/^.*\/([^\/]*)$/\1/p'`
if [ "$initprog" = "systemd" ]; then
initvers=`$initpath --version | sed -nre 's/^systemd\s+([0-9]+)$/\1/p'`
else
initvers=`$initpath --version | sed -nre 's/^.*upstart ([0-9\.\-]+).*$/\1/p'`
if [ $? -eq 0 -a -n "$initvers" ]; then
initprog='upstart'
else
initvers=''
fi
fi
fi
#
# Figure out the sshd, if any. Every sane packaging tool automatically
# enables sshd when installing it.
#
# HAS_SSHD is 1 only if the openssh-server package is fully installed
# according to the image's base package tool.  SSHD_PACKAGE names the
# package to install when it is not (empty if the package type is
# unknown).
HAS_SSHD=0
SSHD_PACKAGE=''
case "$basepkgtype" in
deb)
    SSHD_PACKAGE=openssh-server
    # `dpkg -s` prints a "Status: <want> <flag> <state>" line; `dpkg -l`
    # does not, so grepping its output for the status never matches.
    # Anchor on " installed" at end of line so that states such as
    # half-installed or config-files do not count.
    if dpkg -s "$SSHD_PACKAGE" 2>/dev/null | \
	    grep -q '^Status: .* installed$'; then
	HAS_SSHD=1
	#find /etc/rc*.d -name \*ssh | grep
    fi
    ;;
rpm)
    SSHD_PACKAGE=openssh-server
    if rpm -q "$SSHD_PACKAGE" >/dev/null 2>&1; then
	HAS_SSHD=1
	#if [ -L /etc/systemd/system/multi-user.target.wants/sshd.service ]; then
	#    SSHD_ENABLED=1
	#fi
    fi
    ;;
esac
#
# Find a syslogger.
#
# HAS_SYSLOG is 1 if a supported syslog daemon package is installed;
# SYSLOG_PACKAGE is then the one that was found.  Otherwise
# SYSLOG_PACKAGE stays at the default (rsyslog), i.e. the package to
# install.  rsyslog is preferred over syslog-ng when both are present.
HAS_SYSLOG=0
SYSLOG_PACKAGE='rsyslog'
for syslogpkg in rsyslog syslog-ng; do
    case "$basepkgtype" in
    deb)
	# Use the "Status:" line from `dpkg -s`.  `dpkg -l` has no such
	# line, and its exit status is 0 even for packages that were
	# removed but are still known to dpkg.
	dpkg -s "$syslogpkg" 2>/dev/null | grep -q '^Status: .* installed$'
	;;
    rpm)
	rpm -q "$syslogpkg" >/dev/null 2>&1
	;;
    *)
	# Unknown package type: nothing we can query.
	false
	;;
    esac
    # $? is the status of the query run in the case branch above.
    if [ $? -eq 0 ]; then
	SYSLOG_PACKAGE=$syslogpkg
	HAS_SYSLOG=1
	break
    fi
done
#
# Finally, have we done a full Emulab install on this image already? Or
# have we simply done a basic install? 'clientside' means packages +
# client side. 'minpackages' means init, sshd, syslog. 'packages'
# means minpackages + others.
#
# Kind of Emulabization already applied to this image, if any.
if [ -f /etc/emulab/emulabization-type ]; then
    EMULABIZATION=`cat /etc/emulab/emulabization-type`
fi
# Installed Emulab version, if any.  This must go into EMULABVERSION
# (which is what gets printed below); storing it in EMULABIZATION would
# overwrite the type read above and leave EMULABVERSION always empty.
if [ -f /etc/emulab/version ]; then
    EMULABVERSION=`cat /etc/emulab/version`
fi
echo "TAG=$tag"
echo "MINTAG=$mintag"
echo "DIST=$dist"
echo "REL=$rel"
echo "MAJOR=$major"
echo "MINOR=$minor"
echo "PKGTOOL=$pkgtool"
echo "BASEPKGTOOL=$basepkgtool"
echo "BASEPKGTYPE=$basepkgtype"
echo "INITPATH=$initpath"
echo "INITPROG=$initprog"
echo "HAS_SSHD=$HAS_SSHD"
echo "SSHD_PACKAGE=$SSHD_PACKAGE"
echo "HAS_SYSLOG=$HAS_SYSLOG"
echo "SYSLOG_PACKAGE=$SYSLOG_PACKAGE"
echo "EMULABIZATION=$EMULABIZATION"
echo "EMULABVERSION=$EMULABVERSION"
exit 0
This diff is collapsed.
#!/bin/sh
#
# Clean up temporary state inside the image: restore the saved apt
# sources list if one was stashed in /tmp, drop yum caches, and empty
# the temporary directories.  Always exits 0.
#

set -x

# Put back the original apt sources if a backup was left behind.
if [ -f /tmp/sources.list.backup ]; then
    mv /tmp/sources.list.backup /etc/apt/sources.list
fi

# Only rpm-based images have yum; skip quietly everywhere else.
if command -v yum >/dev/null 2>&1; then
    yum clean all
fi
rm -f /tmp/yum-updated

# Empty the temp directories but keep the directories themselves.
# (A bare "/var/tmp*" glob would delete /var/tmp itself, plus anything
# else in /var whose name merely starts with "tmp".)
rm -rf /tmp/* /var/tmp/*

exit 0
#!/bin/sh
# system one time tasks
#
# Runs the one-time boot scripts and then leaves a stopit file with no
# permission bits set.
# NOTE(review): this looks like a runit stage-1 (/etc/runit/1) script --
# confirm against where it is installed.

# Fixed search path for the scripts invoked below.
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# One-time system initialization scripts, run in this order.
/etc/init.d/rcS
/etc/init.d/rmnologin

# Create the stopit file and clear all of its permission bits.
# NOTE(review): presumably runit only acts on stopit when it is
# executable, so mode 0 leaves it disarmed -- confirm in runit(8).
touch /etc/runit/stopit
chmod 0 /etc/runit/stopit