Commit 0910c65c authored by Leigh B. Stoller's avatar Leigh B. Stoller

First cut at porting our jail setup to linux vservers. Most of the

changes are on the client side where I took mkjail and retargeted it
to vservers (called it mkvserver.pl, clever eh?) in the linux
directory. The real time sink was understanding how vservers work, how
they boot, how they die, how they handle signals, etc. Very interesting
and very bizarre. Anyway, this first cut is done with the version 2.2
vserver code which does not virtualize the network stack or even the
loopback device, so I pretty much ignored the experimental network and
the host routine stuff. So, in your NS file you can now do this:

	set ns [new Simulator]
	set v0 [$ns node]
	set v1 [$ns node]

	tb-set-hardware $v0 pcvm
	tb-set-hardware $v1 pcvm
	tb-set-node-os $v0 FC-VSERVER
	tb-set-node-os $v1 FC-VSERVER

As you can see, I am using the osid to indicate jails vs
vservers. There are some small changes in assign_wrapper that use the
nextosid of the osid to map to the actual osid to install on the
hosting node. If you try to collocate a jail and a vserver, assign will
refuse, because we use features and desires for the osids. Sweet.

Oh, the ssh button in the web interface does not work yet cause the page
assumes that local virtnodes can bind to port 22 in each vserver, but
that will not work yet.
parent 9deae976
......@@ -395,7 +395,6 @@ sub nodetypeistype($) { return exists($node_types{$_[0]}); }
sub nodetypetype($) { return $node_types{$_[0]}->type(); }
sub nodetypeclass($) { return $node_types{$_[0]}->class(); }
sub nodedelayosid($) { return $node_types{$_[0]}->delay_osid(); }
sub nodejailosid($) { return $node_types{$_[0]}->jail_osid(); }
sub nodedefaultosid($) { return $node_types{$_[0]}->default_osid(); }
sub nodetypeisremote($) { return $node_types{$_[0]}->isremotenode(); }
sub nodetypeisvirt($) { return $node_types{$_[0]}->isvirtnode(); }
......@@ -411,6 +410,7 @@ sub nodetypesimcap($) { return $node_types{$_[0]}->simnode_capacity(); }
my %osids = ();
# Return the "path" entry stored in %osids for the given osid (undef when
# no path was recorded for it).
sub osidpath($) { return $osids{$_[0]}->{"path"}; }
# Return the "OS" entry stored in %osids for the given osid.
sub osidos($) { return $osids{$_[0]}->{"OS"}; }
# True when the given osid has a path that is both defined and non-empty.
sub osidhaspath($) { my $path = osidpath($_[0]);
return (defined $path) && ($path ne "")};
sub osidnextosid($) {
......@@ -2985,7 +2985,7 @@ sub InitPnode($$)
$osid = $sim_osid;
}
else {
$osid = nodejailosid(physnodetype($pnode));
$osid = nodejailosid($pnode);
}
my $cmdline = TBGetOSBootCmd(osidnextosid($osid),
......@@ -3048,7 +3048,7 @@ sub InitPnode($$)
$osid = $jail_osid;
}
else {
$osid = nodejailosid(physnodetype($pnode));
$osid = nodejailosid($pnode);
}
$expt_stats{"jailnodes"} += 1;
#
......@@ -3058,7 +3058,9 @@ sub InitPnode($$)
$role = TBDB_RSRVROLE_VIRTHOST;
# XXX Must have routing on jail hosting nodes. Change me.
$routertype = TBDB_ROUTERTYPE_MANUAL;
$cmdline = "/kernel.jail";
if (osidos($osid) eq "FreeBSD") {
$cmdline = "/kernel.jail";
}
$cmdline_role = "vnodehost";
}
fatal("No OSID is defined for internal node $vname!")
......@@ -3878,8 +3880,9 @@ sub LoadPhysInfo()
# Get paths from os_info, so that we can identify OSKit/MFS OSes, basically
# those which do not load a disk image
#
$query_result = DBQueryFatal("select osid, path, nextosid from os_info");
while (my ($osid, $path, $nextosid) = $query_result->fetchrow()) {
$query_result =
DBQueryFatal("select osid,path,nextosid,OS from os_info");
while (my ($osid, $path, $nextosid, $OS) = $query_result->fetchrow()) {
$osids{$osid} = {};
if ($path) {
$osids{$osid}->{"path"} = $path;
......@@ -3887,6 +3890,7 @@ sub LoadPhysInfo()
if (defined($nextosid)) {
$osids{$osid}->{"nextosid"} = TBResolveNextOSID($osid,$pid,$eid);
}
$osids{$osid}->{"OS"} = $OS;
}
}
sub interfacespeedmbps($$) {
......@@ -5661,3 +5665,26 @@ sub LoadCurrent()
printdb "Old Reserved Nodes: " . join(" ", keys %oldreservednodes) . "\n";
}
#
# This is special. Look at the osid of the virtnodes on this pnode and
# map to a suitable osid using the nextosid field. This overloads nextosid
# to some extent ...
#
sub nodejailosid($)
{
    my ($pnode) = @_;

    #
    # Assign (via osid features/desires) guarantees that every vnode
    # placed on this pnode asked for the same osid, so the first vnode
    # in the list is representative of all of them.
    #
    my ($first_vnode) = @{$virtnodes{$pnode}};

    # The vnode's osid names what runs inside the container; its nextosid
    # is what gets returned as the osid for the hosting physical node.
    my $vnode_osid = virtnodeosid($first_vnode);
    my $host_osid  = osidnextosid($vnode_osid);

    printdb "Mapping jail osid to $vnode_osid ($host_osid) on $pnode\n";

    return $host_osid;
}
......@@ -87,6 +87,11 @@ if ($UID != 0) {
" Must be root to run this script!\n");
}
# We need to know this below.
my $sysname = `uname -s`;
chomp($sysname);
my $islinux = ($sysname eq "Linux");
#
# Put this into the background and log its output. We *must* do this cause
# we do not want to halt the boot if the testbed is down!
......@@ -233,7 +238,7 @@ if ($fakejails) {
# This will fail if it already exists. Keep going on any failure though.
#
if (!REMOTE()) {
system("mkextrafs.pl $vndir");
system("mkextrafs.pl " . ($islinux ? "/vservers" : $vndir));
}
#
......@@ -243,23 +248,27 @@ if (! -e "/local") {
system("ln -s " . LOCALROOTFS() . " /local");
}
#
# Make sure enough vn devices exist
#
for (my $i = 0;
$i < scalar(keys(%newvnodelist)) + scalar(keys(%curvnodelist)); $i++) {
my $dev = "vn${i}";
if (! -e "/dev/${dev}c") {
system("(cd /dev; ./MAKEDEV $dev)");
if (!$islinux) {
#
# Make sure enough vn devices exist
#
for (my $i = 0;
$i < scalar(keys(%newvnodelist)) + scalar(keys(%curvnodelist));
$i++) {
my $dev = "vn${i}";
if (! -e "/dev/${dev}c") {
system("(cd /dev; ./MAKEDEV $dev)");
}
}
}
#
# XXX tweak IP interrupt queue size to accommodate up to 8 x 100Mb trivial links
# XXX turn on local hack to retry on NFS EACCES errors (thanks mountd!)
#
system("sysctl net.inet.ip.intr_queue_maxlen=128 >/dev/null 2>&1");
system("sysctl vfs.nfs.eacces_retry_enable=1 >/dev/null 2>&1");
#
# XXX tweak IP interrupt queue size to accommodate up to 8 x 100Mb
# trivial links.
# XXX turn on local hack to retry on NFS EACCES errors (thanks mountd!)
#
system("sysctl net.inet.ip.intr_queue_maxlen=128 >/dev/null 2>&1");
system("sysctl vfs.nfs.eacces_retry_enable=1 >/dev/null 2>&1");
}
#
# XXX grossed out yet? Try this one: the mount command will HUP mountd
......@@ -278,11 +287,13 @@ foreach my $vnode (sort(keys(%newvnodelist))) {
bootvnode($vnode, "boot", $newvnodelist{$vnode});
}
my $PIDFILE = "/var/run/progagent.pid";
if (!$islinux) {
my $PIDFILE = "/var/run/progagent.pid";
foreach my $vnode (keys(%newvnodelist)) {
system("rtprio 15 -`cat $vndir/$vnode/root/$PIDFILE`")
if (-e "$vndir/$vnode/root/$PIDFILE");
foreach my $vnode (keys(%newvnodelist)) {
system("rtprio 15 -`cat $vndir/$vnode/root/$PIDFILE`")
if (-e "$vndir/$vnode/root/$PIDFILE");
}
}
exit(0);
......@@ -794,8 +794,16 @@ sub removeconfdir($)
sub hackwaitandexit()
{
my $now = time();
my $goofy = CONFDIR() . "/root/var/run/emulab-watchdog.pid";
my $goofy;
my $count = 60;
# The first case is for our own (non-plab) vservers.
if (-e "/vservers") {
$goofy = "/vservers/$vnodeid/var/run/emulab-watchdog.pid";
}
else {
$goofy = CONFDIR() . "/root/var/run/emulab-watchdog.pid";
}
while ($count--) {
sleep(1);
......
#
# EMULAB-COPYRIGHT
# Copyright (c) 2000-2007 University of Utah and the Flux Group.
# Copyright (c) 2000-2008 University of Utah and the Flux Group.
# All rights reserved.
#
......@@ -32,6 +32,7 @@ SYSETCDIR ?= $(DESTDIR)/etc
ETCDIR ?= $(DESTDIR)$(CLIENT_ETCDIR)
BINDIR ?= $(DESTDIR)$(CLIENT_BINDIR)
VARDIR ?= $(DESTDIR)$(CLIENT_VARDIR)
VSDIR ?= $(BINDIR)/vserver
RCDIR ?= $(SYSETCDIR)/rc.d
INSTALL ?= /usr/bin/install -c
COMMON ?= $(SRCDIR)/../common
......@@ -41,7 +42,7 @@ DEFRUNLVLDIR ?= $(RCDIR)/rc3.d
RRCDIR ?= /etc/rc.d
install client-install: common-install etc-install \
sup-install script-install bin-install
sup-install script-install bin-install vserver-install
@echo "Remember to install the PEM files if necessary"
simple-install: common-install script-install bin-install
......@@ -154,3 +155,9 @@ sfs-install:
# create ifcfg-eth? files
ifcfgs: $(SRCDIR)/mkifcfgs $(SRCDIR)/ifcfg.template
$(SRCDIR)/mkifcfgs $(SRCDIR)/ifcfg.template
vserver-install: dir-install
$(INSTALL) -m 755 -o root -g root -d $(VSDIR)
$(INSTALL) -m 755 $(SRCDIR)/rc.invserver $(VSDIR)/rc.invserver
$(INSTALL) -m 755 $(SRCDIR)/mkvserver.pl $(BINDIR)/mkvserver.pl
-ln -sf $(BINDIR)/mkvserver.pl $(BINDIR)/mkjail.pl
This diff is collapsed.
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
# Copyright (c) 2008 University of Utah and the Flux Group.
# All rights reserved.
#
use strict;
use English;
use Getopt::Std;
sub usage()
{
    # Show the command synopsis, then bail out with a failure status.
    my $synopsis = "Usage: [-d] start|stop\n";

    print $synopsis;
    exit(1);
}
my $optlist = "d";
my $debug = 0;
my $action = "start";
# Turn off line buffering on output
$| = 1;
# Drag in path stuff so we can find emulab stuff.
BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }
#
# Load the emulab libraries.
#
use libsetup;
use libtmcc;
use librc;
# Protos.
sub BootFatal($);
sub DoBoot();
# Script specific goo.
my $RCDIR = "$BINDIR/rc";
# Parse command line.
my %options;
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{'d'})) {
    $debug = 1;
}

# Allow default above: with no argument the action stays "start".
if (@ARGV) {
    $action = $ARGV[0];
}
usage()
    if ($action ne "stop" && $action ne "start");

#
# On shutdown kill everything.
#
if ($action eq "stop") {
    print "Killing all processes and exiting ...\n";

    # Ignore TERM ourselves; presumably so the broadcast below does not
    # take this script down before it can follow up with KILL.
    $SIG{TERM} = 'IGNORE';

    # Ask politely first, give processes a second to exit, then force it.
    system("kill -TERM -1");
    sleep(1);
    system("kill -KILL -1");
    exit(0);
}

# So that rc will know to invoke us to stop.
system("touch /var/lock/subsys/invserver") == 0
    or die("Could not create lock file!\n");

# The vnode name is read from this file; without it we cannot proceed.
if (! -e "$BOOTDIR/realname") {
    print "$BOOTDIR/realname does not exist!\n";
    exit(1);
}
my $vnodeid = `cat $BOOTDIR/realname`;
chomp($vnodeid);

#
# Put this into the environment, which is the protocol for setting up jails.
#
$ENV{'TMCCVNODEID'} = $vnodeid;

DoBoot();
exit(0);
#
# Boot Action.
#
# Runs the boot sequence: check the reservation via jailsetup(), start
# the watchdog, run the config scripts, start the linktest daemon, report
# ISUP through tmcc, and finally touch $BOOTDIR/vrunning as a success
# marker. Returns early (doing nothing else) when jailsetup() yields no
# pid. A failing step calls BootFatal().
#
sub DoBoot()
{
    TBDebugTimeStamp("rc.invserver starting up");

    print("Checking Testbed reservation status\n");
    my ($pid, $eid, $vname) = jailsetup();

    #
    # At this point, if we are a free node just return. Something went wacky.
    #
    if (!defined($pid)) {
        return;
    }

    #
    # Do this *before* config scripts. At the moment we use a hacky test
    # in vnodesetup to see if the watchdog is running, which tells vnodesetup
    # that the vnode has finished setting up. By doing it now, we allow vnode
    # setup to proceed in parallel (okay, at most two are going to partially
    # overlap, but that is better than nothing at all!)
    #
    TBDebugTimeStamp("rc.invserver starting watchdog");
    if (-x "$BINDIR/watchdog") {
        print("Starting testbed watchdog daemon\n");
        system("$BINDIR/watchdog start");
        if ($?) {
            BootFatal("Error running $BINDIR/watchdog");
        }
    }

    #
    # This is where we run all of the config scripts. These talk to the
    # DB and setup the node the way it is supposed to be.
    #
    TBDebugTimeStamp("rc.invserver running config scripts");
    print("Running config scripts\n");
    system("$RCDIR/rc.config boot");
    if ($?) {
        BootFatal("Error running $RCDIR/rc.config");
    }
    TBDebugTimeStamp("rc.invserver done running config scripts");

    # Linktest daemon now runs inside jails (and so inside vservers too).
    if (-x "$RCDIR/rc.linktest") {
        print("Starting linktest daemon\n");
        system("$RCDIR/rc.linktest start");
        if ($?) {
            BootFatal("Error running $RCDIR/rc.linktest");
        }
    }

    print("Informing the testbed that we are up and running\n");
    if (tmcc(TMCCCMD_STATE(), "ISUP") < 0) {
        BootFatal("Error sending ISUP to Emulab Control!");
    }

    #
    # Very hacky ... with vservers it is difficult to return an exit
    # code to the outer environment. So, touch this file and the outer
    # environment knows we got this far okay.
    #
    system("touch $BOOTDIR/vrunning");
    if ($?) {
        # ISUP has already been sent, so just make the failure visible
        # in the boot output rather than turning it into a fatal error.
        print("Warning: could not create $BOOTDIR/vrunning\n");
    }
}
#
# This version of fatal sends boot status to tmcd, and then generates
# a TBFAILED state transition.
#
# Takes the error message describing what went wrong, prints it, reports
# a boot errno of -1 and the TBFAILED state through tmcc, and then exits
# with status -1. Does not return.
#
sub BootFatal($)
{
    my ($msg) = @_;

    # Say why we are giving up; otherwise the reason never reaches the
    # boot output.
    print "$msg\n";

    # Reporting is best effort: complain if a tmcc call fails, but keep
    # going so that we still attempt the state transition and exit.
    if (tmcc(TMCCCMD_BOOTERRNO(), "-1") < 0) {
        print "Error sending boot errno to Emulab Control!\n";
    }
    if (tmcc(TMCCCMD_STATE(), "TBFAILED") < 0) {
        print "Error sending TBFAILED to Emulab Control!\n";
    }
    exit(-1);
}
#!/usr/bin/perl -w
#
# EMULAB-COPYRIGHT
# Copyright (c) 2008 University of Utah and the Flux Group.
# All rights reserved.
#
use strict;
use English;
use Getopt::Std;
sub usage()
{
    # Show the command synopsis, then bail out with a failure status.
    my $synopsis = "Usage: [-d] start|stop\n";

    print $synopsis;
    exit(1);
}
my $optlist = "d";
my $debug = 0;
my $action = "start";
# Turn off line buffering on output
$| = 1;
# Drag in path stuff so we can find emulab stuff.
BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }
#
# Load the emulab libraries.
#
use libsetup;
use libtmcc;
use librc;
# Protos.
sub BootFatal($);
sub DoBoot();
# Script specific goo.
my $RCDIR = "$BINDIR/rc";
# Parse command line.
my %options;
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{'d'})) {
    $debug = 1;
}

# Allow default above: with no argument the action stays "start".
if (@ARGV) {
    $action = $ARGV[0];
}
usage()
    if ($action ne "stop" && $action ne "start");

#
# On shutdown kill everything.
#
if ($action eq "stop") {
    print "Killing all processes and exiting ...\n";

    # Ignore TERM ourselves; presumably so the broadcast below does not
    # take this script down before it can follow up with KILL.
    $SIG{TERM} = 'IGNORE';

    # Ask politely first, give processes a second to exit, then force it.
    system("kill -TERM -1");
    sleep(1);
    system("kill -KILL -1");
    exit(0);
}

# So that rc will know to invoke us to stop.
system("touch /var/lock/subsys/invserver") == 0
    or die("Could not create lock file!\n");

# The vnode name is read from this file; without it we cannot proceed.
if (! -e "$BOOTDIR/realname") {
    print "$BOOTDIR/realname does not exist!\n";
    exit(1);
}
my $vnodeid = `cat $BOOTDIR/realname`;
chomp($vnodeid);

#
# Put this into the environment, which is the protocol for setting up jails.
#
$ENV{'TMCCVNODEID'} = $vnodeid;

DoBoot();
exit(0);
#
# Boot Action.
#
# Runs the boot sequence: check the reservation via jailsetup(), start
# the watchdog, run the config scripts, start the linktest daemon, report
# ISUP through tmcc, and finally touch $BOOTDIR/vrunning as a success
# marker. Returns early (doing nothing else) when jailsetup() yields no
# pid. A failing step calls BootFatal().
#
sub DoBoot()
{
    TBDebugTimeStamp("rc.invserver starting up");

    print("Checking Testbed reservation status\n");
    my ($pid, $eid, $vname) = jailsetup();

    #
    # At this point, if we are a free node just return. Something went wacky.
    #
    if (!defined($pid)) {
        return;
    }

    #
    # Do this *before* config scripts. At the moment we use a hacky test
    # in vnodesetup to see if the watchdog is running, which tells vnodesetup
    # that the vnode has finished setting up. By doing it now, we allow vnode
    # setup to proceed in parallel (okay, at most two are going to partially
    # overlap, but that is better than nothing at all!)
    #
    TBDebugTimeStamp("rc.invserver starting watchdog");
    if (-x "$BINDIR/watchdog") {
        print("Starting testbed watchdog daemon\n");
        system("$BINDIR/watchdog start");
        if ($?) {
            BootFatal("Error running $BINDIR/watchdog");
        }
    }

    #
    # This is where we run all of the config scripts. These talk to the
    # DB and setup the node the way it is supposed to be.
    #
    TBDebugTimeStamp("rc.invserver running config scripts");
    print("Running config scripts\n");
    system("$RCDIR/rc.config boot");
    if ($?) {
        BootFatal("Error running $RCDIR/rc.config");
    }
    TBDebugTimeStamp("rc.invserver done running config scripts");

    # Linktest daemon now runs inside jails (and so inside vservers too).
    if (-x "$RCDIR/rc.linktest") {
        print("Starting linktest daemon\n");
        system("$RCDIR/rc.linktest start");
        if ($?) {
            BootFatal("Error running $RCDIR/rc.linktest");
        }
    }

    print("Informing the testbed that we are up and running\n");
    if (tmcc(TMCCCMD_STATE(), "ISUP") < 0) {
        BootFatal("Error sending ISUP to Emulab Control!");
    }

    #
    # Very hacky ... with vservers it is difficult to return an exit
    # code to the outer environment. So, touch this file and the outer
    # environment knows we got this far okay.
    #
    system("touch $BOOTDIR/vrunning");
    if ($?) {
        # ISUP has already been sent, so just make the failure visible
        # in the boot output rather than turning it into a fatal error.
        print("Warning: could not create $BOOTDIR/vrunning\n");
    }
}
#
# This version of fatal sends boot status to tmcd, and then generates
# a TBFAILED state transition.
#
# Takes the error message describing what went wrong, prints it, reports
# a boot errno of -1 and the TBFAILED state through tmcc, and then exits
# with status -1. Does not return.
#
sub BootFatal($)
{
    my ($msg) = @_;

    # Say why we are giving up; otherwise the reason never reaches the
    # boot output.
    print "$msg\n";

    # Reporting is best effort: complain if a tmcc call fails, but keep
    # going so that we still attempt the state transition and exit.
    if (tmcc(TMCCCMD_BOOTERRNO(), "-1") < 0) {
        print "Error sending boot errno to Emulab Control!\n";
    }
    if (tmcc(TMCCCMD_STATE(), "TBFAILED") < 0) {
        print "Error sending TBFAILED to Emulab Control!\n";
    }
    exit(-1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment