Commit 4c091c96 authored by Leigh B Stoller's avatar Leigh B Stoller
Browse files

Changes for *remote* XEN shared nodes, as on the I2 pcpg nodes.

Since the pcpg-i2 nodes are so very flaky, let's try something that
does not require them to be rebooted or imaged!

The key change is that on these remote nodes, we do not bridge the
physical control interface to the VM control interfaces. There is no
point since there are no routable IPs we can use, nor is there a
192.168 network that would be useful.

However, we still want to give the VMs their 192.168 address and we
still want multiple VMs on the same host to talk to each other, and we
still want the VMs to be able to access the outside world with NAT.
So we still create the xenbr0 bridge, and give it the router address
(192.168.0.1). Any traffic heading out will be NAT'd as normal, and
you can ssh into the VM using the physical host IP and the per VM
sshd port number.
parent 2dd08ebb
......@@ -80,16 +80,23 @@ my $XENBUS_PATH = $ENV{'XENBUS_PATH'};
my $bridge = `xenstore-read "$XENBUS_PATH/bridge"`;
chomp($bridge);
my ($bossdomain) = tmccbossinfo();
die("Could not get bossname from tmcc!")
if (!defined($bossdomain));
if ($bossdomain =~ /^[-\w]+\.(.*)$/) {
$bossdomain = $1;
}
# We need these IP addresses.
my $boss_ip = `host boss | grep 'has address'`;
my $boss_ip = `host boss.${bossdomain} | grep 'has address'`;
if ($boss_ip =~ /has address ([0-9\.]*)$/) {
$boss_ip = $1;
}
my $ops_ip = `host ops | grep 'has address'`;
my $ops_ip = `host ops.${bossdomain} | grep 'has address'`;
if ($ops_ip =~ /has address ([0-9\.]*)$/) {
$ops_ip = $1;
}
my $fs_ip = `host fs | grep 'has address'`;
my $fs_ip = `host fs.${bossdomain} | grep 'has address'`;
if ($fs_ip =~ /has address ([0-9\.]*)$/) {
$fs_ip = $1;
}
......@@ -204,14 +211,22 @@ sub Online()
#
# If the source is from the vnode, headed to the local control
# net, no need for any NAT; just let it through.
#
mysystem2("$IPTABLES -t nat -A POSTROUTING -j ACCEPT " .
" -s $vnode_ip -d $network/$cnet_mask");
return -1
if ($?);
#
# On a remote node (pcpg) we are not bridged to the control
# network, and so we route to the control network, and then
# rely on the SNAT rule below.
#
if (!REMOTEDED()) {
mysystem2("$IPTABLES -t nat -A POSTROUTING -j ACCEPT " .
" -s $vnode_ip -d $network/$cnet_mask");
return -1
if ($?);
}
#
# Ditto for the jail network.
# Ditto for the jail network. On a remote node, the only
# jail network is on our node, and all of them are bridged
# together anyway.
#
mysystem2("$IPTABLES -t nat -A POSTROUTING -j ACCEPT " .
" -s $vnode_ip -d $jail_network/$jail_netmask");
......@@ -225,7 +240,7 @@ sub Online()
# net IP, using SNAT.
#
mysystem2("$IPTABLES -t nat -A POSTROUTING ".
" -s $vnode_ip -o $bridge -j SNAT --to-source $host_ip");
" -s $vnode_ip -o $outer_controlif -j SNAT --to-source $host_ip");
return 0;
}
......@@ -266,13 +281,15 @@ sub Offline()
}
mysystem2("$IPTABLES -t nat -D POSTROUTING -j ACCEPT " .
" -s $vnode_ip -d $jail_network/$jail_netmask");
" -s $vnode_ip -d $jail_network/$jail_netmask");
mysystem2("$IPTABLES -t nat -D POSTROUTING -j ACCEPT " .
" -s $vnode_ip -d $network/$cnet_mask");
if (!REMOTEDED()) {
mysystem2("$IPTABLES -t nat -D POSTROUTING -j ACCEPT " .
" -s $vnode_ip -d $network/$cnet_mask");
}
mysystem2("$IPTABLES -t nat -D POSTROUTING ".
" -s $vnode_ip -o $bridge -j SNAT --to-source $host_ip");
" -s $vnode_ip -o $outer_controlif -j SNAT --to-source $host_ip");
# evproxy
mysystem2("$IPTABLES -t nat -D PREROUTING -j DNAT -p tcp ".
......
......@@ -207,6 +207,9 @@ my $DOSNAP = 0;
my $OVSCTL = "/usr/local/bin/ovs-vsctl";
my $OVSSTART = "/usr/local/share/openvswitch/scripts/ovs-ctl";
my $ISREMOTENODE = REMOTEDED();
my $BRIDGENAME = "xenbr0";
#
# Information about the running Xen hypervisor
#
......@@ -307,6 +310,17 @@ sub rootPreConfig()
print "Configuring root vnode context\n";
#
# For compatibility with existing (physical host) Emulab images,
# the physical host provides DHCP info for the vnodes. We manage
# the dhcpd.conf file here. See below.
#
# Note that we must first add an alias to the control net bridge so
# that we (the physical host) are in the same subnet as the vnodes,
# otherwise dhcpd will fail.
#
my ($alias_iface, $alias_ip, $alias_mask);
#
# Start the Xen daemon if not running.
# There doesn't seem to be a sure fire way to tell this.
......@@ -314,33 +328,62 @@ sub rootPreConfig()
# set up a bridge device for the control network, so we look for this.
# The bridge should have the same name as the control network interface.
#
my ($cnet_iface,undef,undef,undef,undef,undef,$cnet_gw) = findControlNet();
if (!existsBridge($cnet_iface)) {
print "Starting xend and configuring cnet bridge...\n"
if ($debug);
mysystem("/usr/sbin/xend start");
if (!$ISREMOTENODE) {
my ($cnet_iface,undef,undef,undef,undef,undef,$cnet_gw) =
findControlNet();
if (!existsBridge($cnet_iface)) {
print "Starting xend and configuring cnet bridge...\n"
if ($debug);
mysystem("/usr/sbin/xend start");
#
# xend tends to lose the default route, so make sure it exists.
#
system("route del default >/dev/null 2>&1");
mysystem("route add default gw $cnet_gw");
}
($alias_ip,$alias_mask) = domain0ControlNet();
$alias_iface = "$cnet_iface:1";
#
# xend tends to lose the default route, so make sure it exists.
# We use xen's antispoofing when constructing the guest control net
# interfaces. This is most useful on a shared host, but no
# harm in doing it all the time.
#
system("route del default >/dev/null 2>&1");
mysystem("route add default gw $cnet_gw");
mysystem("$IPTABLES -P FORWARD DROP");
mysystem("$IPTABLES -F FORWARD");
# This says to forward traffic across the bridge.
mysystem("$IPTABLES -A FORWARD ".
"-m physdev --physdev-in $cnet_iface -j ACCEPT");
}
else {
if (!existsBridge($BRIDGENAME)) {
if (mysystem2("$BRCTL addbr $BRIDGENAME")) {
TBScriptUnlock();
return -1;
}
#
# We do not set the mac address; we want it to take
# on the address of the attached vif interfaces so that
# arp works. This is quite kludgy of course, but otherwise
# the arp comes into the bridge interface and then kernel
# drops it. There is a brouter (ebtables) work around
# but not worth worrying about.
#
}
(undef,$alias_mask,$alias_ip) = findVirtControlNet();
$alias_iface = $BRIDGENAME;
}
if (system("ifconfig $alias_iface | grep -q 'inet addr'")) {
print "Creating $alias_iface alias...\n";
mysystem("ifconfig $alias_iface $alias_ip netmask $alias_mask");
}
# For tunnels
mysystem("$MODPROBE openvswitch");
mysystem("$OVSSTART --delete-bridges start");
#
# We use xen's antispoofing when constructing the guest control net
# interfaces. This is most useful on a shared host, but no harm
# in doing it all the time.
#
mysystem("$IPTABLES -P FORWARD DROP");
mysystem("$IPTABLES -F FORWARD");
mysystem("$IPTABLES -A FORWARD ".
"-m physdev --physdev-in $cnet_iface -j ACCEPT");
# For bandwidth constraints.
mysystem("$MODPROBE ifb numifbs=$MAXIFB");
# Create a DB to manage them.
......@@ -419,22 +462,6 @@ sub rootPreConfig()
#
mysystem("vgchange -a y $VGNAME");
#
# For compatibility with existing (physical host) Emulab images,
# the physical host provides DHCP info for the vnodes. So we create
# a skeleton dhcpd.conf file here.
#
# Note that we must first add an alias to the control net bridge so
# that we (the physical host) are in the same subnet as the vnodes,
# otherwise dhcpd will fail.
#
if (system("ifconfig $cnet_iface:1 | grep -q 'inet addr'")) {
print "Creating $cnet_iface:1 alias...\n"
if ($debug);
my ($vip,$vmask) = domain0ControlNet();
mysystem("ifconfig $cnet_iface:1 $vip netmask $vmask");
}
print "Creating dhcp.conf skeleton...\n"
if ($debug);
createDHCP();
......@@ -1109,7 +1136,7 @@ sub vnodePreConfigControlNetwork($$$$$$$$$$$$)
my $fmac = fixupMac($mac);
# Note physical host control net IF is really a bridge
my ($cbridge) = findControlNet();
my ($cbridge) = ($ISREMOTENODE ? ($BRIDGENAME) : findControlNet());
my $cscript = "$VMDIR/$vnode_id/cnet-$mac";
# Save info for the control net interface for config file.
......@@ -1135,7 +1162,7 @@ sub vnodePreConfigControlNetwork($$$$$$$$$$$$)
# a route to reach the vnodes. Do it for the entire network,
# and no need to remove it.
if (!$isroutable && system("$NETSTAT -r | grep -q $network")) {
if (!$ISREMOTENODE && system("$NETSTAT -r | grep -q $network")) {
mysystem2("$ROUTE add -net $network netmask $mask dev $cbridge");
if ($?) {
return -1;
......@@ -2112,6 +2139,10 @@ sub disk_hacks($)
if (-f "$path/etc/init/ttyS0.conf") {
system("sed -i.bak -e 's/ttyS0/hvc0/' $path/etc/init/ttyS0.conf");
}
if (-e "$BINDIR/tmcc-nossl.bin") {
system("/bin/cp -f $BINDIR/tmcc-nossl.bin $path/$BINDIR/tmcc.bin");
}
}
sub configFile($)
......
......@@ -29,6 +29,7 @@ BEGIN { require "/etc/emulab/paths.pm"; import emulabpaths; }
use libsetup;
use liblocsetup;
use libgenvnode;
#
# Set up bridging in a way that will make Xen 4 happy
......@@ -69,6 +70,32 @@ if (@ARGV == 1) {
usage();
}
#
# Special case for the remoteded nodes (pcpg, pcpg-i2). We want the
# bridge, but it is to create a private network for the VMs; the
# control interface is not attached to the bridge, and the bridge
# serves as the router interface for the VMs. We create this bridge
# early so that dhcpd does not throw up and die because there is no
# such interface, which causes a respawning deluge.
#
if (REMOTEDED()) {
# Create the bridge with whichever tool matches $USE_OPENVSWITCH.
if ($USE_OPENVSWITCH) {
system("$OVSCTL add-br $XENBR");
}
else {
system("$BRCTL addbr $XENBR");
}
# $? holds the status of whichever system() call ran just above.
die "xenbridge-setup: Unable to create bridge $XENBR\n"
if ($?);
# Only the second and third values returned by findVirtControlNet()
# are used, as the netmask and IP address assigned to the bridge.
# NOTE(review): presumably the IP is the router address the VMs use
# on their private network -- TODO confirm.
my (undef,$alias_mask,$alias_ip) = findVirtControlNet();
system("$IFCONFIGBIN $XENBR $alias_ip netmask $alias_mask");
die "xenbridge-setup: Unable to ifconfig bridge $XENBR\n"
if ($?);
# Nothing more to do on a remote node; skip the rest of this script.
exit(0);
}
#
# If we weren't given an interface, find the control net
#
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment