Commit b7cff5a1 authored by Robert Ricci's avatar Robert Ricci

Merge branch 'master' of git-public.flux.utah.edu:/flux/git/emulab-devel

parents a36ba070 fa361e4b
......@@ -121,7 +121,7 @@ use vars qw(@ISA @EXPORT);
TBDB_ALLOCSTATE_RES_REBOOT_DIRTY TBDB_ALLOCSTATE_RES_REBOOT_CLEAN
TBDB_ALLOCSTATE_RES_READY TBDB_ALLOCSTATE_UNKNOWN
TBDB_ALLOCSTATE_RES_TEARDOWN TBDB_ALLOCSTATE_DEAD
TBDB_ALLOCSTATE_RES_RECONFIG
TBDB_ALLOCSTATE_RES_RECONFIG TBDB_ALLOCSTATE_RES_REBOOT
TBDB_STATS_PRELOAD TBDB_STATS_START TBDB_STATS_TERMINATE
TBDB_STATS_SWAPIN TBDB_STATS_SWAPOUT TBDB_STATS_SWAPMODIFY
......@@ -446,6 +446,7 @@ sub TBDB_ALLOCSTATE_RES_INIT_DIRTY() { "RES_INIT_DIRTY"; }
# Allocation-state names. Each constant sub below takes no arguments and
# returns the literal state string that follows its TBDB_ALLOCSTATE_ prefix.
# NOTE(review): presumably these are the values written to the allocstate
# column of the nodes table -- confirm against the callers.
sub TBDB_ALLOCSTATE_RES_INIT_CLEAN() { "RES_INIT_CLEAN"; }
sub TBDB_ALLOCSTATE_RES_READY() { "RES_READY"; }
sub TBDB_ALLOCSTATE_RES_RECONFIG() { "RES_RECONFIG"; }
sub TBDB_ALLOCSTATE_RES_REBOOT() { "RES_REBOOT"; }
sub TBDB_ALLOCSTATE_RES_TEARDOWN() { "RES_TEARDOWN"; }
sub TBDB_ALLOCSTATE_UNKNOWN() { "UNKNOWN"; };
......
......@@ -1623,6 +1623,30 @@ sub WriteEnvVariables($)
return 0;
}
#
# Get value of a specific env variable
#
sub GetEnvVariable($$)
{
    # Look up one user environment variable of this experiment.
    #
    #   $self - experiment object; supplies pid() and eid()
    #   $var  - name of the variable to fetch
    #
    # Returns the stored value, undef when the query fails or no row
    # matches, and -1 when invoked on something that is not a reference.
    my ($self, $var) = @_;

    # Only an instantiated object can tell us which experiment to query.
    if (!ref($self)) {
        return -1;
    }

    my ($pid, $eid) = ($self->pid(), $self->eid());

    my $sth = DBQueryWarn("select value from virt_user_environment ".
                          "where pid='$pid' and eid='$eid' and name='$var'");

    # A failed query and an empty result set are both reported as undef.
    unless (defined($sth) && $sth->numrows) {
        return undef;
    }

    # Only the first column of the first matching row is of interest.
    my @row = $sth->fetchrow_array();
    return $row[0];
}
#
# Experiment locking and state changes.
#
......
......@@ -135,6 +135,7 @@ sub Lookup($$)
$self->{"TYPEINFO"} = undef;
$self->{"ATTRS"} = undef;
$self->{"IFACES"} = undef;
$self->{"WAROW"} = undef;
$self->{"HASH"} = {};
bless($self, $class);
......@@ -153,6 +154,7 @@ sub DESTROY {
$self->{"ATTRS"} = undef;
$self->{"IFACES"} = undef;
$self->{"HASH"} = undef;
$self->{"WAROW"} = undef;
}
#
......@@ -191,6 +193,7 @@ sub MakeFake($$$$)
$self->{"TYPEINFO"} = undef;
$self->{"ATTRS"} = undef;
$self->{"IFACES"} = undef;
$self->{"WAROW"} = undef;
$self->{"HASH"} = {};
bless($self, $class);
......@@ -236,6 +239,7 @@ sub BulkLookup($$$)
# Start every lazily filled cache slot of the new node object out empty.
$node->{"TYPEINFO"} = undef;
$node->{"ATTRS"} = undef;
$node->{"IFACES"} = undef;
# The widearea row cache belongs to the node being built here, like the
# other slots; it was mistakenly being reset on $self instead of $node.
$node->{"WAROW"} = undef;
$node->{"HASH"} = {};
# Collect the finished object under its node id.
$nodelist{$nodeid} = $node;
......@@ -540,6 +544,7 @@ sub Refresh($)
$self->{"TYPEINFO"} = undef;
$self->{"ATTRS"} = undef;
$self->{"IFACES"} = undef;
$self->{"WAROW"} = undef;
return 0;
}
......@@ -559,6 +564,33 @@ sub FlushAll($)
%nodes = ();
}
#
# Convenience access method for widearea info
#
sub WideAreaInfo($$)
{
    # Return one column ($slot) of this node's widearea_nodeinfo row.
    #
    # The row is fetched from the database on first use and kept in
    # $self->{'WAROW'}; later calls are answered from that cached copy.
    # Returns undef (after a message on STDERR) when the node has no
    # widearea_nodeinfo row or the row has no column named $slot.
    my ($self, $slot) = @_;
    my $node_id = $self->node_id();

    # Load and cache the row if we do not have it yet.
    unless (defined($self->{'WAROW'})) {
        my $sth = DBQueryWarn("select * from widearea_nodeinfo ".
                              "where node_id='$node_id'");

        unless ($sth && $sth->numrows) {
            print STDERR "*** $node_id is not a widearea node\n";
            return undef;
        }
        $self->{'WAROW'} = $sth->fetchrow_hashref();
    }

    # Known column: hand back its value.
    return $self->{'WAROW'}->{$slot}
        if (exists($self->{'WAROW'}->{$slot}));

    # Unknown column: complain and report failure.
    print STDERR
        "*** Nonexistent slot '$slot' request for widearea node $node_id\n";
    return undef;
}
#
# Check permissions. Allow for either uid or a user ref until all code
# updated.
......@@ -772,7 +804,7 @@ sub SetAllocState($$)
my $now = time();
my $node_id = $self->node_id();
DBQueryWarn("update nodes set allocstate='$state', " .
" allocstate_timestamp=$now where node_id='$node_id'")
or return -1;
......@@ -1177,6 +1209,8 @@ sub external_resource_index($) {
return L__reservation($_[0], 'external_resource_index'); }
sub external_resource_id($) {
return L__reservation($_[0], 'external_resource_id'); }
sub external_resource_key($) {
return L__reservation($_[0], 'external_resource_key'); }
sub inner_elab_role($) {
return L__reservation($_[0], 'inner_elab_role'); }
......
......@@ -56,7 +56,7 @@ open(STDOUT, ">> $logname") or die("opening $logname for STDOUT: $!");
# Clear the frisbee_blobs table since any frisbee's
# that were running are obviously not running anymore!
#
DBQueryFatel("delete from frisbee_blobs");
DBQueryFatal("delete from frisbee_blobs");
#
# Clear the event scheduler pids. It's okay to set them to zero since
......
......@@ -43,6 +43,7 @@ use Node;
use libGeni;
use GeniResource;
use GeniHRN;
use GeniXML;
sub fatal($);
sub AllocNodes();
......@@ -51,7 +52,6 @@ sub Register();
sub UnRegister();
sub ClearAll();
sub StartAll();
sub RestartAll();
sub StopAll();
sub WaitAll();
sub PurgeAll();
......@@ -90,7 +90,7 @@ my $eid = shift;
my $action = shift;
if ($action =~
/^(alloc|free|clear|wait|purge|start|restart|mapnodes|status|stop|register|unregister||manifests|renew)$/) {
/^(alloc|free|clear|wait|purge|start|mapnodes|status|stop|register|unregister||manifests|renew)$/) {
$action = $1;
}
else {
......@@ -176,10 +176,6 @@ SWITCH: for ($action) {
StartAll();
last SWITCH;
};
/^restart$/ && do {
RestartAll();
last SWITCH;
};
/^stop$/ && do {
StopAll();
last SWITCH;
......@@ -280,18 +276,10 @@ sub PurgeAll()
sub StartAll()
{
libGeni::StartSlivers($experiment, $this_user, 0, $debug) == 0 or
libGeni::StartSlivers($experiment, $this_user, $debug) == 0 or
fatal("Cannot start slivers!\n");
}
sub RestartAll()
{
libGeni::StartSlivers($experiment, $this_user, 1, $debug) == 0 or
fatal("Cannot restart slivers!\n");
return 0;
}
sub StopAll()
{
my @resources = GeniResource->LookupAll($experiment);
......@@ -353,8 +341,8 @@ sub GetManifests()
print STDERR "No manifest for $resource\n";
next;
}
print STDERR Dumper($manifest);
print STDERR Dumper(GeniXML::Serialize($manifest, 1));
}
return 0;
......
This diff is collapsed.
--------------------------
Running in Standalone Mode
--------------------------
run-standalone <run-level> <run-path> <log-path> <host-list> [node-list]
Uses pssh to simultaneously run linktest in standalone mode for all
hosts listed.
run-level -- An integer between 1 and 4 with higher numbers
performing more tests (see:
http://users.emulab.net/trac/emulab/wiki/linktest).
run-path -- Contains the standalone package (manifest described below).
log-path -- An empty directory where low-level error logs are kept.
host-list -- Comma-delimited list of hostnames to run on.
node-list -- If the nodes have different control and experimental
interfaces, the hostlist should contain the control
interface names (accessed via pssh) and the nodelist
should contain experimental interface names (used
internally in linktest). If the nodes do not have
separate control interfaces, the nodelist may be
omitted.
Note: Currently node names (those listed in the node-list and the
nickname and syncserver hostname described below) are all assumed to
be a single unqualified name (like node-0).
standalone.sh <run-level> <run-path> <log-path> <node-list>
Invokes linktest with the appropriate arguments.
---------------------------
Standalone Package Manifest
---------------------------
Executables:
standalone.sh -- Invokes linktest in standalone mode
linktest.pl -- Main linktest script
bin/emulab-rude -- Link testing tool
bin/emulab-crude -- Link testing tool
bin/emulab-iperf -- Link testing tool
bin/emulab-sync -- Sync client for synchronizing different clients
-------------------------------------------------------------------------------
Configuration:
boot/nickname -- Contains a single line with the hostname of the
experimental interface on this node.
Example:
node0
-------------------------------------------------------------------------------
boot/syncserver -- Contains a single line with the hostname of the sync server.
Example:
node4
-------------------------------------------------------------------------------
boot/ltmap -- Contains 'virtual topology', the logical topology
implemented by tunnels and VLANs at layer 2. Format:
# the file format is simple:
# expr := h <node name>
# || l <src node> <dst node> <bw (Mb/s)> <latency (s)> <loss (%)> <link/lan name> <queue type>
Example:
h node0
h node1
h node2
h node3
h node4
h node5
l node2 node3 100000000 0.0000 0.000000 link2 droptail
l node3 node2 100000000 0.0000 0.000000 link2 droptail
l node2 node4 100000000 0.0000 0.000000 link3 droptail
l node4 node2 100000000 0.0000 0.000000 link3 droptail
l node1 node2 100000000 0.0000 0.000000 link1 droptail
l node2 node1 100000000 0.0000 0.000000 link1 droptail
l node0 node2 100000000 0.0000 0.000000 link0 droptail
l node2 node0 100000000 0.0000 0.000000 link0 droptail
l node3 node4 100000000 0.0000 0.000000 lan0 droptail
l node3 node5 100000000 0.0000 0.000000 lan0 droptail
l node4 node3 100000000 0.0000 0.000000 lan0 droptail
l node4 node5 100000000 0.0000 0.000000 lan0 droptail
l node5 node3 100000000 0.0000 0.000000 lan0 droptail
l node5 node4 100000000 0.0000 0.000000 lan0 droptail
-------------------------------------------------------------------------------
boot/ltpmap -- This file describes the physical link and node
characteristics. linktest currently expects version 2 (V 2) at the top
of the file. The format lists a number of nodes (lines beginning with
H) and a number of links (lines beginning with L).
Host lines:
H <node-name> <physical-node-name> <physical-host-name> <type> <osid> <os-name> <os-version> <os-features>
node-name -- node name from ltmap
physical-node-name -- physical name (used for debugging output)
physical-host-name -- host name of physical machine (node is
considered virtual if not identical to
physical-node-name).
type -- used for debugging output
osid -- not used
os-name -- "Linux" or "FreeBSD", different queuing behaviour based on OS
os-version -- not used
os-features -- Comma-delimited list. If 'linktest' feature is not
present, this node will be skipped.
Link lines:
L <first-node> <second-node> <link-name> <mac-address> <mpx-style> <dstyle>
first-node, second-node -- node names of link
link-name -- Name of link or lan. All pairs in the link/lan in every
direction must be represented.
mac-address -- MAC address of specified interface. Used for debugging output.
mpx-style -- If 'veth' for virtual ethernet device, changes header size
calculations.
dstyle -- If ends in '-nobw', skips bandwidth test for this link. If
dstyle is 'linkdelay', this means that there is end-to-end
shaping (at end nodes) and some tests are slightly different
because of this.
Example:
V 2
H node0 pc20 pc20 pc600 526 Linux 0 ping,ssh,ipod,isup,linktest,linkdelays,vlans
H node1 pc15 pc15 pc600 526 Linux 0 ping,ssh,ipod,isup,linktest,linkdelays,vlans
H node2 pc28 pc28 pc600 526 Linux 0 ping,ssh,ipod,isup,linktest,linkdelays,vlans
H node3 pc5 pc5 pc600 526 Linux 0 ping,ssh,ipod,isup,linktest,linkdelays,vlans
H node4 pc6 pc6 pc600 526 Linux 0 ping,ssh,ipod,isup,linktest,linkdelays,vlans
H node5 pc13 pc13 pc600 526 Linux 0 ping,ssh,ipod,isup,linktest,linkdelays,vlans
L node2 node3 link2 00d0b71029ba none dnode
L node3 node2 link2 00d0b713f178 none dnode
L node2 node4 link3 00d0b70efaa4 none dnode
L node4 node2 link3 00d0b713f41c none dnode
L node1 node2 link1 00d0b713f6b1 none dnode
L node2 node1 link1 00d0b70ee067 none dnode
L node0 node2 link0 00d0b713f66b none dnode
L node2 node0 link0 00d0b71029ce none dnode
L node3 node4 lan0 00d0b713f473 none dnode
L node3 node5 lan0 00d0b713f473 none dnode
L node4 node3 lan0 00d0b713f607 none dnode
L node4 node5 lan0 00d0b713f607 none dnode
L node5 node3 lan0 00d0b713f689 none dnode
L node5 node4 lan0 00d0b713f689 none dnode
......@@ -340,6 +340,8 @@ if (-r $fname) {
$hostname = $1;
$exp_id = $2;
$proj_id = $3;
} elsif ($name =~ /^([-\@\w]*)$/) {
$hostname = $1;
} else {
die("Could not parse $fname info\n");
}
......
perl -w -T linktest.pl.in STARTAT=1 STOPAT=4 DOARP=0 NODES=node1,node2 PROJDIR=/proj BINDIR=/usr/testbed/lib VARDIR=/var/emulab
sudo perl -w -T /proj/tbres/duerig/src/event/linktest/linktest.pl STARTAT=1 STOPAT=$1 DOARP=0 LOGDIR=$3 BINDIR=$2 VARDIR=$2
<descriptors>
<osid>
<attribute name="magic">
<value></value>
</attribute>
<attribute name="features">
<value>ping,ssh,isup,veths,veth-ne,veth-en,mlinks,vlans</value>
</attribute>
<attribute name="version">
<value>0.1</value>
</attribute>
<attribute name="reboot_waittime">
<value>600</value>
</attribute>
<attribute name="description">
<value>Phoney OSID for GENI nodes</value>
</attribute>
<attribute name="mustclean">
<value>1</value>
</attribute>
<attribute name="shared">
<value>1</value>
</attribute>
<attribute name="op_mode">
<value>MINIMAL</value>
</attribute>
<attribute name="pid">
<value>emulab-ops</value>
</attribute>
<attribute name="osname">
<value>GENI</value>
</attribute>
<attribute name="OS">
<value>Other</value>
</attribute>
</osid>
<osid>
<attribute name="magic">
<value></value>
</attribute>
<attribute name="features">
<value>ping,ssh,isup,veths,veth-ne,veth-en,mlinks,vlans</value>
</attribute>
<attribute name="version">
<value>0.1</value>
</attribute>
<attribute name="reboot_waittime">
<value>600</value>
</attribute>
<attribute name="description">
<value>Phony OSID for Geni vnodes</value>
</attribute>
<attribute name="mustclean">
<value>1</value>
</attribute>
<attribute name="shared">
<value>1</value>
</attribute>
<attribute name="op_mode">
<value>MINIMAL</value>
</attribute>
<attribute name="pid">
<value>emulab-ops</value>
</attribute>
<attribute name="osname">
<value>GENIVM</value>
</attribute>
<attribute name="OS">
<value>Other</value>
</attribute>
<attribute name="nextosid">
<value>emulab-ops,GENI</value>
</attribute>
</osid>
</descriptors>
diff -ru linux-2.6.34/net/ipv4/Kconfig linux-2.6.34-emulab/net/ipv4/Kconfig
--- linux-2.6.34/net/ipv4/Kconfig 2008-08-29 09:50:50.000000000 -0600
+++ linux-2.6.34-emulab/net/ipv4/Kconfig 2008-08-29 09:53:39.000000000 -0600
@@ -627,3 +627,9 @@
If unsure, say N.
+#
+# Emulab special
+#
+config ICMP_PINGOFDEATH
+ bool "ICMP: ICMP Ping-of-Death (Emulab)"
+
diff -ru linux-2.6.34/net/ipv4/icmp.c linux-2.6.34-emulab/net/ipv4/icmp.c
--- linux-2.6.34/net/ipv4/icmp.c 2010-05-16 23:17:36.000000000 +0200
+++ linux-2.6.34-emulab/net/ipv4/icmp.c 2010-06-03 17:32:26.000000000 +0200
......
......@@ -46,6 +46,10 @@ NEW FEATURES:
data will go into every chunk and still compress that, but there
is no point since we pad out every chunk to 1MB.
[ The discovery protocol is now done. On-the-fly image creation has not
been done: though we can distribute arbitrary files, we do so without
creating imagezip-format images. ]
Now one could imagine a super, caching frisbee server that creates
compressed images on the fly and caches them for later use. Perhaps
it would become more of a chunk server where it caches every chunk
......@@ -137,6 +141,13 @@ ENHANCEMENTS:
chunks. So maybe 1448B/blk * 768 blks/chunk == 1.06MB/chunk. PREQUEST
BlockMaps come down from 128 bytes to 96.
[ Support for jumbo packets has been done. This increases the block
size to 8192 and reduces the blocks/chunk to 128 to keep the chunk
size constant (so that we can continue to distribute our existing
images). Currently this requires static re-compilation of both the
client and server, though some support has been put in for negotiating
the blocksize (in join v2 messages). ]
6. Dynamic rate pacing in the server.
Our attempts to date have been pretty feeble. I think we have a
......@@ -152,6 +163,36 @@ ENHANCEMENTS:
and frisbeed pad it out, we can save a lot of disk space for these
small images.
[ DONE ]
8. Allow a server to serve multiple unicast clients.
Right now an instance of the server not only serves just a single
image, but only to a single destination address. This is reasonable
for broadcast/multicast but is overly restrictive for unicast. Changing
this should be minor, we just need to keep track of destinations
(addr/port) in the queue along with block ranges to send out. We would
need to back off the queue optimizations where we combine incoming
requests with those already in the queue (i.e., now we would also have
to make sure that they are for the same destination before we combine).
Minor changes would be needed to PacketSend/Recv to track the client
IP/port rather than just assuming/using the global mcastaddr/portnum.
9. Allow the frisbee client to be used in a pipe.
If we could pipe the output of frisbee into another utility, this would
make it more useful for arbitrary file distribution. For example:
frisbee -m <addr> -p <port> - | tar xzf -
to download and unpack a tarfile. The problem is out-of-order processing
of chunks and there are a couple of ways around it. Frisbee can already
request chunks in-order, but it is also opportunistic and saves other
chunk data it needs that other clients have requested. We could just
ignore that data and keep re-requesting blocks in order as we need them,
or we could do some limited memory caching of incoming data; i.e., save
but don't decompress chunks until we need them. We could cache to disk
as well, but then we don't really save anything over just frisbeeing into
a tmp file and giving that to the next util in the pipeline.
PROBLEMS:
1. Have seen the clients run out of socket buffer space causing them
......
......@@ -206,10 +206,19 @@ CREATE TABLE `geni_resources` (
`manifest_idx` mediumint(8) unsigned NOT NULL default '0',
`ticket_idx` mediumint(8) unsigned NOT NULL default '0',
`newticket_idx` mediumint(8) unsigned NOT NULL default '0',
`rspec_idx` mediumint(8) unsigned default NULL,
PRIMARY KEY (`idx`),
UNIQUE KEY `manager` (`exptidx`,`manager_urn`(255))
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
--
-- Table structure for table `geni_rspecs`
--
-- One row per stored rspec: an auto-assigned index, a creation timestamp
-- and the rspec text itself.
-- NOTE(review): presumably `geni_resources`.`rspec_idx` refers to `idx`
-- here -- confirm against the code that fills it in.
--
DROP TABLE IF EXISTS `geni_rspecs`;
CREATE TABLE `geni_rspecs` (
  `idx` int(10) unsigned NOT NULL auto_increment,
  `created` datetime default NULL,
  `rspec` text,
  PRIMARY KEY  (`idx`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
DROP TABLE IF EXISTS `geni_bindings`;
CREATE TABLE `geni_bindings` (
`slice_uuid` varchar(40) NOT NULL default '',
......
......@@ -239,6 +239,17 @@ sub GetCertificate($) { return $_[0]->{'CERTIFICATE'}; }
# An alias so that slivers look like aggregates.
sub resource_type($) { return field($_[0], "type"); }
# Combined getter/setter for a temporary rspec kept on the object.
# When called with a defined second argument, that value is stored in the
# object's 'RSPEC' slot first; the current content of the slot is returned
# either way (undef if nothing has been stored).
sub rspec($;$)
{
    my $self     = shift;
    my $newvalue = shift;

    $self->{'RSPEC'} = $newvalue
        if (defined($newvalue));

    return $self->{'RSPEC'};
}
# Return the URN.
sub urn($)
{
......@@ -770,6 +781,8 @@ sub Start($$$)
return -1;
}
if ($reservation->SameExperiment($experiment)) {
my $vnode;
#
# Since this is an aggregate, some slivers may already be
# in the started state. Skip those, unless doing a restart.
......@@ -778,14 +791,28 @@ sub Start($$$)
if ($sliver->state() eq "started" && !$restart);
if ($node->isvirtnode()) {
$vnodes{$node->node_id} = $node;
# A virtnode on a shared physical node needs nothing else.
next
if ($node->sharing_mode());
# But if non-shared, have to make sure that the phys node
# gets loaded.
# A virtnode on a shared physical node needs reboot or setup
if ($node->sharing_mode()) {
if ($restart && $sliver->state() eq "started") {
$reboots{$node->node_id} = $node;
}
else {
$vnodes{$node->node_id} = $node;
}
next;
}
# See below.
$vnode = $node;
#
# Now it gets messy. Do not want to mess with the physnode
# if it's running other vnodes, and we just need to fire up
# a new one. But if the physnode is going to get rebooted,
# then there is no need to do anything with the vnodes; they
# will boot up with the physnode.
#
# But, have to make sure that the phys node gets setup.
#
my $physnodeid = $node->phys_nodeid();
next
if (exists($poweron{$physnodeid}) ||
......@@ -798,21 +825,22 @@ sub Start($$$)
}
}
#
# Look to see if local physical node was stopped (powered off).
# If the node is not imageable, then there is not much to
# do except turn it on or reboot it. I am assuming that a
# non imageable node is always in raw mode.
#
if (!$node->isremotenode() &&
($sliver->state() eq "stopped")) {
$poweron{$node->node_id} = $node;
}
else {
# node_reboot is smart enough to know that if a pnode
# is rebooted it can ignore the vnodes on it, so do
# not optimize this here.
$reboots{$node->node_id} = $node;
if (!$node->imageable()) {
if ($sliver->state() eq "stopped") {
$poweron{$node->node_id} = $node;
}
else {
$reboots{$node->node_id} = $node;
}
next;
}
next
if (!$node->imageable());
#
# See if the node is running the requested OS.
#
my $osinfo = OSinfo->Lookup($node->def_boot_osid());