Commit cde74d7a authored by Mike Hibler's avatar Mike Hibler

Mostly cleanup.

* generate the initial conditions for monitors to pass into the stubs
  (must use -i to start-experiment to do this)

* make sure install-tarball does not return until the tarballs are installed
  (or until the timeout expires)

* move ip mapping and initial conditions files to /local/logs so they
  become part of the record

* fix up the exit statuses returned by the auto-* stubs so program-agent
  can detect real failures
parent ff5cdfcd
......@@ -99,12 +99,14 @@ export TMPDIR="/var/tmp/";
export LOGDIR="/local/logs/"
#
# Temproary files we use
# Temporary files we use.
# We put them in /local/logs so they become part of the fossil record.
#
export IPMAP="/var/tmp/ip-mapping.txt"
export IPMAP="/local/logs/ip-mapping.txt"
export INITCOND="/local/logs/initial-conditions.txt"
#
# Important scrips/libraries
# Important scripts/libraries
#
export NETMOND="netmond"
export STUBD="stubd"
......@@ -112,6 +114,7 @@ export MAGENT="magent"
export MONITOR="monitor.py"
export DBMONITOR="dbmonitor.pl"
export GENIPMAP="gen-ip-mapping.pl"
export GENINITCOND="init-elabnodes.pl"
export NETMON_LIB="libnetmon.so"
#
......@@ -127,6 +130,16 @@ else
export ON_ELAB="yes"
fi
#
# Are we the "master" (sync server) node?
#
if `$SYNC -m`; then
export ON_MASTER="yes"
else
export ON_MASTER=""
fi
#
# Make a handy variable for running as root (ie. invoke sudo if necessary)
#
......@@ -199,20 +212,15 @@ fi
barrier_wait()
{
BARRIER=$1
#
# Are we the master?
#
$SYNC -m
MASTER=$?
if [ "$MASTER" = "1" ]; then
# I know, this looks backwards. But it's right
$SYNC -n $BARRIER
_rval=$?
else
if [ $ON_MASTER ]; then
WAITERS=`expr $PEERS - 1`
echo "Waiting up to $SYNCTIMO seconds for $WAITERS clients"
sync_timeout $SYNCTIMO $SYNC -n $BARRIER -i $WAITERS
_rval=$?
else
$SYNC -n $BARRIER
_rval=$?
fi
return $_rval
......
......@@ -29,7 +29,8 @@ echo $SH ${DBMONITOR_DIR}/run-dbmonitor.sh $ARGS
$SH ${DBMONITOR_DIR}/run-dbmonitor.sh $ARGS &
DBMONPID=$!
# Kill the monitor if we get killed - TODO: harsher kill?
trap "$AS_ROOT kill $DBMONPID" TERM
# Note that we assume that a kill of us is "normal" and just exit 0.
trap "$AS_ROOT kill $DBMONPID; exit 0" TERM
#
# Give it time to come up
......@@ -51,4 +52,4 @@ echo "Running!";
# Wait for our monitor to finish
#
wait $DBMONPID
exit 0
exit $?
......@@ -10,7 +10,7 @@ pid=$1
eid=$2
echo "##### reinstalling tarballs and RPMs"
$TEVC -e $pid/$eid now tfhosts start
$TEVC -w -t 30 -e $pid/$eid now tfhosts run
echo "WARNING: you will still need to run start-experiment to restart"
echo " stubs and monitors (if they have changed)."
......
......@@ -18,7 +18,8 @@ $SH ${MAGENT_DIR}/run-magent.sh & #$ARGS
# Kill the agent if we get killed - TODO: harsher kill?
# Because the magent backgrounds itself, it's harder to figure out
# what its pid is, so just do a killall
trap "$AS_ROOT killall $MAGENT" EXIT
# Note that we assume that a kill of us is "normal" and just exit 0.
trap "$AS_ROOT killall $MAGENT; exit 0" EXIT
#
# Give it time to come up
......@@ -49,3 +50,4 @@ echo "Running!";
# Wait for our agent to finish
#
wait
exit $?
#!/bin/sh
files="pelab/auto-pelab.ns pelab/common-env.sh pelab/dbmonitor pelab/libnetmon pelab/magent pelab/monitor pelab/pelabdb.pwd pelab/stub"
files="pelab/auto-pelab.ns pelab/common-env.sh pelab/init-elabnodes.pl pelab/dbmonitor pelab/libnetmon pelab/magent pelab/monitor pelab/pelabdb.pwd pelab/stub"
if [ $# != 1 ]; then
echo "usage: $0 tarball-path"
......
......@@ -20,7 +20,8 @@ echo $SH ${MONITOR_DIR}/run-monitor-libnetmon.sh $ARGS
$SH ${MONITOR_DIR}/run-monitor-libnetmon.sh $ARGS &
MONPID=$!
# Kill the monitor if we get killed - TODO: harsher kill?
trap "$AS_ROOT kill $MONPID; $AS_ROOT killall netmond" EXIT
# Note that we assume that a kill of us is "normal" and just exit 0.
trap "$AS_ROOT kill $MONPID; $AS_ROOT killall netmond; exit 0" EXIT
#
# Give it time to come up
......@@ -42,3 +43,4 @@ echo "Running!";
# Wait for our monitor to finish
#
wait
exit $?
10.0.0.1 10.1.0.1 elabc-elab-1
10.0.0.2 10.1.0.2 elabc-elab-2
......@@ -45,7 +45,15 @@ else
$PERL ${MONITOR_DIR}/$GENIPMAP > $IPMAP
fi
INITARG=""
if [ -r "/proj/$PROJECT/exp/$EXPERIMENT/tmp/initial-conditions.txt" ]; then
echo "Copy over initial conditions file for the real PlanetLab nodes";
cp -p /proj/$PROJECT/exp/$EXPERIMENT/tmp/initial-conditions.txt $INITCOND
INITARG="--initial=$INITCOND"
fi
#echo "Starting up monitor for $PROJECT/$EXPERIMENT $PELAB_IP $SIP";
echo "Starting up monitor with options --mapping=$IPMAP --experiment=$PROJECT/$EXPERIMENT --ip=$PELAB_IP --initial=$MONITOR_DIR/initial.txt";
exec $NETMON_DIR/$NETMOND -v 2 -f 262144 | tee $LOGDIR/libnetmon.out | $PYTHON $MONITOR_DIR/$MONITOR --mapping=$IPMAP --experiment=$PROJECT/$EXPERIMENT --ip=$PELAB_IP --initial=$MONITOR_DIR/initial.txt
exec $NETMON_DIR/$NETMOND -v 2 -f 262144 | tee $LOGDIR/libnetmon.out | $PYTHON $MONITOR_DIR/$MONITOR --mapping=$IPMAP --experiment=$PROJECT/$EXPERIMENT --ip=$PELAB_IP $INITARG
#exec $NETMON_DIR/$NETMOND -v 2 | $PYTHON $MONITOR_DIR/$MONITOR ip-mapping.txt $PROJECT/$EXPERIMENT $PELAB_IP $SIP
......@@ -10,6 +10,7 @@ my $PERL = "/usr/bin/perl";
my $EVENTSYS = "/usr/testbed/bin/eventsys_control";
my $NODELIST = "/usr/testbed/bin/node_list";
my $EXPINFO = "/usr/testbed/bin/expinfo";
my $INITCOND = "init-elabnodes.pl";
my $realplab = 0;
my $initelab = 0;
......@@ -38,6 +39,13 @@ sub usage()
}
my $optlist = "CS:s:M:m:piN:O:";
my $pelabdir;
if ($0 =~ /(.*)\/[^\/]+$/) {
$pelabdir = $1;
} else {
$pelabdir = ".";
}
#
# Parse command arguments.
#
......@@ -139,6 +147,9 @@ if (defined($options{"O"})) {
}
}
my $use_magent = ($stub_cmd =~ /magent/) ? 1 : 0;
my $use_simplemodel = ($mon_cmd =~ /dbmonitor/) ? 1 : 0;
#
# Let's help some poor bozos (Rob) get the right command line args
#
......@@ -230,10 +241,21 @@ if (system "$EVENTSYS -e $pid,$eid replay") {
print "##### Waiting for event system to start\n";
sleep(10);
if ($initelab) {
#
# Fetch and store the initial conditions for plab nodes if desired.
# We both stash them in a file and set them directly. The former is
# read by the monitor at start up to inform the stubs. The latter is
# for backward compat with the old monitor/stubs that did not do it for
# themselves.
#
if ($realplab && $initelab && !$use_simplemodel) {
print "##### Stashing initial path conditions for plab nodes\n";
if (system "$pelabdir/$INITCOND -o /proj/$pid/exp/$eid/tmp/initial-conditions.txt $pid $eid") {
warn "Could not acquire initial path conditions\n";
}
print "##### Initializing Emulab link characteristics\n";
if (system "$PERL init-elabnodes.pl $pid $eid") {
die "Error initializing Emulab links\n";
if (system "$pelabdir/$INITCOND $pid $eid") {
warn "Error initializing Emulab links with initial path conditions\n";
}
}
......@@ -266,10 +288,10 @@ if (system "$TEVC -e $pid/$eid now tdhosts start") {
#
my $startarg = "";
print "##### Starting stubs";
#if ($stub_cmdargs ne $UNKNOWN) {
# print " with: '$stub_cmd $stub_cmdargs'";
# $startarg = "'COMMAND=$stub_cmd $stub_cmdargs'";
#}
if ($stub_cmdargs ne $UNKNOWN) {
print " with: '$stub_cmd $stub_cmdargs'";
$startarg = "'COMMAND=$stub_cmd $stub_cmdargs'";
}
print "\n";
if ($realplab) {
if (system "$TEVC -e $pid/$eid now planetstubs start $startarg") {
......
......@@ -11,7 +11,8 @@ echo $SH ${STUB_DIR}/run-stub.sh $ARGS
$SH ${STUB_DIR}/run-stub.sh $ARGS &
STUBPID=$!
# Kill the stub if we get killed - TODO: harsher kill?
trap "$AS_ROOT kill $STUBPID; $AS_ROOT killall stubd" EXIT
# Note that we assume that a kill of us is "normal" and just exit 0.
trap "$AS_ROOT kill $STUBPID; $AS_ROOT killall stubd; exit 0" EXIT
#
# Give it time to come up
......@@ -42,3 +43,4 @@ echo "Running!";
# Wait for our stub to finish
#
wait
exit $?
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment