Commit d05b6989 authored by David Johnson's avatar David Johnson

Add a couple very lame, very fast multi-tenant scalability tests.

I added these verbatim instead of refactoring the main driver script;
will have to do that another time.
parent 73aca117
Pipeline #1917 passed with stage
in 2 seconds
#!/bin/sh
#
# Another test script; this one runs hadoop on a bunch of nodes.
#
# ./test-hadoop-mt.sh <testdir> <testname> <user-tenant> <service-tenant> \
# <num-tenants> <networkname> <bridgename> <num-slaves> [hadoop-args ...]
#
# First we fire off the service wfagent; then all the VMs (waiting for
# them to boot); and then the user wfagent.
# (Note: the master VM gets m1.large so it can create nearly 80GB input file
# if the job needs. The other VMs get m1.medium, so they have 4GB RAM.)
#
# To figure out when the test is done, we keep ssh'ing to the NM and
# looking through the user wfa's log looking for ^DONE$ .
#
# At the end, we have to scp the workflow agent logfile and controller
# logfile over here to the ctl node into our testdir.
#
# Trace every command so the captured test log shows exactly what ran.
set -x

# Load the shared test helpers that sit next to this script, then the
# setup library named by $SETUPLIB (presumably set by test-lib.sh or the
# environment -- it is not defined in this script).
DIRNAME=$(dirname $0)
. "$DIRNAME/test-lib.sh"
. "$SETUPLIB"

# Eight positional arguments are mandatory; trailing hadoop args are optional.
[ $# -ge 8 ] || {
    echo "USAGE: $0 <testdir> <testname> <utenant> <stenant> <ntenants> <networkname>"
    echo " <bridgename> <num-slaves> [<hadoop-args>]"
    exit 1
}

#
# Tunables.
#
# FASTBOOT=1 boots all of a tenant's slaves with a single nova command
# (--min-count); any other value boots them one by one.
# NOTE(review): launching all slaves from one nova command has been seen
# to cause a Neutron lock, and the original note said not to do it by
# default -- yet the fast path is enabled here.  Confirm the intended value.
#
FASTBOOT=1

# Seconds the WFA logs may go unchanged before the wait loop gives up
# (a value of 0 disables the timeout).
WFATIMEOUT=1800

# When 1, a test directory that already holds a SUCCESS file is not re-run.
SKIPSUCCESSFUL=1
#
# Capture the eight mandatory positional parameters (the usage check above
# guarantees they are present), then drop them in one go.
#
TESTDIR=$1
TESTNAME=$2
UTENANT=$3
STENANT=$4
NTENANTS=$5
NETWORK=$6
BRIDGE=$7
NSLAVES=$8
shift 8

# Whatever is left is kept as one space-joined string; empty when nothing
# extra was supplied.
ARGS="$*"
#
# Early exits.  Both are reported as success (exit 0) so a calling driver
# simply moves on:
#   - the test directory already holds a SUCCESS marker and re-testing of
#     successful runs is disabled (SKIPSUCCESSFUL=1);
#   - a SKIP marker was dropped into the test directory.
# Two separate tests joined with && replace the obsolescent "-a" form, and
# the paths are quoted so an unusual TESTDIR cannot break the test.
#
if [ "$SKIPSUCCESSFUL" -eq 1 ] && [ -f "$TESTDIR/SUCCESS" ]; then
    echo "*** Not retesting Hadoop: $TESTNAME already Done successfully (results in $TESTDIR)!"
    exit 0
fi
if [ -f "$TESTDIR/SKIP" ]; then
    echo "*** WARNING: immediate skip triggered before tests!"
    exit 0
fi
#
# Build the parallel-SSH host arguments:
#   PCHOSTS - "-H <fqdn>" for every compute node in $COMPUTENODES
#   PNHOSTS - the network manager first, then every compute node
# (The former "PNHOSTS=$PCHOSTS" assignment was overwritten before any use
# and has been dropped.)
#
PCHOSTS=""
for cn in $COMPUTENODES ; do
    fqdn=$(getfqdn $cn)
    PCHOSTS="$PCHOSTS -H $fqdn"
done
fqdn=$(getfqdn $NETWORKMANAGER)
PNHOSTS="-H $fqdn $PCHOSTS"

echo "*** Testing hadoop: $TESTDIR $TESTNAME $UTENANT $STENANT $NTENANTS $NETWORK $BRIDGE $NSLAVES \"$ARGS\" ..."

# All results land in the test directory; without it there is no point in
# booting anything.
if ! mkdir -p "$TESTDIR" || ! cd "$TESTDIR"; then
    echo "ERROR: cannot create or enter test directory $TESTDIR; aborting!"
    exit 1
fi

# NB: Be admin when we fire off wfas...
. "$OURDIR/admin-openrc.sh"

# Resolve the network name to its id; every "nova boot" below needs it.
NETWORKID=$(openstack network show "$NETWORK" | awk ' / id / {print $4}')
if [ -z "$NETWORKID" ]; then
    echo "ERROR: could not find the id of network $NETWORK; aborting!"
    exit 1
fi
#
# Create the service WFAs: one master workflow agent per service tenant
# ($STENANT-0 .. $STENANT-<NTENANTS-1>), each with its own broker name.
# After each create we poll the agent list until the new agent shows up.
#
t=0
while [ "$t" -lt "$NTENANTS" ]; do
    STENANTID=$(openstack project show "$STENANT-$t" | awk ' / id / {print $4}')
    if [ -z "$STENANTID" ]; then
        echo "ERROR: could not find the id of project $STENANT-$t; aborting!"
        exit 1
    fi
    STENANTWFANAME="$STENANT-$t-hadoop"
    if ! neutron capnet-wfagent-create --tenant-id "$STENANTID" \
            --name "$STENANTWFANAME" \
            --master --wfapp-path /usr/bin/capnet-wfagent-service-tenant-hadoop-membrane \
            --wfapp-args "broker_name=hadoop-$t" \
            "$NETWORK"; then
        echo "ERROR: creating service wfa $STENANTWFANAME; aborting!"
        exit 1
    fi
    # Wait for the agent to be listed.  Pause between polls so we do not
    # hammer the Neutron API in a tight loop.
    while : ; do
        STENANTWFAID=$(neutron capnet-wfagent-list | awk " / $STENANTWFANAME / { print \$2 }")
        if [ -n "$STENANTWFAID" ]; then
            break
        fi
        sleep 1
    done
    t=$((t + 1))
done
sleep 5
#
# Don't run Hadoop?!
#
# By default slaves and the resourcemanager boot as m1.medium and the
# master as m1.large.  When TESTNOHADOOP is set to 1 in the environment,
# every VM is m1.small instead and a /var/tmp/NOHADOOP marker is created on
# the network manager.
#
NOHADOOP=0
FLAVOR=m1.medium
MFLAVOR=m1.large
# Test "is set" first and only then compare numerically: with the old
# single-bracket "-a" form an unset TESTNOHADOOP produced a malformed test
# expression and an error message on every run.
if [ -n "$TESTNOHADOOP" ] && [ "$TESTNOHADOOP" -eq 1 ]; then
    NOHADOOP=1
    FLAVOR=m1.small
    MFLAVOR=m1.small
    $SSH $NETWORKMANAGER touch /var/tmp/NOHADOOP
fi
#
# Fire off VMs.
# NB: Be the tenant user when we fire off VMs.
#
# For every tenant: source that tenant's user credentials, then boot one
# "master" ($MFLAVOR), one "resourcemanager" ($FLAVOR) and $NSLAVES slaves
# ($FLAVOR).  Any boot failure aborts the whole test.
#
t=0
while [ "$t" -lt "$NTENANTS" ]; do
    . "$OURDIR/$UTENANT-$t-user-openrc.sh"

    # Give the previous tenant's nodes a minute before starting the next batch.
    if [ "$t" -ne 0 ]; then
        echo "*** Waiting for nodes to initialize before firing next round..."
        sleep 60
    fi

    if ! nova boot --image hadoop --flavor "$MFLAVOR" --nic net-id="$NETWORKID" master; then
        echo "ERROR: failed to create master VM; aborting!"
        exit 1
    fi
    if ! nova boot --image hadoop --flavor "$FLAVOR" --nic net-id="$NETWORKID" resourcemanager; then
        echo "ERROR: failed to create resourcemanager VM; aborting!"
        exit 1
    fi

    if [ "$FASTBOOT" -eq 1 ]; then
        # Fast path: a single request creates all slaves.
        if ! nova boot --image hadoop --flavor "$FLAVOR" --nic net-id="$NETWORKID" \
                --min-count "$NSLAVES" slave; then
            echo "ERROR: failed to create slave VMs; aborting!"
            exit 1
        fi
    else
        # Slow path: one request per slave, named slave-1 .. slave-N.
        i=1
        while [ "$i" -le "$NSLAVES" ]; do
            if ! nova boot --image hadoop --flavor "$FLAVOR" --nic net-id="$NETWORKID" "slave-$i"; then
                # "print" is not a POSIX sh command; use echo so the
                # error is actually shown.
                echo "ERROR: failed to create slave-$i VM; aborting!"
                exit 1
            fi
            i=$((i + 1))
        done
    fi
    t=$((t + 1))
done
#
# Wait until every VM of every tenant reports the "Running" state.
#
# For each tenant we poll "nova list" every 5 seconds (as that tenant's
# user) and look at the 10th whitespace-separated field of each row that
# contains something UUID-like.
#
t=0
while [ "$t" -lt "$NTENANTS" ]; do
    . "$OURDIR/$UTENANT-$t-user-openrc.sh"
    allrunning=0
    while [ "$allrunning" -eq 0 ]; do
        echo "*** Still waiting for $UTENANT-$t nodes to reach Running state ..."
        sleep 5
        # Capture the listing on its own so that the exit status we test
        # is nova's.  Checking $? after the full pipeline only saw the
        # status of the last stage, so a failed listing produced an empty
        # status list and was mistaken for "everything is running".
        if ! novalist=$(nova list --limit -1); then
            echo "Error listing nova VMs; will try again!"
            continue
        fi
        vsl=$(printf '%s\n' "$novalist" \
            | awk ' / [0-9a-fA-F]*-[0-9a-fA-F-]* / { print $10 }' | xargs)
        allrunning=1
        for status in $vsl ; do
            if [ "$status" != "Running" ]; then
                allrunning=0
                break
            fi
        done
    done
    t=$((t + 1))
done
#
# "Running" does not mean the guests have networking yet, so give them a
# little extra time and hope for the best :)
#
echo "*** Sleeping 16 seconds to let all VMs come online, hopefully ..."
sleep 16

#
# Snapshot the OpenFlow 1.3 flow tables of $BRIDGE on every host in
# $PNHOSTS; stdout and stderr of each host go to separate directories.
#
ftout=$TESTDIR/out.flowtables-post-node-boot
fterr=$TESTDIR/err.flowtables-post-node-boot
mkdir -p $ftout $fterr
$PSSH $PNHOSTS -o $ftout -e $fterr \
    ovs-ofctl --protocol=OpenFlow13 dump-flows $BRIDGE
# NB: Be admin when we fire off wfas...
. "$OURDIR/admin-openrc.sh"
#
# Create the user WFAs: one master workflow agent per user tenant
# ($UTENANT-0 .. $UTENANT-<NTENANTS-1>), using the same per-tenant broker
# name as the matching service agent.  After each create we poll the agent
# list until the new agent shows up.
#
t=0
while [ "$t" -lt "$NTENANTS" ]; do
    UTENANTID=$(openstack project show "$UTENANT-$t" | awk ' / id / {print $4}')
    if [ -z "$UTENANTID" ]; then
        echo "ERROR: could not find the id of project $UTENANT-$t; aborting!"
        exit 1
    fi
    UTENANTWFANAME="$UTENANT-$t-hadoop"
    if ! neutron capnet-wfagent-create --tenant-id "$UTENANTID" \
            --name "$UTENANTWFANAME" \
            --master --wfapp-path /usr/bin/capnet-wfagent-user-tenant-hadoop-membrane \
            --wfapp-args "broker_name=hadoop-$t" \
            "$NETWORK"; then
        echo "ERROR: creating user wfa; aborting!"
        exit 1
    fi
    # Wait for the agent to be listed.  Pause between polls so we do not
    # hammer the Neutron API in a tight loop.
    while : ; do
        UTENANTWFAID=$(neutron capnet-wfagent-list | awk " / $UTENANTWFANAME / { print \$2 }")
        if [ -n "$UTENANTWFAID" ]; then
            break
        fi
        sleep 1
    done
    t=$((t + 1))
done
#
# Wait for everything to finish, then sync back logfiles.
#
# For each tenant, every 10 seconds we ssh to the network manager and grep
# the two workflow-agent logs:
#   user WFA    - finished when a line "DONE" or a line starting with
#                 "Traceback" appears
#   service WFA - finished when a line starting with "Finished setting up
#                 Hadoop" or "Traceback" appears
# We also stat each log; any change in size/mtime counts as progress and
# resets the idle timer.  If neither log changes for WFATIMEOUT seconds,
# or a SKIP file appears in the test directory, we stop waiting entirely.
#
MULLOGFILE="/var/tmp/${BRIDGE}-mul-core.log"
CONTROLLERLOGFILE_SHORT="${BRIDGE}-controller.log"
CONTROLLERLOGFILE="/var/tmp/${CONTROLLERLOGFILE_SHORT}"
t=0
while [ "$t" -lt "$NTENANTS" ]; do
    STENANTWFANAME="$STENANT-$t-hadoop"
    UTENANTWFANAME="$UTENANT-$t-hadoop"
    # One listing serves both id lookups.
    wfalist=$(neutron capnet-wfagent-list)
    UTENANTWFAID=$(printf '%s\n' "$wfalist" | awk " / $UTENANTWFANAME / { print \$2 }")
    STENANTWFAID=$(printf '%s\n' "$wfalist" | awk " / $STENANTWFANAME / { print \$2 }")
    UWFALOGFILE_SHORT="wfagent.${UTENANTWFANAME}.${UTENANTWFAID}.log"
    UWFALOGFILE="/var/tmp/${UWFALOGFILE_SHORT}"
    SWFALOGFILE_SHORT="wfagent.${STENANTWFANAME}.${STENANTWFAID}.log"
    SWFALOGFILE="/var/tmp/${SWFALOGFILE_SHORT}"
    # Non-zero status == "not finished yet"; each becomes the exit status
    # of the remote grep once we start polling.
    STATUS_UWFA=1
    STATUS_SWFA=2
    # Last seen "size mtime" of each log, used to detect progress.
    STAT_UWFA=""
    STAT_SWFA=""
    WAITED=0
    while : ; do
        echo "*** Checking tenants $t..."
        if [ "$STATUS_UWFA" -ne 0 ]; then
            # The inner single quotes survive the local shell and protect
            # the pattern from the remote shell (same form as the service
            # check below).
            $SSH $NETWORKMANAGER \
                grep -E "'(^DONE\$)|(^Traceback)'" $UWFALOGFILE
            STATUS_UWFA=$?
            newstat=$($SSH $NETWORKMANAGER stat -c "'%s %y'" $UWFALOGFILE)
            if [ "$newstat" != "$STAT_UWFA" ]; then
                WAITED=0
                echo "*** Progress in UWFA ($STAT_UWFA -> $newstat)";
                STAT_UWFA="$newstat"
            fi
        fi
        if [ "$STATUS_SWFA" -ne 0 ]; then
            $SSH $NETWORKMANAGER \
                grep -E "'(^Finished setting up Hadoop)|(^Traceback)'" $SWFALOGFILE
            STATUS_SWFA=$?
            newstat=$($SSH $NETWORKMANAGER stat -c "'%s %y'" $SWFALOGFILE)
            if [ "$newstat" != "$STAT_SWFA" ]; then
                WAITED=0
                echo "*** Progress in SWFA ($STAT_SWFA -> $newstat)";
                STAT_SWFA="$newstat"
            fi
        fi
        if [ "$STATUS_UWFA" -eq 0 ] && [ "$STATUS_SWFA" -eq 0 ]; then
            break
        fi
        sleep 10
        WAITED=$((WAITED + 10))
        # Timeout and SKIP abort the whole wait ("break 2"), not just this
        # tenant: a plain "break" went on to wait on every remaining tenant
        # even though the messages announce an abort / immediate skip.
        if [ "$WFATIMEOUT" -gt 0 ] && [ "$WAITED" -ge "$WFATIMEOUT" ]; then
            echo "*** ERROR: timeout, WFAs making no progress; aborting!"
            break 2
        fi
        if [ -f "$TESTDIR/SKIP" ]; then
            echo "*** ERROR: immediate skip triggered during test!"
            break 2
        fi
    done
    t=$((t + 1))
done
#
# Copy the controller log, the mul-core log, every workflow-agent log and
# the cnc metadata files from the network manager into the test directory.
# The wildcards are kept literal here so they are expanded on the remote side.
#
echo "*** Test has completed, fetching logs ..."
for remotelog in "$CONTROLLERLOGFILE" "$MULLOGFILE" \
        '/var/tmp/wfagent.*.log' '/var/tmp/cnc.metadata.*'; do
    $SCP "$NETWORKMANAGER:$remotelog" $TESTDIR/
done

#
# Second flow-table snapshot of $BRIDGE on all hosts, taken after the run.
#
ftout=$TESTDIR/out.flowtables-post-hadoop
fterr=$TESTDIR/err.flowtables-post-hadoop
mkdir -p $ftout $fterr
$PSSH $PNHOSTS -o $ftout -e $fterr \
    ovs-ofctl --protocol=OpenFlow13 dump-flows $BRIDGE

# Mail a completion notice to $TESTEMAIL and finish.
echo "MT Done ${TESTNAME}" | mailx -s "MT Done ${TESTNAME}" "$TESTEMAIL"
echo "*** Done ($TESTNAME); results in $TESTDIR"
exit 0
#!/bin/sh
#
# A script (untested, for archival purposes) that runs the commands used
# to generate the data for the OSDI 2016 submission.
#
# Trace every command.
set -x

# Load the shared helpers next to this script, then the library named by
# $SETUPLIB (not defined in this script).
DIRNAME=$(dirname $0)
. "$DIRNAME/test-lib.sh"
. "$SETUPLIB"

HOMEDIR=$(readlink -f ~)

#
# Optional positional overrides; each falls back to its default only when
# the argument is absent altogether:
#   $1 - base directory for all test results
#   $2 - prefix for test names
#   $3 - number of tenant pairs
#
TESTBASEDIRNAME=${1-$HOMEDIR/capnet-tests}
TESTBASENAME=${2-test}
NTENANTS=${3-1}

# Slave counts to run, space-separated.  Other lists that have been used:
#   "100 150", "50 100 150 200", "200 50 100 150"
SIZES="100"
# Number of runs per size.
ITERATIONS=5

UTENANT="tenant-0"
STENANT="service-0"
NETWORK="capnetlan-1-net"
BRIDGE="br-capnetlan-1"

# When 1, tests that already have a SUCCESS marker are not re-run.
SKIPSUCCESSFUL=1
# Absolute location of the helper scripts.  $DIRNAME may be relative, and
# the loop below changes directory, so resolve it once up front (falling
# back to $DIRNAME unchanged if that fails).
scriptdir=$(cd "$DIRNAME" 2>/dev/null && pwd) || scriptdir=$DIRNAME

#
# purge_tenants_and_bridge: one full cleanup pass.
# Runs test-cleanup-tenant.sh for service-<t> and tenant-<t> for every
# t in 0..NTENANTS-1 (4 second pause after each), then restarts the
# switch/controller for $BRIDGE.  Sets UTENANT, STENANT and t as it goes.
#
purge_tenants_and_bridge() {
    t=0
    while [ "$t" -lt "$NTENANTS" ]; do
        UTENANT="tenant-$t"
        STENANT="service-$t"
        echo "Cleaning out tenant $STENANT ..."
        "$scriptdir/test-cleanup-tenant.sh" "$STENANT"
        sleep 4
        echo "Cleaning out tenant $UTENANT ..."
        "$scriptdir/test-cleanup-tenant.sh" "$UTENANT"
        sleep 4
        t=$((t + 1))
    done
    echo "Cleaning out bridge $BRIDGE ..."
    "$scriptdir/test-cleanup-sw-restart-controller.sh" "$BRIDGE"
}

#
# Main loop: for every size in $SIZES run $ITERATIONS tests, each in its
# own directory <TESTBASEDIRNAME>/<TESTBASENAME>-<size>-<iteration>.
#
for size in $SIZES ; do
    j=0
    while [ "$j" -lt "$ITERATIONS" ]; do
        # Incremented first, so the "continue"s below still advance.
        j=$((j + 1))
        testname="${TESTBASENAME}-${size}-${j}"
        testdir="${TESTBASEDIRNAME}/${testname}"

        # Skip (rather than end the whole run) when this particular test
        # already succeeded or carries a SKIP marker.  These checks must
        # look at this test's own directory, $testdir.
        if [ "$SKIPSUCCESSFUL" -eq 1 ] && [ -f "$testdir/SUCCESS" ]; then
            echo "*** Not retesting Hadoop: $testname already Done successfully (results in $testdir)!"
            continue
        fi
        if [ -f "$testdir/SKIP" ]; then
            echo "*** WARNING: immediate skip triggered before tests!"
            continue
        fi

        #
        # Clean out tenants and switches to make sure everything's
        # clean.  Sometimes a workflow agent doesn't get fully deleted,
        # probably because the switch restart acts before the capnet
        # agent can delete the wfagents.  So we add sleeps, delete any
        # leftover agents by hand, and do the cleanup pass twice.
        #
        purge_tenants_and_bridge

        #
        # Ensure all wfas are gone... XXX
        # Keep deleting whatever the all-tenants list still shows until
        # the list is empty.
        #
        . /root/setup/admin-openrc.sh
        while : ; do
            wfas=$(neutron capnet-wfagent-list --all-tenants \
                | awk '/ / {print $2}' | grep -v ^id)
            if [ -z "$wfas" ]; then
                break
            fi
            for wfa in $wfas; do
                neutron capnet-wfagent-delete "$wfa"
            done
            sleep 4
        done

        purge_tenants_and_bridge

        #
        # Ok, run the test.
        #
        echo "Running test-$size-$j ..."
        mkdir -p "$testdir"
        # Remember where we were ("pwd"; there is no "cwd" command) so we
        # can return after the test.
        cdir=$(pwd)
        cd "$testdir"
        # Merge stderr into stdout *before* the pipe so that both streams
        # of the test end up in test.log.
        "$scriptdir/test-hadoop-mt.sh" "$testdir" "$testname" tenant service "$NTENANTS" \
            "$NETWORK" "$BRIDGE" "$size" 2>&1 | tee "$testdir/test.log"
        echo "Finished $testname (results in $testdir)..."
        cd "$cdir"
    done
done
echo "Done with $ITERATIONS iterations of test sizes $SIZES ."
exit 0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment