All new accounts created on GitLab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

Commit 4d0be5ac authored by David Johnson

Skip tests that have already succeeded; add a progress check and abort when the WFAs show no progress for 1800s.

parent a104f679
Pipeline #1900 passed with stage
in 2 seconds
......@@ -35,6 +35,14 @@ fi
# So don't do this for now by default.
#
FASTBOOT=0
#
# Abort the wait for the WFAs once their log files have shown no change
# (size/mtime as reported by stat) for this many seconds.  The timeout is
# only enforced when the value is greater than 0.
#
WFATIMEOUT=1800
#
# When set to 1, exit right away with status 0 if $TESTDIR/SUCCESS already
# exists, i.e. do not re-run a test that has already succeeded.
#
SKIPSUCCESSFUL=1
# The first positional argument is the test directory; shift it off so that
# only the remaining arguments are left in $@ for the code that follows.
TESTDIR=$1
shift
......@@ -56,6 +64,11 @@ if [ $# -gt 0 ]; then
ARGS="$@"
fi
#
# Nothing to do if skipping is enabled (SKIPSUCCESSFUL=1) and this test's
# directory already holds a SUCCESS marker file.  TESTDIR is quoted so a
# path containing whitespace is still tested as a single file name, and the
# two conditions are separate tests joined by && rather than the
# obsolescent (and ambiguous) -a operator.
#
if [ "$SKIPSUCCESSFUL" -eq 1 ] && [ -f "$TESTDIR/SUCCESS" ]; then
    echo "*** Not retesting Hadoop: $TESTNAME already Done successfully (results in $TESTDIR)!"
    exit 0
fi
#
# Create some parallel SSH command lines...
#
......@@ -203,21 +216,41 @@ SWFALOGFILE_SHORT="wfagent.${STENANTWFANAME}.${STENANTWFAID}.log"
SWFALOGFILE="/var/tmp/${SWFALOGFILE_SHORT}"

#
# Wait for both WFAs (UWFA and SWFA) to finish.
#
# Every 10 seconds, for each WFA that has not finished yet:
#   * grep its log file on $NETWORKMANAGER for a completion line or a
#     Traceback line; grep's exit status (0 == a matching line was found)
#     is stored in STATUS_UWFA / STATUS_SWFA.  Matching lines are echoed
#     to stdout by grep.
#   * sample the log's "size mtime" (stat -c '%s %y'); a change since the
#     previous sample counts as progress and resets WAITED to 0.
#
# The loop ends when both greps have matched or, if WFATIMEOUT > 0, once
# WAITED (seconds slept without progress) reaches WFATIMEOUT.  WAITED keeps
# its final value so code after the loop can tell a timeout from a normal
# finish.
#
# NOTE(review): $SSH is left unquoted because it presumably holds a command
# line with options that must be word-split -- confirm against where it is
# built.
#
STATUS_UWFA=1
STATUS_SWFA=2
STAT_UWFA=""
STAT_SWFA=""
WAITED=0
while :; do
    if [ "$STATUS_UWFA" -ne 0 ]; then
        $SSH $NETWORKMANAGER \
            grep -E '\(^DONE\$\)\|\(^Traceback\)' $UWFALOGFILE
        STATUS_UWFA=$?
        newstat=$($SSH $NETWORKMANAGER stat -c "'%s %y'" $UWFALOGFILE)
        # An empty sample means the remote stat produced no output (e.g. the
        # ssh or stat call failed).  Do not treat that as progress: otherwise
        # one transient failure resets the timer twice (value -> "" -> value)
        # and a hung WFA might never time out.
        if [ -n "$newstat" ] && [ "$newstat" != "$STAT_UWFA" ]; then
            WAITED=0
            echo "*** Progress in UWFA ($STAT_UWFA -> $newstat)";
            STAT_UWFA="$newstat"
        fi
    fi

    if [ "$STATUS_SWFA" -ne 0 ]; then
        $SSH $NETWORKMANAGER \
            grep -E "'(^Finished setting up Hadoop)|(^Traceback)'" $SWFALOGFILE
        STATUS_SWFA=$?
        newstat=$($SSH $NETWORKMANAGER stat -c "'%s %y'" $SWFALOGFILE)
        # Same rule as for the UWFA log: ignore empty (failed) samples.
        if [ -n "$newstat" ] && [ "$newstat" != "$STAT_SWFA" ]; then
            WAITED=0
            echo "*** Progress in SWFA ($STAT_SWFA -> $newstat)";
            STAT_SWFA="$newstat"
        fi
    fi

    # Both logs contain a terminating line: stop waiting.
    if [ "$STATUS_UWFA" -eq 0 ] && [ "$STATUS_SWFA" -eq 0 ]; then
        break
    fi

    sleep 10
    WAITED=$((WAITED + 10))

    # Give up after WFATIMEOUT seconds without progress (0 disables this).
    if [ "$WFATIMEOUT" -gt 0 ] && [ "$WAITED" -ge "$WFATIMEOUT" ]; then
        echo "*** ERROR: timeout, WFAs making no progress; aborting!"
        break
    fi
done
echo "*** Test has completed, fetching logs ..."
......@@ -262,6 +295,10 @@ if [ $SERR -eq 0 ]; then
else
SMSG=""
fi
# Tag for the error mail: "No Progress" when the WFA wait ended because
# WAITED reached a positive WFATIMEOUT (the same condition the wait loop
# uses to abort), empty otherwise.
if [ "$WFATIMEOUT" -gt 0 ] && [ "$WAITED" -ge "$WFATIMEOUT" ]; then
    NOPROGMSG="No Progress"
else
    NOPROGMSG=""
fi
if [ $UERR -ne 0 -a $UDONE -eq 0 -a $SERR -ne 0 ]; then
touch $TESTDIR/SUCCESS
......@@ -277,7 +314,7 @@ else
# Controller logfiles get large; just send last 500 lines in attachment
TMPF=`mktemp`
tail -500 $TESTDIR/${CONTROLLERLOGFILE_SHORT} > $TMPF
echo "ERROR ${TESTNAME} $DONEMSG $UMSG $SMSG (UERR=$UERR UDONE=$UDONE SERR=$SERR)" \
echo "ERROR ${TESTNAME} $NOPROGMSG $DONEMSG $UMSG $SMSG (UERR=$UERR UDONE=$UDONE SERR=$SERR)" \
| mailx -s "ERROR ${TESTNAME} $DONEMSG $UMSG $SMSG" \
-a $TESTDIR/${UWFALOGFILE_SHORT} -a $TESTDIR/${SWFALOGFILE_SHORT} \
-a $TMPF "$TESTEMAIL"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment