#!/bin/sh

# global variables
BatchMode=0            # non-zero when jobs go through the batch system
NoRun=0                # set to 1 by the -n|-norun option
JobCheckInterval=10    # sleep time between jobs finished check
InputString=""         # command-line argument currently being processed
RunString=""           # command line assembled by MpirunString
ExecFileNames=""       # string of executable file names used
TestDirNames=""        # string of names of TEST_* directories used
HOST=$(hostname | cut -c1-4)  # first 4 characters of host platform name

# print the command-line help text on stdout
#
# The script name ($0) is handed to printf as data rather than being pasted
# into the format string, so a '%' or '\' in the invocation path cannot
# corrupt the output.
usage()
{
  printf '%s\n' \
    "" \
    "$0 [options] {test_path}/{test_name}.sh" \
    "" \
    " where: {test_path} is the directory path to the test script;" \
    " {test_name} is a user defined name for the test script" \
    "" \
    " with options:" \
    " -h|-help prints this usage information and exits" \
    " -n|-norun turn off execute mode, echo what would be run" \
    " -t|-trace echo each command" \
    " -D <var> define <var> when running tests" \
    "" \
    " This is the hypre test driver script. It is run stand-alone" \
    " or by the autotest regression test script. It is assumed that" \
    " there are test directories test/TEST_{solver} that contain:" \
    " 1. Individual test scripts named {test_name}.jobs that provide" \
    " the mpirun execution syntax" \
    " 2. Test run output files named {test_name}.out.{number}" \
    " 3. Individual scripts to compare (usually using diff) output" \
    " files from corresponding {test_name}.jobs scripts" \
    "" \
    " Ideally, the *.jobs and *.sh scripts can be run as stand-alone" \
    " shell script files. A test is considered successful when there " \
    " are no error files generated by the *.sh scripts." \
    "" \
    " NOTE: This script knows about most of the ASC machines" \
    " and will automatically use the Livermore Computing Resource" \
    " Management (LCRM) batch system as needed." \
    "" \
    " Example usage: ./runtest.sh -t TEST_sstruct/*.sh" \
    ""
}

# generate default command based on the first 4 characters of the platform name
#
# Arguments: the words that follow "mpirun" on a *.jobs line, i.e.
#            -np <ntasks> <executable> [args...]
# Globals:   reads HOST and (bgl only) EXECFILE; writes RunString and,
#            depending on the platform, BatchMode, MY_NUM_TASKS,
#            MY_EXECUTE_DIR, MY_EXECUTE_JOB, MY_ARGS, CPUS_PER_NODE,
#            POE_NUM_PROCS, POE_NUM_NODES, MACHINES_FILE and MPIRUN
#
# The arm order matters: "*bgl*" is the only pattern that can overlap the
# prefix patterns, so it stays between the same neighbours as before.
MpirunString()
{
  case $HOST in
    alc*|peng*|thun*|vert*)
      shift                       # drop the -np flag, keep the task count
      RunString="srun -p pdebug -n$*"
      ;;
    *bgl*)
      shift
      BatchMode=1
      MY_NUM_TASKS=$1
      MY_EXECUTE_DIR=$(pwd)
      MY_EXECUTE_JOB=$MY_EXECUTE_DIR/$EXECFILE
      shift                       # drop the task count
      shift                       # drop the executable name
      MY_ARGS="$*"
      RunString="mpirun -verbose 1 -np $MY_NUM_TASKS -exe $MY_EXECUTE_JOB"
      RunString="${RunString} -cwd $MY_EXECUTE_DIR -args \" $MY_ARGS \" "
      ;;
    up*)
      CPUS_PER_NODE=8
      POE_NUM_PROCS=$2
      # round the node count up so every process gets a CPU
      POE_NUM_NODES=$(( (POE_NUM_PROCS + CPUS_PER_NODE - 1) / CPUS_PER_NODE ))
      shift
      shift
      MY_ARGS="$*"                # executable followed by its arguments
      RunString="poe $MY_ARGS -rmpool pdebug -procs $POE_NUM_PROCS -nodes $POE_NUM_NODES"
      ;;
    zeus*|atla*)
      shift
      RunString="srun -p pdebug -n$*"
      ;;
    tux*)
      BatchMode=0
      MACHINES_FILE="hostname"
      if [ ! -f "$MACHINES_FILE" ] ; then
        hostname > "$MACHINES_FILE"
      fi
      # command -v prints just the path; fall back to the bare name so a
      # missing mpirun yields a clear "mpirun: not found" at run time
      MPIRUN=$(command -v mpirun)
      [ -n "$MPIRUN" ] || MPIRUN=mpirun
      RunString="$MPIRUN -machinefile $MACHINES_FILE $*"
      ;;
    *)
      MPIRUN=$(command -v mpirun)
      [ -n "$MPIRUN" ] || MPIRUN=mpirun
      RunString="$MPIRUN $*"
      ;;
  esac
}

# determine the "number of nodes" desired by dividing the "number of processes"
# by the "number of CPU's per node" which can't be determined dynamically (real
# ugly hack)
#
# Arguments: a job command line; the word after the first argument that
#            starts with "-n" is taken as the process count
# Globals:   reads HOST; writes NUM_PROCS, NUM_NODES, CPUS_PER_NODE
# Returns:   the node count as the exit status (1 when no "-n*" argument is
#            found). Exit statuses wrap at 256, so callers that may see
#            larger counts should read NUM_NODES instead.
CalcNodes()
{
  NUM_PROCS=1
  NUM_NODES=1
  case $HOST in
    alc*|peng*)  CPUS_PER_NODE=2 ;;
    thun*)       CPUS_PER_NODE=4 ;;
    *bgl*|vert*) CPUS_PER_NODE=2 ;;
    up*|zeus*)   CPUS_PER_NODE=8 ;;
    *)           CPUS_PER_NODE=1 ;;
  esac

  while [ "$1" ]
  do
    case $1 in
      -n*)
        NUM_PROCS=$2
        # ceiling division: a partially used node still counts as a node
        NUM_NODES=$(( (NUM_PROCS + CPUS_PER_NODE - 1) / CPUS_PER_NODE ))
        return "$NUM_NODES"
        ;;
      *)
        shift
        ;;
    esac
  done
  return 1
}

# extract the "number of processes/task"
#
# Arguments: a job command line
# Returns:   as the exit status, the word following the first argument that
#            starts with "-n"; 1 when there is no such argument. Exit
#            statuses wrap at 256.
CalcProcs()
{
  while [ "$1" ]
  do
    case $1 in
      -n*)
        return "$2"
        ;;
      *)
        shift
        ;;
    esac
  done
  return 1
}

# determine if HOST machine can process batch queues
# set to run in debug pool unless batch MUST be used.
#
# Globals: reads HOST; writes BATCH_MODE
# Returns: BATCH_MODE as the exit status (1 only for hosts matching *bgl*)
#
# alc/peng/thun are listed ahead of *bgl* so that they keep precedence over
# it, exactly as in the original arm order; every other host gets 0.
CheckBatch()
{
  case $HOST in
    alc*|peng*|thun*) BATCH_MODE=0 ;;
    *bgl*)            BATCH_MODE=1 ;;
    *)                BATCH_MODE=0 ;;
  esac
  return $BATCH_MODE
}

# check the path to the executable if the executable exists; save the name to
# ExecFileNames
#
# Arguments: a job command line of the form -np <ntasks> <executable> ...
# Globals:   reads StartDir; writes EXECFILE; appends to ExecFileNames
# Returns:   0 when $StartDir/<executable> is an executable file (it is then
#            copied into the current directory); 1 otherwise, including when
#            no "-n*" argument is present
CheckPath()
{
  while [ "$1" ]
  do
    case $1 in
      -n*)
        EXECFILE=$3
        # -f keeps an empty name from matching the start directory itself,
        # which would pass a bare -x test
        if [ -f "$StartDir/$EXECFILE" ] && [ -x "$StartDir/$EXECFILE" ] ; then
          # the copy's status is deliberately not checked: the original
          # reported success here regardless of cp's outcome
          cp -f "$StartDir/$EXECFILE" "$EXECFILE"
          ExecFileNames="$ExecFileNames $EXECFILE"
          return 0
        fi
        echo "Cannot find executable!!!"
        return 1
        ;;
      *)
        shift
        ;;
    esac
  done
  return 1
}

# initialize the common part of the " PsubCmd" string, ugly global vars!
# global "RunName" is assumed to be predefined
#
# on ubgl, as of 8/2006, only allowable number of nodes are 32, 128 and
# multiples of 512
#
# Arguments: a job command line, passed on to CalcNodes and CalcProcs
# Globals:   reads HOST, RunName, NUM_NODES, NUM_PROCS;
#            writes NumNodes, NumProcs, PsubCmd
PsubCmdStub()
{
  CalcNodes "$@"
  NumNodes=$?
  CalcProcs "$@"
  NumProcs=$?
  # Exit statuses wrap at 256, so counts above 255 arrive truncated.
  # CalcNodes also leaves the untruncated values in NUM_NODES/NUM_PROCS;
  # use those whenever they are set.
  if [ -n "$NUM_NODES" ] ; then
    NumNodes=$NUM_NODES
  fi
  if [ -n "$NUM_PROCS" ] ; then
    NumProcs=$NUM_PROCS
  fi

  case $HOST in
    alc*)
      PsubCmd="psub -c alc,pbatch -b casc -r $RunName -ln $NumProcs"
      ;;
    peng*)
      PsubCmd="psub -c pengra,pbatch -b casc -r $RunName -ln $NumProcs"
      ;;
    thun*)
      PsubCmd="psub -c thunder,pbatch -b casc -r $RunName -ln $NumNodes -g $NumProcs"
      ;;
    ubgl*)
      PsubCmd="psub -c ubgl -pool pbatch -b science -r $RunName -ln 32"
      ;;
    up*)
      PsubCmd="psub -c up -pool pbatch -b a_casc -r $RunName -ln $NumProcs"
      ;;
    vert*)
      PsubCmd="psub -c vertex,pbatch -b casc -r $RunName -ln $NumProcs"
      ;;
    zeus*)
      PsubCmd="psub -c zeus,pbatch -b casc -r $RunName -ln $NumProcs"
      ;;
    *)
      PsubCmd="psub -b casc -r $RunName -ln $NumProcs"
      ;;
  esac
}

# submit the batch job held in the global PsubCmd and wait until its id no
# longer appears in the pstat listing; does nothing when NoRun is set
#
# Globals: reads NoRun, PsubCmd, JobCheckInterval; writes CmdReply, PrevPid
# NOTE(review): the job id is taken to be the second word of psub's reply,
# as in the original code - confirm against the psub in use.
_SubmitAndWait()
{
  if [ "$NoRun" -eq 0 ] ; then
    # PsubCmd is a whole command line kept in a string: split on purpose
    CmdReply=$($PsubCmd)
    PrevPid=$(echo $CmdReply | cut -d ' ' -f 2)
    # an empty id would make grep fail on a missing pattern, so skip the wait
    while [ -n "$PrevPid" ] && [ "$(pstat | grep -- "$PrevPid")" ]
    do
      sleep "$JobCheckInterval"
    done
  fi
}

# read job file line by line saving arguments
#
# Arguments: $1 start directory (where the executables live)
#            $2 working (TEST_*) directory
#            $3 test name; the job list is read from $3.jobs in $2
# Globals:   writes StartDir, WorkingDir, InputFile (StartDir is read by
#            CheckPath), plus RunCmd, OutFile, ErrFile, RunName, BatchFile,
#            BatchCount, BatchFlag; reads HOST, BatchMode, NoRun
# Returns:   0 normally; 1 when an executable is missing or the working
#            directory cannot be entered. Exits the script with status 1 on
#            an unrecognised line in the jobs file.
#
# Recognised lines: "#BATCH" opens a group of runs that share one batch
# script, "#END" submits that group, other lines starting with "#" are
# comments, and lines containing "mpirun" are runs whose stdout target
# follows ">".
# NOTE(review): direct (non-batch) runs are executed even when NoRun is set,
# as in the original code.
ExecuteJobs()
{
  StartDir=$1
  WorkingDir=$2
  InputFile=$3
  ReturnFlag=0                 # error return flag
  BatchFlag=0                  # #BATCH option detected flag
  BatchCount=0                 # different numbering for #Batch option
  PrevPid=0
  SavePWD=$(pwd)

  ## move to specified directory
  cd "$WorkingDir" || return 1

  ## open *.jobs files for reading
  while read InputLine
  do
    case $InputLine in
      "#BATCH"*)
        BatchFlag=1
        BatchFile=""
        ;;

      "#END"*)
        BatchFlag=0
        # no batch script exists when the runs were executed directly or the
        # group was empty; there is nothing to submit in that case
        if [ -n "$BatchFile" ] ; then
          chmod +x "$BatchFile"
          PsubCmd="$PsubCmd -o $OutFile -e $ErrFile $(pwd)/$BatchFile"
          _SubmitAndWait
        fi
        BatchFile=""
        ;;

      "#"*)
        # comment line; matched before *mpirun* so that a comment which
        # merely mentions mpirun is not executed
        :
        ;;

      *mpirun*)
        # collapse blanks and tabs the way an unquoted echo would, without
        # exposing the line to pathname expansion
        JobLine=$(printf '%s\n' "$InputLine" | tr -s ' \t' '  ')
        RunCmd=$(printf '%s\n' "$JobLine" |
          sed -e 's/^[ \t]*mpirun[ \t]*//' -e 's/[ \t]*>.*$//')
        OutFile=$(printf '%s\n' "$JobLine" | sed -e 's/^.*>//' -e 's/ //g')
        ErrFile=$(printf '%s\n' "$OutFile" | sed -e 's/\.out\./.err./')
        RunName=$(printf '%s\n' "$OutFile" | sed -e 's/\.out.*$//')

        CheckPath $RunCmd             # check path to executable
        if [ "$?" -gt 0 ] ; then
          printf '%s\n' "Executable doesn't exist command:" "$InputLine" \
            >> "$RunName.err"
          ReturnFlag=1
          break
        fi

        MpirunString $RunCmd          # construct "RunString"
        case $HOST in
          *bgl*)
            RunString="${RunString} > $(pwd)/$OutFile 2>$(pwd)/$ErrFile"
            ;;
        esac

        if [ "$BatchMode" -eq 0 ] ; then
          # RunString is a command line kept in a string: split on purpose
          ${RunString} > "$OutFile" 2> "$ErrFile" </dev/null
        elif [ "$BatchFlag" -eq 0 ] ; then
          # stand-alone run: one batch script per job, submitted right away
          BatchFile=$(printf '%s\n' "$OutFile" | sed -e 's/\.out\./.batch./')
          printf '%s\n' "cd $(pwd)" "$RunString" > "$BatchFile"
          chmod +x "$BatchFile"
          PsubCmdStub ${RunCmd}
          case $HOST in
            *bgl*)
              PsubCmd="$PsubCmd $(pwd)/$BatchFile"
              ;;
            *)
              PsubCmd="$PsubCmd -o $OutFile -e $ErrFile $(pwd)/$BatchFile"
              ;;
          esac
          _SubmitAndWait
        else
          # inside #BATCH ... #END: collect the runs in one script
          if [ -z "$BatchFile" ] ; then
            BatchFile=$InputFile.batch.$BatchCount
            BatchCount=$((BatchCount + 1))
            printf '%s\n' "cd $(pwd)" "$RunString" > "$BatchFile"
          else
            printf '%s\n' "$RunString" >> "$BatchFile"
          fi
          PsubCmdStub ${RunCmd}       # construct a PsubCmd string
        fi
        ;;

      *"#"*)
        :
        ;;

      *)
        # read already stripped surrounding blanks, so anything left is
        # unexpected content
        if [ -n "$InputLine" ] ; then
          echo "Found something unexpected in $WorkingDir/$InputFile.jobs"
          echo "--> $InputLine"
          exit 1
        fi
        ;;
    esac
  done < "$InputFile.jobs"     # done with open *.jobs file for reading

  cd "$SavePWD"
  return $ReturnFlag
}

# compare output files as defined in *.sh files
#
# Arguments: $1 start directory, $2 working (TEST_*) directory, $3 test name
# Globals:   writes StartDir, WorkingDir, InputFile, SavePWD
# Effects:   inside $2, concatenates $3.err.* into $3.err and then appends
#            everything ./$3.sh prints (stdout and stderr) to that file
# Returns:   1 when the working directory cannot be entered
ExecuteTest()
{
  StartDir=$1
  WorkingDir=$2
  InputFile=$3
  SavePWD=$(pwd)

  cd "$WorkingDir" || return 1
  cat "$InputFile".err.* > "$InputFile.err"
  # one shared descriptor keeps stdout and stderr in write order
  "./$InputFile.sh" >> "$InputFile.err" 2>&1
  cd "$SavePWD"
}

# report errors from PURIFY and/or INSURE if run
#
# Arguments: $1 start directory, $2 working (TEST_*) directory, $3 test name
# Globals:   reads BatchMode; writes StartDir, WorkingDir, InputFile, SavePWD
# Effects:   only when BatchMode is 0. A purify.log in $2 is renamed to
#            $3.purify.log; otherwise an insure.log is renamed to
#            $3.insure.log after ~/insure.log, if present, has been appended
#            to it and ~/insure.log* removed. Lines of the renamed log that
#            contain "hypre_" (any case) are appended to $3.err.
# Returns:   1 when the working directory cannot be entered
PostProcess()
{
  StartDir=$1
  WorkingDir=$2
  InputFile=$3
  SavePWD=$(pwd)

  cd "$WorkingDir" || return 1
  if [ "$BatchMode" -eq 0 ] ; then
    if [ -f purify.log ] ; then
      mv purify.log "$InputFile.purify.log"
      grep -i hypre_ "$InputFile.purify.log" >> "$InputFile.err"
    elif [ -f insure.log ] ; then
      if [ -f ~/insure.log ] ; then
        cat ~/insure.log >> insure.log
        rm -f ~/insure.log*
      fi
      mv insure.log "$InputFile.insure.log"
      grep -i hypre_ "$InputFile.insure.log" >> "$InputFile.err"
    fi
  fi
  cd "$SavePWD"
}

# removes executable and hostname files from all TEST_* directories
#
# Arguments: ignored; the lists are taken from the globals
# Globals:   reads BatchMode, TestDirNames, ExecFileNames (both are
#            blank-separated lists and are split on purpose)
# Effects:   only when BatchMode is 0. Each listed executable that exists
#            in a listed directory is removed, together with a "hostname"
#            file in the current directory; each directory's own "hostname"
#            file is removed as well.
CleanUp()
{
  if [ "$BatchMode" -ne 0 ] ; then
    return
  fi

  for i in $TestDirNames
  do
    for j in $ExecFileNames
    do
      ExecuteFile=$i/$j
      if [ -x "$ExecuteFile" ] ; then
        rm -f "$ExecuteFile"
        rm -f hostname
      fi
    done
    ExecuteFile=$i/hostname
    if [ -f "$ExecuteFile" ] ; then
      rm -f "$ExecuteFile"
    fi
  done
}

# process files
#
# Arguments: start directory, working (TEST_*) directory, test name; passed
#            unchanged to ExecuteJobs, ExecuteTest and PostProcess
# Globals:   writes BatchMode from CheckBatch's exit status
# Effects:   removes ~/insure.log* before running anything
StartCrunch()
{
  rm -f ~/insure.log*

  CheckBatch
  BatchMode=$?
  ExecuteJobs "$@"
  ExecuteTest "$@"
  PostProcess "$@"
}

#==========================================================================
# main
#
# Walks the command line: option words adjust global state, every other
# word is taken as {test_path}/{test_name}.sh and run through StartCrunch.
#==========================================================================

while [ "$*" ]
do
  case $1 in
    -h|-help)
      usage
      exit
      ;;
    -n|-norun)
      NoRun=1
      shift
      ;;
    -t|-trace)
      set -xv
      shift
      ;;
    -D)
      shift
      # export the named variable with value 1; done without eval so the
      # argument is never re-parsed as shell code
      export "$1=1"
      shift
      ;;
    *)
      InputString=$1
      if [ -z "$InputString" ] ; then
        printf "%s: Strange input parameter=%s\n" "$0" "$InputString"
        exit 1
      fi
      if [ ! -r "$InputString" ] ; then
        printf "%s: test command file %s does not exist\n" \
          "$0" "$InputString"
        printf "can not find .sh file\n"
        exit 1
      fi

      FilePart=$(basename "$InputString" .sh)
      DirPart=$(dirname "$InputString")
      CurDir=$(pwd)
      TestDirNames="$TestDirNames $DirPart"

      case $DirPart in
        TEST_examples)
          # pull the built example programs next to the test driver
          ExampleFiles="ex1 ex2 ex3 ex4 ex5 ex6 ex7 ex8 ex9"
          BabelExampleFiles="ex5b ex5b77"
          # skip the copy when ../examples is missing instead of copying
          # from whatever directory we happen to be in
          if cd ../examples ; then
            for file in $ExampleFiles $BabelExampleFiles
            do
              if [ -x "$file" ] ; then
                cp -f "$file" "$CurDir"
              fi
            done
            cd "$CurDir"
          fi
          ;;
      esac

      if [ -r "$DirPart/$FilePart.jobs" ] ; then
        StartCrunch "$CurDir" "$DirPart" "$FilePart"
      else
        printf "%s: test command file %s/%s.jobs does not exist\n" \
          "$0" "$DirPart" "$FilePart"
        exit 1
      fi
      shift
      ;;
  esac
done

#
# remove executable files from TEST_* directories
CleanUp $TestDirNames $ExecFileNames

# Filter misleading error messages
#
# Arguments: the test directories to scan
# Effects:   for every *.err file under those directories that contains a
#            line matching one of the patterns below, the unfiltered text is
#            saved (behind a one-line header) in the matching *.fil file and
#            the *.err file is rewritten without the matching lines. The
#            pattern list lives in ./runtest.filters for the duration of the
#            call.
FilterErrFiles()
{
  cat > runtest.filters <<'EOF'
job [0-9]* queued and waiting for resources
job [0-9]* has been allocated resources
SLURMINFO: Job [0-9]* is pending allocation of resources.
ATTENTION: [0-9\-]* Couldn't create .*, job may not be checkpointable
ATTENTION: [0-9\-]* Error opening file
EOF

  for dir in "$@"
  do
    # read names line by line so that blanks in a path do not split it
    find "$dir" -name "*.err" | while IFS= read -r errfile
    do
      if grep -E -f runtest.filters "$errfile" > /dev/null ; then
        original=${errfile%.err}.fil
        echo "This file contains the original copy of $errfile before filtering" > "$original"
        cat "$errfile" >> "$original"
        mv "$errfile" "$errfile.tmp"
        grep -E -v -f runtest.filters "$errfile.tmp" > "$errfile"
        rm -f "$errfile.tmp"
      fi
    done
  done

  rm -f runtest.filters
}

# TestDirNames is a blank-separated list and is split on purpose
FilterErrFiles $TestDirNames