#!/bin/sh
# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
# HYPRE Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

# Name of this test: the script's own file name with the directory part and
# the .sh suffix removed. "$0" is quoted so a path containing whitespace is
# handed to basename as a single operand.
testname=$(basename "$0" .sh)

# Echo usage information
# When the first argument is -h or -help, print the help text below and exit
# without running any test. Any other first argument falls through.
case $1 in
   -h|-help)
      cat <<EOF

   **** Only run this script on the lassen cluster ****

   $0 [-h|-help] {src_dir}

   where: -h|-help   prints this usage information and exits
          {src_dir}  is the hypre source directory

   This script runs a number of tests suitable for the lassen cluster.

   Example usage: $0 ../src

EOF
      exit
      ;;
esac

# Setup
# Resolve the hypre source directory (first argument) to an absolute path.
# This is done before touching the output directory so that a missing or
# invalid argument aborts the script without deleting earlier results.
if [ -z "$1" ]; then
   echo "$0: missing {src_dir} argument (run with -h for usage)" >&2
   exit 1
fi
src_dir=$(cd "$1" && pwd) || {
   echo "$0: cannot change to source directory '$1'" >&2
   exit 1
}
shift

# Results are collected in <current directory>/<testname>.dir, which is
# recreated empty on every run.
test_dir=$(pwd)
output_dir="$test_dir/$testname.dir"
rm -fr "$output_dir"
mkdir -p "$output_dir"

# Basic build and run tests
# mo is passed to test.sh after "-mo:" and eo after "-eo:".
mo="-j test"
eo=""

# Values passed to the run step as "-rtol" and "-atol"
# (presumably relative/absolute comparison tolerances -- confirm in test.sh).
rtol="0.0"
atol="3e-15"

# Value passed to the run step as "-save". The commented-out line derives it
# from the host name with trailing digits stripped; the fixed name is used
# instead.
#save=`echo $(hostname) | sed 's/[0-9]\+$//'`
save="lassen"

##########
## CUDA ##
##########

# CUDA with UM in debug mode [ij, ams, struct, sstruct]
# Inside the double quotes each \\' yields a backslash followed by a single
# quote, so the option text contains \'...\' around the extra compiler flags
# (presumably so the quotes survive a later re-evaluation -- confirm in test.sh).
co="--with-cuda --enable-unified-memory --enable-persistent --enable-debug --with-gpu-arch=70 --with-memory-tracker --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-ij-gpu -ams -struct -sstruct -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
eo="-gpu -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
# $co, $mo, $ro and $eo stay unquoted on purpose: every option in them has to
# become its own argument. The paths are quoted so whitespace cannot split them.
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo -ro: $ro -eo: $eo
./renametest.sh basic "$output_dir/basic-cuda-um"

# CUDA with UM and mixed-int
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
# Option variables are expanded unquoted so each option is a separate
# argument; only the directory paths are quoted.
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo -ro: $ro
./renametest.sh basic "$output_dir/basic-cuda-um-mixedint"

# CUDA with UM with shared library
# Built with OpenMP and hopscotch hashing enabled as well; the run step is
# given the -gpumemcheck and -cudamemcheck options.
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-gpumemcheck -rt -mpibind -cudamemcheck -save ${save}"
# $co, $mo and $ro must word-split into individual options, so they are not
# quoted; the paths are.
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo -ro: $ro
./renametest.sh basic "$output_dir/basic-cuda-um-shared"

# CUDA with UM and single precision
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-single -rt -mpibind -save ${save}"
# Unquoted option variables are intentional (one argument per option).
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo -ro: ${ro}
./renametest.sh basic "$output_dir/basic-cuda-um-single"

# CUDA with UM without MPI [no run]
# (disabled: the three lines below are kept commented out)
#co="--with-cuda --enable-unified-memory --without-MPI --with-gpu-arch=70 --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
#./renametest.sh basic $output_dir/basic-cuda-um-without-MPI

# CUDA without UM with device memory pool [struct]
co="--with-cuda --enable-device-memory-pool --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-struct -rt -mpibind -save ${save}"
# Option variables are deliberately unquoted so they split into separate
# arguments; directory paths are quoted.
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo -ro: $ro
./renametest.sh basic "$output_dir/basic-cuda-nonum"

# CUDA without UM with umpire [benchmark]
# UMPIRE_DIR is the Umpire installation whose include/ and lib/
# subdirectories are handed to configure below.
UMPIRE_DIR=/usr/workspace/hypre/ext-libs/Umpire/2022.03.1-nvcc10.1.243-sm_70-xl2021.09.22
co="--with-cuda --with-gpu-arch=70 --with-umpire --with-umpire-include=${UMPIRE_DIR}/include --with-umpire-lib-dirs=${UMPIRE_DIR}/lib --with-umpire-libs=umpire --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-bench -rt -mpibind -save ${save}"
# $co, $mo and $ro are expanded unquoted on purpose (one argument per option).
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo -ro: $ro
./renametest.sh basic "$output_dir/basic-cuda-bench"

# run on CPU
# CUDA build configured with --with-test-using-host; the run step uses the
# fixed save name "lassen_cpu" rather than ${save}.
co="--with-cuda --with-test-using-host --with-memory-tracker --enable-debug --with-gpu-arch=70"
ro="-ij-noilu -ams -struct -sstruct -rt -mpibind -save lassen_cpu"
# Option variables stay unquoted so each option is passed as its own argument.
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo -ro: $ro
./renametest.sh basic "$output_dir/basic-cuda-cpu"

############
## OMP4.5 ##
############

# OMP 4.5 with UM with shared library [no run]
# (disabled: the three lines below are kept commented out)
#co="--with-device-openmp --enable-unified-memory --enable-shared --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029:1500-030:1501-308\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029:1500-030:1501-308\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
#./renametest.sh basic $output_dir/basic-deviceomp-um-shared

# OMP 4.5 without UM in debug mode [struct]
co="--with-device-openmp --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-struct -rt -mpibind -save ${save}"
# $co, $mo and $ro are intentionally unquoted (one argument per option);
# the directory paths are quoted.
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo -ro: $ro
./renametest.sh basic "$output_dir/basic-deviceomp-nonum-debug-struct"

#####################################
## CUDA + CMake build (only) tests ##
#####################################

# From here on mo carries only "-j", without the "test" word used by the
# preceding sections.
mo="-j"

# CUDA with UM + CMake
# Each "\'" closes the double-quoted string, adds one literal single quote and
# reopens the string, so the extra flag values end up wrapped in '...'.
# Compiler paths are looked up with which when this line is executed.
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
# $co and $mo stay unquoted so every option becomes a separate argument.
./test.sh cmake.sh "$src_dir" -co: $co -mo: $mo
./renametest.sh cmake "$output_dir/cmake-cuda-um-ij"

# CUDA with UM [shared library] + CMake
# Each "\'" closes the double-quoted string, adds one literal single quote and
# reopens the string, so the extra flag values end up wrapped in '...'.
# The CXX flags value no longer carries a trailing blank before its closing
# quote; unquoted expansion would otherwise turn that quote into a word of
# its own.
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_WITH_OPENMP=ON -DHYPRE_ENABLE_HOPSCOTCH=ON -DHYPRE_ENABLE_SHARED=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
# $co and $mo stay unquoted so every option becomes a separate argument.
./test.sh cmake.sh "$src_dir" -co: $co -mo: $mo
./renametest.sh cmake "$output_dir/cmake-cuda-um-shared"

# CUDA without UM + CMake
# Each "\'" closes the double-quoted string, adds one literal single quote and
# reopens the string, so the extra flag values end up wrapped in '...'.
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
# $co and $mo stay unquoted so every option becomes a separate argument.
./test.sh cmake.sh "$src_dir" -co: $co -mo: $mo
./renametest.sh cmake "$output_dir/cmake-cuda-nonum-struct"

####################################
## latest CUDA build (only) tests ##
####################################

# CUDA 11
module -q load cuda/11
# Prints a line only when the module listing contains "None found"
# (presumably meaning cuda/11 did not load -- confirm against the module tool).
# "2>&1 |" is the POSIX spelling of bash's "|&", which /bin/sh need not accept.
module list cuda/11 2>&1 | grep "None found"

mo="-j test"

# CUDA with UM with async malloc [no run]
# No -ro: options are passed for this configuration.
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
# $co and $mo are intentionally unquoted (one argument per option).
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo
./renametest.sh basic "$output_dir/basic-cuda11"

# CUDA 12
module -q load cuda/12
# Prints a line only when the module listing contains "None found"
# (presumably meaning cuda/12 did not load -- confirm against the module tool).
# "2>&1 |" is the POSIX spelling of bash's "|&", which /bin/sh need not accept.
module list cuda/12 2>&1 | grep "None found"
module -q load gcc/8.3.1

# Different -rtol/-atol values are used for this configuration.
rtol="1e-2"
atol="1e-6"

# CUDA with UM in debug mode [ij, ams, struct, sstruct]
# Configured with CC=mpicc and CXX=mpicxx and without the extra
# -qmaxmem/-qsuppress compiler flags.
co="--with-cuda --enable-unified-memory --enable-thrust-nosync --enable-debug --with-gpu-arch=70 CC=mpicc CXX=mpicxx"
ro="-ij-gpu -ams -struct -sstruct -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
# $co, $mo and $ro are intentionally unquoted (one argument per option).
./test.sh basic.sh "$src_dir" -co: $co -mo: $mo -ro: $ro
./renametest.sh basic "$output_dir/basic-cuda12"

# Echo to stderr all nonempty error files in $output_dir
# find lists every *.err file whose size is not zero; each path is read as one
# whole line and printed unchanged, so names containing blanks stay intact.
# The loop is the last command of the pipeline, so the status is 0 as long as
# the writes to stderr succeed.
find "$output_dir" ! -size 0 -name "*.err" -print |
while IFS= read -r errfile
do
   printf '%s\n' "$errfile" >&2
done