diff --git a/AUTOTEST/machine-lassen.sh b/AUTOTEST/machine-lassen.sh index 121e60f34..7e3cabdc4 100755 --- a/AUTOTEST/machine-lassen.sh +++ b/AUTOTEST/machine-lassen.sh @@ -93,7 +93,9 @@ ro="-struct -rt -mpibind -save ${host}" ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro ./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct +################################# # CUDA + CMake build (only) tests +################################# mo="-j" # CUDA with UM + CMake co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70" @@ -110,6 +112,14 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_ ./test.sh cmake.sh $src_dir -co: $co -mo: $mo ./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct +############################ +# CUDA 11 build (only) tests +############################ +co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" +module load cuda/11 +./test.sh basic.sh $src_dir -co: $co -mo: $mo +./renametest.sh basic $output_dir/basic-cuda11 + # Echo to stderr all nonempty error files in $output_dir for errfile in $( find $output_dir ! -size 0 -name "*.err" ) do diff --git a/AUTOTEST/machine-ray.sh b/AUTOTEST/machine-ray.sh index 9af1d413f..745208f2d 100755 --- a/AUTOTEST/machine-ray.sh +++ b/AUTOTEST/machine-ray.sh @@ -93,7 +93,9 @@ ro="-struct -rt -mpibind -save ${host}" ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro ./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct +################################# # CUDA + CMake build (only) tests +################################# mo="-j" # CUDA with UM + CMake co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70" @@ -110,6 +112,14 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_ ./test.sh cmake.sh $src_dir -co: $co -mo: $mo ./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct +############################ +# CUDA 11 build (only) tests +############################ +co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" +module load cuda/11 +./test.sh basic.sh $src_dir -co: $co -mo: $mo +./renametest.sh basic $output_dir/basic-cuda11 + # Echo to stderr all nonempty error files in $output_dir for errfile in $( find $output_dir ! -size 0 -name "*.err" ) do diff --git a/src/config/configure.in b/src/config/configure.in index e53d2beca..59a8dc805 100644 --- a/src/config/configure.in +++ b/src/config/configure.in @@ -684,6 +684,13 @@ AS_HELP_STRING([--with-extra-CXXFLAGS=ARG], [EXTRA_CXXFLAGS=$withval] ) +AC_ARG_WITH(extra-CUFLAGS, +AS_HELP_STRING([--with-extra-CUFLAGS=ARG], + [Define extra CUDA compile flag, where ARG is a space-separated + list (enclosed in quotes) of directories.]), +[EXTRA_CUFLAGS=$withval] +) + AC_ARG_WITH(extra-BUILDFLAGS, AS_HELP_STRING([--with-extra-BUILDFLAGS=ARG], [Define extra library build flag, where ARG is a space-separated @@ -2346,7 +2353,7 @@ AS_IF([test x"$hypre_using_sycl" == x"yes"], AC_DEFINE(HYPRE_USING_SYCL, 1, [SYCL being used]) dnl (Ab)Using CUCC when compiling HIP - LINK_CC=${CUCC} + LINK_CC=${CUCC} LINK_CXX=${CUCC} SYCLFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel" @@ -2370,7 +2377,7 @@ AS_IF([test x"$hypre_using_sycl" == x"yes"], HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} ${MKLROOT}/lib/intel64/libmkl_sycl.a -Wl,-export-dynamic -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_sequential.a ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -lsycl -lOpenCL -lpthread -lm -ldl" HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include" ]) - + AS_IF([test x"$hypre_using_onemklsparse" == x"yes"], [AC_DEFINE(HYPRE_USING_ONEMKLSPARSE, 1, [onemkl::SPARSE being used])]) AS_IF([test x"$hypre_using_onemklblas" == x"yes"], [AC_DEFINE(HYPRE_USING_ONEMKLBLAS, 1, [onemkl::BLAS being used])]) @@ -2510,6 +2517,8 @@ then AC_DEFINE([HYPRE_WITH_GPU_AWARE_MPI],1,[Define to 1 if using GPU aware MPI]) fi +CUFLAGS="${CUFLAGS} ${EXTRA_CUFLAGS}" + dnl ********************************************************************* dnl * Set installation directories dnl ********************************************************************* diff --git a/src/configure b/src/configure index c77c3a11c..d57d55327 100755 --- a/src/configure +++ b/src/configure @@ -799,6 +799,7 @@ with_LD with_LDFLAGS with_extra_CFLAGS with_extra_CXXFLAGS +with_extra_CUFLAGS with_extra_BUILDFLAGS with_extra_incpath with_extra_ldpath @@ -1541,6 +1542,10 @@ Optional Packages: Define extra C++ compile flag, where ARG is a space-separated list (enclosed in quotes) of directories. + --with-extra-CUFLAGS=ARG + Define extra CUDA compile flag, where ARG is a + space-separated list (enclosed in quotes) of + directories. --with-extra-BUILDFLAGS=ARG Define extra library build flag, where ARG is a space-separated list (enclosed in quotes) of @@ -3404,6 +3409,14 @@ fi +# Check whether --with-extra-CUFLAGS was given. +if test "${with_extra_CUFLAGS+set}" = set; then : + withval=$with_extra_CUFLAGS; EXTRA_CUFLAGS=$withval + +fi + + + # Check whether --with-extra-BUILDFLAGS was given. if test "${with_extra_BUILDFLAGS+set}" = set; then : withval=$with_extra_BUILDFLAGS; EXTRA_BUILDFLAGS=$withval @@ -9184,7 +9197,7 @@ $as_echo "#define HYPRE_USING_GPU 1" >>confdefs.h $as_echo "#define HYPRE_USING_SYCL 1" >>confdefs.h - LINK_CC=${CUCC} + LINK_CC=${CUCC} LINK_CXX=${CUCC} SYCLFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel" @@ -9398,6 +9411,8 @@ $as_echo "#define HYPRE_WITH_GPU_AWARE_MPI 1" >>confdefs.h fi +CUFLAGS="${CUFLAGS} ${EXTRA_CUFLAGS}" + HYPRE_INSTALLDIR="${prefix}" HYPRE_LIBINSTALL="${libdir}" HYPRE_INCINSTALL="${includedir}" diff --git a/src/seq_mv/csr_matvec_device.c b/src/seq_mv/csr_matvec_device.c index fc681d475..737dcbc8f 100644 --- a/src/seq_mv/csr_matvec_device.c +++ b/src/seq_mv/csr_matvec_device.c @@ -13,6 +13,7 @@ #include "seq_mv.h" #include "_hypre_utilities.hpp" +#include "seq_mv.hpp" #if defined(HYPRE_USING_GPU)