fix cuda 11 build (#569)
This PR fixes the CUDA 11 build after the merge of #549 and also adds build-only regression tests with CUDA 11.
This commit is contained in:
parent
a7bb784a45
commit
790e8e7826
@ -93,7 +93,9 @@ ro="-struct -rt -mpibind -save ${host}"
|
|||||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||||
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct
|
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct
|
||||||
|
|
||||||
|
#################################
|
||||||
# CUDA + CMake build (only) tests
|
# CUDA + CMake build (only) tests
|
||||||
|
#################################
|
||||||
mo="-j"
|
mo="-j"
|
||||||
# CUDA with UM + CMake
|
# CUDA with UM + CMake
|
||||||
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
|
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
|
||||||
@ -110,6 +112,14 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
|
|||||||
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
|
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
|
||||||
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct
|
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct
|
||||||
|
|
||||||
|
############################
|
||||||
|
# CUDA 11 build (only) tests
|
||||||
|
############################
|
||||||
|
co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
module load cuda/11
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11
|
||||||
|
|
||||||
# Echo to stderr all nonempty error files in $output_dir
|
# Echo to stderr all nonempty error files in $output_dir
|
||||||
for errfile in $( find $output_dir ! -size 0 -name "*.err" )
|
for errfile in $( find $output_dir ! -size 0 -name "*.err" )
|
||||||
do
|
do
|
||||||
|
|||||||
@ -93,7 +93,9 @@ ro="-struct -rt -mpibind -save ${host}"
|
|||||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||||
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct
|
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct
|
||||||
|
|
||||||
|
#################################
|
||||||
# CUDA + CMake build (only) tests
|
# CUDA + CMake build (only) tests
|
||||||
|
#################################
|
||||||
mo="-j"
|
mo="-j"
|
||||||
# CUDA with UM + CMake
|
# CUDA with UM + CMake
|
||||||
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
|
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
|
||||||
@ -110,6 +112,14 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
|
|||||||
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
|
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
|
||||||
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct
|
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct
|
||||||
|
|
||||||
|
############################
|
||||||
|
# CUDA 11 build (only) tests
|
||||||
|
############################
|
||||||
|
co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
module load cuda/11
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11
|
||||||
|
|
||||||
# Echo to stderr all nonempty error files in $output_dir
|
# Echo to stderr all nonempty error files in $output_dir
|
||||||
for errfile in $( find $output_dir ! -size 0 -name "*.err" )
|
for errfile in $( find $output_dir ! -size 0 -name "*.err" )
|
||||||
do
|
do
|
||||||
|
|||||||
@ -684,6 +684,13 @@ AS_HELP_STRING([--with-extra-CXXFLAGS=ARG],
|
|||||||
[EXTRA_CXXFLAGS=$withval]
|
[EXTRA_CXXFLAGS=$withval]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
AC_ARG_WITH(extra-CUFLAGS,
|
||||||
|
AS_HELP_STRING([--with-extra-CUFLAGS=ARG],
|
||||||
|
[Define extra CUDA compile flag, where ARG is a space-separated
|
||||||
|
list (enclosed in quotes) of directories.]),
|
||||||
|
[EXTRA_CUFLAGS=$withval]
|
||||||
|
)
|
||||||
|
|
||||||
AC_ARG_WITH(extra-BUILDFLAGS,
|
AC_ARG_WITH(extra-BUILDFLAGS,
|
||||||
AS_HELP_STRING([--with-extra-BUILDFLAGS=ARG],
|
AS_HELP_STRING([--with-extra-BUILDFLAGS=ARG],
|
||||||
[Define extra library build flag, where ARG is a space-separated
|
[Define extra library build flag, where ARG is a space-separated
|
||||||
@ -2346,7 +2353,7 @@ AS_IF([test x"$hypre_using_sycl" == x"yes"],
|
|||||||
AC_DEFINE(HYPRE_USING_SYCL, 1, [SYCL being used])
|
AC_DEFINE(HYPRE_USING_SYCL, 1, [SYCL being used])
|
||||||
|
|
||||||
dnl (Ab)Using CUCC when compiling HIP
|
dnl (Ab)Using CUCC when compiling HIP
|
||||||
LINK_CC=${CUCC}
|
LINK_CC=${CUCC}
|
||||||
LINK_CXX=${CUCC}
|
LINK_CXX=${CUCC}
|
||||||
|
|
||||||
SYCLFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel"
|
SYCLFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel"
|
||||||
@ -2370,7 +2377,7 @@ AS_IF([test x"$hypre_using_sycl" == x"yes"],
|
|||||||
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} ${MKLROOT}/lib/intel64/libmkl_sycl.a -Wl,-export-dynamic -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_sequential.a ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -lsycl -lOpenCL -lpthread -lm -ldl"
|
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} ${MKLROOT}/lib/intel64/libmkl_sycl.a -Wl,-export-dynamic -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_sequential.a ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -lsycl -lOpenCL -lpthread -lm -ldl"
|
||||||
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include"
|
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include"
|
||||||
])
|
])
|
||||||
|
|
||||||
AS_IF([test x"$hypre_using_onemklsparse" == x"yes"], [AC_DEFINE(HYPRE_USING_ONEMKLSPARSE, 1, [onemkl::SPARSE being used])])
|
AS_IF([test x"$hypre_using_onemklsparse" == x"yes"], [AC_DEFINE(HYPRE_USING_ONEMKLSPARSE, 1, [onemkl::SPARSE being used])])
|
||||||
|
|
||||||
AS_IF([test x"$hypre_using_onemklblas" == x"yes"], [AC_DEFINE(HYPRE_USING_ONEMKLBLAS, 1, [onemkl::BLAS being used])])
|
AS_IF([test x"$hypre_using_onemklblas" == x"yes"], [AC_DEFINE(HYPRE_USING_ONEMKLBLAS, 1, [onemkl::BLAS being used])])
|
||||||
@ -2510,6 +2517,8 @@ then
|
|||||||
AC_DEFINE([HYPRE_WITH_GPU_AWARE_MPI],1,[Define to 1 if using GPU aware MPI])
|
AC_DEFINE([HYPRE_WITH_GPU_AWARE_MPI],1,[Define to 1 if using GPU aware MPI])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
CUFLAGS="${CUFLAGS} ${EXTRA_CUFLAGS}"
|
||||||
|
|
||||||
dnl *********************************************************************
|
dnl *********************************************************************
|
||||||
dnl * Set installation directories
|
dnl * Set installation directories
|
||||||
dnl *********************************************************************
|
dnl *********************************************************************
|
||||||
|
|||||||
17
src/configure
vendored
17
src/configure
vendored
@ -799,6 +799,7 @@ with_LD
|
|||||||
with_LDFLAGS
|
with_LDFLAGS
|
||||||
with_extra_CFLAGS
|
with_extra_CFLAGS
|
||||||
with_extra_CXXFLAGS
|
with_extra_CXXFLAGS
|
||||||
|
with_extra_CUFLAGS
|
||||||
with_extra_BUILDFLAGS
|
with_extra_BUILDFLAGS
|
||||||
with_extra_incpath
|
with_extra_incpath
|
||||||
with_extra_ldpath
|
with_extra_ldpath
|
||||||
@ -1541,6 +1542,10 @@ Optional Packages:
|
|||||||
Define extra C++ compile flag, where ARG is a
|
Define extra C++ compile flag, where ARG is a
|
||||||
space-separated list (enclosed in quotes) of
|
space-separated list (enclosed in quotes) of
|
||||||
directories.
|
directories.
|
||||||
|
--with-extra-CUFLAGS=ARG
|
||||||
|
Define extra CUDA compile flag, where ARG is a
|
||||||
|
space-separated list (enclosed in quotes) of
|
||||||
|
directories.
|
||||||
--with-extra-BUILDFLAGS=ARG
|
--with-extra-BUILDFLAGS=ARG
|
||||||
Define extra library build flag, where ARG is a
|
Define extra library build flag, where ARG is a
|
||||||
space-separated list (enclosed in quotes) of
|
space-separated list (enclosed in quotes) of
|
||||||
@ -3404,6 +3409,14 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Check whether --with-extra-CUFLAGS was given.
|
||||||
|
if test "${with_extra_CUFLAGS+set}" = set; then :
|
||||||
|
withval=$with_extra_CUFLAGS; EXTRA_CUFLAGS=$withval
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Check whether --with-extra-BUILDFLAGS was given.
|
# Check whether --with-extra-BUILDFLAGS was given.
|
||||||
if test "${with_extra_BUILDFLAGS+set}" = set; then :
|
if test "${with_extra_BUILDFLAGS+set}" = set; then :
|
||||||
withval=$with_extra_BUILDFLAGS; EXTRA_BUILDFLAGS=$withval
|
withval=$with_extra_BUILDFLAGS; EXTRA_BUILDFLAGS=$withval
|
||||||
@ -9184,7 +9197,7 @@ $as_echo "#define HYPRE_USING_GPU 1" >>confdefs.h
|
|||||||
$as_echo "#define HYPRE_USING_SYCL 1" >>confdefs.h
|
$as_echo "#define HYPRE_USING_SYCL 1" >>confdefs.h
|
||||||
|
|
||||||
|
|
||||||
LINK_CC=${CUCC}
|
LINK_CC=${CUCC}
|
||||||
LINK_CXX=${CUCC}
|
LINK_CXX=${CUCC}
|
||||||
|
|
||||||
SYCLFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel"
|
SYCLFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel"
|
||||||
@ -9398,6 +9411,8 @@ $as_echo "#define HYPRE_WITH_GPU_AWARE_MPI 1" >>confdefs.h
|
|||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
CUFLAGS="${CUFLAGS} ${EXTRA_CUFLAGS}"
|
||||||
|
|
||||||
HYPRE_INSTALLDIR="${prefix}"
|
HYPRE_INSTALLDIR="${prefix}"
|
||||||
HYPRE_LIBINSTALL="${libdir}"
|
HYPRE_LIBINSTALL="${libdir}"
|
||||||
HYPRE_INCINSTALL="${includedir}"
|
HYPRE_INCINSTALL="${includedir}"
|
||||||
|
|||||||
@ -13,6 +13,7 @@
|
|||||||
|
|
||||||
#include "seq_mv.h"
|
#include "seq_mv.h"
|
||||||
#include "_hypre_utilities.hpp"
|
#include "_hypre_utilities.hpp"
|
||||||
|
#include "seq_mv.hpp"
|
||||||
|
|
||||||
#if defined(HYPRE_USING_GPU)
|
#if defined(HYPRE_USING_GPU)
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user