Add cuSOLVER and rocSOLVER support (#840)

Add cuSOLVER and rocSOLVER support to configure (autotools build) and add macro calls for functions defined in these libraries.
This commit is contained in:
Victor A. Paludetto Magri 2023-03-17 12:04:07 -04:00 committed by GitHub
parent 2ec3effbde
commit 2a61eb41ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 285 additions and 57 deletions

View File

@ -175,7 +175,7 @@
/* Define to 1 if using cuRAND */ /* Define to 1 if using cuRAND */
#undef HYPRE_USING_CURAND #undef HYPRE_USING_CURAND
/* Define to 1 if using cuSolver */ /* Define to 1 if using cuSOLVER */
#undef HYPRE_USING_CUSOLVER #undef HYPRE_USING_CUSOLVER
/* Define to 1 if using cuSPARSE */ /* Define to 1 if using cuSPARSE */
@ -253,6 +253,9 @@
/* rocRAND being used */ /* rocRAND being used */
#undef HYPRE_USING_ROCRAND #undef HYPRE_USING_ROCRAND
/* rocSOLVER being used */
#undef HYPRE_USING_ROCSOLVER
/* rocSPARSE being used */ /* rocSPARSE being used */
#undef HYPRE_USING_ROCSPARSE #undef HYPRE_USING_ROCSPARSE

View File

@ -204,6 +204,7 @@ dnl *********************************************************************
hypre_using_hip=no hypre_using_hip=no
hypre_using_rocsparse=no hypre_using_rocsparse=no
hypre_using_rocblas=no hypre_using_rocblas=no
hypre_using_rocsolver=no
hypre_using_rocrand=no hypre_using_rocrand=no
hypre_found_hip=no hypre_found_hip=no
@ -1141,13 +1142,17 @@ AS_HELP_STRING([--with-hip],
AC_ARG_ENABLE(rocsparse, AC_ARG_ENABLE(rocsparse,
AS_HELP_STRING([--enable-rocsparse], AS_HELP_STRING([--enable-rocsparse],
[Use rocSPARSE (default is YES).]), [Use rocSPARSE (default is YES with HIP, otherwise NO).]),
[case "${enableval}" in [case "${enableval}" in
yes) hypre_using_rocsparse=yes ;; yes) hypre_using_rocsparse=yes ;;
no) hypre_using_rocsparse=no ;; no) hypre_using_rocsparse=no ;;
*) hypre_using_rocsparse=yes ;; *) hypre_using_rocsparse=yes ;;
esac], esac],
[hypre_using_rocsparse=yes] [case "$hypre_using_hip" in
yes) hypre_using_rocsparse=yes ;;
no) hypre_using_rocsparse=no ;;
*) hypre_using_rocsparse=no ;;
esac]
) )
AC_ARG_ENABLE(rocblas, AC_ARG_ENABLE(rocblas,
@ -1161,15 +1166,30 @@ AS_HELP_STRING([--enable-rocblas],
[hypre_using_rocblas=no] [hypre_using_rocblas=no]
) )
AC_ARG_ENABLE(rocsolver,
AS_HELP_STRING([--enable-rocsolver],
[Use rocSOLVER (default is NO).]),
[case "${enableval}" in
yes) hypre_using_rocsolver=yes; hypre_using_rocblas=yes ;;
no) hypre_using_rocsolver=no ;;
*) hypre_using_rocsolver=no ;;
esac],
[hypre_using_rocsolver=no]
)
AC_ARG_ENABLE(rocrand, AC_ARG_ENABLE(rocrand,
AS_HELP_STRING([--enable-rocrand], AS_HELP_STRING([--enable-rocrand],
[Use rocRAND (default is YES).]), [Use rocRAND (default is YES with HIP, otherwise NO).]),
[case "${enableval}" in [case "${enableval}" in
yes) hypre_using_rocrand=yes ;; yes) hypre_using_rocrand=yes ;;
no) hypre_using_rocrand=no ;; no) hypre_using_rocrand=no ;;
*) hypre_using_rocrand=yes ;; *) hypre_using_rocrand=yes ;;
esac], esac],
[hypre_using_rocrand=yes] [case "$hypre_using_hip" in
yes) hypre_using_rocrand=yes ;;
no) hypre_using_rocrand=no ;;
*) hypre_using_rocrand=no ;;
esac]
) )
dnl ***** SYCL dnl ***** SYCL
@ -1212,7 +1232,7 @@ AS_HELP_STRING([--with-gpu-arch=ARG],
AC_ARG_ENABLE(cublas, AC_ARG_ENABLE(cublas,
AS_HELP_STRING([--enable-cublas], AS_HELP_STRING([--enable-cublas],
[Use cuBLAS (default is YES with CUDA, otherwise is NO).]), [Use cuBLAS (default is YES with CUDA, otherwise NO).]),
[case "${enableval}" in [case "${enableval}" in
yes) hypre_using_cublas=yes ;; yes) hypre_using_cublas=yes ;;
no) hypre_using_cublas=no ;; no) hypre_using_cublas=no ;;
@ -1227,7 +1247,7 @@ AS_HELP_STRING([--enable-cublas],
AC_ARG_ENABLE(curand, AC_ARG_ENABLE(curand,
AS_HELP_STRING([--enable-curand], AS_HELP_STRING([--enable-curand],
[Use cuRAND (default is YES with CUDA, otherwise is NO).]), [Use cuRAND (default is YES with CUDA, otherwise NO).]),
[case "${enableval}" in [case "${enableval}" in
yes) hypre_using_curand=yes ;; yes) hypre_using_curand=yes ;;
no) hypre_using_curand=no ;; no) hypre_using_curand=no ;;
@ -1242,7 +1262,7 @@ AS_HELP_STRING([--enable-curand],
AC_ARG_ENABLE(cuda-streams, AC_ARG_ENABLE(cuda-streams,
AS_HELP_STRING([--enable-cuda-streams], AS_HELP_STRING([--enable-cuda-streams],
[Use CUDA streams (default is YES with CUDA, otherwise is NO).]), [Use CUDA streams (default is YES with CUDA, otherwise NO).]),
[case "${enableval}" in [case "${enableval}" in
yes) hypre_using_cuda_streams=yes ;; yes) hypre_using_cuda_streams=yes ;;
no) hypre_using_cuda_streams=no ;; no) hypre_using_cuda_streams=no ;;
@ -1257,7 +1277,7 @@ AS_HELP_STRING([--enable-cuda-streams],
AC_ARG_ENABLE(cusparse, AC_ARG_ENABLE(cusparse,
AS_HELP_STRING([--enable-cusparse], AS_HELP_STRING([--enable-cusparse],
[Use cuSPARSE (default is YES with CUDA, otherwise is NO).]), [Use cuSPARSE (default is YES with CUDA, otherwise NO).]),
[case "${enableval}" in [case "${enableval}" in
yes) hypre_using_cusparse=yes ;; yes) hypre_using_cusparse=yes ;;
no) hypre_using_cusparse=no ;; no) hypre_using_cusparse=no ;;
@ -2447,7 +2467,7 @@ then
if test "$hypre_using_cusolver" = "yes" if test "$hypre_using_cusolver" = "yes"
then then
AC_DEFINE(HYPRE_USING_CUSOLVER, 1, [Define to 1 if using cuSolver]) AC_DEFINE(HYPRE_USING_CUSOLVER, 1, [Define to 1 if using cuSOLVER])
HYPRE_CUDA_LIBS+=" -lcusolver" HYPRE_CUDA_LIBS+=" -lcusolver"
fi fi
@ -2539,26 +2559,31 @@ AS_IF([test x"$hypre_using_hip" == x"yes"],
dnl rocSPARSE, for things like dcsrmv on AMD GPUs dnl rocSPARSE, for things like dcsrmv on AMD GPUs
AS_IF([test x"$hypre_using_rocsparse" == x"yes"], AS_IF([test x"$hypre_using_rocsparse" == x"yes"],
[AC_DEFINE(HYPRE_USING_ROCSPARSE, 1, [rocSPARSE being used]) [AC_DEFINE(HYPRE_USING_ROCSPARSE, 1, [rocSPARSE being used])
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocsparse" HYPRE_HIP_LIBS+=" -lrocsparse"
]) ])
dnl Note rocSPARSE requires rocBLAS, so this is only controlling dnl rocBLAS: basic linear algebra operations on AMD GPUs
dnl whether HYPRE explicitly uses rocBLAS in other places or not.
dnl So we don't need to add any extra libs or anything.
AS_IF([test x"$hypre_using_rocblas" == x"yes"], AS_IF([test x"$hypre_using_rocblas" == x"yes"],
[AC_DEFINE(HYPRE_USING_ROCBLAS, 1, [rocBLAS being used]) [AC_DEFINE(HYPRE_USING_ROCBLAS, 1, [rocBLAS being used])
HYPRE_HIP_LIBS+=" -lrocblas"
]) ])
dnl rocSOLVER: dense linear solvers on AMD GPUs (requires rocBLAS)
AS_IF([test x"$hypre_using_rocsolver" == x"yes"],
[AC_DEFINE(HYPRE_USING_ROCSOLVER, 1, [rocSOLVER being used])
HYPRE_HIP_LIBS+=" -lrocsolver"
])
dnl rocRAND: random number generation on AMD GPUs dnl rocRAND: random number generation on AMD GPUs
AS_IF([test x"$hypre_using_rocrand" == x"yes"], AS_IF([test x"$hypre_using_rocrand" == x"yes"],
[AC_DEFINE(HYPRE_USING_ROCRAND, 1, [rocRAND being used]) [AC_DEFINE(HYPRE_USING_ROCRAND, 1, [rocRAND being used])
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocrand" HYPRE_HIP_LIBS+=" -lrocrand"
]) ])
dnl rocTX tracing API dnl rocTX tracing API
AS_IF([test x"$hypre_using_gpu_profiling" == x"yes"], AS_IF([test x"$hypre_using_gpu_profiling" == x"yes"],
[AC_DEFINE(HYPRE_USING_ROCTX, 1, [Define to 1 if using AMD rocTX profiling]) [AC_DEFINE(HYPRE_USING_ROCTX, 1, [Define to 1 if using AMD rocTX profiling])
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lroctx64" HYPRE_HIP_LIBS+=" -lroctx64"
]) ])
AS_IF([test x"$hypre_using_cuda_streams" == x"yes"], AS_IF([test x"$hypre_using_cuda_streams" == x"yes"],

62
src/configure vendored
View File

@ -859,6 +859,7 @@ with_cuda
with_hip with_hip
enable_rocsparse enable_rocsparse
enable_rocblas enable_rocblas
enable_rocsolver
enable_rocrand enable_rocrand
with_sycl with_sycl
with_cuda_home with_cuda_home
@ -1562,16 +1563,16 @@ Optional Features:
Use device async malloc (default is NO). Use device async malloc (default is NO).
--enable-gpu-profiling Use NVTX on CUDA, rocTX on HIP (default is NO). --enable-gpu-profiling Use NVTX on CUDA, rocTX on HIP (default is NO).
--enable-gpu-aware-mpi Use GPU memory aware MPI --enable-gpu-aware-mpi Use GPU memory aware MPI
--enable-rocsparse Use rocSPARSE (default is YES). --enable-rocsparse Use rocSPARSE (default is YES with HIP, otherwise
NO).
--enable-rocblas Use rocBLAS (default is NO). --enable-rocblas Use rocBLAS (default is NO).
--enable-rocrand Use rocRAND (default is YES). --enable-rocsolver Use rocSOLVER (default is NO).
--enable-cublas Use cuBLAS (default is YES with CUDA, otherwise is --enable-rocrand Use rocRAND (default is YES with HIP, otherwise NO).
NO). --enable-cublas Use cuBLAS (default is YES with CUDA, otherwise NO).
--enable-curand Use cuRAND (default is YES with CUDA, otherwise is --enable-curand Use cuRAND (default is YES with CUDA, otherwise NO).
NO).
--enable-cuda-streams Use CUDA streams (default is YES with CUDA, --enable-cuda-streams Use CUDA streams (default is YES with CUDA,
otherwise is NO). otherwise NO).
--enable-cusparse Use cuSPARSE (default is YES with CUDA, otherwise is --enable-cusparse Use cuSPARSE (default is YES with CUDA, otherwise
NO). NO).
--enable-cusolver Use cuSOLVER (default is NO). --enable-cusolver Use cuSOLVER (default is NO).
--enable-onemklsparse Use oneMKL sparse (default is YES). --enable-onemklsparse Use oneMKL sparse (default is YES).
@ -3347,6 +3348,7 @@ hypre_cxxstd=11
hypre_using_hip=no hypre_using_hip=no
hypre_using_rocsparse=no hypre_using_rocsparse=no
hypre_using_rocblas=no hypre_using_rocblas=no
hypre_using_rocsolver=no
hypre_using_rocrand=no hypre_using_rocrand=no
hypre_found_hip=no hypre_found_hip=no
@ -4598,7 +4600,11 @@ then :
*) hypre_using_rocsparse=yes ;; *) hypre_using_rocsparse=yes ;;
esac esac
else $as_nop else $as_nop
hypre_using_rocsparse=yes case "$hypre_using_hip" in
yes) hypre_using_rocsparse=yes ;;
no) hypre_using_rocsparse=no ;;
*) hypre_using_rocsparse=no ;;
esac
fi fi
@ -4617,6 +4623,20 @@ else $as_nop
fi fi
# Check whether --enable-rocsolver was given.
if test ${enable_rocsolver+y}
then :
enableval=$enable_rocsolver; case "${enableval}" in
yes) hypre_using_rocsolver=yes; hypre_using_rocblas=yes ;;
no) hypre_using_rocsolver=no ;;
*) hypre_using_rocsolver=no ;;
esac
else $as_nop
hypre_using_rocsolver=no
fi
# Check whether --enable-rocrand was given. # Check whether --enable-rocrand was given.
if test ${enable_rocrand+y} if test ${enable_rocrand+y}
then : then :
@ -4626,7 +4646,11 @@ then :
*) hypre_using_rocrand=yes ;; *) hypre_using_rocrand=yes ;;
esac esac
else $as_nop else $as_nop
hypre_using_rocrand=yes case "$hypre_using_hip" in
yes) hypre_using_rocrand=yes ;;
no) hypre_using_rocrand=no ;;
*) hypre_using_rocrand=no ;;
esac
fi fi
@ -10691,15 +10715,25 @@ then :
printf "%s\n" "#define HYPRE_USING_ROCSPARSE 1" >>confdefs.h printf "%s\n" "#define HYPRE_USING_ROCSPARSE 1" >>confdefs.h
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocsparse" HYPRE_HIP_LIBS+=" -lrocsparse"
fi fi
if test x"$hypre_using_rocblas" == x"yes" if test x"$hypre_using_rocblas" == x"yes"
then : then :
printf "%s\n" "#define HYPRE_USING_ROCBLAS 1" >>confdefs.h printf "%s\n" "#define HYPRE_USING_ROCBLAS 1" >>confdefs.h
HYPRE_HIP_LIBS+=" -lrocblas"
fi
if test x"$hypre_using_rocsolver" == x"yes"
then :
printf "%s\n" "#define HYPRE_USING_ROCSOLVER 1" >>confdefs.h
HYPRE_HIP_LIBS+=" -lrocsolver"
fi fi
@ -10708,7 +10742,7 @@ then :
printf "%s\n" "#define HYPRE_USING_ROCRAND 1" >>confdefs.h printf "%s\n" "#define HYPRE_USING_ROCRAND 1" >>confdefs.h
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocrand" HYPRE_HIP_LIBS+=" -lrocrand"
fi fi
@ -10717,7 +10751,7 @@ then :
printf "%s\n" "#define HYPRE_USING_ROCTX 1" >>confdefs.h printf "%s\n" "#define HYPRE_USING_ROCTX 1" >>confdefs.h
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lroctx64" HYPRE_HIP_LIBS+=" -lroctx64"
fi fi

View File

@ -1511,6 +1511,7 @@ typedef struct
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleVendorSolverHandle(hypre_handle) hypre_DeviceDataVendorSolverHandle(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle))

View File

@ -71,9 +71,22 @@ using hypre_DeviceItem = void*;
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cuda_profiler_api.h> #include <cuda_profiler_api.h>
#if defined(HYPRE_USING_CURAND)
#include <curand.h> #include <curand.h>
#endif
#if defined(HYPRE_USING_CUBLAS)
#include <cublas_v2.h> #include <cublas_v2.h>
#endif
#if defined(HYPRE_USING_CUSPARSE)
#include <cusparse.h> #include <cusparse.h>
#endif
#if defined(HYPRE_USING_CUSOLVER)
#include <cusolverDn.h>
#endif
#ifndef CUDART_VERSION #ifndef CUDART_VERSION
#error CUDART_VERSION Undefined! #error CUDART_VERSION Undefined!
@ -110,8 +123,16 @@ using hypre_DeviceItem = void*;
using hypre_DeviceItem = void*; using hypre_DeviceItem = void*;
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#if defined(HYPRE_USING_ROCBLAS)
#include <rocblas/rocblas.h>
#endif
#if defined(HYPRE_USING_ROCSPARSE) #if defined(HYPRE_USING_ROCSPARSE)
#include <rocsparse.h> #include <rocsparse/rocsparse.h>
#endif
#if defined(HYPRE_USING_ROCSOLVER)
#include <rocsolver/rocsolver.h>
#endif #endif
#if defined(HYPRE_USING_ROCRAND) #if defined(HYPRE_USING_ROCRAND)
@ -445,6 +466,14 @@ using hypre_DeviceItem = sycl::nd_item<3>;
hypre_assert(0); exit(1); \ hypre_assert(0); exit(1); \
} } while(0) } } while(0)
#define HYPRE_ROCBLAS_CALL(call) do { \
rocblas_status err = call; \
if (rocblas_status_success != err) { \
printf("rocBLAS ERROR (code = %d, %s) at %s:%d\n", \
err, rocblas_status_to_string(err), __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
#define HYPRE_CUSPARSE_CALL(call) do { \ #define HYPRE_CUSPARSE_CALL(call) do { \
cusparseStatus_t err = call; \ cusparseStatus_t err = call; \
if (CUSPARSE_STATUS_SUCCESS != err) { \ if (CUSPARSE_STATUS_SUCCESS != err) { \
@ -461,6 +490,22 @@ using hypre_DeviceItem = sycl::nd_item<3>;
assert(0); exit(1); \ assert(0); exit(1); \
} } while(0) } } while(0)
#define HYPRE_CUSOLVER_CALL(call) do { \
cusolverStatus_t err = call; \
if (CUSOLVER_STATUS_SUCCESS != err) { \
printf("cuSOLVER ERROR (code = %d) at %s:%d\n", \
err, __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
#define HYPRE_ROCSOLVER_CALL(call) do { \
rocblas_status err = call; \
if (rocblas_status_success != err) { \
printf("rocSOLVER ERROR (code = %d, %s) at %s:%d\n", \
err, rocblas_status_to_string(err), __FILE__, __LINE__); \
assert(0); exit(1); \
} } while(0)
#define HYPRE_CURAND_CALL(call) do { \ #define HYPRE_CURAND_CALL(call) do { \
curandStatus_t err = call; \ curandStatus_t err = call; \
if (CURAND_STATUS_SUCCESS != err) { \ if (CURAND_STATUS_SUCCESS != err) { \
@ -513,6 +558,12 @@ using hypre_DeviceItem = sycl::nd_item<3>;
struct hypre_cub_CachingDeviceAllocator; struct hypre_cub_CachingDeviceAllocator;
typedef struct hypre_cub_CachingDeviceAllocator hypre_cub_CachingDeviceAllocator; typedef struct hypre_cub_CachingDeviceAllocator hypre_cub_CachingDeviceAllocator;
#if defined(HYPRE_USING_CUSOLVER)
typedef cusolverDnHandle_t vendorSolverHandle_t;
#elif defined(HYPRE_USING_ROCSOLVER)
typedef rocblas_handle vendorSolverHandle_t;
#endif
struct hypre_DeviceData struct hypre_DeviceData
{ {
#if defined(HYPRE_USING_CURAND) #if defined(HYPRE_USING_CURAND)
@ -535,6 +586,10 @@ struct hypre_DeviceData
rocsparse_handle cusparse_handle; rocsparse_handle cusparse_handle;
#endif #endif
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
vendorSolverHandle_t vendor_solver_handle;
#endif
#if defined(HYPRE_USING_CUDA_STREAMS) #if defined(HYPRE_USING_CUDA_STREAMS)
#if defined(HYPRE_USING_CUDA) #if defined(HYPRE_USING_CUDA)
cudaStream_t streams[HYPRE_MAX_NUM_STREAMS]; cudaStream_t streams[HYPRE_MAX_NUM_STREAMS];
@ -625,37 +680,41 @@ struct hypre_DeviceData
#define hypre_DeviceDataUseGpuRand(data) ((data) -> use_gpu_rand) #define hypre_DeviceDataUseGpuRand(data) ((data) -> use_gpu_rand)
hypre_DeviceData* hypre_DeviceDataCreate(); hypre_DeviceData* hypre_DeviceDataCreate();
void hypre_DeviceDataDestroy(hypre_DeviceData* data); void hypre_DeviceDataDestroy(hypre_DeviceData* data);
#if defined(HYPRE_USING_CURAND) #if defined(HYPRE_USING_CURAND)
curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_ROCRAND) #if defined(HYPRE_USING_ROCRAND)
rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_CUBLAS) #if defined(HYPRE_USING_CUBLAS)
cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data); cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_CUSPARSE) #if defined(HYPRE_USING_CUSPARSE)
cusparseHandle_t hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); cusparseHandle_t hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_ROCSPARSE) #if defined(HYPRE_USING_ROCSPARSE)
rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
#endif
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
vendorSolverHandle_t hypre_DeviceDataVendorSolverHandle(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_CUDA) #if defined(HYPRE_USING_CUDA)
cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data); cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
#elif defined(HYPRE_USING_HIP) #elif defined(HYPRE_USING_HIP)
hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data); hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
#elif defined(HYPRE_USING_SYCL) #elif defined(HYPRE_USING_SYCL)
sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data); sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data);
#endif #endif
// Data structure and accessor routines for Cuda Sparse Triangular Matrices // Data structure and accessor routines for Cuda Sparse Triangular Matrices

View File

@ -130,6 +130,17 @@ hypre_DeviceDataDestroy(hypre_DeviceData *data)
} }
#endif // #if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) #endif // #if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
if (data->vendor_solver_handle)
{
#if defined(HYPRE_USING_CUSOLVER)
HYPRE_CUSOLVER_CALL(cusolverDnDestroy(data->vendor_solver_handle));
#else
HYPRE_ROCBLAS_CALL(rocblas_destroy_handle(data->vendor_solver_handle));
#endif
}
#endif // #if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
#if defined(HYPRE_USING_CUDA_STREAMS) #if defined(HYPRE_USING_CUDA_STREAMS)
for (HYPRE_Int i = 0; i < HYPRE_MAX_NUM_STREAMS; i++) for (HYPRE_Int i = 0; i < HYPRE_MAX_NUM_STREAMS; i++)
{ {
@ -2813,6 +2824,38 @@ hypre_DeviceDataCusparseHandle(hypre_DeviceData *data)
} }
#endif // defined(HYPRE_USING_ROCSPARSE) #endif // defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
/*--------------------------------------------------------------------
* hypre_DeviceDataVendorSolverHandle
*--------------------------------------------------------------------*/
vendorSolverHandle_t
hypre_DeviceDataVendorSolverHandle(hypre_DeviceData *data)
{
if (data->vendor_solver_handle)
{
return data->vendor_solver_handle;
}
#if defined(HYPRE_USING_CUSOLVER)
cusolverDnHandle_t handle;
HYPRE_CUSOLVER_CALL( cusolverDnCreate(&handle) );
HYPRE_CUSOLVER_CALL( cusolverDnSetStream(handle, hypre_DeviceDataComputeStream(data)) );
#else
rocblas_handle handle;
HYPRE_ROCBLAS_CALL( rocblas_create_handle(&handle) );
HYPRE_ROCBLAS_CALL( rocblas_set_stream(handle, hypre_DeviceDataComputeStream(data)) );
#endif
data->vendor_solver_handle = handle;
return handle;
}
#endif // defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
#endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) #endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

View File

@ -19,9 +19,22 @@ using hypre_DeviceItem = void*;
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cuda_profiler_api.h> #include <cuda_profiler_api.h>
#if defined(HYPRE_USING_CURAND)
#include <curand.h> #include <curand.h>
#endif
#if defined(HYPRE_USING_CUBLAS)
#include <cublas_v2.h> #include <cublas_v2.h>
#endif
#if defined(HYPRE_USING_CUSPARSE)
#include <cusparse.h> #include <cusparse.h>
#endif
#if defined(HYPRE_USING_CUSOLVER)
#include <cusolverDn.h>
#endif
#ifndef CUDART_VERSION #ifndef CUDART_VERSION
#error CUDART_VERSION Undefined! #error CUDART_VERSION Undefined!
@ -58,8 +71,16 @@ using hypre_DeviceItem = void*;
using hypre_DeviceItem = void*; using hypre_DeviceItem = void*;
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#if defined(HYPRE_USING_ROCBLAS)
#include <rocblas/rocblas.h>
#endif
#if defined(HYPRE_USING_ROCSPARSE) #if defined(HYPRE_USING_ROCSPARSE)
#include <rocsparse.h> #include <rocsparse/rocsparse.h>
#endif
#if defined(HYPRE_USING_ROCSOLVER)
#include <rocsolver/rocsolver.h>
#endif #endif
#if defined(HYPRE_USING_ROCRAND) #if defined(HYPRE_USING_ROCRAND)
@ -393,6 +414,14 @@ using hypre_DeviceItem = sycl::nd_item<3>;
hypre_assert(0); exit(1); \ hypre_assert(0); exit(1); \
} } while(0) } } while(0)
#define HYPRE_ROCBLAS_CALL(call) do { \
rocblas_status err = call; \
if (rocblas_status_success != err) { \
printf("rocBLAS ERROR (code = %d, %s) at %s:%d\n", \
err, rocblas_status_to_string(err), __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
#define HYPRE_CUSPARSE_CALL(call) do { \ #define HYPRE_CUSPARSE_CALL(call) do { \
cusparseStatus_t err = call; \ cusparseStatus_t err = call; \
if (CUSPARSE_STATUS_SUCCESS != err) { \ if (CUSPARSE_STATUS_SUCCESS != err) { \
@ -409,6 +438,22 @@ using hypre_DeviceItem = sycl::nd_item<3>;
assert(0); exit(1); \ assert(0); exit(1); \
} } while(0) } } while(0)
#define HYPRE_CUSOLVER_CALL(call) do { \
cusolverStatus_t err = call; \
if (CUSOLVER_STATUS_SUCCESS != err) { \
printf("cuSOLVER ERROR (code = %d) at %s:%d\n", \
err, __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
#define HYPRE_ROCSOLVER_CALL(call) do { \
rocblas_status err = call; \
if (rocblas_status_success != err) { \
printf("rocSOLVER ERROR (code = %d, %s) at %s:%d\n", \
err, rocblas_status_to_string(err), __FILE__, __LINE__); \
assert(0); exit(1); \
} } while(0)
#define HYPRE_CURAND_CALL(call) do { \ #define HYPRE_CURAND_CALL(call) do { \
curandStatus_t err = call; \ curandStatus_t err = call; \
if (CURAND_STATUS_SUCCESS != err) { \ if (CURAND_STATUS_SUCCESS != err) { \
@ -461,6 +506,12 @@ using hypre_DeviceItem = sycl::nd_item<3>;
struct hypre_cub_CachingDeviceAllocator; struct hypre_cub_CachingDeviceAllocator;
typedef struct hypre_cub_CachingDeviceAllocator hypre_cub_CachingDeviceAllocator; typedef struct hypre_cub_CachingDeviceAllocator hypre_cub_CachingDeviceAllocator;
#if defined(HYPRE_USING_CUSOLVER)
typedef cusolverDnHandle_t vendorSolverHandle_t;
#elif defined(HYPRE_USING_ROCSOLVER)
typedef rocblas_handle vendorSolverHandle_t;
#endif
struct hypre_DeviceData struct hypre_DeviceData
{ {
#if defined(HYPRE_USING_CURAND) #if defined(HYPRE_USING_CURAND)
@ -483,6 +534,10 @@ struct hypre_DeviceData
rocsparse_handle cusparse_handle; rocsparse_handle cusparse_handle;
#endif #endif
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
vendorSolverHandle_t vendor_solver_handle;
#endif
#if defined(HYPRE_USING_CUDA_STREAMS) #if defined(HYPRE_USING_CUDA_STREAMS)
#if defined(HYPRE_USING_CUDA) #if defined(HYPRE_USING_CUDA)
cudaStream_t streams[HYPRE_MAX_NUM_STREAMS]; cudaStream_t streams[HYPRE_MAX_NUM_STREAMS];
@ -573,37 +628,41 @@ struct hypre_DeviceData
#define hypre_DeviceDataUseGpuRand(data) ((data) -> use_gpu_rand) #define hypre_DeviceDataUseGpuRand(data) ((data) -> use_gpu_rand)
hypre_DeviceData* hypre_DeviceDataCreate(); hypre_DeviceData* hypre_DeviceDataCreate();
void hypre_DeviceDataDestroy(hypre_DeviceData* data); void hypre_DeviceDataDestroy(hypre_DeviceData* data);
#if defined(HYPRE_USING_CURAND) #if defined(HYPRE_USING_CURAND)
curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_ROCRAND) #if defined(HYPRE_USING_ROCRAND)
rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data); rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_CUBLAS) #if defined(HYPRE_USING_CUBLAS)
cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data); cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_CUSPARSE) #if defined(HYPRE_USING_CUSPARSE)
cusparseHandle_t hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); cusparseHandle_t hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_ROCSPARSE) #if defined(HYPRE_USING_ROCSPARSE)
rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data); rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
#endif
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
vendorSolverHandle_t hypre_DeviceDataVendorSolverHandle(hypre_DeviceData *data);
#endif #endif
#if defined(HYPRE_USING_CUDA) #if defined(HYPRE_USING_CUDA)
cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data); cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
#elif defined(HYPRE_USING_HIP) #elif defined(HYPRE_USING_HIP)
hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data); hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
#elif defined(HYPRE_USING_SYCL) #elif defined(HYPRE_USING_SYCL)
sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i); sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data); sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data);
#endif #endif
// Data structure and accessor routines for Cuda Sparse Triangular Matrices // Data structure and accessor routines for Cuda Sparse Triangular Matrices

View File

@ -306,6 +306,10 @@ HYPRE_Init(void)
hypre_HandleCurandGenerator(_hypre_handle); hypre_HandleCurandGenerator(_hypre_handle);
#endif #endif
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
hypre_HandleVendorSolverHandle(_hypre_handle);
#endif
/* Check if cuda arch flags in compiling match the device */ /* Check if cuda arch flags in compiling match the device */
#if defined(HYPRE_USING_CUDA) && defined(HYPRE_DEBUG) #if defined(HYPRE_USING_CUDA) && defined(HYPRE_DEBUG)
hypre_CudaCompileFlagCheck(); hypre_CudaCompileFlagCheck();
@ -629,4 +633,3 @@ HYPRE_GetExecutionPolicy(HYPRE_ExecutionPolicy *exec_policy)
return hypre_error_flag; return hypre_error_flag;
} }

View File

@ -57,6 +57,7 @@ typedef struct
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleVendorSolverHandle(hypre_handle) hypre_DeviceDataVendorSolverHandle(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle))