Add cuSOLVER and rocSOLVER support (#840)
Add cuSOLVER and rocSOLVER support to configure (autotools build) and add macro calls for functions defined in these libraries.
This commit is contained in:
parent
2ec3effbde
commit
2a61eb41ad
@ -175,7 +175,7 @@
|
||||
/* Define to 1 if using cuRAND */
|
||||
#undef HYPRE_USING_CURAND
|
||||
|
||||
/* Define to 1 if using cuSolver */
|
||||
/* Define to 1 if using cuSOLVER */
|
||||
#undef HYPRE_USING_CUSOLVER
|
||||
|
||||
/* Define to 1 if using cuSPARSE */
|
||||
@ -253,6 +253,9 @@
|
||||
/* rocRAND being used */
|
||||
#undef HYPRE_USING_ROCRAND
|
||||
|
||||
/* rocSOLVER being used */
|
||||
#undef HYPRE_USING_ROCSOLVER
|
||||
|
||||
/* rocSPARSE being used */
|
||||
#undef HYPRE_USING_ROCSPARSE
|
||||
|
||||
|
||||
@ -204,6 +204,7 @@ dnl *********************************************************************
|
||||
hypre_using_hip=no
|
||||
hypre_using_rocsparse=no
|
||||
hypre_using_rocblas=no
|
||||
hypre_using_rocsolver=no
|
||||
hypre_using_rocrand=no
|
||||
|
||||
hypre_found_hip=no
|
||||
@ -1141,13 +1142,17 @@ AS_HELP_STRING([--with-hip],
|
||||
|
||||
AC_ARG_ENABLE(rocsparse,
|
||||
AS_HELP_STRING([--enable-rocsparse],
|
||||
[Use rocSPARSE (default is YES).]),
|
||||
[Use rocSPARSE (default is YES with HIP, otherwise NO).]),
|
||||
[case "${enableval}" in
|
||||
yes) hypre_using_rocsparse=yes ;;
|
||||
no) hypre_using_rocsparse=no ;;
|
||||
*) hypre_using_rocsparse=yes ;;
|
||||
esac],
|
||||
[hypre_using_rocsparse=yes]
|
||||
[case "$hypre_using_hip" in
|
||||
yes) hypre_using_rocsparse=yes ;;
|
||||
no) hypre_using_rocsparse=no ;;
|
||||
*) hypre_using_rocsparse=no ;;
|
||||
esac]
|
||||
)
|
||||
|
||||
AC_ARG_ENABLE(rocblas,
|
||||
@ -1161,15 +1166,30 @@ AS_HELP_STRING([--enable-rocblas],
|
||||
[hypre_using_rocblas=no]
|
||||
)
|
||||
|
||||
AC_ARG_ENABLE(rocsolver,
|
||||
AS_HELP_STRING([--enable-rocsolver],
|
||||
[Use rocSOLVER (default is NO).]),
|
||||
[case "${enableval}" in
|
||||
yes) hypre_using_rocsolver=yes; hypre_using_rocblas=yes ;;
|
||||
no) hypre_using_rocsolver=no ;;
|
||||
*) hypre_using_rocsolver=no ;;
|
||||
esac],
|
||||
[hypre_using_rocsolver=no]
|
||||
)
|
||||
|
||||
AC_ARG_ENABLE(rocrand,
|
||||
AS_HELP_STRING([--enable-rocrand],
|
||||
[Use rocRAND (default is YES).]),
|
||||
[Use rocRAND (default is YES with HIP, otherwise NO).]),
|
||||
[case "${enableval}" in
|
||||
yes) hypre_using_rocrand=yes ;;
|
||||
no) hypre_using_rocrand=no ;;
|
||||
*) hypre_using_rocrand=yes ;;
|
||||
esac],
|
||||
[hypre_using_rocrand=yes]
|
||||
[case "$hypre_using_hip" in
|
||||
yes) hypre_using_rocrand=yes ;;
|
||||
no) hypre_using_rocrand=no ;;
|
||||
*) hypre_using_rocrand=no ;;
|
||||
esac]
|
||||
)
|
||||
|
||||
dnl ***** SYCL
|
||||
@ -1212,7 +1232,7 @@ AS_HELP_STRING([--with-gpu-arch=ARG],
|
||||
|
||||
AC_ARG_ENABLE(cublas,
|
||||
AS_HELP_STRING([--enable-cublas],
|
||||
[Use cuBLAS (default is YES with CUDA, otherwise is NO).]),
|
||||
[Use cuBLAS (default is YES with CUDA, otherwise NO).]),
|
||||
[case "${enableval}" in
|
||||
yes) hypre_using_cublas=yes ;;
|
||||
no) hypre_using_cublas=no ;;
|
||||
@ -1227,7 +1247,7 @@ AS_HELP_STRING([--enable-cublas],
|
||||
|
||||
AC_ARG_ENABLE(curand,
|
||||
AS_HELP_STRING([--enable-curand],
|
||||
[Use cuRAND (default is YES with CUDA, otherwise is NO).]),
|
||||
[Use cuRAND (default is YES with CUDA, otherwise NO).]),
|
||||
[case "${enableval}" in
|
||||
yes) hypre_using_curand=yes ;;
|
||||
no) hypre_using_curand=no ;;
|
||||
@ -1242,7 +1262,7 @@ AS_HELP_STRING([--enable-curand],
|
||||
|
||||
AC_ARG_ENABLE(cuda-streams,
|
||||
AS_HELP_STRING([--enable-cuda-streams],
|
||||
[Use CUDA streams (default is YES with CUDA, otherwise is NO).]),
|
||||
[Use CUDA streams (default is YES with CUDA, otherwise NO).]),
|
||||
[case "${enableval}" in
|
||||
yes) hypre_using_cuda_streams=yes ;;
|
||||
no) hypre_using_cuda_streams=no ;;
|
||||
@ -1257,7 +1277,7 @@ AS_HELP_STRING([--enable-cuda-streams],
|
||||
|
||||
AC_ARG_ENABLE(cusparse,
|
||||
AS_HELP_STRING([--enable-cusparse],
|
||||
[Use cuSPARSE (default is YES with CUDA, otherwise is NO).]),
|
||||
[Use cuSPARSE (default is YES with CUDA, otherwise NO).]),
|
||||
[case "${enableval}" in
|
||||
yes) hypre_using_cusparse=yes ;;
|
||||
no) hypre_using_cusparse=no ;;
|
||||
@ -2447,7 +2467,7 @@ then
|
||||
|
||||
if test "$hypre_using_cusolver" = "yes"
|
||||
then
|
||||
AC_DEFINE(HYPRE_USING_CUSOLVER, 1, [Define to 1 if using cuSolver])
|
||||
AC_DEFINE(HYPRE_USING_CUSOLVER, 1, [Define to 1 if using cuSOLVER])
|
||||
HYPRE_CUDA_LIBS+=" -lcusolver"
|
||||
fi
|
||||
|
||||
@ -2539,26 +2559,31 @@ AS_IF([test x"$hypre_using_hip" == x"yes"],
|
||||
dnl rocSPARSE, for things like dcsrmv on AMD GPUs
|
||||
AS_IF([test x"$hypre_using_rocsparse" == x"yes"],
|
||||
[AC_DEFINE(HYPRE_USING_ROCSPARSE, 1, [rocSPARSE being used])
|
||||
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocsparse"
|
||||
HYPRE_HIP_LIBS+=" -lrocsparse"
|
||||
])
|
||||
|
||||
dnl Note rocSPARSE requires rocBLAS, so this is only controlling
|
||||
dnl whether HYPRE explicitly uses rocBLAS in other places or not.
|
||||
dnl So we don't need to add any extra libs or anything.
|
||||
dnl rocBLAS: basic linear algebra operations on AMD GPUs
|
||||
AS_IF([test x"$hypre_using_rocblas" == x"yes"],
|
||||
[AC_DEFINE(HYPRE_USING_ROCBLAS, 1, [rocBLAS being used])
|
||||
HYPRE_HIP_LIBS+=" -lrocblas"
|
||||
])
|
||||
|
||||
dnl rocSOLVER: dense linear solvers on AMD GPUs (requires rocBLAS)
|
||||
AS_IF([test x"$hypre_using_rocsolver" == x"yes"],
|
||||
[AC_DEFINE(HYPRE_USING_ROCSOLVER, 1, [rocSOLVER being used])
|
||||
HYPRE_HIP_LIBS+=" -lrocsolver"
|
||||
])
|
||||
|
||||
dnl rocRAND: random number generation on AMD GPUs
|
||||
AS_IF([test x"$hypre_using_rocrand" == x"yes"],
|
||||
[AC_DEFINE(HYPRE_USING_ROCRAND, 1, [rocRAND being used])
|
||||
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocrand"
|
||||
HYPRE_HIP_LIBS+=" -lrocrand"
|
||||
])
|
||||
|
||||
dnl rocTX tracing API
|
||||
AS_IF([test x"$hypre_using_gpu_profiling" == x"yes"],
|
||||
[AC_DEFINE(HYPRE_USING_ROCTX, 1, [Define to 1 if using AMD rocTX profiling])
|
||||
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lroctx64"
|
||||
HYPRE_HIP_LIBS+=" -lroctx64"
|
||||
])
|
||||
|
||||
AS_IF([test x"$hypre_using_cuda_streams" == x"yes"],
|
||||
|
||||
62
src/configure
vendored
62
src/configure
vendored
@ -859,6 +859,7 @@ with_cuda
|
||||
with_hip
|
||||
enable_rocsparse
|
||||
enable_rocblas
|
||||
enable_rocsolver
|
||||
enable_rocrand
|
||||
with_sycl
|
||||
with_cuda_home
|
||||
@ -1562,16 +1563,16 @@ Optional Features:
|
||||
Use device async malloc (default is NO).
|
||||
--enable-gpu-profiling Use NVTX on CUDA, rocTX on HIP (default is NO).
|
||||
--enable-gpu-aware-mpi Use GPU memory aware MPI
|
||||
--enable-rocsparse Use rocSPARSE (default is YES).
|
||||
--enable-rocsparse Use rocSPARSE (default is YES with HIP, otherwise
|
||||
NO).
|
||||
--enable-rocblas Use rocBLAS (default is NO).
|
||||
--enable-rocrand Use rocRAND (default is YES).
|
||||
--enable-cublas Use cuBLAS (default is YES with CUDA, otherwise is
|
||||
NO).
|
||||
--enable-curand Use cuRAND (default is YES with CUDA, otherwise is
|
||||
NO).
|
||||
--enable-rocsolver Use rocSOLVER (default is NO).
|
||||
--enable-rocrand Use rocRAND (default is YES with HIP, otherwise NO).
|
||||
--enable-cublas Use cuBLAS (default is YES with CUDA, otherwise NO).
|
||||
--enable-curand Use cuRAND (default is YES with CUDA, otherwise NO).
|
||||
--enable-cuda-streams Use CUDA streams (default is YES with CUDA,
|
||||
otherwise is NO).
|
||||
--enable-cusparse Use cuSPARSE (default is YES with CUDA, otherwise is
|
||||
otherwise NO).
|
||||
--enable-cusparse Use cuSPARSE (default is YES with CUDA, otherwise
|
||||
NO).
|
||||
--enable-cusolver Use cuSOLVER (default is NO).
|
||||
--enable-onemklsparse Use oneMKL sparse (default is YES).
|
||||
@ -3347,6 +3348,7 @@ hypre_cxxstd=11
|
||||
hypre_using_hip=no
|
||||
hypre_using_rocsparse=no
|
||||
hypre_using_rocblas=no
|
||||
hypre_using_rocsolver=no
|
||||
hypre_using_rocrand=no
|
||||
|
||||
hypre_found_hip=no
|
||||
@ -4598,7 +4600,11 @@ then :
|
||||
*) hypre_using_rocsparse=yes ;;
|
||||
esac
|
||||
else $as_nop
|
||||
hypre_using_rocsparse=yes
|
||||
case "$hypre_using_hip" in
|
||||
yes) hypre_using_rocsparse=yes ;;
|
||||
no) hypre_using_rocsparse=no ;;
|
||||
*) hypre_using_rocsparse=no ;;
|
||||
esac
|
||||
|
||||
fi
|
||||
|
||||
@ -4617,6 +4623,20 @@ else $as_nop
|
||||
fi
|
||||
|
||||
|
||||
# Check whether --enable-rocsolver was given.
|
||||
if test ${enable_rocsolver+y}
|
||||
then :
|
||||
enableval=$enable_rocsolver; case "${enableval}" in
|
||||
yes) hypre_using_rocsolver=yes; hypre_using_rocblas=yes ;;
|
||||
no) hypre_using_rocsolver=no ;;
|
||||
*) hypre_using_rocsolver=no ;;
|
||||
esac
|
||||
else $as_nop
|
||||
hypre_using_rocsolver=no
|
||||
|
||||
fi
|
||||
|
||||
|
||||
# Check whether --enable-rocrand was given.
|
||||
if test ${enable_rocrand+y}
|
||||
then :
|
||||
@ -4626,7 +4646,11 @@ then :
|
||||
*) hypre_using_rocrand=yes ;;
|
||||
esac
|
||||
else $as_nop
|
||||
hypre_using_rocrand=yes
|
||||
case "$hypre_using_hip" in
|
||||
yes) hypre_using_rocrand=yes ;;
|
||||
no) hypre_using_rocrand=no ;;
|
||||
*) hypre_using_rocrand=no ;;
|
||||
esac
|
||||
|
||||
fi
|
||||
|
||||
@ -10691,15 +10715,25 @@ then :
|
||||
|
||||
printf "%s\n" "#define HYPRE_USING_ROCSPARSE 1" >>confdefs.h
|
||||
|
||||
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocsparse"
|
||||
HYPRE_HIP_LIBS+=" -lrocsparse"
|
||||
|
||||
fi
|
||||
|
||||
if test x"$hypre_using_rocblas" == x"yes"
|
||||
if test x"$hypre_using_rocblas" == x"yes"
|
||||
then :
|
||||
|
||||
printf "%s\n" "#define HYPRE_USING_ROCBLAS 1" >>confdefs.h
|
||||
|
||||
HYPRE_HIP_LIBS+=" -lrocblas"
|
||||
|
||||
fi
|
||||
|
||||
if test x"$hypre_using_rocsolver" == x"yes"
|
||||
then :
|
||||
|
||||
printf "%s\n" "#define HYPRE_USING_ROCSOLVER 1" >>confdefs.h
|
||||
|
||||
HYPRE_HIP_LIBS+=" -lrocsolver"
|
||||
|
||||
fi
|
||||
|
||||
@ -10708,7 +10742,7 @@ then :
|
||||
|
||||
printf "%s\n" "#define HYPRE_USING_ROCRAND 1" >>confdefs.h
|
||||
|
||||
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocrand"
|
||||
HYPRE_HIP_LIBS+=" -lrocrand"
|
||||
|
||||
fi
|
||||
|
||||
@ -10717,7 +10751,7 @@ then :
|
||||
|
||||
printf "%s\n" "#define HYPRE_USING_ROCTX 1" >>confdefs.h
|
||||
|
||||
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lroctx64"
|
||||
HYPRE_HIP_LIBS+=" -lroctx64"
|
||||
|
||||
fi
|
||||
|
||||
|
||||
@ -1511,6 +1511,7 @@ typedef struct
|
||||
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleVendorSolverHandle(hypre_handle) hypre_DeviceDataVendorSolverHandle(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle))
|
||||
|
||||
@ -71,9 +71,22 @@ using hypre_DeviceItem = void*;
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_profiler_api.h>
|
||||
|
||||
#if defined(HYPRE_USING_CURAND)
|
||||
#include <curand.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUBLAS)
|
||||
#include <cublas_v2.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSPARSE)
|
||||
#include <cusparse.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER)
|
||||
#include <cusolverDn.h>
|
||||
#endif
|
||||
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION Undefined!
|
||||
@ -110,8 +123,16 @@ using hypre_DeviceItem = void*;
|
||||
using hypre_DeviceItem = void*;
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#if defined(HYPRE_USING_ROCBLAS)
|
||||
#include <rocblas/rocblas.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCSPARSE)
|
||||
#include <rocsparse.h>
|
||||
#include <rocsparse/rocsparse.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCSOLVER)
|
||||
#include <rocsolver/rocsolver.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCRAND)
|
||||
@ -445,6 +466,14 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
||||
hypre_assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_ROCBLAS_CALL(call) do { \
|
||||
rocblas_status err = call; \
|
||||
if (rocblas_status_success != err) { \
|
||||
printf("rocBLAS ERROR (code = %d, %s) at %s:%d\n", \
|
||||
err, rocblas_status_to_string(err), __FILE__, __LINE__); \
|
||||
hypre_assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_CUSPARSE_CALL(call) do { \
|
||||
cusparseStatus_t err = call; \
|
||||
if (CUSPARSE_STATUS_SUCCESS != err) { \
|
||||
@ -461,6 +490,22 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
||||
assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_CUSOLVER_CALL(call) do { \
|
||||
cusolverStatus_t err = call; \
|
||||
if (CUSOLVER_STATUS_SUCCESS != err) { \
|
||||
printf("cuSOLVER ERROR (code = %d) at %s:%d\n", \
|
||||
err, __FILE__, __LINE__); \
|
||||
hypre_assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_ROCSOLVER_CALL(call) do { \
|
||||
rocblas_status err = call; \
|
||||
if (rocblas_status_success != err) { \
|
||||
printf("rocSOLVER ERROR (code = %d, %s) at %s:%d\n", \
|
||||
err, rocblas_status_to_string(err), __FILE__, __LINE__); \
|
||||
assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_CURAND_CALL(call) do { \
|
||||
curandStatus_t err = call; \
|
||||
if (CURAND_STATUS_SUCCESS != err) { \
|
||||
@ -513,6 +558,12 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
||||
struct hypre_cub_CachingDeviceAllocator;
|
||||
typedef struct hypre_cub_CachingDeviceAllocator hypre_cub_CachingDeviceAllocator;
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER)
|
||||
typedef cusolverDnHandle_t vendorSolverHandle_t;
|
||||
#elif defined(HYPRE_USING_ROCSOLVER)
|
||||
typedef rocblas_handle vendorSolverHandle_t;
|
||||
#endif
|
||||
|
||||
struct hypre_DeviceData
|
||||
{
|
||||
#if defined(HYPRE_USING_CURAND)
|
||||
@ -535,6 +586,10 @@ struct hypre_DeviceData
|
||||
rocsparse_handle cusparse_handle;
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
|
||||
vendorSolverHandle_t vendor_solver_handle;
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUDA_STREAMS)
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
cudaStream_t streams[HYPRE_MAX_NUM_STREAMS];
|
||||
@ -625,37 +680,41 @@ struct hypre_DeviceData
|
||||
#define hypre_DeviceDataUseGpuRand(data) ((data) -> use_gpu_rand)
|
||||
|
||||
hypre_DeviceData* hypre_DeviceDataCreate();
|
||||
void hypre_DeviceDataDestroy(hypre_DeviceData* data);
|
||||
void hypre_DeviceDataDestroy(hypre_DeviceData* data);
|
||||
|
||||
#if defined(HYPRE_USING_CURAND)
|
||||
curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
|
||||
curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCRAND)
|
||||
rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
|
||||
rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUBLAS)
|
||||
cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data);
|
||||
cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSPARSE)
|
||||
cusparseHandle_t hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
|
||||
cusparseHandle_t hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCSPARSE)
|
||||
rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
|
||||
rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
|
||||
vendorSolverHandle_t hypre_DeviceDataVendorSolverHandle(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
#elif defined(HYPRE_USING_HIP)
|
||||
hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
#elif defined(HYPRE_USING_SYCL)
|
||||
sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
// Data structure and accessor routines for Cuda Sparse Triangular Matrices
|
||||
|
||||
@ -130,6 +130,17 @@ hypre_DeviceDataDestroy(hypre_DeviceData *data)
|
||||
}
|
||||
#endif // #if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
|
||||
if (data->vendor_solver_handle)
|
||||
{
|
||||
#if defined(HYPRE_USING_CUSOLVER)
|
||||
HYPRE_CUSOLVER_CALL(cusolverDnDestroy(data->vendor_solver_handle));
|
||||
#else
|
||||
HYPRE_ROCBLAS_CALL(rocblas_destroy_handle(data->vendor_solver_handle));
|
||||
#endif
|
||||
}
|
||||
#endif // #if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
|
||||
|
||||
#if defined(HYPRE_USING_CUDA_STREAMS)
|
||||
for (HYPRE_Int i = 0; i < HYPRE_MAX_NUM_STREAMS; i++)
|
||||
{
|
||||
@ -2813,6 +2824,38 @@ hypre_DeviceDataCusparseHandle(hypre_DeviceData *data)
|
||||
}
|
||||
#endif // defined(HYPRE_USING_ROCSPARSE)
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
|
||||
|
||||
/*--------------------------------------------------------------------
|
||||
* hypre_DeviceDataVendorSolverHandle
|
||||
*--------------------------------------------------------------------*/
|
||||
|
||||
vendorSolverHandle_t
|
||||
hypre_DeviceDataVendorSolverHandle(hypre_DeviceData *data)
|
||||
{
|
||||
if (data->vendor_solver_handle)
|
||||
{
|
||||
return data->vendor_solver_handle;
|
||||
}
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER)
|
||||
cusolverDnHandle_t handle;
|
||||
|
||||
HYPRE_CUSOLVER_CALL( cusolverDnCreate(&handle) );
|
||||
HYPRE_CUSOLVER_CALL( cusolverDnSetStream(handle, hypre_DeviceDataComputeStream(data)) );
|
||||
#else
|
||||
rocblas_handle handle;
|
||||
|
||||
HYPRE_ROCBLAS_CALL( rocblas_create_handle(&handle) );
|
||||
HYPRE_ROCBLAS_CALL( rocblas_set_stream(handle, hypre_DeviceDataComputeStream(data)) );
|
||||
#endif
|
||||
|
||||
data->vendor_solver_handle = handle;
|
||||
|
||||
return handle;
|
||||
}
|
||||
#endif // defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
|
||||
|
||||
#endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
|
||||
|
||||
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
|
||||
@ -19,9 +19,22 @@ using hypre_DeviceItem = void*;
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_profiler_api.h>
|
||||
|
||||
#if defined(HYPRE_USING_CURAND)
|
||||
#include <curand.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUBLAS)
|
||||
#include <cublas_v2.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSPARSE)
|
||||
#include <cusparse.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER)
|
||||
#include <cusolverDn.h>
|
||||
#endif
|
||||
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION Undefined!
|
||||
@ -58,8 +71,16 @@ using hypre_DeviceItem = void*;
|
||||
using hypre_DeviceItem = void*;
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#if defined(HYPRE_USING_ROCBLAS)
|
||||
#include <rocblas/rocblas.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCSPARSE)
|
||||
#include <rocsparse.h>
|
||||
#include <rocsparse/rocsparse.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCSOLVER)
|
||||
#include <rocsolver/rocsolver.h>
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCRAND)
|
||||
@ -393,6 +414,14 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
||||
hypre_assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_ROCBLAS_CALL(call) do { \
|
||||
rocblas_status err = call; \
|
||||
if (rocblas_status_success != err) { \
|
||||
printf("rocBLAS ERROR (code = %d, %s) at %s:%d\n", \
|
||||
err, rocblas_status_to_string(err), __FILE__, __LINE__); \
|
||||
hypre_assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_CUSPARSE_CALL(call) do { \
|
||||
cusparseStatus_t err = call; \
|
||||
if (CUSPARSE_STATUS_SUCCESS != err) { \
|
||||
@ -409,6 +438,22 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
||||
assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_CUSOLVER_CALL(call) do { \
|
||||
cusolverStatus_t err = call; \
|
||||
if (CUSOLVER_STATUS_SUCCESS != err) { \
|
||||
printf("cuSOLVER ERROR (code = %d) at %s:%d\n", \
|
||||
err, __FILE__, __LINE__); \
|
||||
hypre_assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_ROCSOLVER_CALL(call) do { \
|
||||
rocblas_status err = call; \
|
||||
if (rocblas_status_success != err) { \
|
||||
printf("rocSOLVER ERROR (code = %d, %s) at %s:%d\n", \
|
||||
err, rocblas_status_to_string(err), __FILE__, __LINE__); \
|
||||
assert(0); exit(1); \
|
||||
} } while(0)
|
||||
|
||||
#define HYPRE_CURAND_CALL(call) do { \
|
||||
curandStatus_t err = call; \
|
||||
if (CURAND_STATUS_SUCCESS != err) { \
|
||||
@ -461,6 +506,12 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
||||
struct hypre_cub_CachingDeviceAllocator;
|
||||
typedef struct hypre_cub_CachingDeviceAllocator hypre_cub_CachingDeviceAllocator;
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER)
|
||||
typedef cusolverDnHandle_t vendorSolverHandle_t;
|
||||
#elif defined(HYPRE_USING_ROCSOLVER)
|
||||
typedef rocblas_handle vendorSolverHandle_t;
|
||||
#endif
|
||||
|
||||
struct hypre_DeviceData
|
||||
{
|
||||
#if defined(HYPRE_USING_CURAND)
|
||||
@ -483,6 +534,10 @@ struct hypre_DeviceData
|
||||
rocsparse_handle cusparse_handle;
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
|
||||
vendorSolverHandle_t vendor_solver_handle;
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUDA_STREAMS)
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
cudaStream_t streams[HYPRE_MAX_NUM_STREAMS];
|
||||
@ -573,37 +628,41 @@ struct hypre_DeviceData
|
||||
#define hypre_DeviceDataUseGpuRand(data) ((data) -> use_gpu_rand)
|
||||
|
||||
hypre_DeviceData* hypre_DeviceDataCreate();
|
||||
void hypre_DeviceDataDestroy(hypre_DeviceData* data);
|
||||
void hypre_DeviceDataDestroy(hypre_DeviceData* data);
|
||||
|
||||
#if defined(HYPRE_USING_CURAND)
|
||||
curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
|
||||
curandGenerator_t hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCRAND)
|
||||
rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
|
||||
rocrand_generator hypre_DeviceDataCurandGenerator(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUBLAS)
|
||||
cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data);
|
||||
cublasHandle_t hypre_DeviceDataCublasHandle(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSPARSE)
|
||||
cusparseHandle_t hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
|
||||
cusparseHandle_t hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_ROCSPARSE)
|
||||
rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
|
||||
rocsparse_handle hypre_DeviceDataCusparseHandle(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
|
||||
vendorSolverHandle_t hypre_DeviceDataVendorSolverHandle(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
cudaStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
cudaStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
#elif defined(HYPRE_USING_HIP)
|
||||
hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
hipStream_t hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
hipStream_t hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
#elif defined(HYPRE_USING_SYCL)
|
||||
sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
sycl::queue* hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i);
|
||||
sycl::queue* hypre_DeviceDataComputeStream(hypre_DeviceData *data);
|
||||
#endif
|
||||
|
||||
// Data structure and accessor routines for Cuda Sparse Triangular Matrices
|
||||
|
||||
@ -306,6 +306,10 @@ HYPRE_Init(void)
|
||||
hypre_HandleCurandGenerator(_hypre_handle);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUSOLVER) || defined(HYPRE_USING_ROCSOLVER)
|
||||
hypre_HandleVendorSolverHandle(_hypre_handle);
|
||||
#endif
|
||||
|
||||
/* Check if cuda arch flags in compiling match the device */
|
||||
#if defined(HYPRE_USING_CUDA) && defined(HYPRE_DEBUG)
|
||||
hypre_CudaCompileFlagCheck();
|
||||
@ -629,4 +633,3 @@ HYPRE_GetExecutionPolicy(HYPRE_ExecutionPolicy *exec_policy)
|
||||
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
|
||||
@ -57,6 +57,7 @@ typedef struct
|
||||
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCusparseHandle(hypre_handle) hypre_DeviceDataCusparseHandle(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleVendorSolverHandle(hypre_handle) hypre_DeviceDataVendorSolverHandle(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleComputeStream(hypre_handle) hypre_DeviceDataComputeStream(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCubBinGrowth(hypre_handle) hypre_DeviceDataCubBinGrowth(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCubMinBin(hypre_handle) hypre_DeviceDataCubMinBin(hypre_HandleDeviceData(hypre_handle))
|
||||
|
||||
Loading…
Reference in New Issue
Block a user