Cuda versions (#879)

This PR adds support and regression tests for all the versions from CUDA 9.0 to 12.0.
This commit is contained in:
Rui Peng Li 2023-06-15 06:26:12 -07:00 committed by GitHub
parent 7d1d9ca95c
commit 72f5f3e136
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 231 additions and 46 deletions

148
AUTOTEST/machine-lassen-cuda.sh Executable file
View File

@ -0,0 +1,148 @@
#!/bin/sh
# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
# HYPRE Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
# Name of this test: the script's own file name without the .sh suffix
testname=$(basename $0 .sh)

# Print the usage message and stop when help is requested
if [ "$1" = "-h" ] || [ "$1" = "-help" ]
then
cat <<EOF
**** Only run this script on the lassen cluster ****
$0 [-h|-help] {src_dir}
where: -h|-help prints this usage information and exits
{src_dir} is the hypre source directory
This script runs a number of tests suitable for the lassen cluster.
Example usage: $0 ../src
EOF
exit
fi

# Working directories: results are collected in <cwd>/<testname>.dir,
# which is removed and recreated at the start of every run
test_dir=$(pwd)
output_dir=$test_dir/$testname.dir
rm -rf $output_dir
mkdir -p $output_dir

# Absolute path of the hypre source directory passed as the first argument
src_dir=$(cd $1; pwd)
shift

# Make options used by the build tests that follow
mo="-j test"
#############################################
## Various CUDA version build (only) tests ##
#############################################

# Every stanza below follows the same pattern:
#   1. load the requested cuda module;
#   2. pipe both stdout and stderr of "module list" into grep, so that a
#      "None found" line is printed if the module is not loaded;
#   3. load the host compiler module (xl or gcc);
#   4. set the configure options in $co and run basic.sh through test.sh
#      with the make options in $mo (no -ro: run options are passed, so
#      nothing beyond the build is requested);
#   5. rename the results to basic-cuda<major>_<minor> inside $output_dir.
#
# "2>&1 |" is used instead of the bash-only "|&" shorthand because this
# script is started with #!/bin/sh.

# CUDA 9.0 with UM [no run]
module -q load cuda/9.0
module list cuda/9.0 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda9_0

# CUDA 9.1 with UM [no run]
# (this stanza builds with gcc through the MPI compiler wrappers, not xl)
module -q load cuda/9.1
module list cuda/9.1 2>&1 | grep "None found"
module -q load gcc
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\' CC=mpicc CXX=mpicxx"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda9_1

# CUDA 9.2 with UM [no run]
module -q load cuda/9.2
module list cuda/9.2 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda9_2

# CUDA 10.2 with UM [no run]
module -q load cuda/10.2
module list cuda/10.2 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda10_2

# CUDA 11.0 with UM [no run]
module -q load cuda/11.0
module list cuda/11.0 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11_0

# CUDA 11.1 with UM [no run]
module -q load cuda/11.1
module list cuda/11.1 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11_1

# CUDA 11.2 with UM with async malloc [no run]
# (from here on the configure line also passes --enable-device-malloc-async)
module -q load cuda/11.2
module list cuda/11.2 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11_2

# CUDA 11.3 with UM with async malloc [no run]
module -q load cuda/11.3
module list cuda/11.3 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11_3

# CUDA 11.4 with UM with async malloc [no run]
module -q load cuda/11.4
module list cuda/11.4 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11_4

# CUDA 11.5 with UM with async malloc [no run]
module -q load cuda/11.5
module list cuda/11.5 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11_5

# CUDA 11.6 with UM with async malloc [no run]
module -q load cuda/11.6
module list cuda/11.6 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11_6

# CUDA 11.7 with UM with async malloc [no run]
module -q load cuda/11.7
module list cuda/11.7 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11_7

# CUDA 11.8 with UM with async malloc [no run]
module -q load cuda/11.8
module list cuda/11.8 2>&1 | grep "None found"
module -q load xl
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11_8

View File

@ -45,9 +45,13 @@ atol="3e-15"
#save=`echo $(hostname) | sed 's/[0-9]\+$//'`
save="lassen"
##########
## CUDA ##
##########
######################
## DEFAULT CUDA ##
## (cuda/10.1.243) ##
######################
module -q load cuda
module -q load xl
# CUDA with UM in debug mode [ij, ams, struct, sstruct]
co="--with-cuda --enable-unified-memory --enable-persistent --enable-debug --with-gpu-arch=70 --with-memory-tracker --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
@ -62,7 +66,7 @@ ro="-error -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda-um-with-errors
#CUDA with UM and mixed-int
# CUDA with UM and mixed-int
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
@ -74,7 +78,7 @@ ro="-gpumemcheck -rt -mpibind -cudamemcheck -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda-um-shared
#CUDA with UM and single precision
# CUDA with UM and single precision
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-single -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
@ -140,34 +144,23 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct
####################################
## latest CUDA build (only) tests ##
####################################
# CUDA 11
module -q load cuda/11
module list cuda/11 |& grep "None found"
mo="-j test"
# CUDA with UM with async malloc [no run]
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda11
# CUDA 12
module -q load cuda/12
module list cuda/12 |& grep "None found"
module -q load gcc/8.3.1
#######################
## latest CUDA tests ##
## cuda/12.0.0 ##
#######################
rtol="1e-2"
atol="1e-6"
mo="-j test"
# CUDA with UM in debug mode [ij, ams, struct, sstruct]
# CUDA 12.0 + GCC with UM in debug mode, thrust nosync [ij, ams, struct, sstruct]
module -q load gcc/8.3.1
module -q load cuda/12.0
module list cuda/12.0 |& grep "None found"
co="--with-cuda --enable-unified-memory --enable-thrust-nosync --enable-debug --with-gpu-arch=70 CC=mpicc CXX=mpicxx"
ro="-ij-gpu -ams -struct -sstruct -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda12
./renametest.sh basic $output_dir/basic-cuda12_0
# Echo to stderr all nonempty error files in $output_dir
for errfile in $( find $output_dir ! -size 0 -name "*.err" )

View File

@ -844,7 +844,7 @@ hypre_ParCSRMatrixExtractBlockDiagDevice( hypre_ParCSRMatrix *A,
#if defined(HYPRE_USING_CUBLAS)
HYPRE_CUBLAS_CALL(hypre_cublas_getriBatched(hypre_HandleCublasHandle(hypre_handle()),
blk_size,
tmpdiag_aop,
(const HYPRE_Real **) tmpdiag_aop,
blk_size,
pivots,
diag_aop,

View File

@ -12,6 +12,7 @@ extern "C" {
#endif
#if defined(HYPRE_USING_CUSPARSE)
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
cusparseSpMatDescr_t hypre_CSRMatrixToCusparseSpMat(const hypre_CSRMatrix *A, HYPRE_Int offset);
cusparseSpMatDescr_t hypre_CSRMatrixToCusparseSpMat_core( HYPRE_Int n, HYPRE_Int m,
@ -26,6 +27,7 @@ cusparseDnMatDescr_t hypre_VectorToCusparseDnMat_core(HYPRE_Complex *x_data, HYP
HYPRE_Int ncol, HYPRE_Int order);
cusparseDnMatDescr_t hypre_VectorToCusparseDnMat(const hypre_Vector *x);
#endif
HYPRE_Int hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int m, HYPRE_Int k, HYPRE_Int n,
cusparseMatDescr_t descr_A, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex *d_a,

View File

@ -1132,7 +1132,6 @@ hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix,
hypre_BoxGetSize(int_box, loop_size);
#if defined(HYPRE_USING_GPU)
if ( hypre_GetExecPolicy1(memory_location) == HYPRE_EXEC_DEVICE )
{
hypre_assert(ndim <= 3);
@ -1197,8 +1196,7 @@ hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix,
#undef DEVICE_VAR
#define DEVICE_VAR
}
else
#endif
#else
{
hypre_BoxLoop2Begin(ndim, loop_size,
box, start, stride, mi,
@ -1225,6 +1223,7 @@ hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix,
}
hypre_BoxLoop2End(mi, vi);
}
#endif
} /* end loop through boxman to entries */
hypre_TFree(boxman_to_entries, HYPRE_MEMORY_HOST);

View File

@ -44,15 +44,15 @@ Iters ||r||_2 conv.rate ||r||_2/||b||_2
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
1: hypre error in file "par_amg_solve.c", line 339, error code = 256
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence

View File

@ -42,13 +42,13 @@ Iters ||r||_2 conv.rate ||r||_2/||b||_2
2 5.536410e+01 1.000000 1.750767e+00
0: hypre error in file "par_amg_solve.c", line 339, error code = 256
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
1: hypre error in file "par_amg_solve.c", line 339, error code = 256
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence

View File

@ -112,6 +112,14 @@ using hypre_DeviceItem = void*;
#define CUB_IGNORE_DEPRECATED_CPP_DIALECT
#endif
/* Provide CUSPARSE_VERSION when it is not already defined.
 * If the separate major/minor/patch macros are all available, combine them
 * as major * 1000 + minor * 100 + patch; otherwise fall back to CUDA_VERSION. */
#ifndef CUSPARSE_VERSION
#if defined(CUSPARSE_VER_MAJOR) && defined(CUSPARSE_VER_MINOR) && defined(CUSPARSE_VER_PATCH)
#define CUSPARSE_VERSION (CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR * 100 + CUSPARSE_VER_PATCH)
#else
#define CUSPARSE_VERSION CUDA_VERSION
#endif
#endif
#define CUSPARSE_NEWAPI_VERSION 11000
#define CUSPARSE_NEWSPMM_VERSION 11401
#define CUDA_MALLOCASYNC_VERSION 11020
@ -531,6 +539,7 @@ using hypre_DeviceItem = sycl::nd_item<3>;
hypre_assert(0); exit(1); \
} } while(0)
#if CUSPARSE_VERSION >= 10300
#define HYPRE_CUSPARSE_CALL(call) do { \
cusparseStatus_t err = call; \
if (CUSPARSE_STATUS_SUCCESS != err) { \
@ -538,6 +547,15 @@ using hypre_DeviceItem = sycl::nd_item<3>;
err, cusparseGetErrorString(err), __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
#else
/* Fallback variant of HYPRE_CUSPARSE_CALL: evaluates `call`, and if the
 * returned status is not CUSPARSE_STATUS_SUCCESS prints the numeric status
 * code with the file and line of the call site, then calls hypre_assert(0)
 * and exit(1). Unlike the other variant it does not call
 * cusparseGetErrorString.
 * NOTE(review): presumably that function is unavailable in the cuSPARSE
 * versions this branch is compiled for -- confirm. */
#define HYPRE_CUSPARSE_CALL(call) do { \
cusparseStatus_t err = call; \
if (CUSPARSE_STATUS_SUCCESS != err) { \
printf("CUSPARSE ERROR (code = %d) at %s:%d\n", \
err, __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
#endif
#define HYPRE_ROCSPARSE_CALL(call) do { \
rocsparse_status err = call; \
@ -1901,7 +1919,9 @@ void hypre_DeviceDataCubCachingAllocatorDestroy(hypre_DeviceData *data);
cudaDataType hypre_HYPREComplexToCudaDataType();
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
cusparseIndexType_t hypre_HYPREIntToCusparseIndexType();
#endif
#endif // #if defined(HYPRE_USING_CUSPARSE)

View File

@ -2719,6 +2719,7 @@ hypre_HYPREComplexToCudaDataType()
#endif // #if defined(HYPRE_COMPLEX)
}
#if CUSPARSE_VERSION >= 10300
/*--------------------------------------------------------------------
* hypre_HYPREIntToCusparseIndexType
*
@ -2744,6 +2745,8 @@ hypre_HYPREIntToCusparseIndexType()
return CUSPARSE_INDEX_32I;
#endif
}
#endif
#endif // #if defined(HYPRE_USING_CUSPARSE)
#if defined(HYPRE_USING_CUBLAS)

View File

@ -60,6 +60,14 @@ using hypre_DeviceItem = void*;
#define CUB_IGNORE_DEPRECATED_CPP_DIALECT
#endif
/* Provide CUSPARSE_VERSION when it is not already defined.
 * If the separate major/minor/patch macros are all available, combine them
 * as major * 1000 + minor * 100 + patch; otherwise fall back to CUDA_VERSION. */
#ifndef CUSPARSE_VERSION
#if defined(CUSPARSE_VER_MAJOR) && defined(CUSPARSE_VER_MINOR) && defined(CUSPARSE_VER_PATCH)
#define CUSPARSE_VERSION (CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR * 100 + CUSPARSE_VER_PATCH)
#else
#define CUSPARSE_VERSION CUDA_VERSION
#endif
#endif
#define CUSPARSE_NEWAPI_VERSION 11000
#define CUSPARSE_NEWSPMM_VERSION 11401
#define CUDA_MALLOCASYNC_VERSION 11020
@ -479,6 +487,7 @@ using hypre_DeviceItem = sycl::nd_item<3>;
hypre_assert(0); exit(1); \
} } while(0)
#if CUSPARSE_VERSION >= 10300
#define HYPRE_CUSPARSE_CALL(call) do { \
cusparseStatus_t err = call; \
if (CUSPARSE_STATUS_SUCCESS != err) { \
@ -486,6 +495,15 @@ using hypre_DeviceItem = sycl::nd_item<3>;
err, cusparseGetErrorString(err), __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
#else
/* Fallback variant of HYPRE_CUSPARSE_CALL: evaluates `call`, and if the
 * returned status is not CUSPARSE_STATUS_SUCCESS prints the numeric status
 * code with the file and line of the call site, then calls hypre_assert(0)
 * and exit(1). Unlike the other variant it does not call
 * cusparseGetErrorString.
 * NOTE(review): presumably that function is unavailable in the cuSPARSE
 * versions this branch is compiled for -- confirm. */
#define HYPRE_CUSPARSE_CALL(call) do { \
cusparseStatus_t err = call; \
if (CUSPARSE_STATUS_SUCCESS != err) { \
printf("CUSPARSE ERROR (code = %d) at %s:%d\n", \
err, __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
#endif
#define HYPRE_ROCSPARSE_CALL(call) do { \
rocsparse_status err = call; \
@ -1849,7 +1867,9 @@ void hypre_DeviceDataCubCachingAllocatorDestroy(hypre_DeviceData *data);
cudaDataType hypre_HYPREComplexToCudaDataType();
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
cusparseIndexType_t hypre_HYPREIntToCusparseIndexType();
#endif
#endif // #if defined(HYPRE_USING_CUSPARSE)