Cuda versions (#879)
This PR adds support and regression tests for all the versions from CUDA 9.0 to 12.0.
This commit is contained in:
parent
7d1d9ca95c
commit
72f5f3e136
148
AUTOTEST/machine-lassen-cuda.sh
Executable file
148
AUTOTEST/machine-lassen-cuda.sh
Executable file
@ -0,0 +1,148 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
|
||||||
|
# HYPRE Project Developers. See the top-level COPYRIGHT file for details.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||||
|
|
||||||
|
testname=`basename $0 .sh`
|
||||||
|
|
||||||
|
# Echo usage information
|
||||||
|
case $1 in
|
||||||
|
-h|-help)
|
||||||
|
cat <<EOF
|
||||||
|
|
||||||
|
**** Only run this script on the lassen cluster ****
|
||||||
|
|
||||||
|
$0 [-h|-help] {src_dir}
|
||||||
|
|
||||||
|
where: -h|-help prints this usage information and exits
|
||||||
|
{src_dir} is the hypre source directory
|
||||||
|
|
||||||
|
This script runs a number of tests suitable for the lassen cluster.
|
||||||
|
|
||||||
|
Example usage: $0 ../src
|
||||||
|
|
||||||
|
EOF
|
||||||
|
exit
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# Setup
|
||||||
|
test_dir=`pwd`
|
||||||
|
output_dir=`pwd`/$testname.dir
|
||||||
|
rm -fr $output_dir
|
||||||
|
mkdir -p $output_dir
|
||||||
|
src_dir=`cd $1; pwd`
|
||||||
|
shift
|
||||||
|
|
||||||
|
# Basic build and run tests
|
||||||
|
mo="-j test"
|
||||||
|
|
||||||
|
#############################################
|
||||||
|
## Various CUDA version build (only) tests ##
|
||||||
|
#############################################
|
||||||
|
|
||||||
|
# CUDA 9.0 with UM [no run]
|
||||||
|
module -q load cuda/9.0
|
||||||
|
module list cuda/9.0 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda9_0
|
||||||
|
|
||||||
|
# CUDA 9.1 with UM [no run]
|
||||||
|
module -q load cuda/9.1
|
||||||
|
module list cuda/9.1 |& grep "None found"
|
||||||
|
module -q load gcc
|
||||||
|
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\' CC=mpicc CXX=mpicxx"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda9_1
|
||||||
|
|
||||||
|
# CUDA 9.2 with UM [no run]
|
||||||
|
module -q load cuda/9.2
|
||||||
|
module list cuda/9.2 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda9_2
|
||||||
|
|
||||||
|
# CUDA 10.2 with UM [no run]
|
||||||
|
module -q load cuda/10.2
|
||||||
|
module list cuda/10.2 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda10_2
|
||||||
|
|
||||||
|
# CUDA 11.0 with UM [no run]
|
||||||
|
module -q load cuda/11.0
|
||||||
|
module list cuda/11.0 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11_0
|
||||||
|
|
||||||
|
# CUDA 11.1 with UM [no run]
|
||||||
|
module -q load cuda/11.1
|
||||||
|
module list cuda/11.1 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11_1
|
||||||
|
|
||||||
|
# CUDA 11.2 with UM with async malloc [no run]
|
||||||
|
module -q load cuda/11.2
|
||||||
|
module list cuda/11.2 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11_2
|
||||||
|
|
||||||
|
# CUDA 11.3 with UM with async malloc [no run]
|
||||||
|
module -q load cuda/11.3
|
||||||
|
module list cuda/11.3 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11_3
|
||||||
|
|
||||||
|
# CUDA 11.4 with UM with async malloc [no run]
|
||||||
|
module -q load cuda/11.4
|
||||||
|
module list cuda/11.4 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11_4
|
||||||
|
|
||||||
|
# CUDA 11.5 with UM with async malloc [no run]
|
||||||
|
module -q load cuda/11.5
|
||||||
|
module list cuda/11.5 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11_5
|
||||||
|
|
||||||
|
# CUDA 11.6 with UM with async malloc [no run]
|
||||||
|
module -q load cuda/11.6
|
||||||
|
module list cuda/11.6 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11_6
|
||||||
|
|
||||||
|
# CUDA 11.7 with UM with async malloc [no run]
|
||||||
|
module -q load cuda/11.7
|
||||||
|
module list cuda/11.7 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11_7
|
||||||
|
|
||||||
|
# CUDA 11.8 with UM with async malloc [no run]
|
||||||
|
module -q load cuda/11.8
|
||||||
|
module list cuda/11.8 |& grep "None found"
|
||||||
|
module -q load xl
|
||||||
|
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||||
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||||
|
./renametest.sh basic $output_dir/basic-cuda11_8
|
||||||
|
|
||||||
@ -45,9 +45,13 @@ atol="3e-15"
|
|||||||
#save=`echo $(hostname) | sed 's/[0-9]\+$//'`
|
#save=`echo $(hostname) | sed 's/[0-9]\+$//'`
|
||||||
save="lassen"
|
save="lassen"
|
||||||
|
|
||||||
##########
|
######################
|
||||||
## CUDA ##
|
## DEFAULT CUDA ##
|
||||||
##########
|
## (cuda/10.1.243) ##
|
||||||
|
######################
|
||||||
|
|
||||||
|
module -q load cuda
|
||||||
|
module -q load xl
|
||||||
|
|
||||||
# CUDA with UM in debug mode [ij, ams, struct, sstruct]
|
# CUDA with UM in debug mode [ij, ams, struct, sstruct]
|
||||||
co="--with-cuda --enable-unified-memory --enable-persistent --enable-debug --with-gpu-arch=70 --with-memory-tracker --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
co="--with-cuda --enable-unified-memory --enable-persistent --enable-debug --with-gpu-arch=70 --with-memory-tracker --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||||
@ -62,7 +66,7 @@ ro="-error -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
|
|||||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||||
./renametest.sh basic $output_dir/basic-cuda-um-with-errors
|
./renametest.sh basic $output_dir/basic-cuda-um-with-errors
|
||||||
|
|
||||||
#CUDA with UM and mixed-int
|
# CUDA with UM and mixed-int
|
||||||
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||||
ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
|
ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
|
||||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||||
@ -74,7 +78,7 @@ ro="-gpumemcheck -rt -mpibind -cudamemcheck -save ${save}"
|
|||||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||||
./renametest.sh basic $output_dir/basic-cuda-um-shared
|
./renametest.sh basic $output_dir/basic-cuda-um-shared
|
||||||
|
|
||||||
#CUDA with UM and single precision
|
# CUDA with UM and single precision
|
||||||
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||||
ro="-single -rt -mpibind -save ${save}"
|
ro="-single -rt -mpibind -save ${save}"
|
||||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
|
||||||
@ -140,34 +144,23 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
|
|||||||
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
|
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
|
||||||
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct
|
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct
|
||||||
|
|
||||||
####################################
|
#######################
|
||||||
## latest CUDA build (only) tests ##
|
## latest CUDA tests ##
|
||||||
####################################
|
## cuda/12.0.0 ##
|
||||||
|
#######################
|
||||||
# CUDA 11
|
|
||||||
module -q load cuda/11
|
|
||||||
module list cuda/11 |& grep "None found"
|
|
||||||
|
|
||||||
mo="-j test"
|
|
||||||
|
|
||||||
# CUDA with UM with async malloc [no run]
|
|
||||||
co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
|
||||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
|
||||||
./renametest.sh basic $output_dir/basic-cuda11
|
|
||||||
|
|
||||||
# CUDA 12
|
|
||||||
module -q load cuda/12
|
|
||||||
module list cuda/12 |& grep "None found"
|
|
||||||
module -q load gcc/8.3.1
|
|
||||||
|
|
||||||
rtol="1e-2"
|
rtol="1e-2"
|
||||||
atol="1e-6"
|
atol="1e-6"
|
||||||
|
mo="-j test"
|
||||||
|
|
||||||
# CUDA with UM in debug mode [ij, ams, struct, sstruct]
|
# CUDA 12.0 + GCC with UM in debug mode, thrust nosync [ij, ams, struct, sstruct]
|
||||||
|
module -q load gcc/8.3.1
|
||||||
|
module -q load cuda/12.0
|
||||||
|
module list cuda/12.0 |& grep "None found"
|
||||||
co="--with-cuda --enable-unified-memory --enable-thrust-nosync --enable-debug --with-gpu-arch=70 CC=mpicc CXX=mpicxx"
|
co="--with-cuda --enable-unified-memory --enable-thrust-nosync --enable-debug --with-gpu-arch=70 CC=mpicc CXX=mpicxx"
|
||||||
ro="-ij-gpu -ams -struct -sstruct -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
|
ro="-ij-gpu -ams -struct -sstruct -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
|
||||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||||
./renametest.sh basic $output_dir/basic-cuda12
|
./renametest.sh basic $output_dir/basic-cuda12_0
|
||||||
|
|
||||||
# Echo to stderr all nonempty error files in $output_dir
|
# Echo to stderr all nonempty error files in $output_dir
|
||||||
for errfile in $( find $output_dir ! -size 0 -name "*.err" )
|
for errfile in $( find $output_dir ! -size 0 -name "*.err" )
|
||||||
|
|||||||
@ -844,7 +844,7 @@ hypre_ParCSRMatrixExtractBlockDiagDevice( hypre_ParCSRMatrix *A,
|
|||||||
#if defined(HYPRE_USING_CUBLAS)
|
#if defined(HYPRE_USING_CUBLAS)
|
||||||
HYPRE_CUBLAS_CALL(hypre_cublas_getriBatched(hypre_HandleCublasHandle(hypre_handle()),
|
HYPRE_CUBLAS_CALL(hypre_cublas_getriBatched(hypre_HandleCublasHandle(hypre_handle()),
|
||||||
blk_size,
|
blk_size,
|
||||||
tmpdiag_aop,
|
(const HYPRE_Real **) tmpdiag_aop,
|
||||||
blk_size,
|
blk_size,
|
||||||
pivots,
|
pivots,
|
||||||
diag_aop,
|
diag_aop,
|
||||||
|
|||||||
@ -12,6 +12,7 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_USING_CUSPARSE)
|
#if defined(HYPRE_USING_CUSPARSE)
|
||||||
|
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
|
||||||
cusparseSpMatDescr_t hypre_CSRMatrixToCusparseSpMat(const hypre_CSRMatrix *A, HYPRE_Int offset);
|
cusparseSpMatDescr_t hypre_CSRMatrixToCusparseSpMat(const hypre_CSRMatrix *A, HYPRE_Int offset);
|
||||||
|
|
||||||
cusparseSpMatDescr_t hypre_CSRMatrixToCusparseSpMat_core( HYPRE_Int n, HYPRE_Int m,
|
cusparseSpMatDescr_t hypre_CSRMatrixToCusparseSpMat_core( HYPRE_Int n, HYPRE_Int m,
|
||||||
@ -26,6 +27,7 @@ cusparseDnMatDescr_t hypre_VectorToCusparseDnMat_core(HYPRE_Complex *x_data, HYP
|
|||||||
HYPRE_Int ncol, HYPRE_Int order);
|
HYPRE_Int ncol, HYPRE_Int order);
|
||||||
|
|
||||||
cusparseDnMatDescr_t hypre_VectorToCusparseDnMat(const hypre_Vector *x);
|
cusparseDnMatDescr_t hypre_VectorToCusparseDnMat(const hypre_Vector *x);
|
||||||
|
#endif
|
||||||
|
|
||||||
HYPRE_Int hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int m, HYPRE_Int k, HYPRE_Int n,
|
HYPRE_Int hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int m, HYPRE_Int k, HYPRE_Int n,
|
||||||
cusparseMatDescr_t descr_A, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex *d_a,
|
cusparseMatDescr_t descr_A, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex *d_a,
|
||||||
|
|||||||
@ -1132,7 +1132,6 @@ hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix,
|
|||||||
hypre_BoxGetSize(int_box, loop_size);
|
hypre_BoxGetSize(int_box, loop_size);
|
||||||
|
|
||||||
#if defined(HYPRE_USING_GPU)
|
#if defined(HYPRE_USING_GPU)
|
||||||
if ( hypre_GetExecPolicy1(memory_location) == HYPRE_EXEC_DEVICE )
|
|
||||||
{
|
{
|
||||||
hypre_assert(ndim <= 3);
|
hypre_assert(ndim <= 3);
|
||||||
|
|
||||||
@ -1197,8 +1196,7 @@ hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix,
|
|||||||
#undef DEVICE_VAR
|
#undef DEVICE_VAR
|
||||||
#define DEVICE_VAR
|
#define DEVICE_VAR
|
||||||
}
|
}
|
||||||
else
|
#else
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
hypre_BoxLoop2Begin(ndim, loop_size,
|
hypre_BoxLoop2Begin(ndim, loop_size,
|
||||||
box, start, stride, mi,
|
box, start, stride, mi,
|
||||||
@ -1225,6 +1223,7 @@ hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix,
|
|||||||
}
|
}
|
||||||
hypre_BoxLoop2End(mi, vi);
|
hypre_BoxLoop2End(mi, vi);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
} /* end loop through boxman to entries */
|
} /* end loop through boxman to entries */
|
||||||
|
|
||||||
hypre_TFree(boxman_to_entries, HYPRE_MEMORY_HOST);
|
hypre_TFree(boxman_to_entries, HYPRE_MEMORY_HOST);
|
||||||
|
|||||||
@ -44,15 +44,15 @@ Iters ||r||_2 conv.rate ||r||_2/||b||_2
|
|||||||
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
||||||
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
||||||
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
||||||
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
1: hypre error in file "par_amg_solve.c", line 339, error code = 256
|
1: hypre error in file "par_amg_solve.c", line 339, error code = 256
|
||||||
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
||||||
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
||||||
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
||||||
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
|
|||||||
@ -42,13 +42,13 @@ Iters ||r||_2 conv.rate ||r||_2/||b||_2
|
|||||||
2 5.536410e+01 1.000000 1.750767e+00
|
2 5.536410e+01 1.000000 1.750767e+00
|
||||||
0: hypre error in file "par_amg_solve.c", line 339, error code = 256
|
0: hypre error in file "par_amg_solve.c", line 339, error code = 256
|
||||||
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
||||||
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
1: hypre error in file "par_amg_solve.c", line 339, error code = 256
|
1: hypre error in file "par_amg_solve.c", line 339, error code = 256
|
||||||
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG
|
||||||
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence
|
1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence
|
||||||
|
|||||||
@ -112,6 +112,14 @@ using hypre_DeviceItem = void*;
|
|||||||
#define CUB_IGNORE_DEPRECATED_CPP_DIALECT
|
#define CUB_IGNORE_DEPRECATED_CPP_DIALECT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef CUSPARSE_VERSION
|
||||||
|
#if defined(CUSPARSE_VER_MAJOR) && defined(CUSPARSE_VER_MINOR) && defined(CUSPARSE_VER_PATCH)
|
||||||
|
#define CUSPARSE_VERSION (CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR * 100 + CUSPARSE_VER_PATCH)
|
||||||
|
#else
|
||||||
|
#define CUSPARSE_VERSION CUDA_VERSION
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#define CUSPARSE_NEWAPI_VERSION 11000
|
#define CUSPARSE_NEWAPI_VERSION 11000
|
||||||
#define CUSPARSE_NEWSPMM_VERSION 11401
|
#define CUSPARSE_NEWSPMM_VERSION 11401
|
||||||
#define CUDA_MALLOCASYNC_VERSION 11020
|
#define CUDA_MALLOCASYNC_VERSION 11020
|
||||||
@ -531,6 +539,7 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
|||||||
hypre_assert(0); exit(1); \
|
hypre_assert(0); exit(1); \
|
||||||
} } while(0)
|
} } while(0)
|
||||||
|
|
||||||
|
#if CUSPARSE_VERSION >= 10300
|
||||||
#define HYPRE_CUSPARSE_CALL(call) do { \
|
#define HYPRE_CUSPARSE_CALL(call) do { \
|
||||||
cusparseStatus_t err = call; \
|
cusparseStatus_t err = call; \
|
||||||
if (CUSPARSE_STATUS_SUCCESS != err) { \
|
if (CUSPARSE_STATUS_SUCCESS != err) { \
|
||||||
@ -538,6 +547,15 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
|||||||
err, cusparseGetErrorString(err), __FILE__, __LINE__); \
|
err, cusparseGetErrorString(err), __FILE__, __LINE__); \
|
||||||
hypre_assert(0); exit(1); \
|
hypre_assert(0); exit(1); \
|
||||||
} } while(0)
|
} } while(0)
|
||||||
|
#else
|
||||||
|
#define HYPRE_CUSPARSE_CALL(call) do { \
|
||||||
|
cusparseStatus_t err = call; \
|
||||||
|
if (CUSPARSE_STATUS_SUCCESS != err) { \
|
||||||
|
printf("CUSPARSE ERROR (code = %d) at %s:%d\n", \
|
||||||
|
err, __FILE__, __LINE__); \
|
||||||
|
hypre_assert(0); exit(1); \
|
||||||
|
} } while(0)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define HYPRE_ROCSPARSE_CALL(call) do { \
|
#define HYPRE_ROCSPARSE_CALL(call) do { \
|
||||||
rocsparse_status err = call; \
|
rocsparse_status err = call; \
|
||||||
@ -1901,7 +1919,9 @@ void hypre_DeviceDataCubCachingAllocatorDestroy(hypre_DeviceData *data);
|
|||||||
|
|
||||||
cudaDataType hypre_HYPREComplexToCudaDataType();
|
cudaDataType hypre_HYPREComplexToCudaDataType();
|
||||||
|
|
||||||
|
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
|
||||||
cusparseIndexType_t hypre_HYPREIntToCusparseIndexType();
|
cusparseIndexType_t hypre_HYPREIntToCusparseIndexType();
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // #if defined(HYPRE_USING_CUSPARSE)
|
#endif // #if defined(HYPRE_USING_CUSPARSE)
|
||||||
|
|
||||||
|
|||||||
@ -2719,6 +2719,7 @@ hypre_HYPREComplexToCudaDataType()
|
|||||||
#endif // #if defined(HYPRE_COMPLEX)
|
#endif // #if defined(HYPRE_COMPLEX)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if CUSPARSE_VERSION >= 10300
|
||||||
/*--------------------------------------------------------------------
|
/*--------------------------------------------------------------------
|
||||||
* hypre_HYPREIntToCusparseIndexType
|
* hypre_HYPREIntToCusparseIndexType
|
||||||
*
|
*
|
||||||
@ -2744,6 +2745,8 @@ hypre_HYPREIntToCusparseIndexType()
|
|||||||
return CUSPARSE_INDEX_32I;
|
return CUSPARSE_INDEX_32I;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // #if defined(HYPRE_USING_CUSPARSE)
|
#endif // #if defined(HYPRE_USING_CUSPARSE)
|
||||||
|
|
||||||
#if defined(HYPRE_USING_CUBLAS)
|
#if defined(HYPRE_USING_CUBLAS)
|
||||||
|
|||||||
@ -60,6 +60,14 @@ using hypre_DeviceItem = void*;
|
|||||||
#define CUB_IGNORE_DEPRECATED_CPP_DIALECT
|
#define CUB_IGNORE_DEPRECATED_CPP_DIALECT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef CUSPARSE_VERSION
|
||||||
|
#if defined(CUSPARSE_VER_MAJOR) && defined(CUSPARSE_VER_MINOR) && defined(CUSPARSE_VER_PATCH)
|
||||||
|
#define CUSPARSE_VERSION (CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR * 100 + CUSPARSE_VER_PATCH)
|
||||||
|
#else
|
||||||
|
#define CUSPARSE_VERSION CUDA_VERSION
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#define CUSPARSE_NEWAPI_VERSION 11000
|
#define CUSPARSE_NEWAPI_VERSION 11000
|
||||||
#define CUSPARSE_NEWSPMM_VERSION 11401
|
#define CUSPARSE_NEWSPMM_VERSION 11401
|
||||||
#define CUDA_MALLOCASYNC_VERSION 11020
|
#define CUDA_MALLOCASYNC_VERSION 11020
|
||||||
@ -479,6 +487,7 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
|||||||
hypre_assert(0); exit(1); \
|
hypre_assert(0); exit(1); \
|
||||||
} } while(0)
|
} } while(0)
|
||||||
|
|
||||||
|
#if CUSPARSE_VERSION >= 10300
|
||||||
#define HYPRE_CUSPARSE_CALL(call) do { \
|
#define HYPRE_CUSPARSE_CALL(call) do { \
|
||||||
cusparseStatus_t err = call; \
|
cusparseStatus_t err = call; \
|
||||||
if (CUSPARSE_STATUS_SUCCESS != err) { \
|
if (CUSPARSE_STATUS_SUCCESS != err) { \
|
||||||
@ -486,6 +495,15 @@ using hypre_DeviceItem = sycl::nd_item<3>;
|
|||||||
err, cusparseGetErrorString(err), __FILE__, __LINE__); \
|
err, cusparseGetErrorString(err), __FILE__, __LINE__); \
|
||||||
hypre_assert(0); exit(1); \
|
hypre_assert(0); exit(1); \
|
||||||
} } while(0)
|
} } while(0)
|
||||||
|
#else
|
||||||
|
#define HYPRE_CUSPARSE_CALL(call) do { \
|
||||||
|
cusparseStatus_t err = call; \
|
||||||
|
if (CUSPARSE_STATUS_SUCCESS != err) { \
|
||||||
|
printf("CUSPARSE ERROR (code = %d) at %s:%d\n", \
|
||||||
|
err, __FILE__, __LINE__); \
|
||||||
|
hypre_assert(0); exit(1); \
|
||||||
|
} } while(0)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define HYPRE_ROCSPARSE_CALL(call) do { \
|
#define HYPRE_ROCSPARSE_CALL(call) do { \
|
||||||
rocsparse_status err = call; \
|
rocsparse_status err = call; \
|
||||||
@ -1849,7 +1867,9 @@ void hypre_DeviceDataCubCachingAllocatorDestroy(hypre_DeviceData *data);
|
|||||||
|
|
||||||
cudaDataType hypre_HYPREComplexToCudaDataType();
|
cudaDataType hypre_HYPREComplexToCudaDataType();
|
||||||
|
|
||||||
|
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
|
||||||
cusparseIndexType_t hypre_HYPREIntToCusparseIndexType();
|
cusparseIndexType_t hypre_HYPREIntToCusparseIndexType();
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // #if defined(HYPRE_USING_CUSPARSE)
|
#endif // #if defined(HYPRE_USING_CUSPARSE)
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user