Cuda versions (#879)
This PR adds support and regression tests for all the versions from CUDA 9.0 to 12.0.
This commit is contained in:
		
							parent
							
								
									7d1d9ca95c
								
							
						
					
					
						commit
						72f5f3e136
					
				
							
								
								
									
										148
									
								
								AUTOTEST/machine-lassen-cuda.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										148
									
								
								AUTOTEST/machine-lassen-cuda.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,148 @@ | ||||
| #!/bin/sh | ||||
| # Copyright (c) 1998 Lawrence Livermore National Security, LLC and other | ||||
| # HYPRE Project Developers. See the top-level COPYRIGHT file for details. | ||||
| # | ||||
| # SPDX-License-Identifier: (Apache-2.0 OR MIT) | ||||
| 
 | ||||
| testname=`basename $0 .sh` | ||||
| 
 | ||||
| # Echo usage information | ||||
| case $1 in | ||||
|    -h|-help) | ||||
|       cat <<EOF | ||||
| 
 | ||||
|    **** Only run this script on the lassen cluster **** | ||||
| 
 | ||||
|    $0 [-h|-help] {src_dir} | ||||
| 
 | ||||
|    where: -h|-help   prints this usage information and exits | ||||
|           {src_dir}  is the hypre source directory | ||||
| 
 | ||||
|    This script runs a number of tests suitable for the lassen cluster. | ||||
| 
 | ||||
|    Example usage: $0 ../src | ||||
| 
 | ||||
| EOF | ||||
|       exit | ||||
|       ;; | ||||
| esac | ||||
| 
 | ||||
| # Setup | ||||
| test_dir=`pwd` | ||||
| output_dir=`pwd`/$testname.dir | ||||
| rm -fr $output_dir | ||||
| mkdir -p $output_dir | ||||
| src_dir=`cd $1; pwd` | ||||
| shift | ||||
| 
 | ||||
| # Basic build and run tests | ||||
| mo="-j test" | ||||
| 
 | ||||
| ############################################# | ||||
| ## Various CUDA version build (only) tests ## | ||||
| ############################################# | ||||
| 
 | ||||
| # CUDA 9.0 with UM [no run] | ||||
| module -q load cuda/9.0 | ||||
| module list cuda/9.0 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda9_0 | ||||
| 
 | ||||
| # CUDA 9.1 with UM [no run] | ||||
| module -q load cuda/9.1 | ||||
| module list cuda/9.1 |& grep "None found" | ||||
| module -q load gcc | ||||
| co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\' CC=mpicc CXX=mpicxx" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda9_1 | ||||
| 
 | ||||
| # CUDA 9.2 with UM [no run] | ||||
| module -q load cuda/9.2 | ||||
| module list cuda/9.2 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda9_2 | ||||
| 
 | ||||
| # CUDA 10.2 with UM [no run] | ||||
| module -q load cuda/10.2 | ||||
| module list cuda/10.2 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda10_2 | ||||
| 
 | ||||
| # CUDA 11.0 with UM [no run] | ||||
| module -q load cuda/11.0 | ||||
| module list cuda/11.0 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11_0 | ||||
| 
 | ||||
| # CUDA 11.1 with UM [no run] | ||||
| module -q load cuda/11.1 | ||||
| module list cuda/11.1 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11_1 | ||||
| 
 | ||||
| # CUDA 11.2 with UM with async malloc [no run] | ||||
| module -q load cuda/11.2 | ||||
| module list cuda/11.2 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11_2 | ||||
| 
 | ||||
| # CUDA 11.3 with UM with async malloc [no run] | ||||
| module -q load cuda/11.3 | ||||
| module list cuda/11.3 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11_3 | ||||
| 
 | ||||
| # CUDA 11.4 with UM with async malloc [no run] | ||||
| module -q load cuda/11.4 | ||||
| module list cuda/11.4 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11_4 | ||||
| 
 | ||||
| # CUDA 11.5 with UM with async malloc [no run] | ||||
| module -q load cuda/11.5 | ||||
| module list cuda/11.5 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11_5 | ||||
| 
 | ||||
| # CUDA 11.6 with UM with async malloc [no run] | ||||
| module -q load cuda/11.6 | ||||
| module list cuda/11.6 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11_6 | ||||
| 
 | ||||
| # CUDA 11.7 with UM with async malloc [no run] | ||||
| module -q load cuda/11.7 | ||||
| module list cuda/11.7 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11_7 | ||||
| 
 | ||||
| # CUDA 11.8 with UM with async malloc [no run] | ||||
| module -q load cuda/11.8 | ||||
| module list cuda/11.8 |& grep "None found" | ||||
| module -q load xl | ||||
| co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11_8 | ||||
| 
 | ||||
| @ -45,9 +45,13 @@ atol="3e-15" | ||||
| #save=`echo $(hostname) | sed 's/[0-9]\+$//'` | ||||
| save="lassen" | ||||
| 
 | ||||
| ########## | ||||
| ## CUDA ## | ||||
| ########## | ||||
| ###################### | ||||
| ##   DEFAULT CUDA   ## | ||||
| ##  (cuda/10.1.243) ## | ||||
| ###################### | ||||
| 
 | ||||
| module -q load cuda | ||||
| module -q load xl | ||||
| 
 | ||||
| # CUDA with UM in debug mode [ij, ams, struct, sstruct] | ||||
| co="--with-cuda --enable-unified-memory --enable-persistent --enable-debug --with-gpu-arch=70 --with-memory-tracker --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'" | ||||
| @ -62,7 +66,7 @@ ro="-error -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro | ||||
| ./renametest.sh basic $output_dir/basic-cuda-um-with-errors | ||||
| 
 | ||||
| #CUDA with UM and mixed-int | ||||
| # CUDA with UM and mixed-int | ||||
| co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'" | ||||
| ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro | ||||
| @ -74,7 +78,7 @@ ro="-gpumemcheck -rt -mpibind -cudamemcheck -save ${save}" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro | ||||
| ./renametest.sh basic $output_dir/basic-cuda-um-shared | ||||
| 
 | ||||
| #CUDA with UM and single precision | ||||
| # CUDA with UM and single precision | ||||
| co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'" | ||||
| ro="-single -rt -mpibind -save ${save}" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro} | ||||
| @ -140,34 +144,23 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_ | ||||
| ./test.sh cmake.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct | ||||
| 
 | ||||
| #################################### | ||||
| ## latest CUDA build (only) tests ## | ||||
| #################################### | ||||
| 
 | ||||
| # CUDA 11 | ||||
| module -q load cuda/11 | ||||
| module list cuda/11 |& grep "None found" | ||||
| 
 | ||||
| mo="-j test" | ||||
| 
 | ||||
| # CUDA with UM with async malloc [no run] | ||||
| co="--with-cuda --enable-unified-memory --enable-device-malloc-async --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
| ./renametest.sh basic $output_dir/basic-cuda11 | ||||
| 
 | ||||
| # CUDA 12 | ||||
| module -q load cuda/12 | ||||
| module list cuda/12 |& grep "None found" | ||||
| module -q load gcc/8.3.1 | ||||
| ####################### | ||||
| ## latest CUDA tests ## | ||||
| ##    cuda/12.0.0    ## | ||||
| ####################### | ||||
| 
 | ||||
| rtol="1e-2" | ||||
| atol="1e-6" | ||||
| mo="-j test" | ||||
| 
 | ||||
| # CUDA with UM in debug mode [ij, ams, struct, sstruct] | ||||
| # CUDA 12.0 + GCC with UM in debug mode, thrust nosync [ij, ams, struct, sstruct] | ||||
| module -q load gcc/8.3.1 | ||||
| module -q load cuda/12.0 | ||||
| module list cuda/12.0 |& grep "None found" | ||||
| co="--with-cuda --enable-unified-memory --enable-thrust-nosync --enable-debug --with-gpu-arch=70 CC=mpicc CXX=mpicxx" | ||||
| ro="-ij-gpu -ams -struct -sstruct -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro | ||||
| ./renametest.sh basic $output_dir/basic-cuda12 | ||||
| ./renametest.sh basic $output_dir/basic-cuda12_0 | ||||
| 
 | ||||
| # Echo to stderr all nonempty error files in $output_dir | ||||
| for errfile in $( find $output_dir ! -size 0 -name "*.err" ) | ||||
|  | ||||
| @ -844,7 +844,7 @@ hypre_ParCSRMatrixExtractBlockDiagDevice( hypre_ParCSRMatrix   *A, | ||||
| #if defined(HYPRE_USING_CUBLAS) | ||||
|       HYPRE_CUBLAS_CALL(hypre_cublas_getriBatched(hypre_HandleCublasHandle(hypre_handle()), | ||||
|                                                   blk_size, | ||||
|                                                   tmpdiag_aop, | ||||
|                                                   (const HYPRE_Real **) tmpdiag_aop, | ||||
|                                                   blk_size, | ||||
|                                                   pivots, | ||||
|                                                   diag_aop, | ||||
|  | ||||
| @ -12,6 +12,7 @@ extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| #if defined(HYPRE_USING_CUSPARSE) | ||||
| #if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION | ||||
| cusparseSpMatDescr_t hypre_CSRMatrixToCusparseSpMat(const hypre_CSRMatrix *A, HYPRE_Int offset); | ||||
| 
 | ||||
| cusparseSpMatDescr_t hypre_CSRMatrixToCusparseSpMat_core( HYPRE_Int n, HYPRE_Int m, | ||||
| @ -26,6 +27,7 @@ cusparseDnMatDescr_t hypre_VectorToCusparseDnMat_core(HYPRE_Complex *x_data, HYP | ||||
|                                                       HYPRE_Int ncol, HYPRE_Int order); | ||||
| 
 | ||||
| cusparseDnMatDescr_t hypre_VectorToCusparseDnMat(const hypre_Vector *x); | ||||
| #endif | ||||
| 
 | ||||
| HYPRE_Int hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int m, HYPRE_Int k, HYPRE_Int n, | ||||
|                                               cusparseMatDescr_t descr_A, HYPRE_Int nnzA, HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex *d_a, | ||||
|  | ||||
| @ -1132,7 +1132,6 @@ hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix, | ||||
|                hypre_BoxGetSize(int_box, loop_size); | ||||
| 
 | ||||
| #if defined(HYPRE_USING_GPU) | ||||
|                if ( hypre_GetExecPolicy1(memory_location) == HYPRE_EXEC_DEVICE ) | ||||
|                { | ||||
|                   hypre_assert(ndim <= 3); | ||||
| 
 | ||||
| @ -1197,8 +1196,7 @@ hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix, | ||||
| #undef DEVICE_VAR | ||||
| #define DEVICE_VAR | ||||
|                } | ||||
|                else | ||||
| #endif | ||||
| #else | ||||
|                { | ||||
|                   hypre_BoxLoop2Begin(ndim, loop_size, | ||||
|                                       box,       start, stride, mi, | ||||
| @ -1225,6 +1223,7 @@ hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix, | ||||
|                   } | ||||
|                   hypre_BoxLoop2End(mi, vi); | ||||
|                } | ||||
| #endif | ||||
|             } /* end loop through boxman to entries */ | ||||
| 
 | ||||
|             hypre_TFree(boxman_to_entries, HYPRE_MEMORY_HOST); | ||||
|  | ||||
| @ -44,15 +44,15 @@ Iters       ||r||_2     conv.rate  ||r||_2/||b||_2 | ||||
| 0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG | ||||
| 0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG | ||||
| 0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG | ||||
| 0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 1: hypre error in file "par_amg_solve.c", line 339, error code = 256 | ||||
| 1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG | ||||
| 1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG | ||||
| 1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG | ||||
| 1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
|  | ||||
| @ -42,13 +42,13 @@ Iters       ||r||_2     conv.rate  ||r||_2/||b||_2 | ||||
|     2    5.536410e+01    1.000000    1.750767e+00 | ||||
| 0: hypre error in file "par_amg_solve.c", line 339, error code = 256 | ||||
| 0: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG | ||||
| 0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 0: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 1: hypre error in file "par_amg_solve.c", line 339, error code = 256 | ||||
| 1: hypre error in file "pcg.c", line 709, error code = 256 - Subnormal gamma value in PCG | ||||
| 1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 782, error code = 256 - Reached max iterations in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
| 1: hypre error in file "pcg.c", line 784, error code = 256 - Reached max iterations 2 in PCG before convergence | ||||
|  | ||||
| @ -112,6 +112,14 @@ using hypre_DeviceItem = void*; | ||||
| #define CUB_IGNORE_DEPRECATED_CPP_DIALECT | ||||
| #endif | ||||
| 
 | ||||
| #ifndef CUSPARSE_VERSION | ||||
| #if defined(CUSPARSE_VER_MAJOR) && defined(CUSPARSE_VER_MINOR) && defined(CUSPARSE_VER_PATCH) | ||||
| #define CUSPARSE_VERSION (CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR *  100 + CUSPARSE_VER_PATCH) | ||||
| #else | ||||
| #define CUSPARSE_VERSION CUDA_VERSION | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| #define CUSPARSE_NEWAPI_VERSION 11000 | ||||
| #define CUSPARSE_NEWSPMM_VERSION 11401 | ||||
| #define CUDA_MALLOCASYNC_VERSION 11020 | ||||
| @ -531,6 +539,7 @@ using hypre_DeviceItem = sycl::nd_item<3>; | ||||
|       hypre_assert(0); exit(1);                                                              \ | ||||
|    } } while(0) | ||||
| 
 | ||||
| #if CUSPARSE_VERSION >= 10300 | ||||
| #define HYPRE_CUSPARSE_CALL(call) do {                                                       \ | ||||
|    cusparseStatus_t err = call;                                                              \ | ||||
|    if (CUSPARSE_STATUS_SUCCESS != err) {                                                     \ | ||||
| @ -538,6 +547,15 @@ using hypre_DeviceItem = sycl::nd_item<3>; | ||||
|             err, cusparseGetErrorString(err), __FILE__, __LINE__);                           \ | ||||
|       hypre_assert(0); exit(1);                                                              \ | ||||
|    } } while(0) | ||||
| #else | ||||
| #define HYPRE_CUSPARSE_CALL(call) do {                                                       \ | ||||
|    cusparseStatus_t err = call;                                                              \ | ||||
|    if (CUSPARSE_STATUS_SUCCESS != err) {                                                     \ | ||||
|       printf("CUSPARSE ERROR (code = %d) at %s:%d\n",                                        \ | ||||
|             err, __FILE__, __LINE__);                                                        \ | ||||
|       hypre_assert(0); exit(1);                                                              \ | ||||
|    } } while(0) | ||||
| #endif | ||||
| 
 | ||||
| #define HYPRE_ROCSPARSE_CALL(call) do {                                                      \ | ||||
|    rocsparse_status err = call;                                                              \ | ||||
| @ -1901,7 +1919,9 @@ void hypre_DeviceDataCubCachingAllocatorDestroy(hypre_DeviceData *data); | ||||
| 
 | ||||
| cudaDataType hypre_HYPREComplexToCudaDataType(); | ||||
| 
 | ||||
| #if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION | ||||
| cusparseIndexType_t hypre_HYPREIntToCusparseIndexType(); | ||||
| #endif | ||||
| 
 | ||||
| #endif // #if defined(HYPRE_USING_CUSPARSE)
 | ||||
| 
 | ||||
|  | ||||
| @ -2719,6 +2719,7 @@ hypre_HYPREComplexToCudaDataType() | ||||
| #endif // #if defined(HYPRE_COMPLEX)
 | ||||
| } | ||||
| 
 | ||||
| #if CUSPARSE_VERSION >= 10300 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypre_HYPREIntToCusparseIndexType | ||||
|  * | ||||
| @ -2744,6 +2745,8 @@ hypre_HYPREIntToCusparseIndexType() | ||||
|    return CUSPARSE_INDEX_32I; | ||||
| #endif | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #endif // #if defined(HYPRE_USING_CUSPARSE)
 | ||||
| 
 | ||||
| #if defined(HYPRE_USING_CUBLAS) | ||||
|  | ||||
| @ -60,6 +60,14 @@ using hypre_DeviceItem = void*; | ||||
| #define CUB_IGNORE_DEPRECATED_CPP_DIALECT | ||||
| #endif | ||||
| 
 | ||||
| #ifndef CUSPARSE_VERSION | ||||
| #if defined(CUSPARSE_VER_MAJOR) && defined(CUSPARSE_VER_MINOR) && defined(CUSPARSE_VER_PATCH) | ||||
| #define CUSPARSE_VERSION (CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR *  100 + CUSPARSE_VER_PATCH) | ||||
| #else | ||||
| #define CUSPARSE_VERSION CUDA_VERSION | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| #define CUSPARSE_NEWAPI_VERSION 11000 | ||||
| #define CUSPARSE_NEWSPMM_VERSION 11401 | ||||
| #define CUDA_MALLOCASYNC_VERSION 11020 | ||||
| @ -479,6 +487,7 @@ using hypre_DeviceItem = sycl::nd_item<3>; | ||||
|       hypre_assert(0); exit(1);                                                              \ | ||||
|    } } while(0) | ||||
| 
 | ||||
| #if CUSPARSE_VERSION >= 10300 | ||||
| #define HYPRE_CUSPARSE_CALL(call) do {                                                       \ | ||||
|    cusparseStatus_t err = call;                                                              \ | ||||
|    if (CUSPARSE_STATUS_SUCCESS != err) {                                                     \ | ||||
| @ -486,6 +495,15 @@ using hypre_DeviceItem = sycl::nd_item<3>; | ||||
|             err, cusparseGetErrorString(err), __FILE__, __LINE__);                           \ | ||||
|       hypre_assert(0); exit(1);                                                              \ | ||||
|    } } while(0) | ||||
| #else | ||||
| #define HYPRE_CUSPARSE_CALL(call) do {                                                       \ | ||||
|    cusparseStatus_t err = call;                                                              \ | ||||
|    if (CUSPARSE_STATUS_SUCCESS != err) {                                                     \ | ||||
|       printf("CUSPARSE ERROR (code = %d) at %s:%d\n",                                        \ | ||||
|             err, __FILE__, __LINE__);                                                        \ | ||||
|       hypre_assert(0); exit(1);                                                              \ | ||||
|    } } while(0) | ||||
| #endif | ||||
| 
 | ||||
| #define HYPRE_ROCSPARSE_CALL(call) do {                                                      \ | ||||
|    rocsparse_status err = call;                                                              \ | ||||
| @ -1849,7 +1867,9 @@ void hypre_DeviceDataCubCachingAllocatorDestroy(hypre_DeviceData *data); | ||||
| 
 | ||||
| cudaDataType hypre_HYPREComplexToCudaDataType(); | ||||
| 
 | ||||
| #if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION | ||||
| cusparseIndexType_t hypre_HYPREIntToCusparseIndexType(); | ||||
| #endif | ||||
| 
 | ||||
| #endif // #if defined(HYPRE_USING_CUSPARSE)
 | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Rui Peng Li
						Rui Peng Li