GPU support with single precision (#572)
This PR fixes the GPU support with single precision.
This commit is contained in:
parent
ebd6eb88c3
commit
95e6433fc7
@ -18,7 +18,7 @@ case $1 in
|
||||
where: -h|-help prints this usage information and exits
|
||||
{src_dir} is the hypre source directory
|
||||
|
||||
This script runs a number of tests suitable for the syrah cluster.
|
||||
This script runs a number of tests suitable for the lassen cluster.
|
||||
|
||||
Example usage: $0 ../src
|
||||
|
||||
@ -67,6 +67,12 @@ co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enabl
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||
./renametest.sh basic $output_dir/basic-cuda-um-shared
|
||||
|
||||
#CUDA with UM and single precision
|
||||
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
ro="-single -rt -mpibind -save ${save}"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
|
||||
./renametest.sh basic $output_dir/basic-cuda-um-single
|
||||
|
||||
# CUDA with UM without MPI [no run]
|
||||
#co="--with-cuda --enable-unified-memory --without-MPI --with-gpu-arch=\\'60 70\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||
@ -89,7 +95,7 @@ ro="-bench -struct -rt -mpibind -save ${save}"
|
||||
|
||||
# OMP 4.5 without UM in debug mode [struct]
|
||||
co="--with-device-openmp --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
ro="-struct -rt -mpibind -save ${host}"
|
||||
ro="-struct -rt -mpibind -save ${save}"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct
|
||||
|
||||
@ -116,6 +122,7 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
|
||||
################################
|
||||
## CUDA 11 build (only) tests ##
|
||||
################################
|
||||
|
||||
co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||
module -q load cuda/11
|
||||
module list cuda/11 |& grep "None found"
|
||||
|
||||
@ -18,7 +18,7 @@ case $1 in
|
||||
where: -h|-help prints this usage information and exits
|
||||
{src_dir} is the hypre source directory
|
||||
|
||||
This script runs a number of tests suitable for the syrah cluster.
|
||||
This script runs a number of tests suitable for the ray cluster.
|
||||
|
||||
Example usage: $0 ../src
|
||||
|
||||
@ -67,6 +67,12 @@ co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enabl
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||
./renametest.sh basic $output_dir/basic-cuda-um-shared
|
||||
|
||||
#CUDA with UM and single precision
|
||||
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
ro="-single -rt -mpibind -save ${save}"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
|
||||
./renametest.sh basic $output_dir/basic-cuda-um-single
|
||||
|
||||
# CUDA with UM without MPI [no run]
|
||||
#co="--with-cuda --enable-unified-memory --without-MPI --with-gpu-arch=\\'60 70\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||
@ -89,13 +95,14 @@ ro="-bench -struct -rt -mpibind -save ${save}"
|
||||
|
||||
# OMP 4.5 without UM in debug mode [struct]
|
||||
co="--with-device-openmp --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
ro="-struct -rt -mpibind -save ${host}"
|
||||
ro="-struct -rt -mpibind -save ${save}"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct
|
||||
|
||||
#################################
|
||||
# CUDA + CMake build (only) tests
|
||||
#################################
|
||||
#####################################
|
||||
## CUDA + CMake build (only) tests ##
|
||||
#####################################
|
||||
|
||||
mo="-j"
|
||||
# CUDA with UM + CMake
|
||||
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
|
||||
@ -112,9 +119,10 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
|
||||
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
|
||||
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct
|
||||
|
||||
############################
|
||||
# CUDA 11 build (only) tests
|
||||
############################
|
||||
################################
|
||||
## CUDA 11 build (only) tests ##
|
||||
################################
|
||||
|
||||
co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
|
||||
module -q load cuda/11
|
||||
module list cuda/11 |& grep "None found"
|
||||
|
||||
@ -11,14 +11,14 @@ case $1 in
|
||||
-h|-help)
|
||||
cat <<EOF
|
||||
|
||||
**** Only run this script on the lassen cluster ****
|
||||
**** Only run this script on the redwood cluster ****
|
||||
|
||||
$0 [-h|-help] {src_dir}
|
||||
|
||||
where: -h|-help prints this usage information and exits
|
||||
{src_dir} is the hypre source directory
|
||||
|
||||
This script runs a number of tests suitable for the syrah cluster.
|
||||
This script runs a number of tests suitable for the redwood cluster.
|
||||
|
||||
Example usage: $0 ../src
|
||||
|
||||
@ -55,6 +55,11 @@ ro="-bench -struct -rt -save ${save} -D MV2_USE_CUDA=1"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||
./renametest.sh basic $output_dir/basic-hip-nonum
|
||||
|
||||
#HIP with UM and single precision [no run]
|
||||
co="--with-hip --enable-unified-memory --enable-single --enable-debug --with-MPI-include=/opt/cray/pe/cray-mvapich2_nogpu/2.3.5/infiniband/cray/10.0/include --with-MPI-lib-dirs=/opt/cray/pe/cray-mvapich2_nogpu/2.3.5/infiniband/cray/10.0/lib --with-MPI-libs=mpi --with-gpu-arch=\\'gfx906,gfx908\\'"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||
./renametest.sh basic $output_dir/basic-hip-um-single
|
||||
|
||||
# Echo to stderr all nonempty error files in $output_dir
|
||||
for errfile in $( find $output_dir ! -size 0 -name "*.err" )
|
||||
do
|
||||
|
||||
@ -164,8 +164,8 @@ hypre_fprintf(stderr, "blocks= %i\n", blocks);
|
||||
void SubdomainGraph_dhPrintStatsLong(SubdomainGraph_dh s, FILE *fp)
|
||||
{
|
||||
START_FUNC_DH
|
||||
HYPRE_Int i, j, k;
|
||||
HYPRE_Real max = 0, min = INT_MAX;
|
||||
HYPRE_Int i, j, k;
|
||||
HYPRE_Real max = 0, min = (HYPRE_Real) INT_MAX;
|
||||
|
||||
hypre_fprintf(fp, "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
|
||||
hypre_fprintf(fp, "colors used = %i\n", s->colors);
|
||||
@ -243,7 +243,7 @@ void SubdomainGraph_dhPrintStatsLong(SubdomainGraph_dh s, FILE *fp)
|
||||
|
||||
} else {
|
||||
/*-----------------------------------------
|
||||
* local n2o_row permutation
|
||||
* local n2o_row permutation
|
||||
*-----------------------------------------*/
|
||||
hypre_fprintf(fp, "\nlocal n2o_row permutation:\n");
|
||||
hypre_fprintf(fp, "--------------------------\n");
|
||||
@ -1427,7 +1427,7 @@ void SubdomainGraph_dhDump(SubdomainGraph_dh s, char *filename)
|
||||
hypre_fprintf(fp, "%i ", s->bdry_count[i]);
|
||||
}
|
||||
hypre_fprintf(fp, "\n");
|
||||
|
||||
|
||||
}
|
||||
|
||||
/* write subdomain graph */
|
||||
@ -1567,7 +1567,7 @@ void find_bdry_nodes_seq_private(SubdomainGraph_dh s, HYPRE_Int m, void* A)
|
||||
tmp = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR;
|
||||
for (i=0; i<m; ++i) tmp[i] = 0;
|
||||
|
||||
/*------------------------------------------
|
||||
/*------------------------------------------
|
||||
* mark all boundary nodes
|
||||
*------------------------------------------ */
|
||||
for (i=0; i<blocks; ++i) {
|
||||
|
||||
@ -1013,7 +1013,7 @@ hypreDevice_extendWtoP( HYPRE_Int P_nr_of_rows,
|
||||
PC_i,
|
||||
P_diag_j );
|
||||
|
||||
hypreDevice_ScatterConstant(P_diag_data, W_nr_of_cols, PC_i, 1.0);
|
||||
hypreDevice_ScatterConstant(P_diag_data, W_nr_of_cols, PC_i, (HYPRE_Complex) 1.0);
|
||||
|
||||
hypre_TFree(PC_i, HYPRE_MEMORY_DEVICE);
|
||||
}
|
||||
|
||||
@ -1725,13 +1725,13 @@ hypre_CSRMatrixSortRow(hypre_CSRMatrix *A)
|
||||
* @param[in,out] *d_a_sorted On Start: Unsorted values. On Return: Sorted values corresponding with column indices
|
||||
*/
|
||||
void
|
||||
hypre_SortCSRCusparse( HYPRE_Int n,
|
||||
HYPRE_Int m,
|
||||
HYPRE_Int nnzA,
|
||||
cusparseMatDescr_t descrA,
|
||||
const HYPRE_Int *d_ia,
|
||||
HYPRE_Int *d_ja_sorted,
|
||||
HYPRE_Complex *d_a_sorted )
|
||||
hypre_SortCSRCusparse( HYPRE_Int n,
|
||||
HYPRE_Int m,
|
||||
HYPRE_Int nnzA,
|
||||
cusparseMatDescr_t descrA,
|
||||
const HYPRE_Int *d_ia,
|
||||
HYPRE_Int *d_ja_sorted,
|
||||
HYPRE_Complex *d_a_sorted )
|
||||
{
|
||||
cusparseHandle_t cusparsehandle = hypre_HandleCusparseHandle(hypre_handle());
|
||||
|
||||
@ -1741,10 +1741,8 @@ hypre_SortCSRCusparse( HYPRE_Int n,
|
||||
csru2csrInfo_t sortInfoA;
|
||||
HYPRE_CUSPARSE_CALL( cusparseCreateCsru2csrInfo(&sortInfoA) );
|
||||
|
||||
HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
|
||||
HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
|
||||
|
||||
if (isDoublePrecision)
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
{
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsru2csr_bufferSizeExt(cusparsehandle,
|
||||
n, m, nnzA, d_a_sorted, d_ia, d_ja_sorted,
|
||||
@ -1756,18 +1754,20 @@ hypre_SortCSRCusparse( HYPRE_Int n,
|
||||
n, m, nnzA, descrA, d_a_sorted, d_ia, d_ja_sorted,
|
||||
sortInfoA, pBuffer) );
|
||||
}
|
||||
else if (isSinglePrecision)
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
{
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsru2csr_bufferSizeExt(cusparsehandle,
|
||||
n, m, nnzA, (float *) d_a_sorted, d_ia, d_ja_sorted,
|
||||
n, m, nnzA, d_a_sorted, d_ia, d_ja_sorted,
|
||||
sortInfoA, &pBufferSizeInBytes));
|
||||
|
||||
pBuffer = hypre_TAlloc(char, pBufferSizeInBytes, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsru2csr(cusparsehandle,
|
||||
n, m, nnzA, descrA, (float *)d_a_sorted, d_ia, d_ja_sorted,
|
||||
n, m, nnzA, descrA, d_a_sorted, d_ia, d_ja_sorted,
|
||||
sortInfoA, pBuffer) );
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
hypre_TFree(pBuffer, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_CUSPARSE_CALL(cusparseDestroyCsru2csrInfo(sortInfoA));
|
||||
@ -1861,8 +1861,15 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
|
||||
{
|
||||
HYPRE_CUSPARSE_CALL( cusparseCreateCsrsv2Info(&hypre_CsrsvDataInfoL(csrsv_data)) );
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
|
||||
{
|
||||
@ -1873,10 +1880,19 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
|
||||
hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoL(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoL(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
cusparseStatus_t status = cusparseXcsrsv2_zeroPivot(handle, hypre_CsrsvDataInfoL(csrsv_data),
|
||||
&structural_zero);
|
||||
@ -1890,11 +1906,21 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
|
||||
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
|
||||
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1904,8 +1930,15 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
|
||||
{
|
||||
HYPRE_CUSPARSE_CALL( cusparseCreateCsrsv2Info(&hypre_CsrsvDataInfoU(csrsv_data)) );
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
|
||||
{
|
||||
@ -1916,10 +1949,19 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
|
||||
hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoU(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoU(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
cusparseStatus_t status = cusparseXcsrsv2_zeroPivot(handle, hypre_CsrsvDataInfoU(csrsv_data),
|
||||
&structural_zero);
|
||||
@ -1929,15 +1971,24 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
|
||||
hypre_sprintf(msg, "hypre_CSRMatrixTriLowerUpperSolveCusparse A(%d,%d) is missing\n",
|
||||
structural_zero, structural_zero);
|
||||
hypre_error_w_msg(1, msg);
|
||||
//hypre_assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
|
||||
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
|
||||
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
return hypre_error_flag;
|
||||
@ -2035,8 +2086,15 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
|
||||
{
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_create_mat_info(&hypre_CsrsvDataInfoL(csrsv_data)) );
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_buffer_size(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_buffer_size(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
|
||||
{
|
||||
@ -2047,10 +2105,19 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
|
||||
hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_analysis(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoL(csrsv_data), rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_analysis(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoL(csrsv_data), rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
rocsparse_status status = rocsparse_csrsv_zero_pivot(handle, descr,
|
||||
hypre_CsrsvDataInfoL(csrsv_data), &structural_zero);
|
||||
@ -2064,11 +2131,21 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_solve(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
|
||||
rocsparse_solve_policy_auto,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_solve(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
|
||||
rocsparse_solve_policy_auto,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -2078,8 +2155,15 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
|
||||
{
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_create_mat_info(&hypre_CsrsvDataInfoU(csrsv_data)) );
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_buffer_size(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_buffer_size(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
|
||||
{
|
||||
@ -2090,10 +2174,19 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
|
||||
hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_analysis(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoU(csrsv_data), rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_analysis(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoU(csrsv_data), rocsparse_analysis_policy_reuse,
|
||||
rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
rocsparse_status status = rocsparse_csrsv_zero_pivot(handle, descr,
|
||||
hypre_CsrsvDataInfoU(csrsv_data), &structural_zero);
|
||||
@ -2107,11 +2200,21 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_solve(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
|
||||
rocsparse_solve_policy_auto,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_solve(handle, rocsparse_operation_none,
|
||||
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
|
||||
hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
|
||||
rocsparse_solve_policy_auto,
|
||||
hypre_CsrsvDataBuffer(csrsv_data)) );
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
return hypre_error_flag;
|
||||
@ -2126,13 +2229,13 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
|
||||
* @param[in,out] *d_a_sorted On Start: Unsorted values. On Return: Sorted values corresponding with column indices
|
||||
*/
|
||||
void
|
||||
hypre_SortCSRRocsparse( HYPRE_Int n,
|
||||
HYPRE_Int m,
|
||||
HYPRE_Int nnzA,
|
||||
rocsparse_mat_descr descrA,
|
||||
const HYPRE_Int *d_ia,
|
||||
HYPRE_Int *d_ja_sorted,
|
||||
HYPRE_Complex *d_a_sorted )
|
||||
hypre_SortCSRRocsparse( HYPRE_Int n,
|
||||
HYPRE_Int m,
|
||||
HYPRE_Int nnzA,
|
||||
rocsparse_mat_descr descrA,
|
||||
const HYPRE_Int *d_ia,
|
||||
HYPRE_Int *d_ja_sorted,
|
||||
HYPRE_Complex *d_a_sorted )
|
||||
{
|
||||
rocsparse_handle handle = hypre_HandleCusparseHandle(hypre_handle());
|
||||
|
||||
@ -2140,9 +2243,6 @@ hypre_SortCSRRocsparse( HYPRE_Int n,
|
||||
void *pBuffer = NULL;
|
||||
HYPRE_Int *P = NULL;
|
||||
|
||||
HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
|
||||
HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
|
||||
|
||||
// FIXME: There is not in-place version of csr sort in rocSPARSE currently, so we make
|
||||
// a temporary copy of the data for gthr, sort that, and then copy the sorted values
|
||||
// back to the array being returned. Where there is an in-place version available,
|
||||
@ -2160,16 +2260,19 @@ hypre_SortCSRRocsparse( HYPRE_Int n,
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_csrsort(handle, n, m, nnzA, descrA, d_ia, d_ja_sorted, P,
|
||||
pBuffer) );
|
||||
|
||||
if (isDoublePrecision)
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
{
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
|
||||
rocsparse_index_base_zero) );
|
||||
}
|
||||
else if (isSinglePrecision)
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
{
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_sgthr(handle, nnzA, (float *) d_a_sorted, (float *) d_a_tmp, P,
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_sgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
|
||||
rocsparse_index_base_zero) );
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
hypre_TFree(pBuffer, HYPRE_MEMORY_DEVICE);
|
||||
hypre_TFree(P, HYPRE_MEMORY_DEVICE);
|
||||
@ -2183,7 +2286,8 @@ hypre_SortCSRRocsparse( HYPRE_Int n,
|
||||
void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix)
|
||||
{
|
||||
#if defined(HYPRE_USING_ROCSPARSE)
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrmv_analysis(hypre_HandleCusparseHandle(hypre_handle()),
|
||||
rocsparse_operation_none,
|
||||
hypre_CSRMatrixNumRows(matrix),
|
||||
@ -2194,6 +2298,19 @@ void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix)
|
||||
hypre_CSRMatrixI(matrix),
|
||||
hypre_CSRMatrixJ(matrix),
|
||||
hypre_CSRMatrixGPUMatInfo(matrix)) );
|
||||
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrmv_analysis(hypre_HandleCusparseHandle(hypre_handle()),
|
||||
rocsparse_operation_none,
|
||||
hypre_CSRMatrixNumRows(matrix),
|
||||
hypre_CSRMatrixNumCols(matrix),
|
||||
hypre_CSRMatrixNumNonzeros(matrix),
|
||||
hypre_CSRMatrixGPUMatDescr(matrix),
|
||||
hypre_CSRMatrixData(matrix),
|
||||
hypre_CSRMatrixI(matrix),
|
||||
hypre_CSRMatrixJ(matrix),
|
||||
hypre_CSRMatrixGPUMatInfo(matrix)) );
|
||||
#endif
|
||||
#endif
|
||||
#endif // #if defined(HYPRE_USING_ROCSPARSE)
|
||||
}
|
||||
|
||||
|
||||
@ -241,6 +241,8 @@ hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans,
|
||||
B = A;
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsrmv(handle,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
hypre_CSRMatrixNumRows(B) - offset,
|
||||
@ -254,7 +256,22 @@ hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans,
|
||||
hypre_VectorData(x),
|
||||
&beta,
|
||||
hypre_VectorData(y) + offset) );
|
||||
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsrmv(handle,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
hypre_CSRMatrixNumRows(B) - offset,
|
||||
hypre_CSRMatrixNumCols(B),
|
||||
hypre_CSRMatrixNumNonzeros(B),
|
||||
&alpha,
|
||||
descr,
|
||||
hypre_CSRMatrixData(B),
|
||||
hypre_CSRMatrixI(B) + offset,
|
||||
hypre_CSRMatrixJ(B),
|
||||
hypre_VectorData(x),
|
||||
&beta,
|
||||
hypre_VectorData(y) + offset) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (trans)
|
||||
{
|
||||
@ -292,6 +309,8 @@ hypre_CSRMatrixMatvecRocsparse( HYPRE_Int trans,
|
||||
B = A;
|
||||
}
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrmv(handle,
|
||||
rocsparse_operation_none,
|
||||
hypre_CSRMatrixNumRows(B) - offset,
|
||||
@ -306,6 +325,23 @@ hypre_CSRMatrixMatvecRocsparse( HYPRE_Int trans,
|
||||
hypre_VectorData(x),
|
||||
&beta,
|
||||
hypre_VectorData(y) + offset) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrmv(handle,
|
||||
rocsparse_operation_none,
|
||||
hypre_CSRMatrixNumRows(B) - offset,
|
||||
hypre_CSRMatrixNumCols(B),
|
||||
hypre_CSRMatrixNumNonzeros(B),
|
||||
&alpha,
|
||||
descr,
|
||||
hypre_CSRMatrixData(B),
|
||||
hypre_CSRMatrixI(B) + offset,
|
||||
hypre_CSRMatrixJ(B),
|
||||
info,
|
||||
hypre_VectorData(x),
|
||||
&beta,
|
||||
hypre_VectorData(y) + offset) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (trans)
|
||||
{
|
||||
|
||||
@ -208,11 +208,6 @@ hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int m,
|
||||
cusparseOperation_t transA = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
||||
cusparseOperation_t transB = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
||||
|
||||
HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
|
||||
HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
|
||||
|
||||
hypre_assert(isDoublePrecision || isSinglePrecision);
|
||||
|
||||
/* Copy the unsorted over as the initial "sorted" */
|
||||
hypre_TMemcpy(d_ja_sorted, d_ja, HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
|
||||
hypre_TMemcpy(d_a_sorted, d_a, HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
|
||||
@ -250,20 +245,23 @@ hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int m,
|
||||
d_jc = hypre_TAlloc(HYPRE_Int, nnzC, HYPRE_MEMORY_DEVICE);
|
||||
d_c = hypre_TAlloc(HYPRE_Complex, nnzC, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
if (isDoublePrecision)
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
{
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsrgemm(cusparsehandle, transA, transB, m, n, k,
|
||||
descr_A, nnzA, d_a_sorted, d_ia, d_ja_sorted,
|
||||
descr_B, nnzB, d_b_sorted, d_ib, d_jb_sorted,
|
||||
descr_C, d_c, d_ic, d_jc) );
|
||||
}
|
||||
else if (isSinglePrecision)
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
{
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsrgemm(cusparsehandle, transA, transB, m, n, k,
|
||||
descr_A, nnzA, (float *) d_a_sorted, d_ia, d_ja_sorted,
|
||||
descr_B, nnzB, (float *) d_b_sorted, d_ib, d_jb_sorted,
|
||||
descr_C, (float *) d_c, d_ic, d_jc) );
|
||||
descr_A, nnzA, d_a_sorted, d_ia, d_ja_sorted,
|
||||
descr_B, nnzB, d_b_sorted, d_ib, d_jb_sorted,
|
||||
descr_C, d_c, d_ic, d_jc) );
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
*d_ic_out = d_ic;
|
||||
*d_jc_out = d_jc;
|
||||
|
||||
@ -46,11 +46,6 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
|
||||
rocsparse_operation transA = rocsparse_operation_none;
|
||||
rocsparse_operation transB = rocsparse_operation_none;
|
||||
|
||||
HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
|
||||
HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
|
||||
|
||||
hypre_assert(isDoublePrecision || isSinglePrecision);
|
||||
|
||||
/* Copy the unsorted over as the initial "sorted" */
|
||||
hypre_TMemcpy(d_ja_sorted, d_ja, HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
|
||||
hypre_TMemcpy(d_a_sorted, d_a, HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
|
||||
@ -82,7 +77,8 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
|
||||
size_t rs_buffer_size = 0;
|
||||
void *rs_buffer;
|
||||
|
||||
if (isDoublePrecision)
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
{
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrgemm_buffer_size(handle,
|
||||
transA, transB,
|
||||
@ -94,17 +90,19 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
|
||||
NULL, 0, NULL, NULL, // D is nothing
|
||||
infoC, &rs_buffer_size) );
|
||||
}
|
||||
else if (isSinglePrecision)
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
{
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrgemm_buffer_size(handle, transA, transB,
|
||||
m, n, k,
|
||||
(float *) &alpha, // \alpha = 1
|
||||
&alpha, // \alpha = 1
|
||||
descrA, nnzA, d_ia, d_ja_sorted,
|
||||
descrB, nnzB, d_ib, d_jb_sorted,
|
||||
NULL, // \beta = 0
|
||||
NULL, 0, NULL, NULL,
|
||||
infoC, &rs_buffer_size) );
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
rs_buffer = hypre_TAlloc(char, rs_buffer_size, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
@ -133,7 +131,8 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
|
||||
d_jc = hypre_TAlloc(HYPRE_Int, nnzC, HYPRE_MEMORY_DEVICE);
|
||||
d_c = hypre_TAlloc(HYPRE_Complex, nnzC, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
if (isDoublePrecision)
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
{
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrgemm(handle, transA, transB,
|
||||
m, n, k,
|
||||
@ -145,18 +144,20 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
|
||||
descrC, d_c, d_ic, d_jc,
|
||||
infoC, rs_buffer) );
|
||||
}
|
||||
else if (isSinglePrecision)
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
{
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsrgemm(handle, transA, transB,
|
||||
m, n, k,
|
||||
(float *) &alpha, // alpha = 1
|
||||
descrA, nnzA, (float *) d_a_sorted, d_ia, d_ja_sorted,
|
||||
descrB, nnzB, (float *) d_b_sorted, d_ib, d_jb_sorted,
|
||||
&alpha, // alpha = 1
|
||||
descrA, nnzA, d_a_sorted, d_ia, d_ja_sorted,
|
||||
descrB, nnzB, d_b_sorted, d_ib, d_jb_sorted,
|
||||
NULL, // beta = 0
|
||||
NULL, 0, NULL, NULL, NULL, // D is nothing
|
||||
descrC, (float *) d_c, d_ic, d_jc,
|
||||
descrC, d_c, d_ic, d_jc,
|
||||
infoC, rs_buffer) );
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Free up the memory needed by rocsparse
|
||||
hypre_TFree(rs_buffer, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
@ -64,13 +64,24 @@ hypreDevice_CSRSpTransCusparse(HYPRE_Int m, HYPRE_Int n, HYPRE
|
||||
|
||||
hypre_TFree(dBuffer, HYPRE_MEMORY_DEVICE);
|
||||
#else
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseDcsr2csc(handle,
|
||||
m, n, nnzA,
|
||||
d_aa, d_ia, d_ja,
|
||||
csc_a, csc_j, csc_i,
|
||||
action,
|
||||
CUSPARSE_INDEX_BASE_ZERO) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_CUSPARSE_CALL( cusparseScsr2csc(handle,
|
||||
m, n, nnzA,
|
||||
d_aa, d_ia, d_ja,
|
||||
csc_a, csc_j, csc_i,
|
||||
action,
|
||||
CUSPARSE_INDEX_BASE_ZERO) );
|
||||
#endif
|
||||
#endif /* #if !defined(HYPRE_COMPLEX) */
|
||||
#endif /* #if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION */
|
||||
|
||||
*d_ic_out = csc_i;
|
||||
*d_jc_out = csc_j;
|
||||
@ -124,6 +135,8 @@ hypreDevice_CSRSpTransRocsparse(HYPRE_Int m, HYPRE_Int n, HYPR
|
||||
void * buffer;
|
||||
buffer = hypre_TAlloc(char, buffer_size, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
#if !defined(HYPRE_COMPLEX)
|
||||
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_dcsr2csc(handle,
|
||||
m, n, nnzA,
|
||||
d_aa, d_ia, d_ja,
|
||||
@ -131,6 +144,17 @@ hypreDevice_CSRSpTransRocsparse(HYPRE_Int m, HYPRE_Int n, HYPR
|
||||
action,
|
||||
rocsparse_index_base_zero,
|
||||
buffer) );
|
||||
#elif defined(HYPRE_SINGLE)
|
||||
HYPRE_ROCSPARSE_CALL( rocsparse_scsr2csc(handle,
|
||||
m, n, nnzA,
|
||||
d_aa, d_ia, d_ja,
|
||||
csc_a, csc_j, csc_i,
|
||||
action,
|
||||
rocsparse_index_base_zero,
|
||||
buffer) );
|
||||
#endif
|
||||
#endif /* #if !defined(HYPRE_COMPLEX) */
|
||||
|
||||
hypre_TFree(buffer, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
*d_ic_out = csc_i;
|
||||
|
||||
145
src/test/TEST_single/solvers_ij.saved.lassen
Normal file
145
src/test/TEST_single/solvers_ij.saved.lassen
Normal file
@ -0,0 +1,145 @@
|
||||
# Output file: solvers_ij.out.0
|
||||
Iterations = 6
|
||||
Final Relative Residual Norm = 3.800163e-05
|
||||
|
||||
# Output file: solvers_ij.out.1
|
||||
Iterations = 26
|
||||
Final Relative Residual Norm = 7.193501e-05
|
||||
|
||||
# Output file: solvers_ij.out.2
|
||||
GMRES Iterations = 6
|
||||
Final GMRES Relative Residual Norm = 4.962838e-05
|
||||
|
||||
# Output file: solvers_ij.out.3
|
||||
GMRES Iterations = 39
|
||||
Final GMRES Relative Residual Norm = 9.043231e-05
|
||||
|
||||
# Output file: solvers_ij.out.4
|
||||
Iterations = 5
|
||||
Final Relative Residual Norm = 1.785553e-05
|
||||
|
||||
# Output file: solvers_ij.out.5
|
||||
Iterations = 103
|
||||
Final Relative Residual Norm = 8.784425e-05
|
||||
|
||||
# Output file: solvers_ij.out.6
|
||||
GMRES Iterations = 15
|
||||
Final GMRES Relative Residual Norm = 7.131146e-05
|
||||
|
||||
# Output file: solvers_ij.out.7
|
||||
Iterations = 13
|
||||
Final Relative Residual Norm = 7.750613e-05
|
||||
|
||||
# Output file: solvers_ij.out.8
|
||||
Iterations = 26
|
||||
PCG_Iterations = 0
|
||||
DSCG_Iterations = 26
|
||||
Final Relative Residual Norm = 7.193501e-05
|
||||
|
||||
# Output file: solvers_ij.out.9
|
||||
Iterations = 7
|
||||
PCG_Iterations = 3
|
||||
DSCG_Iterations = 4
|
||||
Final Relative Residual Norm = 8.301256e-05
|
||||
|
||||
# Output file: solvers_ij.out.10
|
||||
Iterations = 6
|
||||
PCG_Iterations = 4
|
||||
DSCG_Iterations = 2
|
||||
Final Relative Residual Norm = 2.785249e-05
|
||||
|
||||
# Output file: solvers_ij.out.11
|
||||
Iterations = 5
|
||||
PCG_Iterations = 2
|
||||
DSCG_Iterations = 3
|
||||
Final Relative Residual Norm = 1.361495e-05
|
||||
|
||||
# Output file: solvers_ij.out.sysh
|
||||
Average Convergence Factor = 0.196477
|
||||
|
||||
Complexity: grid = 1.392875
|
||||
operator = 2.633675
|
||||
cycle = 5.267332
|
||||
|
||||
# Output file: solvers_ij.out.sysn
|
||||
Average Convergence Factor = 0.533116
|
||||
|
||||
Complexity: grid = 1.390750
|
||||
operator = 2.080112
|
||||
cycle = 10.160150
|
||||
|
||||
# Output file: solvers_ij.out.sysu
|
||||
Average Convergence Factor = 0.737621
|
||||
|
||||
Complexity: grid = 1.390813
|
||||
operator = 2.718671
|
||||
cycle = 5.437173
|
||||
|
||||
# Output file: solvers_ij.out.101
|
||||
LGMRES Iterations = 39
|
||||
Final LGMRES Relative Residual Norm = 7.229704e-05
|
||||
|
||||
# Output file: solvers_ij.out.102
|
||||
LGMRES Iterations = 6
|
||||
Final LGMRES Relative Residual Norm = 4.962838e-05
|
||||
|
||||
# Output file: solvers_ij.out.103
|
||||
FlexGMRES Iterations = 39
|
||||
Final FlexGMRES Relative Residual Norm = 9.043153e-05
|
||||
|
||||
# Output file: solvers_ij.out.104
|
||||
FlexGMRES Iterations = 6
|
||||
Final FlexGMRES Relative Residual Norm = 4.961434e-05
|
||||
|
||||
# Output file: solvers_ij.out.105
|
||||
Iterations = 11
|
||||
Final Relative Residual Norm = 4.022052e-05
|
||||
|
||||
# Output file: solvers_ij.out.106
|
||||
Iterations = 11
|
||||
Final Relative Residual Norm = 4.022052e-05
|
||||
|
||||
# Output file: solvers_ij.out.107
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 6.623256e-05
|
||||
|
||||
# Output file: solvers_ij.out.108
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 6.623272e-05
|
||||
|
||||
# Output file: solvers_ij.out.109
|
||||
Iterations = 11
|
||||
Final Relative Residual Norm = 7.168805e-05
|
||||
|
||||
# Output file: solvers_ij.out.110
|
||||
Iterations = 11
|
||||
Final Relative Residual Norm = 7.168810e-05
|
||||
|
||||
# Output file: solvers_ij.out.111
|
||||
Iterations = 17
|
||||
Final Relative Residual Norm = 7.756719e-05
|
||||
|
||||
# Output file: solvers_ij.out.112
|
||||
GMRES Iterations = 21
|
||||
Final GMRES Relative Residual Norm = 7.660792e-05
|
||||
|
||||
# Output file: solvers_ij.out.113
|
||||
GMRES Iterations = 14
|
||||
Final GMRES Relative Residual Norm = 9.868194e-05
|
||||
|
||||
# Output file: solvers_ij.out.114
|
||||
BoomerAMG Iterations = 17
|
||||
Final Relative Residual Norm = 9.048652e-05
|
||||
|
||||
# Output file: solvers_ij.out.115
|
||||
BoomerAMG Iterations = 17
|
||||
Final Relative Residual Norm = 9.091324e-05
|
||||
|
||||
# Output file: solvers_ij.out.116
|
||||
GMRES Iterations = 8
|
||||
Final GMRES Relative Residual Norm = 7.735370e-05
|
||||
|
||||
# Output file: solvers_ij.out.117
|
||||
GMRES Iterations = 8
|
||||
Final GMRES Relative Residual Norm = 7.728816e-05
|
||||
|
||||
145
src/test/TEST_single/solvers_ij.saved.ray
Normal file
145
src/test/TEST_single/solvers_ij.saved.ray
Normal file
@ -0,0 +1,145 @@
|
||||
# Output file: solvers_ij.out.0
|
||||
Iterations = 6
|
||||
Final Relative Residual Norm = 3.800161e-05
|
||||
|
||||
# Output file: solvers_ij.out.1
|
||||
Iterations = 26
|
||||
Final Relative Residual Norm = 7.193501e-05
|
||||
|
||||
# Output file: solvers_ij.out.2
|
||||
GMRES Iterations = 6
|
||||
Final GMRES Relative Residual Norm = 4.962170e-05
|
||||
|
||||
# Output file: solvers_ij.out.3
|
||||
GMRES Iterations = 39
|
||||
Final GMRES Relative Residual Norm = 9.043231e-05
|
||||
|
||||
# Output file: solvers_ij.out.4
|
||||
Iterations = 5
|
||||
Final Relative Residual Norm = 1.785550e-05
|
||||
|
||||
# Output file: solvers_ij.out.5
|
||||
Iterations = 103
|
||||
Final Relative Residual Norm = 8.784425e-05
|
||||
|
||||
# Output file: solvers_ij.out.6
|
||||
GMRES Iterations = 15
|
||||
Final GMRES Relative Residual Norm = 7.131146e-05
|
||||
|
||||
# Output file: solvers_ij.out.7
|
||||
Iterations = 13
|
||||
Final Relative Residual Norm = 7.750613e-05
|
||||
|
||||
# Output file: solvers_ij.out.8
|
||||
Iterations = 26
|
||||
PCG_Iterations = 0
|
||||
DSCG_Iterations = 26
|
||||
Final Relative Residual Norm = 7.193501e-05
|
||||
|
||||
# Output file: solvers_ij.out.9
|
||||
Iterations = 7
|
||||
PCG_Iterations = 3
|
||||
DSCG_Iterations = 4
|
||||
Final Relative Residual Norm = 8.301259e-05
|
||||
|
||||
# Output file: solvers_ij.out.10
|
||||
Iterations = 6
|
||||
PCG_Iterations = 4
|
||||
DSCG_Iterations = 2
|
||||
Final Relative Residual Norm = 2.785446e-05
|
||||
|
||||
# Output file: solvers_ij.out.11
|
||||
Iterations = 5
|
||||
PCG_Iterations = 2
|
||||
DSCG_Iterations = 3
|
||||
Final Relative Residual Norm = 1.361500e-05
|
||||
|
||||
# Output file: solvers_ij.out.sysh
|
||||
Average Convergence Factor = 0.193839
|
||||
|
||||
Complexity: grid = 1.392875
|
||||
operator = 2.632649
|
||||
cycle = 5.265280
|
||||
|
||||
# Output file: solvers_ij.out.sysn
|
||||
Average Convergence Factor = 0.533116
|
||||
|
||||
Complexity: grid = 1.390750
|
||||
operator = 2.080112
|
||||
cycle = 10.160150
|
||||
|
||||
# Output file: solvers_ij.out.sysu
|
||||
Average Convergence Factor = 0.739514
|
||||
|
||||
Complexity: grid = 1.390563
|
||||
operator = 2.717318
|
||||
cycle = 5.434468
|
||||
|
||||
# Output file: solvers_ij.out.101
|
||||
LGMRES Iterations = 39
|
||||
Final LGMRES Relative Residual Norm = 7.229704e-05
|
||||
|
||||
# Output file: solvers_ij.out.102
|
||||
LGMRES Iterations = 6
|
||||
Final LGMRES Relative Residual Norm = 4.962170e-05
|
||||
|
||||
# Output file: solvers_ij.out.103
|
||||
FlexGMRES Iterations = 39
|
||||
Final FlexGMRES Relative Residual Norm = 9.043153e-05
|
||||
|
||||
# Output file: solvers_ij.out.104
|
||||
FlexGMRES Iterations = 6
|
||||
Final FlexGMRES Relative Residual Norm = 4.961419e-05
|
||||
|
||||
# Output file: solvers_ij.out.105
|
||||
Iterations = 11
|
||||
Final Relative Residual Norm = 4.434195e-05
|
||||
|
||||
# Output file: solvers_ij.out.106
|
||||
Iterations = 11
|
||||
Final Relative Residual Norm = 4.434195e-05
|
||||
|
||||
# Output file: solvers_ij.out.107
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 6.671497e-05
|
||||
|
||||
# Output file: solvers_ij.out.108
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 6.671443e-05
|
||||
|
||||
# Output file: solvers_ij.out.109
|
||||
Iterations = 11
|
||||
Final Relative Residual Norm = 7.168811e-05
|
||||
|
||||
# Output file: solvers_ij.out.110
|
||||
Iterations = 11
|
||||
Final Relative Residual Norm = 7.168806e-05
|
||||
|
||||
# Output file: solvers_ij.out.111
|
||||
Iterations = 17
|
||||
Final Relative Residual Norm = 7.756725e-05
|
||||
|
||||
# Output file: solvers_ij.out.112
|
||||
GMRES Iterations = 21
|
||||
Final GMRES Relative Residual Norm = 7.643850e-05
|
||||
|
||||
# Output file: solvers_ij.out.113
|
||||
GMRES Iterations = 14
|
||||
Final GMRES Relative Residual Norm = 9.851967e-05
|
||||
|
||||
# Output file: solvers_ij.out.114
|
||||
BoomerAMG Iterations = 18
|
||||
Final Relative Residual Norm = 6.353526e-05
|
||||
|
||||
# Output file: solvers_ij.out.115
|
||||
BoomerAMG Iterations = 18
|
||||
Final Relative Residual Norm = 6.077210e-05
|
||||
|
||||
# Output file: solvers_ij.out.116
|
||||
GMRES Iterations = 8
|
||||
Final GMRES Relative Residual Norm = 7.078722e-05
|
||||
|
||||
# Output file: solvers_ij.out.117
|
||||
GMRES Iterations = 8
|
||||
Final GMRES Relative Residual Norm = 7.086178e-05
|
||||
|
||||
120
src/test/TEST_single/solvers_struct.saved.lassen
Normal file
120
src/test/TEST_single/solvers_struct.saved.lassen
Normal file
@ -0,0 +1,120 @@
|
||||
# Output file: solvers_struct.out.0
|
||||
Iterations = 3
|
||||
Final Relative Residual Norm = 3.246689e-05
|
||||
|
||||
# Output file: solvers_struct.out.1
|
||||
Iterations = 6
|
||||
Final Relative Residual Norm = 2.055851e-05
|
||||
|
||||
# Output file: solvers_struct.out.2
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 5.377654e-05
|
||||
|
||||
# Output file: solvers_struct.out.3
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 3.718371e-05
|
||||
|
||||
# Output file: solvers_struct.out.4
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 3.718370e-05
|
||||
|
||||
# Output file: solvers_struct.out.10.lobpcg
|
||||
Iterations = 3
|
||||
Final Relative Residual Norm = 6.275833e-06
|
||||
|
||||
# Output file: solvers_struct.out.10.lobpcg.1
|
||||
Eigenvalue lambda 1.84366509318352e-01
|
||||
Residual 2.48082087637158e-05
|
||||
|
||||
# Output file: solvers_struct.out.10.lobpcg.3
|
||||
Iteration 10 bsize 2 maxres 4.33511973824352e-04
|
||||
Iteration 11 bsize 1 maxres 2.04666575882584e-04
|
||||
Iteration 12 bsize 1 maxres 8.50733777042478e-05
|
||||
|
||||
Eigenvalue lambda 1.84366509318352e-01
|
||||
Eigenvalue lambda 2.50882238149643e-01
|
||||
Eigenvalue lambda 3.60091090202332e-01
|
||||
Residual 7.41478434065357e-05
|
||||
Residual 4.07401748816483e-05
|
||||
Residual 8.50733777042478e-05
|
||||
|
||||
# Output file: solvers_struct.out.11.lobpcg
|
||||
Iterations = 6
|
||||
Final Relative Residual Norm = 2.112819e-05
|
||||
|
||||
# Output file: solvers_struct.out.11.lobpcg.1
|
||||
Eigenvalue lambda 1.84366583824158e-01
|
||||
Residual 3.15948745992500e-05
|
||||
|
||||
# Output file: solvers_struct.out.11.lobpcg.3
|
||||
Iteration 11 bsize 2 maxres 6.90118235070258e-04
|
||||
Iteration 12 bsize 2 maxres 2.52081663347781e-04
|
||||
Iteration 13 bsize 1 maxres 7.02887409715913e-05
|
||||
|
||||
Eigenvalue lambda 1.84366479516029e-01
|
||||
Eigenvalue lambda 2.50883370637894e-01
|
||||
Eigenvalue lambda 3.60090911388397e-01
|
||||
Residual 5.58231040486135e-05
|
||||
Residual 2.60377983067883e-05
|
||||
Residual 7.02887409715913e-05
|
||||
|
||||
# Output file: solvers_struct.out.17.lobpcg
|
||||
Iterations = 17
|
||||
Final Relative Residual Norm = 8.241194e-07
|
||||
|
||||
# Output file: solvers_struct.out.17.lobpcg.1
|
||||
Eigenvalue lambda 1.84366509318352e-01
|
||||
Residual 1.95705306396121e-05
|
||||
|
||||
# Output file: solvers_struct.out.17.lobpcg.3
|
||||
Iteration 10 bsize 2 maxres 3.62064485670999e-04
|
||||
Iteration 11 bsize 1 maxres 1.69921870110556e-04
|
||||
Iteration 12 bsize 1 maxres 7.11168977431953e-05
|
||||
|
||||
Eigenvalue lambda 1.84366405010223e-01
|
||||
Eigenvalue lambda 2.50881940126419e-01
|
||||
Eigenvalue lambda 3.60090613365173e-01
|
||||
Residual 5.52630408492405e-05
|
||||
Residual 3.09487622871529e-05
|
||||
Residual 7.11168977431953e-05
|
||||
|
||||
# Output file: solvers_struct.out.18.lobpcg
|
||||
Iterations = 32
|
||||
Final Relative Residual Norm = 8.266953e-07
|
||||
|
||||
# Output file: solvers_struct.out.18.lobpcg.1
|
||||
Eigenvalue lambda 1.84366241097450e-01
|
||||
Residual 4.44491524831392e-05
|
||||
|
||||
# Output file: solvers_struct.out.18.lobpcg.3
|
||||
Iteration 10 bsize 2 maxres 5.81342901568860e-04
|
||||
Iteration 11 bsize 1 maxres 1.98838606593199e-04
|
||||
Iteration 12 bsize 1 maxres 9.27079236134887e-05
|
||||
|
||||
Eigenvalue lambda 1.84366494417191e-01
|
||||
Eigenvalue lambda 2.50879585742950e-01
|
||||
Eigenvalue lambda 3.60090494155884e-01
|
||||
Residual 9.27079236134887e-05
|
||||
Residual 8.72101882123388e-05
|
||||
Residual 5.49681753909681e-05
|
||||
|
||||
# Output file: solvers_struct.out.19.lobpcg
|
||||
Iterations = 25
|
||||
Final Relative Residual Norm = 7.712439e-05
|
||||
|
||||
# Output file: solvers_struct.out.19.lobpcg.1
|
||||
Eigenvalue lambda 1.84366539120674e-01
|
||||
Residual 4.44510842498858e-05
|
||||
|
||||
# Output file: solvers_struct.out.19.lobpcg.3
|
||||
Iteration 10 bsize 2 maxres 5.81450236495584e-04
|
||||
Iteration 11 bsize 1 maxres 1.98705645743757e-04
|
||||
Iteration 12 bsize 1 maxres 9.26581269595772e-05
|
||||
|
||||
Eigenvalue lambda 1.84366509318352e-01
|
||||
Eigenvalue lambda 2.50874906778336e-01
|
||||
Eigenvalue lambda 3.60090017318726e-01
|
||||
Residual 9.26581269595772e-05
|
||||
Residual 8.80578954820521e-05
|
||||
Residual 5.49828182556666e-05
|
||||
|
||||
120
src/test/TEST_single/solvers_struct.saved.ray
Normal file
120
src/test/TEST_single/solvers_struct.saved.ray
Normal file
@ -0,0 +1,120 @@
|
||||
# Output file: solvers_struct.out.0
|
||||
Iterations = 3
|
||||
Final Relative Residual Norm = 3.246689e-05
|
||||
|
||||
# Output file: solvers_struct.out.1
|
||||
Iterations = 6
|
||||
Final Relative Residual Norm = 2.055851e-05
|
||||
|
||||
# Output file: solvers_struct.out.2
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 5.377654e-05
|
||||
|
||||
# Output file: solvers_struct.out.3
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 3.718371e-05
|
||||
|
||||
# Output file: solvers_struct.out.4
|
||||
Iterations = 16
|
||||
Final Relative Residual Norm = 3.718370e-05
|
||||
|
||||
# Output file: solvers_struct.out.10.lobpcg
|
||||
Iterations = 3
|
||||
Final Relative Residual Norm = 6.275833e-06
|
||||
|
||||
# Output file: solvers_struct.out.10.lobpcg.1
|
||||
Eigenvalue lambda 1.84366509318352e-01
|
||||
Residual 2.48082087637158e-05
|
||||
|
||||
# Output file: solvers_struct.out.10.lobpcg.3
|
||||
Iteration 10 bsize 2 maxres 4.33511973824352e-04
|
||||
Iteration 11 bsize 1 maxres 2.04666575882584e-04
|
||||
Iteration 12 bsize 1 maxres 8.50733777042478e-05
|
||||
|
||||
Eigenvalue lambda 1.84366509318352e-01
|
||||
Eigenvalue lambda 2.50882238149643e-01
|
||||
Eigenvalue lambda 3.60091090202332e-01
|
||||
Residual 7.41478434065357e-05
|
||||
Residual 4.07401748816483e-05
|
||||
Residual 8.50733777042478e-05
|
||||
|
||||
# Output file: solvers_struct.out.11.lobpcg
|
||||
Iterations = 6
|
||||
Final Relative Residual Norm = 2.112819e-05
|
||||
|
||||
# Output file: solvers_struct.out.11.lobpcg.1
|
||||
Eigenvalue lambda 1.84366583824158e-01
|
||||
Residual 3.15948745992500e-05
|
||||
|
||||
# Output file: solvers_struct.out.11.lobpcg.3
|
||||
Iteration 11 bsize 2 maxres 6.90118235070258e-04
|
||||
Iteration 12 bsize 2 maxres 2.52081663347781e-04
|
||||
Iteration 13 bsize 1 maxres 7.02887409715913e-05
|
||||
|
||||
Eigenvalue lambda 1.84366479516029e-01
|
||||
Eigenvalue lambda 2.50883370637894e-01
|
||||
Eigenvalue lambda 3.60090911388397e-01
|
||||
Residual 5.58231040486135e-05
|
||||
Residual 2.60377983067883e-05
|
||||
Residual 7.02887409715913e-05
|
||||
|
||||
# Output file: solvers_struct.out.17.lobpcg
|
||||
Iterations = 17
|
||||
Final Relative Residual Norm = 8.241194e-07
|
||||
|
||||
# Output file: solvers_struct.out.17.lobpcg.1
|
||||
Eigenvalue lambda 1.84366509318352e-01
|
||||
Residual 1.95705306396121e-05
|
||||
|
||||
# Output file: solvers_struct.out.17.lobpcg.3
|
||||
Iteration 10 bsize 2 maxres 3.62064485670999e-04
|
||||
Iteration 11 bsize 1 maxres 1.69921870110556e-04
|
||||
Iteration 12 bsize 1 maxres 7.11168977431953e-05
|
||||
|
||||
Eigenvalue lambda 1.84366405010223e-01
|
||||
Eigenvalue lambda 2.50881940126419e-01
|
||||
Eigenvalue lambda 3.60090613365173e-01
|
||||
Residual 5.52630408492405e-05
|
||||
Residual 3.09487622871529e-05
|
||||
Residual 7.11168977431953e-05
|
||||
|
||||
# Output file: solvers_struct.out.18.lobpcg
|
||||
Iterations = 32
|
||||
Final Relative Residual Norm = 8.266953e-07
|
||||
|
||||
# Output file: solvers_struct.out.18.lobpcg.1
|
||||
Eigenvalue lambda 1.84366241097450e-01
|
||||
Residual 4.44491524831392e-05
|
||||
|
||||
# Output file: solvers_struct.out.18.lobpcg.3
|
||||
Iteration 10 bsize 2 maxres 5.81342901568860e-04
|
||||
Iteration 11 bsize 1 maxres 1.98838606593199e-04
|
||||
Iteration 12 bsize 1 maxres 9.27079236134887e-05
|
||||
|
||||
Eigenvalue lambda 1.84366494417191e-01
|
||||
Eigenvalue lambda 2.50879585742950e-01
|
||||
Eigenvalue lambda 3.60090494155884e-01
|
||||
Residual 9.27079236134887e-05
|
||||
Residual 8.72101882123388e-05
|
||||
Residual 5.49681753909681e-05
|
||||
|
||||
# Output file: solvers_struct.out.19.lobpcg
|
||||
Iterations = 25
|
||||
Final Relative Residual Norm = 7.712439e-05
|
||||
|
||||
# Output file: solvers_struct.out.19.lobpcg.1
|
||||
Eigenvalue lambda 1.84366539120674e-01
|
||||
Residual 4.44510842498858e-05
|
||||
|
||||
# Output file: solvers_struct.out.19.lobpcg.3
|
||||
Iteration 10 bsize 2 maxres 5.81450236495584e-04
|
||||
Iteration 11 bsize 1 maxres 1.98705645743757e-04
|
||||
Iteration 12 bsize 1 maxres 9.26581269595772e-05
|
||||
|
||||
Eigenvalue lambda 1.84366509318352e-01
|
||||
Eigenvalue lambda 2.50874906778336e-01
|
||||
Eigenvalue lambda 3.60090017318726e-01
|
||||
Residual 9.26581269595772e-05
|
||||
Residual 8.80578954820521e-05
|
||||
Residual 5.49828182556666e-05
|
||||
|
||||
@ -98,6 +98,6 @@ HYPRE_Int hypre_RandI()
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Real hypre_Rand()
|
||||
{
|
||||
return ((HYPRE_Real)(hypre_RandI()) / m);
|
||||
return ((HYPRE_Real)(hypre_RandI()) / (HYPRE_Real)m);
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user