GPU support with single precision (#572)

This PR fixes the GPU support with single precision.
This commit is contained in:
Ruipeng Li 2022-03-04 12:05:32 -08:00 committed by GitHub
parent ebd6eb88c3
commit 95e6433fc7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 800 additions and 74 deletions

View File

@ -18,7 +18,7 @@ case $1 in
where: -h|-help prints this usage information and exits
{src_dir} is the hypre source directory
This script runs a number of tests suitable for the syrah cluster.
This script runs a number of tests suitable for the lassen cluster.
Example usage: $0 ../src
@ -67,6 +67,12 @@ co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enabl
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-um-shared
# CUDA with UM and single precision
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-single -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
./renametest.sh basic $output_dir/basic-cuda-um-single
# CUDA with UM without MPI [no run]
#co="--with-cuda --enable-unified-memory --without-MPI --with-gpu-arch=\\'60 70\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
@ -89,7 +95,7 @@ ro="-bench -struct -rt -mpibind -save ${save}"
# OMP 4.5 without UM in debug mode [struct]
co="--with-device-openmp --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-struct -rt -mpibind -save ${host}"
ro="-struct -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct
@ -116,6 +122,7 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
################################
## CUDA 11 build (only) tests ##
################################
co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
module -q load cuda/11
module list cuda/11 |& grep "None found"

View File

@ -18,7 +18,7 @@ case $1 in
where: -h|-help prints this usage information and exits
{src_dir} is the hypre source directory
This script runs a number of tests suitable for the syrah cluster.
This script runs a number of tests suitable for the ray cluster.
Example usage: $0 ../src
@ -67,6 +67,12 @@ co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enabl
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-um-shared
# CUDA with UM and single precision
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-single -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
./renametest.sh basic $output_dir/basic-cuda-um-single
# CUDA with UM without MPI [no run]
#co="--with-cuda --enable-unified-memory --without-MPI --with-gpu-arch=\\'60 70\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
@ -89,13 +95,14 @@ ro="-bench -struct -rt -mpibind -save ${save}"
# OMP 4.5 without UM in debug mode [struct]
co="--with-device-openmp --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-struct -rt -mpibind -save ${host}"
ro="-struct -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct
#################################
# CUDA + CMake build (only) tests
#################################
#####################################
## CUDA + CMake build (only) tests ##
#####################################
mo="-j"
# CUDA with UM + CMake
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
@ -112,9 +119,10 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct
############################
# CUDA 11 build (only) tests
############################
################################
## CUDA 11 build (only) tests ##
################################
co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
module -q load cuda/11
module list cuda/11 |& grep "None found"

View File

@ -11,14 +11,14 @@ case $1 in
-h|-help)
cat <<EOF
**** Only run this script on the lassen cluster ****
**** Only run this script on the redwood cluster ****
$0 [-h|-help] {src_dir}
where: -h|-help prints this usage information and exits
{src_dir} is the hypre source directory
This script runs a number of tests suitable for the syrah cluster.
This script runs a number of tests suitable for the redwood cluster.
Example usage: $0 ../src
@ -55,6 +55,11 @@ ro="-bench -struct -rt -save ${save} -D MV2_USE_CUDA=1"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-hip-nonum
# HIP with UM and single precision [no run]
co="--with-hip --enable-unified-memory --enable-single --enable-debug --with-MPI-include=/opt/cray/pe/cray-mvapich2_nogpu/2.3.5/infiniband/cray/10.0/include --with-MPI-lib-dirs=/opt/cray/pe/cray-mvapich2_nogpu/2.3.5/infiniband/cray/10.0/lib --with-MPI-libs=mpi --with-gpu-arch=\\'gfx906,gfx908\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-hip-um-single
# Echo to stderr all nonempty error files in $output_dir
for errfile in $( find $output_dir ! -size 0 -name "*.err" )
do

View File

@ -164,8 +164,8 @@ hypre_fprintf(stderr, "blocks= %i\n", blocks);
void SubdomainGraph_dhPrintStatsLong(SubdomainGraph_dh s, FILE *fp)
{
START_FUNC_DH
HYPRE_Int i, j, k;
HYPRE_Real max = 0, min = INT_MAX;
HYPRE_Int i, j, k;
HYPRE_Real max = 0, min = (HYPRE_Real) INT_MAX;
hypre_fprintf(fp, "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
hypre_fprintf(fp, "colors used = %i\n", s->colors);
@ -243,7 +243,7 @@ void SubdomainGraph_dhPrintStatsLong(SubdomainGraph_dh s, FILE *fp)
} else {
/*-----------------------------------------
* local n2o_row permutation
* local n2o_row permutation
*-----------------------------------------*/
hypre_fprintf(fp, "\nlocal n2o_row permutation:\n");
hypre_fprintf(fp, "--------------------------\n");
@ -1427,7 +1427,7 @@ void SubdomainGraph_dhDump(SubdomainGraph_dh s, char *filename)
hypre_fprintf(fp, "%i ", s->bdry_count[i]);
}
hypre_fprintf(fp, "\n");
}
/* write subdomain graph */
@ -1567,7 +1567,7 @@ void find_bdry_nodes_seq_private(SubdomainGraph_dh s, HYPRE_Int m, void* A)
tmp = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR;
for (i=0; i<m; ++i) tmp[i] = 0;
/*------------------------------------------
/*------------------------------------------
* mark all boundary nodes
*------------------------------------------ */
for (i=0; i<blocks; ++i) {

View File

@ -1013,7 +1013,7 @@ hypreDevice_extendWtoP( HYPRE_Int P_nr_of_rows,
PC_i,
P_diag_j );
hypreDevice_ScatterConstant(P_diag_data, W_nr_of_cols, PC_i, 1.0);
hypreDevice_ScatterConstant(P_diag_data, W_nr_of_cols, PC_i, (HYPRE_Complex) 1.0);
hypre_TFree(PC_i, HYPRE_MEMORY_DEVICE);
}

View File

@ -1725,13 +1725,13 @@ hypre_CSRMatrixSortRow(hypre_CSRMatrix *A)
* @param[in,out] *d_a_sorted On Start: Unsorted values. On Return: Sorted values corresponding with column indices
*/
void
hypre_SortCSRCusparse( HYPRE_Int n,
HYPRE_Int m,
HYPRE_Int nnzA,
cusparseMatDescr_t descrA,
const HYPRE_Int *d_ia,
HYPRE_Int *d_ja_sorted,
HYPRE_Complex *d_a_sorted )
hypre_SortCSRCusparse( HYPRE_Int n,
HYPRE_Int m,
HYPRE_Int nnzA,
cusparseMatDescr_t descrA,
const HYPRE_Int *d_ia,
HYPRE_Int *d_ja_sorted,
HYPRE_Complex *d_a_sorted )
{
cusparseHandle_t cusparsehandle = hypre_HandleCusparseHandle(hypre_handle());
@ -1741,10 +1741,8 @@ hypre_SortCSRCusparse( HYPRE_Int n,
csru2csrInfo_t sortInfoA;
HYPRE_CUSPARSE_CALL( cusparseCreateCsru2csrInfo(&sortInfoA) );
HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
if (isDoublePrecision)
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
{
HYPRE_CUSPARSE_CALL( cusparseDcsru2csr_bufferSizeExt(cusparsehandle,
n, m, nnzA, d_a_sorted, d_ia, d_ja_sorted,
@ -1756,18 +1754,20 @@ hypre_SortCSRCusparse( HYPRE_Int n,
n, m, nnzA, descrA, d_a_sorted, d_ia, d_ja_sorted,
sortInfoA, pBuffer) );
}
else if (isSinglePrecision)
#elif defined(HYPRE_SINGLE)
{
HYPRE_CUSPARSE_CALL( cusparseScsru2csr_bufferSizeExt(cusparsehandle,
n, m, nnzA, (float *) d_a_sorted, d_ia, d_ja_sorted,
n, m, nnzA, d_a_sorted, d_ia, d_ja_sorted,
sortInfoA, &pBufferSizeInBytes));
pBuffer = hypre_TAlloc(char, pBufferSizeInBytes, HYPRE_MEMORY_DEVICE);
HYPRE_CUSPARSE_CALL( cusparseScsru2csr(cusparsehandle,
n, m, nnzA, descrA, (float *)d_a_sorted, d_ia, d_ja_sorted,
n, m, nnzA, descrA, d_a_sorted, d_ia, d_ja_sorted,
sortInfoA, pBuffer) );
}
#endif
#endif
hypre_TFree(pBuffer, HYPRE_MEMORY_DEVICE);
HYPRE_CUSPARSE_CALL(cusparseDestroyCsru2csrInfo(sortInfoA));
@ -1861,8 +1861,15 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
{
HYPRE_CUSPARSE_CALL( cusparseCreateCsrsv2Info(&hypre_CsrsvDataInfoL(csrsv_data)) );
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
#elif defined(HYPRE_SINGLE)
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
#endif
#endif
if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
{
@ -1873,10 +1880,19 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoL(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
hypre_CsrsvDataBuffer(csrsv_data)) );
#elif defined(HYPRE_SINGLE)
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoL(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
hypre_CsrsvDataBuffer(csrsv_data)) );
#endif
#endif
cusparseStatus_t status = cusparseXcsrsv2_zeroPivot(handle, hypre_CsrsvDataInfoL(csrsv_data),
&structural_zero);
@ -1890,11 +1906,21 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
}
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
hypre_CsrsvDataBuffer(csrsv_data)) );
#elif defined(HYPRE_SINGLE)
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
hypre_CsrsvDataBuffer(csrsv_data)) );
#endif
#endif
}
else
{
@ -1904,8 +1930,15 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
{
HYPRE_CUSPARSE_CALL( cusparseCreateCsrsv2Info(&hypre_CsrsvDataInfoU(csrsv_data)) );
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
#elif defined(HYPRE_SINGLE)
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
#endif
#endif
if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
{
@ -1916,10 +1949,19 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoU(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
hypre_CsrsvDataBuffer(csrsv_data)) );
#elif defined(HYPRE_SINGLE)
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoU(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
hypre_CsrsvDataBuffer(csrsv_data)) );
#endif
#endif
cusparseStatus_t status = cusparseXcsrsv2_zeroPivot(handle, hypre_CsrsvDataInfoU(csrsv_data),
&structural_zero);
@ -1929,15 +1971,24 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
hypre_sprintf(msg, "hypre_CSRMatrixTriLowerUpperSolveCusparse A(%d,%d) is missing\n",
structural_zero, structural_zero);
hypre_error_w_msg(1, msg);
//hypre_assert(0);
}
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
hypre_CsrsvDataBuffer(csrsv_data)) );
#elif defined(HYPRE_SINGLE)
HYPRE_CUSPARSE_CALL( cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
hypre_CsrsvDataBuffer(csrsv_data)) );
#endif
#endif
}
return hypre_error_flag;
@ -2035,8 +2086,15 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
{
HYPRE_ROCSPARSE_CALL( rocsparse_create_mat_info(&hypre_CsrsvDataInfoL(csrsv_data)) );
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_buffer_size(handle, rocsparse_operation_none,
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
#elif defined(HYPRE_SINGLE)
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_buffer_size(handle, rocsparse_operation_none,
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
#endif
#endif
if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
{
@ -2047,10 +2105,19 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_analysis(handle, rocsparse_operation_none,
nrow, nnzA, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoL(csrsv_data), rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
#elif defined(HYPRE_SINGLE)
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_analysis(handle, rocsparse_operation_none,
nrow, nnzA, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoL(csrsv_data), rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
#endif
#endif
rocsparse_status status = rocsparse_csrsv_zero_pivot(handle, descr,
hypre_CsrsvDataInfoL(csrsv_data), &structural_zero);
@ -2064,11 +2131,21 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
}
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_solve(handle, rocsparse_operation_none,
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
rocsparse_solve_policy_auto,
hypre_CsrsvDataBuffer(csrsv_data)) );
#elif defined(HYPRE_SINGLE)
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_solve(handle, rocsparse_operation_none,
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
rocsparse_solve_policy_auto,
hypre_CsrsvDataBuffer(csrsv_data)) );
#endif
#endif
}
else
{
@ -2078,8 +2155,15 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
{
HYPRE_ROCSPARSE_CALL( rocsparse_create_mat_info(&hypre_CsrsvDataInfoU(csrsv_data)) );
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_buffer_size(handle, rocsparse_operation_none,
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
#elif defined(HYPRE_SINGLE)
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_buffer_size(handle, rocsparse_operation_none,
nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
#endif
#endif
if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
{
@ -2090,10 +2174,19 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_analysis(handle, rocsparse_operation_none,
nrow, nnzA, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoU(csrsv_data), rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
#elif defined(HYPRE_SINGLE)
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_analysis(handle, rocsparse_operation_none,
nrow, nnzA, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoU(csrsv_data), rocsparse_analysis_policy_reuse,
rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
#endif
#endif
rocsparse_status status = rocsparse_csrsv_zero_pivot(handle, descr,
hypre_CsrsvDataInfoU(csrsv_data), &structural_zero);
@ -2107,11 +2200,21 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
}
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_solve(handle, rocsparse_operation_none,
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
rocsparse_solve_policy_auto,
hypre_CsrsvDataBuffer(csrsv_data)) );
#elif defined(HYPRE_SINGLE)
HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_solve(handle, rocsparse_operation_none,
nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
rocsparse_solve_policy_auto,
hypre_CsrsvDataBuffer(csrsv_data)) );
#endif
#endif
}
return hypre_error_flag;
@ -2126,13 +2229,13 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
* @param[in,out] *d_a_sorted On Start: Unsorted values. On Return: Sorted values corresponding with column indices
*/
void
hypre_SortCSRRocsparse( HYPRE_Int n,
HYPRE_Int m,
HYPRE_Int nnzA,
rocsparse_mat_descr descrA,
const HYPRE_Int *d_ia,
HYPRE_Int *d_ja_sorted,
HYPRE_Complex *d_a_sorted )
hypre_SortCSRRocsparse( HYPRE_Int n,
HYPRE_Int m,
HYPRE_Int nnzA,
rocsparse_mat_descr descrA,
const HYPRE_Int *d_ia,
HYPRE_Int *d_ja_sorted,
HYPRE_Complex *d_a_sorted )
{
rocsparse_handle handle = hypre_HandleCusparseHandle(hypre_handle());
@ -2140,9 +2243,6 @@ hypre_SortCSRRocsparse( HYPRE_Int n,
void *pBuffer = NULL;
HYPRE_Int *P = NULL;
HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
// FIXME: There is no in-place version of csr sort in rocSPARSE currently, so we make
// a temporary copy of the data for gthr, sort that, and then copy the sorted values
// back to the array being returned. Where there is an in-place version available,
@ -2160,16 +2260,19 @@ hypre_SortCSRRocsparse( HYPRE_Int n,
HYPRE_ROCSPARSE_CALL( rocsparse_csrsort(handle, n, m, nnzA, descrA, d_ia, d_ja_sorted, P,
pBuffer) );
if (isDoublePrecision)
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
{
HYPRE_ROCSPARSE_CALL( rocsparse_dgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
rocsparse_index_base_zero) );
}
else if (isSinglePrecision)
#elif defined(HYPRE_SINGLE)
{
HYPRE_ROCSPARSE_CALL( rocsparse_sgthr(handle, nnzA, (float *) d_a_sorted, (float *) d_a_tmp, P,
HYPRE_ROCSPARSE_CALL( rocsparse_sgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
rocsparse_index_base_zero) );
}
#endif
#endif
hypre_TFree(pBuffer, HYPRE_MEMORY_DEVICE);
hypre_TFree(P, HYPRE_MEMORY_DEVICE);
@ -2183,7 +2286,8 @@ hypre_SortCSRRocsparse( HYPRE_Int n,
void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix)
{
#if defined(HYPRE_USING_ROCSPARSE)
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrmv_analysis(hypre_HandleCusparseHandle(hypre_handle()),
rocsparse_operation_none,
hypre_CSRMatrixNumRows(matrix),
@ -2194,6 +2298,19 @@ void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix)
hypre_CSRMatrixI(matrix),
hypre_CSRMatrixJ(matrix),
hypre_CSRMatrixGPUMatInfo(matrix)) );
#elif defined(HYPRE_SINGLE)
HYPRE_ROCSPARSE_CALL( rocsparse_scsrmv_analysis(hypre_HandleCusparseHandle(hypre_handle()),
rocsparse_operation_none,
hypre_CSRMatrixNumRows(matrix),
hypre_CSRMatrixNumCols(matrix),
hypre_CSRMatrixNumNonzeros(matrix),
hypre_CSRMatrixGPUMatDescr(matrix),
hypre_CSRMatrixData(matrix),
hypre_CSRMatrixI(matrix),
hypre_CSRMatrixJ(matrix),
hypre_CSRMatrixGPUMatInfo(matrix)) );
#endif
#endif
#endif // #if defined(HYPRE_USING_ROCSPARSE)
}

View File

@ -241,6 +241,8 @@ hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans,
B = A;
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_CUSPARSE_CALL( cusparseDcsrmv(handle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
hypre_CSRMatrixNumRows(B) - offset,
@ -254,7 +256,22 @@ hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans,
hypre_VectorData(x),
&beta,
hypre_VectorData(y) + offset) );
#elif defined(HYPRE_SINGLE)
HYPRE_CUSPARSE_CALL( cusparseScsrmv(handle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
hypre_CSRMatrixNumRows(B) - offset,
hypre_CSRMatrixNumCols(B),
hypre_CSRMatrixNumNonzeros(B),
&alpha,
descr,
hypre_CSRMatrixData(B),
hypre_CSRMatrixI(B) + offset,
hypre_CSRMatrixJ(B),
hypre_VectorData(x),
&beta,
hypre_VectorData(y) + offset) );
#endif
#endif
if (trans)
{
@ -292,6 +309,8 @@ hypre_CSRMatrixMatvecRocsparse( HYPRE_Int trans,
B = A;
}
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrmv(handle,
rocsparse_operation_none,
hypre_CSRMatrixNumRows(B) - offset,
@ -306,6 +325,23 @@ hypre_CSRMatrixMatvecRocsparse( HYPRE_Int trans,
hypre_VectorData(x),
&beta,
hypre_VectorData(y) + offset) );
#elif defined(HYPRE_SINGLE)
HYPRE_ROCSPARSE_CALL( rocsparse_scsrmv(handle,
rocsparse_operation_none,
hypre_CSRMatrixNumRows(B) - offset,
hypre_CSRMatrixNumCols(B),
hypre_CSRMatrixNumNonzeros(B),
&alpha,
descr,
hypre_CSRMatrixData(B),
hypre_CSRMatrixI(B) + offset,
hypre_CSRMatrixJ(B),
info,
hypre_VectorData(x),
&beta,
hypre_VectorData(y) + offset) );
#endif
#endif
if (trans)
{

View File

@ -208,11 +208,6 @@ hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int m,
cusparseOperation_t transA = CUSPARSE_OPERATION_NON_TRANSPOSE;
cusparseOperation_t transB = CUSPARSE_OPERATION_NON_TRANSPOSE;
HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
hypre_assert(isDoublePrecision || isSinglePrecision);
/* Copy the unsorted over as the initial "sorted" */
hypre_TMemcpy(d_ja_sorted, d_ja, HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
hypre_TMemcpy(d_a_sorted, d_a, HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
@ -250,20 +245,23 @@ hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int m,
d_jc = hypre_TAlloc(HYPRE_Int, nnzC, HYPRE_MEMORY_DEVICE);
d_c = hypre_TAlloc(HYPRE_Complex, nnzC, HYPRE_MEMORY_DEVICE);
if (isDoublePrecision)
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
{
HYPRE_CUSPARSE_CALL( cusparseDcsrgemm(cusparsehandle, transA, transB, m, n, k,
descr_A, nnzA, d_a_sorted, d_ia, d_ja_sorted,
descr_B, nnzB, d_b_sorted, d_ib, d_jb_sorted,
descr_C, d_c, d_ic, d_jc) );
}
else if (isSinglePrecision)
#elif defined(HYPRE_SINGLE)
{
HYPRE_CUSPARSE_CALL( cusparseScsrgemm(cusparsehandle, transA, transB, m, n, k,
descr_A, nnzA, (float *) d_a_sorted, d_ia, d_ja_sorted,
descr_B, nnzB, (float *) d_b_sorted, d_ib, d_jb_sorted,
descr_C, (float *) d_c, d_ic, d_jc) );
descr_A, nnzA, d_a_sorted, d_ia, d_ja_sorted,
descr_B, nnzB, d_b_sorted, d_ib, d_jb_sorted,
descr_C, d_c, d_ic, d_jc) );
}
#endif
#endif
*d_ic_out = d_ic;
*d_jc_out = d_jc;

View File

@ -46,11 +46,6 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
rocsparse_operation transA = rocsparse_operation_none;
rocsparse_operation transB = rocsparse_operation_none;
HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
hypre_assert(isDoublePrecision || isSinglePrecision);
/* Copy the unsorted over as the initial "sorted" */
hypre_TMemcpy(d_ja_sorted, d_ja, HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
hypre_TMemcpy(d_a_sorted, d_a, HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
@ -82,7 +77,8 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
size_t rs_buffer_size = 0;
void *rs_buffer;
if (isDoublePrecision)
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
{
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrgemm_buffer_size(handle,
transA, transB,
@ -94,17 +90,19 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
NULL, 0, NULL, NULL, // D is nothing
infoC, &rs_buffer_size) );
}
else if (isSinglePrecision)
#elif defined(HYPRE_SINGLE)
{
HYPRE_ROCSPARSE_CALL( rocsparse_scsrgemm_buffer_size(handle, transA, transB,
m, n, k,
(float *) &alpha, // \alpha = 1
&alpha, // \alpha = 1
descrA, nnzA, d_ia, d_ja_sorted,
descrB, nnzB, d_ib, d_jb_sorted,
NULL, // \beta = 0
NULL, 0, NULL, NULL,
infoC, &rs_buffer_size) );
}
#endif
#endif
rs_buffer = hypre_TAlloc(char, rs_buffer_size, HYPRE_MEMORY_DEVICE);
@ -133,7 +131,8 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
d_jc = hypre_TAlloc(HYPRE_Int, nnzC, HYPRE_MEMORY_DEVICE);
d_c = hypre_TAlloc(HYPRE_Complex, nnzC, HYPRE_MEMORY_DEVICE);
if (isDoublePrecision)
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
{
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrgemm(handle, transA, transB,
m, n, k,
@ -145,18 +144,20 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
descrC, d_c, d_ic, d_jc,
infoC, rs_buffer) );
}
else if (isSinglePrecision)
#elif defined(HYPRE_SINGLE)
{
HYPRE_ROCSPARSE_CALL( rocsparse_scsrgemm(handle, transA, transB,
m, n, k,
(float *) &alpha, // alpha = 1
descrA, nnzA, (float *) d_a_sorted, d_ia, d_ja_sorted,
descrB, nnzB, (float *) d_b_sorted, d_ib, d_jb_sorted,
&alpha, // alpha = 1
descrA, nnzA, d_a_sorted, d_ia, d_ja_sorted,
descrB, nnzB, d_b_sorted, d_ib, d_jb_sorted,
NULL, // beta = 0
NULL, 0, NULL, NULL, NULL, // D is nothing
descrC, (float *) d_c, d_ic, d_jc,
descrC, d_c, d_ic, d_jc,
infoC, rs_buffer) );
}
#endif
#endif
// Free up the memory needed by rocsparse
hypre_TFree(rs_buffer, HYPRE_MEMORY_DEVICE);

View File

@ -64,13 +64,24 @@ hypreDevice_CSRSpTransCusparse(HYPRE_Int m, HYPRE_Int n, HYPRE
hypre_TFree(dBuffer, HYPRE_MEMORY_DEVICE);
#else
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_CUSPARSE_CALL( cusparseDcsr2csc(handle,
m, n, nnzA,
d_aa, d_ia, d_ja,
csc_a, csc_j, csc_i,
action,
CUSPARSE_INDEX_BASE_ZERO) );
#elif defined(HYPRE_SINGLE)
HYPRE_CUSPARSE_CALL( cusparseScsr2csc(handle,
m, n, nnzA,
d_aa, d_ia, d_ja,
csc_a, csc_j, csc_i,
action,
CUSPARSE_INDEX_BASE_ZERO) );
#endif
#endif /* #if !defined(HYPRE_COMPLEX) */
#endif /* #if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION */
*d_ic_out = csc_i;
*d_jc_out = csc_j;
@ -124,6 +135,8 @@ hypreDevice_CSRSpTransRocsparse(HYPRE_Int m, HYPRE_Int n, HYPR
void * buffer;
buffer = hypre_TAlloc(char, buffer_size, HYPRE_MEMORY_DEVICE);
#if !defined(HYPRE_COMPLEX)
#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
HYPRE_ROCSPARSE_CALL( rocsparse_dcsr2csc(handle,
m, n, nnzA,
d_aa, d_ia, d_ja,
@ -131,6 +144,17 @@ hypreDevice_CSRSpTransRocsparse(HYPRE_Int m, HYPRE_Int n, HYPR
action,
rocsparse_index_base_zero,
buffer) );
#elif defined(HYPRE_SINGLE)
HYPRE_ROCSPARSE_CALL( rocsparse_scsr2csc(handle,
m, n, nnzA,
d_aa, d_ia, d_ja,
csc_a, csc_j, csc_i,
action,
rocsparse_index_base_zero,
buffer) );
#endif
#endif /* #if !defined(HYPRE_COMPLEX) */
hypre_TFree(buffer, HYPRE_MEMORY_DEVICE);
*d_ic_out = csc_i;

View File

@ -0,0 +1,145 @@
# Output file: solvers_ij.out.0
Iterations = 6
Final Relative Residual Norm = 3.800163e-05
# Output file: solvers_ij.out.1
Iterations = 26
Final Relative Residual Norm = 7.193501e-05
# Output file: solvers_ij.out.2
GMRES Iterations = 6
Final GMRES Relative Residual Norm = 4.962838e-05
# Output file: solvers_ij.out.3
GMRES Iterations = 39
Final GMRES Relative Residual Norm = 9.043231e-05
# Output file: solvers_ij.out.4
Iterations = 5
Final Relative Residual Norm = 1.785553e-05
# Output file: solvers_ij.out.5
Iterations = 103
Final Relative Residual Norm = 8.784425e-05
# Output file: solvers_ij.out.6
GMRES Iterations = 15
Final GMRES Relative Residual Norm = 7.131146e-05
# Output file: solvers_ij.out.7
Iterations = 13
Final Relative Residual Norm = 7.750613e-05
# Output file: solvers_ij.out.8
Iterations = 26
PCG_Iterations = 0
DSCG_Iterations = 26
Final Relative Residual Norm = 7.193501e-05
# Output file: solvers_ij.out.9
Iterations = 7
PCG_Iterations = 3
DSCG_Iterations = 4
Final Relative Residual Norm = 8.301256e-05
# Output file: solvers_ij.out.10
Iterations = 6
PCG_Iterations = 4
DSCG_Iterations = 2
Final Relative Residual Norm = 2.785249e-05
# Output file: solvers_ij.out.11
Iterations = 5
PCG_Iterations = 2
DSCG_Iterations = 3
Final Relative Residual Norm = 1.361495e-05
# Output file: solvers_ij.out.sysh
Average Convergence Factor = 0.196477
Complexity: grid = 1.392875
operator = 2.633675
cycle = 5.267332
# Output file: solvers_ij.out.sysn
Average Convergence Factor = 0.533116
Complexity: grid = 1.390750
operator = 2.080112
cycle = 10.160150
# Output file: solvers_ij.out.sysu
Average Convergence Factor = 0.737621
Complexity: grid = 1.390813
operator = 2.718671
cycle = 5.437173
# Output file: solvers_ij.out.101
LGMRES Iterations = 39
Final LGMRES Relative Residual Norm = 7.229704e-05
# Output file: solvers_ij.out.102
LGMRES Iterations = 6
Final LGMRES Relative Residual Norm = 4.962838e-05
# Output file: solvers_ij.out.103
FlexGMRES Iterations = 39
Final FlexGMRES Relative Residual Norm = 9.043153e-05
# Output file: solvers_ij.out.104
FlexGMRES Iterations = 6
Final FlexGMRES Relative Residual Norm = 4.961434e-05
# Output file: solvers_ij.out.105
Iterations = 11
Final Relative Residual Norm = 4.022052e-05
# Output file: solvers_ij.out.106
Iterations = 11
Final Relative Residual Norm = 4.022052e-05
# Output file: solvers_ij.out.107
Iterations = 16
Final Relative Residual Norm = 6.623256e-05
# Output file: solvers_ij.out.108
Iterations = 16
Final Relative Residual Norm = 6.623272e-05
# Output file: solvers_ij.out.109
Iterations = 11
Final Relative Residual Norm = 7.168805e-05
# Output file: solvers_ij.out.110
Iterations = 11
Final Relative Residual Norm = 7.168810e-05
# Output file: solvers_ij.out.111
Iterations = 17
Final Relative Residual Norm = 7.756719e-05
# Output file: solvers_ij.out.112
GMRES Iterations = 21
Final GMRES Relative Residual Norm = 7.660792e-05
# Output file: solvers_ij.out.113
GMRES Iterations = 14
Final GMRES Relative Residual Norm = 9.868194e-05
# Output file: solvers_ij.out.114
BoomerAMG Iterations = 17
Final Relative Residual Norm = 9.048652e-05
# Output file: solvers_ij.out.115
BoomerAMG Iterations = 17
Final Relative Residual Norm = 9.091324e-05
# Output file: solvers_ij.out.116
GMRES Iterations = 8
Final GMRES Relative Residual Norm = 7.735370e-05
# Output file: solvers_ij.out.117
GMRES Iterations = 8
Final GMRES Relative Residual Norm = 7.728816e-05

View File

@ -0,0 +1,145 @@
# Output file: solvers_ij.out.0
Iterations = 6
Final Relative Residual Norm = 3.800161e-05
# Output file: solvers_ij.out.1
Iterations = 26
Final Relative Residual Norm = 7.193501e-05
# Output file: solvers_ij.out.2
GMRES Iterations = 6
Final GMRES Relative Residual Norm = 4.962170e-05
# Output file: solvers_ij.out.3
GMRES Iterations = 39
Final GMRES Relative Residual Norm = 9.043231e-05
# Output file: solvers_ij.out.4
Iterations = 5
Final Relative Residual Norm = 1.785550e-05
# Output file: solvers_ij.out.5
Iterations = 103
Final Relative Residual Norm = 8.784425e-05
# Output file: solvers_ij.out.6
GMRES Iterations = 15
Final GMRES Relative Residual Norm = 7.131146e-05
# Output file: solvers_ij.out.7
Iterations = 13
Final Relative Residual Norm = 7.750613e-05
# Output file: solvers_ij.out.8
Iterations = 26
PCG_Iterations = 0
DSCG_Iterations = 26
Final Relative Residual Norm = 7.193501e-05
# Output file: solvers_ij.out.9
Iterations = 7
PCG_Iterations = 3
DSCG_Iterations = 4
Final Relative Residual Norm = 8.301259e-05
# Output file: solvers_ij.out.10
Iterations = 6
PCG_Iterations = 4
DSCG_Iterations = 2
Final Relative Residual Norm = 2.785446e-05
# Output file: solvers_ij.out.11
Iterations = 5
PCG_Iterations = 2
DSCG_Iterations = 3
Final Relative Residual Norm = 1.361500e-05
# Output file: solvers_ij.out.sysh
Average Convergence Factor = 0.193839
Complexity: grid = 1.392875
operator = 2.632649
cycle = 5.265280
# Output file: solvers_ij.out.sysn
Average Convergence Factor = 0.533116
Complexity: grid = 1.390750
operator = 2.080112
cycle = 10.160150
# Output file: solvers_ij.out.sysu
Average Convergence Factor = 0.739514
Complexity: grid = 1.390563
operator = 2.717318
cycle = 5.434468
# Output file: solvers_ij.out.101
LGMRES Iterations = 39
Final LGMRES Relative Residual Norm = 7.229704e-05
# Output file: solvers_ij.out.102
LGMRES Iterations = 6
Final LGMRES Relative Residual Norm = 4.962170e-05
# Output file: solvers_ij.out.103
FlexGMRES Iterations = 39
Final FlexGMRES Relative Residual Norm = 9.043153e-05
# Output file: solvers_ij.out.104
FlexGMRES Iterations = 6
Final FlexGMRES Relative Residual Norm = 4.961419e-05
# Output file: solvers_ij.out.105
Iterations = 11
Final Relative Residual Norm = 4.434195e-05
# Output file: solvers_ij.out.106
Iterations = 11
Final Relative Residual Norm = 4.434195e-05
# Output file: solvers_ij.out.107
Iterations = 16
Final Relative Residual Norm = 6.671497e-05
# Output file: solvers_ij.out.108
Iterations = 16
Final Relative Residual Norm = 6.671443e-05
# Output file: solvers_ij.out.109
Iterations = 11
Final Relative Residual Norm = 7.168811e-05
# Output file: solvers_ij.out.110
Iterations = 11
Final Relative Residual Norm = 7.168806e-05
# Output file: solvers_ij.out.111
Iterations = 17
Final Relative Residual Norm = 7.756725e-05
# Output file: solvers_ij.out.112
GMRES Iterations = 21
Final GMRES Relative Residual Norm = 7.643850e-05
# Output file: solvers_ij.out.113
GMRES Iterations = 14
Final GMRES Relative Residual Norm = 9.851967e-05
# Output file: solvers_ij.out.114
BoomerAMG Iterations = 18
Final Relative Residual Norm = 6.353526e-05
# Output file: solvers_ij.out.115
BoomerAMG Iterations = 18
Final Relative Residual Norm = 6.077210e-05
# Output file: solvers_ij.out.116
GMRES Iterations = 8
Final GMRES Relative Residual Norm = 7.078722e-05
# Output file: solvers_ij.out.117
GMRES Iterations = 8
Final GMRES Relative Residual Norm = 7.086178e-05

View File

@ -0,0 +1,120 @@
# Output file: solvers_struct.out.0
Iterations = 3
Final Relative Residual Norm = 3.246689e-05
# Output file: solvers_struct.out.1
Iterations = 6
Final Relative Residual Norm = 2.055851e-05
# Output file: solvers_struct.out.2
Iterations = 16
Final Relative Residual Norm = 5.377654e-05
# Output file: solvers_struct.out.3
Iterations = 16
Final Relative Residual Norm = 3.718371e-05
# Output file: solvers_struct.out.4
Iterations = 16
Final Relative Residual Norm = 3.718370e-05
# Output file: solvers_struct.out.10.lobpcg
Iterations = 3
Final Relative Residual Norm = 6.275833e-06
# Output file: solvers_struct.out.10.lobpcg.1
Eigenvalue lambda 1.84366509318352e-01
Residual 2.48082087637158e-05
# Output file: solvers_struct.out.10.lobpcg.3
Iteration 10 bsize 2 maxres 4.33511973824352e-04
Iteration 11 bsize 1 maxres 2.04666575882584e-04
Iteration 12 bsize 1 maxres 8.50733777042478e-05
Eigenvalue lambda 1.84366509318352e-01
Eigenvalue lambda 2.50882238149643e-01
Eigenvalue lambda 3.60091090202332e-01
Residual 7.41478434065357e-05
Residual 4.07401748816483e-05
Residual 8.50733777042478e-05
# Output file: solvers_struct.out.11.lobpcg
Iterations = 6
Final Relative Residual Norm = 2.112819e-05
# Output file: solvers_struct.out.11.lobpcg.1
Eigenvalue lambda 1.84366583824158e-01
Residual 3.15948745992500e-05
# Output file: solvers_struct.out.11.lobpcg.3
Iteration 11 bsize 2 maxres 6.90118235070258e-04
Iteration 12 bsize 2 maxres 2.52081663347781e-04
Iteration 13 bsize 1 maxres 7.02887409715913e-05
Eigenvalue lambda 1.84366479516029e-01
Eigenvalue lambda 2.50883370637894e-01
Eigenvalue lambda 3.60090911388397e-01
Residual 5.58231040486135e-05
Residual 2.60377983067883e-05
Residual 7.02887409715913e-05
# Output file: solvers_struct.out.17.lobpcg
Iterations = 17
Final Relative Residual Norm = 8.241194e-07
# Output file: solvers_struct.out.17.lobpcg.1
Eigenvalue lambda 1.84366509318352e-01
Residual 1.95705306396121e-05
# Output file: solvers_struct.out.17.lobpcg.3
Iteration 10 bsize 2 maxres 3.62064485670999e-04
Iteration 11 bsize 1 maxres 1.69921870110556e-04
Iteration 12 bsize 1 maxres 7.11168977431953e-05
Eigenvalue lambda 1.84366405010223e-01
Eigenvalue lambda 2.50881940126419e-01
Eigenvalue lambda 3.60090613365173e-01
Residual 5.52630408492405e-05
Residual 3.09487622871529e-05
Residual 7.11168977431953e-05
# Output file: solvers_struct.out.18.lobpcg
Iterations = 32
Final Relative Residual Norm = 8.266953e-07
# Output file: solvers_struct.out.18.lobpcg.1
Eigenvalue lambda 1.84366241097450e-01
Residual 4.44491524831392e-05
# Output file: solvers_struct.out.18.lobpcg.3
Iteration 10 bsize 2 maxres 5.81342901568860e-04
Iteration 11 bsize 1 maxres 1.98838606593199e-04
Iteration 12 bsize 1 maxres 9.27079236134887e-05
Eigenvalue lambda 1.84366494417191e-01
Eigenvalue lambda 2.50879585742950e-01
Eigenvalue lambda 3.60090494155884e-01
Residual 9.27079236134887e-05
Residual 8.72101882123388e-05
Residual 5.49681753909681e-05
# Output file: solvers_struct.out.19.lobpcg
Iterations = 25
Final Relative Residual Norm = 7.712439e-05
# Output file: solvers_struct.out.19.lobpcg.1
Eigenvalue lambda 1.84366539120674e-01
Residual 4.44510842498858e-05
# Output file: solvers_struct.out.19.lobpcg.3
Iteration 10 bsize 2 maxres 5.81450236495584e-04
Iteration 11 bsize 1 maxres 1.98705645743757e-04
Iteration 12 bsize 1 maxres 9.26581269595772e-05
Eigenvalue lambda 1.84366509318352e-01
Eigenvalue lambda 2.50874906778336e-01
Eigenvalue lambda 3.60090017318726e-01
Residual 9.26581269595772e-05
Residual 8.80578954820521e-05
Residual 5.49828182556666e-05

View File

@ -0,0 +1,120 @@
# Output file: solvers_struct.out.0
Iterations = 3
Final Relative Residual Norm = 3.246689e-05
# Output file: solvers_struct.out.1
Iterations = 6
Final Relative Residual Norm = 2.055851e-05
# Output file: solvers_struct.out.2
Iterations = 16
Final Relative Residual Norm = 5.377654e-05
# Output file: solvers_struct.out.3
Iterations = 16
Final Relative Residual Norm = 3.718371e-05
# Output file: solvers_struct.out.4
Iterations = 16
Final Relative Residual Norm = 3.718370e-05
# Output file: solvers_struct.out.10.lobpcg
Iterations = 3
Final Relative Residual Norm = 6.275833e-06
# Output file: solvers_struct.out.10.lobpcg.1
Eigenvalue lambda 1.84366509318352e-01
Residual 2.48082087637158e-05
# Output file: solvers_struct.out.10.lobpcg.3
Iteration 10 bsize 2 maxres 4.33511973824352e-04
Iteration 11 bsize 1 maxres 2.04666575882584e-04
Iteration 12 bsize 1 maxres 8.50733777042478e-05
Eigenvalue lambda 1.84366509318352e-01
Eigenvalue lambda 2.50882238149643e-01
Eigenvalue lambda 3.60091090202332e-01
Residual 7.41478434065357e-05
Residual 4.07401748816483e-05
Residual 8.50733777042478e-05
# Output file: solvers_struct.out.11.lobpcg
Iterations = 6
Final Relative Residual Norm = 2.112819e-05
# Output file: solvers_struct.out.11.lobpcg.1
Eigenvalue lambda 1.84366583824158e-01
Residual 3.15948745992500e-05
# Output file: solvers_struct.out.11.lobpcg.3
Iteration 11 bsize 2 maxres 6.90118235070258e-04
Iteration 12 bsize 2 maxres 2.52081663347781e-04
Iteration 13 bsize 1 maxres 7.02887409715913e-05
Eigenvalue lambda 1.84366479516029e-01
Eigenvalue lambda 2.50883370637894e-01
Eigenvalue lambda 3.60090911388397e-01
Residual 5.58231040486135e-05
Residual 2.60377983067883e-05
Residual 7.02887409715913e-05
# Output file: solvers_struct.out.17.lobpcg
Iterations = 17
Final Relative Residual Norm = 8.241194e-07
# Output file: solvers_struct.out.17.lobpcg.1
Eigenvalue lambda 1.84366509318352e-01
Residual 1.95705306396121e-05
# Output file: solvers_struct.out.17.lobpcg.3
Iteration 10 bsize 2 maxres 3.62064485670999e-04
Iteration 11 bsize 1 maxres 1.69921870110556e-04
Iteration 12 bsize 1 maxres 7.11168977431953e-05
Eigenvalue lambda 1.84366405010223e-01
Eigenvalue lambda 2.50881940126419e-01
Eigenvalue lambda 3.60090613365173e-01
Residual 5.52630408492405e-05
Residual 3.09487622871529e-05
Residual 7.11168977431953e-05
# Output file: solvers_struct.out.18.lobpcg
Iterations = 32
Final Relative Residual Norm = 8.266953e-07
# Output file: solvers_struct.out.18.lobpcg.1
Eigenvalue lambda 1.84366241097450e-01
Residual 4.44491524831392e-05
# Output file: solvers_struct.out.18.lobpcg.3
Iteration 10 bsize 2 maxres 5.81342901568860e-04
Iteration 11 bsize 1 maxres 1.98838606593199e-04
Iteration 12 bsize 1 maxres 9.27079236134887e-05
Eigenvalue lambda 1.84366494417191e-01
Eigenvalue lambda 2.50879585742950e-01
Eigenvalue lambda 3.60090494155884e-01
Residual 9.27079236134887e-05
Residual 8.72101882123388e-05
Residual 5.49681753909681e-05
# Output file: solvers_struct.out.19.lobpcg
Iterations = 25
Final Relative Residual Norm = 7.712439e-05
# Output file: solvers_struct.out.19.lobpcg.1
Eigenvalue lambda 1.84366539120674e-01
Residual 4.44510842498858e-05
# Output file: solvers_struct.out.19.lobpcg.3
Iteration 10 bsize 2 maxres 5.81450236495584e-04
Iteration 11 bsize 1 maxres 1.98705645743757e-04
Iteration 12 bsize 1 maxres 9.26581269595772e-05
Eigenvalue lambda 1.84366509318352e-01
Eigenvalue lambda 2.50874906778336e-01
Eigenvalue lambda 3.60090017318726e-01
Residual 9.26581269595772e-05
Residual 8.80578954820521e-05
Residual 5.49828182556666e-05

View File

@ -98,6 +98,6 @@ HYPRE_Int hypre_RandI()
*--------------------------------------------------------------------------*/
HYPRE_Real hypre_Rand()
{
   /* Scale the integer draw from hypre_RandI() by m and return the quotient
    * as a HYPRE_Real. Both operands are cast explicitly so the division is
    * carried out in HYPRE_Real whatever precision the library is built with.
    * NOTE(review): m is defined outside this block; presumably it is the
    * generator's modulus, which would put the result in the unit interval.
    * Confirm against hypre_RandI(). */
   return ((HYPRE_Real)(hypre_RandI()) / (HYPRE_Real)m);
}