Amgdd fixes (#386)

This removes the masked matvec routine previously used for CF L1 Jacobi relaxation in the AMG-DD solver. The GPU code contained a bug, and the cuSPARSE bsrxmv routine no longer supports our use case as of CUDA 11. In addition, appropriate regression test results were saved for the GPU implementation of AMG-DD.
This commit is contained in:
Wayne Mitchell 2021-06-15 10:44:46 -07:00 committed by GitHub
parent ad5d7e009f
commit 5f8472b05c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 146 additions and 136 deletions

View File

@ -1,3 +1,2 @@
TEST_ij/agg_interp.sh
TEST_ij/[^a]*.sh
TEST_ij/*.sh

View File

@ -182,10 +182,10 @@ hypre_BoomerAMGDD_FAC_Relax( void *amgdd_vdata,
hypre_AMGDDCompGridVectorScale(-1.0, hypre_AMGDDCompGridTemp(compGrid));
}
for (i = 0; i < numRelax; i++)
{
(*hypre_ParAMGDDDataUserFACRelaxation(amgdd_data))(amgdd_vdata, level, cycle_param);
}
for (i = 0; i < numRelax; i++)
{
(*hypre_ParAMGDDDataUserFACRelaxation(amgdd_data))(amgdd_vdata, level, cycle_param);
}
if (hypre_AMGDDCompGridT(compGrid) || hypre_AMGDDCompGridQ(compGrid))
{

View File

@ -85,7 +85,7 @@ hypre_BoomerAMGDD_FAC_JacobiDevice( void *amgdd_vdata,
hypreDevice_IVAXPY(hypre_AMGDDCompGridNumNonOwnedRealNodes(compGrid),
&(hypre_AMGDDCompGridL1Norms(compGrid)[hypre_AMGDDCompGridNumOwnedNodes(compGrid)]),
hypre_VectorData(hypre_AMGDDCompGridVectorNonOwned(hypre_AMGDDCompGridTemp2(compGrid))),
hypre_VectorData(hypre_AMGDDCompGridVectorNonOwned(u)));
hypre_VectorData(hypre_AMGDDCompGridVectorNonOwned(u)));
return hypre_error_flag;
}
@ -108,11 +108,8 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata,
HYPRE_Int num_nonowned_rc = hypre_AMGDDCompGridNumNonOwnedRealCPoints(compGrid);
HYPRE_Int num_nonowned_rf = num_nonowned_r - num_nonowned_rc;
hypre_CSRMatrix *mat;
hypre_Vector *owned_tmp;
hypre_Vector *nonowned_tmp;
HYPRE_Real alpha = -relax_weight;
HYPRE_Real beta = relax_weight;
// Allocate temporary vector if necessary
if (!hypre_AMGDDCompGridTemp2(compGrid))
@ -123,40 +120,21 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata,
num_nonowned,
num_nonowned_r);
}
hypre_AMGDDCompGridVectorCopy(hypre_AMGDDCompGridF(compGrid),
hypre_AMGDDCompGridTemp2(compGrid));
hypre_AMGDDCompGridMatvec(-relax_weight,
hypre_AMGDDCompGridA(compGrid),
hypre_AMGDDCompGridU(compGrid),
relax_weight,
hypre_AMGDDCompGridTemp2(compGrid));
owned_tmp = hypre_AMGDDCompGridVectorOwned(hypre_AMGDDCompGridTemp2(compGrid));
nonowned_tmp = hypre_AMGDDCompGridVectorNonOwned(hypre_AMGDDCompGridTemp2(compGrid));
if (relax_set)
{
mat = hypre_AMGDDCompGridMatrixOwnedDiag(hypre_AMGDDCompGridA(compGrid));
beta = relax_weight;
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u,
beta, owned_tmp, owned_tmp,
hypre_AMGDDCompGridOwnedCMask(compGrid),
num_owned_c);
mat = hypre_AMGDDCompGridMatrixOwnedOffd(hypre_AMGDDCompGridA(compGrid));
beta = 1.0;
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u,
beta, owned_tmp, owned_tmp,
hypre_AMGDDCompGridOwnedCMask(compGrid),
num_owned_c);
mat = hypre_AMGDDCompGridMatrixNonOwnedDiag(hypre_AMGDDCompGridA(compGrid));
beta = relax_weight;
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u,
beta, nonowned_tmp, nonowned_tmp,
hypre_AMGDDCompGridNonOwnedCMask(compGrid),
num_nonowned_rc);
mat = hypre_AMGDDCompGridMatrixNonOwnedOffd(hypre_AMGDDCompGridA(compGrid));
beta = 1.0;
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u,
beta, nonowned_tmp, nonowned_tmp,
hypre_AMGDDCompGridNonOwnedCMask(compGrid),
num_nonowned_rc);
hypreDevice_MaskedIVAXPY(num_owned_c,
hypre_AMGDDCompGridL1Norms(compGrid),
@ -172,34 +150,6 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata,
}
else
{
mat = hypre_AMGDDCompGridMatrixOwnedDiag(hypre_AMGDDCompGridA(compGrid));
beta = relax_weight;
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u,
beta, owned_tmp, owned_tmp,
hypre_AMGDDCompGridOwnedFMask(compGrid),
num_owned_f);
mat = hypre_AMGDDCompGridMatrixOwnedOffd(hypre_AMGDDCompGridA(compGrid));
beta = 1.0;
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u,
beta, owned_tmp, owned_tmp,
hypre_AMGDDCompGridOwnedFMask(compGrid),
num_owned_f);
mat = hypre_AMGDDCompGridMatrixNonOwnedDiag(hypre_AMGDDCompGridA(compGrid));
beta = relax_weight;
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u,
beta, nonowned_tmp, nonowned_tmp,
hypre_AMGDDCompGridNonOwnedFMask(compGrid),
num_nonowned_rf);
mat = hypre_AMGDDCompGridMatrixNonOwnedOffd(hypre_AMGDDCompGridA(compGrid));
beta = 1.0;
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u,
beta, nonowned_tmp, nonowned_tmp,
hypre_AMGDDCompGridNonOwnedFMask(compGrid),
num_nonowned_rf);
hypreDevice_MaskedIVAXPY(num_owned_f,
hypre_AMGDDCompGridL1Norms(compGrid),
hypre_VectorData(owned_tmp),

View File

@ -122,58 +122,6 @@ hypre_CSRMatrixMatvecDevice( HYPRE_Int trans,
}
#if defined(HYPRE_USING_CUSPARSE)
/*
 * Masked CSR matrix-vector product on the device (cuSPARSE build only).
 *
 * Steps performed, in order:
 *   1. If b and y do not share the same data pointer, copy
 *      hypre_CSRMatrixNumRows(A) entries of b into y.
 *   2. Call hypre_CSRMatrixMatvecDevice2 on the whole matrix with alpha/beta.
 *   3. If A has at least one row, one column and one nonzero, call
 *      cusparseDbsrxmv with `mask` / `size_of_mask`.
 *   4. Synchronize the CUDA compute stream.
 *
 * Parameters:
 *   alpha, beta   - scalars forwarded to both matvec calls
 *   A             - CSR matrix (data, I and J arrays are handed to cuSPARSE)
 *   x             - input vector
 *   b             - vector copied into y before the product (may alias y)
 *   y             - output vector, updated in place
 *   mask          - row index list passed as the bsrxmv mask pointer
 *   size_of_mask  - number of entries in mask
 *
 * Returns hypre_error_flag.
 *
 * NOTE(review): steps 2 and 3 both receive the same alpha/beta and both write
 * y, so rows listed in `mask` appear to be updated twice and rows outside the
 * mask are still touched by step 2 - confirm whether step 2 is intended.
 */
HYPRE_Int
hypre_CSRMatrixMatvecMaskedDevice( HYPRE_Complex alpha,
hypre_CSRMatrix *A,
hypre_Vector *x,
HYPRE_Complex beta,
hypre_Vector *b,
hypre_Vector *y,
HYPRE_Int *mask,
HYPRE_Int size_of_mask )
{
/* Seed y with b unless the two vectors already share storage. */
if (hypre_VectorData(b) != hypre_VectorData(y))
{
hypre_TMemcpy( hypre_VectorData(y),
hypre_VectorData(b),
HYPRE_Complex,
hypre_CSRMatrixNumRows(A),
hypre_VectorMemoryLocation(y),
hypre_VectorMemoryLocation(b) );
}
/* Unmasked matvec over the full matrix (presumably y = alpha*A*x + beta*y;
   semantics of the leading 0 and trailing 0 arguments not visible here - TODO confirm). */
hypre_CSRMatrixMatvecDevice2(0, alpha, A, x, beta, y, 0);
/* Skip the cuSPARSE call entirely for an empty matrix. */
if ( hypre_CSRMatrixNumRows(A) > 0 && hypre_CSRMatrixNumCols(A) > 0 && hypre_CSRMatrixNumNonzeros(A) > 0 )
{
cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
cusparseMatDescr_t descr = hypre_CSRMatrixGPUMatDescr(A);
/* The CSR arrays are handed to the block-sparse (BSR) masked matvec with a
   block dimension of 1 (the literal `1` below); I(A) and I(A)+1 are passed
   in the row-start and row-end pointer slots respectively. */
HYPRE_CUSPARSE_CALL( cusparseDbsrxmv(handle,
CUSPARSE_DIRECTION_ROW,
CUSPARSE_OPERATION_NON_TRANSPOSE,
size_of_mask,
hypre_CSRMatrixNumRows(A),
hypre_CSRMatrixNumCols(A),
hypre_CSRMatrixNumNonzeros(A),
&alpha,
descr,
hypre_CSRMatrixData(A),
mask,
hypre_CSRMatrixI(A),
hypre_CSRMatrixI(A) + 1,
hypre_CSRMatrixJ(A),
1,
hypre_VectorData(x),
&beta,
hypre_VectorData(y)) );
}
/* Wait for the compute stream before handing y back to the caller. */
hypre_SyncCudaComputeStream(hypre_handle());
return hypre_error_flag;
}
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
HYPRE_Int
@ -304,20 +252,6 @@ hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans,
#endif // #if defined(HYPRE_USING_CUSPARSE)
#if defined(HYPRE_USING_ROCSPARSE)
// We need a stub for this function since it's called elsewhere
/*
 * rocSPARSE placeholder for the masked CSR matvec.
 *
 * No computation is performed: every argument is ignored, a generic hypre
 * error is raised with a "not implemented" message, and hypre_error_flag is
 * returned so the function never falls off the end of a non-void body.
 */
HYPRE_Int
hypre_CSRMatrixMatvecMaskedDevice( HYPRE_Complex /*alpha*/,
                                   hypre_CSRMatrix * /*A*/,
                                   hypre_Vector * /*x*/,
                                   HYPRE_Complex /*beta*/,
                                   hypre_Vector * /*b*/,
                                   hypre_Vector * /*y*/,
                                   HYPRE_Int * /*mask*/,
                                   HYPRE_Int /*size_of_mask*/ )
{
   hypre_error_w_msg(HYPRE_ERROR_GENERIC, "hypre_CSRMatrixMatvecMaskedDevice not implemented for rocSPARSE!\n");

   /* Report the error state to the caller, as the cuSPARSE variant does. */
   return hypre_error_flag;
}
HYPRE_Int
hypre_CSRMatrixMatvecRocsparse( HYPRE_Int trans,
HYPRE_Complex alpha,

View File

@ -80,7 +80,6 @@ HYPRE_Int hypre_CSRMatrixMatvec_FF ( HYPRE_Complex alpha , hypre_CSRMatrix *A ,
/* csr_matvec_device.c */
HYPRE_Int hypre_CSRMatrixMatvecDevice(HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset );
HYPRE_Int hypre_CSRMatrixMatvecMaskedDevice(HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *mask, HYPRE_Int size_of_mask);
HYPRE_Int hypre_CSRMatrixMatvecCusparseNewAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset );
HYPRE_Int hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset );
HYPRE_Int hypre_CSRMatrixMatvecOMPOffload (HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *y, HYPRE_Int offset );

View File

@ -358,7 +358,6 @@ HYPRE_Int hypre_CSRMatrixMatvec_FF ( HYPRE_Complex alpha , hypre_CSRMatrix *A ,
/* csr_matvec_device.c */
HYPRE_Int hypre_CSRMatrixMatvecDevice(HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset );
HYPRE_Int hypre_CSRMatrixMatvecMaskedDevice(HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *mask, HYPRE_Int size_of_mask);
HYPRE_Int hypre_CSRMatrixMatvecCusparseNewAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset );
HYPRE_Int hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset );
HYPRE_Int hypre_CSRMatrixMatvecOMPOffload (HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *y, HYPRE_Int offset );

View File

@ -13,8 +13,8 @@
mpirun -np 1 ./ij -solver 90 -rhsrand > amgdd.out.900
mpirun -np 2 ./ij -solver 90 -rhsrand > amgdd.out.901
mpirun -np 4 ./ij -solver 90 -rhsrand > amgdd.out.902
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 3 > amgdd.out.903
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 4 > amgdd.out.904
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 2 > amgdd.out.903
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 3 > amgdd.out.904
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_padding 4 > amgdd.out.905
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_padding 4 -amgdd_num_ghost_layers 4 > amgdd.out.906
mpirun -np 1 ./ij -solver 91 -rhsrand > amgdd.out.910

View File

@ -12,7 +12,7 @@ Final Relative Residual Norm = 9.214818e-09
# Output file: amgdd.out.903
BoomerAMG-DD Iterations = 12
Final Relative Residual Norm = 3.342726e-09
Final Relative Residual Norm = 3.122094e-09
# Output file: amgdd.out.904
BoomerAMG-DD Iterations = 12
@ -61,3 +61,4 @@ Final GMRES Relative Residual Norm = 3.281089e-09
# Output file: amgdd.out.918
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 1.828708e-09

View File

@ -0,0 +1,64 @@
# Output file: amgdd.out.900
BoomerAMG-DD Iterations = 10
Final Relative Residual Norm = 9.801392e-09
# Output file: amgdd.out.901
BoomerAMG-DD Iterations = 11
Final Relative Residual Norm = 3.178097e-09
# Output file: amgdd.out.902
BoomerAMG-DD Iterations = 11
Final Relative Residual Norm = 3.692714e-09
# Output file: amgdd.out.903
BoomerAMG-DD Iterations = 25
Final Relative Residual Norm = 7.119102e-09
# Output file: amgdd.out.904
BoomerAMG-DD Iterations = 25
Final Relative Residual Norm = 7.302627e-09
# Output file: amgdd.out.905
BoomerAMG-DD Iterations = 11
Final Relative Residual Norm = 2.977878e-09
# Output file: amgdd.out.906
BoomerAMG-DD Iterations = 11
Final Relative Residual Norm = 2.983489e-09
# Output file: amgdd.out.910
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 2.624320e-09
# Output file: amgdd.out.911
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 5.697067e-09
# Output file: amgdd.out.912
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 6.478946e-09
# Output file: amgdd.out.913
GMRES Iterations = 6
Final GMRES Relative Residual Norm = 2.761150e-09
# Output file: amgdd.out.914
GMRES Iterations = 14
Final GMRES Relative Residual Norm = 6.422117e-09
# Output file: amgdd.out.915
GMRES Iterations = 5
Final GMRES Relative Residual Norm = 4.633358e-09
# Output file: amgdd.out.916
GMRES Iterations = 13
Final GMRES Relative Residual Norm = 1.734237e-03
# Output file: amgdd.out.917
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 4.096266e-09
# Output file: amgdd.out.918
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 2.043956e-09

View File

@ -0,0 +1,64 @@
# Output file: amgdd.out.900
BoomerAMG-DD Iterations = 10
Final Relative Residual Norm = 9.801392e-09
# Output file: amgdd.out.901
BoomerAMG-DD Iterations = 11
Final Relative Residual Norm = 3.178097e-09
# Output file: amgdd.out.902
BoomerAMG-DD Iterations = 11
Final Relative Residual Norm = 3.692714e-09
# Output file: amgdd.out.903
BoomerAMG-DD Iterations = 25
Final Relative Residual Norm = 7.119102e-09
# Output file: amgdd.out.904
BoomerAMG-DD Iterations = 25
Final Relative Residual Norm = 7.302627e-09
# Output file: amgdd.out.905
BoomerAMG-DD Iterations = 11
Final Relative Residual Norm = 2.977878e-09
# Output file: amgdd.out.906
BoomerAMG-DD Iterations = 11
Final Relative Residual Norm = 2.983489e-09
# Output file: amgdd.out.910
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 2.624320e-09
# Output file: amgdd.out.911
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 5.697067e-09
# Output file: amgdd.out.912
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 6.478946e-09
# Output file: amgdd.out.913
GMRES Iterations = 6
Final GMRES Relative Residual Norm = 2.761150e-09
# Output file: amgdd.out.914
GMRES Iterations = 14
Final GMRES Relative Residual Norm = 6.422117e-09
# Output file: amgdd.out.915
GMRES Iterations = 5
Final GMRES Relative Residual Norm = 4.633358e-09
# Output file: amgdd.out.916
GMRES Iterations = 13
Final GMRES Relative Residual Norm = 1.734237e-03
# Output file: amgdd.out.917
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 4.096266e-09
# Output file: amgdd.out.918
GMRES Iterations = 7
Final GMRES Relative Residual Norm = 2.043956e-09