Amgdd fixes (#386)
This removes the masked matvec routine previously used for CF L1 Jacobi relaxation in the AMG-DD solver. There was a bug present in the GPU code, and the cuSPARSE bsrxmv routine no longer supports our use case as of CUDA 11. In addition, appropriate regression test results were saved for the GPU implementation of AMG-DD.
This commit is contained in:
parent
ad5d7e009f
commit
5f8472b05c
@ -1,3 +1,2 @@
|
||||
TEST_ij/agg_interp.sh
|
||||
TEST_ij/[^a]*.sh
|
||||
TEST_ij/*.sh
|
||||
|
||||
|
||||
@ -182,10 +182,10 @@ hypre_BoomerAMGDD_FAC_Relax( void *amgdd_vdata,
|
||||
hypre_AMGDDCompGridVectorScale(-1.0, hypre_AMGDDCompGridTemp(compGrid));
|
||||
}
|
||||
|
||||
for (i = 0; i < numRelax; i++)
|
||||
{
|
||||
(*hypre_ParAMGDDDataUserFACRelaxation(amgdd_data))(amgdd_vdata, level, cycle_param);
|
||||
}
|
||||
for (i = 0; i < numRelax; i++)
|
||||
{
|
||||
(*hypre_ParAMGDDDataUserFACRelaxation(amgdd_data))(amgdd_vdata, level, cycle_param);
|
||||
}
|
||||
|
||||
if (hypre_AMGDDCompGridT(compGrid) || hypre_AMGDDCompGridQ(compGrid))
|
||||
{
|
||||
|
||||
@ -85,7 +85,7 @@ hypre_BoomerAMGDD_FAC_JacobiDevice( void *amgdd_vdata,
|
||||
hypreDevice_IVAXPY(hypre_AMGDDCompGridNumNonOwnedRealNodes(compGrid),
|
||||
&(hypre_AMGDDCompGridL1Norms(compGrid)[hypre_AMGDDCompGridNumOwnedNodes(compGrid)]),
|
||||
hypre_VectorData(hypre_AMGDDCompGridVectorNonOwned(hypre_AMGDDCompGridTemp2(compGrid))),
|
||||
hypre_VectorData(hypre_AMGDDCompGridVectorNonOwned(u)));
|
||||
hypre_VectorData(hypre_AMGDDCompGridVectorNonOwned(u)));
|
||||
|
||||
return hypre_error_flag;
|
||||
}
|
||||
@ -108,11 +108,8 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata,
|
||||
HYPRE_Int num_nonowned_rc = hypre_AMGDDCompGridNumNonOwnedRealCPoints(compGrid);
|
||||
HYPRE_Int num_nonowned_rf = num_nonowned_r - num_nonowned_rc;
|
||||
|
||||
hypre_CSRMatrix *mat;
|
||||
hypre_Vector *owned_tmp;
|
||||
hypre_Vector *nonowned_tmp;
|
||||
HYPRE_Real alpha = -relax_weight;
|
||||
HYPRE_Real beta = relax_weight;
|
||||
|
||||
// Allocate temporary vector if necessary
|
||||
if (!hypre_AMGDDCompGridTemp2(compGrid))
|
||||
@ -123,40 +120,21 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata,
|
||||
num_nonowned,
|
||||
num_nonowned_r);
|
||||
}
|
||||
|
||||
hypre_AMGDDCompGridVectorCopy(hypre_AMGDDCompGridF(compGrid),
|
||||
hypre_AMGDDCompGridTemp2(compGrid));
|
||||
|
||||
hypre_AMGDDCompGridMatvec(-relax_weight,
|
||||
hypre_AMGDDCompGridA(compGrid),
|
||||
hypre_AMGDDCompGridU(compGrid),
|
||||
relax_weight,
|
||||
hypre_AMGDDCompGridTemp2(compGrid));
|
||||
|
||||
owned_tmp = hypre_AMGDDCompGridVectorOwned(hypre_AMGDDCompGridTemp2(compGrid));
|
||||
nonowned_tmp = hypre_AMGDDCompGridVectorNonOwned(hypre_AMGDDCompGridTemp2(compGrid));
|
||||
|
||||
if (relax_set)
|
||||
{
|
||||
mat = hypre_AMGDDCompGridMatrixOwnedDiag(hypre_AMGDDCompGridA(compGrid));
|
||||
beta = relax_weight;
|
||||
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u,
|
||||
beta, owned_tmp, owned_tmp,
|
||||
hypre_AMGDDCompGridOwnedCMask(compGrid),
|
||||
num_owned_c);
|
||||
|
||||
mat = hypre_AMGDDCompGridMatrixOwnedOffd(hypre_AMGDDCompGridA(compGrid));
|
||||
beta = 1.0;
|
||||
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u,
|
||||
beta, owned_tmp, owned_tmp,
|
||||
hypre_AMGDDCompGridOwnedCMask(compGrid),
|
||||
num_owned_c);
|
||||
|
||||
mat = hypre_AMGDDCompGridMatrixNonOwnedDiag(hypre_AMGDDCompGridA(compGrid));
|
||||
beta = relax_weight;
|
||||
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u,
|
||||
beta, nonowned_tmp, nonowned_tmp,
|
||||
hypre_AMGDDCompGridNonOwnedCMask(compGrid),
|
||||
num_nonowned_rc);
|
||||
|
||||
mat = hypre_AMGDDCompGridMatrixNonOwnedOffd(hypre_AMGDDCompGridA(compGrid));
|
||||
beta = 1.0;
|
||||
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u,
|
||||
beta, nonowned_tmp, nonowned_tmp,
|
||||
hypre_AMGDDCompGridNonOwnedCMask(compGrid),
|
||||
num_nonowned_rc);
|
||||
|
||||
hypreDevice_MaskedIVAXPY(num_owned_c,
|
||||
hypre_AMGDDCompGridL1Norms(compGrid),
|
||||
@ -172,34 +150,6 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata,
|
||||
}
|
||||
else
|
||||
{
|
||||
mat = hypre_AMGDDCompGridMatrixOwnedDiag(hypre_AMGDDCompGridA(compGrid));
|
||||
beta = relax_weight;
|
||||
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u,
|
||||
beta, owned_tmp, owned_tmp,
|
||||
hypre_AMGDDCompGridOwnedFMask(compGrid),
|
||||
num_owned_f);
|
||||
|
||||
mat = hypre_AMGDDCompGridMatrixOwnedOffd(hypre_AMGDDCompGridA(compGrid));
|
||||
beta = 1.0;
|
||||
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u,
|
||||
beta, owned_tmp, owned_tmp,
|
||||
hypre_AMGDDCompGridOwnedFMask(compGrid),
|
||||
num_owned_f);
|
||||
|
||||
mat = hypre_AMGDDCompGridMatrixNonOwnedDiag(hypre_AMGDDCompGridA(compGrid));
|
||||
beta = relax_weight;
|
||||
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u,
|
||||
beta, nonowned_tmp, nonowned_tmp,
|
||||
hypre_AMGDDCompGridNonOwnedFMask(compGrid),
|
||||
num_nonowned_rf);
|
||||
|
||||
mat = hypre_AMGDDCompGridMatrixNonOwnedOffd(hypre_AMGDDCompGridA(compGrid));
|
||||
beta = 1.0;
|
||||
hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u,
|
||||
beta, nonowned_tmp, nonowned_tmp,
|
||||
hypre_AMGDDCompGridNonOwnedFMask(compGrid),
|
||||
num_nonowned_rf);
|
||||
|
||||
hypreDevice_MaskedIVAXPY(num_owned_f,
|
||||
hypre_AMGDDCompGridL1Norms(compGrid),
|
||||
hypre_VectorData(owned_tmp),
|
||||
|
||||
@ -122,58 +122,6 @@ hypre_CSRMatrixMatvecDevice( HYPRE_Int trans,
|
||||
}
|
||||
|
||||
#if defined(HYPRE_USING_CUSPARSE)
|
||||
HYPRE_Int
|
||||
hypre_CSRMatrixMatvecMaskedDevice( HYPRE_Complex alpha,
|
||||
hypre_CSRMatrix *A,
|
||||
hypre_Vector *x,
|
||||
HYPRE_Complex beta,
|
||||
hypre_Vector *b,
|
||||
hypre_Vector *y,
|
||||
HYPRE_Int *mask,
|
||||
HYPRE_Int size_of_mask )
|
||||
{
|
||||
if (hypre_VectorData(b) != hypre_VectorData(y))
|
||||
{
|
||||
hypre_TMemcpy( hypre_VectorData(y),
|
||||
hypre_VectorData(b),
|
||||
HYPRE_Complex,
|
||||
hypre_CSRMatrixNumRows(A),
|
||||
hypre_VectorMemoryLocation(y),
|
||||
hypre_VectorMemoryLocation(b) );
|
||||
}
|
||||
|
||||
hypre_CSRMatrixMatvecDevice2(0, alpha, A, x, beta, y, 0);
|
||||
|
||||
if ( hypre_CSRMatrixNumRows(A) > 0 && hypre_CSRMatrixNumCols(A) > 0 && hypre_CSRMatrixNumNonzeros(A) > 0 )
|
||||
{
|
||||
cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
|
||||
cusparseMatDescr_t descr = hypre_CSRMatrixGPUMatDescr(A);
|
||||
|
||||
HYPRE_CUSPARSE_CALL( cusparseDbsrxmv(handle,
|
||||
CUSPARSE_DIRECTION_ROW,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
size_of_mask,
|
||||
hypre_CSRMatrixNumRows(A),
|
||||
hypre_CSRMatrixNumCols(A),
|
||||
hypre_CSRMatrixNumNonzeros(A),
|
||||
&alpha,
|
||||
descr,
|
||||
hypre_CSRMatrixData(A),
|
||||
mask,
|
||||
hypre_CSRMatrixI(A),
|
||||
hypre_CSRMatrixI(A) + 1,
|
||||
hypre_CSRMatrixJ(A),
|
||||
1,
|
||||
hypre_VectorData(x),
|
||||
&beta,
|
||||
hypre_VectorData(y)) );
|
||||
}
|
||||
|
||||
hypre_SyncCudaComputeStream(hypre_handle());
|
||||
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
|
||||
|
||||
HYPRE_Int
|
||||
@ -304,20 +252,6 @@ hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans,
|
||||
#endif // #if defined(HYPRE_USING_CUSPARSE)
|
||||
|
||||
#if defined(HYPRE_USING_ROCSPARSE)
|
||||
// We need a stub for this function since it's called elsewhere
|
||||
HYPRE_Int
|
||||
hypre_CSRMatrixMatvecMaskedDevice( HYPRE_Complex /*alpha*/,
|
||||
hypre_CSRMatrix * /*A*/,
|
||||
hypre_Vector * /*x*/,
|
||||
HYPRE_Complex /*beta*/,
|
||||
hypre_Vector * /*b*/,
|
||||
hypre_Vector * /*y*/,
|
||||
HYPRE_Int * /*mask*/,
|
||||
HYPRE_Int /*size_of_mask*/ )
|
||||
{
|
||||
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "hypre_CSRMatrixMatvecMaskedDevice not implemented for rocSPARSE!\n");
|
||||
}
|
||||
|
||||
HYPRE_Int
|
||||
hypre_CSRMatrixMatvecRocsparse( HYPRE_Int trans,
|
||||
HYPRE_Complex alpha,
|
||||
|
||||
@ -80,7 +80,6 @@ HYPRE_Int hypre_CSRMatrixMatvec_FF ( HYPRE_Complex alpha , hypre_CSRMatrix *A ,
|
||||
|
||||
/* csr_matvec_device.c */
|
||||
HYPRE_Int hypre_CSRMatrixMatvecDevice(HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset );
|
||||
HYPRE_Int hypre_CSRMatrixMatvecMaskedDevice(HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *mask, HYPRE_Int size_of_mask);
|
||||
HYPRE_Int hypre_CSRMatrixMatvecCusparseNewAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset );
|
||||
HYPRE_Int hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset );
|
||||
HYPRE_Int hypre_CSRMatrixMatvecOMPOffload (HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *y, HYPRE_Int offset );
|
||||
|
||||
@ -358,7 +358,6 @@ HYPRE_Int hypre_CSRMatrixMatvec_FF ( HYPRE_Complex alpha , hypre_CSRMatrix *A ,
|
||||
|
||||
/* csr_matvec_device.c */
|
||||
HYPRE_Int hypre_CSRMatrixMatvecDevice(HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset );
|
||||
HYPRE_Int hypre_CSRMatrixMatvecMaskedDevice(HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *mask, HYPRE_Int size_of_mask);
|
||||
HYPRE_Int hypre_CSRMatrixMatvecCusparseNewAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset );
|
||||
HYPRE_Int hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset );
|
||||
HYPRE_Int hypre_CSRMatrixMatvecOMPOffload (HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *y, HYPRE_Int offset );
|
||||
|
||||
@ -13,8 +13,8 @@
|
||||
mpirun -np 1 ./ij -solver 90 -rhsrand > amgdd.out.900
|
||||
mpirun -np 2 ./ij -solver 90 -rhsrand > amgdd.out.901
|
||||
mpirun -np 4 ./ij -solver 90 -rhsrand > amgdd.out.902
|
||||
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 3 > amgdd.out.903
|
||||
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 4 > amgdd.out.904
|
||||
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 2 > amgdd.out.903
|
||||
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 3 > amgdd.out.904
|
||||
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_padding 4 > amgdd.out.905
|
||||
mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_padding 4 -amgdd_num_ghost_layers 4 > amgdd.out.906
|
||||
mpirun -np 1 ./ij -solver 91 -rhsrand > amgdd.out.910
|
||||
|
||||
@ -12,7 +12,7 @@ Final Relative Residual Norm = 9.214818e-09
|
||||
|
||||
# Output file: amgdd.out.903
|
||||
BoomerAMG-DD Iterations = 12
|
||||
Final Relative Residual Norm = 3.342726e-09
|
||||
Final Relative Residual Norm = 3.122094e-09
|
||||
|
||||
# Output file: amgdd.out.904
|
||||
BoomerAMG-DD Iterations = 12
|
||||
@ -61,3 +61,4 @@ Final GMRES Relative Residual Norm = 3.281089e-09
|
||||
# Output file: amgdd.out.918
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 1.828708e-09
|
||||
|
||||
|
||||
64
src/test/TEST_ij/amgdd.saved.lassen
Normal file
64
src/test/TEST_ij/amgdd.saved.lassen
Normal file
@ -0,0 +1,64 @@
|
||||
# Output file: amgdd.out.900
|
||||
BoomerAMG-DD Iterations = 10
|
||||
Final Relative Residual Norm = 9.801392e-09
|
||||
|
||||
# Output file: amgdd.out.901
|
||||
BoomerAMG-DD Iterations = 11
|
||||
Final Relative Residual Norm = 3.178097e-09
|
||||
|
||||
# Output file: amgdd.out.902
|
||||
BoomerAMG-DD Iterations = 11
|
||||
Final Relative Residual Norm = 3.692714e-09
|
||||
|
||||
# Output file: amgdd.out.903
|
||||
BoomerAMG-DD Iterations = 25
|
||||
Final Relative Residual Norm = 7.119102e-09
|
||||
|
||||
# Output file: amgdd.out.904
|
||||
BoomerAMG-DD Iterations = 25
|
||||
Final Relative Residual Norm = 7.302627e-09
|
||||
|
||||
# Output file: amgdd.out.905
|
||||
BoomerAMG-DD Iterations = 11
|
||||
Final Relative Residual Norm = 2.977878e-09
|
||||
|
||||
# Output file: amgdd.out.906
|
||||
BoomerAMG-DD Iterations = 11
|
||||
Final Relative Residual Norm = 2.983489e-09
|
||||
|
||||
# Output file: amgdd.out.910
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 2.624320e-09
|
||||
|
||||
# Output file: amgdd.out.911
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 5.697067e-09
|
||||
|
||||
# Output file: amgdd.out.912
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 6.478946e-09
|
||||
|
||||
# Output file: amgdd.out.913
|
||||
GMRES Iterations = 6
|
||||
Final GMRES Relative Residual Norm = 2.761150e-09
|
||||
|
||||
# Output file: amgdd.out.914
|
||||
GMRES Iterations = 14
|
||||
Final GMRES Relative Residual Norm = 6.422117e-09
|
||||
|
||||
# Output file: amgdd.out.915
|
||||
GMRES Iterations = 5
|
||||
Final GMRES Relative Residual Norm = 4.633358e-09
|
||||
|
||||
# Output file: amgdd.out.916
|
||||
GMRES Iterations = 13
|
||||
Final GMRES Relative Residual Norm = 1.734237e-03
|
||||
|
||||
# Output file: amgdd.out.917
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 4.096266e-09
|
||||
|
||||
# Output file: amgdd.out.918
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 2.043956e-09
|
||||
|
||||
64
src/test/TEST_ij/amgdd.saved.ray
Normal file
64
src/test/TEST_ij/amgdd.saved.ray
Normal file
@ -0,0 +1,64 @@
|
||||
# Output file: amgdd.out.900
|
||||
BoomerAMG-DD Iterations = 10
|
||||
Final Relative Residual Norm = 9.801392e-09
|
||||
|
||||
# Output file: amgdd.out.901
|
||||
BoomerAMG-DD Iterations = 11
|
||||
Final Relative Residual Norm = 3.178097e-09
|
||||
|
||||
# Output file: amgdd.out.902
|
||||
BoomerAMG-DD Iterations = 11
|
||||
Final Relative Residual Norm = 3.692714e-09
|
||||
|
||||
# Output file: amgdd.out.903
|
||||
BoomerAMG-DD Iterations = 25
|
||||
Final Relative Residual Norm = 7.119102e-09
|
||||
|
||||
# Output file: amgdd.out.904
|
||||
BoomerAMG-DD Iterations = 25
|
||||
Final Relative Residual Norm = 7.302627e-09
|
||||
|
||||
# Output file: amgdd.out.905
|
||||
BoomerAMG-DD Iterations = 11
|
||||
Final Relative Residual Norm = 2.977878e-09
|
||||
|
||||
# Output file: amgdd.out.906
|
||||
BoomerAMG-DD Iterations = 11
|
||||
Final Relative Residual Norm = 2.983489e-09
|
||||
|
||||
# Output file: amgdd.out.910
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 2.624320e-09
|
||||
|
||||
# Output file: amgdd.out.911
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 5.697067e-09
|
||||
|
||||
# Output file: amgdd.out.912
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 6.478946e-09
|
||||
|
||||
# Output file: amgdd.out.913
|
||||
GMRES Iterations = 6
|
||||
Final GMRES Relative Residual Norm = 2.761150e-09
|
||||
|
||||
# Output file: amgdd.out.914
|
||||
GMRES Iterations = 14
|
||||
Final GMRES Relative Residual Norm = 6.422117e-09
|
||||
|
||||
# Output file: amgdd.out.915
|
||||
GMRES Iterations = 5
|
||||
Final GMRES Relative Residual Norm = 4.633358e-09
|
||||
|
||||
# Output file: amgdd.out.916
|
||||
GMRES Iterations = 13
|
||||
Final GMRES Relative Residual Norm = 1.734237e-03
|
||||
|
||||
# Output file: amgdd.out.917
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 4.096266e-09
|
||||
|
||||
# Output file: amgdd.out.918
|
||||
GMRES Iterations = 7
|
||||
Final GMRES Relative Residual Norm = 2.043956e-09
|
||||
|
||||
Loading…
Reference in New Issue
Block a user