From 5f8472b05cf8bb2bed072c84a5e0866c33c82c4a Mon Sep 17 00:00:00 2001 From: Wayne Mitchell Date: Tue, 15 Jun 2021 10:44:46 -0700 Subject: [PATCH] Amgdd fixes (#386) This removes the masked matvec routine previously used for CF L1 Jacobi relaxation in the AMG-DD solver. There was a bug present in the GPU code, and the cuSPARSE bsrxmv routine no longer supports our use case as of CUDA 11. In addition, appropriate regression test results were saved for the GPU implementation of AMG-DD. --- AUTOTEST/runtests-ij-gpu | 3 +- src/parcsr_ls/par_amgdd_fac_cycle.c | 8 +-- src/parcsr_ls/par_amgdd_fac_cycle_device.c | 68 +++------------------- src/seq_mv/csr_matvec_device.c | 66 --------------------- src/seq_mv/protos.h | 1 - src/seq_mv/seq_mv.h | 1 - src/test/TEST_ij/amgdd.jobs | 4 +- src/test/TEST_ij/amgdd.saved | 3 +- src/test/TEST_ij/amgdd.saved.lassen | 64 ++++++++++++++++++++ src/test/TEST_ij/amgdd.saved.ray | 64 ++++++++++++++++++++ 10 files changed, 146 insertions(+), 136 deletions(-) create mode 100644 src/test/TEST_ij/amgdd.saved.lassen create mode 100644 src/test/TEST_ij/amgdd.saved.ray diff --git a/AUTOTEST/runtests-ij-gpu b/AUTOTEST/runtests-ij-gpu index dba8f3530..9205a9a1a 100644 --- a/AUTOTEST/runtests-ij-gpu +++ b/AUTOTEST/runtests-ij-gpu @@ -1,3 +1,2 @@ -TEST_ij/agg_interp.sh -TEST_ij/[^a]*.sh +TEST_ij/*.sh diff --git a/src/parcsr_ls/par_amgdd_fac_cycle.c b/src/parcsr_ls/par_amgdd_fac_cycle.c index 6cf3ffc37..005570935 100644 --- a/src/parcsr_ls/par_amgdd_fac_cycle.c +++ b/src/parcsr_ls/par_amgdd_fac_cycle.c @@ -182,10 +182,10 @@ hypre_BoomerAMGDD_FAC_Relax( void *amgdd_vdata, hypre_AMGDDCompGridVectorScale(-1.0, hypre_AMGDDCompGridTemp(compGrid)); } - for (i = 0; i < numRelax; i++) - { - (*hypre_ParAMGDDDataUserFACRelaxation(amgdd_data))(amgdd_vdata, level, cycle_param); - } + for (i = 0; i < numRelax; i++) + { + (*hypre_ParAMGDDDataUserFACRelaxation(amgdd_data))(amgdd_vdata, level, cycle_param); + } if (hypre_AMGDDCompGridT(compGrid) || 
hypre_AMGDDCompGridQ(compGrid)) { diff --git a/src/parcsr_ls/par_amgdd_fac_cycle_device.c b/src/parcsr_ls/par_amgdd_fac_cycle_device.c index 384efd3a7..0b302484a 100644 --- a/src/parcsr_ls/par_amgdd_fac_cycle_device.c +++ b/src/parcsr_ls/par_amgdd_fac_cycle_device.c @@ -85,7 +85,7 @@ hypre_BoomerAMGDD_FAC_JacobiDevice( void *amgdd_vdata, hypreDevice_IVAXPY(hypre_AMGDDCompGridNumNonOwnedRealNodes(compGrid), &(hypre_AMGDDCompGridL1Norms(compGrid)[hypre_AMGDDCompGridNumOwnedNodes(compGrid)]), hypre_VectorData(hypre_AMGDDCompGridVectorNonOwned(hypre_AMGDDCompGridTemp2(compGrid))), - hypre_VectorData(hypre_AMGDDCompGridVectorNonOwned(u))); + hypre_VectorData(hypre_AMGDDCompGridVectorNonOwned(u))); return hypre_error_flag; } @@ -108,11 +108,8 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata, HYPRE_Int num_nonowned_rc = hypre_AMGDDCompGridNumNonOwnedRealCPoints(compGrid); HYPRE_Int num_nonowned_rf = num_nonowned_r - num_nonowned_rc; - hypre_CSRMatrix *mat; hypre_Vector *owned_tmp; hypre_Vector *nonowned_tmp; - HYPRE_Real alpha = -relax_weight; - HYPRE_Real beta = relax_weight; // Allocate temporary vector if necessary if (!hypre_AMGDDCompGridTemp2(compGrid)) @@ -123,40 +120,21 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata, num_nonowned, num_nonowned_r); } + hypre_AMGDDCompGridVectorCopy(hypre_AMGDDCompGridF(compGrid), hypre_AMGDDCompGridTemp2(compGrid)); + + hypre_AMGDDCompGridMatvec(-relax_weight, + hypre_AMGDDCompGridA(compGrid), + hypre_AMGDDCompGridU(compGrid), + relax_weight, + hypre_AMGDDCompGridTemp2(compGrid)); + owned_tmp = hypre_AMGDDCompGridVectorOwned(hypre_AMGDDCompGridTemp2(compGrid)); nonowned_tmp = hypre_AMGDDCompGridVectorNonOwned(hypre_AMGDDCompGridTemp2(compGrid)); if (relax_set) { - mat = hypre_AMGDDCompGridMatrixOwnedDiag(hypre_AMGDDCompGridA(compGrid)); - beta = relax_weight; - hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u, - beta, owned_tmp, owned_tmp, - hypre_AMGDDCompGridOwnedCMask(compGrid), - num_owned_c); - - 
mat = hypre_AMGDDCompGridMatrixOwnedOffd(hypre_AMGDDCompGridA(compGrid)); - beta = 1.0; - hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u, - beta, owned_tmp, owned_tmp, - hypre_AMGDDCompGridOwnedCMask(compGrid), - num_owned_c); - - mat = hypre_AMGDDCompGridMatrixNonOwnedDiag(hypre_AMGDDCompGridA(compGrid)); - beta = relax_weight; - hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u, - beta, nonowned_tmp, nonowned_tmp, - hypre_AMGDDCompGridNonOwnedCMask(compGrid), - num_nonowned_rc); - - mat = hypre_AMGDDCompGridMatrixNonOwnedOffd(hypre_AMGDDCompGridA(compGrid)); - beta = 1.0; - hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u, - beta, nonowned_tmp, nonowned_tmp, - hypre_AMGDDCompGridNonOwnedCMask(compGrid), - num_nonowned_rc); hypreDevice_MaskedIVAXPY(num_owned_c, hypre_AMGDDCompGridL1Norms(compGrid), @@ -172,34 +150,6 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata, } else { - mat = hypre_AMGDDCompGridMatrixOwnedDiag(hypre_AMGDDCompGridA(compGrid)); - beta = relax_weight; - hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u, - beta, owned_tmp, owned_tmp, - hypre_AMGDDCompGridOwnedFMask(compGrid), - num_owned_f); - - mat = hypre_AMGDDCompGridMatrixOwnedOffd(hypre_AMGDDCompGridA(compGrid)); - beta = 1.0; - hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u, - beta, owned_tmp, owned_tmp, - hypre_AMGDDCompGridOwnedFMask(compGrid), - num_owned_f); - - mat = hypre_AMGDDCompGridMatrixNonOwnedDiag(hypre_AMGDDCompGridA(compGrid)); - beta = relax_weight; - hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, nonowned_u, - beta, nonowned_tmp, nonowned_tmp, - hypre_AMGDDCompGridNonOwnedFMask(compGrid), - num_nonowned_rf); - - mat = hypre_AMGDDCompGridMatrixNonOwnedOffd(hypre_AMGDDCompGridA(compGrid)); - beta = 1.0; - hypre_CSRMatrixMatvecMaskedDevice(alpha, mat, owned_u, - beta, nonowned_tmp, nonowned_tmp, - hypre_AMGDDCompGridNonOwnedFMask(compGrid), - num_nonowned_rf); - hypreDevice_MaskedIVAXPY(num_owned_f, 
hypre_AMGDDCompGridL1Norms(compGrid), hypre_VectorData(owned_tmp), diff --git a/src/seq_mv/csr_matvec_device.c b/src/seq_mv/csr_matvec_device.c index cd273fd93..ab2c259fe 100644 --- a/src/seq_mv/csr_matvec_device.c +++ b/src/seq_mv/csr_matvec_device.c @@ -122,58 +122,6 @@ hypre_CSRMatrixMatvecDevice( HYPRE_Int trans, } #if defined(HYPRE_USING_CUSPARSE) -HYPRE_Int -hypre_CSRMatrixMatvecMaskedDevice( HYPRE_Complex alpha, - hypre_CSRMatrix *A, - hypre_Vector *x, - HYPRE_Complex beta, - hypre_Vector *b, - hypre_Vector *y, - HYPRE_Int *mask, - HYPRE_Int size_of_mask ) -{ - if (hypre_VectorData(b) != hypre_VectorData(y)) - { - hypre_TMemcpy( hypre_VectorData(y), - hypre_VectorData(b), - HYPRE_Complex, - hypre_CSRMatrixNumRows(A), - hypre_VectorMemoryLocation(y), - hypre_VectorMemoryLocation(b) ); - } - - hypre_CSRMatrixMatvecDevice2(0, alpha, A, x, beta, y, 0); - - if ( hypre_CSRMatrixNumRows(A) > 0 && hypre_CSRMatrixNumCols(A) > 0 && hypre_CSRMatrixNumNonzeros(A) > 0 ) - { - cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle()); - cusparseMatDescr_t descr = hypre_CSRMatrixGPUMatDescr(A); - - HYPRE_CUSPARSE_CALL( cusparseDbsrxmv(handle, - CUSPARSE_DIRECTION_ROW, - CUSPARSE_OPERATION_NON_TRANSPOSE, - size_of_mask, - hypre_CSRMatrixNumRows(A), - hypre_CSRMatrixNumCols(A), - hypre_CSRMatrixNumNonzeros(A), - &alpha, - descr, - hypre_CSRMatrixData(A), - mask, - hypre_CSRMatrixI(A), - hypre_CSRMatrixI(A) + 1, - hypre_CSRMatrixJ(A), - 1, - hypre_VectorData(x), - &beta, - hypre_VectorData(y)) ); - } - - hypre_SyncCudaComputeStream(hypre_handle()); - - return hypre_error_flag; -} - #if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION HYPRE_Int @@ -304,20 +252,6 @@ hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans, #endif // #if defined(HYPRE_USING_CUSPARSE) #if defined(HYPRE_USING_ROCSPARSE) -// We need a stub for this function since it's called elsewhere -HYPRE_Int -hypre_CSRMatrixMatvecMaskedDevice( HYPRE_Complex /*alpha*/, - hypre_CSRMatrix * /*A*/, - hypre_Vector 
* /*x*/, - HYPRE_Complex /*beta*/, - hypre_Vector * /*b*/, - hypre_Vector * /*y*/, - HYPRE_Int * /*mask*/, - HYPRE_Int /*size_of_mask*/ ) -{ - hypre_error_w_msg(HYPRE_ERROR_GENERIC, "hypre_CSRMatrixMatvecMaskedDevice not implemented for rocSPARSE!\n"); -} - HYPRE_Int hypre_CSRMatrixMatvecRocsparse( HYPRE_Int trans, HYPRE_Complex alpha, diff --git a/src/seq_mv/protos.h b/src/seq_mv/protos.h index 6823800f3..ff816a904 100644 --- a/src/seq_mv/protos.h +++ b/src/seq_mv/protos.h @@ -80,7 +80,6 @@ HYPRE_Int hypre_CSRMatrixMatvec_FF ( HYPRE_Complex alpha , hypre_CSRMatrix *A , /* csr_matvec_device.c */ HYPRE_Int hypre_CSRMatrixMatvecDevice(HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset ); -HYPRE_Int hypre_CSRMatrixMatvecMaskedDevice(HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *mask, HYPRE_Int size_of_mask); HYPRE_Int hypre_CSRMatrixMatvecCusparseNewAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset ); HYPRE_Int hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset ); HYPRE_Int hypre_CSRMatrixMatvecOMPOffload (HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *y, HYPRE_Int offset ); diff --git a/src/seq_mv/seq_mv.h b/src/seq_mv/seq_mv.h index 42a84b53c..f7335bac3 100644 --- a/src/seq_mv/seq_mv.h +++ b/src/seq_mv/seq_mv.h @@ -358,7 +358,6 @@ HYPRE_Int hypre_CSRMatrixMatvec_FF ( HYPRE_Complex alpha , hypre_CSRMatrix *A , /* csr_matvec_device.c */ HYPRE_Int hypre_CSRMatrixMatvecDevice(HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset ); -HYPRE_Int 
hypre_CSRMatrixMatvecMaskedDevice(HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int *mask, HYPRE_Int size_of_mask); HYPRE_Int hypre_CSRMatrixMatvecCusparseNewAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset ); HYPRE_Int hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int offset ); HYPRE_Int hypre_CSRMatrixMatvecOMPOffload (HYPRE_Int trans, HYPRE_Complex alpha , hypre_CSRMatrix *A , hypre_Vector *x , HYPRE_Complex beta , hypre_Vector *y, HYPRE_Int offset ); diff --git a/src/test/TEST_ij/amgdd.jobs b/src/test/TEST_ij/amgdd.jobs index 5a0e92839..a5b99da16 100755 --- a/src/test/TEST_ij/amgdd.jobs +++ b/src/test/TEST_ij/amgdd.jobs @@ -13,8 +13,8 @@ mpirun -np 1 ./ij -solver 90 -rhsrand > amgdd.out.900 mpirun -np 2 ./ij -solver 90 -rhsrand > amgdd.out.901 mpirun -np 4 ./ij -solver 90 -rhsrand > amgdd.out.902 -mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 3 > amgdd.out.903 -mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 4 > amgdd.out.904 +mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 2 > amgdd.out.903 +mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_start_level 3 > amgdd.out.904 mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_padding 4 > amgdd.out.905 mpirun -np 4 ./ij -solver 90 -rhsrand -amgdd_padding 4 -amgdd_num_ghost_layers 4 > amgdd.out.906 mpirun -np 1 ./ij -solver 91 -rhsrand > amgdd.out.910 diff --git a/src/test/TEST_ij/amgdd.saved b/src/test/TEST_ij/amgdd.saved index ed1cdb210..7b4b43b81 100644 --- a/src/test/TEST_ij/amgdd.saved +++ b/src/test/TEST_ij/amgdd.saved @@ -12,7 +12,7 @@ Final Relative Residual Norm = 9.214818e-09 # Output file: amgdd.out.903 BoomerAMG-DD Iterations = 12 -Final Relative Residual Norm = 3.342726e-09 +Final Relative Residual Norm = 3.122094e-09 # Output 
file: amgdd.out.904 BoomerAMG-DD Iterations = 12 @@ -61,3 +61,4 @@ Final GMRES Relative Residual Norm = 3.281089e-09 # Output file: amgdd.out.918 GMRES Iterations = 7 Final GMRES Relative Residual Norm = 1.828708e-09 + diff --git a/src/test/TEST_ij/amgdd.saved.lassen b/src/test/TEST_ij/amgdd.saved.lassen new file mode 100644 index 000000000..5a6d1b4ee --- /dev/null +++ b/src/test/TEST_ij/amgdd.saved.lassen @@ -0,0 +1,64 @@ +# Output file: amgdd.out.900 +BoomerAMG-DD Iterations = 10 +Final Relative Residual Norm = 9.801392e-09 + +# Output file: amgdd.out.901 +BoomerAMG-DD Iterations = 11 +Final Relative Residual Norm = 3.178097e-09 + +# Output file: amgdd.out.902 +BoomerAMG-DD Iterations = 11 +Final Relative Residual Norm = 3.692714e-09 + +# Output file: amgdd.out.903 +BoomerAMG-DD Iterations = 25 +Final Relative Residual Norm = 7.119102e-09 + +# Output file: amgdd.out.904 +BoomerAMG-DD Iterations = 25 +Final Relative Residual Norm = 7.302627e-09 + +# Output file: amgdd.out.905 +BoomerAMG-DD Iterations = 11 +Final Relative Residual Norm = 2.977878e-09 + +# Output file: amgdd.out.906 +BoomerAMG-DD Iterations = 11 +Final Relative Residual Norm = 2.983489e-09 + +# Output file: amgdd.out.910 +GMRES Iterations = 7 +Final GMRES Relative Residual Norm = 2.624320e-09 + +# Output file: amgdd.out.911 +GMRES Iterations = 7 +Final GMRES Relative Residual Norm = 5.697067e-09 + +# Output file: amgdd.out.912 +GMRES Iterations = 7 +Final GMRES Relative Residual Norm = 6.478946e-09 + +# Output file: amgdd.out.913 +GMRES Iterations = 6 +Final GMRES Relative Residual Norm = 2.761150e-09 + +# Output file: amgdd.out.914 +GMRES Iterations = 14 +Final GMRES Relative Residual Norm = 6.422117e-09 + +# Output file: amgdd.out.915 +GMRES Iterations = 5 +Final GMRES Relative Residual Norm = 4.633358e-09 + +# Output file: amgdd.out.916 +GMRES Iterations = 13 +Final GMRES Relative Residual Norm = 1.734237e-03 + +# Output file: amgdd.out.917 +GMRES Iterations = 7 +Final GMRES Relative Residual 
Norm = 4.096266e-09 + +# Output file: amgdd.out.918 +GMRES Iterations = 7 +Final GMRES Relative Residual Norm = 2.043956e-09 + diff --git a/src/test/TEST_ij/amgdd.saved.ray b/src/test/TEST_ij/amgdd.saved.ray new file mode 100644 index 000000000..5a6d1b4ee --- /dev/null +++ b/src/test/TEST_ij/amgdd.saved.ray @@ -0,0 +1,64 @@ +# Output file: amgdd.out.900 +BoomerAMG-DD Iterations = 10 +Final Relative Residual Norm = 9.801392e-09 + +# Output file: amgdd.out.901 +BoomerAMG-DD Iterations = 11 +Final Relative Residual Norm = 3.178097e-09 + +# Output file: amgdd.out.902 +BoomerAMG-DD Iterations = 11 +Final Relative Residual Norm = 3.692714e-09 + +# Output file: amgdd.out.903 +BoomerAMG-DD Iterations = 25 +Final Relative Residual Norm = 7.119102e-09 + +# Output file: amgdd.out.904 +BoomerAMG-DD Iterations = 25 +Final Relative Residual Norm = 7.302627e-09 + +# Output file: amgdd.out.905 +BoomerAMG-DD Iterations = 11 +Final Relative Residual Norm = 2.977878e-09 + +# Output file: amgdd.out.906 +BoomerAMG-DD Iterations = 11 +Final Relative Residual Norm = 2.983489e-09 + +# Output file: amgdd.out.910 +GMRES Iterations = 7 +Final GMRES Relative Residual Norm = 2.624320e-09 + +# Output file: amgdd.out.911 +GMRES Iterations = 7 +Final GMRES Relative Residual Norm = 5.697067e-09 + +# Output file: amgdd.out.912 +GMRES Iterations = 7 +Final GMRES Relative Residual Norm = 6.478946e-09 + +# Output file: amgdd.out.913 +GMRES Iterations = 6 +Final GMRES Relative Residual Norm = 2.761150e-09 + +# Output file: amgdd.out.914 +GMRES Iterations = 14 +Final GMRES Relative Residual Norm = 6.422117e-09 + +# Output file: amgdd.out.915 +GMRES Iterations = 5 +Final GMRES Relative Residual Norm = 4.633358e-09 + +# Output file: amgdd.out.916 +GMRES Iterations = 13 +Final GMRES Relative Residual Norm = 1.734237e-03 + +# Output file: amgdd.out.917 +GMRES Iterations = 7 +Final GMRES Relative Residual Norm = 4.096266e-09 + +# Output file: amgdd.out.918 +GMRES Iterations = 7 +Final GMRES Relative Residual 
Norm = 2.043956e-09 +