Fix MGR setup on GPU and MGR bugfixes. (#260)

This PR addresses initial changes necessary to perform MGR setup on GPUs. It also fixes a bug in the iterative use of AMG for F-relaxation, adds options to print statistics of the F-relaxation solver (for the V-cycle smoother), and places the diagonal of a transposed square matrix as the first element in the row. This last change required a couple minor changes to the saved files.
This commit is contained in:
Daniel Osei-Kuffuor 2021-05-10 13:19:34 -07:00 committed by GitHub
parent 3f12d47651
commit c272f6980c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 219 additions and 88 deletions

View File

@ -3784,10 +3784,12 @@ HYPRE_MGRSetLevelFRelaxNumFunctions(HYPRE_Solver solver, HYPRE_Int *num_function
* - 0 : injection \f$[0 I]\f$
* - 1 : unscaled (not recommended)
* - 2 : diagonal scaling (Jacobi)
* - 3 : approximate inverse
* - 4 : pAIR distance 1
* - 5 : pAIR distance 2
* - else : use classical modified interpolation
*
* These options are currently active for the last stage reduction. Intermediate
* reduction levels use injection. The default is injection.
* The default is injection.
**/
HYPRE_Int
HYPRE_MGRSetRestrictType( HYPRE_Solver solver,
@ -3806,16 +3808,17 @@ HYPRE_MGRSetNumRestrictSweeps( HYPRE_Solver solver,
HYPRE_Int nsweeps );
/**
* (Optional) Set the strategy for computing the MGR restriction operator.
* (Optional) Set the strategy for computing the MGR interpolation operator.
* Options for \e interp_type are:
*
* - 0 : injection \f$[0 I]^{T}\f$
* - 1 : unscaled (not recommended)
* - 2 : diagonal scaling (Jacobi)
* - else : use default (classical modified interpolation)
* - 3 : classical modified interpolation
* - 4 : approximate inverse
* - else : classical modified interpolation
*
* These options are currently active for the last stage reduction. Intermediate
* reduction levels use diagonal scaling.
* The default is diagonal scaling.
**/
HYPRE_Int
HYPRE_MGRSetInterpType( HYPRE_Solver solver,
@ -3879,6 +3882,14 @@ HYPRE_Int
HYPRE_MGRSetPrintLevel( HYPRE_Solver solver,
HYPRE_Int print_level );
HYPRE_Int
HYPRE_MGRSetFrelaxPrintLevel( HYPRE_Solver solver,
HYPRE_Int print_level );
HYPRE_Int
HYPRE_MGRSetCoarseGridPrintLevel( HYPRE_Solver solver,
HYPRE_Int print_level );
/**
* (Optional) Set the threshold to compress the coarse grid at each level
* Use threshold = 0.0 if no truncation is applied. Otherwise, set the threshold

View File

@ -358,6 +358,24 @@ HYPRE_MGRSetTruncateCoarseGridThreshold( HYPRE_Solver solver, HYPRE_Real thresho
return hypre_MGRSetTruncateCoarseGridThreshold( solver, threshold );
}
/*--------------------------------------------------------------------------
* HYPRE_MGRSetFrelaxPrintLevel
*--------------------------------------------------------------------------*/
HYPRE_Int
HYPRE_MGRSetFrelaxPrintLevel( HYPRE_Solver solver, HYPRE_Int print_level )
{
return hypre_MGRSetFrelaxPrintLevel( solver, print_level );
}
/*--------------------------------------------------------------------------
* HYPRE_MGRSetCoarseGridPrintLevel
*--------------------------------------------------------------------------*/
HYPRE_Int
HYPRE_MGRSetCoarseGridPrintLevel( HYPRE_Solver solver, HYPRE_Int print_level )
{
return hypre_MGRSetCoarseGridPrintLevel( solver, print_level );
}
/*--------------------------------------------------------------------------
* HYPRE_MGRSetPrintLevel
*--------------------------------------------------------------------------*/

View File

@ -2190,6 +2190,8 @@ HYPRE_Int hypre_MGRSetNumRelaxSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
HYPRE_Int hypre_MGRSetNumInterpSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
HYPRE_Int hypre_MGRSetNumRestrictSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
HYPRE_Int hypre_MGRSetPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
HYPRE_Int hypre_MGRSetFrelaxPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
HYPRE_Int hypre_MGRSetCoarseGridPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
HYPRE_Int hypre_MGRSetTruncateCoarseGridThreshold( void *mgr_vdata, HYPRE_Real threshold);
HYPRE_Int hypre_MGRSetLogging( void *mgr_vdata, HYPRE_Int logging );
HYPRE_Int hypre_MGRSetMaxIter( void *mgr_vdata, HYPRE_Int max_iter );

View File

@ -86,6 +86,8 @@ hypre_MGRCreate()
(mgr_data -> logging) = 0;
(mgr_data -> print_level) = 0;
(mgr_data -> frelax_print_level) = 0;
(mgr_data -> cg_print_level) = 0;
(mgr_data -> l1_norms) = NULL;
@ -1997,6 +1999,7 @@ hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A,
HYPRE_Int my_id;
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
hypre_MPI_Comm_rank(comm,&my_id);
HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A);
n_local_fine_grid = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
c_marker = hypre_CTAlloc(HYPRE_Int, n_local_fine_grid, HYPRE_MEMORY_HOST);
@ -2131,19 +2134,18 @@ hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A,
HYPRE_Int *A_h_correction_offd_i = hypre_CSRMatrixI(A_h_correction_offd);
HYPRE_Int *A_h_correction_offd_j = hypre_CSRMatrixJ(A_h_correction_offd);
// Allow for maximum dropping with Pmax = 0
//if (Pmax > 0)
//{
if (Pmax > 0)
{
if (ordering == 0) // interleaved ordering
{
HYPRE_Int *A_h_correction_diag_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints+1, HYPRE_MEMORY_HOST);
HYPRE_Int *A_h_correction_diag_j_new = hypre_CTAlloc(HYPRE_Int, (bsize + max_elmts)*n_local_cpoints, HYPRE_MEMORY_HOST);
HYPRE_Complex *A_h_correction_diag_data_new = hypre_CTAlloc(HYPRE_Complex, (bsize + max_elmts)*n_local_cpoints, HYPRE_MEMORY_HOST);
HYPRE_Int *A_h_correction_diag_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints+1, memory_location);
HYPRE_Int *A_h_correction_diag_j_new = hypre_CTAlloc(HYPRE_Int, (bsize + max_elmts)*n_local_cpoints, memory_location);
HYPRE_Complex *A_h_correction_diag_data_new = hypre_CTAlloc(HYPRE_Complex, (bsize + max_elmts)*n_local_cpoints, memory_location);
HYPRE_Int num_nonzeros_diag_new = 0;
HYPRE_Int *A_h_correction_offd_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints+1, HYPRE_MEMORY_HOST);
HYPRE_Int *A_h_correction_offd_j_new = hypre_CTAlloc(HYPRE_Int, max_elmts*n_local_cpoints, HYPRE_MEMORY_HOST);
HYPRE_Complex *A_h_correction_offd_data_new = hypre_CTAlloc(HYPRE_Complex, max_elmts*n_local_cpoints, HYPRE_MEMORY_HOST);
HYPRE_Int *A_h_correction_offd_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints+1, memory_location);
HYPRE_Int *A_h_correction_offd_j_new = hypre_CTAlloc(HYPRE_Int, max_elmts*n_local_cpoints, memory_location);
HYPRE_Complex *A_h_correction_offd_data_new = hypre_CTAlloc(HYPRE_Complex, max_elmts*n_local_cpoints, memory_location);
HYPRE_Int num_nonzeros_offd_new = 0;
@ -2212,17 +2214,17 @@ hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A,
hypre_TFree(aux_data, HYPRE_MEMORY_HOST);
}
hypre_TFree(A_h_correction_diag_i, HYPRE_MEMORY_HOST);
hypre_TFree(A_h_correction_diag_j, HYPRE_MEMORY_HOST);
hypre_TFree(A_h_correction_diag_data, HYPRE_MEMORY_HOST);
hypre_TFree(A_h_correction_diag_i, memory_location);
hypre_TFree(A_h_correction_diag_j, memory_location);
hypre_TFree(A_h_correction_diag_data, memory_location);
hypre_CSRMatrixI(A_h_correction_diag) = A_h_correction_diag_i_new;
hypre_CSRMatrixJ(A_h_correction_diag) = A_h_correction_diag_j_new;
hypre_CSRMatrixData(A_h_correction_diag) = A_h_correction_diag_data_new;
hypre_CSRMatrixNumNonzeros(A_h_correction_diag) = num_nonzeros_diag_new;
if (A_h_correction_offd_i) hypre_TFree(A_h_correction_offd_i, HYPRE_MEMORY_HOST);
if (A_h_correction_offd_j) hypre_TFree(A_h_correction_offd_j, HYPRE_MEMORY_HOST);
if (A_h_correction_offd_data) hypre_TFree(A_h_correction_offd_data, HYPRE_MEMORY_HOST);
if (A_h_correction_offd_i) hypre_TFree(A_h_correction_offd_i, memory_location);
if (A_h_correction_offd_j) hypre_TFree(A_h_correction_offd_j, memory_location);
if (A_h_correction_offd_data) hypre_TFree(A_h_correction_offd_data, memory_location);
hypre_CSRMatrixI(A_h_correction_offd) = A_h_correction_offd_i_new;
hypre_CSRMatrixJ(A_h_correction_offd) = A_h_correction_offd_j_new;
hypre_CSRMatrixData(A_h_correction_offd) = A_h_correction_offd_data_new;
@ -2230,10 +2232,10 @@ hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A,
}
else
{
hypre_printf("Error!! Block ordering is not supported at the moment\n");
hypre_printf("Error!! Block ordering for non-Galerkin coarse grid is not currently supported\n");
exit(-1);
}
//}
}
//hypre_MGRParCSRMatrixTruncate(A_h_correction, max_elmts);
//wall_time = time_getWallclockSeconds() - wall_time;
//hypre_printf("Filter A_h_correction time: %1.5f\n", wall_time);
@ -2744,14 +2746,15 @@ hypre_MGRBuildInterpApproximateInverse(hypre_ParCSRMatrix *A,
// Get A_FF
hypre_MGRGetSubBlock(A, F_marker, F_marker, 0, &A_ff);
// hypre_ParCSRMatrixPrintIJ(A_ff, 1, 1, "A_ff");
// Get A_FC
hypre_MGRGetSubBlock(A, F_marker, C_marker, 0, &A_fc);
hypre_MGRApproximateInverse(A_ff, &A_ff_inv);
hypre_ParCSRMatrixPrintIJ(A_ff_inv, 1, 1, "A_ff_inv");
hypre_ParCSRMatrixPrintIJ(A_fc, 1, 1, "A_fc");
// hypre_ParCSRMatrixPrintIJ(A_ff_inv, 1, 1, "A_ff_inv");
// hypre_ParCSRMatrixPrintIJ(A_fc, 1, 1, "A_fc");
minus_Wp = hypre_ParMatmul(A_ff_inv, A_fc);
hypre_ParCSRMatrixPrintIJ(minus_Wp, 1, 1, "Wp");
// hypre_ParCSRMatrixPrintIJ(minus_Wp, 1, 1, "Wp");
hypre_CSRMatrix *minus_Wp_diag = hypre_ParCSRMatrixDiag(minus_Wp);
HYPRE_Real *minus_Wp_diag_data = hypre_CSRMatrixData(minus_Wp_diag);
@ -3117,19 +3120,27 @@ hypre_MGRBuildInterp(hypre_ParCSRMatrix *A,
HYPRE_Int interp_type,
HYPRE_Int numsweeps)
{
//HYPRE_Int i;
// HYPRE_Int i;
hypre_ParCSRMatrix *P_ptr = NULL;
//HYPRE_Real jac_trunc_threshold = trunc_factor;
//HYPRE_Real jac_trunc_threshold_minus = 0.5*jac_trunc_threshold;
// HYPRE_Real jac_trunc_threshold = trunc_factor;
// HYPRE_Real jac_trunc_threshold_minus = 0.5*jac_trunc_threshold;
/* Interpolation for each level */
if (interp_type <3)
{
hypre_MGRBuildP( A,CF_marker,num_cpts_global,interp_type,debug_flag,&P_ptr);
/* Could do a few sweeps of Jacobi to further improve P */
//for(i=0; i<numsweeps; i++)
// hypre_BoomerAMGJacobiInterp(A, &P_ptr, S,1, NULL, CF_marker, 0, jac_trunc_threshold, jac_trunc_threshold_minus );
//hypre_BoomerAMGInterpTruncation(P_ptr, trunc_factor, max_elmts);
/* Could do a few sweeps of Jacobi to further improve Jacobi interpolation P */
/*
if(interp_type == 2)
{
for(i=0; i<numsweeps; i++)
{
hypre_BoomerAMGJacobiInterp(A, &P_ptr, S,1, NULL, CF_marker, 0, jac_trunc_threshold, jac_trunc_threshold_minus );
}
hypre_BoomerAMGInterpTruncation(P_ptr, trunc_factor, max_elmts);
}
*/
}
else if (interp_type == 4)
{
@ -3146,17 +3157,6 @@ hypre_MGRBuildInterp(hypre_ParCSRMatrix *A,
/* Classical modified interpolation */
hypre_BoomerAMGBuildInterp(A, CF_marker, S, num_cpts_global,1, NULL,debug_flag,
trunc_factor, max_elmts, &P_ptr);
/* Do k steps of Jacobi build W for P = [-W I].
* Note that BoomerAMGJacobiInterp assumes you have some initial P,
* hence we need to initialize P as above, before calling this routine.
* If numsweeps = 0, the following step is skipped and P is returned as is.
* Looping here is equivalent to improving P by Jacobi interpolation
*/
//for(i=0; i<numsweeps; i++)
// hypre_BoomerAMGJacobiInterp(A, &P_ptr, S,1, NULL, CF_marker,
// 0, jac_trunc_threshold,
// jac_trunc_threshold_minus );
}
/* set pointer to P */
@ -3193,14 +3193,8 @@ hypre_MGRBuildRestrict(hypre_ParCSRMatrix *A,
{
hypre_ParCSRMatrixTranspose(A, &AT, 1);
}
if (restrict_type > 5)
{
/* Build new strength matrix */
hypre_BoomerAMGCreateS(AT, strong_threshold, max_row_sum, 1, NULL, &ST);
/* use appropriate communication package for Strength matrix */
}
/* Interpolation for each level */
/* Restriction for each level */
if (restrict_type == 0)
{
hypre_MGRBuildP(A, CF_marker, num_cpts_global, restrict_type, debug_flag, &R_ptr);
@ -3208,31 +3202,20 @@ hypre_MGRBuildRestrict(hypre_ParCSRMatrix *A,
else if (restrict_type == 1 || restrict_type == 2)
{
hypre_MGRBuildP(AT, CF_marker, num_cpts_global, restrict_type, debug_flag, &R_ptr);
/* Could do a few sweeps of Jacobi to further improve P */
//for(i=0; i<numsweeps; i++)
// hypre_BoomerAMGJacobiInterp(A, &R_ptr, S,1, NULL, CF_marker, 0, jac_trunc_threshold, jac_trunc_threshold_minus );
//hypre_BoomerAMGInterpTruncation(R_ptr, trunc_factor, max_elmts);
}
else if (restrict_type == 4)
else if (restrict_type == 3)
{
hypre_MGRBuildInterpApproximateInverse(A, CF_marker, num_cpts_global, debug_flag, &R_ptr);
hypre_MGRBuildInterpApproximateInverse(AT, CF_marker, num_cpts_global, debug_flag, &R_ptr);
hypre_BoomerAMGInterpTruncation(R_ptr, trunc_factor, max_elmts);
}
else
{
/* Build new strength matrix */
hypre_BoomerAMGCreateS(AT, strong_threshold, max_row_sum, 1, NULL, &ST);
/* Classical modified interpolation */
hypre_BoomerAMGBuildInterp(AT, CF_marker, ST, num_cpts_global,1, NULL,debug_flag,
trunc_factor, max_elmts, &R_ptr);
/* Do k steps of Jacobi build W for P = [-W I].
* Note that BoomerAMGJacobiInterp assumes you have some initial P,
* hence we need to initialize P as above, before calling this routine.
* If numsweeps = 0, the following step is skipped and P is returned as is.
* Looping here is equivalent to improving P by Jacobi interpolation
*/
// for(i=0; i<numsweeps; i++)
// hypre_BoomerAMGJacobiInterp(A, &R_ptr, S,1, NULL, CF_marker, 0,
// jac_trunc_threshold, jac_trunc_threshold_minus);
}
/* set pointer to P */
@ -4492,6 +4475,24 @@ hypre_MGRSetTruncateCoarseGridThreshold( void *mgr_vdata, HYPRE_Real threshold)
return hypre_error_flag;
}
/* Set print level for F-relaxation solver */
HYPRE_Int
hypre_MGRSetFrelaxPrintLevel( void *mgr_vdata, HYPRE_Int print_level )
{
hypre_ParMGRData *mgr_data = (hypre_ParMGRData*) mgr_vdata;
(mgr_data -> frelax_print_level) = print_level;
return hypre_error_flag;
}
/* Set print level for coarse grid solver */
HYPRE_Int
hypre_MGRSetCoarseGridPrintLevel( void *mgr_vdata, HYPRE_Int print_level )
{
hypre_ParMGRData *mgr_data = (hypre_ParMGRData*) mgr_vdata;
(mgr_data -> cg_print_level) = print_level;
return hypre_error_flag;
}
/* Set print level for mgr solver */
HYPRE_Int
hypre_MGRSetPrintLevel( void *mgr_vdata, HYPRE_Int print_level )
@ -4501,7 +4502,7 @@ hypre_MGRSetPrintLevel( void *mgr_vdata, HYPRE_Int print_level )
return hypre_error_flag;
}
/* Set print level for mgr solver */
/* Set logging level for mgr solver */
HYPRE_Int
hypre_MGRSetLogging( void *mgr_vdata, HYPRE_Int logging )
{
@ -4615,6 +4616,7 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A,
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
hypre_ParCSRCommHandle *comm_handle;
HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A);
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
@ -4855,18 +4857,18 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A,
Ablock_diag_size = jj_counter;
Ablock_diag_i = hypre_CTAlloc(HYPRE_Int, ii_counter+1, HYPRE_MEMORY_HOST);
Ablock_diag_j = hypre_CTAlloc(HYPRE_Int, Ablock_diag_size, HYPRE_MEMORY_HOST);
Ablock_diag_data = hypre_CTAlloc(HYPRE_Real, Ablock_diag_size, HYPRE_MEMORY_HOST);
Ablock_diag_i = hypre_CTAlloc(HYPRE_Int, ii_counter+1, memory_location);
Ablock_diag_j = hypre_CTAlloc(HYPRE_Int, Ablock_diag_size, memory_location);
Ablock_diag_data = hypre_CTAlloc(HYPRE_Real, Ablock_diag_size, memory_location);
Ablock_diag_i[ii_counter] = jj_counter;
Ablock_offd_size = jj_counter_offd;
Ablock_offd_i = hypre_CTAlloc(HYPRE_Int, ii_counter+1, HYPRE_MEMORY_HOST);
Ablock_offd_j = hypre_CTAlloc(HYPRE_Int, Ablock_offd_size, HYPRE_MEMORY_HOST);
Ablock_offd_data = hypre_CTAlloc(HYPRE_Real, Ablock_offd_size, HYPRE_MEMORY_HOST);
Ablock_offd_i = hypre_CTAlloc(HYPRE_Int, ii_counter+1, memory_location);
Ablock_offd_j = hypre_CTAlloc(HYPRE_Int, Ablock_offd_size, memory_location);
Ablock_offd_data = hypre_CTAlloc(HYPRE_Real, Ablock_offd_size, memory_location);
/*-----------------------------------------------------------------------
* Intialize some stuff.
@ -5020,7 +5022,7 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A,
}
}
col_map_offd_Ablock = hypre_CTAlloc(HYPRE_BigInt, num_cols_Ablock_offd, HYPRE_MEMORY_HOST);
col_map_offd_Ablock = hypre_CTAlloc(HYPRE_BigInt, num_cols_Ablock_offd, memory_location);
tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_Ablock_offd, HYPRE_MEMORY_HOST);
index = 0;
for (i=0; i < num_cols_Ablock_offd; i++)

View File

@ -62,6 +62,8 @@ typedef struct
HYPRE_Int relax_type;
HYPRE_Int logging;
HYPRE_Int print_level;
HYPRE_Int frelax_print_level;
HYPRE_Int cg_print_level;
HYPRE_Int max_iter;
HYPRE_Int relax_order;
HYPRE_Int num_relax_sweeps;

View File

@ -107,7 +107,7 @@ hypre_MGRSetup( void *mgr_vdata,
hypre_ParCSRMatrix *A_ff_inv = (mgr_data -> A_ff_inv);
HYPRE_Int use_air = 0;
HYPRE_Int truncate_cg_threshold = (mgr_data -> truncate_coarse_grid_threshold);
HYPRE_Real truncate_cg_threshold = (mgr_data -> truncate_coarse_grid_threshold);
// HYPRE_Real wall_time;
/* ----- begin -----*/
@ -847,7 +847,7 @@ hypre_MGRSetup( void *mgr_vdata,
{
HYPRE_Int block_num_f_points = (lev == 0 ? block_size : block_num_coarse_indexes[lev-1]) - block_num_coarse_indexes[lev];
hypre_MGRComputeNonGalerkinCoarseGrid(A_array[lev], P, RT, block_num_f_points,
/* ordering */0, /* method */ 0, max_elmts, /* keep_stencil */ 0, CF_marker_array[lev], &RAP_ptr);
/* ordering */set_c_points_method, /* method (approx. inverse or not) */ 0, max_elmts, /* keep_stencil */ 0, CF_marker_array[lev], &RAP_ptr);
hypre_ParCSRMatrixOwnsColStarts(RAP_ptr) = 0;
hypre_ParCSRMatrixOwnsColStarts(P_array[lev]) = 0;
hypre_ParCSRMatrixOwnsRowStarts(RT) = 0;
@ -914,12 +914,12 @@ hypre_MGRSetup( void *mgr_vdata,
A_ff_array[lev] = A_ff_ptr;
aff_solver[lev] = (HYPRE_Solver*) hypre_BoomerAMGCreate();
hypre_BoomerAMGSetMaxIter(aff_solver[lev], 1);
hypre_BoomerAMGSetMaxIter(aff_solver[lev], mgr_data -> num_relax_sweeps);
hypre_BoomerAMGSetTol(aff_solver[lev], 0.0);
hypre_BoomerAMGSetRelaxOrder(aff_solver[lev], 1);
//hypre_BoomerAMGSetAggNumLevels(aff_solver[lev], 1);
hypre_BoomerAMGSetNumSweeps(aff_solver[lev], 3);
hypre_BoomerAMGSetPrintLevel(aff_solver[lev], print_level);
hypre_BoomerAMGSetPrintLevel(aff_solver[lev], mgr_data -> frelax_print_level);
hypre_BoomerAMGSetNumFunctions(aff_solver[lev], 1);
fine_grid_solver_setup(aff_solver[lev], A_ff_ptr, F_fine_array[lev+1], U_fine_array[lev+1]);
@ -1053,7 +1053,7 @@ hypre_MGRSetup( void *mgr_vdata,
hypre_BoomerAMGSetMaxIter ( default_cg_solver, 1 );
hypre_BoomerAMGSetTol ( default_cg_solver, 0.0 );
hypre_BoomerAMGSetRelaxOrder( default_cg_solver, 1);
hypre_BoomerAMGSetPrintLevel(default_cg_solver, print_level);
hypre_BoomerAMGSetPrintLevel(default_cg_solver, mgr_data -> cg_print_level);
/* set setup and solve functions */
coarse_grid_solver_setup = (HYPRE_Int (*)(void*, void*, void*, void*)) hypre_BoomerAMGSetup;
coarse_grid_solver_solve = (HYPRE_Int (*)(void*, void*, void*, void*)) hypre_BoomerAMGSolve;

View File

@ -579,6 +579,8 @@ hypre_MGRCycle( void *mgr_vdata,
HYPRE_Int fine_grid;
HYPRE_Int Not_Finished;
HYPRE_Int cycle_type;
HYPRE_Int print_level = (mgr_data -> print_level);
HYPRE_Int frelax_print_level = (mgr_data -> frelax_print_level);
hypre_ParCSRMatrix **A_array = (mgr_data -> A_array);
hypre_ParCSRMatrix **RT_array = (mgr_data -> RT_array);
@ -596,6 +598,7 @@ hypre_MGRCycle( void *mgr_vdata,
hypre_Vector **relax_l1_norms = (mgr_data -> l1_norms);
hypre_ParVector *Vtemp = (mgr_data -> Vtemp);
hypre_ParVector *Ztemp = (mgr_data -> Ztemp);
// hypre_ParVector *Utemp = (mgr_data -> Utemp);
hypre_ParVector **U_fine_array = (mgr_data -> U_fine_array);
hypre_ParVector **F_fine_array = (mgr_data -> F_fine_array);
@ -643,7 +646,7 @@ hypre_MGRCycle( void *mgr_vdata,
HYPRE_Real convergence_factor_cg;
hypre_BoomerAMGGetRelResidualNorm(cg_solver, &convergence_factor_cg);
(mgr_data -> cg_convergence_factor) = convergence_factor_cg;
if ((mgr_data -> print_level) > 1 && my_id == 0 && convergence_factor_cg > 1.0)
if ((print_level) > 1 && my_id == 0 && convergence_factor_cg > 1.0)
{
hypre_printf("Warning!!! Coarse grid solve diverges. Factor = %1.2e\n", convergence_factor_cg);
}
@ -715,11 +718,77 @@ hypre_MGRCycle( void *mgr_vdata,
// U_array[level], 1.0, F_array[level], Vtemp);
// convergence_factor_frelax = hypre_ParVectorInnerProd(Vtemp, Vtemp);
HYPRE_Real resnorm, init_resnorm;
HYPRE_Real rhs_norm, old_resnorm;
HYPRE_Real rel_resnorm = 1.0;
HYPRE_Real conv_factor = 1.0;
if (frelax_print_level > 1)
{
hypre_ParCSRMatrixMatvecOutOfPlace(-1.0, A_array[level],
U_array[level], 1.0, F_array[level], Vtemp);
resnorm = hypre_ParVectorInnerProd(Vtemp, Vtemp);
init_resnorm = resnorm;
rhs_norm = sqrt(hypre_ParVectorInnerProd(F_array[level], F_array[level]));
if (rhs_norm > HYPRE_REAL_EPSILON)
{
rel_resnorm = init_resnorm / rhs_norm;
}
else
{
/* rhs is zero, return a zero solution */
hypre_ParVectorSetConstantValues(U_array[0], 0.0);
HYPRE_ANNOTATE_FUNC_END;
return hypre_error_flag;
}
if(my_id == 0)
{
hypre_printf("\nBegin F-relaxation: V-Cycle Smoother \n");
hypre_printf(" relative\n");
hypre_printf(" residual factor residual\n");
hypre_printf(" -------- ------ --------\n");
hypre_printf(" Initial %e %e\n",init_resnorm,
rel_resnorm);
}
}
for(i=0; i<nsweeps; i++)
{
hypre_MGRFrelaxVcycle(FrelaxVcycleData[level], F_array[level], U_array[level]);
}
if(frelax_print_level > 1)
{
old_resnorm = resnorm;
hypre_ParCSRMatrixMatvecOutOfPlace(-1.0, A_array[level],
U_array[level], 1.0, F_array[level], Vtemp);
resnorm = hypre_ParVectorInnerProd(Vtemp, Vtemp);
if (old_resnorm) conv_factor = resnorm / old_resnorm;
else conv_factor = resnorm;
if (rhs_norm > HYPRE_REAL_EPSILON)
{
rel_resnorm = resnorm / rhs_norm;
}
else
{
rel_resnorm = resnorm;
}
if (my_id == 0 )
{
hypre_printf("\n V-Cycle %2d %e %f %e \n", i,
resnorm, conv_factor, rel_resnorm);
}
}
}
if (my_id == 0 && frelax_print_level > 1)
{
hypre_printf("End F-relaxation: V-Cycle Smoother \n\n");
}
// compute residual after solve
//hypre_ParCSRMatrixMatvecOutOfPlace(-1.0, A_array[level],
// U_array[level], 1.0, F_array[level], Vtemp);
@ -728,11 +797,22 @@ hypre_MGRCycle( void *mgr_vdata,
}
else if (Frelax_method[level] == 2)
{
hypre_ParVectorSetConstantValues(F_fine_array[coarse_grid], 0.0);
hypre_MGRAddVectorR(CF_marker[fine_grid], FMRK, 1.0, F_array[fine_grid], 0.0, &(F_fine_array[coarse_grid]));
hypre_ParVectorSetConstantValues(U_fine_array[coarse_grid], 0.0);
// We need to first compute residual to ensure that
// F-relaxation is reducing the global residual
alpha = -1.0;
beta = 1.0;
hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[fine_grid], U_array[fine_grid], beta, F_array[fine_grid], Vtemp);
// restrict to F points
// hypre_ParVectorSetConstantValues(F_fine_array[coarse_grid], 0.0);
hypre_MGRAddVectorR(CF_marker[fine_grid], FMRK, 1.0, Vtemp, 0.0, &(F_fine_array[coarse_grid]));
// initialize solution array
hypre_ParVectorSetConstantValues(U_fine_array[coarse_grid], 0.0);
// solve
fine_grid_solver_solve((mgr_data -> aff_solver)[fine_grid], A_ff_array[fine_grid], F_fine_array[coarse_grid],
U_fine_array[coarse_grid]);
// update solution
hypre_MGRAddVectorP(CF_marker[fine_grid], FMRK, 1.0, U_fine_array[coarse_grid], 1.0, &(U_array[fine_grid]));
}
else
@ -756,7 +836,7 @@ hypre_MGRCycle( void *mgr_vdata,
alpha = 1.0;
beta = 0.0;
if (restrict_type[fine_grid] > 3)
if (restrict_type[fine_grid] == 4 || restrict_type[fine_grid] == 5)
use_air = 1;
if (use_air)

View File

@ -1401,6 +1401,8 @@ HYPRE_Int hypre_MGRSetNumRelaxSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
HYPRE_Int hypre_MGRSetNumInterpSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
HYPRE_Int hypre_MGRSetNumRestrictSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
HYPRE_Int hypre_MGRSetPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
HYPRE_Int hypre_MGRSetFrelaxPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
HYPRE_Int hypre_MGRSetCoarseGridPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
HYPRE_Int hypre_MGRSetTruncateCoarseGridThreshold( void *mgr_vdata, HYPRE_Real threshold);
HYPRE_Int hypre_MGRSetLogging( void *mgr_vdata, HYPRE_Int logging );
HYPRE_Int hypre_MGRSetMaxIter( void *mgr_vdata, HYPRE_Int max_iter );

View File

@ -914,6 +914,12 @@ hypre_CSRMatrixTransposeHost(hypre_CSRMatrix *A,
hypre_CSRMatrixI(*AT)[num_cols_A] = num_nnzs_A;
hypre_TFree(bucket, HYPRE_MEMORY_HOST);
/* Move diagonal to first entry (for square matrices only)*/
if(num_rows_A == num_cols_A)
{
hypre_CSRMatrixReorder(*AT);
}
// Set rownnz and num_rownnz
if (hypre_CSRMatrixNumRownnz(A) < num_rows_A)
{

View File

@ -1207,6 +1207,14 @@ hypre_CSRMatrixTransposeDevice(hypre_CSRMatrix *A,
*AT_ptr = C;
hypre_SyncCudaComputeStream(hypre_handle());
/* Put diagonal at first entry (for square matrices only)*/
if(nrows_A == ncols_A)
{
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_CSRMatrixMoveDiagFirstDevice( *AT_ptr );
#endif
}
return hypre_error_flag;
}

View File

@ -24,7 +24,7 @@ Final Relative Residual Norm = 7.212489e-09
# Output file: elast.out.6
Iterations = 14
Final Relative Residual Norm = 1.005958e-09
Final Relative Residual Norm = 1.005959e-09
# Output file: elast.out.7
Iterations = 16

View File

@ -339,7 +339,7 @@ Final Relative Residual Norm = 7.556742e-09
# Output file: solvers.out.307
hypre_ILU Iterations = 38
Final Relative Residual Norm = 7.037599e-09
Final Relative Residual Norm = 7.037600e-09
# Output file: solvers.out.308
hypre_ILU Iterations = 26