Fix MGR setup on GPU and MGR bugfixes. (#260)
This PR addresses initial changes necessary to perform MGR setup on GPUs. It also fixes a bug in the iterative use of AMG for F-relaxation, adds options to print statistics of the F-relaxation solver (for the V-cycle smoother), and places the diagonal of a transposed square matrix as the first element in the row. This last change required a couple minor changes to the saved files.
This commit is contained in:
parent
3f12d47651
commit
c272f6980c
@ -3784,10 +3784,12 @@ HYPRE_MGRSetLevelFRelaxNumFunctions(HYPRE_Solver solver, HYPRE_Int *num_function
|
||||
* - 0 : injection \f$[0 I]\f$
|
||||
* - 1 : unscaled (not recommended)
|
||||
* - 2 : diagonal scaling (Jacobi)
|
||||
* - 3 : approximate inverse
|
||||
* - 4 : pAIR distance 1
|
||||
* - 5 : pAIR distance 2
|
||||
* - else : use classical modified interpolation
|
||||
*
|
||||
* These options are currently active for the last stage reduction. Intermediate
|
||||
* reduction levels use injection. The default is injection.
|
||||
* The default is injection.
|
||||
**/
|
||||
HYPRE_Int
|
||||
HYPRE_MGRSetRestrictType( HYPRE_Solver solver,
|
||||
@ -3806,16 +3808,17 @@ HYPRE_MGRSetNumRestrictSweeps( HYPRE_Solver solver,
|
||||
HYPRE_Int nsweeps );
|
||||
|
||||
/**
|
||||
* (Optional) Set the strategy for computing the MGR restriction operator.
|
||||
* (Optional) Set the strategy for computing the MGR interpolation operator.
|
||||
* Options for \e interp_type are:
|
||||
*
|
||||
* - 0 : injection \f$[0 I]^{T}\f$
|
||||
* - 1 : unscaled (not recommended)
|
||||
* - 2 : diagonal scaling (Jacobi)
|
||||
* - else : use default (classical modified interpolation)
|
||||
* - 3 : classical modified interpolation
|
||||
* - 4 : approximate inverse
|
||||
* - else : classical modified interpolation
|
||||
*
|
||||
* These options are currently active for the last stage reduction. Intermediate
|
||||
* reduction levels use diagonal scaling.
|
||||
* The default is diagonal scaling.
|
||||
**/
|
||||
HYPRE_Int
|
||||
HYPRE_MGRSetInterpType( HYPRE_Solver solver,
|
||||
@ -3879,6 +3882,14 @@ HYPRE_Int
|
||||
HYPRE_MGRSetPrintLevel( HYPRE_Solver solver,
|
||||
HYPRE_Int print_level );
|
||||
|
||||
HYPRE_Int
|
||||
HYPRE_MGRSetFrelaxPrintLevel( HYPRE_Solver solver,
|
||||
HYPRE_Int print_level );
|
||||
|
||||
HYPRE_Int
|
||||
HYPRE_MGRSetCoarseGridPrintLevel( HYPRE_Solver solver,
|
||||
HYPRE_Int print_level );
|
||||
|
||||
/**
|
||||
* (Optional) Set the threshold to compress the coarse grid at each level
|
||||
* Use threshold = 0.0 if no truncation is applied. Otherwise, set the threshold
|
||||
|
||||
@ -358,6 +358,24 @@ HYPRE_MGRSetTruncateCoarseGridThreshold( HYPRE_Solver solver, HYPRE_Real thresho
|
||||
return hypre_MGRSetTruncateCoarseGridThreshold( solver, threshold );
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
* HYPRE_MGRSetFrelaxPrintLevel
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
HYPRE_MGRSetFrelaxPrintLevel( HYPRE_Solver solver, HYPRE_Int print_level )
|
||||
{
|
||||
return hypre_MGRSetFrelaxPrintLevel( solver, print_level );
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
* HYPRE_MGRSetCoarseGridPrintLevel
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
HYPRE_MGRSetCoarseGridPrintLevel( HYPRE_Solver solver, HYPRE_Int print_level )
|
||||
{
|
||||
return hypre_MGRSetCoarseGridPrintLevel( solver, print_level );
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
* HYPRE_MGRSetPrintLevel
|
||||
*--------------------------------------------------------------------------*/
|
||||
|
||||
@ -2190,6 +2190,8 @@ HYPRE_Int hypre_MGRSetNumRelaxSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
|
||||
HYPRE_Int hypre_MGRSetNumInterpSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
|
||||
HYPRE_Int hypre_MGRSetNumRestrictSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
|
||||
HYPRE_Int hypre_MGRSetPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
|
||||
HYPRE_Int hypre_MGRSetFrelaxPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
|
||||
HYPRE_Int hypre_MGRSetCoarseGridPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
|
||||
HYPRE_Int hypre_MGRSetTruncateCoarseGridThreshold( void *mgr_vdata, HYPRE_Real threshold);
|
||||
HYPRE_Int hypre_MGRSetLogging( void *mgr_vdata, HYPRE_Int logging );
|
||||
HYPRE_Int hypre_MGRSetMaxIter( void *mgr_vdata, HYPRE_Int max_iter );
|
||||
|
||||
@ -86,6 +86,8 @@ hypre_MGRCreate()
|
||||
|
||||
(mgr_data -> logging) = 0;
|
||||
(mgr_data -> print_level) = 0;
|
||||
(mgr_data -> frelax_print_level) = 0;
|
||||
(mgr_data -> cg_print_level) = 0;
|
||||
|
||||
(mgr_data -> l1_norms) = NULL;
|
||||
|
||||
@ -1997,6 +1999,7 @@ hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int my_id;
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
hypre_MPI_Comm_rank(comm,&my_id);
|
||||
HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
|
||||
n_local_fine_grid = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
|
||||
c_marker = hypre_CTAlloc(HYPRE_Int, n_local_fine_grid, HYPRE_MEMORY_HOST);
|
||||
@ -2131,19 +2134,18 @@ hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *A_h_correction_offd_i = hypre_CSRMatrixI(A_h_correction_offd);
|
||||
HYPRE_Int *A_h_correction_offd_j = hypre_CSRMatrixJ(A_h_correction_offd);
|
||||
|
||||
// Allow for maximum dropping with Pmax = 0
|
||||
//if (Pmax > 0)
|
||||
//{
|
||||
if (Pmax > 0)
|
||||
{
|
||||
if (ordering == 0) // interleaved ordering
|
||||
{
|
||||
HYPRE_Int *A_h_correction_diag_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints+1, HYPRE_MEMORY_HOST);
|
||||
HYPRE_Int *A_h_correction_diag_j_new = hypre_CTAlloc(HYPRE_Int, (bsize + max_elmts)*n_local_cpoints, HYPRE_MEMORY_HOST);
|
||||
HYPRE_Complex *A_h_correction_diag_data_new = hypre_CTAlloc(HYPRE_Complex, (bsize + max_elmts)*n_local_cpoints, HYPRE_MEMORY_HOST);
|
||||
HYPRE_Int *A_h_correction_diag_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints+1, memory_location);
|
||||
HYPRE_Int *A_h_correction_diag_j_new = hypre_CTAlloc(HYPRE_Int, (bsize + max_elmts)*n_local_cpoints, memory_location);
|
||||
HYPRE_Complex *A_h_correction_diag_data_new = hypre_CTAlloc(HYPRE_Complex, (bsize + max_elmts)*n_local_cpoints, memory_location);
|
||||
HYPRE_Int num_nonzeros_diag_new = 0;
|
||||
|
||||
HYPRE_Int *A_h_correction_offd_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints+1, HYPRE_MEMORY_HOST);
|
||||
HYPRE_Int *A_h_correction_offd_j_new = hypre_CTAlloc(HYPRE_Int, max_elmts*n_local_cpoints, HYPRE_MEMORY_HOST);
|
||||
HYPRE_Complex *A_h_correction_offd_data_new = hypre_CTAlloc(HYPRE_Complex, max_elmts*n_local_cpoints, HYPRE_MEMORY_HOST);
|
||||
HYPRE_Int *A_h_correction_offd_i_new = hypre_CTAlloc(HYPRE_Int, n_local_cpoints+1, memory_location);
|
||||
HYPRE_Int *A_h_correction_offd_j_new = hypre_CTAlloc(HYPRE_Int, max_elmts*n_local_cpoints, memory_location);
|
||||
HYPRE_Complex *A_h_correction_offd_data_new = hypre_CTAlloc(HYPRE_Complex, max_elmts*n_local_cpoints, memory_location);
|
||||
HYPRE_Int num_nonzeros_offd_new = 0;
|
||||
|
||||
|
||||
@ -2212,17 +2214,17 @@ hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A,
|
||||
hypre_TFree(aux_data, HYPRE_MEMORY_HOST);
|
||||
}
|
||||
|
||||
hypre_TFree(A_h_correction_diag_i, HYPRE_MEMORY_HOST);
|
||||
hypre_TFree(A_h_correction_diag_j, HYPRE_MEMORY_HOST);
|
||||
hypre_TFree(A_h_correction_diag_data, HYPRE_MEMORY_HOST);
|
||||
hypre_TFree(A_h_correction_diag_i, memory_location);
|
||||
hypre_TFree(A_h_correction_diag_j, memory_location);
|
||||
hypre_TFree(A_h_correction_diag_data, memory_location);
|
||||
hypre_CSRMatrixI(A_h_correction_diag) = A_h_correction_diag_i_new;
|
||||
hypre_CSRMatrixJ(A_h_correction_diag) = A_h_correction_diag_j_new;
|
||||
hypre_CSRMatrixData(A_h_correction_diag) = A_h_correction_diag_data_new;
|
||||
hypre_CSRMatrixNumNonzeros(A_h_correction_diag) = num_nonzeros_diag_new;
|
||||
|
||||
if (A_h_correction_offd_i) hypre_TFree(A_h_correction_offd_i, HYPRE_MEMORY_HOST);
|
||||
if (A_h_correction_offd_j) hypre_TFree(A_h_correction_offd_j, HYPRE_MEMORY_HOST);
|
||||
if (A_h_correction_offd_data) hypre_TFree(A_h_correction_offd_data, HYPRE_MEMORY_HOST);
|
||||
if (A_h_correction_offd_i) hypre_TFree(A_h_correction_offd_i, memory_location);
|
||||
if (A_h_correction_offd_j) hypre_TFree(A_h_correction_offd_j, memory_location);
|
||||
if (A_h_correction_offd_data) hypre_TFree(A_h_correction_offd_data, memory_location);
|
||||
hypre_CSRMatrixI(A_h_correction_offd) = A_h_correction_offd_i_new;
|
||||
hypre_CSRMatrixJ(A_h_correction_offd) = A_h_correction_offd_j_new;
|
||||
hypre_CSRMatrixData(A_h_correction_offd) = A_h_correction_offd_data_new;
|
||||
@ -2230,10 +2232,10 @@ hypre_MGRComputeNonGalerkinCoarseGrid(hypre_ParCSRMatrix *A,
|
||||
}
|
||||
else
|
||||
{
|
||||
hypre_printf("Error!! Block ordering is not supported at the moment\n");
|
||||
hypre_printf("Error!! Block ordering for non-Galerkin coarse grid is not currently supported\n");
|
||||
exit(-1);
|
||||
}
|
||||
//}
|
||||
}
|
||||
//hypre_MGRParCSRMatrixTruncate(A_h_correction, max_elmts);
|
||||
//wall_time = time_getWallclockSeconds() - wall_time;
|
||||
//hypre_printf("Filter A_h_correction time: %1.5f\n", wall_time);
|
||||
@ -2744,14 +2746,15 @@ hypre_MGRBuildInterpApproximateInverse(hypre_ParCSRMatrix *A,
|
||||
|
||||
// Get A_FF
|
||||
hypre_MGRGetSubBlock(A, F_marker, F_marker, 0, &A_ff);
|
||||
// hypre_ParCSRMatrixPrintIJ(A_ff, 1, 1, "A_ff");
|
||||
// Get A_FC
|
||||
hypre_MGRGetSubBlock(A, F_marker, C_marker, 0, &A_fc);
|
||||
|
||||
hypre_MGRApproximateInverse(A_ff, &A_ff_inv);
|
||||
hypre_ParCSRMatrixPrintIJ(A_ff_inv, 1, 1, "A_ff_inv");
|
||||
hypre_ParCSRMatrixPrintIJ(A_fc, 1, 1, "A_fc");
|
||||
// hypre_ParCSRMatrixPrintIJ(A_ff_inv, 1, 1, "A_ff_inv");
|
||||
// hypre_ParCSRMatrixPrintIJ(A_fc, 1, 1, "A_fc");
|
||||
minus_Wp = hypre_ParMatmul(A_ff_inv, A_fc);
|
||||
hypre_ParCSRMatrixPrintIJ(minus_Wp, 1, 1, "Wp");
|
||||
// hypre_ParCSRMatrixPrintIJ(minus_Wp, 1, 1, "Wp");
|
||||
|
||||
hypre_CSRMatrix *minus_Wp_diag = hypre_ParCSRMatrixDiag(minus_Wp);
|
||||
HYPRE_Real *minus_Wp_diag_data = hypre_CSRMatrixData(minus_Wp_diag);
|
||||
@ -3117,19 +3120,27 @@ hypre_MGRBuildInterp(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int interp_type,
|
||||
HYPRE_Int numsweeps)
|
||||
{
|
||||
//HYPRE_Int i;
|
||||
// HYPRE_Int i;
|
||||
hypre_ParCSRMatrix *P_ptr = NULL;
|
||||
//HYPRE_Real jac_trunc_threshold = trunc_factor;
|
||||
//HYPRE_Real jac_trunc_threshold_minus = 0.5*jac_trunc_threshold;
|
||||
// HYPRE_Real jac_trunc_threshold = trunc_factor;
|
||||
// HYPRE_Real jac_trunc_threshold_minus = 0.5*jac_trunc_threshold;
|
||||
|
||||
/* Interpolation for each level */
|
||||
if (interp_type <3)
|
||||
{
|
||||
hypre_MGRBuildP( A,CF_marker,num_cpts_global,interp_type,debug_flag,&P_ptr);
|
||||
/* Could do a few sweeps of Jacobi to further improve P */
|
||||
//for(i=0; i<numsweeps; i++)
|
||||
// hypre_BoomerAMGJacobiInterp(A, &P_ptr, S,1, NULL, CF_marker, 0, jac_trunc_threshold, jac_trunc_threshold_minus );
|
||||
//hypre_BoomerAMGInterpTruncation(P_ptr, trunc_factor, max_elmts);
|
||||
|
||||
/* Could do a few sweeps of Jacobi to further improve Jacobi interpolation P */
|
||||
/*
|
||||
if(interp_type == 2)
|
||||
{
|
||||
for(i=0; i<numsweeps; i++)
|
||||
{
|
||||
hypre_BoomerAMGJacobiInterp(A, &P_ptr, S,1, NULL, CF_marker, 0, jac_trunc_threshold, jac_trunc_threshold_minus );
|
||||
}
|
||||
hypre_BoomerAMGInterpTruncation(P_ptr, trunc_factor, max_elmts);
|
||||
}
|
||||
*/
|
||||
}
|
||||
else if (interp_type == 4)
|
||||
{
|
||||
@ -3146,17 +3157,6 @@ hypre_MGRBuildInterp(hypre_ParCSRMatrix *A,
|
||||
/* Classical modified interpolation */
|
||||
hypre_BoomerAMGBuildInterp(A, CF_marker, S, num_cpts_global,1, NULL,debug_flag,
|
||||
trunc_factor, max_elmts, &P_ptr);
|
||||
|
||||
/* Do k steps of Jacobi build W for P = [-W I].
|
||||
* Note that BoomerAMGJacobiInterp assumes you have some initial P,
|
||||
* hence we need to initialize P as above, before calling this routine.
|
||||
* If numsweeps = 0, the following step is skipped and P is returned as is.
|
||||
* Looping here is equivalent to improving P by Jacobi interpolation
|
||||
*/
|
||||
//for(i=0; i<numsweeps; i++)
|
||||
// hypre_BoomerAMGJacobiInterp(A, &P_ptr, S,1, NULL, CF_marker,
|
||||
// 0, jac_trunc_threshold,
|
||||
// jac_trunc_threshold_minus );
|
||||
}
|
||||
|
||||
/* set pointer to P */
|
||||
@ -3193,14 +3193,8 @@ hypre_MGRBuildRestrict(hypre_ParCSRMatrix *A,
|
||||
{
|
||||
hypre_ParCSRMatrixTranspose(A, &AT, 1);
|
||||
}
|
||||
if (restrict_type > 5)
|
||||
{
|
||||
/* Build new strength matrix */
|
||||
hypre_BoomerAMGCreateS(AT, strong_threshold, max_row_sum, 1, NULL, &ST);
|
||||
/* use appropriate communication package for Strength matrix */
|
||||
}
|
||||
|
||||
/* Interpolation for each level */
|
||||
/* Restriction for each level */
|
||||
if (restrict_type == 0)
|
||||
{
|
||||
hypre_MGRBuildP(A, CF_marker, num_cpts_global, restrict_type, debug_flag, &R_ptr);
|
||||
@ -3208,31 +3202,20 @@ hypre_MGRBuildRestrict(hypre_ParCSRMatrix *A,
|
||||
else if (restrict_type == 1 || restrict_type == 2)
|
||||
{
|
||||
hypre_MGRBuildP(AT, CF_marker, num_cpts_global, restrict_type, debug_flag, &R_ptr);
|
||||
/* Could do a few sweeps of Jacobi to further improve P */
|
||||
//for(i=0; i<numsweeps; i++)
|
||||
// hypre_BoomerAMGJacobiInterp(A, &R_ptr, S,1, NULL, CF_marker, 0, jac_trunc_threshold, jac_trunc_threshold_minus );
|
||||
//hypre_BoomerAMGInterpTruncation(R_ptr, trunc_factor, max_elmts);
|
||||
}
|
||||
else if (restrict_type == 4)
|
||||
else if (restrict_type == 3)
|
||||
{
|
||||
hypre_MGRBuildInterpApproximateInverse(A, CF_marker, num_cpts_global, debug_flag, &R_ptr);
|
||||
hypre_MGRBuildInterpApproximateInverse(AT, CF_marker, num_cpts_global, debug_flag, &R_ptr);
|
||||
hypre_BoomerAMGInterpTruncation(R_ptr, trunc_factor, max_elmts);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Build new strength matrix */
|
||||
hypre_BoomerAMGCreateS(AT, strong_threshold, max_row_sum, 1, NULL, &ST);
|
||||
|
||||
/* Classical modified interpolation */
|
||||
hypre_BoomerAMGBuildInterp(AT, CF_marker, ST, num_cpts_global,1, NULL,debug_flag,
|
||||
trunc_factor, max_elmts, &R_ptr);
|
||||
|
||||
/* Do k steps of Jacobi build W for P = [-W I].
|
||||
* Note that BoomerAMGJacobiInterp assumes you have some initial P,
|
||||
* hence we need to initialize P as above, before calling this routine.
|
||||
* If numsweeps = 0, the following step is skipped and P is returned as is.
|
||||
* Looping here is equivalent to improving P by Jacobi interpolation
|
||||
*/
|
||||
// for(i=0; i<numsweeps; i++)
|
||||
// hypre_BoomerAMGJacobiInterp(A, &R_ptr, S,1, NULL, CF_marker, 0,
|
||||
// jac_trunc_threshold, jac_trunc_threshold_minus);
|
||||
}
|
||||
|
||||
/* set pointer to P */
|
||||
@ -4492,6 +4475,24 @@ hypre_MGRSetTruncateCoarseGridThreshold( void *mgr_vdata, HYPRE_Real threshold)
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
/* Set print level for F-relaxation solver */
|
||||
HYPRE_Int
|
||||
hypre_MGRSetFrelaxPrintLevel( void *mgr_vdata, HYPRE_Int print_level )
|
||||
{
|
||||
hypre_ParMGRData *mgr_data = (hypre_ParMGRData*) mgr_vdata;
|
||||
(mgr_data -> frelax_print_level) = print_level;
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
/* Set print level for coarse grid solver */
|
||||
HYPRE_Int
|
||||
hypre_MGRSetCoarseGridPrintLevel( void *mgr_vdata, HYPRE_Int print_level )
|
||||
{
|
||||
hypre_ParMGRData *mgr_data = (hypre_ParMGRData*) mgr_vdata;
|
||||
(mgr_data -> cg_print_level) = print_level;
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
/* Set print level for mgr solver */
|
||||
HYPRE_Int
|
||||
hypre_MGRSetPrintLevel( void *mgr_vdata, HYPRE_Int print_level )
|
||||
@ -4501,7 +4502,7 @@ hypre_MGRSetPrintLevel( void *mgr_vdata, HYPRE_Int print_level )
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
/* Set print level for mgr solver */
|
||||
/* Set logging level for mgr solver */
|
||||
HYPRE_Int
|
||||
hypre_MGRSetLogging( void *mgr_vdata, HYPRE_Int logging )
|
||||
{
|
||||
@ -4615,6 +4616,7 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A,
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
||||
hypre_ParCSRCommHandle *comm_handle;
|
||||
HYPRE_MemoryLocation memory_location = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
|
||||
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
|
||||
HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
|
||||
@ -4855,18 +4857,18 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A,
|
||||
|
||||
Ablock_diag_size = jj_counter;
|
||||
|
||||
Ablock_diag_i = hypre_CTAlloc(HYPRE_Int, ii_counter+1, HYPRE_MEMORY_HOST);
|
||||
Ablock_diag_j = hypre_CTAlloc(HYPRE_Int, Ablock_diag_size, HYPRE_MEMORY_HOST);
|
||||
Ablock_diag_data = hypre_CTAlloc(HYPRE_Real, Ablock_diag_size, HYPRE_MEMORY_HOST);
|
||||
Ablock_diag_i = hypre_CTAlloc(HYPRE_Int, ii_counter+1, memory_location);
|
||||
Ablock_diag_j = hypre_CTAlloc(HYPRE_Int, Ablock_diag_size, memory_location);
|
||||
Ablock_diag_data = hypre_CTAlloc(HYPRE_Real, Ablock_diag_size, memory_location);
|
||||
|
||||
Ablock_diag_i[ii_counter] = jj_counter;
|
||||
|
||||
|
||||
Ablock_offd_size = jj_counter_offd;
|
||||
|
||||
Ablock_offd_i = hypre_CTAlloc(HYPRE_Int, ii_counter+1, HYPRE_MEMORY_HOST);
|
||||
Ablock_offd_j = hypre_CTAlloc(HYPRE_Int, Ablock_offd_size, HYPRE_MEMORY_HOST);
|
||||
Ablock_offd_data = hypre_CTAlloc(HYPRE_Real, Ablock_offd_size, HYPRE_MEMORY_HOST);
|
||||
Ablock_offd_i = hypre_CTAlloc(HYPRE_Int, ii_counter+1, memory_location);
|
||||
Ablock_offd_j = hypre_CTAlloc(HYPRE_Int, Ablock_offd_size, memory_location);
|
||||
Ablock_offd_data = hypre_CTAlloc(HYPRE_Real, Ablock_offd_size, memory_location);
|
||||
|
||||
/*-----------------------------------------------------------------------
|
||||
* Intialize some stuff.
|
||||
@ -5020,7 +5022,7 @@ hypre_MGRGetSubBlock( hypre_ParCSRMatrix *A,
|
||||
}
|
||||
}
|
||||
|
||||
col_map_offd_Ablock = hypre_CTAlloc(HYPRE_BigInt, num_cols_Ablock_offd, HYPRE_MEMORY_HOST);
|
||||
col_map_offd_Ablock = hypre_CTAlloc(HYPRE_BigInt, num_cols_Ablock_offd, memory_location);
|
||||
tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_Ablock_offd, HYPRE_MEMORY_HOST);
|
||||
index = 0;
|
||||
for (i=0; i < num_cols_Ablock_offd; i++)
|
||||
|
||||
@ -62,6 +62,8 @@ typedef struct
|
||||
HYPRE_Int relax_type;
|
||||
HYPRE_Int logging;
|
||||
HYPRE_Int print_level;
|
||||
HYPRE_Int frelax_print_level;
|
||||
HYPRE_Int cg_print_level;
|
||||
HYPRE_Int max_iter;
|
||||
HYPRE_Int relax_order;
|
||||
HYPRE_Int num_relax_sweeps;
|
||||
|
||||
@ -107,7 +107,7 @@ hypre_MGRSetup( void *mgr_vdata,
|
||||
hypre_ParCSRMatrix *A_ff_inv = (mgr_data -> A_ff_inv);
|
||||
|
||||
HYPRE_Int use_air = 0;
|
||||
HYPRE_Int truncate_cg_threshold = (mgr_data -> truncate_coarse_grid_threshold);
|
||||
HYPRE_Real truncate_cg_threshold = (mgr_data -> truncate_coarse_grid_threshold);
|
||||
// HYPRE_Real wall_time;
|
||||
|
||||
/* ----- begin -----*/
|
||||
@ -847,7 +847,7 @@ hypre_MGRSetup( void *mgr_vdata,
|
||||
{
|
||||
HYPRE_Int block_num_f_points = (lev == 0 ? block_size : block_num_coarse_indexes[lev-1]) - block_num_coarse_indexes[lev];
|
||||
hypre_MGRComputeNonGalerkinCoarseGrid(A_array[lev], P, RT, block_num_f_points,
|
||||
/* ordering */0, /* method */ 0, max_elmts, /* keep_stencil */ 0, CF_marker_array[lev], &RAP_ptr);
|
||||
/* ordering */set_c_points_method, /* method (approx. inverse or not) */ 0, max_elmts, /* keep_stencil */ 0, CF_marker_array[lev], &RAP_ptr);
|
||||
hypre_ParCSRMatrixOwnsColStarts(RAP_ptr) = 0;
|
||||
hypre_ParCSRMatrixOwnsColStarts(P_array[lev]) = 0;
|
||||
hypre_ParCSRMatrixOwnsRowStarts(RT) = 0;
|
||||
@ -914,12 +914,12 @@ hypre_MGRSetup( void *mgr_vdata,
|
||||
A_ff_array[lev] = A_ff_ptr;
|
||||
|
||||
aff_solver[lev] = (HYPRE_Solver*) hypre_BoomerAMGCreate();
|
||||
hypre_BoomerAMGSetMaxIter(aff_solver[lev], 1);
|
||||
hypre_BoomerAMGSetMaxIter(aff_solver[lev], mgr_data -> num_relax_sweeps);
|
||||
hypre_BoomerAMGSetTol(aff_solver[lev], 0.0);
|
||||
hypre_BoomerAMGSetRelaxOrder(aff_solver[lev], 1);
|
||||
//hypre_BoomerAMGSetAggNumLevels(aff_solver[lev], 1);
|
||||
hypre_BoomerAMGSetNumSweeps(aff_solver[lev], 3);
|
||||
hypre_BoomerAMGSetPrintLevel(aff_solver[lev], print_level);
|
||||
hypre_BoomerAMGSetPrintLevel(aff_solver[lev], mgr_data -> frelax_print_level);
|
||||
hypre_BoomerAMGSetNumFunctions(aff_solver[lev], 1);
|
||||
|
||||
fine_grid_solver_setup(aff_solver[lev], A_ff_ptr, F_fine_array[lev+1], U_fine_array[lev+1]);
|
||||
@ -1053,7 +1053,7 @@ hypre_MGRSetup( void *mgr_vdata,
|
||||
hypre_BoomerAMGSetMaxIter ( default_cg_solver, 1 );
|
||||
hypre_BoomerAMGSetTol ( default_cg_solver, 0.0 );
|
||||
hypre_BoomerAMGSetRelaxOrder( default_cg_solver, 1);
|
||||
hypre_BoomerAMGSetPrintLevel(default_cg_solver, print_level);
|
||||
hypre_BoomerAMGSetPrintLevel(default_cg_solver, mgr_data -> cg_print_level);
|
||||
/* set setup and solve functions */
|
||||
coarse_grid_solver_setup = (HYPRE_Int (*)(void*, void*, void*, void*)) hypre_BoomerAMGSetup;
|
||||
coarse_grid_solver_solve = (HYPRE_Int (*)(void*, void*, void*, void*)) hypre_BoomerAMGSolve;
|
||||
|
||||
@ -579,6 +579,8 @@ hypre_MGRCycle( void *mgr_vdata,
|
||||
HYPRE_Int fine_grid;
|
||||
HYPRE_Int Not_Finished;
|
||||
HYPRE_Int cycle_type;
|
||||
HYPRE_Int print_level = (mgr_data -> print_level);
|
||||
HYPRE_Int frelax_print_level = (mgr_data -> frelax_print_level);
|
||||
|
||||
hypre_ParCSRMatrix **A_array = (mgr_data -> A_array);
|
||||
hypre_ParCSRMatrix **RT_array = (mgr_data -> RT_array);
|
||||
@ -596,6 +598,7 @@ hypre_MGRCycle( void *mgr_vdata,
|
||||
hypre_Vector **relax_l1_norms = (mgr_data -> l1_norms);
|
||||
hypre_ParVector *Vtemp = (mgr_data -> Vtemp);
|
||||
hypre_ParVector *Ztemp = (mgr_data -> Ztemp);
|
||||
// hypre_ParVector *Utemp = (mgr_data -> Utemp);
|
||||
|
||||
hypre_ParVector **U_fine_array = (mgr_data -> U_fine_array);
|
||||
hypre_ParVector **F_fine_array = (mgr_data -> F_fine_array);
|
||||
@ -643,7 +646,7 @@ hypre_MGRCycle( void *mgr_vdata,
|
||||
HYPRE_Real convergence_factor_cg;
|
||||
hypre_BoomerAMGGetRelResidualNorm(cg_solver, &convergence_factor_cg);
|
||||
(mgr_data -> cg_convergence_factor) = convergence_factor_cg;
|
||||
if ((mgr_data -> print_level) > 1 && my_id == 0 && convergence_factor_cg > 1.0)
|
||||
if ((print_level) > 1 && my_id == 0 && convergence_factor_cg > 1.0)
|
||||
{
|
||||
hypre_printf("Warning!!! Coarse grid solve diverges. Factor = %1.2e\n", convergence_factor_cg);
|
||||
}
|
||||
@ -715,11 +718,77 @@ hypre_MGRCycle( void *mgr_vdata,
|
||||
// U_array[level], 1.0, F_array[level], Vtemp);
|
||||
// convergence_factor_frelax = hypre_ParVectorInnerProd(Vtemp, Vtemp);
|
||||
|
||||
HYPRE_Real resnorm, init_resnorm;
|
||||
HYPRE_Real rhs_norm, old_resnorm;
|
||||
HYPRE_Real rel_resnorm = 1.0;
|
||||
HYPRE_Real conv_factor = 1.0;
|
||||
if (frelax_print_level > 1)
|
||||
{
|
||||
hypre_ParCSRMatrixMatvecOutOfPlace(-1.0, A_array[level],
|
||||
U_array[level], 1.0, F_array[level], Vtemp);
|
||||
|
||||
resnorm = hypre_ParVectorInnerProd(Vtemp, Vtemp);
|
||||
init_resnorm = resnorm;
|
||||
rhs_norm = sqrt(hypre_ParVectorInnerProd(F_array[level], F_array[level]));
|
||||
|
||||
if (rhs_norm > HYPRE_REAL_EPSILON)
|
||||
{
|
||||
rel_resnorm = init_resnorm / rhs_norm;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* rhs is zero, return a zero solution */
|
||||
hypre_ParVectorSetConstantValues(U_array[0], 0.0);
|
||||
|
||||
HYPRE_ANNOTATE_FUNC_END;
|
||||
|
||||
return hypre_error_flag;
|
||||
}
|
||||
if(my_id == 0)
|
||||
{
|
||||
hypre_printf("\nBegin F-relaxation: V-Cycle Smoother \n");
|
||||
hypre_printf(" relative\n");
|
||||
hypre_printf(" residual factor residual\n");
|
||||
hypre_printf(" -------- ------ --------\n");
|
||||
hypre_printf(" Initial %e %e\n",init_resnorm,
|
||||
rel_resnorm);
|
||||
}
|
||||
}
|
||||
|
||||
for(i=0; i<nsweeps; i++)
|
||||
{
|
||||
hypre_MGRFrelaxVcycle(FrelaxVcycleData[level], F_array[level], U_array[level]);
|
||||
}
|
||||
|
||||
if(frelax_print_level > 1)
|
||||
{
|
||||
old_resnorm = resnorm;
|
||||
hypre_ParCSRMatrixMatvecOutOfPlace(-1.0, A_array[level],
|
||||
U_array[level], 1.0, F_array[level], Vtemp);
|
||||
resnorm = hypre_ParVectorInnerProd(Vtemp, Vtemp);
|
||||
|
||||
if (old_resnorm) conv_factor = resnorm / old_resnorm;
|
||||
else conv_factor = resnorm;
|
||||
|
||||
if (rhs_norm > HYPRE_REAL_EPSILON)
|
||||
{
|
||||
rel_resnorm = resnorm / rhs_norm;
|
||||
}
|
||||
else
|
||||
{
|
||||
rel_resnorm = resnorm;
|
||||
}
|
||||
|
||||
if (my_id == 0 )
|
||||
{
|
||||
hypre_printf("\n V-Cycle %2d %e %f %e \n", i,
|
||||
resnorm, conv_factor, rel_resnorm);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (my_id == 0 && frelax_print_level > 1)
|
||||
{
|
||||
hypre_printf("End F-relaxation: V-Cycle Smoother \n\n");
|
||||
}
|
||||
// compute residual after solve
|
||||
//hypre_ParCSRMatrixMatvecOutOfPlace(-1.0, A_array[level],
|
||||
// U_array[level], 1.0, F_array[level], Vtemp);
|
||||
@ -728,11 +797,22 @@ hypre_MGRCycle( void *mgr_vdata,
|
||||
}
|
||||
else if (Frelax_method[level] == 2)
|
||||
{
|
||||
hypre_ParVectorSetConstantValues(F_fine_array[coarse_grid], 0.0);
|
||||
hypre_MGRAddVectorR(CF_marker[fine_grid], FMRK, 1.0, F_array[fine_grid], 0.0, &(F_fine_array[coarse_grid]));
|
||||
hypre_ParVectorSetConstantValues(U_fine_array[coarse_grid], 0.0);
|
||||
// We need to first compute residual to ensure that
|
||||
// F-relaxation is reducing the global residual
|
||||
|
||||
alpha = -1.0;
|
||||
beta = 1.0;
|
||||
hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[fine_grid], U_array[fine_grid], beta, F_array[fine_grid], Vtemp);
|
||||
|
||||
// restrict to F points
|
||||
// hypre_ParVectorSetConstantValues(F_fine_array[coarse_grid], 0.0);
|
||||
hypre_MGRAddVectorR(CF_marker[fine_grid], FMRK, 1.0, Vtemp, 0.0, &(F_fine_array[coarse_grid]));
|
||||
// initialize solution array
|
||||
hypre_ParVectorSetConstantValues(U_fine_array[coarse_grid], 0.0);
|
||||
// solve
|
||||
fine_grid_solver_solve((mgr_data -> aff_solver)[fine_grid], A_ff_array[fine_grid], F_fine_array[coarse_grid],
|
||||
U_fine_array[coarse_grid]);
|
||||
// update solution
|
||||
hypre_MGRAddVectorP(CF_marker[fine_grid], FMRK, 1.0, U_fine_array[coarse_grid], 1.0, &(U_array[fine_grid]));
|
||||
}
|
||||
else
|
||||
@ -756,7 +836,7 @@ hypre_MGRCycle( void *mgr_vdata,
|
||||
alpha = 1.0;
|
||||
beta = 0.0;
|
||||
|
||||
if (restrict_type[fine_grid] > 3)
|
||||
if (restrict_type[fine_grid] == 4 || restrict_type[fine_grid] == 5)
|
||||
use_air = 1;
|
||||
|
||||
if (use_air)
|
||||
|
||||
@ -1401,6 +1401,8 @@ HYPRE_Int hypre_MGRSetNumRelaxSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
|
||||
HYPRE_Int hypre_MGRSetNumInterpSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
|
||||
HYPRE_Int hypre_MGRSetNumRestrictSweeps( void *mgr_vdata, HYPRE_Int nsweeps );
|
||||
HYPRE_Int hypre_MGRSetPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
|
||||
HYPRE_Int hypre_MGRSetFrelaxPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
|
||||
HYPRE_Int hypre_MGRSetCoarseGridPrintLevel( void *mgr_vdata, HYPRE_Int print_level );
|
||||
HYPRE_Int hypre_MGRSetTruncateCoarseGridThreshold( void *mgr_vdata, HYPRE_Real threshold);
|
||||
HYPRE_Int hypre_MGRSetLogging( void *mgr_vdata, HYPRE_Int logging );
|
||||
HYPRE_Int hypre_MGRSetMaxIter( void *mgr_vdata, HYPRE_Int max_iter );
|
||||
|
||||
@ -914,6 +914,12 @@ hypre_CSRMatrixTransposeHost(hypre_CSRMatrix *A,
|
||||
hypre_CSRMatrixI(*AT)[num_cols_A] = num_nnzs_A;
|
||||
hypre_TFree(bucket, HYPRE_MEMORY_HOST);
|
||||
|
||||
/* Move diagonal to first entry (for square matrices only)*/
|
||||
if(num_rows_A == num_cols_A)
|
||||
{
|
||||
hypre_CSRMatrixReorder(*AT);
|
||||
}
|
||||
|
||||
// Set rownnz and num_rownnz
|
||||
if (hypre_CSRMatrixNumRownnz(A) < num_rows_A)
|
||||
{
|
||||
|
||||
@ -1207,6 +1207,14 @@ hypre_CSRMatrixTransposeDevice(hypre_CSRMatrix *A,
|
||||
*AT_ptr = C;
|
||||
|
||||
hypre_SyncCudaComputeStream(hypre_handle());
|
||||
|
||||
/* Put diagonal at first entry (for square matrices only)*/
|
||||
if(nrows_A == ncols_A)
|
||||
{
|
||||
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
|
||||
hypre_CSRMatrixMoveDiagFirstDevice( *AT_ptr );
|
||||
#endif
|
||||
}
|
||||
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
@ -24,7 +24,7 @@ Final Relative Residual Norm = 7.212489e-09
|
||||
|
||||
# Output file: elast.out.6
|
||||
Iterations = 14
|
||||
Final Relative Residual Norm = 1.005958e-09
|
||||
Final Relative Residual Norm = 1.005959e-09
|
||||
|
||||
# Output file: elast.out.7
|
||||
Iterations = 16
|
||||
|
||||
@ -339,7 +339,7 @@ Final Relative Residual Norm = 7.556742e-09
|
||||
|
||||
# Output file: solvers.out.307
|
||||
hypre_ILU Iterations = 38
|
||||
Final Relative Residual Norm = 7.037599e-09
|
||||
Final Relative Residual Norm = 7.037600e-09
|
||||
|
||||
# Output file: solvers.out.308
|
||||
hypre_ILU Iterations = 26
|
||||
|
||||
Loading…
Reference in New Issue
Block a user