hypre/src/parcsr_ls/par_mgr_solve.c
Victor A. P. Magri 8ff65e8124
Improve MGR data printing (#976)
This enhances what print_level can achieve in MGR. Particularly, now we can dump linear system info to files according to the print_level code. We also have the ability now of printing a sequence of linear systems to file (useful when hypre is used in time-stepping application).

A detailed list of changes is given below:

* Add utilities for creating/checking directories
* Add print_level codes to MGR and new info_path member
* Add hypre_MGRDataPrint
* Add call to hypre_MGRDataPrint and logic to update the print_level variable
* Update MGRSolve with new print_level logic
* Remove hypre_MGRWriteSolverParams
* Update documentation for HYPRE_MGRSetPrintLevel
* Implement new logic for HYPRE_MGR_PRINT_MODE_ASCII
2023-10-12 23:15:23 -04:00

1250 lines
50 KiB
C

/******************************************************************************
* Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
* HYPRE Project Developers. See the top-level COPYRIGHT file for details.
*
* SPDX-License-Identifier: (Apache-2.0 OR MIT)
******************************************************************************/
/******************************************************************************
*
* MGR solve routine
*
*****************************************************************************/
#include "_hypre_parcsr_ls.h"
#include "par_mgr.h"
#include "par_amg.h"
/*--------------------------------------------------------------------
* hypre_MGRSolve
*--------------------------------------------------------------------*/
HYPRE_Int
hypre_MGRSolve( void *mgr_vdata,
hypre_ParCSRMatrix *A,
hypre_ParVector *f,
hypre_ParVector *u )
{
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
hypre_ParMGRData *mgr_data = (hypre_ParMGRData*) mgr_vdata;
hypre_ParCSRMatrix **A_array = (mgr_data -> A_array);
hypre_ParVector **F_array = (mgr_data -> F_array);
hypre_ParVector **U_array = (mgr_data -> U_array);
HYPRE_Real tol = (mgr_data -> tol);
HYPRE_Int logging = (mgr_data -> logging);
HYPRE_Int print_level = (mgr_data -> print_level);
HYPRE_Int max_iter = (mgr_data -> max_iter);
HYPRE_Real *norms = (mgr_data -> rel_res_norms);
hypre_ParVector *Vtemp = (mgr_data -> Vtemp);
// hypre_ParVector *Utemp = (mgr_data -> Utemp);
hypre_ParVector *residual;
HYPRE_Complex fp_zero = 0.0;
HYPRE_Complex fp_one = 1.0;
HYPRE_Complex fp_neg_one = - fp_one;
HYPRE_Real conv_factor = 0.0;
HYPRE_Real resnorm = 1.0;
HYPRE_Real init_resnorm = 0.0;
HYPRE_Real rel_resnorm;
HYPRE_Real rhs_norm = 0.0;
HYPRE_Real old_resnorm;
HYPRE_Real ieee_check = 0.;
HYPRE_Int iter, num_procs, my_id;
HYPRE_Solver cg_solver = (mgr_data -> coarse_grid_solver);
HYPRE_Int (*coarse_grid_solver_solve)(void*, void*, void*,
void*) = (mgr_data -> coarse_grid_solver_solve);
HYPRE_ANNOTATE_FUNC_BEGIN;
if (logging > 1)
{
residual = (mgr_data -> residual);
}
(mgr_data -> num_iterations) = 0;
if ((mgr_data -> num_coarse_levels) == 0)
{
/* Do scalar AMG solve when only one level */
coarse_grid_solver_solve(cg_solver, A, f, u);
HYPRE_BoomerAMGGetNumIterations(cg_solver, &iter);
HYPRE_BoomerAMGGetFinalRelativeResidualNorm(cg_solver, &rel_resnorm);
(mgr_data -> num_iterations) = iter;
(mgr_data -> final_rel_residual_norm) = rel_resnorm;
HYPRE_ANNOTATE_FUNC_END;
return hypre_error_flag;
}
U_array[0] = u;
F_array[0] = f;
hypre_MPI_Comm_size(comm, &num_procs);
hypre_MPI_Comm_rank(comm, &my_id);
/*-----------------------------------------------------------------------
* Write the solver parameters
*-----------------------------------------------------------------------*/
/* Print MGR and linear system info according to print level */
hypre_MGRDataPrint(mgr_vdata);
/*-----------------------------------------------------------------------
* write some initial info
*-----------------------------------------------------------------------*/
if (my_id == 0 && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE) && tol > 0.)
{
hypre_printf("\n\nMGR SOLVER SOLUTION INFO:\n");
}
/*-----------------------------------------------------------------------
* Compute initial fine-grid residual and print
*-----------------------------------------------------------------------*/
if ((print_level & HYPRE_MGR_PRINT_INFO_SOLVE) || logging > 1 || tol > 0.)
{
if (logging > 1)
{
hypre_ParVectorCopy(F_array[0], residual );
if (tol > hypre_cabs(fp_zero))
{
hypre_ParCSRMatrixMatvec(fp_neg_one, A_array[0], U_array[0], fp_one, residual);
}
resnorm = hypre_sqrt(hypre_ParVectorInnerProd(residual, residual));
}
else
{
hypre_ParVectorCopy(F_array[0], Vtemp);
if (tol > hypre_cabs(fp_zero))
{
hypre_ParCSRMatrixMatvec(fp_neg_one, A_array[0], U_array[0], fp_one, Vtemp);
}
resnorm = hypre_sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
}
/* Since it does not diminish performance, attempt to return an error flag
* and notify users when they supply bad input. */
if (resnorm != 0.)
{
ieee_check = resnorm / resnorm; /* INF -> NaN conversion */
}
if (ieee_check != ieee_check)
{
/* ...INFs or NaNs in input can make ieee_check a NaN. This test
* for ieee_check self-equality works on all IEEE-compliant compilers/
* machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
* by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be
* found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
if (print_level > 0)
{
hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
hypre_printf("ERROR -- hypre_MGRSolve: INFs and/or NaNs detected in input.\n");
hypre_printf("User probably placed non-numerics in supplied A, x_0, or b.\n");
hypre_printf("ERROR detected by Hypre ... END\n\n\n");
}
hypre_error(HYPRE_ERROR_GENERIC);
HYPRE_ANNOTATE_FUNC_END;
return hypre_error_flag;
}
init_resnorm = resnorm;
rhs_norm = hypre_sqrt(hypre_ParVectorInnerProd(f, f));
if (rhs_norm > HYPRE_REAL_EPSILON)
{
rel_resnorm = init_resnorm / rhs_norm;
}
else
{
/* rhs is zero, return a zero solution */
hypre_ParVectorSetZeros(U_array[0]);
if (logging > 0)
{
rel_resnorm = fp_zero;
(mgr_data -> final_rel_residual_norm) = rel_resnorm;
}
HYPRE_ANNOTATE_FUNC_END;
return hypre_error_flag;
}
}
else
{
rel_resnorm = 1.;
}
if (my_id == 0 && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE))
{
hypre_printf(" relative\n");
hypre_printf(" residual factor residual\n");
hypre_printf(" -------- ------ --------\n");
hypre_printf(" Initial %e %e\n", init_resnorm,
rel_resnorm);
}
/************** Main Solver Loop - always do 1 iteration ************/
iter = 0;
while ((rel_resnorm >= tol || iter < 1) && iter < max_iter)
{
/* Do one cycle of reduction solve on A*e = r */
hypre_MGRCycle(mgr_data, F_array, U_array);
/*---------------------------------------------------------------
* Compute fine-grid residual and residual norm
*----------------------------------------------------------------*/
if ((print_level & HYPRE_MGR_PRINT_INFO_SOLVE) || logging > 1 || tol > 0.)
{
old_resnorm = resnorm;
if (logging > 1)
{
hypre_ParVectorCopy(F_array[0], residual);
hypre_ParCSRMatrixMatvec(fp_neg_one, A_array[0], U_array[0], fp_one, residual);
resnorm = hypre_sqrt(hypre_ParVectorInnerProd(residual, residual));
}
else
{
hypre_ParVectorCopy(F_array[0], Vtemp);
hypre_ParCSRMatrixMatvec(fp_neg_one, A_array[0], U_array[0], fp_one, Vtemp);
resnorm = hypre_sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
}
conv_factor = (old_resnorm > HYPRE_REAL_EPSILON) ? (resnorm / old_resnorm) : resnorm;
rel_resnorm = (rhs_norm > HYPRE_REAL_EPSILON) ? (resnorm / rhs_norm) : resnorm;
norms[iter] = rel_resnorm;
}
++iter;
(mgr_data -> num_iterations) = iter;
(mgr_data -> final_rel_residual_norm) = rel_resnorm;
if (my_id == 0 && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE))
{
hypre_printf(" MGRCycle %2d %e %f %e \n", iter,
resnorm, conv_factor, rel_resnorm);
}
}
/* check convergence within max_iter */
if (iter == max_iter && tol > 0.)
{
hypre_error(HYPRE_ERROR_CONV);
if (!my_id && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE))
{
hypre_printf("\n\n==============================================");
hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
hypre_printf(" within the allowed %d iterations\n", max_iter);
hypre_printf("==============================================");
}
}
if ((my_id == 0) && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE))
{
if (iter > 0 && init_resnorm)
{
conv_factor = hypre_pow((resnorm / init_resnorm),
(fp_one / (HYPRE_Real) iter));
}
else
{
conv_factor = fp_one;
}
hypre_printf("\n\n Average Convergence Factor = %f \n", conv_factor);
}
HYPRE_ANNOTATE_FUNC_END;
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
* hypre_MGRFrelaxVcycle
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_MGRFrelaxVcycle ( void *Frelax_vdata,
hypre_ParVector *f,
hypre_ParVector *u )
{
hypre_ParAMGData *Frelax_data = (hypre_ParAMGData*) Frelax_vdata;
HYPRE_Int Not_Finished = 0;
HYPRE_Int level = 0;
HYPRE_Int cycle_param = 1;
HYPRE_Int j, Solve_err_flag, coarse_grid, fine_grid;
HYPRE_Int local_size;
HYPRE_Int num_sweeps = 1;
HYPRE_Int relax_order = hypre_ParAMGDataRelaxOrder(Frelax_data);
HYPRE_Int relax_type = 3;
HYPRE_Real relax_weight = 1.0;
HYPRE_Real omega = 1.0;
hypre_ParVector **F_array = (Frelax_data) -> F_array;
hypre_ParVector **U_array = (Frelax_data) -> U_array;
hypre_ParCSRMatrix **A_array = ((Frelax_data) -> A_array);
hypre_ParCSRMatrix **R_array = ((Frelax_data) -> P_array);
hypre_ParCSRMatrix **P_array = ((Frelax_data) -> P_array);
hypre_IntArray **CF_marker_array = ((Frelax_data) -> CF_marker_array);
HYPRE_Int *CF_marker;
hypre_ParVector *Vtemp = (Frelax_data) -> Vtemp;
hypre_ParVector *Ztemp = (Frelax_data) -> Ztemp;
HYPRE_Int num_c_levels = (Frelax_data) -> num_levels;
hypre_ParVector *Aux_F = NULL;
hypre_ParVector *Aux_U = NULL;
HYPRE_Complex fp_zero = 0.0;
HYPRE_Complex fp_one = 1.0;
HYPRE_Complex fp_neg_one = - fp_one;
HYPRE_ANNOTATE_FUNC_BEGIN;
F_array[0] = f;
U_array[0] = u;
CF_marker = NULL;
if (CF_marker_array[0])
{
CF_marker = hypre_IntArrayData(CF_marker_array[0]);
}
/* (Re)set local_size for Vtemp */
local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[0]));
hypre_ParVectorSetLocalSize(Vtemp, local_size);
/* smoother on finest level:
* This is separated from subsequent levels since the finest level matrix
* may be larger than what is needed for the vcycle solve
*/
if (relax_order == 1) // C/F ordering for smoother
{
for (j = 0; j < num_sweeps; j++)
{
Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[0],
F_array[0],
CF_marker,
relax_type,
relax_order,
1,
relax_weight,
omega,
NULL,
U_array[0],
Vtemp,
Ztemp);
}
}
else // lexicographic ordering for smoother (on F points in CF marker)
{
for (j = 0; j < num_sweeps; j++)
{
Solve_err_flag = hypre_BoomerAMGRelax(A_array[0],
F_array[0],
CF_marker,
relax_type,
-1,
relax_weight,
omega,
NULL,
U_array[0],
Vtemp,
Ztemp);
}
}
/* coarse grids exist */
if (num_c_levels > 0)
{
Not_Finished = 1;
}
while (Not_Finished)
{
if (cycle_param == 1)
{
//hypre_printf("Vcycle smoother (down cycle): vtemp size = %d, level = %d \n", hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)), level);
/* compute coarse grid vectors */
fine_grid = level;
coarse_grid = level + 1;
hypre_ParVectorSetZeros(U_array[coarse_grid]);
/* Avoid unnecessary copy using out-of-place version of SpMV */
hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid], U_array[fine_grid],
fp_one, F_array[fine_grid], Vtemp);
hypre_ParCSRMatrixMatvecT(fp_one, R_array[fine_grid], Vtemp,
fp_zero, F_array[coarse_grid]);
/* update level */
++level;
/* Update scratch vector sizes */
local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level]));
hypre_ParVectorSetLocalSize(Vtemp, local_size);
hypre_ParVectorSetLocalSize(Ztemp, local_size);
CF_marker = NULL;
if (CF_marker_array[level])
{
CF_marker = hypre_IntArrayData(CF_marker_array[level]);
}
/* next level is coarsest level */
if (level == num_c_levels)
{
/* switch to coarsest level */
cycle_param = 3;
}
else
{
Aux_F = F_array[level];
Aux_U = U_array[level];
/* relax and visit next coarse grid */
for (j = 0; j < num_sweeps; j++)
{
Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[level],
Aux_F,
CF_marker,
relax_type,
relax_order,
cycle_param,
relax_weight,
omega,
NULL,
Aux_U,
Vtemp,
Ztemp);
}
cycle_param = 1;
}
}
else if (cycle_param == 3)
{
if (hypre_ParAMGDataUserCoarseRelaxType(Frelax_data) == 9)
{
/* solve the coarsest grid with Gaussian elimination */
hypre_GaussElimSolve(Frelax_data, level, 9);
}
else
{
/* solve with relaxation */
Aux_F = F_array[level];
Aux_U = U_array[level];
for (j = 0; j < num_sweeps; j++)
{
Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[level],
Aux_F,
CF_marker,
relax_type,
relax_order,
cycle_param,
relax_weight,
omega,
NULL,
Aux_U,
Vtemp,
Ztemp);
}
}
//hypre_printf("Vcycle smoother (coarse level): vtemp size = %d, level = %d \n", hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)), level);
cycle_param = 2;
}
else if (cycle_param == 2)
{
/*---------------------------------------------------------------
* Visit finer level next.
* Interpolate and add correction using hypre_ParCSRMatrixMatvec.
* Reset counters and cycling parameters for finer level.
*--------------------------------------------------------------*/
fine_grid = level - 1;
coarse_grid = level;
/* Update solution at the fine level */
hypre_ParCSRMatrixMatvec(fp_one, P_array[fine_grid],
U_array[coarse_grid],
fp_one, U_array[fine_grid]);
--level;
cycle_param = 2;
if (level == 0) { cycle_param = 99; }
/* Update scratch vector sizes */
local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level]));
hypre_ParVectorSetLocalSize(Vtemp, local_size);
hypre_ParVectorSetLocalSize(Ztemp, local_size);
//hypre_printf("Vcycle smoother (up cycle): vtemp size = %d, level = %d \n", hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)), level);
}
else
{
Not_Finished = 0;
}
}
HYPRE_ANNOTATE_FUNC_END;
return Solve_err_flag;
}
/*--------------------------------------------------------------------------
* hypre_MGRCycle
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_MGRCycle( void *mgr_vdata,
hypre_ParVector **F_array,
hypre_ParVector **U_array )
{
MPI_Comm comm;
hypre_ParMGRData *mgr_data = (hypre_ParMGRData*) mgr_vdata;
HYPRE_Int local_size;
HYPRE_Int level;
HYPRE_Int coarse_grid;
HYPRE_Int fine_grid;
HYPRE_Int Not_Finished;
HYPRE_Int cycle_type;
HYPRE_Int print_level = (mgr_data -> print_level);
HYPRE_Int frelax_print_level = (mgr_data -> frelax_print_level);
HYPRE_Complex *l1_norms;
HYPRE_Int *CF_marker_data;
hypre_ParCSRMatrix **A_array = (mgr_data -> A_array);
hypre_ParCSRMatrix **RT_array = (mgr_data -> RT_array);
hypre_ParCSRMatrix **P_array = (mgr_data -> P_array);
#if defined(HYPRE_USING_GPU)
hypre_ParCSRMatrix **B_array = (mgr_data -> B_array);
hypre_ParCSRMatrix **B_FF_array = (mgr_data -> B_FF_array);
hypre_ParCSRMatrix **P_FF_array = (mgr_data -> P_FF_array);
#endif
hypre_ParCSRMatrix *RAP = (mgr_data -> RAP);
HYPRE_Int use_default_cgrid_solver = (mgr_data -> use_default_cgrid_solver);
HYPRE_Solver cg_solver = (mgr_data -> coarse_grid_solver);
HYPRE_Int (*coarse_grid_solver_solve)(void*, void*, void*, void*) =
(mgr_data -> coarse_grid_solver_solve);
hypre_IntArray **CF_marker = (mgr_data -> CF_marker_array);
HYPRE_Int *nsweeps = (mgr_data -> num_relax_sweeps);
HYPRE_Int relax_type = (mgr_data -> relax_type);
HYPRE_Real relax_weight = (mgr_data -> relax_weight);
HYPRE_Real omega = (mgr_data -> omega);
hypre_Vector **l1_norms_array = (mgr_data -> l1_norms);
hypre_ParVector *Vtemp = (mgr_data -> Vtemp);
hypre_ParVector *Ztemp = (mgr_data -> Ztemp);
hypre_ParVector *Utemp = (mgr_data -> Utemp);
hypre_ParVector **U_fine_array = (mgr_data -> U_fine_array);
hypre_ParVector **F_fine_array = (mgr_data -> F_fine_array);
HYPRE_Int (*fine_grid_solver_solve)(void*, void*, void*, void*) =
(mgr_data -> fine_grid_solver_solve);
hypre_ParCSRMatrix **A_ff_array = (mgr_data -> A_ff_array);
HYPRE_Int i, relax_points;
HYPRE_Int num_coarse_levels = (mgr_data -> num_coarse_levels);
HYPRE_Complex fp_zero = 0.0;
HYPRE_Complex fp_one = 1.0;
HYPRE_Complex fp_neg_one = - fp_one;
HYPRE_Int *Frelax_type = (mgr_data -> Frelax_type);
HYPRE_Int *interp_type = (mgr_data -> interp_type);
hypre_ParAMGData **FrelaxVcycleData = (mgr_data -> FrelaxVcycleData);
HYPRE_Real **frelax_diaginv = (mgr_data -> frelax_diaginv);
HYPRE_Int *blk_size = (mgr_data -> blk_size);
HYPRE_Int block_size = (mgr_data -> block_size);
HYPRE_Int *block_num_coarse_indexes = (mgr_data -> block_num_coarse_indexes);
/* TODO (VPM): refactor names blk_size and block_size */
HYPRE_Int *level_smooth_type = (mgr_data -> level_smooth_type);
HYPRE_Int *level_smooth_iters = (mgr_data -> level_smooth_iters);
HYPRE_Int *restrict_type = (mgr_data -> restrict_type);
HYPRE_Int pre_smoothing = (mgr_data -> global_smooth_cycle) == 1 ? 1 : 0;
HYPRE_Int post_smoothing = (mgr_data -> global_smooth_cycle) == 2 ? 1 : 0;
HYPRE_Int use_air = 0;
HYPRE_Int my_id;
char region_name[1024];
char msg[1024];
#if defined(HYPRE_USING_GPU)
HYPRE_MemoryLocation memory_location;
HYPRE_ExecutionPolicy exec;
#endif
/* Initialize */
HYPRE_ANNOTATE_FUNC_BEGIN;
hypre_GpuProfilingPushRange("MGRCycle");
comm = hypre_ParCSRMatrixComm(A_array[0]);
hypre_MPI_Comm_rank(comm, &my_id);
Not_Finished = 1;
cycle_type = 1;
level = 0;
/***** Main loop ******/
while (Not_Finished)
{
/* Update scratch vector sizes */
local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level]));
hypre_ParVectorSetLocalSize(Vtemp, local_size);
hypre_ParVectorSetLocalSize(Ztemp, local_size);
hypre_ParVectorSetLocalSize(Utemp, local_size);
/* Do coarse grid correction solve */
if (cycle_type == 3)
{
/* call coarse grid solver here (default is BoomerAMG) */
hypre_sprintf(region_name, "%s-%d", "MGR_Level", level);
hypre_GpuProfilingPushRange(region_name);
HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
coarse_grid_solver_solve(cg_solver, RAP, F_array[level], U_array[level]);
if (use_default_cgrid_solver)
{
HYPRE_Real convergence_factor_cg;
hypre_BoomerAMGGetRelResidualNorm(cg_solver, &convergence_factor_cg);
(mgr_data -> cg_convergence_factor) = convergence_factor_cg;
if ((print_level) > 1 && my_id == 0 && convergence_factor_cg > hypre_cabs(fp_one))
{
hypre_printf("Warning!!! Coarse grid solve diverges. Factor = %1.2e\n",
convergence_factor_cg);
}
}
/* Error checking */
if (HYPRE_GetError())
{
hypre_sprintf(msg, "[%d]: Error from MGR's coarsest level solver (level %d)\n",
my_id, level);
hypre_error_w_msg(HYPRE_ERROR_GENERIC, msg);
HYPRE_ClearAllErrors();
}
/* DEBUG: print the coarse system indicated by mgr_data->print_coarse_system */
if (mgr_data -> print_coarse_system)
{
hypre_ParCSRMatrixPrintIJ(RAP, 1, 1, "RAP_mat");
hypre_ParVectorPrintIJ(F_array[level], 1, "RAP_rhs");
hypre_ParVectorPrintIJ(U_array[level], 1, "RAP_sol");
mgr_data -> print_coarse_system--;
}
/**** cycle up ***/
cycle_type = 2;
hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_REGION_END("%s", region_name);
}
/* Down cycle */
else if (cycle_type == 1)
{
/* Set fine/coarse grid level indices */
fine_grid = level;
coarse_grid = level + 1;
l1_norms = l1_norms_array[fine_grid] ?
hypre_VectorData(l1_norms_array[fine_grid]) : NULL;
CF_marker_data = hypre_IntArrayData(CF_marker[fine_grid]);
#if defined(HYPRE_USING_GPU)
memory_location = hypre_ParCSRMatrixMemoryLocation(A_array[fine_grid]);
exec = hypre_GetExecPolicy1(memory_location);
#endif
hypre_sprintf(region_name, "%s-%d", "MGR_Level", fine_grid);
hypre_GpuProfilingPushRange(region_name);
HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
/* Global pre smoothing sweeps */
if (pre_smoothing && (level_smooth_iters[fine_grid] > 0))
{
hypre_sprintf(region_name, "Global-Relax");
hypre_GpuProfilingPushRange(region_name);
HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
if ((level_smooth_type[fine_grid]) == 0 ||
(level_smooth_type[fine_grid]) == 1)
{
/* Block Jacobi/Gauss-Seidel smoother */
#if defined(HYPRE_USING_GPU)
if (exec == HYPRE_EXEC_DEVICE)
{
for (i = 0; i < level_smooth_iters[fine_grid]; i++)
{
hypre_MGRBlockRelaxSolveDevice(B_array[fine_grid], A_array[fine_grid],
F_array[fine_grid], U_array[fine_grid],
Vtemp, fp_one);
}
}
else
#endif
{
HYPRE_Real *level_diaginv = (mgr_data -> level_diaginv)[fine_grid];
HYPRE_Int level_blk_size = (level == 0) ? block_size :
block_num_coarse_indexes[level - 1];
HYPRE_Int nrows = hypre_ParCSRMatrixNumRows(A_array[fine_grid]);
HYPRE_Int n_block = nrows / level_blk_size;
HYPRE_Int left_size = nrows - n_block * level_blk_size;
for (i = 0; i < level_smooth_iters[fine_grid]; i++)
{
hypre_MGRBlockRelaxSolve(A_array[fine_grid], F_array[fine_grid],
U_array[fine_grid], level_blk_size,
n_block, left_size, level_smooth_type[fine_grid],
level_diaginv, Vtemp);
}
}
hypre_ParVectorAllZeros(U_array[fine_grid]) = 0;
}
else if ((level_smooth_type[fine_grid] > 1) &&
(level_smooth_type[fine_grid] < 7))
{
for (i = 0; i < level_smooth_iters[fine_grid]; i ++)
{
hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid], NULL,
level_smooth_type[fine_grid] - 1, 0, fp_one,
fp_zero, NULL, U_array[fine_grid], Vtemp, NULL);
}
}
else if (level_smooth_type[fine_grid] == 8)
{
/* Euclid ILU smoother */
for (i = 0; i < level_smooth_iters[fine_grid]; i++)
{
/* Compute residual */
hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
U_array[fine_grid], fp_one,
F_array[fine_grid], Vtemp);
/* Solve */
HYPRE_EuclidSolve((mgr_data -> level_smoother)[fine_grid],
A_array[fine_grid], Vtemp, Utemp);
/* Update solution */
hypre_ParVectorAxpy(fp_one, Utemp, U_array[fine_grid]);
hypre_ParVectorAllZeros(U_array[fine_grid]) = 0;
}
}
else if (level_smooth_type[fine_grid] == 16)
{
/* hypre_ILU smoother */
HYPRE_ILUSolve((mgr_data -> level_smoother)[fine_grid],
A_array[fine_grid], F_array[fine_grid],
U_array[fine_grid]);
hypre_ParVectorAllZeros(U_array[fine_grid]) = 0;
}
else
{
/* Generic relaxation interface */
for (i = 0; i < level_smooth_iters[fine_grid]; i++)
{
hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid],
NULL, level_smooth_type[fine_grid],
0, fp_one, fp_one, l1_norms,
U_array[fine_grid], Vtemp, Ztemp);
}
}
/* Error checking */
if (HYPRE_GetError())
{
hypre_sprintf(msg, "[%d]: Error from global pre-relaxation %d at level %d \n",
my_id, level_smooth_type[fine_grid], fine_grid);
hypre_error_w_msg(HYPRE_ERROR_GENERIC, msg);
HYPRE_ClearAllErrors();
}
hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_REGION_END("%s", region_name);
} /* End global pre-smoothing */
/* F-relaxation */
relax_points = -1;
hypre_sprintf(region_name, "F-Relax");
hypre_GpuProfilingPushRange(region_name);
HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
if (Frelax_type[fine_grid] == 0)
{
/* (single level) Block-relaxation for A_ff */
if (interp_type[fine_grid] == 12)
{
HYPRE_Int nrows = hypre_ParCSRMatrixNumRows(A_ff_array[fine_grid]);
HYPRE_Int n_block = nrows / blk_size[fine_grid];
HYPRE_Int left_size = nrows - n_block * blk_size[fine_grid];
for (i = 0; i < nsweeps[fine_grid]; i++)
{
/* F-relaxation is reducing the global residual, thus recompute it */
hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
U_array[fine_grid], fp_one,
F_array[fine_grid], Vtemp);
/* Restrict to F points */
#if defined(HYPRE_USING_GPU)
if (exec == HYPRE_EXEC_DEVICE)
{
hypre_ParCSRMatrixMatvecT(fp_one, P_FF_array[fine_grid], Vtemp,
fp_zero, F_fine_array[coarse_grid]);
}
else
#endif
{
hypre_MGRAddVectorR(CF_marker[fine_grid], FMRK, fp_one, Vtemp,
fp_zero, &(F_fine_array[coarse_grid]));
}
/* Set initial guess to zero */
hypre_ParVectorSetZeros(U_fine_array[coarse_grid]);
#if defined(HYPRE_USING_GPU)
if (exec == HYPRE_EXEC_DEVICE)
{
hypre_MGRBlockRelaxSolveDevice(B_FF_array[fine_grid],
A_ff_array[fine_grid],
F_fine_array[fine_grid],
U_fine_array[fine_grid],
Vtemp, fp_one);
}
else
#endif
{
hypre_MGRBlockRelaxSolve(A_ff_array[fine_grid], F_fine_array[coarse_grid],
U_fine_array[coarse_grid], blk_size[fine_grid],
n_block, left_size, 0, frelax_diaginv[fine_grid],
Vtemp);
}
/* Interpolate the solution back to the fine grid level */
#if defined(HYPRE_USING_GPU)
if (exec == HYPRE_EXEC_DEVICE)
{
hypre_ParCSRMatrixMatvec(fp_one, P_FF_array[fine_grid],
U_fine_array[coarse_grid], fp_one,
U_fine_array[fine_grid]);
}
else
#endif
{
hypre_MGRAddVectorP(CF_marker[fine_grid], FMRK, fp_one,
U_fine_array[coarse_grid], fp_one,
&(U_array[fine_grid]));
}
}
}
else
{
if (relax_type == 18)
{
#if defined(HYPRE_USING_GPU)
for (i = 0; i < nsweeps[fine_grid]; i++)
{
hypre_MGRRelaxL1JacobiDevice(A_array[fine_grid], F_array[fine_grid],
CF_marker_data, relax_points, relax_weight,
l1_norms, U_array[fine_grid], Vtemp);
}
#else
for (i = 0; i < nsweeps[fine_grid]; i++)
{
hypre_ParCSRRelax_L1_Jacobi(A_array[fine_grid], F_array[fine_grid],
CF_marker_data, relax_points, relax_weight,
l1_norms, U_array[fine_grid], Vtemp);
}
#endif
}
else
{
for (i = 0; i < nsweeps[fine_grid]; i++)
{
hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid],
CF_marker_data, relax_type, relax_points,
relax_weight, omega, l1_norms,
U_array[fine_grid], Vtemp, Ztemp);
}
}
}
}
else if (Frelax_type[fine_grid] == 1)
{
/* V-cycle smoother for A_ff */
//HYPRE_Real convergence_factor_frelax;
// compute residual before solve
// hypre_ParCSRMatrixMatvecOutOfPlace(-fp_one, A_array[fine_grid],
// U_array[fine_grid], fp_one,
// F_array[fine_grid], Vtemp);
// convergence_factor_frelax = hypre_ParVectorInnerProd(Vtemp, Vtemp);
HYPRE_Real resnorm, init_resnorm;
HYPRE_Real rhs_norm, old_resnorm;
HYPRE_Real rel_resnorm = fp_one;
HYPRE_Real conv_factor = fp_one;
if (frelax_print_level > 1)
{
hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
U_array[fine_grid], fp_one,
F_array[fine_grid], Vtemp);
resnorm = hypre_sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
init_resnorm = resnorm;
rhs_norm = hypre_sqrt(hypre_ParVectorInnerProd(F_array[fine_grid], F_array[fine_grid]));
if (rhs_norm > HYPRE_REAL_EPSILON)
{
rel_resnorm = init_resnorm / rhs_norm;
}
else
{
/* rhs is zero, return a zero solution */
hypre_ParVectorSetZeros(U_array[0]);
HYPRE_ANNOTATE_FUNC_END;
hypre_GpuProfilingPopRange();
return hypre_error_flag;
}
if (my_id == 0 && frelax_print_level > 1)
{
hypre_printf("\nBegin F-relaxation: V-Cycle Smoother \n");
hypre_printf(" relative\n");
hypre_printf(" residual factor residual\n");
hypre_printf(" -------- ------ --------\n");
hypre_printf(" Initial %e %e\n", init_resnorm,
rel_resnorm);
}
}
for (i = 0; i < nsweeps[fine_grid]; i++)
{
hypre_MGRFrelaxVcycle(FrelaxVcycleData[fine_grid],
F_array[fine_grid],
U_array[fine_grid]);
if (frelax_print_level > 1)
{
old_resnorm = resnorm;
hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
U_array[fine_grid], fp_one,
F_array[fine_grid], Vtemp);
resnorm = hypre_sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
conv_factor = (old_resnorm > HYPRE_REAL_EPSILON) ?
(resnorm / old_resnorm) : resnorm;
rel_resnorm = (rhs_norm > HYPRE_REAL_EPSILON) ? (resnorm / rhs_norm) : resnorm;
if (my_id == 0)
{
hypre_printf("\n V-Cycle %2d %e %f %e \n", i,
resnorm, conv_factor, rel_resnorm);
}
}
}
if (my_id == 0 && frelax_print_level > 1)
{
hypre_printf("End F-relaxation: V-Cycle Smoother \n\n");
}
// compute residual after solve
//hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
// U_array[fine_grid], fp_one,
// F_array[fine_grid], Vtemp);
//convergence_factor_frelax = hypre_ParVectorInnerProd(Vtemp, Vtemp)/convergence_factor_frelax;
//hypre_printf("F-relaxation V-cycle convergence factor: %5f\n", convergence_factor_frelax);
}
else if (Frelax_type[level] == 2 ||
Frelax_type[level] == 9 ||
Frelax_type[level] == 99 ||
Frelax_type[level] == 199)
{
/* We need to compute the residual first to ensure that
F-relaxation is reducing the global residual */
hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
U_array[fine_grid], fp_one,
F_array[fine_grid], Vtemp);
/* Restrict to F points */
#if defined (HYPRE_USING_GPU)
hypre_ParCSRMatrixMatvecT(fp_one, P_FF_array[fine_grid], Vtemp,
fp_zero, F_fine_array[coarse_grid]);
#else
hypre_MGRAddVectorR(CF_marker[fine_grid], FMRK, fp_one, Vtemp,
fp_zero, &(F_fine_array[coarse_grid]));
#endif
/* Set initial guess to zeros */
hypre_ParVectorSetZeros(U_fine_array[coarse_grid]);
if (Frelax_type[level] == 2)
{
/* Do F-relaxation using AMG */
fine_grid_solver_solve((mgr_data -> aff_solver)[fine_grid],
A_ff_array[fine_grid],
F_fine_array[coarse_grid],
U_fine_array[coarse_grid]);
}
else
{
/* Do F-relaxation using Gaussian Elimination */
hypre_GaussElimSolve((mgr_data -> GSElimData)[fine_grid],
level, Frelax_type[level]);
}
/* Interpolate the solution back to the fine grid level */
#if defined (HYPRE_USING_GPU)
hypre_ParCSRMatrixMatvec(fp_one, P_FF_array[fine_grid],
U_fine_array[coarse_grid], fp_one,
U_array[fine_grid]);
#else
hypre_MGRAddVectorP(CF_marker[fine_grid], FMRK, fp_one,
U_fine_array[coarse_grid], fp_one,
&(U_array[fine_grid]));
#endif
}
else
{
for (i = 0; i < nsweeps[fine_grid]; i++)
{
hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid],
CF_marker_data, Frelax_type[fine_grid],
relax_points, relax_weight, omega, l1_norms,
U_array[fine_grid], Vtemp, Ztemp);
}
}
/* Error checking */
if (HYPRE_GetError())
{
hypre_sprintf(msg, "[%d]: Error from F-relaxation %d at MGR level %d\n",
my_id, Frelax_type[fine_grid], fine_grid);
hypre_error_w_msg(HYPRE_ERROR_GENERIC, msg);
HYPRE_ClearAllErrors();
}
hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_REGION_END("%s", region_name);
/* Update residual and compute coarse-grid rhs */
hypre_sprintf(region_name, "Residual");
hypre_GpuProfilingPushRange(region_name);
HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
U_array[fine_grid], fp_one,
F_array[fine_grid], Vtemp);
hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_REGION_END("%s", region_name);
if ((restrict_type[fine_grid] == 4) ||
(restrict_type[fine_grid] == 5))
{
use_air = 1;
}
hypre_sprintf(region_name, "Restrict");
hypre_GpuProfilingPushRange(region_name);
HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
if (use_air)
{
/* no transpose necessary for R */
hypre_ParCSRMatrixMatvec(fp_one, RT_array[fine_grid], Vtemp,
fp_zero, F_array[coarse_grid]);
}
else
{
#if defined(HYPRE_USING_GPU)
if (restrict_type[fine_grid] > 0 || (exec == HYPRE_EXEC_DEVICE))
#else
if (restrict_type[fine_grid] > 0)
#endif
{
hypre_ParCSRMatrixMatvecT(fp_one, RT_array[fine_grid], Vtemp,
fp_zero, F_array[coarse_grid]);
}
else
{
hypre_MGRAddVectorR(CF_marker[fine_grid], CMRK, fp_one,
Vtemp, fp_zero, &(F_array[coarse_grid]));
}
}
hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_REGION_END("%s", region_name);
hypre_sprintf(region_name, "%s-%d", "MGR_Level", fine_grid);
hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_REGION_END("%s", region_name);
/* Initialize coarse grid solution array (VPM: double-check this for multiple cycles)*/
hypre_ParVectorSetZeros(U_array[coarse_grid]);
++level;
if (level == num_coarse_levels)
{
cycle_type = 3;
}
}
/* Up cycle */
else if (level != 0)
{
/* Set fine/coarse grid level indices */
fine_grid = level - 1;
coarse_grid = level;
l1_norms = l1_norms_array[fine_grid] ?
hypre_VectorData(l1_norms_array[fine_grid]) : NULL;
CF_marker_data = hypre_IntArrayData(CF_marker[fine_grid]);
#if defined(HYPRE_USING_GPU)
memory_location = hypre_ParCSRMatrixMemoryLocation(A_array[fine_grid]);
exec = hypre_GetExecPolicy1(memory_location);
#endif
hypre_sprintf(region_name, "%s-%d", "MGR_Level", fine_grid);
hypre_GpuProfilingPushRange(region_name);
HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
/* Interpolate */
hypre_sprintf(region_name, "Prolongate");
hypre_GpuProfilingPushRange(region_name);
HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
#if defined(HYPRE_USING_GPU)
if (interp_type[fine_grid] > 0 || (exec == HYPRE_EXEC_DEVICE))
#else
if (interp_type[fine_grid] > 0)
#endif
{
hypre_ParCSRMatrixMatvec(fp_one, P_array[fine_grid],
U_array[coarse_grid],
fp_one, U_array[fine_grid]);
}
else
{
hypre_MGRAddVectorP(CF_marker[fine_grid], CMRK, fp_one,
U_array[coarse_grid], fp_one,
&(U_array[fine_grid]));
}
hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_REGION_END("%s", region_name);
/* Global post smoothing sweeps */
if (post_smoothing & (level_smooth_iters[fine_grid] > 0))
{
hypre_sprintf(region_name, "Global-Relax");
hypre_GpuProfilingPushRange(region_name);
HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
/* Block Jacobi smoother */
if ((level_smooth_type[fine_grid] == 0) ||
(level_smooth_type[fine_grid] == 1))
{
#if defined(HYPRE_USING_GPU)
if (exec == HYPRE_EXEC_DEVICE)
{
for (i = 0; i < level_smooth_iters[fine_grid]; i++)
{
hypre_MGRBlockRelaxSolveDevice(B_array[fine_grid], A_array[fine_grid],
F_array[fine_grid], U_array[fine_grid],
Vtemp, fp_one);
}
}
else
#endif
{
HYPRE_Real *level_diaginv = (mgr_data -> level_diaginv)[fine_grid];
HYPRE_Int level_blk_size = (fine_grid == 0) ? block_size :
block_num_coarse_indexes[fine_grid - 1];
HYPRE_Int nrows = hypre_ParCSRMatrixNumRows(A_array[fine_grid]);
HYPRE_Int n_block = nrows / level_blk_size;
HYPRE_Int left_size = nrows - n_block * level_blk_size;
for (i = 0; i < level_smooth_iters[fine_grid]; i++)
{
hypre_MGRBlockRelaxSolve(A_array[fine_grid], F_array[fine_grid],
U_array[fine_grid], level_blk_size, n_block,
left_size, level_smooth_type[fine_grid],
level_diaginv, Vtemp);
}
}
}
else if ((level_smooth_type[fine_grid] > 1) && (level_smooth_type[fine_grid] < 7))
{
for (i = 0; i < level_smooth_iters[fine_grid]; i++)
{
hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid], NULL,
level_smooth_type[fine_grid] - 1, 0, fp_one,
fp_zero, l1_norms, U_array[fine_grid], Vtemp, NULL);
}
}
else if (level_smooth_type[fine_grid] == 8)
{
/* Euclid ILU */
for (i = 0; i < level_smooth_iters[fine_grid]; i++)
{
/* Compute residual */
hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
U_array[fine_grid], fp_one,
F_array[fine_grid], Vtemp);
/* Solve */
HYPRE_EuclidSolve((mgr_data -> level_smoother)[fine_grid],
A_array[fine_grid], Vtemp, Utemp);
/* Update solution */
hypre_ParVectorAxpy(fp_one, Utemp, U_array[fine_grid]);
}
}
else if (level_smooth_type[fine_grid] == 16)
{
/* HYPRE ILU */
HYPRE_ILUSolve((mgr_data -> level_smoother)[fine_grid],
A_array[fine_grid], F_array[fine_grid],
U_array[fine_grid]);
}
else
{
/* Generic relaxation interface */
for (i = 0; i < level_smooth_iters[level]; i++)
{
hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid],
NULL, level_smooth_type[fine_grid], 0,
fp_one, fp_one, l1_norms,
U_array[fine_grid], Vtemp, Ztemp);
}
}
/* Error checking */
if (HYPRE_GetError())
{
hypre_sprintf(msg, "[%d]: Error from global post-relaxation %d at MGR level %d\n",
my_id, level_smooth_type[fine_grid], fine_grid);
hypre_error_w_msg(HYPRE_ERROR_GENERIC, msg);
HYPRE_ClearAllErrors();
}
hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_REGION_END("%s", region_name);
} /* End post-smoothing */
hypre_sprintf(region_name, "%s-%d", "MGR_Level", fine_grid);
hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_REGION_END("%s", region_name);
--level;
} /* End interpolate */
else
{
Not_Finished = 0;
}
}
HYPRE_ANNOTATE_FUNC_END;
hypre_GpuProfilingPopRange();
return hypre_error_flag;
}