/******************************************************************************
 * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
 *
 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
 ******************************************************************************/

/******************************************************************************
 *
 * MGR solve routine
 *
 *****************************************************************************/
#include "_hypre_parcsr_ls.h"
#include "par_mgr.h"
#include "par_amg.h"

/*--------------------------------------------------------------------
 * hypre_MGRSolve
 *--------------------------------------------------------------------*/
HYPRE_Int
hypre_MGRSolve( void               *mgr_vdata,
                hypre_ParCSRMatrix *A,
                hypre_ParVector    *f,
                hypre_ParVector    *u )
{

   MPI_Comm              comm = hypre_ParCSRMatrixComm(A);
   hypre_ParMGRData   *mgr_data = (hypre_ParMGRData*) mgr_vdata;

   hypre_ParCSRMatrix  **A_array = (mgr_data -> A_array);
   hypre_ParVector    **F_array = (mgr_data -> F_array);
   hypre_ParVector    **U_array = (mgr_data -> U_array);

   HYPRE_Real           tol = (mgr_data -> tol);
   HYPRE_Int            logging = (mgr_data -> logging);
   HYPRE_Int            print_level = (mgr_data -> print_level);
   HYPRE_Int            max_iter = (mgr_data -> max_iter);
   HYPRE_Real           *norms = (mgr_data -> rel_res_norms);
   hypre_ParVector      *Vtemp = (mgr_data -> Vtemp);
   //   hypre_ParVector      *Utemp = (mgr_data -> Utemp);
   hypre_ParVector      *residual;

   HYPRE_Complex        fp_zero = 0.0;
   HYPRE_Complex        fp_one = 1.0;
   HYPRE_Complex        fp_neg_one = - fp_one;
   HYPRE_Real           conv_factor = 0.0;
   HYPRE_Real           resnorm = 1.0;
   HYPRE_Real           init_resnorm = 0.0;
   HYPRE_Real           rel_resnorm;
   HYPRE_Real           rhs_norm = 0.0;
   HYPRE_Real           old_resnorm;
   HYPRE_Real           ieee_check = 0.;

   HYPRE_Int            iter, num_procs, my_id;

   HYPRE_Solver         cg_solver = (mgr_data -> coarse_grid_solver);
   HYPRE_Int            (*coarse_grid_solver_solve)(void*, void*, void*,
                                                    void*) = (mgr_data -> coarse_grid_solver_solve);

   HYPRE_ANNOTATE_FUNC_BEGIN;
   if (logging > 1)
   {
      residual = (mgr_data -> residual);
   }

   (mgr_data -> num_iterations) = 0;

   if ((mgr_data -> num_coarse_levels) == 0)
   {
      /* Do scalar AMG solve when only one level */
      coarse_grid_solver_solve(cg_solver, A, f, u);
      HYPRE_BoomerAMGGetNumIterations(cg_solver, &iter);
      HYPRE_BoomerAMGGetFinalRelativeResidualNorm(cg_solver, &rel_resnorm);
      (mgr_data -> num_iterations) = iter;
      (mgr_data -> final_rel_residual_norm) = rel_resnorm;
      HYPRE_ANNOTATE_FUNC_END;

      return hypre_error_flag;
   }

   U_array[0] = u;
   F_array[0] = f;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   /*-----------------------------------------------------------------------
    *    Write the solver parameters
    *-----------------------------------------------------------------------*/

   /* Print MGR and linear system info according to print level */
   hypre_MGRDataPrint(mgr_vdata);

   /*-----------------------------------------------------------------------
    *     write some initial info
    *-----------------------------------------------------------------------*/

   if (my_id == 0 && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE) && tol > 0.)
   {
      hypre_printf("\n\nMGR SOLVER SOLUTION INFO:\n");
   }

   /*-----------------------------------------------------------------------
    *    Compute initial fine-grid residual and print
    *-----------------------------------------------------------------------*/

   if ((print_level & HYPRE_MGR_PRINT_INFO_SOLVE) || logging > 1 || tol > 0.)
   {
      if (logging > 1)
      {
         hypre_ParVectorCopy(F_array[0], residual );
         if (tol > hypre_cabs(fp_zero))
         {
            hypre_ParCSRMatrixMatvec(fp_neg_one, A_array[0], U_array[0], fp_one, residual);
         }
         resnorm = hypre_sqrt(hypre_ParVectorInnerProd(residual, residual));
      }
      else
      {
         hypre_ParVectorCopy(F_array[0], Vtemp);
         if (tol > hypre_cabs(fp_zero))
         {
            hypre_ParCSRMatrixMatvec(fp_neg_one, A_array[0], U_array[0], fp_one, Vtemp);
         }
         resnorm = hypre_sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
      }

      /* Since it does not diminish performance, attempt to return an error flag
       * and notify users when they supply bad input. */
      if (resnorm != 0.)
      {
         ieee_check = resnorm / resnorm; /* INF -> NaN conversion */
      }

      if (ieee_check != ieee_check)
      {
         /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
          * for ieee_check self-equality works on all IEEE-compliant compilers/
          * machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
          * by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
          * found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
         if (print_level > 0)
         {
            hypre_printf("\n\nERROR detected by Hypre ...  BEGIN\n");
            hypre_printf("ERROR -- hypre_MGRSolve: INFs and/or NaNs detected in input.\n");
            hypre_printf("User probably placed non-numerics in supplied A, x_0, or b.\n");
            hypre_printf("ERROR detected by Hypre ...  END\n\n\n");
         }
         hypre_error(HYPRE_ERROR_GENERIC);
         HYPRE_ANNOTATE_FUNC_END;

         return hypre_error_flag;
      }

      init_resnorm = resnorm;
      rhs_norm = hypre_sqrt(hypre_ParVectorInnerProd(f, f));
      if (rhs_norm > HYPRE_REAL_EPSILON)
      {
         rel_resnorm = init_resnorm / rhs_norm;
      }
      else
      {
         /* rhs is zero, return a zero solution */
         hypre_ParVectorSetZeros(U_array[0]);
         if (logging > 0)
         {
            rel_resnorm = fp_zero;
            (mgr_data -> final_rel_residual_norm) = rel_resnorm;
         }
         HYPRE_ANNOTATE_FUNC_END;

         return hypre_error_flag;
      }
   }
   else
   {
      rel_resnorm = 1.;
   }

   if (my_id == 0 && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE))
   {
      hypre_printf("                                            relative\n");
      hypre_printf("               residual        factor       residual\n");
      hypre_printf("               --------        ------       --------\n");
      hypre_printf("    Initial    %e                 %e\n", init_resnorm,
                   rel_resnorm);
   }

   /************** Main Solver Loop - always do 1 iteration ************/
   iter = 0;
   while ((rel_resnorm >= tol || iter < 1) && iter < max_iter)
   {
      /* Do one cycle of reduction solve on A*e = r */
      hypre_MGRCycle(mgr_data, F_array, U_array);

      /*---------------------------------------------------------------
       *    Compute  fine-grid residual and residual norm
       *----------------------------------------------------------------*/

      if ((print_level & HYPRE_MGR_PRINT_INFO_SOLVE) || logging > 1 || tol > 0.)
      {
         old_resnorm = resnorm;

         if (logging > 1)
         {
            hypre_ParVectorCopy(F_array[0], residual);
            hypre_ParCSRMatrixMatvec(fp_neg_one, A_array[0], U_array[0], fp_one, residual);
            resnorm = hypre_sqrt(hypre_ParVectorInnerProd(residual, residual));
         }
         else
         {
            hypre_ParVectorCopy(F_array[0], Vtemp);
            hypre_ParCSRMatrixMatvec(fp_neg_one, A_array[0], U_array[0], fp_one, Vtemp);
            resnorm = hypre_sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
         }

         conv_factor = (old_resnorm > HYPRE_REAL_EPSILON) ? (resnorm / old_resnorm) : resnorm;
         rel_resnorm = (rhs_norm > HYPRE_REAL_EPSILON) ? (resnorm / rhs_norm) : resnorm;
         norms[iter] = rel_resnorm;
      }

      ++iter;
      (mgr_data -> num_iterations) = iter;
      (mgr_data -> final_rel_residual_norm) = rel_resnorm;

      if (my_id == 0 && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE))
      {
         hypre_printf("    MGRCycle %2d   %e    %f     %e \n", iter,
                      resnorm, conv_factor, rel_resnorm);
      }
   }

   /* check convergence within max_iter */
   if (iter == max_iter && tol > 0.)
   {
      hypre_error(HYPRE_ERROR_CONV);

      if (!my_id && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE))
      {
         hypre_printf("\n\n==============================================");
         hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
         hypre_printf("      within the allowed %d iterations\n", max_iter);
         hypre_printf("==============================================");
      }
   }

   if ((my_id == 0) && (print_level & HYPRE_MGR_PRINT_INFO_SOLVE))
   {
      if (iter > 0 && init_resnorm)
      {
         conv_factor = hypre_pow((resnorm / init_resnorm),
                                 (fp_one / (HYPRE_Real) iter));
      }
      else
      {
         conv_factor = fp_one;
      }

      hypre_printf("\n\n Average Convergence Factor = %f \n", conv_factor);
   }

   HYPRE_ANNOTATE_FUNC_END;

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_MGRFrelaxVcycle
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_MGRFrelaxVcycle ( void            *Frelax_vdata,
                        hypre_ParVector *f,
                        hypre_ParVector *u )
{
   hypre_ParAMGData    *Frelax_data = (hypre_ParAMGData*) Frelax_vdata;

   HYPRE_Int            Not_Finished = 0;
   HYPRE_Int            level = 0;
   HYPRE_Int            cycle_param = 1;
   HYPRE_Int            j, Solve_err_flag, coarse_grid, fine_grid;
   HYPRE_Int            local_size;
   HYPRE_Int            num_sweeps = 1;
   HYPRE_Int            relax_order = hypre_ParAMGDataRelaxOrder(Frelax_data);
   HYPRE_Int            relax_type = 3;
   HYPRE_Real           relax_weight = 1.0;
   HYPRE_Real           omega = 1.0;

   hypre_ParVector    **F_array = (Frelax_data) -> F_array;
   hypre_ParVector    **U_array = (Frelax_data) -> U_array;

   hypre_ParCSRMatrix **A_array = ((Frelax_data) -> A_array);
   hypre_ParCSRMatrix **R_array = ((Frelax_data) -> P_array);
   hypre_ParCSRMatrix **P_array = ((Frelax_data) -> P_array);
   hypre_IntArray     **CF_marker_array = ((Frelax_data) -> CF_marker_array);
   HYPRE_Int           *CF_marker;

   hypre_ParVector     *Vtemp = (Frelax_data) -> Vtemp;
   hypre_ParVector     *Ztemp = (Frelax_data) -> Ztemp;

   HYPRE_Int            num_c_levels = (Frelax_data) -> num_levels;

   hypre_ParVector     *Aux_F = NULL;
   hypre_ParVector     *Aux_U = NULL;

   HYPRE_Complex        fp_zero = 0.0;
   HYPRE_Complex        fp_one = 1.0;
   HYPRE_Complex        fp_neg_one = - fp_one;

   HYPRE_ANNOTATE_FUNC_BEGIN;

   F_array[0] = f;
   U_array[0] = u;

   CF_marker = NULL;
   if (CF_marker_array[0])
   {
      CF_marker = hypre_IntArrayData(CF_marker_array[0]);
   }

   /* (Re)set local_size for Vtemp */
   local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[0]));
   hypre_ParVectorSetLocalSize(Vtemp, local_size);

   /* smoother on finest level:
    * This is separated from subsequent levels since the finest level matrix
    * may be larger than what is needed for the vcycle solve
    */
   if (relax_order == 1) // C/F ordering for smoother
   {
      for (j = 0; j < num_sweeps; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[0],
                                                 F_array[0],
                                                 CF_marker,
                                                 relax_type,
                                                 relax_order,
                                                 1,
                                                 relax_weight,
                                                 omega,
                                                 NULL,
                                                 U_array[0],
                                                 Vtemp,
                                                 Ztemp);
      }
   }
   else // lexicographic ordering for smoother (on F points in CF marker)
   {
      for (j = 0; j < num_sweeps; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelax(A_array[0],
                                               F_array[0],
                                               CF_marker,
                                               relax_type,
                                               -1,
                                               relax_weight,
                                               omega,
                                               NULL,
                                               U_array[0],
                                               Vtemp,
                                               Ztemp);
      }
   }

   /* coarse grids exist */
   if (num_c_levels > 0)
   {
      Not_Finished = 1;
   }

   while (Not_Finished)
   {
      if (cycle_param == 1)
      {
         //hypre_printf("Vcycle smoother (down cycle): vtemp size = %d, level = %d \n", hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)), level);
         /* compute coarse grid vectors */
         fine_grid   = level;
         coarse_grid = level + 1;

         hypre_ParVectorSetZeros(U_array[coarse_grid]);

         /* Avoid unnecessary copy using out-of-place version of SpMV */
         hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid], U_array[fine_grid],
                                            fp_one, F_array[fine_grid], Vtemp);

         hypre_ParCSRMatrixMatvecT(fp_one, R_array[fine_grid], Vtemp,
                                   fp_zero, F_array[coarse_grid]);

         /* update level */
         ++level;

         /* Update scratch vector sizes */
         local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level]));
         hypre_ParVectorSetLocalSize(Vtemp, local_size);
         hypre_ParVectorSetLocalSize(Ztemp, local_size);

         CF_marker = NULL;
         if (CF_marker_array[level])
         {
            CF_marker = hypre_IntArrayData(CF_marker_array[level]);
         }

         /* next level is coarsest level */
         if (level == num_c_levels)
         {
            /* switch to coarsest level */
            cycle_param = 3;
         }
         else
         {
            Aux_F = F_array[level];
            Aux_U = U_array[level];
            /* relax and visit next coarse grid */
            for (j = 0; j < num_sweeps; j++)
            {
               Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[level],
                                                       Aux_F,
                                                       CF_marker,
                                                       relax_type,
                                                       relax_order,
                                                       cycle_param,
                                                       relax_weight,
                                                       omega,
                                                       NULL,
                                                       Aux_U,
                                                       Vtemp,
                                                       Ztemp);
            }
            cycle_param = 1;
         }
      }
      else if (cycle_param == 3)
      {
         if (hypre_ParAMGDataUserCoarseRelaxType(Frelax_data) == 9)
         {
            /* solve the coarsest grid with Gaussian elimination */
            hypre_GaussElimSolve(Frelax_data, level, 9);
         }
         else
         {
            /* solve with relaxation */
            Aux_F = F_array[level];
            Aux_U = U_array[level];
            for (j = 0; j < num_sweeps; j++)
            {
               Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[level],
                                                       Aux_F,
                                                       CF_marker,
                                                       relax_type,
                                                       relax_order,
                                                       cycle_param,
                                                       relax_weight,
                                                       omega,
                                                       NULL,
                                                       Aux_U,
                                                       Vtemp,
                                                       Ztemp);
            }
         }
         //hypre_printf("Vcycle smoother (coarse level): vtemp size = %d, level = %d \n", hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)), level);
         cycle_param = 2;
      }
      else if (cycle_param == 2)
      {
         /*---------------------------------------------------------------
          * Visit finer level next.
          * Interpolate and add correction using hypre_ParCSRMatrixMatvec.
          * Reset counters and cycling parameters for finer level.
          *--------------------------------------------------------------*/

         fine_grid   = level - 1;
         coarse_grid = level;

         /* Update solution at the fine level */
         hypre_ParCSRMatrixMatvec(fp_one, P_array[fine_grid],
                                  U_array[coarse_grid],
                                  fp_one, U_array[fine_grid]);

         --level;
         cycle_param = 2;
         if (level == 0) { cycle_param = 99; }

         /* Update scratch vector sizes */
         local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level]));
         hypre_ParVectorSetLocalSize(Vtemp, local_size);
         hypre_ParVectorSetLocalSize(Ztemp, local_size);
         //hypre_printf("Vcycle smoother (up cycle): vtemp size = %d, level = %d \n", hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)), level);
      }
      else
      {
         Not_Finished = 0;
      }
   }
   HYPRE_ANNOTATE_FUNC_END;

   return Solve_err_flag;
}

/*--------------------------------------------------------------------------
 * hypre_MGRCycle
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_MGRCycle( void              *mgr_vdata,
                hypre_ParVector  **F_array,
                hypre_ParVector  **U_array )
{
   MPI_Comm               comm;
   hypre_ParMGRData      *mgr_data = (hypre_ParMGRData*) mgr_vdata;

   HYPRE_Int              local_size;
   HYPRE_Int              level;
   HYPRE_Int              coarse_grid;
   HYPRE_Int              fine_grid;
   HYPRE_Int              Not_Finished;
   HYPRE_Int              cycle_type;
   HYPRE_Int              print_level = (mgr_data -> print_level);
   HYPRE_Int              frelax_print_level = (mgr_data -> frelax_print_level);

   HYPRE_Complex         *l1_norms;
   HYPRE_Int             *CF_marker_data;

   hypre_ParCSRMatrix   **A_array    = (mgr_data -> A_array);
   hypre_ParCSRMatrix   **RT_array   = (mgr_data -> RT_array);
   hypre_ParCSRMatrix   **P_array    = (mgr_data -> P_array);
#if defined(HYPRE_USING_GPU)
   hypre_ParCSRMatrix   **B_array    = (mgr_data -> B_array);
   hypre_ParCSRMatrix   **B_FF_array = (mgr_data -> B_FF_array);
   hypre_ParCSRMatrix   **P_FF_array = (mgr_data -> P_FF_array);
#endif
   hypre_ParCSRMatrix    *RAP        = (mgr_data -> RAP);
   HYPRE_Int              use_default_cgrid_solver = (mgr_data -> use_default_cgrid_solver);
   HYPRE_Solver           cg_solver = (mgr_data -> coarse_grid_solver);
   HYPRE_Int            (*coarse_grid_solver_solve)(void*, void*, void*, void*) =
      (mgr_data -> coarse_grid_solver_solve);

   hypre_IntArray       **CF_marker = (mgr_data -> CF_marker_array);
   HYPRE_Int             *nsweeps = (mgr_data -> num_relax_sweeps);
   HYPRE_Int              relax_type = (mgr_data -> relax_type);
   HYPRE_Real             relax_weight = (mgr_data -> relax_weight);
   HYPRE_Real             omega = (mgr_data -> omega);
   hypre_Vector         **l1_norms_array = (mgr_data -> l1_norms);
   hypre_ParVector       *Vtemp = (mgr_data -> Vtemp);
   hypre_ParVector       *Ztemp = (mgr_data -> Ztemp);
   hypre_ParVector       *Utemp = (mgr_data -> Utemp);

   hypre_ParVector      **U_fine_array = (mgr_data -> U_fine_array);
   hypre_ParVector      **F_fine_array = (mgr_data -> F_fine_array);
   HYPRE_Int            (*fine_grid_solver_solve)(void*, void*, void*, void*) =
      (mgr_data -> fine_grid_solver_solve);
   hypre_ParCSRMatrix   **A_ff_array = (mgr_data -> A_ff_array);

   HYPRE_Int              i, relax_points;
   HYPRE_Int              num_coarse_levels = (mgr_data -> num_coarse_levels);

   HYPRE_Complex          fp_zero = 0.0;
   HYPRE_Complex          fp_one = 1.0;
   HYPRE_Complex          fp_neg_one = - fp_one;

   HYPRE_Int             *Frelax_type = (mgr_data -> Frelax_type);
   HYPRE_Int             *interp_type = (mgr_data -> interp_type);
   hypre_ParAMGData     **FrelaxVcycleData = (mgr_data -> FrelaxVcycleData);
   HYPRE_Real           **frelax_diaginv = (mgr_data -> frelax_diaginv);
   HYPRE_Int             *blk_size = (mgr_data -> blk_size);
   HYPRE_Int              block_size = (mgr_data -> block_size);
   HYPRE_Int             *block_num_coarse_indexes = (mgr_data -> block_num_coarse_indexes);
   /* TODO (VPM): refactor names blk_size and block_size */

   HYPRE_Int             *level_smooth_type = (mgr_data -> level_smooth_type);
   HYPRE_Int             *level_smooth_iters = (mgr_data -> level_smooth_iters);

   HYPRE_Int             *restrict_type  = (mgr_data -> restrict_type);
   HYPRE_Int              pre_smoothing  = (mgr_data -> global_smooth_cycle) == 1 ? 1 : 0;
   HYPRE_Int              post_smoothing = (mgr_data -> global_smooth_cycle) == 2 ? 1 : 0;
   HYPRE_Int              use_air = 0;
   HYPRE_Int              my_id;
   char                   region_name[1024];
   char                   msg[1024];

#if defined(HYPRE_USING_GPU)
   HYPRE_MemoryLocation   memory_location;
   HYPRE_ExecutionPolicy  exec;
#endif

   /* Initialize */
   HYPRE_ANNOTATE_FUNC_BEGIN;
   hypre_GpuProfilingPushRange("MGRCycle");

   comm = hypre_ParCSRMatrixComm(A_array[0]);
   hypre_MPI_Comm_rank(comm, &my_id);

   Not_Finished = 1;
   cycle_type = 1;
   level = 0;

   /***** Main loop ******/
   while (Not_Finished)
   {
      /* Update scratch vector sizes */
      local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level]));
      hypre_ParVectorSetLocalSize(Vtemp, local_size);
      hypre_ParVectorSetLocalSize(Ztemp, local_size);
      hypre_ParVectorSetLocalSize(Utemp, local_size);

      /* Do coarse grid correction solve */
      if (cycle_type == 3)
      {
         /* call coarse grid solver here (default is BoomerAMG) */
         hypre_sprintf(region_name, "%s-%d", "MGR_Level", level);
         hypre_GpuProfilingPushRange(region_name);
         HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);

         coarse_grid_solver_solve(cg_solver, RAP, F_array[level], U_array[level]);
         if (use_default_cgrid_solver)
         {
            HYPRE_Real convergence_factor_cg;
            hypre_BoomerAMGGetRelResidualNorm(cg_solver, &convergence_factor_cg);
            (mgr_data -> cg_convergence_factor) = convergence_factor_cg;
            if ((print_level) > 1 && my_id == 0 && convergence_factor_cg > hypre_cabs(fp_one))
            {
               hypre_printf("Warning!!! Coarse grid solve diverges. Factor = %1.2e\n",
                            convergence_factor_cg);
            }
         }

         /* Error checking */
         if (HYPRE_GetError())
         {
            hypre_sprintf(msg, "[%d]: Error from MGR's coarsest level solver (level %d)\n",
                          my_id, level);
            hypre_error_w_msg(HYPRE_ERROR_GENERIC, msg);
            HYPRE_ClearAllErrors();
         }

         /* DEBUG: print the coarse system indicated by mgr_data->print_coarse_system */
         if (mgr_data -> print_coarse_system)
         {
            hypre_ParCSRMatrixPrintIJ(RAP, 1, 1, "RAP_mat");
            hypre_ParVectorPrintIJ(F_array[level], 1, "RAP_rhs");
            hypre_ParVectorPrintIJ(U_array[level], 1, "RAP_sol");
            mgr_data -> print_coarse_system--;
         }

         /**** cycle up ***/
         cycle_type = 2;

         hypre_GpuProfilingPopRange();
         HYPRE_ANNOTATE_REGION_END("%s", region_name);
      }
      /* Down cycle */
      else if (cycle_type == 1)
      {
         /* Set fine/coarse grid level indices */
         fine_grid       = level;
         coarse_grid     = level + 1;
         l1_norms        = l1_norms_array[fine_grid] ?
                           hypre_VectorData(l1_norms_array[fine_grid]) : NULL;
         CF_marker_data  = hypre_IntArrayData(CF_marker[fine_grid]);

#if defined(HYPRE_USING_GPU)
         memory_location = hypre_ParCSRMatrixMemoryLocation(A_array[fine_grid]);
         exec            = hypre_GetExecPolicy1(memory_location);
#endif

         hypre_sprintf(region_name, "%s-%d", "MGR_Level", fine_grid);
         hypre_GpuProfilingPushRange(region_name);
         HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);

         /* Global pre smoothing sweeps */
         if (pre_smoothing && (level_smooth_iters[fine_grid] > 0))
         {
            hypre_sprintf(region_name, "Global-Relax");
            hypre_GpuProfilingPushRange(region_name);
            HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);

            if ((level_smooth_type[fine_grid]) == 0 ||
                (level_smooth_type[fine_grid]) == 1)
            {
               /* Block Jacobi/Gauss-Seidel smoother */
#if defined(HYPRE_USING_GPU)
               if (exec == HYPRE_EXEC_DEVICE)
               {
                  for (i = 0; i < level_smooth_iters[fine_grid]; i++)
                  {
                     hypre_MGRBlockRelaxSolveDevice(B_array[fine_grid], A_array[fine_grid],
                                                    F_array[fine_grid], U_array[fine_grid],
                                                    Vtemp, fp_one);
                  }
               }
               else
#endif
               {
                  HYPRE_Real *level_diaginv  = (mgr_data -> level_diaginv)[fine_grid];
                  HYPRE_Int   level_blk_size = (level == 0) ? block_size :
                                               block_num_coarse_indexes[level - 1];
                  HYPRE_Int   nrows          = hypre_ParCSRMatrixNumRows(A_array[fine_grid]);
                  HYPRE_Int   n_block        = nrows / level_blk_size;
                  HYPRE_Int   left_size      = nrows - n_block * level_blk_size;
                  for (i = 0; i < level_smooth_iters[fine_grid]; i++)
                  {
                     hypre_MGRBlockRelaxSolve(A_array[fine_grid], F_array[fine_grid],
                                              U_array[fine_grid], level_blk_size,
                                              n_block, left_size, level_smooth_type[fine_grid],
                                              level_diaginv, Vtemp);
                  }
               }
               hypre_ParVectorAllZeros(U_array[fine_grid]) = 0;
            }
            else if ((level_smooth_type[fine_grid] > 1) &&
                     (level_smooth_type[fine_grid] < 7))
            {
               for (i = 0; i < level_smooth_iters[fine_grid]; i ++)
               {
                  hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid], NULL,
                                       level_smooth_type[fine_grid] - 1, 0, fp_one,
                                       fp_zero, NULL, U_array[fine_grid], Vtemp, NULL);
               }
            }
            else if (level_smooth_type[fine_grid] == 8)
            {
               /* Euclid ILU smoother */
               for (i = 0; i < level_smooth_iters[fine_grid]; i++)
               {
                  /* Compute residual */
                  hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
                                                     U_array[fine_grid], fp_one,
                                                     F_array[fine_grid], Vtemp);

                  /* Solve */
                  HYPRE_EuclidSolve((mgr_data -> level_smoother)[fine_grid],
                                    A_array[fine_grid], Vtemp, Utemp);

                  /* Update solution */
                  hypre_ParVectorAxpy(fp_one, Utemp, U_array[fine_grid]);
                  hypre_ParVectorAllZeros(U_array[fine_grid]) = 0;
               }
            }
            else if (level_smooth_type[fine_grid] == 16)
            {
               /* hypre_ILU smoother */
               HYPRE_ILUSolve((mgr_data -> level_smoother)[fine_grid],
                              A_array[fine_grid], F_array[fine_grid],
                              U_array[fine_grid]);
               hypre_ParVectorAllZeros(U_array[fine_grid]) = 0;
            }
            else
            {
               /* Generic relaxation interface */
               for (i = 0; i < level_smooth_iters[fine_grid]; i++)
               {
                  hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid],
                                       NULL, level_smooth_type[fine_grid],
                                       0, fp_one, fp_one, l1_norms,
                                       U_array[fine_grid], Vtemp, Ztemp);
               }
            }

            /* Error checking */
            if (HYPRE_GetError())
            {
               hypre_sprintf(msg, "[%d]: Error from global pre-relaxation %d at level %d \n",
                             my_id, level_smooth_type[fine_grid], fine_grid);
               hypre_error_w_msg(HYPRE_ERROR_GENERIC, msg);
               HYPRE_ClearAllErrors();
            }

            hypre_GpuProfilingPopRange();
            HYPRE_ANNOTATE_REGION_END("%s", region_name);
         } /* End global pre-smoothing */

         /* F-relaxation */
         relax_points = -1;
         hypre_sprintf(region_name, "F-Relax");
         hypre_GpuProfilingPushRange(region_name);
         HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);

         if (Frelax_type[fine_grid] == 0)
         {
            /* (single level) Block-relaxation for A_ff */
            if (interp_type[fine_grid] == 12)
            {
               HYPRE_Int  nrows     = hypre_ParCSRMatrixNumRows(A_ff_array[fine_grid]);
               HYPRE_Int  n_block   = nrows / blk_size[fine_grid];
               HYPRE_Int  left_size = nrows - n_block * blk_size[fine_grid];

               for (i = 0; i < nsweeps[fine_grid]; i++)
               {
                  /* F-relaxation is reducing the global residual, thus recompute it */
                  hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
                                                     U_array[fine_grid], fp_one,
                                                     F_array[fine_grid], Vtemp);

                  /* Restrict to F points */
#if defined(HYPRE_USING_GPU)
                  if (exec == HYPRE_EXEC_DEVICE)
                  {
                     hypre_ParCSRMatrixMatvecT(fp_one, P_FF_array[fine_grid], Vtemp,
                                               fp_zero, F_fine_array[coarse_grid]);
                  }
                  else
#endif
                  {
                     hypre_MGRAddVectorR(CF_marker[fine_grid], FMRK, fp_one, Vtemp,
                                         fp_zero, &(F_fine_array[coarse_grid]));
                  }

                  /* Set initial guess to zero */
                  hypre_ParVectorSetZeros(U_fine_array[coarse_grid]);

#if defined(HYPRE_USING_GPU)
                  if (exec == HYPRE_EXEC_DEVICE)
                  {
                     hypre_MGRBlockRelaxSolveDevice(B_FF_array[fine_grid],
                                                    A_ff_array[fine_grid],
                                                    F_fine_array[fine_grid],
                                                    U_fine_array[fine_grid],
                                                    Vtemp, fp_one);
                  }
                  else
#endif
                  {
                     hypre_MGRBlockRelaxSolve(A_ff_array[fine_grid], F_fine_array[coarse_grid],
                                              U_fine_array[coarse_grid], blk_size[fine_grid],
                                              n_block, left_size, 0, frelax_diaginv[fine_grid],
                                              Vtemp);
                  }

                  /* Interpolate the solution back to the fine grid level */
#if defined(HYPRE_USING_GPU)
                  if (exec == HYPRE_EXEC_DEVICE)
                  {
                     hypre_ParCSRMatrixMatvec(fp_one, P_FF_array[fine_grid],
                                              U_fine_array[coarse_grid], fp_one,
                                              U_fine_array[fine_grid]);
                  }
                  else
#endif
                  {
                     hypre_MGRAddVectorP(CF_marker[fine_grid], FMRK, fp_one,
                                         U_fine_array[coarse_grid], fp_one,
                                         &(U_array[fine_grid]));
                  }
               }
            }
            else
            {
               if (relax_type == 18)
               {
#if defined(HYPRE_USING_GPU)
                  for (i = 0; i < nsweeps[fine_grid]; i++)
                  {
                     hypre_MGRRelaxL1JacobiDevice(A_array[fine_grid], F_array[fine_grid],
                                                  CF_marker_data, relax_points, relax_weight,
                                                  l1_norms, U_array[fine_grid], Vtemp);
                  }
#else
                  for (i = 0; i < nsweeps[fine_grid]; i++)
                  {
                     hypre_ParCSRRelax_L1_Jacobi(A_array[fine_grid], F_array[fine_grid],
                                                 CF_marker_data, relax_points, relax_weight,
                                                 l1_norms, U_array[fine_grid], Vtemp);
                  }
#endif
               }
               else
               {
                  for (i = 0; i < nsweeps[fine_grid]; i++)
                  {
                     hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid],
                                          CF_marker_data, relax_type, relax_points,
                                          relax_weight, omega, l1_norms,
                                          U_array[fine_grid], Vtemp, Ztemp);
                  }
               }
            }
         }
         else if (Frelax_type[fine_grid] == 1)
         {
            /* V-cycle smoother for A_ff */
            //HYPRE_Real convergence_factor_frelax;
            // compute residual before solve
            // hypre_ParCSRMatrixMatvecOutOfPlace(-fp_one, A_array[fine_grid],
            //                                    U_array[fine_grid], fp_one,
            //                                    F_array[fine_grid], Vtemp);
            //  convergence_factor_frelax = hypre_ParVectorInnerProd(Vtemp, Vtemp);

            HYPRE_Real resnorm, init_resnorm;
            HYPRE_Real rhs_norm, old_resnorm;
            HYPRE_Real rel_resnorm = fp_one;
            HYPRE_Real conv_factor = fp_one;
            if (frelax_print_level > 1)
            {
               hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
                                                  U_array[fine_grid], fp_one,
                                                  F_array[fine_grid], Vtemp);

               resnorm = hypre_sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
               init_resnorm = resnorm;
               rhs_norm = hypre_sqrt(hypre_ParVectorInnerProd(F_array[fine_grid], F_array[fine_grid]));

               if (rhs_norm > HYPRE_REAL_EPSILON)
               {
                  rel_resnorm = init_resnorm / rhs_norm;
               }
               else
               {
                  /* rhs is zero, return a zero solution */
                  hypre_ParVectorSetZeros(U_array[0]);

                  HYPRE_ANNOTATE_FUNC_END;
                  hypre_GpuProfilingPopRange();

                  return hypre_error_flag;
               }
               if (my_id == 0 && frelax_print_level > 1)
               {
                  hypre_printf("\nBegin F-relaxation: V-Cycle Smoother \n");
                  hypre_printf("                                            relative\n");
                  hypre_printf("               residual        factor       residual\n");
                  hypre_printf("               --------        ------       --------\n");
                  hypre_printf("    Initial    %e                 %e\n", init_resnorm,
                               rel_resnorm);
               }
            }

            for (i = 0; i < nsweeps[fine_grid]; i++)
            {
               hypre_MGRFrelaxVcycle(FrelaxVcycleData[fine_grid],
                                     F_array[fine_grid],
                                     U_array[fine_grid]);

               if (frelax_print_level > 1)
               {
                  old_resnorm = resnorm;
                  hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
                                                     U_array[fine_grid], fp_one,
                                                     F_array[fine_grid], Vtemp);
                  resnorm = hypre_sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
                  conv_factor = (old_resnorm > HYPRE_REAL_EPSILON) ?
                                (resnorm / old_resnorm) : resnorm;
                  rel_resnorm = (rhs_norm > HYPRE_REAL_EPSILON) ? (resnorm / rhs_norm) : resnorm;

                  if (my_id == 0)
                  {
                     hypre_printf("\n    V-Cycle %2d   %e    %f     %e \n", i,
                                  resnorm, conv_factor, rel_resnorm);
                  }
               }
            }
            if (my_id == 0 && frelax_print_level > 1)
            {
               hypre_printf("End F-relaxation: V-Cycle Smoother \n\n");
            }
            // compute residual after solve
            //hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
            //                                   U_array[fine_grid], fp_one,
            //                                   F_array[fine_grid], Vtemp);
            //convergence_factor_frelax = hypre_ParVectorInnerProd(Vtemp, Vtemp)/convergence_factor_frelax;
            //hypre_printf("F-relaxation V-cycle convergence factor: %5f\n", convergence_factor_frelax);
         }
         else if (Frelax_type[level] == 2  ||
                  Frelax_type[level] == 9  ||
                  Frelax_type[level] == 99 ||
                  Frelax_type[level] == 199)
         {
            /* We need to compute the residual first to ensure that
               F-relaxation is reducing the global residual */
            hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
                                               U_array[fine_grid], fp_one,
                                               F_array[fine_grid], Vtemp);

            /* Restrict to F points */
#if defined (HYPRE_USING_GPU)
            hypre_ParCSRMatrixMatvecT(fp_one, P_FF_array[fine_grid], Vtemp,
                                      fp_zero, F_fine_array[coarse_grid]);
#else
            hypre_MGRAddVectorR(CF_marker[fine_grid], FMRK, fp_one, Vtemp,
                                fp_zero, &(F_fine_array[coarse_grid]));
#endif

            /* Set initial guess to zeros */
            hypre_ParVectorSetZeros(U_fine_array[coarse_grid]);

            if (Frelax_type[level] == 2)
            {
               /* Do F-relaxation using AMG */
               fine_grid_solver_solve((mgr_data -> aff_solver)[fine_grid],
                                      A_ff_array[fine_grid],
                                      F_fine_array[coarse_grid],
                                      U_fine_array[coarse_grid]);
            }
            else
            {
               /* Do F-relaxation using Gaussian Elimination */
               hypre_GaussElimSolve((mgr_data -> GSElimData)[fine_grid],
                                    level, Frelax_type[level]);
            }

            /* Interpolate the solution back to the fine grid level */
#if defined (HYPRE_USING_GPU)
            hypre_ParCSRMatrixMatvec(fp_one, P_FF_array[fine_grid],
                                     U_fine_array[coarse_grid], fp_one,
                                     U_array[fine_grid]);
#else
            hypre_MGRAddVectorP(CF_marker[fine_grid], FMRK, fp_one,
                                U_fine_array[coarse_grid], fp_one,
                                &(U_array[fine_grid]));
#endif
         }
         else
         {
            for (i = 0; i < nsweeps[fine_grid]; i++)
            {
               hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid],
                                    CF_marker_data, Frelax_type[fine_grid],
                                    relax_points, relax_weight, omega, l1_norms,
                                    U_array[fine_grid], Vtemp, Ztemp);
            }
         }

         /* Error checking */
         if (HYPRE_GetError())
         {
            hypre_sprintf(msg, "[%d]: Error from F-relaxation %d at MGR level %d\n",
                          my_id, Frelax_type[fine_grid], fine_grid);
            hypre_error_w_msg(HYPRE_ERROR_GENERIC, msg);
            HYPRE_ClearAllErrors();
         }

         hypre_GpuProfilingPopRange();
         HYPRE_ANNOTATE_REGION_END("%s", region_name);

         /* Update residual and compute coarse-grid rhs */
         hypre_sprintf(region_name, "Residual");
         hypre_GpuProfilingPushRange(region_name);
         HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);

         hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
                                            U_array[fine_grid], fp_one,
                                            F_array[fine_grid], Vtemp);

         hypre_GpuProfilingPopRange();
         HYPRE_ANNOTATE_REGION_END("%s", region_name);

         if ((restrict_type[fine_grid] == 4) ||
             (restrict_type[fine_grid] == 5))
         {
            use_air = 1;
         }

         hypre_sprintf(region_name, "Restrict");
         hypre_GpuProfilingPushRange(region_name);
         HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);
         if (use_air)
         {
            /* no transpose necessary for R */
            hypre_ParCSRMatrixMatvec(fp_one, RT_array[fine_grid], Vtemp,
                                     fp_zero, F_array[coarse_grid]);
         }
         else
         {
#if defined(HYPRE_USING_GPU)
            if (restrict_type[fine_grid] > 0 || (exec == HYPRE_EXEC_DEVICE))
#else
            if (restrict_type[fine_grid] > 0)
#endif
            {
               hypre_ParCSRMatrixMatvecT(fp_one, RT_array[fine_grid], Vtemp,
                                         fp_zero, F_array[coarse_grid]);
            }
            else
            {
               hypre_MGRAddVectorR(CF_marker[fine_grid], CMRK, fp_one,
                                   Vtemp, fp_zero, &(F_array[coarse_grid]));
            }
         }
         hypre_GpuProfilingPopRange();
         HYPRE_ANNOTATE_REGION_END("%s", region_name);

         hypre_sprintf(region_name, "%s-%d", "MGR_Level", fine_grid);
         hypre_GpuProfilingPopRange();
         HYPRE_ANNOTATE_REGION_END("%s", region_name);

         /* Initialize coarse grid solution array (VPM: double-check this for multiple cycles)*/
         hypre_ParVectorSetZeros(U_array[coarse_grid]);

         ++level;
         if (level == num_coarse_levels)
         {
            cycle_type = 3;
         }
      }
      /* Up cycle */
      else if (level != 0)
      {
         /* Set fine/coarse grid level indices */
         fine_grid       = level - 1;
         coarse_grid     = level;
         l1_norms        = l1_norms_array[fine_grid] ?
                           hypre_VectorData(l1_norms_array[fine_grid]) : NULL;
         CF_marker_data  = hypre_IntArrayData(CF_marker[fine_grid]);

#if defined(HYPRE_USING_GPU)
         memory_location = hypre_ParCSRMatrixMemoryLocation(A_array[fine_grid]);
         exec            = hypre_GetExecPolicy1(memory_location);
#endif

         hypre_sprintf(region_name, "%s-%d", "MGR_Level", fine_grid);
         hypre_GpuProfilingPushRange(region_name);
         HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);

         /* Interpolate */
         hypre_sprintf(region_name, "Prolongate");
         hypre_GpuProfilingPushRange(region_name);
         HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);

#if defined(HYPRE_USING_GPU)
         if (interp_type[fine_grid] > 0 || (exec == HYPRE_EXEC_DEVICE))
#else
         if (interp_type[fine_grid] > 0)
#endif
         {
            hypre_ParCSRMatrixMatvec(fp_one, P_array[fine_grid],
                                     U_array[coarse_grid],
                                     fp_one, U_array[fine_grid]);
         }
         else
         {
            hypre_MGRAddVectorP(CF_marker[fine_grid], CMRK, fp_one,
                                U_array[coarse_grid], fp_one,
                                &(U_array[fine_grid]));
         }

         hypre_GpuProfilingPopRange();
         HYPRE_ANNOTATE_REGION_END("%s", region_name);

         /* Global post smoothing sweeps */
         if (post_smoothing & (level_smooth_iters[fine_grid] > 0))
         {
            hypre_sprintf(region_name, "Global-Relax");
            hypre_GpuProfilingPushRange(region_name);
            HYPRE_ANNOTATE_REGION_BEGIN("%s", region_name);

            /* Block Jacobi smoother */
            if ((level_smooth_type[fine_grid] == 0) ||
                (level_smooth_type[fine_grid] == 1))
            {
#if defined(HYPRE_USING_GPU)
               if (exec == HYPRE_EXEC_DEVICE)
               {
                  for (i = 0; i < level_smooth_iters[fine_grid]; i++)
                  {
                     hypre_MGRBlockRelaxSolveDevice(B_array[fine_grid], A_array[fine_grid],
                                                    F_array[fine_grid], U_array[fine_grid],
                                                    Vtemp, fp_one);
                  }
               }
               else
#endif
               {
                  HYPRE_Real *level_diaginv  = (mgr_data -> level_diaginv)[fine_grid];
                  HYPRE_Int   level_blk_size = (fine_grid == 0) ? block_size :
                                               block_num_coarse_indexes[fine_grid - 1];
                  HYPRE_Int   nrows          = hypre_ParCSRMatrixNumRows(A_array[fine_grid]);
                  HYPRE_Int   n_block        = nrows / level_blk_size;
                  HYPRE_Int   left_size      = nrows - n_block * level_blk_size;
                  for (i = 0; i < level_smooth_iters[fine_grid]; i++)
                  {
                     hypre_MGRBlockRelaxSolve(A_array[fine_grid], F_array[fine_grid],
                                              U_array[fine_grid], level_blk_size, n_block,
                                              left_size, level_smooth_type[fine_grid],
                                              level_diaginv, Vtemp);
                  }
               }
            }
            else if ((level_smooth_type[fine_grid] > 1) && (level_smooth_type[fine_grid] < 7))
            {
               for (i = 0; i < level_smooth_iters[fine_grid]; i++)
               {
                  hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid], NULL,
                                       level_smooth_type[fine_grid] - 1, 0, fp_one,
                                       fp_zero, l1_norms, U_array[fine_grid], Vtemp, NULL);
               }
            }
            else if (level_smooth_type[fine_grid] == 8)
            {
               /* Euclid ILU */
               for (i = 0; i < level_smooth_iters[fine_grid]; i++)
               {
                  /* Compute residual */
                  hypre_ParCSRMatrixMatvecOutOfPlace(fp_neg_one, A_array[fine_grid],
                                                     U_array[fine_grid], fp_one,
                                                     F_array[fine_grid], Vtemp);
                  /* Solve */
                  HYPRE_EuclidSolve((mgr_data -> level_smoother)[fine_grid],
                                    A_array[fine_grid], Vtemp, Utemp);

                  /* Update solution */
                  hypre_ParVectorAxpy(fp_one, Utemp, U_array[fine_grid]);
               }
            }
            else if (level_smooth_type[fine_grid] == 16)
            {
               /* HYPRE ILU */
               HYPRE_ILUSolve((mgr_data -> level_smoother)[fine_grid],
                              A_array[fine_grid], F_array[fine_grid],
                              U_array[fine_grid]);
            }
            else
            {
               /* Generic relaxation interface */
               for (i = 0; i < level_smooth_iters[level]; i++)
               {
                  hypre_BoomerAMGRelax(A_array[fine_grid], F_array[fine_grid],
                                       NULL, level_smooth_type[fine_grid], 0,
                                       fp_one, fp_one, l1_norms,
                                       U_array[fine_grid], Vtemp, Ztemp);
               }
            }

            /* Error checking */
            if (HYPRE_GetError())
            {
               hypre_sprintf(msg, "[%d]: Error from global post-relaxation %d at MGR level %d\n",
                             my_id, level_smooth_type[fine_grid], fine_grid);
               hypre_error_w_msg(HYPRE_ERROR_GENERIC, msg);
               HYPRE_ClearAllErrors();
            }

            hypre_GpuProfilingPopRange();
            HYPRE_ANNOTATE_REGION_END("%s", region_name);
         } /* End post-smoothing */

         hypre_sprintf(region_name, "%s-%d", "MGR_Level", fine_grid);
         hypre_GpuProfilingPopRange();
         HYPRE_ANNOTATE_REGION_END("%s", region_name);

         --level;
      } /* End interpolate */
      else
      {
         Not_Finished = 0;
      }
   }
   HYPRE_ANNOTATE_FUNC_END;
   hypre_GpuProfilingPopRange();

   return hypre_error_flag;
}