hypre/parcsr_ls/ams.c

/*BHEADER**********************************************************************
 * Copyright (c) 2008,  Lawrence Livermore National Security, LLC.
 * Produced at the Lawrence Livermore National Laboratory.
 * This file is part of HYPRE.  See file COPYRIGHT for details.
 *
 * HYPRE is free software; you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License (as published by the Free
 * Software Foundation) version 2.1 dated February 1999.
 *
 * $Revision$
 ***********************************************************************EHEADER*/


#include "_hypre_parcsr_ls.h"
#include "float.h"
#include "ams.h"

/*--------------------------------------------------------------------------
 * hypre_ParCSRRelax
 *
 * Relaxation on the ParCSR matrix A with right-hand side f and
 * initial guess u. Possible values for relax_type are:
 *
 * 1 = l1-scaled Jacobi
 * 2 = l1-scaled block Gauss-Seidel/SSOR
 * 3 = Kaczmarz
 * 4 = truncated version of 2 (Remark 6.2 in smoothers paper)
 * x = BoomerAMG relaxation with relax_type = |x|
 * (16 = Cheby)
 *
 * The default value of relax_type is 2.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_ParCSRRelax(/* matrix to relax with */
                            hypre_ParCSRMatrix *A,
                            /* right-hand side */
                            hypre_ParVector *f,
                            /* relaxation type */
                            HYPRE_Int relax_type,
                            /* number of sweeps */
                            HYPRE_Int relax_times,
                            /* l1 norms of the rows of A */
                            double *l1_norms,
                            /* damping coefficient (usually <= 1) */
                            double relax_weight,
                            /* SOR parameter (usually in (0,2) */
                            double omega,
                            /* for cheby smoothers */
                            double max_eig_est,
                            double min_eig_est,
                            HYPRE_Int cheby_order,
                            double cheby_fraction,
                            /* initial/updated approximation */
                            hypre_ParVector *u,
                            /* temporary vector */
                            hypre_ParVector *v,
                            /* temporary vector */
                            hypre_ParVector *z)
{
   HYPRE_Int sweep;

   double *u_data = hypre_VectorData(hypre_ParVectorLocalVector(u));
   double *f_data = hypre_VectorData(hypre_ParVectorLocalVector(f));
   double *v_data = hypre_VectorData(hypre_ParVectorLocalVector(v));

   for (sweep = 0; sweep < relax_times; sweep++)
   {
      if (relax_type == 1) /* l1-scaled Jacobi */
      {
         HYPRE_Int i, num_rows = hypre_ParCSRMatrixNumRows(A);

         hypre_ParVectorCopy(f,v);
         hypre_ParCSRMatrixMatvec(-relax_weight, A, u, relax_weight, v);

         /* u += w D^{-1}(f - A u), where D_ii = ||A(i,:)||_1 */
         for (i = 0; i < num_rows; i++)
            u_data[i] += v_data[i] / l1_norms[i];
      }
      else if (relax_type == 2 || relax_type == 4) /* offd-l1-scaled block GS */
      {
         hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
         double *A_diag_data = hypre_CSRMatrixData(A_diag);
         HYPRE_Int *A_diag_I = hypre_CSRMatrixI(A_diag);
         HYPRE_Int *A_diag_J = hypre_CSRMatrixJ(A_diag);

         hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
         HYPRE_Int *A_offd_I = hypre_CSRMatrixI(A_offd);
         HYPRE_Int *A_offd_J = hypre_CSRMatrixJ(A_offd);
         double *A_offd_data = hypre_CSRMatrixData(A_offd);

         HYPRE_Int i, j;
         HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
         HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
         double *u_offd_data = hypre_TAlloc(double,num_cols_offd);

         double res;

         HYPRE_Int num_procs;
         hypre_MPI_Comm_size(hypre_ParCSRMatrixComm(A), &num_procs);

         /* Copy off-diagonal values of u to the current processor */
         if (num_procs > 1)
         {
            hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
            HYPRE_Int num_sends;
            double *u_buf_data;
            hypre_ParCSRCommHandle *comm_handle;

            HYPRE_Int index = 0, start;

            if (!comm_pkg)
            {
               hypre_MatvecCommPkgCreate(A);
               comm_pkg = hypre_ParCSRMatrixCommPkg(A);
            }

            num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
            u_buf_data = hypre_TAlloc(double,
                                      hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));

            for (i = 0; i < num_sends; i++)
            {
               start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
               for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
                  u_buf_data[index++] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
            }
            comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,u_buf_data,u_offd_data);
            hypre_ParCSRCommHandleDestroy(comm_handle);

            hypre_TFree(u_buf_data);
         }

         if (relax_weight == 1.0 && omega == 1.0) /* symmetric Gauss-Seidel */
         {
            /* Forward local pass */
            for (i = 0; i < num_rows; i++)
            {
               res = f_data[i];
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
                  res -= A_diag_data[j] * u_data[A_diag_J[j]];
               if (num_cols_offd)
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     res -= A_offd_data[j] * u_offd_data[A_offd_J[j]];
               u_data[i] += res / l1_norms[i];
            }
            /* Backward local pass */
            for (i = num_rows-1; i > -1; i--)
            {
               res = f_data[i];
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
                  res -= A_diag_data[j] * u_data[A_diag_J[j]];
               if (num_cols_offd)
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     res -= A_offd_data[j] * u_offd_data[A_offd_J[j]];
               u_data[i] += res / l1_norms[i];
            }
         }
         else if (relax_weight == 1.0) /* SSOR */
         {
            /* Forward local pass */
            for (i = 0; i < num_rows; i++)
            {
               res = f_data[i];
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
                  res -= A_diag_data[j] * u_data[A_diag_J[j]];
               if (num_cols_offd)
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     res -= A_offd_data[j] * u_offd_data[A_offd_J[j]];
               u_data[i] += omega * res / l1_norms[i];
            }
            /* Backward local pass */
            for (i = num_rows-1; i > -1; i--)
            {
               res = f_data[i];
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
                  res -= A_diag_data[j] * u_data[A_diag_J[j]];
               if (num_cols_offd)
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     res -= A_offd_data[j] * u_offd_data[A_offd_J[j]];
               u_data[i] += omega * res / l1_norms[i];
            }
         }
         else /* scaled SSOR */
         {
            double dif;
            double c1 = omega * relax_weight;
            double c2 = omega * (1.0 - relax_weight);

            /* Forward local pass (save initial guess in v_data) */
            for (i = 0; i < num_rows; i++)
            {
               dif = 0.0;
               v_data[i] = u_data[i];
               res = f_data[i];
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               {
                  res -= A_diag_data[j] * u_data[A_diag_J[j]];
                  if (A_diag_J[j] < i)
                     dif += A_diag_data[j] * (v_data[A_diag_J[j]] - u_data[A_diag_J[j]]);
               }
               if (num_cols_offd)
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     res -= A_offd_data[j] * u_offd_data[A_offd_J[j]];
               u_data[i] += (c1 * res + c2 * dif) / l1_norms[i];
            }
            /* Backward local pass */
            for (i = num_rows-1; i > -1; i--)
            {
               dif = 0.0;
               res = f_data[i];
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               {
                  res -= A_diag_data[j] * u_data[A_diag_J[j]];
                  if (A_diag_J[j] > i)
                     dif += A_diag_data[j] * (v_data[A_diag_J[j]] - u_data[A_diag_J[j]]);
               }
               if (num_cols_offd)
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     res -= A_offd_data[j] * u_offd_data[A_offd_J[j]];
               u_data[i] += (c1 * res + c2 * dif) / l1_norms[i];
            }
         }

         hypre_TFree(u_offd_data);
      }
      else if (relax_type == 3) /* Kaczmarz */
      {
         hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
         double *A_diag_data = hypre_CSRMatrixData(A_diag);
         HYPRE_Int *A_diag_I = hypre_CSRMatrixI(A_diag);
         HYPRE_Int *A_diag_J = hypre_CSRMatrixJ(A_diag);

         hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
         HYPRE_Int *A_offd_I = hypre_CSRMatrixI(A_offd);
         HYPRE_Int *A_offd_J = hypre_CSRMatrixJ(A_offd);
         double *A_offd_data = hypre_CSRMatrixData(A_offd);

         HYPRE_Int i, j;
         HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
         HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
         double *u_offd_data = hypre_TAlloc(double,num_cols_offd);

         double res;

         HYPRE_Int num_procs;
         hypre_MPI_Comm_size(hypre_ParCSRMatrixComm(A), &num_procs);

         /* Copy off-diagonal values of u to the current processor */
         if (num_procs > 1)
         {
            hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
            HYPRE_Int num_sends;
            double *u_buf_data;
            hypre_ParCSRCommHandle *comm_handle;

            HYPRE_Int index = 0, start;

            if (!comm_pkg)
            {
               hypre_MatvecCommPkgCreate(A);
               comm_pkg = hypre_ParCSRMatrixCommPkg(A);
            }

            num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
            u_buf_data = hypre_TAlloc(double,
                                      hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));

            for (i = 0; i < num_sends; i++)
            {
               start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
               for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
                  u_buf_data[index++] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
            }
            comm_handle = hypre_ParCSRCommHandleCreate(1,comm_pkg,u_buf_data,u_offd_data);
            hypre_ParCSRCommHandleDestroy(comm_handle);

            hypre_TFree(u_buf_data);
         }

         /* Forward local pass */
         for (i = 0; i < num_rows; i++)
         {
            res = f_data[i];
            for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               res -= A_diag_data[j] * u_data[A_diag_J[j]];
            if (num_cols_offd)
               for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                  res -= A_offd_data[j] * u_offd_data[A_offd_J[j]];
            res /= l1_norms[i];
            for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               u_data[A_diag_J[j]] += omega * res * A_diag_data[j];
         }

         /* Backward local pass */
         for (i = num_rows-1; i > -1; i--)
         {
            res = f_data[i];
            for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               res -= A_diag_data[j] * u_data[A_diag_J[j]];
            if (num_cols_offd)
               for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                  res -= A_offd_data[j] * u_offd_data[A_offd_J[j]];
            res /= l1_norms[i];
            for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               u_data[A_diag_J[j]] += omega * res * A_diag_data[j];
         }

         hypre_TFree(u_offd_data);
      }
      else /* call BoomerAMG relaxation */
      {
         if (relax_type == 16)
         {
            hypre_ParCSRRelax_Cheby(A,
                                    f,
                                    max_eig_est,
                                    min_eig_est,
                                    cheby_fraction, cheby_order, 1,
                                    0, u, v, z);


         }
         else
            hypre_BoomerAMGRelax(A, f, NULL, abs(relax_type), 0, relax_weight,
                                 omega, l1_norms, u, v, z);
      }
   }
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_ParVectorInRangeOf
 *
 * Return a vector that belongs to the range of a given matrix.
 *--------------------------------------------------------------------------*/

hypre_ParVector *hypre_ParVectorInRangeOf(hypre_ParCSRMatrix *A)
{
   hypre_ParVector *x;

   x = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
                             hypre_ParCSRMatrixGlobalNumRows(A),
                             hypre_ParCSRMatrixRowStarts(A));
   hypre_ParVectorInitialize(x);
   hypre_ParVectorOwnsData(x) = 1;
   hypre_ParVectorOwnsPartitioning(x) = 0;

   return x;
}

/*--------------------------------------------------------------------------
 * hypre_ParVectorInDomainOf
 *
 * Return a vector that belongs to the domain of a given matrix.
 *--------------------------------------------------------------------------*/

hypre_ParVector *hypre_ParVectorInDomainOf(hypre_ParCSRMatrix *A)
{
   hypre_ParVector *x;

   x = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
                             hypre_ParCSRMatrixGlobalNumCols(A),
                             hypre_ParCSRMatrixColStarts(A));
   hypre_ParVectorInitialize(x);
   hypre_ParVectorOwnsData(x) = 1;
   hypre_ParVectorOwnsPartitioning(x) = 0;

   return x;
}

/*--------------------------------------------------------------------------
 * hypre_ParVectorBlockSplit
 *
 * Extract the dim sub-vectors x_0,...,x_{dim-1} composing a parallel
 * block vector x. It is assumed that &x[i] = [x_0[i],...,x_{dim-1}[i]].
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_ParVectorBlockSplit(hypre_ParVector *x,
                                    hypre_ParVector *x_[3],
                                    HYPRE_Int dim)
{
   HYPRE_Int i, d, size_;
   double *x_data, *x_data_[3];

   size_ = hypre_VectorSize(hypre_ParVectorLocalVector(x_[0]));

   x_data = hypre_VectorData(hypre_ParVectorLocalVector(x));
   for (d = 0; d < dim; d++)
      x_data_[d] = hypre_VectorData(hypre_ParVectorLocalVector(x_[d]));

   for (i = 0; i < size_; i++)
      for (d = 0; d < dim; d++)
         x_data_[d][i] = x_data[dim*i+d];

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_ParVectorBlockGather
 *
 * Compose a parallel block vector x from dim given sub-vectors
 * x_0,...,x_{dim-1}, such that &x[i] = [x_0[i],...,x_{dim-1}[i]].
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_ParVectorBlockGather(hypre_ParVector *x,
                                     hypre_ParVector *x_[3],
                                     HYPRE_Int dim)
{
   HYPRE_Int i, d, size_;
   double *x_data, *x_data_[3];

   size_ = hypre_VectorSize(hypre_ParVectorLocalVector(x_[0]));

   x_data = hypre_VectorData(hypre_ParVectorLocalVector(x));
   for (d = 0; d < dim; d++)
      x_data_[d] = hypre_VectorData(hypre_ParVectorLocalVector(x_[d]));

   for (i = 0; i < size_; i++)
      for (d = 0; d < dim; d++)
         x_data[dim*i+d] = x_data_[d][i];

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGBlockSolve
 *
 * Apply the block-diagonal solver diag(B) to the system diag(A) x = b.
 * Here B is a given BoomerAMG solver for A, while x and b are "block"
 * parallel vectors.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_BoomerAMGBlockSolve(void *B,
                                    hypre_ParCSRMatrix *A,
                                    hypre_ParVector *b,
                                    hypre_ParVector *x)
{
   HYPRE_Int d, dim;

   hypre_ParVector *b_[3];
   hypre_ParVector *x_[3];

   dim = hypre_ParVectorGlobalSize(x) / hypre_ParCSRMatrixGlobalNumRows(A);

   if (dim == 1)
   {
      hypre_BoomerAMGSolve(B, A, b, x);
      return hypre_error_flag;
   }

   for (d = 0; d < dim; d++)
   {
      b_[d] = hypre_ParVectorInRangeOf(A);
      x_[d] = hypre_ParVectorInRangeOf(A);
   }

   hypre_ParVectorBlockSplit(b, b_, dim);
   hypre_ParVectorBlockSplit(x, x_, dim);

   for (d = 0; d < dim; d++)
      hypre_BoomerAMGSolve(B, A, b_[d], x_[d]);

   hypre_ParVectorBlockGather(x, x_, dim);

   for (d = 0; d < dim; d++)
   {
      hypre_ParVectorDestroy(b_[d]);
      hypre_ParVectorDestroy(x_[d]);
   }

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixFixZeroRows
 *
 * For every zero row in the matrix: set the diagonal element to 1.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_ParCSRMatrixFixZeroRows(hypre_ParCSRMatrix *A)
{
   HYPRE_Int i, j;
   double l1_norm;
   HYPRE_Int num_rows = hypre_ParCSRMatrixNumRows(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int *A_diag_I = hypre_CSRMatrixI(A_diag);
   HYPRE_Int *A_diag_J = hypre_CSRMatrixJ(A_diag);
   double *A_diag_data = hypre_CSRMatrixData(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int *A_offd_I = hypre_CSRMatrixI(A_offd);
   double *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);

   /* a row will be considered zero if its l1 norm is less than eps */
   double eps = DBL_EPSILON * 1e+4;

   for (i = 0; i < num_rows; i++)
   {
      l1_norm = 0.0;
      for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
         l1_norm += fabs(A_diag_data[j]);
      if (num_cols_offd)
         for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
            l1_norm += fabs(A_offd_data[j]);

      if (l1_norm < eps)
      {
         for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
            if (A_diag_J[j] == i)
               A_diag_data[j] = 1.0;
            else
               A_diag_data[j] = 0.0;
         if (num_cols_offd)
            for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
               A_offd_data[j] = 0.0;
      }
   }

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_ParCSRComputeL1Norms
 *
 * Compute the l1 norms of the rows of a given matrix, depending on
 * the option parameter:
 *
 * option 1 = Compute the l1 norm of the rows
 * option 2 = Compute the l1 norm of the (processor) off-diagonal
 *            part of the rows plus the diagonal of A
 * option 3 = Compute the l2 norm^2 of the rows
 * option 4 = Truncated version of option 2 based on Remark 6.2 in "Multigrid
 *            Smoothers for Ultra-Parallel Computing"
 *
 * The above computations are done in a CF manner, whenever the provided
 * cf_marker is not NULL.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A,
                                     HYPRE_Int option,
                                     HYPRE_Int *cf_marker,
                                     double **l1_norm_ptr)
{
   HYPRE_Int i, j;
   HYPRE_Int num_rows = hypre_ParCSRMatrixNumRows(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int *A_diag_I = hypre_CSRMatrixI(A_diag);
   HYPRE_Int *A_diag_J = hypre_CSRMatrixJ(A_diag);
   double *A_diag_data = hypre_CSRMatrixData(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int *A_offd_I = hypre_CSRMatrixI(A_offd);
   HYPRE_Int *A_offd_J = hypre_CSRMatrixJ(A_offd);
   double *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);

   double diag;
   double *l1_norm = hypre_TAlloc(double, num_rows);

   HYPRE_Int *cf_marker_offd = NULL;
   HYPRE_Int cf_diag;

   /* collect the cf marker data from other procs */
   if (cf_marker != NULL)
   {
      HYPRE_Int index;
      HYPRE_Int num_sends;
      HYPRE_Int start;
      HYPRE_Int *int_buf_data = NULL;

      hypre_ParCSRCommPkg  *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      hypre_ParCSRCommHandle *comm_handle;

      if (num_cols_offd)
         cf_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends))
         int_buf_data = hypre_CTAlloc(HYPRE_Int,
                                      hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
         {
            int_buf_data[index++] = cf_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
         }
      }
      comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
                                                 cf_marker_offd);
      hypre_ParCSRCommHandleDestroy(comm_handle);
      hypre_TFree(int_buf_data);
   }

   if (option == 1)
   {
      for (i = 0; i < num_rows; i++)
      {
         l1_norm[i] = 0.0;
         if (cf_marker == NULL)
         {
            /* Add the l1 norm of the diag part of the ith row */
            for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               l1_norm[i] += fabs(A_diag_data[j]);
            /* Add the l1 norm of the offd part of the ith row */
            if (num_cols_offd)
            {
               for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                  l1_norm[i] += fabs(A_offd_data[j]);
            }
         }
         else
         {
            cf_diag = cf_marker[i];
            /* Add the CF l1 norm of the diag part of the ith row */
            for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               if (cf_diag == cf_marker[A_diag_J[j]])
                  l1_norm[i] += fabs(A_diag_data[j]);
            /* Add the CF l1 norm of the offd part of the ith row */
            if (num_cols_offd)
            {
               for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                  if (cf_diag == cf_marker_offd[A_offd_J[j]])
                     l1_norm[i] += fabs(A_offd_data[j]);
            }
         }
      }
   }
   else if (option == 2)
   {
      for (i = 0; i < num_rows; i++)
      {
         /* Add the diag element of the ith row */
         l1_norm[i] = fabs(A_diag_data[A_diag_I[i]]);
         if (cf_marker == NULL)
         {
            /* Add the l1 norm of the offd part of the ith row */
            if (num_cols_offd)
            {
               for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                  l1_norm[i] += fabs(A_offd_data[j]);
            }
         }
         else
         {
            cf_diag = cf_marker[i];
            /* Add the CF l1 norm of the offd part of the ith row */
            if (num_cols_offd)
            {
               for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                  if (cf_diag == cf_marker_offd[A_offd_J[j]])
                     l1_norm[i] += fabs(A_offd_data[j]);
            }
         }
      }
   }
   else if (option == 3)
   {
      for (i = 0; i < num_rows; i++)
      {
         l1_norm[i] = 0.0;
         for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
            l1_norm[i] += A_diag_data[j] * A_diag_data[j];
         if (num_cols_offd)
            for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
               l1_norm[i] += A_offd_data[j] * A_offd_data[j];
      }
   }
   else if (option == 4)
   {
      for (i = 0; i < num_rows; i++)
      {
         /* Add the diag element of the ith row */
         diag = l1_norm[i] = fabs(A_diag_data[A_diag_I[i]]);
         if (cf_marker == NULL)
         {
            /* Add the scaled l1 norm of the offd part of the ith row */
            if (num_cols_offd)
            {
               for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                  l1_norm[i] += 0.5*fabs(A_offd_data[j]);
            }
         }
         else
         {
            cf_diag = cf_marker[i];
            /* Add the scaled CF l1 norm of the offd part of the ith row */
            if (num_cols_offd)
            {
               for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                  if (cf_diag == cf_marker_offd[A_offd_J[j]])
                     l1_norm[i] += 0.5*fabs(A_offd_data[j]);
            }
         }

         /* Truncate according to Remark 6.2 */
         if (l1_norm[i] <= 4.0/3.0*diag)
            l1_norm[i] = diag;
      }
   }

   /* Handle negative definite matrices */
   for (i = 0; i < num_rows; i++)
      if (A_diag_data[A_diag_I[i]] < 0)
         l1_norm[i] = -l1_norm[i];

   for (i = 0; i < num_rows; i++)
      if (fabs(l1_norm[i]) < DBL_EPSILON)
      {
         hypre_error_in_arg(1);
         break;
      }

   hypre_TFree(cf_marker_offd);

   *l1_norm_ptr = l1_norm;

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixSetDiagRows
 *
 * For every row containing only a diagonal element: set it to d.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_ParCSRMatrixSetDiagRows(hypre_ParCSRMatrix *A, double d)
{
   HYPRE_Int i, j;
   HYPRE_Int num_rows = hypre_ParCSRMatrixNumRows(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int *A_diag_I = hypre_CSRMatrixI(A_diag);
   HYPRE_Int *A_diag_J = hypre_CSRMatrixJ(A_diag);
   double *A_diag_data = hypre_CSRMatrixData(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int *A_offd_I = hypre_CSRMatrixI(A_offd);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);

   for (i = 0; i < num_rows; i++)
   {
      j = A_diag_I[i];
      if ((A_diag_I[i+1] == j+1) && (A_diag_J[j] == i) &&
          (!num_cols_offd || (A_offd_I[i+1] == A_offd_I[i])))
      {
         A_diag_data[j] = d;
      }
   }

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSCreate
 *
 * Allocate the AMS solver structure.
 *--------------------------------------------------------------------------*/

void * hypre_AMSCreate()
{
   hypre_AMSData *ams_data;

   ams_data = hypre_CTAlloc(hypre_AMSData, 1);

   /* Default parameters */

   ams_data -> dim = 3;                /* 3D problem */
   ams_data -> maxit = 20;             /* perform at most 20 iterations */
   ams_data -> tol = 1e-6;             /* convergence tolerance */
   ams_data -> print_level = 1;        /* print residual norm at each step */
   ams_data -> cycle_type = 1;         /* a 3-level multiplicative solver */
   ams_data -> A_relax_type = 2;       /* offd-l1-scaled GS */
   ams_data -> A_relax_times = 1;      /* one relaxation sweep */
   ams_data -> A_relax_weight = 1.0;   /* damping parameter */
   ams_data -> A_omega = 1.0;          /* SSOR coefficient */
   ams_data -> A_cheby_order = 2;      /* Cheby: order (1 -4 are vaild) */
   ams_data -> A_cheby_fraction = .3;  /* Cheby: fraction of spectrum to smooth */

   ams_data -> B_G_coarsen_type = 10;  /* HMIS coarsening */
   ams_data -> B_G_agg_levels = 1;     /* Levels of aggressive coarsening */
   ams_data -> B_G_relax_type = 3;     /* hybrid G-S/Jacobi */
   ams_data -> B_G_theta = 0.25;       /* strength threshold */
   ams_data -> B_G_interp_type = 0;    /* interpolation type */
   ams_data -> B_G_Pmax = 0;           /* max nonzero elements in interp. rows */
   ams_data -> B_Pi_coarsen_type = 10; /* HMIS coarsening */
   ams_data -> B_Pi_agg_levels = 1;    /* Levels of aggressive coarsening */
   ams_data -> B_Pi_relax_type = 3;    /* hybrid G-S/Jacobi */
   ams_data -> B_Pi_theta = 0.25;      /* strength threshold */
   ams_data -> B_Pi_interp_type = 0;   /* interpolation type */
   ams_data -> B_Pi_Pmax = 0;          /* max nonzero elements in interp. rows */
   ams_data -> beta_is_zero = 0;       /* the problem has a mass term */

   /* The rest of the fields are initialized using the Set functions */

   ams_data -> A    = NULL;
   ams_data -> G    = NULL;
   ams_data -> A_G  = NULL;
   ams_data -> B_G  = 0;
   ams_data -> Pi   = NULL;
   ams_data -> A_Pi = NULL;
   ams_data -> B_Pi = 0;
   ams_data -> x    = NULL;
   ams_data -> y    = NULL;
   ams_data -> z    = NULL;
   ams_data -> Gx   = NULL;
   ams_data -> Gy   = NULL;
   ams_data -> Gz   = NULL;

   ams_data -> r0  = NULL;
   ams_data -> g0  = NULL;
   ams_data -> r1  = NULL;
   ams_data -> g1  = NULL;
   ams_data -> r2  = NULL;
   ams_data -> g2  = NULL;

   ams_data -> Pix    = NULL;
   ams_data -> Piy    = NULL;
   ams_data -> Piz    = NULL;
   ams_data -> A_Pix  = NULL;
   ams_data -> A_Piy  = NULL;
   ams_data -> A_Piz  = NULL;
   ams_data -> B_Pix  = 0;
   ams_data -> B_Piy  = 0;
   ams_data -> B_Piz  = 0;

   ams_data -> interior_nodes       = NULL;
   ams_data -> G0                   = NULL;
   ams_data -> A_G0                 = NULL;
   ams_data -> B_G0                 = 0;
   ams_data -> projection_frequency = 5;

   ams_data -> A_l1_norms = NULL;
   ams_data -> A_max_eig_est = 0;
   ams_data -> A_min_eig_est = 0;

   ams_data -> owns_Pi   = 1;
   ams_data -> owns_A_G  = 0;
   ams_data -> owns_A_Pi = 0;

   return (void *) ams_data;
}

/*--------------------------------------------------------------------------
 * hypre_AMSDestroy
 *
 * Deallocate the AMS solver structure. Note that the input data (given
 * through the Set functions) is not destroyed.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSDestroy(void *solver)
{
   hypre_AMSData *ams_data = solver;

   if (!ams_data)
   {
      hypre_error_in_arg(1);
      return hypre_error_flag;
   }

   if (ams_data -> owns_A_G)
      if (ams_data -> A_G)
         hypre_ParCSRMatrixDestroy(ams_data -> A_G);
   if (!ams_data -> beta_is_zero)
      if (ams_data -> B_G)
         HYPRE_BoomerAMGDestroy(ams_data -> B_G);

   if (ams_data -> owns_Pi && ams_data -> Pi)
      hypre_ParCSRMatrixDestroy(ams_data -> Pi);
   if (ams_data -> owns_A_Pi)
      if (ams_data -> A_Pi)
         hypre_ParCSRMatrixDestroy(ams_data -> A_Pi);
   if (ams_data -> B_Pi)
      HYPRE_BoomerAMGDestroy(ams_data -> B_Pi);

   if (ams_data -> owns_Pi && ams_data -> Pix)
      hypre_ParCSRMatrixDestroy(ams_data -> Pix);
   if (ams_data -> A_Pix)
      hypre_ParCSRMatrixDestroy(ams_data -> A_Pix);
   if (ams_data -> B_Pix)
      HYPRE_BoomerAMGDestroy(ams_data -> B_Pix);
   if (ams_data -> owns_Pi && ams_data -> Piy)
      hypre_ParCSRMatrixDestroy(ams_data -> Piy);
   if (ams_data -> A_Piy)
      hypre_ParCSRMatrixDestroy(ams_data -> A_Piy);
   if (ams_data -> B_Piy)
      HYPRE_BoomerAMGDestroy(ams_data -> B_Piy);
   if (ams_data -> owns_Pi && ams_data -> Piz)
      hypre_ParCSRMatrixDestroy(ams_data -> Piz);
   if (ams_data -> A_Piz)
      hypre_ParCSRMatrixDestroy(ams_data -> A_Piz);
   if (ams_data -> B_Piz)
      HYPRE_BoomerAMGDestroy(ams_data -> B_Piz);

   if (ams_data -> r0)
      hypre_ParVectorDestroy(ams_data -> r0);
   if (ams_data -> g0)
      hypre_ParVectorDestroy(ams_data -> g0);
   if (ams_data -> r1)
      hypre_ParVectorDestroy(ams_data -> r1);
   if (ams_data -> g1)
      hypre_ParVectorDestroy(ams_data -> g1);
   if (ams_data -> r2)
      hypre_ParVectorDestroy(ams_data -> r2);
   if (ams_data -> g2)
      hypre_ParVectorDestroy(ams_data -> g2);

   if (ams_data -> G0)
      hypre_ParCSRMatrixDestroy(ams_data -> A);
   if (ams_data -> G0)
      hypre_ParCSRMatrixDestroy(ams_data -> G0);
   if (ams_data -> A_G0)
      hypre_ParCSRMatrixDestroy(ams_data -> A_G0);
   if (ams_data -> B_G0)
      HYPRE_BoomerAMGDestroy(ams_data -> B_G0);

   if (ams_data -> A_l1_norms)
      hypre_TFree(ams_data -> A_l1_norms);

   /* G, x, y ,z, Gx, Gy and Gz are not destroyed */

   if (ams_data)
      hypre_TFree(ams_data);

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetDimension
 *
 * Set problem dimension (2 or 3). By default we assume dim = 3.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetDimension(void *solver,
                                HYPRE_Int dim)
{
   hypre_AMSData *ams_data = solver;

   if (dim != 2 && dim != 3)
      hypre_error_in_arg(2);

   ams_data -> dim = dim;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetDiscreteGradient
 *
 * Set the discrete gradient matrix G.
 * This function should be called before hypre_AMSSetup()!
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetDiscreteGradient(void *solver,
                                       hypre_ParCSRMatrix *G)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> G = G;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetCoordinateVectors
 *
 * Set the x, y and z coordinates of the vertices in the mesh.
 *
 * Either SetCoordinateVectors or SetEdgeConstantVectors should be
 * called before hypre_AMSSetup()!
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetCoordinateVectors(void *solver,
                                        hypre_ParVector *x,
                                        hypre_ParVector *y,
                                        hypre_ParVector *z)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> x = x;
   ams_data -> y = y;
   ams_data -> z = z;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetEdgeConstantVectors
 *
 * Set the vectors Gx, Gy and Gz which give the representations of
 * the constant vector fields (1,0,0), (0,1,0) and (0,0,1) in the
 * edge element basis.
 *
 * Either SetCoordinateVectors or SetEdgeConstantVectors should be
 * called before hypre_AMSSetup()!
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetEdgeConstantVectors(void *solver,
                                          hypre_ParVector *Gx,
                                          hypre_ParVector *Gy,
                                          hypre_ParVector *Gz)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> Gx = Gx;
   ams_data -> Gy = Gy;
   ams_data -> Gz = Gz;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetInterpolations
 *
 * Set the (components of) the Nedelec interpolation matrix Pi=[Pix,Piy,Piz].
 *
 * This function is generally intended to be used only for high-order Nedelec
 * discretizations (in the lowest order case, Pi is constructed internally in
 * AMS from the discreet gradient matrix and the coordinates of the vertices),
 * though it can also be used in the lowest-order case or for other types of
 * discretizations (e.g. ones based on the second family of Nedelec elements).
 *
 * By definition, Pi is the matrix representation of the linear operator that
 * interpolates (high-order) vector nodal finite elements into the (high-order)
 * Nedelec space. The component matrices are defined as Pix phi = Pi (phi,0,0)
 * and similarly for Piy and Piz. Note that all these operators depend on the
 * choice of the basis and degrees of freedom in the high-order spaces.
 *
 * The column numbering of Pi should be node-based, i.e. the x/y/z components of
 * the first node (vertex or high-order dof) should be listed first, followed by
 * the x/y/z components of the second node and so on (see the documentation of
 * HYPRE_BoomerAMGSetDofFunc).
 *
 * If used, this function should be called before hypre_AMSSetup() and there is
 * no need to provide the vertex coordinates. Furthermore, only one of the sets
 * {Pi} and {Pix,Piy,Piz} needs to be specified (though it is OK to provide
 * both).  If Pix is NULL, then scalar Pi-based AMS cycles, i.e. those with
 * cycle_type > 10, will be unavailable.  Similarly, AMS cycles based on
 * monolithic Pi (cycle_type < 10) require that Pi is not NULL.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetInterpolations(void *solver,
                                     hypre_ParCSRMatrix *Pi,
                                     hypre_ParCSRMatrix *Pix,
                                     hypre_ParCSRMatrix *Piy,
                                     hypre_ParCSRMatrix *Piz)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> Pi = Pi;
   ams_data -> Pix = Pix;
   ams_data -> Piy = Piy;
   ams_data -> Piz = Piz;
   ams_data -> owns_Pi = 0;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetAlphaPoissonMatrix
 *
 * Set the matrix corresponding to the Poisson problem with coefficient
 * alpha (the curl-curl term coefficient in the Maxwell problem).
 *
 * If this function is called, the coarse space solver on the range
 * of Pi^T is a block-diagonal version of A_Pi. If this function is not
 * called, the coarse space solver on the range of Pi^T is constructed
 * as Pi^T A Pi in hypre_AMSSetup().
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetAlphaPoissonMatrix(void *solver,
                                         hypre_ParCSRMatrix *A_Pi)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> A_Pi = A_Pi;

   /* Penalize the eliminated degrees of freedom */
   hypre_ParCSRMatrixSetDiagRows(A_Pi, DBL_MAX);

   /* Make sure that the first entry in each row is the diagonal one. */
   /* hypre_CSRMatrixReorder(hypre_ParCSRMatrixDiag(A_Pi)); */

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetBetaPoissonMatrix
 *
 * Set the matrix corresponding to the Poisson problem with coefficient
 * beta (the mass term coefficient in the Maxwell problem).
 *
 * This function call is optional - if not given, the Poisson matrix will
 * be computed in hypre_AMSSetup(). If the given matrix is NULL, we assume
 * that beta is 0 and use two-level (instead of three-level) methods.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetBetaPoissonMatrix(void *solver,
                                        hypre_ParCSRMatrix *A_G)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> A_G = A_G;
   if (!A_G)
      ams_data -> beta_is_zero = 1;
   else
   {
      /* Penalize the eliminated degrees of freedom */
      hypre_ParCSRMatrixSetDiagRows(A_G, DBL_MAX);

      /* Make sure that the first entry in each row is the diagonal one. */
      /* hypre_CSRMatrixReorder(hypre_ParCSRMatrixDiag(A_G)); */
   }
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetInteriorNodes
 *
 * Set the list of nodes which are interior to the zero-conductivity region.
 *
 * Should be called before hypre_AMSSetup()!
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetInteriorNodes(void *solver,
                                    hypre_ParVector *interior_nodes)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> interior_nodes = interior_nodes;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetProjectionFrequency
 *
 * How often to project the r.h.s. onto the compatible sub-space Ker(G0^T),
 * when iterating with the solver.
 *
 * The default value is every 5th iteration.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetProjectionFrequency(void *solver,
                                          HYPRE_Int projection_frequency)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> projection_frequency = projection_frequency;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetMaxIter
 *
 * Set the maximum number of iterations in the three-level method.
 * The default value is 20. To use the AMS solver as a preconditioner,
 * set maxit to 1, tol to 0.0 and print_level to 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetMaxIter(void *solver,
                              HYPRE_Int maxit)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> maxit = maxit;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetTol
 *
 * Set the convergence tolerance (if the method is used as a solver).
 * The default value is 1e-6.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetTol(void *solver,
                          double tol)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> tol = tol;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetCycleType
 *
 * Choose which three-level solver to use. Possible values are:
 *
 *   1 = 3-level multipl. solver (01210)      <-- small solution time
 *   2 = 3-level additive solver (0+1+2)
 *   3 = 3-level multipl. solver (02120)
 *   4 = 3-level additive solver (010+2)
 *   5 = 3-level multipl. solver (0102010)    <-- small solution time
 *   6 = 3-level additive solver (1+020)
 *   7 = 3-level multipl. solver (0201020)    <-- small number of iterations
 *   8 = 3-level additive solver (0(1+2)0)    <-- small solution time
 *   9 = 3-level multipl. solver (01210) with discrete divergence
 *  11 = 5-level multipl. solver (013454310)  <-- small solution time, memory
 *  12 = 5-level additive solver (0+1+3+4+5)
 *  13 = 5-level multipl. solver (034515430)  <-- small solution time, memory
 *  14 = 5-level additive solver (01(3+4+5)10)
 *  20 = 2-level multipl. solver (0[12]0)
 *
 *   0 = a Hiptmair-like smoother (010)
 *
 * The default value is 1.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetCycleType(void *solver,
                                HYPRE_Int cycle_type)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> cycle_type = cycle_type;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetPrintLevel
 *
 * Control how much information is printed during the solution iterations.
 * The defaut values is 1 (print residual norm at each step).
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetPrintLevel(void *solver,
                                 HYPRE_Int print_level)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> print_level = print_level;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetSmoothingOptions
 *
 * Set relaxation parameters for A. Default values: 2, 1, 1.0, 1.0.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetSmoothingOptions(void *solver,
                                       HYPRE_Int A_relax_type,
                                       HYPRE_Int A_relax_times,
                                       double A_relax_weight,
                                       double A_omega)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> A_relax_type = A_relax_type;
   ams_data -> A_relax_times = A_relax_times;
   ams_data -> A_relax_weight = A_relax_weight;
   ams_data -> A_omega = A_omega;
   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_AMSSetChebySmoothingOptions
 *  AB: note: this could be added to the above,
 *      but I didn't want to change parameter list)
 * Set parameters for chebyshev smoother for A. Default values: 2,.3.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetChebySmoothingOptions(void *solver,
                                            HYPRE_Int A_cheby_order,
                                            HYPRE_Int A_cheby_fraction)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> A_cheby_order =  A_cheby_order;
   ams_data -> A_cheby_fraction =  A_cheby_fraction;

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_AMSSetAlphaAMGOptions
 *
 * Set AMG parameters for B_Pi. Default values: 10, 1, 3, 0.25, 0, 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetAlphaAMGOptions(void *solver,
                                      HYPRE_Int B_Pi_coarsen_type,
                                      HYPRE_Int B_Pi_agg_levels,
                                      HYPRE_Int B_Pi_relax_type,
                                      double B_Pi_theta,
                                      HYPRE_Int B_Pi_interp_type,
                                      HYPRE_Int B_Pi_Pmax)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> B_Pi_coarsen_type = B_Pi_coarsen_type;
   ams_data -> B_Pi_agg_levels = B_Pi_agg_levels;
   ams_data -> B_Pi_relax_type = B_Pi_relax_type;
   ams_data -> B_Pi_theta = B_Pi_theta;
   ams_data -> B_Pi_interp_type = B_Pi_interp_type;
   ams_data -> B_Pi_Pmax = B_Pi_Pmax;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetBetaAMGOptions
 *
 * Set AMG parameters for B_G. Default values: 10, 1, 3, 0.25, 0, 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetBetaAMGOptions(void *solver,
                                     HYPRE_Int B_G_coarsen_type,
                                     HYPRE_Int B_G_agg_levels,
                                     HYPRE_Int B_G_relax_type,
                                     double B_G_theta,
                                     HYPRE_Int B_G_interp_type,
                                     HYPRE_Int B_G_Pmax)
{
   hypre_AMSData *ams_data = solver;
   ams_data -> B_G_coarsen_type = B_G_coarsen_type;
   ams_data -> B_G_agg_levels = B_G_agg_levels;
   ams_data -> B_G_relax_type = B_G_relax_type;
   ams_data -> B_G_theta = B_G_theta;
   ams_data -> B_G_interp_type = B_G_interp_type;
   ams_data -> B_G_Pmax = B_G_Pmax;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSComputePi
 *
 * Construct the Pi interpolation matrix, which maps the space of vector
 * linear finite elements to the space of edge finite elements.
 *
 * The construction is based on the fact that Pi = [Pi_x, Pi_y, Pi_z],
 * where each block has the same sparsity structure as G, and the entries
 * can be computed from the vectors Gx, Gy, Gz.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSComputePi(hypre_ParCSRMatrix *A,
                             hypre_ParCSRMatrix *G,
                             hypre_ParVector *Gx,
                             hypre_ParVector *Gy,
                             hypre_ParVector *Gz,
                             HYPRE_Int dim,
                             hypre_ParCSRMatrix **Pi_ptr)
{
   hypre_ParCSRMatrix *Pi;

   /* Compute Pi = [Pi_x, Pi_y, Pi_z] */
   {
      HYPRE_Int i, j, d;

      double *Gx_data, *Gy_data, *Gz_data;

      MPI_Comm comm = hypre_ParCSRMatrixComm(G);
      HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(G);
      HYPRE_Int global_num_cols = dim*hypre_ParCSRMatrixGlobalNumCols(G);
      HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(G);
      HYPRE_Int col_starts_size, *col_starts;
      HYPRE_Int num_cols_offd = dim*hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(G));
      HYPRE_Int num_nonzeros_diag = dim*hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(G));
      HYPRE_Int num_nonzeros_offd = dim*hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(G));
      HYPRE_Int *col_starts_G = hypre_ParCSRMatrixColStarts(G);
#ifdef HYPRE_NO_GLOBAL_PARTITION
      col_starts_size = 2;
#else
      HYPRE_Int num_procs;
      hypre_MPI_Comm_size(comm, &num_procs);
      col_starts_size = num_procs+1;
#endif
      col_starts = hypre_TAlloc(HYPRE_Int,col_starts_size);
      for (i = 0; i < col_starts_size; i++)
         col_starts[i] = dim * col_starts_G[i];

      Pi = hypre_ParCSRMatrixCreate(comm,
                                    global_num_rows,
                                    global_num_cols,
                                    row_starts,
                                    col_starts,
                                    num_cols_offd,
                                    num_nonzeros_diag,
                                    num_nonzeros_offd);

      hypre_ParCSRMatrixOwnsData(Pi) = 1;
      hypre_ParCSRMatrixOwnsRowStarts(Pi) = 0;
      hypre_ParCSRMatrixOwnsColStarts(Pi) = 1;

      hypre_ParCSRMatrixInitialize(Pi);

      Gx_data = hypre_VectorData(hypre_ParVectorLocalVector(Gx));
      Gy_data = hypre_VectorData(hypre_ParVectorLocalVector(Gy));
      if (dim == 3)
         Gz_data = hypre_VectorData(hypre_ParVectorLocalVector(Gz));

      /* Fill-in the diagonal part */
      {
         hypre_CSRMatrix *G_diag = hypre_ParCSRMatrixDiag(G);
         HYPRE_Int *G_diag_I = hypre_CSRMatrixI(G_diag);
         HYPRE_Int *G_diag_J = hypre_CSRMatrixJ(G_diag);

         HYPRE_Int G_diag_nrows = hypre_CSRMatrixNumRows(G_diag);
         HYPRE_Int G_diag_nnz = hypre_CSRMatrixNumNonzeros(G_diag);

         hypre_CSRMatrix *Pi_diag = hypre_ParCSRMatrixDiag(Pi);
         HYPRE_Int *Pi_diag_I = hypre_CSRMatrixI(Pi_diag);
         HYPRE_Int *Pi_diag_J = hypre_CSRMatrixJ(Pi_diag);
         double *Pi_diag_data = hypre_CSRMatrixData(Pi_diag);

         for (i = 0; i < G_diag_nrows+1; i++)
            Pi_diag_I[i] = dim * G_diag_I[i];

         for (i = 0; i < G_diag_nnz; i++)
            for (d = 0; d < dim; d++)
               Pi_diag_J[dim*i+d] = dim*G_diag_J[i]+d;

         for (i = 0; i < G_diag_nrows; i++)
            for (j = G_diag_I[i]; j < G_diag_I[i+1]; j++)
            {
               *Pi_diag_data++ = 0.5 * Gx_data[i];
               *Pi_diag_data++ = 0.5 * Gy_data[i];
               if (dim == 3)
                  *Pi_diag_data++ = 0.5 * Gz_data[i];
            }
      }

      /* Fill-in the off-diagonal part */
      {
         hypre_CSRMatrix *G_offd = hypre_ParCSRMatrixOffd(G);
         HYPRE_Int *G_offd_I = hypre_CSRMatrixI(G_offd);
         HYPRE_Int *G_offd_J = hypre_CSRMatrixJ(G_offd);

         HYPRE_Int G_offd_nrows = hypre_CSRMatrixNumRows(G_offd);
         HYPRE_Int G_offd_ncols = hypre_CSRMatrixNumCols(G_offd);
         HYPRE_Int G_offd_nnz = hypre_CSRMatrixNumNonzeros(G_offd);

         hypre_CSRMatrix *Pi_offd = hypre_ParCSRMatrixOffd(Pi);
         HYPRE_Int *Pi_offd_I = hypre_CSRMatrixI(Pi_offd);
         HYPRE_Int *Pi_offd_J = hypre_CSRMatrixJ(Pi_offd);
         double *Pi_offd_data = hypre_CSRMatrixData(Pi_offd);

         HYPRE_Int *G_cmap = hypre_ParCSRMatrixColMapOffd(G);
         HYPRE_Int *Pi_cmap = hypre_ParCSRMatrixColMapOffd(Pi);

         if (G_offd_ncols)
            for (i = 0; i < G_offd_nrows+1; i++)
               Pi_offd_I[i] = dim * G_offd_I[i];

         for (i = 0; i < G_offd_nnz; i++)
            for (d = 0; d < dim; d++)
               Pi_offd_J[dim*i+d] = dim*G_offd_J[i]+d;

         for (i = 0; i < G_offd_nrows; i++)
            for (j = G_offd_I[i]; j < G_offd_I[i+1]; j++)
            {
               *Pi_offd_data++ = 0.5 * Gx_data[i];
               *Pi_offd_data++ = 0.5 * Gy_data[i];
               if (dim == 3)
                  *Pi_offd_data++ = 0.5 * Gz_data[i];
            }

         for (i = 0; i < G_offd_ncols; i++)
            for (d = 0; d < dim; d++)
               Pi_cmap[dim*i+d] = dim*G_cmap[i]+d;
      }

   }

   *Pi_ptr = Pi;

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSComputePixyz
 *
 * Construct the components Pix, Piy, Piz of the interpolation matrix Pi,
 * which maps the space of vector linear finite elements to the space of
 * edge finite elements.
 *
 * The construction is based on the fact that each component has the same
 * sparsity structure as G, and the entries can be computed from the vectors
 * Gx, Gy, Gz.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSComputePixyz(hypre_ParCSRMatrix *A,
                                hypre_ParCSRMatrix *G,
                                hypre_ParVector *Gx,
                                hypre_ParVector *Gy,
                                hypre_ParVector *Gz,
                                HYPRE_Int dim,
                                hypre_ParCSRMatrix **Pix_ptr,
                                hypre_ParCSRMatrix **Piy_ptr,
                                hypre_ParCSRMatrix **Piz_ptr)
{
   hypre_ParCSRMatrix *Pix, *Piy, *Piz;

   /* Compute Pix, Piy, Piz  */
   {
      HYPRE_Int i, j;

      double *Gx_data, *Gy_data, *Gz_data;

      MPI_Comm comm = hypre_ParCSRMatrixComm(G);
      HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(G);
      HYPRE_Int global_num_cols = hypre_ParCSRMatrixGlobalNumCols(G);
      HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(G);
      HYPRE_Int *col_starts = hypre_ParCSRMatrixColStarts(G);
      HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(G));
      HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(G));
      HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(G));

      Pix = hypre_ParCSRMatrixCreate(comm,
                                     global_num_rows,
                                     global_num_cols,
                                     row_starts,
                                     col_starts,
                                     num_cols_offd,
                                     num_nonzeros_diag,
                                     num_nonzeros_offd);
      hypre_ParCSRMatrixOwnsData(Pix) = 1;
      hypre_ParCSRMatrixOwnsRowStarts(Pix) = 0;
      hypre_ParCSRMatrixOwnsColStarts(Pix) = 0;
      hypre_ParCSRMatrixInitialize(Pix);

      Piy = hypre_ParCSRMatrixCreate(comm,
                                     global_num_rows,
                                     global_num_cols,
                                     row_starts,
                                     col_starts,
                                     num_cols_offd,
                                     num_nonzeros_diag,
                                     num_nonzeros_offd);
      hypre_ParCSRMatrixOwnsData(Piy) = 1;
      hypre_ParCSRMatrixOwnsRowStarts(Piy) = 0;
      hypre_ParCSRMatrixOwnsColStarts(Piy) = 0;
      hypre_ParCSRMatrixInitialize(Piy);

      if (dim == 3)
      {
         Piz = hypre_ParCSRMatrixCreate(comm,
                                        global_num_rows,
                                        global_num_cols,
                                        row_starts,
                                        col_starts,
                                        num_cols_offd,
                                        num_nonzeros_diag,
                                        num_nonzeros_offd);
         hypre_ParCSRMatrixOwnsData(Piz) = 1;
         hypre_ParCSRMatrixOwnsRowStarts(Piz) = 0;
         hypre_ParCSRMatrixOwnsColStarts(Piz) = 0;
         hypre_ParCSRMatrixInitialize(Piz);
      }

      Gx_data = hypre_VectorData(hypre_ParVectorLocalVector(Gx));
      Gy_data = hypre_VectorData(hypre_ParVectorLocalVector(Gy));
      if (dim == 3)
         Gz_data = hypre_VectorData(hypre_ParVectorLocalVector(Gz));

      /* Fill-in the diagonal part */
      if (dim == 3)
      {
         hypre_CSRMatrix *G_diag = hypre_ParCSRMatrixDiag(G);
         HYPRE_Int *G_diag_I = hypre_CSRMatrixI(G_diag);
         HYPRE_Int *G_diag_J = hypre_CSRMatrixJ(G_diag);

         HYPRE_Int G_diag_nrows = hypre_CSRMatrixNumRows(G_diag);
         HYPRE_Int G_diag_nnz = hypre_CSRMatrixNumNonzeros(G_diag);

         hypre_CSRMatrix *Pix_diag = hypre_ParCSRMatrixDiag(Pix);
         HYPRE_Int *Pix_diag_I = hypre_CSRMatrixI(Pix_diag);
         HYPRE_Int *Pix_diag_J = hypre_CSRMatrixJ(Pix_diag);
         double *Pix_diag_data = hypre_CSRMatrixData(Pix_diag);

         hypre_CSRMatrix *Piy_diag = hypre_ParCSRMatrixDiag(Piy);
         HYPRE_Int *Piy_diag_I = hypre_CSRMatrixI(Piy_diag);
         HYPRE_Int *Piy_diag_J = hypre_CSRMatrixJ(Piy_diag);
         double *Piy_diag_data = hypre_CSRMatrixData(Piy_diag);

         hypre_CSRMatrix *Piz_diag = hypre_ParCSRMatrixDiag(Piz);
         HYPRE_Int *Piz_diag_I = hypre_CSRMatrixI(Piz_diag);
         HYPRE_Int *Piz_diag_J = hypre_CSRMatrixJ(Piz_diag);
         double *Piz_diag_data = hypre_CSRMatrixData(Piz_diag);

         for (i = 0; i < G_diag_nrows+1; i++)
         {
            Pix_diag_I[i] = G_diag_I[i];
            Piy_diag_I[i] = G_diag_I[i];
            Piz_diag_I[i] = G_diag_I[i];
         }

         for (i = 0; i < G_diag_nnz; i++)
         {
            Pix_diag_J[i] = G_diag_J[i];
            Piy_diag_J[i] = G_diag_J[i];
            Piz_diag_J[i] = G_diag_J[i];
         }

         for (i = 0; i < G_diag_nrows; i++)
            for (j = G_diag_I[i]; j < G_diag_I[i+1]; j++)
            {
               *Pix_diag_data++ = 0.5 * Gx_data[i];
               *Piy_diag_data++ = 0.5 * Gy_data[i];
               *Piz_diag_data++ = 0.5 * Gz_data[i];
            }
      }
      else
      {
         hypre_CSRMatrix *G_diag = hypre_ParCSRMatrixDiag(G);
         HYPRE_Int *G_diag_I = hypre_CSRMatrixI(G_diag);
         HYPRE_Int *G_diag_J = hypre_CSRMatrixJ(G_diag);

         HYPRE_Int G_diag_nrows = hypre_CSRMatrixNumRows(G_diag);
         HYPRE_Int G_diag_nnz = hypre_CSRMatrixNumNonzeros(G_diag);

         hypre_CSRMatrix *Pix_diag = hypre_ParCSRMatrixDiag(Pix);
         HYPRE_Int *Pix_diag_I = hypre_CSRMatrixI(Pix_diag);
         HYPRE_Int *Pix_diag_J = hypre_CSRMatrixJ(Pix_diag);
         double *Pix_diag_data = hypre_CSRMatrixData(Pix_diag);

         hypre_CSRMatrix *Piy_diag = hypre_ParCSRMatrixDiag(Piy);
         HYPRE_Int *Piy_diag_I = hypre_CSRMatrixI(Piy_diag);
         HYPRE_Int *Piy_diag_J = hypre_CSRMatrixJ(Piy_diag);
         double *Piy_diag_data = hypre_CSRMatrixData(Piy_diag);

         for (i = 0; i < G_diag_nrows+1; i++)
         {
            Pix_diag_I[i] = G_diag_I[i];
            Piy_diag_I[i] = G_diag_I[i];
         }

         for (i = 0; i < G_diag_nnz; i++)
         {
            Pix_diag_J[i] = G_diag_J[i];
            Piy_diag_J[i] = G_diag_J[i];
         }

         for (i = 0; i < G_diag_nrows; i++)
            for (j = G_diag_I[i]; j < G_diag_I[i+1]; j++)
            {
               *Pix_diag_data++ = 0.5 * Gx_data[i];
               *Piy_diag_data++ = 0.5 * Gy_data[i];
            }
      }


      /* Fill-in the off-diagonal part */
      if (dim == 3)
      {
         hypre_CSRMatrix *G_offd = hypre_ParCSRMatrixOffd(G);
         HYPRE_Int *G_offd_I = hypre_CSRMatrixI(G_offd);
         HYPRE_Int *G_offd_J = hypre_CSRMatrixJ(G_offd);

         HYPRE_Int G_offd_nrows = hypre_CSRMatrixNumRows(G_offd);
         HYPRE_Int G_offd_ncols = hypre_CSRMatrixNumCols(G_offd);
         HYPRE_Int G_offd_nnz = hypre_CSRMatrixNumNonzeros(G_offd);

         hypre_CSRMatrix *Pix_offd = hypre_ParCSRMatrixOffd(Pix);
         HYPRE_Int *Pix_offd_I = hypre_CSRMatrixI(Pix_offd);
         HYPRE_Int *Pix_offd_J = hypre_CSRMatrixJ(Pix_offd);
         double *Pix_offd_data = hypre_CSRMatrixData(Pix_offd);

         hypre_CSRMatrix *Piy_offd = hypre_ParCSRMatrixOffd(Piy);
         HYPRE_Int *Piy_offd_I = hypre_CSRMatrixI(Piy_offd);
         HYPRE_Int *Piy_offd_J = hypre_CSRMatrixJ(Piy_offd);
         double *Piy_offd_data = hypre_CSRMatrixData(Piy_offd);

         hypre_CSRMatrix *Piz_offd = hypre_ParCSRMatrixOffd(Piz);
         HYPRE_Int *Piz_offd_I = hypre_CSRMatrixI(Piz_offd);
         HYPRE_Int *Piz_offd_J = hypre_CSRMatrixJ(Piz_offd);
         double *Piz_offd_data = hypre_CSRMatrixData(Piz_offd);

         HYPRE_Int *G_cmap = hypre_ParCSRMatrixColMapOffd(G);
         HYPRE_Int *Pix_cmap = hypre_ParCSRMatrixColMapOffd(Pix);
         HYPRE_Int *Piy_cmap = hypre_ParCSRMatrixColMapOffd(Piy);
         HYPRE_Int *Piz_cmap = hypre_ParCSRMatrixColMapOffd(Piz);

         if (G_offd_ncols)
            for (i = 0; i < G_offd_nrows+1; i++)
            {
               Pix_offd_I[i] = G_offd_I[i];
               Piy_offd_I[i] = G_offd_I[i];
               Piz_offd_I[i] = G_offd_I[i];
            }

         for (i = 0; i < G_offd_nnz; i++)
         {
            Pix_offd_J[i] = G_offd_J[i];
            Piy_offd_J[i] = G_offd_J[i];
            Piz_offd_J[i] = G_offd_J[i];
         }

         for (i = 0; i < G_offd_nrows; i++)
            for (j = G_offd_I[i]; j < G_offd_I[i+1]; j++)
            {
               *Pix_offd_data++ = 0.5 * Gx_data[i];
               *Piy_offd_data++ = 0.5 * Gy_data[i];
               *Piz_offd_data++ = 0.5 * Gz_data[i];
            }

         for (i = 0; i < G_offd_ncols; i++)
         {
            Pix_cmap[i] = G_cmap[i];
            Piy_cmap[i] = G_cmap[i];
            Piz_cmap[i] = G_cmap[i];
         }
      }
      else
      {
         hypre_CSRMatrix *G_offd = hypre_ParCSRMatrixOffd(G);
         HYPRE_Int *G_offd_I = hypre_CSRMatrixI(G_offd);
         HYPRE_Int *G_offd_J = hypre_CSRMatrixJ(G_offd);

         HYPRE_Int G_offd_nrows = hypre_CSRMatrixNumRows(G_offd);
         HYPRE_Int G_offd_ncols = hypre_CSRMatrixNumCols(G_offd);
         HYPRE_Int G_offd_nnz = hypre_CSRMatrixNumNonzeros(G_offd);

         hypre_CSRMatrix *Pix_offd = hypre_ParCSRMatrixOffd(Pix);
         HYPRE_Int *Pix_offd_I = hypre_CSRMatrixI(Pix_offd);
         HYPRE_Int *Pix_offd_J = hypre_CSRMatrixJ(Pix_offd);
         double *Pix_offd_data = hypre_CSRMatrixData(Pix_offd);

         hypre_CSRMatrix *Piy_offd = hypre_ParCSRMatrixOffd(Piy);
         HYPRE_Int *Piy_offd_I = hypre_CSRMatrixI(Piy_offd);
         HYPRE_Int *Piy_offd_J = hypre_CSRMatrixJ(Piy_offd);
         double *Piy_offd_data = hypre_CSRMatrixData(Piy_offd);

         HYPRE_Int *G_cmap = hypre_ParCSRMatrixColMapOffd(G);
         HYPRE_Int *Pix_cmap = hypre_ParCSRMatrixColMapOffd(Pix);
         HYPRE_Int *Piy_cmap = hypre_ParCSRMatrixColMapOffd(Piy);

         if (G_offd_ncols)
            for (i = 0; i < G_offd_nrows+1; i++)
            {
               Pix_offd_I[i] = G_offd_I[i];
               Piy_offd_I[i] = G_offd_I[i];
            }

         for (i = 0; i < G_offd_nnz; i++)
         {
            Pix_offd_J[i] = G_offd_J[i];
            Piy_offd_J[i] = G_offd_J[i];
         }

         for (i = 0; i < G_offd_nrows; i++)
            for (j = G_offd_I[i]; j < G_offd_I[i+1]; j++)
            {
               *Pix_offd_data++ = 0.5 * Gx_data[i];
               *Piy_offd_data++ = 0.5 * Gy_data[i];
            }

         for (i = 0; i < G_offd_ncols; i++)
         {
            Pix_cmap[i] = G_cmap[i];
            Piy_cmap[i] = G_cmap[i];
         }
      }
   }

   *Pix_ptr = Pix;
   *Piy_ptr = Piy;
   if (dim == 3)
      *Piz_ptr = Piz;

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSComputeGPi
 *
 * Construct the matrix [G,Pi] which can be considered an interpolation
 * matrix from S_h^4 (4 copies of the scalar linear finite element space)
 * to the edge finite elements space.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSComputeGPi(hypre_ParCSRMatrix *A,
                              hypre_ParCSRMatrix *G,
                              hypre_ParVector *Gx,
                              hypre_ParVector *Gy,
                              hypre_ParVector *Gz,
                              HYPRE_Int dim,
                              hypre_ParCSRMatrix **GPi_ptr)
{
   hypre_ParCSRMatrix *GPi;

   /* Take into account G */
   dim++;

   /* Compute GPi = [Pi_x, Pi_y, Pi_z, G] */
   {
      HYPRE_Int i, j, d;

      double *Gx_data, *Gy_data, *Gz_data;

      MPI_Comm comm = hypre_ParCSRMatrixComm(G);
      HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(G);
      HYPRE_Int global_num_cols = dim*hypre_ParCSRMatrixGlobalNumCols(G);
      HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(G);
      HYPRE_Int col_starts_size, *col_starts;
      HYPRE_Int num_cols_offd = dim*hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(G));
      HYPRE_Int num_nonzeros_diag = dim*hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(G));
      HYPRE_Int num_nonzeros_offd = dim*hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(G));
      HYPRE_Int *col_starts_G = hypre_ParCSRMatrixColStarts(G);
#ifdef HYPRE_NO_GLOBAL_PARTITION
      col_starts_size = 2;
#else
      HYPRE_Int num_procs;
      hypre_MPI_Comm_size(comm, &num_procs);
      col_starts_size = num_procs+1;
#endif
      col_starts = hypre_TAlloc(HYPRE_Int,col_starts_size);
      for (i = 0; i < col_starts_size; i++)
         col_starts[i] = dim * col_starts_G[i];

      GPi = hypre_ParCSRMatrixCreate(comm,
                                     global_num_rows,
                                     global_num_cols,
                                     row_starts,
                                     col_starts,
                                     num_cols_offd,
                                     num_nonzeros_diag,
                                     num_nonzeros_offd);

      hypre_ParCSRMatrixOwnsData(GPi) = 1;
      hypre_ParCSRMatrixOwnsRowStarts(GPi) = 0;
      hypre_ParCSRMatrixOwnsColStarts(GPi) = 1;

      hypre_ParCSRMatrixInitialize(GPi);

      Gx_data = hypre_VectorData(hypre_ParVectorLocalVector(Gx));
      Gy_data = hypre_VectorData(hypre_ParVectorLocalVector(Gy));
      if (dim == 4)
         Gz_data = hypre_VectorData(hypre_ParVectorLocalVector(Gz));

      /* Fill-in the diagonal part */
      {
         hypre_CSRMatrix *G_diag = hypre_ParCSRMatrixDiag(G);
         HYPRE_Int *G_diag_I = hypre_CSRMatrixI(G_diag);
         HYPRE_Int *G_diag_J = hypre_CSRMatrixJ(G_diag);
         double *G_diag_data = hypre_CSRMatrixData(G_diag);

         HYPRE_Int G_diag_nrows = hypre_CSRMatrixNumRows(G_diag);
         HYPRE_Int G_diag_nnz = hypre_CSRMatrixNumNonzeros(G_diag);

         hypre_CSRMatrix *GPi_diag = hypre_ParCSRMatrixDiag(GPi);
         HYPRE_Int *GPi_diag_I = hypre_CSRMatrixI(GPi_diag);
         HYPRE_Int *GPi_diag_J = hypre_CSRMatrixJ(GPi_diag);
         double *GPi_diag_data = hypre_CSRMatrixData(GPi_diag);

         for (i = 0; i < G_diag_nrows+1; i++)
            GPi_diag_I[i] = dim * G_diag_I[i];

         for (i = 0; i < G_diag_nnz; i++)
            for (d = 0; d < dim; d++)
               GPi_diag_J[dim*i+d] = dim*G_diag_J[i]+d;

         for (i = 0; i < G_diag_nrows; i++)
            for (j = G_diag_I[i]; j < G_diag_I[i+1]; j++)
            {
               *GPi_diag_data++ = G_diag_data[j];
               *GPi_diag_data++ = 0.5 * Gx_data[i];
               *GPi_diag_data++ = 0.5 * Gy_data[i];
               if (dim == 4)
                  *GPi_diag_data++ = 0.5 * Gz_data[i];
            }
      }

      /* Fill-in the off-diagonal part */
      {
         hypre_CSRMatrix *G_offd = hypre_ParCSRMatrixOffd(G);
         HYPRE_Int *G_offd_I = hypre_CSRMatrixI(G_offd);
         HYPRE_Int *G_offd_J = hypre_CSRMatrixJ(G_offd);
         double *G_offd_data = hypre_CSRMatrixData(G_offd);

         HYPRE_Int G_offd_nrows = hypre_CSRMatrixNumRows(G_offd);
         HYPRE_Int G_offd_ncols = hypre_CSRMatrixNumCols(G_offd);
         HYPRE_Int G_offd_nnz = hypre_CSRMatrixNumNonzeros(G_offd);

         hypre_CSRMatrix *GPi_offd = hypre_ParCSRMatrixOffd(GPi);
         HYPRE_Int *GPi_offd_I = hypre_CSRMatrixI(GPi_offd);
         HYPRE_Int *GPi_offd_J = hypre_CSRMatrixJ(GPi_offd);
         double *GPi_offd_data = hypre_CSRMatrixData(GPi_offd);

         HYPRE_Int *G_cmap = hypre_ParCSRMatrixColMapOffd(G);
         HYPRE_Int *GPi_cmap = hypre_ParCSRMatrixColMapOffd(GPi);

         if (G_offd_ncols)
            for (i = 0; i < G_offd_nrows+1; i++)
               GPi_offd_I[i] = dim * G_offd_I[i];

         for (i = 0; i < G_offd_nnz; i++)
            for (d = 0; d < dim; d++)
               GPi_offd_J[dim*i+d] = dim*G_offd_J[i]+d;

         for (i = 0; i < G_offd_nrows; i++)
            for (j = G_offd_I[i]; j < G_offd_I[i+1]; j++)
            {
               *GPi_offd_data++ = G_offd_data[j];
               *GPi_offd_data++ = 0.5 * Gx_data[i];
               *GPi_offd_data++ = 0.5 * Gy_data[i];
               if (dim == 4)
                  *GPi_offd_data++ = 0.5 * Gz_data[i];
            }

         for (i = 0; i < G_offd_ncols; i++)
            for (d = 0; d < dim; d++)
               GPi_cmap[dim*i+d] = dim*G_cmap[i]+d;
      }

   }

   *GPi_ptr = GPi;

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSetup
 *
 * Construct the AMS solver components.
 *
 * The following functions need to be called before hypre_AMSSetup():
 * - hypre_AMSSetDimension() (if solving a 2D problem)
 * - hypre_AMSSetDiscreteGradient()
 * - hypre_AMSSetCoordinateVectors() or hypre_AMSSetEdgeConstantVectors
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSetup(void *solver,
                         hypre_ParCSRMatrix *A,
                         hypre_ParVector *b,
                         hypre_ParVector *x)
{
   hypre_AMSData *ams_data = solver;

   HYPRE_Int input_info = 0;

   ams_data -> A = A;

   /* Modifications for problems with zero-conductivity regions */
   if (ams_data -> interior_nodes)
   {
      hypre_ParCSRMatrix *G0t, *Aorig = A;

      /* Make sure that multiple Setup()+Solve() give identical results */
      ams_data -> solve_counter = 0;

      /* Construct the discrete gradient matrix for the zero-conductivity region
         by eliminating the zero-conductivity nodes from G^t */
      hypre_ParCSRMatrixTranspose(ams_data -> G, &G0t, 1);
      {
         HYPRE_Int i, j;
         HYPRE_Int nv = hypre_ParCSRMatrixNumCols(ams_data -> G);
         hypre_CSRMatrix *G0td = hypre_ParCSRMatrixDiag(G0t);
         HYPRE_Int *G0tdI = hypre_CSRMatrixI(G0td);
         double *G0tdA = hypre_CSRMatrixData(G0td);
         hypre_CSRMatrix *G0to = hypre_ParCSRMatrixOffd(G0t);
         HYPRE_Int *G0toI = hypre_CSRMatrixI(G0to);
         double *G0toA = hypre_CSRMatrixData(G0to);
         double *interior_nodes_data=hypre_VectorData(
            hypre_ParVectorLocalVector((hypre_ParVector*) ams_data -> interior_nodes));

         for (i = 0; i < nv; i++)
         {
            if (interior_nodes_data[i] != 1)
            {
               for (j = G0tdI[i]; j < G0tdI[i+1]; j++)
                  G0tdA[j] = 0.0;
               if (G0toI)
                  for (j = G0toI[i]; j < G0toI[i+1]; j++)
                     G0toA[j] = 0.0;
            }
         }
      }
      hypre_ParCSRMatrixTranspose(G0t, & ams_data -> G0, 1);

      /* Construct the subspace matrix A_G0 = G0^T G0 */
      ams_data -> A_G0 = hypre_ParMatmul(G0t, ams_data -> G0);
      hypre_ParCSRMatrixFixZeroRows(ams_data -> A_G0);

      /* Create AMG solver for A_G0 */
      HYPRE_BoomerAMGCreate(&ams_data -> B_G0);
      HYPRE_BoomerAMGSetCoarsenType(ams_data -> B_G0, ams_data -> B_G_coarsen_type);
      HYPRE_BoomerAMGSetAggNumLevels(ams_data -> B_G0, ams_data -> B_G_agg_levels);
      HYPRE_BoomerAMGSetRelaxType(ams_data -> B_G0, ams_data -> B_G_relax_type);
      HYPRE_BoomerAMGSetNumSweeps(ams_data -> B_G0, 1);
      HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_G0, 25);
      HYPRE_BoomerAMGSetTol(ams_data -> B_G0, 0.0);
      HYPRE_BoomerAMGSetMaxIter(ams_data -> B_G0, 3); /* use just a few V-cycles */
      HYPRE_BoomerAMGSetStrongThreshold(ams_data -> B_G0, ams_data -> B_G_theta);
      HYPRE_BoomerAMGSetInterpType(ams_data -> B_G0, ams_data -> B_G_interp_type);
      HYPRE_BoomerAMGSetPMaxElmts(ams_data -> B_G0, ams_data -> B_G_Pmax);
      HYPRE_BoomerAMGSetup(ams_data -> B_G0,
                           (HYPRE_ParCSRMatrix)ams_data -> A_G0,
                           0, 0);

      /* Construct the preconditioner for ams_data->A = A + G0 G0^T.
         NOTE: this can be optimized significantly by taking into account that
         the sparsity pattern of A is subset of the sparsity pattern of G0 G0^T */
      {
         hypre_ParCSRMatrix *A = hypre_ParMatmul(ams_data -> G0, G0t);
         hypre_ParCSRMatrix *B = Aorig;
         hypre_ParCSRMatrix **C_ptr = &ams_data -> A;

         hypre_ParCSRMatrix *C;
         hypre_CSRMatrix *A_local, *B_local, *C_local, *C_tmp;

         MPI_Comm comm = hypre_ParCSRMatrixComm(A);
         HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
         HYPRE_Int global_num_cols = hypre_ParCSRMatrixGlobalNumCols(A);
         HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A);
         HYPRE_Int *col_starts = hypre_ParCSRMatrixColStarts(A);
         HYPRE_Int A_num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
         HYPRE_Int A_num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(A));
         HYPRE_Int A_num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(A));
         HYPRE_Int B_num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(B));
         HYPRE_Int B_num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(B));
         HYPRE_Int B_num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(B));

         A_local = hypre_MergeDiagAndOffd(A);
         B_local = hypre_MergeDiagAndOffd(B);
         /* scale (penalize) G0 G0^T before adding it to the matrix */
         {
            HYPRE_Int i, nnz = hypre_CSRMatrixNumNonzeros(A_local);
            double *data = hypre_CSRMatrixData(A_local);
            double *dataB = hypre_CSRMatrixData(B_local);
            HYPRE_Int nnzB = hypre_CSRMatrixNumNonzeros(B_local);
            double factor, lfactor;
            lfactor = -1;
            for (i = 0; i < nnzB; i++)
               if (fabs(dataB[i]) > lfactor)
                  lfactor = fabs(dataB[i]);
            lfactor *= 1e-10; /* scaling factor: max|A_ij|*1e-10 */
            hypre_MPI_Allreduce(&lfactor, &factor, 1, hypre_MPI_DOUBLE, hypre_MPI_MAX,
                                hypre_ParCSRMatrixComm(A));
            for (i = 0; i < nnz; i++)
               data[i] *= factor;
         }
         C_tmp = hypre_CSRMatrixAdd(A_local, B_local);
         C_local = hypre_CSRMatrixDeleteZeros(C_tmp,0.0);
         if (C_local)
            hypre_CSRMatrixDestroy(C_tmp);
         else
            C_local = C_tmp;

         C = hypre_ParCSRMatrixCreate (comm,
                                       global_num_rows,
                                       global_num_cols,
                                       row_starts,
                                       col_starts,
                                       A_num_cols_offd + B_num_cols_offd,
                                       A_num_nonzeros_diag + B_num_nonzeros_diag,
                                       A_num_nonzeros_offd + B_num_nonzeros_offd);
         GenerateDiagAndOffd(C_local, C,
                             hypre_ParCSRMatrixFirstColDiag(A),
                             hypre_ParCSRMatrixLastColDiag(A));
         hypre_ParCSRMatrixOwnsRowStarts(C) = 0;
         hypre_ParCSRMatrixOwnsColStarts(C) = 1;
         hypre_ParCSRMatrixOwnsColStarts(G0t) = 0;

         hypre_CSRMatrixDestroy(A_local);
         hypre_CSRMatrixDestroy(B_local);
         hypre_CSRMatrixDestroy(C_local);

         hypre_ParCSRMatrixDestroy(A);

         *C_ptr = C;
      }

      hypre_ParCSRMatrixDestroy(G0t);
   }

   /* Make sure that the first entry in each row is the diagonal one. */
   /* hypre_CSRMatrixReorder(hypre_ParCSRMatrixDiag(ams_data -> A)); */

   /* Compute the l1 norm of the rows of A */
   if (ams_data -> A_relax_type >= 1 && ams_data -> A_relax_type <= 4)
      hypre_ParCSRComputeL1Norms(ams_data -> A, ams_data -> A_relax_type,
                                 NULL, &ams_data -> A_l1_norms);

   /* Chebyshev? */
   if (ams_data -> A_relax_type == 16)
   {
      hypre_ParCSRMaxEigEstimateCG(ams_data->A, 1, 10,
                                   &ams_data->A_max_eig_est,
                                   &ams_data->A_min_eig_est);
   }

   /* If not given, compute Gx, Gy and Gz */
   {
      if (ams_data -> x != NULL && ams_data -> y != NULL &&
          (ams_data -> dim == 2 || ams_data -> z != NULL))
         input_info = 1;

      if (ams_data -> Gx != NULL && ams_data -> Gy != NULL &&
          (ams_data -> dim == 2 || ams_data -> Gz != NULL))
         input_info = 2;

      if (input_info == 1)
      {
         ams_data -> Gx = hypre_ParVectorInRangeOf(ams_data -> G);
         hypre_ParCSRMatrixMatvec (1.0, ams_data -> G, ams_data -> x, 0.0, ams_data -> Gx);
         ams_data -> Gy = hypre_ParVectorInRangeOf(ams_data -> G);
         hypre_ParCSRMatrixMatvec (1.0, ams_data -> G, ams_data -> y, 0.0, ams_data -> Gy);
         if (ams_data -> dim == 3)
         {
            ams_data -> Gz = hypre_ParVectorInRangeOf(ams_data -> G);
            hypre_ParCSRMatrixMatvec (1.0, ams_data -> G, ams_data -> z, 0.0, ams_data -> Gz);
         }
      }
   }

   if (ams_data -> Pi == NULL && ams_data -> Pix == NULL)
   {
      if (ams_data -> cycle_type == 20)
         /* Construct the combined interpolation matrix [G,Pi] */
         hypre_AMSComputeGPi(ams_data -> A,
                             ams_data -> G,
                             ams_data -> Gx,
                             ams_data -> Gy,
                             ams_data -> Gz,
                             ams_data -> dim,
                             &ams_data -> Pi);
      else if (ams_data -> cycle_type > 10)
         /* Construct Pi{x,y,z} instead of Pi = [Pix,Piy,Piz] */
         hypre_AMSComputePixyz(ams_data -> A,
                               ams_data -> G,
                               ams_data -> Gx,
                               ams_data -> Gy,
                               ams_data -> Gz,
                               ams_data -> dim,
                               &ams_data -> Pix,
                               &ams_data -> Piy,
                               &ams_data -> Piz);
      else
         /* Construct the Pi interpolation matrix */
         hypre_AMSComputePi(ams_data -> A,
                            ams_data -> G,
                            ams_data -> Gx,
                            ams_data -> Gy,
                            ams_data -> Gz,
                            ams_data -> dim,
                            &ams_data -> Pi);
   }

   /* Keep Gx, Gy and Gz only if use the method with discrete divergence
      stabilization (where we use them to compute the local mesh size). */
   if (input_info == 1 && ams_data -> cycle_type != 9)
   {
      hypre_ParVectorDestroy(ams_data -> Gx);
      hypre_ParVectorDestroy(ams_data -> Gy);
      if (ams_data -> dim == 3)
         hypre_ParVectorDestroy(ams_data -> Gz);
   }

   /* Create the AMG solver on the range of G^T */
   if (!ams_data -> beta_is_zero && ams_data -> cycle_type != 20)
   {
      HYPRE_BoomerAMGCreate(&ams_data -> B_G);
      HYPRE_BoomerAMGSetCoarsenType(ams_data -> B_G, ams_data -> B_G_coarsen_type);
      HYPRE_BoomerAMGSetAggNumLevels(ams_data -> B_G, ams_data -> B_G_agg_levels);
      HYPRE_BoomerAMGSetRelaxType(ams_data -> B_G, ams_data -> B_G_relax_type);
      HYPRE_BoomerAMGSetNumSweeps(ams_data -> B_G, 1);
      HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_G, 25);
      HYPRE_BoomerAMGSetTol(ams_data -> B_G, 0.0);
      HYPRE_BoomerAMGSetMaxIter(ams_data -> B_G, 1);
      HYPRE_BoomerAMGSetStrongThreshold(ams_data -> B_G, ams_data -> B_G_theta);
      HYPRE_BoomerAMGSetInterpType(ams_data -> B_G, ams_data -> B_G_interp_type);
      HYPRE_BoomerAMGSetPMaxElmts(ams_data -> B_G, ams_data -> B_G_Pmax);

      if (ams_data -> cycle_type == 0)
         HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_G, 2);

      /* don't use exact solve on the coarsest level (matrix may be singular) */
      HYPRE_BoomerAMGSetCycleRelaxType(ams_data -> B_G,
                                       ams_data -> B_G_relax_type,
                                       3);

      /* If not given, construct the coarse space matrix by RAP */
      if (!ams_data -> A_G)
      {
         if (!hypre_ParCSRMatrixCommPkg(ams_data -> G))
            hypre_MatvecCommPkgCreate(ams_data -> G);

         if (!hypre_ParCSRMatrixCommPkg(ams_data -> A))
            hypre_MatvecCommPkgCreate(ams_data -> A);

         hypre_BoomerAMGBuildCoarseOperator(ams_data -> G,
                                            ams_data -> A,
                                            ams_data -> G,
                                            &ams_data -> A_G);

         /* Make sure that A_G has no zero rows (this can happen
            if beta is zero in part of the domain). */
         hypre_ParCSRMatrixFixZeroRows(ams_data -> A_G);

         hypre_ParCSRMatrixOwnsColStarts(ams_data -> G) = 1;
         hypre_ParCSRMatrixOwnsRowStarts(ams_data -> A_G) = 0;

         ams_data -> owns_A_G = 1;
      }

      HYPRE_BoomerAMGSetup(ams_data -> B_G,
                           (HYPRE_ParCSRMatrix)ams_data -> A_G,
                           0, 0);
   }

   if (ams_data -> cycle_type > 10 && ams_data -> cycle_type != 20)
   /* Create the AMG solvers on the range of Pi{x,y,z}^T */
   {
      HYPRE_BoomerAMGCreate(&ams_data -> B_Pix);
      HYPRE_BoomerAMGSetCoarsenType(ams_data -> B_Pix, ams_data -> B_Pi_coarsen_type);
      HYPRE_BoomerAMGSetAggNumLevels(ams_data -> B_Pix, ams_data -> B_Pi_agg_levels);
      HYPRE_BoomerAMGSetRelaxType(ams_data -> B_Pix, ams_data -> B_Pi_relax_type);
      HYPRE_BoomerAMGSetNumSweeps(ams_data -> B_Pix, 1);
      HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_Pix, 25);
      HYPRE_BoomerAMGSetTol(ams_data -> B_Pix, 0.0);
      HYPRE_BoomerAMGSetMaxIter(ams_data -> B_Pix, 1);
      HYPRE_BoomerAMGSetStrongThreshold(ams_data -> B_Pix, ams_data -> B_Pi_theta);
      HYPRE_BoomerAMGSetInterpType(ams_data -> B_Pix, ams_data -> B_Pi_interp_type);
      HYPRE_BoomerAMGSetPMaxElmts(ams_data -> B_Pix, ams_data -> B_Pi_Pmax);

      HYPRE_BoomerAMGCreate(&ams_data -> B_Piy);
      HYPRE_BoomerAMGSetCoarsenType(ams_data -> B_Piy, ams_data -> B_Pi_coarsen_type);
      HYPRE_BoomerAMGSetAggNumLevels(ams_data -> B_Piy, ams_data -> B_Pi_agg_levels);
      HYPRE_BoomerAMGSetRelaxType(ams_data -> B_Piy, ams_data -> B_Pi_relax_type);
      HYPRE_BoomerAMGSetNumSweeps(ams_data -> B_Piy, 1);
      HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_Piy, 25);
      HYPRE_BoomerAMGSetTol(ams_data -> B_Piy, 0.0);
      HYPRE_BoomerAMGSetMaxIter(ams_data -> B_Piy, 1);
      HYPRE_BoomerAMGSetStrongThreshold(ams_data -> B_Piy, ams_data -> B_Pi_theta);
      HYPRE_BoomerAMGSetInterpType(ams_data -> B_Piy, ams_data -> B_Pi_interp_type);
      HYPRE_BoomerAMGSetPMaxElmts(ams_data -> B_Piy, ams_data -> B_Pi_Pmax);

      HYPRE_BoomerAMGCreate(&ams_data -> B_Piz);
      HYPRE_BoomerAMGSetCoarsenType(ams_data -> B_Piz, ams_data -> B_Pi_coarsen_type);
      HYPRE_BoomerAMGSetAggNumLevels(ams_data -> B_Piz, ams_data -> B_Pi_agg_levels);
      HYPRE_BoomerAMGSetRelaxType(ams_data -> B_Piz, ams_data -> B_Pi_relax_type);
      HYPRE_BoomerAMGSetNumSweeps(ams_data -> B_Piz, 1);
      HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_Piz, 25);
      HYPRE_BoomerAMGSetTol(ams_data -> B_Piz, 0.0);
      HYPRE_BoomerAMGSetMaxIter(ams_data -> B_Piz, 1);
      HYPRE_BoomerAMGSetStrongThreshold(ams_data -> B_Piz, ams_data -> B_Pi_theta);
      HYPRE_BoomerAMGSetInterpType(ams_data -> B_Piz, ams_data -> B_Pi_interp_type);
      HYPRE_BoomerAMGSetPMaxElmts(ams_data -> B_Piz, ams_data -> B_Pi_Pmax);

      if (ams_data -> beta_is_zero)
      {
         /* don't use exact solve on the coarsest level (matrices may be singular) */
         HYPRE_BoomerAMGSetCycleRelaxType(ams_data -> B_Pix,
                                          ams_data -> B_Pi_relax_type, 3);
         HYPRE_BoomerAMGSetCycleRelaxType(ams_data -> B_Piy,
                                          ams_data -> B_Pi_relax_type, 3);
         HYPRE_BoomerAMGSetCycleRelaxType(ams_data -> B_Piz,
                                          ams_data -> B_Pi_relax_type, 3);
      }

      if (ams_data -> cycle_type == 0)
      {
         HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_Pix, 2);
         HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_Piy, 2);
         HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_Piz, 2);
      }

      /* Construct the coarse space matrices by RAP */
      if (!hypre_ParCSRMatrixCommPkg(ams_data -> Pix))
         hypre_MatvecCommPkgCreate(ams_data -> Pix);
      hypre_BoomerAMGBuildCoarseOperator(ams_data -> Pix,
                                         ams_data -> A,
                                         ams_data -> Pix,
                                         &ams_data -> A_Pix);
      hypre_ParCSRMatrixOwnsRowStarts(ams_data -> A_Pix) = 0;
      hypre_ParCSRMatrixOwnsColStarts(ams_data -> A_Pix) = 0;
      HYPRE_BoomerAMGSetup(ams_data -> B_Pix,
                           (HYPRE_ParCSRMatrix)ams_data -> A_Pix,
                           0, 0);

      if (!hypre_ParCSRMatrixCommPkg(ams_data -> Piy))
         hypre_MatvecCommPkgCreate(ams_data -> Piy);
      hypre_BoomerAMGBuildCoarseOperator(ams_data -> Piy,
                                         ams_data -> A,
                                         ams_data -> Piy,
                                         &ams_data -> A_Piy);
      hypre_ParCSRMatrixOwnsRowStarts(ams_data -> A_Piy) = 0;
      hypre_ParCSRMatrixOwnsColStarts(ams_data -> A_Piy) = 0;
      HYPRE_BoomerAMGSetup(ams_data -> B_Piy,
                           (HYPRE_ParCSRMatrix)ams_data -> A_Piy,
                           0, 0);

      if (ams_data -> Piz)
      {
         if (!hypre_ParCSRMatrixCommPkg(ams_data -> Piz))
            hypre_MatvecCommPkgCreate(ams_data -> Piz);
         hypre_BoomerAMGBuildCoarseOperator(ams_data -> Piz,
                                            ams_data -> A,
                                            ams_data -> Piz,
                                            &ams_data -> A_Piz);
         hypre_ParCSRMatrixOwnsRowStarts(ams_data -> A_Piz) = 0;
         hypre_ParCSRMatrixOwnsColStarts(ams_data -> A_Piz) = 0;
         HYPRE_BoomerAMGSetup(ams_data -> B_Piz,
                              (HYPRE_ParCSRMatrix)ams_data -> A_Piz,
                              0, 0);
      }
   }
   else
   /* Create the AMG solver on the range of Pi^T */
   {
      HYPRE_BoomerAMGCreate(&ams_data -> B_Pi);
      HYPRE_BoomerAMGSetCoarsenType(ams_data -> B_Pi, ams_data -> B_Pi_coarsen_type);
      HYPRE_BoomerAMGSetAggNumLevels(ams_data -> B_Pi, ams_data -> B_Pi_agg_levels);
      HYPRE_BoomerAMGSetRelaxType(ams_data -> B_Pi, ams_data -> B_Pi_relax_type);
      HYPRE_BoomerAMGSetNumSweeps(ams_data -> B_Pi, 1);
      HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_Pi, 25);
      HYPRE_BoomerAMGSetTol(ams_data -> B_Pi, 0.0);
      HYPRE_BoomerAMGSetMaxIter(ams_data -> B_Pi, 1);
      HYPRE_BoomerAMGSetStrongThreshold(ams_data -> B_Pi, ams_data -> B_Pi_theta);
      HYPRE_BoomerAMGSetInterpType(ams_data -> B_Pi, ams_data -> B_Pi_interp_type);
      HYPRE_BoomerAMGSetPMaxElmts(ams_data -> B_Pi, ams_data -> B_Pi_Pmax);

      if (ams_data -> cycle_type == 0)
         HYPRE_BoomerAMGSetMaxLevels(ams_data -> B_Pi, 2);

      /* don't use exact solve on the coarsest level (matrix may be singular) */
      HYPRE_BoomerAMGSetCycleRelaxType(ams_data -> B_Pi,
                                       ams_data -> B_Pi_relax_type,
                                       3);

      /* If not given, construct the coarse space matrix by RAP and
         notify BoomerAMG that this is a dim x dim block system. */
      if (!ams_data -> A_Pi)
      {
         if (!hypre_ParCSRMatrixCommPkg(ams_data -> Pi))
            hypre_MatvecCommPkgCreate(ams_data -> Pi);

         if (!hypre_ParCSRMatrixCommPkg(ams_data -> A))
            hypre_MatvecCommPkgCreate(ams_data -> A);

         if (ams_data -> cycle_type == 9)
         {
            /* Add a discrete divergence term to A before computing  Pi^t A Pi */
            {
               hypre_ParCSRMatrix *Gt, *GGt, *ApGGt;
               hypre_ParCSRMatrixTranspose(ams_data -> G, &Gt, 1);
               hypre_ParCSRMatrixOwnsColStarts(Gt) = 0;
               hypre_ParCSRMatrixOwnsRowStarts(Gt) = 0;

               /* scale GGt by h^2 */
               {
                  double h2;
                  HYPRE_Int i, j, k, ne;

                  hypre_CSRMatrix *Gt_diag = hypre_ParCSRMatrixDiag(Gt);
                  HYPRE_Int Gt_num_rows = hypre_CSRMatrixNumRows(Gt_diag);
                  HYPRE_Int *Gt_diag_I = hypre_CSRMatrixI(Gt_diag);
                  HYPRE_Int *Gt_diag_J = hypre_CSRMatrixJ(Gt_diag);
                  double *Gt_diag_data = hypre_CSRMatrixData(Gt_diag);

                  hypre_CSRMatrix *Gt_offd = hypre_ParCSRMatrixOffd(Gt);
                  HYPRE_Int *Gt_offd_I = hypre_CSRMatrixI(Gt_offd);
                  double *Gt_offd_data = hypre_CSRMatrixData(Gt_offd);

                  double *Gx_data = hypre_VectorData(hypre_ParVectorLocalVector(ams_data -> Gx));
                  double *Gy_data = hypre_VectorData(hypre_ParVectorLocalVector(ams_data -> Gy));
                  double *Gz_data = hypre_VectorData(hypre_ParVectorLocalVector(ams_data -> Gz));

                  for (i = 0; i < Gt_num_rows; i++)
                  {
                     /* determine the characteristic mesh size for vertex i */
                     h2 = 0.0;
                     ne = 0;
                     for (j = Gt_diag_I[i]; j < Gt_diag_I[i+1]; j++)
                     {
                        k = Gt_diag_J[j];
                        h2 += Gx_data[k]*Gx_data[k]+Gy_data[k]*Gy_data[k]+Gz_data[k]*Gz_data[k];
                        ne++;
                     }

                     if (ne != 0)
                     {
                        h2 /= ne;
                        for (j = Gt_diag_I[i]; j < Gt_diag_I[i+1]; j++)
                           Gt_diag_data[j] *= h2;
                        for (j = Gt_offd_I[i]; j < Gt_offd_I[i+1]; j++)
                           Gt_offd_data[j] *= h2;
                     }
                  }
               }

               /* we only needed Gx, Gy and Gz to compute the local mesh size */
               if (input_info == 1)
               {
                  hypre_ParVectorDestroy(ams_data -> Gx);
                  hypre_ParVectorDestroy(ams_data -> Gy);
                  if (ams_data -> dim == 3)
                     hypre_ParVectorDestroy(ams_data -> Gz);
               }

               GGt = hypre_ParMatmul(ams_data -> G, Gt);
               hypre_ParCSRMatrixDestroy(Gt);

               /* hypre_ParCSRMatrixAdd(GGt, A, &ams_data -> A); */
               {
                  hypre_ParCSRMatrix *A = GGt;
                  hypre_ParCSRMatrix *B = ams_data -> A;
                  hypre_ParCSRMatrix **C_ptr = &ApGGt;

                  hypre_ParCSRMatrix *C;
                  hypre_CSRMatrix *A_local, *B_local, *C_local;

                  MPI_Comm comm = hypre_ParCSRMatrixComm(A);
                  HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
                  HYPRE_Int global_num_cols = hypre_ParCSRMatrixGlobalNumCols(A);
                  HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A);
                  HYPRE_Int *col_starts = hypre_ParCSRMatrixColStarts(A);
                  HYPRE_Int A_num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
                  HYPRE_Int A_num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(A));
                  HYPRE_Int A_num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(A));
                  HYPRE_Int B_num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(B));
                  HYPRE_Int B_num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(B));
                  HYPRE_Int B_num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(B));

                  A_local = hypre_MergeDiagAndOffd(A);
                  B_local = hypre_MergeDiagAndOffd(B);
                  C_local = hypre_CSRMatrixAdd(A_local, B_local);

                  C = hypre_ParCSRMatrixCreate (comm,
                                                global_num_rows,
                                                global_num_cols,
                                                row_starts,
                                                col_starts,
                                                A_num_cols_offd + B_num_cols_offd,
                                                A_num_nonzeros_diag + B_num_nonzeros_diag,
                                                A_num_nonzeros_offd + B_num_nonzeros_offd);
                  GenerateDiagAndOffd(C_local, C,
                                      hypre_ParCSRMatrixFirstColDiag(A),
                                      hypre_ParCSRMatrixLastColDiag(A));
                  hypre_ParCSRMatrixOwnsRowStarts(C) = 0;
                  hypre_ParCSRMatrixOwnsColStarts(C) = 0;

                  hypre_CSRMatrixDestroy(A_local);
                  hypre_CSRMatrixDestroy(B_local);
                  hypre_CSRMatrixDestroy(C_local);

                  *C_ptr = C;
               }

               hypre_ParCSRMatrixDestroy(GGt);

               hypre_BoomerAMGBuildCoarseOperator(ams_data -> Pi,
                                                  ApGGt,
                                                  ams_data -> Pi,
                                                  &ams_data -> A_Pi);
            }
         }
         else
         {
            hypre_BoomerAMGBuildCoarseOperator(ams_data -> Pi,
                                               ams_data -> A,
                                               ams_data -> Pi,
                                               &ams_data -> A_Pi);
         }

         ams_data -> owns_A_Pi = 1;

         if (ams_data -> cycle_type != 20)
            HYPRE_BoomerAMGSetNumFunctions(ams_data -> B_Pi, ams_data -> dim);
         else
            HYPRE_BoomerAMGSetNumFunctions(ams_data -> B_Pi, ams_data -> dim + 1);
         /* HYPRE_BoomerAMGSetNodal(ams_data -> B_Pi, 1); */
      }

      HYPRE_BoomerAMGSetup(ams_data -> B_Pi,
                           (HYPRE_ParCSRMatrix)ams_data -> A_Pi,
                           0, 0);
   }

   /* Allocate temporary vectors */
   ams_data -> r0 = hypre_ParVectorInRangeOf(ams_data -> A);
   ams_data -> g0 = hypre_ParVectorInRangeOf(ams_data -> A);
   if (ams_data -> A_G)
   {
      ams_data -> r1 = hypre_ParVectorInRangeOf(ams_data -> A_G);
      ams_data -> g1 = hypre_ParVectorInRangeOf(ams_data -> A_G);
   }
   if (ams_data -> r1 == NULL && ams_data -> A_Pix)
   {
      ams_data -> r1 = hypre_ParVectorInRangeOf(ams_data -> A_Pix);
      ams_data -> g1 = hypre_ParVectorInRangeOf(ams_data -> A_Pix);
   }
   if (ams_data -> Pi)
   {
      ams_data -> r2 = hypre_ParVectorInDomainOf(ams_data -> Pi);
      ams_data -> g2 = hypre_ParVectorInDomainOf(ams_data -> Pi);
   }

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSSolve
 *
 * Solve the system A x = b.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSSolve(void *solver,
                         hypre_ParCSRMatrix *A,
                         hypre_ParVector *b,
                         hypre_ParVector *x)
{
   hypre_AMSData *ams_data = solver;

   HYPRE_Int i, my_id = -1;
   double r0_norm, r_norm, b_norm, relative_resid = 0, old_resid;

   char cycle[30];
   hypre_ParCSRMatrix *Ai[5], *Pi[5];
   HYPRE_Solver Bi[5];
   HYPRE_PtrToSolverFcn HBi[5];
   hypre_ParVector *ri[5], *gi[5];

   hypre_ParVector *z = NULL;

   Ai[0] = ams_data -> A_G;    Pi[0] = ams_data -> G;
   Ai[1] = ams_data -> A_Pi;   Pi[1] = ams_data -> Pi;
   Ai[2] = ams_data -> A_Pix;  Pi[2] = ams_data -> Pix;
   Ai[3] = ams_data -> A_Piy;  Pi[3] = ams_data -> Piy;
   Ai[4] = ams_data -> A_Piz;  Pi[4] = ams_data -> Piz;

   Bi[0] = ams_data -> B_G;    HBi[0] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;
   Bi[1] = ams_data -> B_Pi;   HBi[1] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGBlockSolve;
   Bi[2] = ams_data -> B_Pix;  HBi[2] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;
   Bi[3] = ams_data -> B_Piy;  HBi[3] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;
   Bi[4] = ams_data -> B_Piz;  HBi[4] = (HYPRE_PtrToSolverFcn) hypre_BoomerAMGSolve;

   ri[0] = ams_data -> r1;     gi[0] = ams_data -> g1;
   ri[1] = ams_data -> r2;     gi[1] = ams_data -> g2;
   ri[2] = ams_data -> r1;     gi[2] = ams_data -> g1;
   ri[3] = ams_data -> r1;     gi[3] = ams_data -> g1;
   ri[4] = ams_data -> r1;     gi[4] = ams_data -> g1;

   /* may need to create an additional temporary vector for relaxation */
   if (hypre_NumThreads() > 1 ||  ams_data -> A_relax_type == 16)
   {
      z = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
                                hypre_ParCSRMatrixGlobalNumRows(A),
                                hypre_ParCSRMatrixRowStarts(A));
      hypre_ParVectorInitialize(z);
      hypre_ParVectorSetPartitioningOwner(z,0);
   }


   if (ams_data -> print_level > 0)
      hypre_MPI_Comm_rank(hypre_ParCSRMatrixComm(A), &my_id);

   /* Compatible subspace projection for problems with zero-conductivity regions.
      Note that this modifies the input (r.h.s.) vector b! */
   if ( (ams_data -> B_G0) &&
        (++ams_data->solve_counter % ( ams_data -> projection_frequency ) == 0) )
   {
      /* hypre_printf("Projecting onto the compatible subspace...\n"); */
      hypre_AMSProjectOutGradients(ams_data, b);
   }

   if (ams_data -> beta_is_zero)
   {
      switch (ams_data -> cycle_type)
      {
         case 0:
            hypre_sprintf(cycle,"%s","0");
            break;
         case 1:
         case 3:
         case 5:
         case 7:
         default:
            hypre_sprintf(cycle,"%s","020");
            break;
         case 2:
         case 4:
         case 6:
         case 8:
            hypre_sprintf(cycle,"%s","(0+2)");
            break;
         case 11:
         case 13:
            hypre_sprintf(cycle,"%s","0345430");
            break;
         case 12:
            hypre_sprintf(cycle,"%s","(0+3+4+5)");
            break;
         case 14:
            hypre_sprintf(cycle,"%s","0(+3+4+5)0");
            break;
      }
   }
   else
   {
      switch (ams_data -> cycle_type)
      {
         case 0:
            hypre_sprintf(cycle,"%s","010");
            break;
         case 1:
         default:
            hypre_sprintf(cycle,"%s","01210");
            break;
         case 2:
            hypre_sprintf(cycle,"%s","(0+1+2)");
            break;
         case 3:
            hypre_sprintf(cycle,"%s","02120");
            break;
         case 4:
            hypre_sprintf(cycle,"%s","(010+2)");
            break;
         case 5:
            hypre_sprintf(cycle,"%s","0102010");
            break;
         case 6:
            hypre_sprintf(cycle,"%s","(020+1)");
            break;
         case 7:
            hypre_sprintf(cycle,"%s","0201020");
            break;
         case 8:
            hypre_sprintf(cycle,"%s","0(+1+2)0");
            break;
         case 9:
            hypre_sprintf(cycle,"%s","01210");
            break;
         case 11:
            hypre_sprintf(cycle,"%s","013454310");
            break;
         case 12:
            hypre_sprintf(cycle,"%s","(0+1+3+4+5)");
            break;
         case 13:
            hypre_sprintf(cycle,"%s","034515430");
            break;
         case 14:
            hypre_sprintf(cycle,"%s","01(+3+4+5)10");
            break;
         case 20:
            hypre_sprintf(cycle,"%s","020");
            break;
      }
   }

   for (i = 0; i < ams_data -> maxit; i++)
   {
      /* Compute initial residual norms */
      if (ams_data -> maxit > 1 && i == 0)
      {
         hypre_ParVectorCopy(b, ams_data -> r0);
         hypre_ParCSRMatrixMatvec(-1.0, ams_data -> A, x, 1.0, ams_data -> r0);
         r_norm = sqrt(hypre_ParVectorInnerProd(ams_data -> r0,ams_data -> r0));
         r0_norm = r_norm;
         b_norm = sqrt(hypre_ParVectorInnerProd(b, b));
         if (b_norm)
            relative_resid = r_norm / b_norm;
         else
            relative_resid = r_norm;
         if (my_id == 0 && ams_data -> print_level > 0)
         {
            hypre_printf("                                            relative\n");
            hypre_printf("               residual        factor       residual\n");
            hypre_printf("               --------        ------       --------\n");
            hypre_printf("    Initial    %e                 %e\n",
                         r_norm, relative_resid);
         }
      }

      /* Apply the preconditioner */
      hypre_ParCSRSubspacePrec(ams_data -> A,
                               ams_data -> A_relax_type,
                               ams_data -> A_relax_times,
                               ams_data -> A_l1_norms,
                               ams_data -> A_relax_weight,
                               ams_data -> A_omega,
                               ams_data -> A_max_eig_est,
                               ams_data -> A_min_eig_est,
                               ams_data -> A_cheby_order,
                               ams_data -> A_cheby_fraction,
                               Ai, Bi, HBi, Pi, ri, gi,
                               b, x,
                               ams_data -> r0,
                               ams_data -> g0,
                               cycle,
                               z);

      /* Compute new residual norms */
      if (ams_data -> maxit > 1)
      {
         old_resid = r_norm;
         hypre_ParVectorCopy(b, ams_data -> r0);
         hypre_ParCSRMatrixMatvec(-1.0, ams_data -> A, x, 1.0, ams_data -> r0);
         r_norm = sqrt(hypre_ParVectorInnerProd(ams_data -> r0,ams_data -> r0));
         if (b_norm)
            relative_resid = r_norm / b_norm;
         else
            relative_resid = r_norm;
         if (my_id == 0 && ams_data -> print_level > 0)
            hypre_printf("    Cycle %2d   %e    %f     %e \n",
                         i+1, r_norm, r_norm / old_resid, relative_resid);
      }

      if (relative_resid < ams_data -> tol)
      {
         i++;
         break;
      }
   }

   if (my_id == 0 && ams_data -> print_level > 0 && ams_data -> maxit > 1)
      hypre_printf("\n\n Average Convergence Factor = %f\n\n",
                   pow((r_norm/r0_norm),(1.0/(double) i)));

   ams_data -> num_iterations = i;
   ams_data -> rel_resid_norm = relative_resid;

   if (ams_data -> num_iterations == ams_data -> maxit && ams_data -> tol > 0.0)
      hypre_error(HYPRE_ERROR_CONV);

   if (z)
      hypre_ParVectorDestroy(z);

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_ParCSRSubspacePrec
 *
 * General subspace preconditioner for A0 y = x, based on ParCSR storage.
 *
 * P[i] and A[i] are the interpolation and coarse grid matrices for
 * the (i+1)'th subspace. B[i] is an AMG solver for A[i]. r[i] and g[i]
 * are temporary vectors. A0_* are the fine grid smoothing parameters.
 *
 * The default mode is multiplicative, '+' changes the next correction
 * to additive, based on residual computed at '('.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_ParCSRSubspacePrec(/* fine space matrix */
                                   hypre_ParCSRMatrix *A0,
                                   /* relaxation parameters */
                                   HYPRE_Int A0_relax_type,
                                   HYPRE_Int A0_relax_times,
                                   double *A0_l1_norms,
                                   double A0_relax_weight,
                                   double A0_omega,
                                   double A0_max_eig_est,
                                   double A0_min_eig_est,
                                   HYPRE_Int A0_cheby_order,
                                   double A0_cheby_fraction,
                                   /* subspace matrices */
                                   hypre_ParCSRMatrix **A,
                                   /* subspace preconditioners */
                                   HYPRE_Solver *B,
                                   /* hypre solver functions for B */
                                   HYPRE_PtrToSolverFcn *HB,
                                   /* subspace interpolations */
                                   hypre_ParCSRMatrix **P,
                                   /* temporary subspace vectors */
                                   hypre_ParVector **r,
                                   hypre_ParVector **g,
                                   /* right-hand side */
                                   hypre_ParVector *x,
                                   /* current approximation */
                                   hypre_ParVector *y,
                                   /* current residual */
                                   hypre_ParVector *r0,
                                   /* temporary vector */
                                   hypre_ParVector *g0,
                                   char *cycle,
                                   /* temporary vector */
                                   hypre_ParVector *z)
{
   char *op;
   HYPRE_Int use_saved_residual = 0;

   for (op = cycle; *op != '\0'; op++)
   {
      /* do nothing */
      if (*op == ')')
         continue;

      /* compute the residual: r = x - Ay */
      else if (*op == '(')
      {
         hypre_ParVectorCopy(x,r0);
         hypre_ParCSRMatrixMatvec(-1.0, A0, y, 1.0, r0);
      }

      /* switch to additive correction */
      else if (*op == '+')
      {
         use_saved_residual = 1;
         continue;
      }

      /* smooth: y += S (x - Ay) */
      else if (*op == '0')
      {
         hypre_ParCSRRelax(A0, x,
                           A0_relax_type,
                           A0_relax_times,
                           A0_l1_norms,
                           A0_relax_weight,
                           A0_omega,
                           A0_max_eig_est,
                           A0_min_eig_est,
                           A0_cheby_order,
                           A0_cheby_fraction,
                           y, g0, z);
      }

      /* subspace correction: y += P B^{-1} P^t r */
      else
      {
         HYPRE_Int i = *op - '1';
         if (i < 0)
            hypre_error_in_arg(16);

         /* skip empty subspaces */
         if (!A[i]) continue;

         /* compute the residual? */
         if (use_saved_residual)
         {
            use_saved_residual = 0;
            hypre_ParCSRMatrixMatvecT(1.0, P[i], r0, 0.0, r[i]);
         }
         else
         {
            hypre_ParVectorCopy(x,g0);
            hypre_ParCSRMatrixMatvec(-1.0, A0, y, 1.0, g0);
            hypre_ParCSRMatrixMatvecT(1.0, P[i], g0, 0.0, r[i]);
         }

         hypre_ParVectorSetConstantValues(g[i], 0.0);
         (*HB[i]) (B[i], (HYPRE_Matrix)A[i],
                   (HYPRE_Vector)r[i], (HYPRE_Vector)g[i]);
         hypre_ParCSRMatrixMatvec(1.0, P[i], g[i], 0.0, g0);
         hypre_ParVectorAxpy(1.0, g0, y);
      }
   }

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSGetNumIterations
 *
 * Get the number of AMS iterations.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSGetNumIterations(void *solver,
                                    HYPRE_Int *num_iterations)
{
   hypre_AMSData *ams_data = solver;
   *num_iterations = ams_data -> num_iterations;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSGetFinalRelativeResidualNorm
 *
 * Get the final relative residual norm in AMS.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSGetFinalRelativeResidualNorm(void *solver,
                                                double *rel_resid_norm)
{
   hypre_AMSData *ams_data = solver;
   *rel_resid_norm = ams_data -> rel_resid_norm;
   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSProjectOutGradients
 *
 * For problems with zero-conductivity regions, project the vector onto the
 * compatible subspace: x = (I - G0 (G0^t G0)^{-1} G0^T) x, where G0 is the
 * discrete gradient restricted to the interior nodes of the regions with
 * zero conductivity. This ensures that x is orthogonal to the gradients in
 * the range of G0.
 *
 * This function is typically called after the solution iteration is complete,
 * in order to facilitate the visualization of the computed field. Without it
 * the values in the zero-conductivity regions contain kernel components.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSProjectOutGradients(void *solver,
                                       hypre_ParVector *x)
{
   hypre_AMSData *ams_data = solver;

   if (ams_data -> B_G0)
   {
      hypre_ParCSRMatrixMatvecT(1.0, ams_data -> G0, x, 0.0, ams_data -> r1);
      hypre_ParVectorSetConstantValues(ams_data -> g1, 0.0);
      hypre_BoomerAMGSolve(ams_data -> B_G0, ams_data -> A_G0, ams_data -> r1, ams_data -> g1);
      hypre_ParCSRMatrixMatvec(1.0, ams_data -> G0, ams_data -> g1, 0.0, ams_data -> g0);
      hypre_ParVectorAxpy(-1.0, ams_data -> g0, x);
   }

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSConstructDiscreteGradient
 *
 * Construct and return the lowest-order discrete gradient matrix G, based on:
 * - a matrix on the egdes (e.g. the stiffness matrix A)
 * - a vector on the vertices (e.g. the x coordinates)
 * - the array edge_vertex, which lists the global indexes of the
 *   vertices of the local edges.
 *
 * We assume that edge_vertex lists the edge vertices consecutively,
 * and that the orientation of all edges is consistent. More specificaly:
 * If edge_orientation = 1, the edges are already oriented.
 * If edge_orientation = 2, the orientation of edge i depends only on the
 *                          sign of edge_vertex[2*i+1] - edge_vertex[2*i].
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSConstructDiscreteGradient(hypre_ParCSRMatrix *A,
                                             hypre_ParVector *x_coord,
                                             HYPRE_Int *edge_vertex,
                                             HYPRE_Int edge_orientation,
                                             hypre_ParCSRMatrix **G_ptr)
{
   hypre_ParCSRMatrix *G;

   HYPRE_Int nedges;

   nedges = hypre_ParCSRMatrixNumRows(A);

   /* Construct the local part of G based on edge_vertex and the edge
      and vertex partitionings from A and x_coord */
   {
      HYPRE_Int i, *I = hypre_CTAlloc(HYPRE_Int, nedges+1);
      HYPRE_Int part_size, *row_starts, *col_starts;
      double *data = hypre_CTAlloc(double, 2*nedges);
      hypre_CSRMatrix *local = hypre_CSRMatrixCreate (nedges,
                                                      hypre_ParVectorGlobalSize(x_coord),
                                                      2*nedges);

      for (i = 0; i <= nedges; i++)
         I[i] = 2*i;

      if (edge_orientation == 1)
      {
         /* Assume that the edges are already oriented */
         for (i = 0; i < 2*nedges; i+=2)
         {
            data[i]   = -1.0;
            data[i+1] =  1.0;
         }
      }
      else if (edge_orientation == 2)
      {
         /* Assume that the edge orientation is based on the vertex indexes */
         for (i = 0; i < 2*nedges; i+=2)
         {
            if (edge_vertex[i] < edge_vertex[i+1])
            {
               data[i]   = -1.0;
               data[i+1] =  1.0;
            }
            else
            {
               data[i]   =  1.0;
               data[i+1] = -1.0;
            }
         }
      }
      else
         hypre_error_in_arg(4);


      hypre_CSRMatrixI(local) = I;
      hypre_CSRMatrixJ(local) = edge_vertex;
      hypre_CSRMatrixData(local) = data;

      hypre_CSRMatrixRownnz(local) = NULL;
      hypre_CSRMatrixOwnsData(local) = 1;
      hypre_CSRMatrixNumRownnz(local) = nedges;

      /* Copy partitioning from A and x_coord (previously they were re-used) */
#ifdef HYPRE_NO_GLOBAL_PARTITION
      part_size = 2;
#else
      hypre_MPI_Comm_size(hypre_ParCSRMatrixComm(A), &part_size);
      part_size++;
#endif
      row_starts = hypre_TAlloc(HYPRE_Int,part_size);
      col_starts = hypre_TAlloc(HYPRE_Int,part_size);
      for (i = 0; i < part_size; i++)
      {
         row_starts[i] = hypre_ParCSRMatrixRowStarts(A)[i];
         col_starts[i] = hypre_ParVectorPartitioning(x_coord)[i];
      }

      /* Generate the discrete gradient matrix */
      G = hypre_ParCSRMatrixCreate(hypre_ParCSRMatrixComm(A),
                                   hypre_ParCSRMatrixGlobalNumRows(A),
                                   hypre_ParVectorGlobalSize(x_coord),
                                   row_starts, col_starts, 0, 0, 0);
      hypre_ParCSRMatrixOwnsRowStarts(G) = 1;
      hypre_ParCSRMatrixOwnsColStarts(G) = 1;

      GenerateDiagAndOffd(local, G,
                          hypre_ParVectorFirstIndex(x_coord),
                          hypre_ParVectorLastIndex(x_coord));


      /* Account for empty rows in G. These may appear when A includes only
         the interior (non-Dirichlet b.c.) edges. */
      {
         hypre_CSRMatrix *G_diag = hypre_ParCSRMatrixDiag(G);
         G_diag->num_cols = hypre_VectorSize(hypre_ParVectorLocalVector(x_coord));
      }

      /* Free the local matrix */
      hypre_CSRMatrixJ(local) = NULL;
      hypre_CSRMatrixDestroy(local);
   }

   *G_ptr = G;

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSFEISetup
 *
 * Construct an AMS solver object based on the following data:
 *
 *    A              - the edge element stiffness matrix
 *    num_vert       - number of vertices (nodes) in the processor
 *    num_local_vert - number of vertices owned by the processor
 *    vert_number    - global indexes of the vertices in the processor
 *    vert_coord     - coordinates of the vertices in the processor
 *    num_edges      - number of edges owned by the processor
 *    edge_vertex    - the vertices of the edges owned by the processor.
 *                     Vertices are in local numbering (the same as in
 *                     vert_number), and edge orientation is always from
 *                     the first to the second vertex.
 *
 * Here we distinguish between vertices that belong to elements in the
 * current processor, and the subset of these vertices that is owned by
 * the processor.
 *
 * This function is written specifically for input from the FEI and should
 * be called before hypre_AMSSetup().
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSFEISetup(void *solver,
                            hypre_ParCSRMatrix *A,
                            hypre_ParVector *b,
                            hypre_ParVector *x,
                            HYPRE_Int num_vert,
                            HYPRE_Int num_local_vert,
                            HYPRE_Int *vert_number,
                            double *vert_coord,
                            HYPRE_Int num_edges,
                            HYPRE_Int *edge_vertex)
{
   hypre_AMSData *ams_data = solver;

   HYPRE_Int i, j;

   hypre_ParCSRMatrix *G;
   hypre_ParVector *x_coord, *y_coord, *z_coord;
   double *x_data, *y_data, *z_data;

   MPI_Comm comm = hypre_ParCSRMatrixComm(A);
   HYPRE_Int *vert_part, num_global_vert;
   HYPRE_Int vert_start, vert_end;

   /* Find the processor partitioning of the vertices */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   vert_part = hypre_TAlloc(HYPRE_Int,2);
   hypre_MPI_Scan(&num_local_vert, &vert_part[1], 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
   vert_part[0] = vert_part[1] - num_local_vert;
   hypre_MPI_Allreduce(&num_local_vert, &num_global_vert, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
#else
   HYPRE_Int num_procs;
   hypre_MPI_Comm_size(comm, &num_procs);
   vert_part = hypre_TAlloc(HYPRE_Int,num_procs+1);
   hypre_MPI_Allgather(&num_local_vert, 1, HYPRE_MPI_INT, &vert_part[1], 1, HYPRE_MPI_INT, comm);
   vert_part[0] = 0;
   for (i = 0; i < num_procs; i++)
      vert_part[i+1] += vert_part[i];
   num_global_vert = vert_part[num_procs];
#endif

   /* Construct hypre parallel vectors for the vertex coordinates */
   x_coord = hypre_ParVectorCreate(comm, num_global_vert, vert_part);
   hypre_ParVectorInitialize(x_coord);
   hypre_ParVectorOwnsData(x_coord) = 1;
   hypre_ParVectorOwnsPartitioning(x_coord) = 0;
   x_data = hypre_VectorData(hypre_ParVectorLocalVector(x_coord));

   y_coord = hypre_ParVectorCreate(comm, num_global_vert, vert_part);
   hypre_ParVectorInitialize(y_coord);
   hypre_ParVectorOwnsData(y_coord) = 1;
   hypre_ParVectorOwnsPartitioning(y_coord) = 0;
   y_data = hypre_VectorData(hypre_ParVectorLocalVector(y_coord));

   z_coord = hypre_ParVectorCreate(comm, num_global_vert, vert_part);
   hypre_ParVectorInitialize(z_coord);
   hypre_ParVectorOwnsData(z_coord) = 1;
   hypre_ParVectorOwnsPartitioning(z_coord) = 0;
   z_data = hypre_VectorData(hypre_ParVectorLocalVector(z_coord));

   vert_start = hypre_ParVectorFirstIndex(x_coord);
   vert_end   = hypre_ParVectorLastIndex(x_coord);

   /* Save coordinates of locally owned vertices */
   for (i = 0; i < num_vert; i++)
   {
      if (vert_number[i] >= vert_start && vert_number[i] <= vert_end)
      {
         j = vert_number[i] - vert_start;
         x_data[j] = vert_coord[3*i];
         y_data[j] = vert_coord[3*i+1];
         z_data[j] = vert_coord[3*i+2];
      }
   }

   /* Change vertex numbers from local to global */
   for (i = 0; i < 2*num_edges; i++)
      edge_vertex[i] = vert_number[edge_vertex[i]];

   /* Construct the local part of G based on edge_vertex */
   {
      /* HYPRE_Int num_edges = hypre_ParCSRMatrixNumRows(A); */
      HYPRE_Int *I = hypre_CTAlloc(HYPRE_Int, num_edges+1);
      double *data = hypre_CTAlloc(double, 2*num_edges);
      hypre_CSRMatrix *local = hypre_CSRMatrixCreate (num_edges,
                                                      num_global_vert,
                                                      2*num_edges);

      for (i = 0; i <= num_edges; i++)
         I[i] = 2*i;

      /* Assume that the edge orientation is based on the vertex indexes */
      for (i = 0; i < 2*num_edges; i+=2)
      {
         data[i]   =  1.0;
         data[i+1] = -1.0;
      }

      hypre_CSRMatrixI(local) = I;
      hypre_CSRMatrixJ(local) = edge_vertex;
      hypre_CSRMatrixData(local) = data;

      hypre_CSRMatrixRownnz(local) = NULL;
      hypre_CSRMatrixOwnsData(local) = 1;
      hypre_CSRMatrixNumRownnz(local) = num_edges;

      G = hypre_ParCSRMatrixCreate(comm,
                                   hypre_ParCSRMatrixGlobalNumRows(A),
                                   num_global_vert,
                                   hypre_ParCSRMatrixRowStarts(A),
                                   vert_part,
                                   0, 0, 0);
      hypre_ParCSRMatrixOwnsRowStarts(G) = 0;
      hypre_ParCSRMatrixOwnsColStarts(G) = 1;

      GenerateDiagAndOffd(local, G, vert_start, vert_end);

      hypre_CSRMatrixJ(local) = NULL;
      hypre_CSRMatrixDestroy(local);
   }

   ams_data -> G = G;

   ams_data -> x = x_coord;
   ams_data -> y = y_coord;
   ams_data -> z = z_coord;

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_AMSFEIDestroy
 *
 * Free the additional memory allocated in hypre_AMSFEISetup().
 *
 * This function is written specifically for input from the FEI and should
 * be called before hypre_AMSDestroy().
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_AMSFEIDestroy(void *solver)
{
   hypre_AMSData *ams_data = solver;

   if (ams_data -> G)
      hypre_ParCSRMatrixDestroy(ams_data -> G);

   if (ams_data -> x)
      hypre_ParVectorDestroy(ams_data -> x);
   if (ams_data -> y)
      hypre_ParVectorDestroy(ams_data -> y);
   if (ams_data -> z)
      hypre_ParVectorDestroy(ams_data -> z);

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRComputeL1Norms Threads
 *
 * Compute the l1 norms of the rows of a given matrix, depending on
 * the option parameter:
 *
 * option 1 = Compute the l1 norm of the rows
 * option 2 = Compute the l1 norm of the (processor) off-diagonal
 *            part of the rows plus the diagonal of A
 * option 3 = Compute the l2 norm^2 of the rows
 * option 4 = Truncated version of option 2 based on Remark 6.2 in "Multigrid
 *            Smoothers for Ultra-Parallel Computing"
 *
 * The above computations are done in a CF manner, whenever the provided
 * cf_marker is not NULL.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_ParCSRComputeL1NormsThreads(hypre_ParCSRMatrix *A,
                                            HYPRE_Int option,
                                            HYPRE_Int num_threads,
                                            HYPRE_Int *cf_marker,
                                            double **l1_norm_ptr)
{
   HYPRE_Int i, j, k;
   HYPRE_Int num_rows = hypre_ParCSRMatrixNumRows(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int *A_diag_I = hypre_CSRMatrixI(A_diag);
   HYPRE_Int *A_diag_J = hypre_CSRMatrixJ(A_diag);
   double *A_diag_data = hypre_CSRMatrixData(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int *A_offd_I = hypre_CSRMatrixI(A_offd);
   HYPRE_Int *A_offd_J = hypre_CSRMatrixJ(A_offd);
   double *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(A_offd);

   double diag;
   double *l1_norm = hypre_CTAlloc(double, num_rows);
   HYPRE_Int ii, ns, ne, rest, size;

   HYPRE_Int *cf_marker_offd = NULL;
   HYPRE_Int cf_diag;

   /* collect the cf marker data from other procs */
   if (cf_marker != NULL)
   {
      HYPRE_Int index;
      HYPRE_Int num_sends;
      HYPRE_Int start;
      HYPRE_Int *int_buf_data = NULL;

      hypre_ParCSRCommPkg  *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      hypre_ParCSRCommHandle *comm_handle;

      if (num_cols_offd)
         cf_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends))
         int_buf_data = hypre_CTAlloc(HYPRE_Int,
                                      hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
         {
            int_buf_data[index++] = cf_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
         }
      }
      comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
                                                 cf_marker_offd);
      hypre_ParCSRCommHandleDestroy(comm_handle);
      hypre_TFree(int_buf_data);
   }

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,ii,j,k,ns,ne,rest,size,diag) HYPRE_SMP_SCHEDULE
#endif
   for (k = 0; k < num_threads; k++)
   {
      size = num_rows/num_threads;
      rest = num_rows - size*num_threads;
      if (k < rest)
      {
         ns = k*size+k;
         ne = (k+1)*size+k+1;
      }
      else
      {
         ns = k*size+rest;
         ne = (k+1)*size+rest;
      }

      if (option == 1)
      {
         for (i = ns; i < ne; i++)
         {
            l1_norm[i] = 0.0;
            if (cf_marker == NULL)
            {
               /* Add the l1 norm of the diag part of the ith row */
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
                  l1_norm[i] += fabs(A_diag_data[j]);
               /* Add the l1 norm of the offd part of the ith row */
               if (num_cols_offd)
               {
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     l1_norm[i] += fabs(A_offd_data[j]);
               }
            }
            else
            {
               cf_diag = cf_marker[i];
               /* Add the CF l1 norm of the diag part of the ith row */
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
                  if (cf_diag == cf_marker[A_diag_J[j]])
                     l1_norm[i] += fabs(A_diag_data[j]);
               /* Add the CF l1 norm of the offd part of the ith row */
               if (num_cols_offd)
               {
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     if (cf_diag == cf_marker_offd[A_offd_J[j]])
                        l1_norm[i] += fabs(A_offd_data[j]);
               }
            }
         }
      }
      else if (option == 2)
      {
         for (i = ns; i < ne; i++)
         {
            l1_norm[i] = 0.0;
            if (cf_marker == NULL)
            {
               /* Add the diagonal and the local off-thread part of the ith row */
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               {
                  ii = A_diag_J[j];
                  if (ii == i || ii < ns || ii >= ne)
                     l1_norm[i] += fabs(A_diag_data[j]);
               }
               /* Add the l1 norm of the offd part of the ith row */
               if (num_cols_offd)
               {
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     l1_norm[i] += fabs(A_offd_data[j]);
               }
            }
            else
            {
               cf_diag = cf_marker[i];
               /* Add the diagonal and the local off-thread part of the ith row */
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               {
                  ii = A_diag_J[j];
                  if ((ii == i || ii < ns || ii >= ne) &&
                      (cf_diag == cf_marker_offd[A_offd_J[j]]))
                     l1_norm[i] += fabs(A_diag_data[j]);
               }
               /* Add the CF l1 norm of the offd part of the ith row */
               if (num_cols_offd)
               {
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     if (cf_diag == cf_marker_offd[A_offd_J[j]])
                        l1_norm[i] += fabs(A_offd_data[j]);
               }
            }
         }
      }
      else if (option == 3)
      {
         for (i = ns; i < ne; i++)
         {
            l1_norm[i] = 0.0;
            for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               l1_norm[i] += A_diag_data[j] * A_diag_data[j];
            if (num_cols_offd)
               for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                  l1_norm[i] += A_offd_data[j] * A_offd_data[j];
         }
      }
      else if (option == 4)
      {
         for (i = ns; i < ne; i++)
         {
            l1_norm[i] = 0.0;
            if (cf_marker == NULL)
            {
               /* Add the diagonal and the local off-thread part of the ith row */
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               {
                  ii = A_diag_J[j];
                  if (ii == i || ii < ns || ii >= ne)
                  {
                     if (ii == i)
                     {
                        diag = fabs(A_diag_data[j]);
                        l1_norm[i] += fabs(A_diag_data[j]);
                     }
                     else
                        l1_norm[i] += 0.5*fabs(A_diag_data[j]);
                  }
               }
               /* Add the l1 norm of the offd part of the ith row */
               if (num_cols_offd)
               {
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     l1_norm[i] += 0.5*fabs(A_offd_data[j]);
               }
            }
            else
            {
               cf_diag = cf_marker[i];
               /* Add the diagonal and the local off-thread part of the ith row */
               for (j = A_diag_I[i]; j < A_diag_I[i+1]; j++)
               {
                  ii = A_diag_J[j];
                  if ((ii == i || ii < ns || ii >= ne) &&
                      (cf_diag == cf_marker_offd[A_offd_J[j]]))
                  {
                     if (ii == i)
                     {
                        diag = fabs(A_diag_data[j]);
                        l1_norm[i] += fabs(A_diag_data[j]);
                     }
                     else
                        l1_norm[i] += 0.5*fabs(A_diag_data[j]);
                  }
               }
               /* Add the CF l1 norm of the offd part of the ith row */
               if (num_cols_offd)
               {
                  for (j = A_offd_I[i]; j < A_offd_I[i+1]; j++)
                     if (cf_diag == cf_marker_offd[A_offd_J[j]])
                        l1_norm[i] += 0.5*fabs(A_offd_data[j]);
               }
            }

            /* Truncate according to Remark 6.2 */
            if (l1_norm[i] <= 4.0/3.0*diag)
               l1_norm[i] = diag;
         }
      }

      /* Handle negative definite matrices */
      for (i = ns; i < ne; i++)
         if (A_diag_data[A_diag_I[i]] < 0)
            l1_norm[i] = -l1_norm[i];

      for (i = ns; i < ne; i++)
         if (fabs(l1_norm[i]) < DBL_EPSILON)
         {
            hypre_error_in_arg(1);
            break;
         }

   }

   hypre_TFree(cf_marker_offd);

   *l1_norm_ptr = l1_norm;

   return hypre_error_flag;
}


/*--------------------------------------------------------------------------
 * hypre_ParCSRRelaxThreads
 * 1 = l1-scaled Jacobi
 * 2 = l1-scaled block Gauss-Seidel/SSOR
 *--------------------------------------------------------------------------*/
HYPRE_Int  hypre_ParCSRRelaxThreads(hypre_ParCSRMatrix *A,
                                    hypre_ParVector    *f,
                                    HYPRE_Int           relax_type,
                                    HYPRE_Int           relax_times,
                                    double             *l1_norms,
                                    double              relax_weight,
                                    double              omega,
                                    hypre_ParVector    *u,
                                    hypre_ParVector    *Vtemp,
                                    hypre_ParVector    *z)
{
   MPI_Comm         comm         = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix *A_diag       = hypre_ParCSRMatrixDiag(A);
   double          *A_diag_data  = hypre_CSRMatrixData(A_diag);
   HYPRE_Int       *A_diag_i     = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_diag_j     = hypre_CSRMatrixJ(A_diag);
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int       *A_offd_i     = hypre_CSRMatrixI(A_offd);
   double          *A_offd_data  = hypre_CSRMatrixData(A_offd);
   HYPRE_Int       *A_offd_j     = hypre_CSRMatrixJ(A_offd);
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_ParCSRCommHandle *comm_handle;

   HYPRE_Int        n             = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        num_cols_offd = hypre_CSRMatrixNumCols(A_offd);

   hypre_Vector    *u_local = hypre_ParVectorLocalVector(u);
   double          *u_data  = hypre_VectorData(u_local);

   hypre_Vector    *f_local = hypre_ParVectorLocalVector(f);
   double          *f_data  = hypre_VectorData(f_local);

   hypre_Vector    *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
   double          *Vtemp_data = hypre_VectorData(Vtemp_local);
   double          *Vext_data;
   double          *v_buf_data;
   double          *tmp_data;

   HYPRE_Int        i, j;
   HYPRE_Int        ii, jj;
   HYPRE_Int        ns, ne, size, rest;
   HYPRE_Int        relax_error = 0;
   HYPRE_Int        num_sends;
   HYPRE_Int        index, start;
   HYPRE_Int        num_procs, num_threads, my_id;

   double           zero = 0.0;
   double           res, res2;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   /* only allow jacobi and GS */
   if (relax_type > 2)
      relax_type = 2;

   /*-----------------------------------------------------------------
    * Copy current approximation into temporary vector.
    *-----------------------------------------------------------------*/
   if (num_procs > 1)
   {
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      v_buf_data = hypre_CTAlloc(double,
                                 hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));

      Vext_data = hypre_CTAlloc(double,num_cols_offd);
      if (num_cols_offd)
      {
         A_offd_j = hypre_CSRMatrixJ(A_offd);
         A_offd_data = hypre_CSRMatrixData(A_offd);
      }

      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg,i+1); j++)
            v_buf_data[index++]
               = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }

      comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, v_buf_data,
                                                 Vext_data);

      /*-----------------------------------------------------------------
       * Copy current approximation into temporary vector.
       *-----------------------------------------------------------------*/
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;
   }

   if (relax_type == 1) /* Jacobi */
   {

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
      for (i = 0; i < n; i++)
      {
         Vtemp_data[i] = u_data[i];
      }
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,ii,jj,res) HYPRE_SMP_SCHEDULE
#endif
      for (i = 0; i < n; i++)
      {
         /*-----------------------------------------------------------
          * If diagonal is nonzero, relax point i; otherwise, skip it.
          *-----------------------------------------------------------*/
         if (A_diag_data[A_diag_i[i]] != zero)
         {
            res = f_data[i];
            for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
            {
               ii = A_diag_j[jj];
               res -= A_diag_data[jj] * Vtemp_data[ii];
            }
            for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
            {
               ii = A_offd_j[jj];
               res -= A_offd_data[jj] * Vext_data[ii];
            }
            u_data[i] += (relax_weight*res)/l1_norms[i];
         }
      }
   }
   else if (relax_type == 2) /* GS */
   {
      if (relax_weight == 1 && omega == 1)
      {
         tmp_data = hypre_CTAlloc(double,n);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < n; i++)
            tmp_data[i] = u_data[i];
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,ii,j,jj,ns,ne,res,rest,size) HYPRE_SMP_SCHEDULE
#endif
         for (j = 0; j < num_threads; j++)
         {
            size = n/num_threads;
            rest = n - size*num_threads;
            if (j < rest)
            {
               ns = j*size+j;
               ne = (j+1)*size+j+1;
            }
            else
            {
               ns = j*size+rest;
               ne = (j+1)*size+rest;
            }
            for (i = ns; i < ne; i++)   /* interior points first */
            {
               /*-----------------------------------------------------------
                * If diagonal is nonzero, relax point i; otherwise, skip it.
                *-----------------------------------------------------------*/

               if (A_diag_data[A_diag_i[i]] != zero)
               {
                  res = f_data[i];
                  for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
                  {
                     ii = A_diag_j[jj];
                     if (ii >= ns && ii < ne)
                     {
                        res -= A_diag_data[jj] * u_data[ii];
                     }
                     else
                        res -= A_diag_data[jj] * tmp_data[ii];
                  }
                  for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
                  {
                     ii = A_offd_j[jj];
                     res -= A_offd_data[jj] * Vext_data[ii];
                  }
                  u_data[i] += res / l1_norms[i];
               }
            }
            for (i = ne-1; i > ns-1; i--)       /* interior points first */
            {
               /*-----------------------------------------------------------
                * If diagonal is nonzero, relax point i; otherwise, skip it.
                *-----------------------------------------------------------*/

               if (A_diag_data[A_diag_i[i]] != zero)
               {
                  res = f_data[i];
                  for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
                  {
                     ii = A_diag_j[jj];
                     if (ii >= ns && ii < ne)
                     {
                        res -= A_diag_data[jj] * u_data[ii];
                     }
                     else
                        res -= A_diag_data[jj] * tmp_data[ii];
                  }
                  for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
                  {
                     ii = A_offd_j[jj];
                     res -= A_offd_data[jj] * Vext_data[ii];
                  }
                  u_data[i] += res / l1_norms[i];
               }
            }
         }
         hypre_TFree(tmp_data);
      }
      else
      {
         double c1 = omega*relax_weight;
         double c2 = omega*(1.0-relax_weight);
         tmp_data = hypre_CTAlloc(double,n);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < n; i++)
         {
            tmp_data[i] = u_data[i];
         }
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,ii,j,jj,ns,ne,res,rest,size) HYPRE_SMP_SCHEDULE
#endif
         for (j = 0; j < num_threads; j++)
         {
            size = n/num_threads;
            rest = n - size*num_threads;
            if (j < rest)
            {
               ns = j*size+j;
               ne = (j+1)*size+j+1;
            }
            else
            {
               ns = j*size+rest;
               ne = (j+1)*size+rest;
            }
            for (i = ns; i < ne; i++)   /* interior points first */
            {
               /*-----------------------------------------------------------
                * If diagonal is nonzero, relax point i; otherwise, skip it.
                *-----------------------------------------------------------*/

               if (A_diag_data[A_diag_i[i]] != zero)
               {
                  res2 = 0.0;
                  res = f_data[i];
                  Vtemp_data[i] = u_data[i];
                  for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
                  {
                     ii = A_diag_j[jj];
                     if (ii >= ns && ii < ne)
                     {
                        res -= A_diag_data[jj] * u_data[ii];
                        if (ii < i)
                           res2 += A_diag_data[jj] * (Vtemp_data[ii] - u_data[ii]);
                     }
                     else
                        res -= A_diag_data[jj] * tmp_data[ii];
                  }
                  for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
                  {
                     ii = A_offd_j[jj];
                     res -= A_offd_data[jj] * Vext_data[ii];
                  }
                  u_data[i] += (c1*res + c2*res2) / l1_norms[i];
               }
            }
            for (i = ne-1; i > ns-1; i--)       /* interior points first */
            {
               /*-----------------------------------------------------------
                * If diagonal is nonzero, relax point i; otherwise, skip it.
                *-----------------------------------------------------------*/

               if (A_diag_data[A_diag_i[i]] != zero)
               {
                  res2 = 0.0;
                  res = f_data[i];
                  for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
                  {
                     ii = A_diag_j[jj];
                     if (ii >= ns && ii < ne)
                     {
                        res -= A_diag_data[jj] * u_data[ii];
                        if (ii > i)
                           res2 += A_diag_data[jj] * (Vtemp_data[ii] - u_data[ii]);
                     }
                     else
                        res -= A_diag_data[jj] * tmp_data[ii];
                  }
                  for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++)
                  {
                     ii = A_offd_j[jj];
                     res -= A_offd_data[jj] * Vext_data[ii];
                  }
                  u_data[i] += (c1*res + c2*res2) / l1_norms[i];
               }
            }
         }
         hypre_TFree(tmp_data);
      }
   } /* end of Jacobi or G.S. */


   if (num_procs > 1)
   {
      hypre_TFree(Vext_data);
      hypre_TFree(v_buf_data);
   }

   return(relax_error);
}