hypre/struct_ls/red_black_constantcoef_gs.c

519 lines
19 KiB
C

/*BHEADER**********************************************************************
* Copyright (c) 2008, Lawrence Livermore National Security, LLC.
* Produced at the Lawrence Livermore National Laboratory.
* This file is part of HYPRE. See file COPYRIGHT for details.
*
* HYPRE is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License (as published by the Free
* Software Foundation) version 2.1 dated February 1999.
*
* $Revision$
***********************************************************************EHEADER*/
#include "_hypre_struct_ls.h"
#include "red_black_gs.h"
#ifndef hypre_abs
#define hypre_abs(a) (((a)>0) ? (a) : -(a))
#endif
/*--------------------------------------------------------------------------
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_RedBlackConstantCoefGS( void *relax_vdata,
hypre_StructMatrix *A,
hypre_StructVector *b,
hypre_StructVector *x )
{
hypre_RedBlackGSData *relax_data = relax_vdata;
HYPRE_Int max_iter = (relax_data -> max_iter);
HYPRE_Int zero_guess = (relax_data -> zero_guess);
HYPRE_Int rb_start = (relax_data -> rb_start);
HYPRE_Int diag_rank = (relax_data -> diag_rank);
hypre_ComputePkg *compute_pkg = (relax_data -> compute_pkg);
hypre_CommHandle *comm_handle;
hypre_BoxArrayArray *compute_box_aa;
hypre_BoxArray *compute_box_a;
hypre_Box *compute_box;
hypre_Box *A_dbox;
hypre_Box *b_dbox;
hypre_Box *x_dbox;
HYPRE_Int Ai, Astart, Ani, Anj;
HYPRE_Int bi, bstart, bni, bnj;
HYPRE_Int xi, xstart, xni, xnj;
HYPRE_Int xoff0, xoff1, xoff2, xoff3, xoff4, xoff5;
double *Ap;
double *App;
double *bp;
double *xp;
/* constant coefficient */
HYPRE_Int constant_coeff= hypre_StructMatrixConstantCoefficient(A);
double App0, App1, App2, App3, App4, App5, AApd;
hypre_IndexRef start;
hypre_Index loop_size;
hypre_StructStencil *stencil;
hypre_Index *stencil_shape;
HYPRE_Int stencil_size;
HYPRE_Int offd[6];
HYPRE_Int iter, rb, redblack;
HYPRE_Int compute_i, i, j, ii, jj, kk;
HYPRE_Int ni, nj, nk;
/*----------------------------------------------------------
* Initialize some things and deal with special cases
*----------------------------------------------------------*/
hypre_BeginTiming(relax_data -> time_index);
hypre_StructMatrixDestroy(relax_data -> A);
hypre_StructVectorDestroy(relax_data -> b);
hypre_StructVectorDestroy(relax_data -> x);
(relax_data -> A) = hypre_StructMatrixRef(A);
(relax_data -> x) = hypre_StructVectorRef(x);
(relax_data -> b) = hypre_StructVectorRef(b);
(relax_data -> num_iterations) = 0;
/* if max_iter is zero, return */
if (max_iter == 0)
{
/* if using a zero initial guess, return zero */
if (zero_guess)
{
hypre_StructVectorSetConstantValues(x, 0.0);
}
hypre_EndTiming(relax_data -> time_index);
return hypre_error_flag;
}
else
{
stencil = hypre_StructMatrixStencil(A);
stencil_shape = hypre_StructStencilShape(stencil);
stencil_size = hypre_StructStencilSize(stencil);
/* get off-diag entry ranks ready */
i = 0;
for (j = 0; j < stencil_size; j++)
{
if (j != diag_rank)
{
offd[i] = j;
i++;
}
}
}
hypre_StructVectorClearBoundGhostValues(x, 0);
/*----------------------------------------------------------
* Do zero_guess iteration
*----------------------------------------------------------*/
rb = rb_start;
iter = 0;
if (zero_guess)
{
for (compute_i = 0; compute_i < 2; compute_i++)
{
switch(compute_i)
{
case 0:
{
compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
}
break;
case 1:
{
compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
}
break;
}
hypre_ForBoxArrayI(i, compute_box_aa)
{
compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
A_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
b_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
x_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
bp = hypre_StructVectorBoxData(b, i);
xp = hypre_StructVectorBoxData(x, i);
hypre_ForBoxI(j, compute_box_a)
{
compute_box = hypre_BoxArrayBox(compute_box_a, j);
start = hypre_BoxIMin(compute_box);
hypre_BoxGetSize(compute_box, loop_size);
/* Are we relaxing index start or start+(1,0,0)? */
redblack = hypre_abs(hypre_IndexX(start) +
hypre_IndexY(start) +
hypre_IndexZ(start) + rb) % 2;
bstart = hypre_BoxIndexRank(b_dbox, start);
xstart = hypre_BoxIndexRank(x_dbox, start);
ni = hypre_IndexX(loop_size);
nj = hypre_IndexY(loop_size);
nk = hypre_IndexZ(loop_size);
bni = hypre_BoxSizeX(b_dbox);
xni = hypre_BoxSizeX(x_dbox);
bnj = hypre_BoxSizeY(b_dbox);
xnj = hypre_BoxSizeY(x_dbox);
if (constant_coeff == 1)
{
Ai= hypre_CCBoxIndexRank(A_dbox, start);
AApd= 1.0/Ap[Ai];
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(ii,jj,bi,xi,kk) HYPRE_SMP_SCHEDULE
#endif
for (kk = 0; kk < nk; kk++)
{
for (jj = 0; jj < nj; jj++)
{
ii = (kk + jj + redblack) % 2;
bi = bstart + kk*bnj*bni + jj*bni + ii;
xi = xstart + kk*xnj*xni + jj*xni + ii;
for (; ii < ni; ii+=2, bi+=2, xi+=2)
{
xp[xi] = bp[bi]*AApd;
}
}
}
}
else /* variable coefficient diag */
{
Astart = hypre_BoxIndexRank(A_dbox, start);
Ani = hypre_BoxSizeX(A_dbox);
Anj = hypre_BoxSizeY(A_dbox);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(ii,jj,Ai,bi,xi,kk) HYPRE_SMP_SCHEDULE
#endif
for (kk = 0; kk < nk; kk++)
{
for (jj = 0; jj < nj; jj++)
{
ii = (kk + jj + redblack) % 2;
Ai = Astart + kk*Anj*Ani + jj*Ani + ii;
bi = bstart + kk*bnj*bni + jj*bni + ii;
xi = xstart + kk*xnj*xni + jj*xni + ii;
for (; ii < ni; ii+=2, Ai+=2, bi+=2, xi+=2)
{
xp[xi] = bp[bi] / Ap[Ai];
}
}
}
}
}
}
}
rb = (rb + 1) % 2;
iter++;
}
/*----------------------------------------------------------
* Do regular iterations
*----------------------------------------------------------*/
while (iter < 2*max_iter)
{
for (compute_i = 0; compute_i < 2; compute_i++)
{
switch(compute_i)
{
case 0:
{
xp = hypre_StructVectorData(x);
hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
}
break;
case 1:
{
hypre_FinalizeIndtComputations(comm_handle);
compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
}
break;
}
hypre_ForBoxArrayI(i, compute_box_aa)
{
compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
A_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
b_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
x_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
bp = hypre_StructVectorBoxData(b, i);
xp = hypre_StructVectorBoxData(x, i);
hypre_ForBoxI(j, compute_box_a)
{
compute_box = hypre_BoxArrayBox(compute_box_a, j);
start = hypre_BoxIMin(compute_box);
hypre_BoxGetSize(compute_box, loop_size);
/* Are we relaxing index start or start+(1,0,0)? */
redblack = hypre_abs(hypre_IndexX(start) +
hypre_IndexY(start) +
hypre_IndexZ(start) + rb) % 2;
bstart = hypre_BoxIndexRank(b_dbox, start);
xstart = hypre_BoxIndexRank(x_dbox, start);
ni = hypre_IndexX(loop_size);
nj = hypre_IndexY(loop_size);
nk = hypre_IndexZ(loop_size);
bni= hypre_BoxSizeX(b_dbox);
xni= hypre_BoxSizeX(x_dbox);
bnj= hypre_BoxSizeY(b_dbox);
xnj= hypre_BoxSizeY(x_dbox);
Ai = hypre_CCBoxIndexRank(A_dbox, start);
switch(stencil_size)
{
case 7:
App = hypre_StructMatrixBoxData(A, i, offd[5]);
App5= App[Ai];
App = hypre_StructMatrixBoxData(A, i, offd[4]);
App4= App[Ai];
xoff5 = hypre_BoxOffsetDistance(
x_dbox, stencil_shape[offd[5]]);
xoff4 = hypre_BoxOffsetDistance(
x_dbox, stencil_shape[offd[4]]);
case 5:
App = hypre_StructMatrixBoxData(A, i, offd[3]);
App3= App[Ai];
App = hypre_StructMatrixBoxData(A, i, offd[2]);
App2= App[Ai];
xoff3 = hypre_BoxOffsetDistance(
x_dbox, stencil_shape[offd[3]]);
xoff2 = hypre_BoxOffsetDistance(
x_dbox, stencil_shape[offd[2]]);
case 3:
App = hypre_StructMatrixBoxData(A, i, offd[1]);
App1= App[Ai];
App = hypre_StructMatrixBoxData(A, i, offd[0]);
App0= App[Ai];
xoff1 = hypre_BoxOffsetDistance(
x_dbox, stencil_shape[offd[1]]);
xoff0 = hypre_BoxOffsetDistance(
x_dbox, stencil_shape[offd[0]]);
break;
}
if (constant_coeff == 1)
{
AApd = 1/Ap[Ai];
switch(stencil_size)
{
case 7:
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(ii,jj,bi,xi,kk) HYPRE_SMP_SCHEDULE
#endif
for (kk = 0; kk < nk; kk++)
{
for (jj = 0; jj < nj; jj++)
{
ii = (kk + jj + redblack) % 2;
bi = bstart + kk*bnj*bni + jj*bni + ii;
xi = xstart + kk*xnj*xni + jj*xni + ii;
for (; ii < ni; ii+=2, bi+=2, xi+=2)
{
xp[xi] =
(bp[bi] -
App0*xp[xi + xoff0] -
App1*xp[xi + xoff1] -
App2*xp[xi + xoff2] -
App3*xp[xi + xoff3] -
App4*xp[xi + xoff4] -
App5*xp[xi + xoff5])*AApd;
}
}
}
break;
case 5:
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(ii,jj,bi,xi,kk) HYPRE_SMP_SCHEDULE
#endif
for (kk = 0; kk < nk; kk++)
{
for (jj = 0; jj < nj; jj++)
{
ii = (kk + jj + redblack) % 2;
bi = bstart + kk*bnj*bni + jj*bni + ii;
xi = xstart + kk*xnj*xni + jj*xni + ii;
for (; ii < ni; ii+=2, bi+=2, xi+=2)
{
xp[xi] =
(bp[bi] -
App0*xp[xi + xoff0] -
App1*xp[xi + xoff1] -
App2*xp[xi + xoff2] -
App3*xp[xi + xoff3])*AApd;
}
}
}
break;
case 3:
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(ii,jj,bi,xi,kk) HYPRE_SMP_SCHEDULE
#endif
for (kk = 0; kk < nk; kk++)
{
for (jj = 0; jj < nj; jj++)
{
ii = (kk + jj + redblack) % 2;
bi = bstart + kk*bnj*bni + jj*bni + ii;
xi = xstart + kk*xnj*xni + jj*xni + ii;
for (; ii < ni; ii+=2, bi+=2, xi+=2)
{
xp[xi] =
(bp[bi] -
App0*xp[xi + xoff0] -
App1*xp[xi + xoff1])*AApd;
}
}
}
break;
}
} /* if (constant_coeff == 1) */
else /* variable diagonal */
{
Astart = hypre_BoxIndexRank(A_dbox, start);
Ani = hypre_BoxSizeX(A_dbox);
Anj = hypre_BoxSizeY(A_dbox);
switch(stencil_size)
{
case 7:
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(ii,jj,Ai,bi,xi,kk) HYPRE_SMP_SCHEDULE
#endif
for (kk = 0; kk < nk; kk++)
{
for (jj = 0; jj < nj; jj++)
{
ii = (kk + jj + redblack) % 2;
Ai = Astart + kk*Anj*Ani + jj*Ani + ii;
bi = bstart + kk*bnj*bni + jj*bni + ii;
xi = xstart + kk*xnj*xni + jj*xni + ii;
for (; ii < ni; ii+=2, Ai+=2, bi+=2, xi+=2)
{
xp[xi] =
(bp[bi] -
App0*xp[xi + xoff0] -
App1*xp[xi + xoff1] -
App2*xp[xi + xoff2] -
App3*xp[xi + xoff3] -
App4*xp[xi + xoff4] -
App5*xp[xi + xoff5]) / Ap[Ai];
}
}
}
break;
case 5:
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(ii,jj,Ai,bi,xi,kk) HYPRE_SMP_SCHEDULE
#endif
for (kk = 0; kk < nk; kk++)
{
for (jj = 0; jj < nj; jj++)
{
ii = (kk + jj + redblack) % 2;
Ai = Astart + kk*Anj*Ani + jj*Ani + ii;
bi = bstart + kk*bnj*bni + jj*bni + ii;
xi = xstart + kk*xnj*xni + jj*xni + ii;
for (; ii < ni; ii+=2, Ai+=2, bi+=2, xi+=2)
{
xp[xi] =
(bp[bi] -
App0*xp[xi + xoff0] -
App1*xp[xi + xoff1] -
App2*xp[xi + xoff2] -
App3*xp[xi + xoff3]) / Ap[Ai];
}
}
}
break;
case 3:
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(ii,jj,Ai,bi,xi,kk) HYPRE_SMP_SCHEDULE
#endif
for (kk = 0; kk < nk; kk++)
{
for (jj = 0; jj < nj; jj++)
{
ii = (kk + jj + redblack) % 2;
Ai = Astart + kk*Anj*Ani + jj*Ani + ii;
bi = bstart + kk*bnj*bni + jj*bni + ii;
xi = xstart + kk*xnj*xni + jj*xni + ii;
for (; ii < ni; ii+=2, Ai+=2, bi+=2, xi+=2)
{
xp[xi] =
(bp[bi] -
App0*xp[xi + xoff0] -
App1*xp[xi + xoff1]) / Ap[Ai];
}
}
}
break;
} /* switch(stencil_size) */
} /* else */
}
}
}
rb = (rb + 1) % 2;
iter++;
}
(relax_data -> num_iterations) = iter / 2;
/*-----------------------------------------------------------------------
* Return
*-----------------------------------------------------------------------*/
hypre_IncFLOPCount(relax_data -> flops);
hypre_EndTiming(relax_data -> time_index);
return hypre_error_flag;
}