hypre/struct_ls/smg_residual_unrolled.c

937 lines
40 KiB
C

/*BHEADER**********************************************************************
* (c) 1997 The Regents of the University of California
*
* See the file COPYRIGHT_and_DISCLAIMER for a complete copyright
* notice, contact person, and disclaimer.
*
* $Revision$
*********************************************************************EHEADER*/
/******************************************************************************
*
* Routine for computing residuals in the SMG code
*
*****************************************************************************/
#include "headers.h"
/*--------------------------------------------------------------------------
* hypre_SMGResidualData data structure
*--------------------------------------------------------------------------*/
typedef struct
{
hypre_Index base_index;
hypre_Index base_stride;
hypre_StructMatrix *A;
hypre_StructVector *x;
hypre_StructVector *b;
hypre_StructVector *r;
hypre_BoxArray *base_points;
hypre_ComputePkg *compute_pkg;
int time_index;
int flops;
} hypre_SMGResidualData;
/*--------------------------------------------------------------------------
* hypre_SMGResidualCreate
*--------------------------------------------------------------------------*/
void *
hypre_SMGResidualCreate( )
{
hypre_SMGResidualData *residual_data;
residual_data = hypre_CTAlloc(hypre_SMGResidualData, 1);
(residual_data -> time_index) = hypre_InitializeTiming("SMGResidual");
/* set defaults */
hypre_SetIndex((residual_data -> base_index), 0, 0, 0);
hypre_SetIndex((residual_data -> base_stride), 1, 1, 1);
return (void *) residual_data;
}
/*--------------------------------------------------------------------------
* hypre_SMGResidualSetup
*--------------------------------------------------------------------------*/
int
hypre_SMGResidualSetup( void *residual_vdata,
hypre_StructMatrix *A,
hypre_StructVector *x,
hypre_StructVector *b,
hypre_StructVector *r )
{
int ierr;
hypre_SMGResidualData *residual_data = residual_vdata;
hypre_IndexRef base_index = (residual_data -> base_index);
hypre_IndexRef base_stride = (residual_data -> base_stride);
hypre_Index unit_stride;
hypre_StructGrid *grid;
hypre_StructStencil *stencil;
hypre_BoxArrayArray *send_boxes;
hypre_BoxArrayArray *recv_boxes;
int **send_processes;
int **recv_processes;
hypre_BoxArrayArray *indt_boxes;
hypre_BoxArrayArray *dept_boxes;
hypre_BoxArray *base_points;
hypre_ComputePkg *compute_pkg;
/*----------------------------------------------------------
* Set up base points and the compute package
*----------------------------------------------------------*/
grid = hypre_StructMatrixGrid(A);
stencil = hypre_StructMatrixStencil(A);
hypre_SetIndex(unit_stride, 1, 1, 1);
base_points = hypre_BoxArrayDuplicate(hypre_StructGridBoxes(grid));
hypre_ProjectBoxArray(base_points, base_index, base_stride);
hypre_CreateComputeInfo(grid, stencil,
&send_boxes, &recv_boxes,
&send_processes, &recv_processes,
&indt_boxes, &dept_boxes);
hypre_ProjectBoxArrayArray(indt_boxes, base_index, base_stride);
hypre_ProjectBoxArrayArray(dept_boxes, base_index, base_stride);
hypre_ComputePkgCreate(send_boxes, recv_boxes,
unit_stride, unit_stride,
send_processes, recv_processes,
indt_boxes, dept_boxes,
base_stride, grid,
hypre_StructVectorDataSpace(x), 1,
&compute_pkg);
/*----------------------------------------------------------
* Set up the residual data structure
*----------------------------------------------------------*/
(residual_data -> A) = hypre_StructMatrixRef(A);
(residual_data -> x) = hypre_StructVectorRef(x);
(residual_data -> b) = hypre_StructVectorRef(b);
(residual_data -> r) = hypre_StructVectorRef(r);
(residual_data -> base_points) = base_points;
(residual_data -> compute_pkg) = compute_pkg;
/*-----------------------------------------------------
* Compute flops
*-----------------------------------------------------*/
(residual_data -> flops) =
(hypre_StructMatrixGlobalSize(A) + hypre_StructVectorGlobalSize(x)) /
(hypre_IndexX(base_stride) *
hypre_IndexY(base_stride) *
hypre_IndexZ(base_stride) );
return ierr;
}
/*--------------------------------------------------------------------------
* hypre_SMGResidual
*--------------------------------------------------------------------------*/
int
hypre_SMGResidual( void *residual_vdata,
hypre_StructMatrix *A,
hypre_StructVector *x,
hypre_StructVector *b,
hypre_StructVector *r )
{
int ierr;
hypre_SMGResidualData *residual_data = residual_vdata;
hypre_IndexRef base_stride = (residual_data -> base_stride);
hypre_BoxArray *base_points = (residual_data -> base_points);
hypre_ComputePkg *compute_pkg = (residual_data -> compute_pkg);
hypre_CommHandle *comm_handle;
hypre_BoxArrayArray *compute_box_aa;
hypre_BoxArray *compute_box_a;
hypre_Box *compute_box;
hypre_Box *A_data_box;
hypre_Box *x_data_box;
hypre_Box *b_data_box;
hypre_Box *r_data_box;
int Ai;
int xi;
int bi;
int ri;
double *Ap0;
double *xp0;
double *bp;
double *rp;
hypre_Index loop_size;
hypre_IndexRef start;
hypre_StructStencil *stencil;
hypre_Index *stencil_shape;
int stencil_size;
int compute_i, i, j, si;
int loopi, loopj, loopk;
double *Ap1, *Ap2;
double *Ap3, *Ap4;
double *Ap5, *Ap6;
double *Ap7, *Ap8, *Ap9;
double *Ap10, *Ap11, *Ap12, *Ap13, *Ap14;
double *Ap15, *Ap16, *Ap17, *Ap18;
double *Ap19, *Ap20, *Ap21, *Ap22, *Ap23, *Ap24, *Ap25, *Ap26;
double *xp1, *xp2;
double *xp3, *xp4;
double *xp5, *xp6;
double *xp7, *xp8, *xp9;
double *xp10, *xp11, *xp12, *xp13, *xp14;
double *xp15, *xp16, *xp17, *xp18;
double *xp19, *xp20, *xp21, *xp22, *xp23, *xp24, *xp25, *xp26;
hypre_BeginTiming(residual_data -> time_index);
/*-----------------------------------------------------------------------
* Compute residual r = b - Ax
*-----------------------------------------------------------------------*/
stencil = hypre_StructMatrixStencil(A);
stencil_shape = hypre_StructStencilShape(stencil);
stencil_size = hypre_StructStencilSize(stencil);
for (compute_i = 0; compute_i < 2; compute_i++)
{
switch(compute_i)
{
case 0:
{
xp0 = hypre_StructVectorData(x);
hypre_InitializeIndtComputations(compute_pkg, xp0, &comm_handle);
compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
/*----------------------------------------
* Copy b into r
*----------------------------------------*/
compute_box_a = base_points;
hypre_ForBoxI(i, compute_box_a)
{
compute_box = hypre_BoxArrayBox(compute_box_a, i);
start = hypre_BoxIMin(compute_box);
b_data_box =
hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
r_data_box =
hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);
bp = hypre_StructVectorBoxData(b, i);
rp = hypre_StructVectorBoxData(r, i);
hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
hypre_BoxLoop2Begin(loop_size,
b_data_box, start, base_stride, bi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri)
{
rp[ri] = bp[bi];
}
hypre_BoxLoop2End(bi, ri);
}
}
break;
case 1:
{
hypre_FinalizeIndtComputations(comm_handle);
compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
}
break;
}
/*--------------------------------------------------------------------
* Compute r -= A*x
*--------------------------------------------------------------------*/
hypre_ForBoxArrayI(i, compute_box_aa)
{
compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);
A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);
rp = hypre_StructVectorBoxData(r, i);
/*--------------------------------------------------------------
* Switch statement to direct control (based on stencil size) to
* code to get pointers and offsets fo A and x.
*--------------------------------------------------------------*/
switch (stencil_size)
{
case 1:
Ap0 = hypre_StructMatrixBoxData(A, i, 0);
xp0 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
break;
case 3:
Ap0 = hypre_StructMatrixBoxData(A, i, 0);
Ap1 = hypre_StructMatrixBoxData(A, i, 1);
Ap2 = hypre_StructMatrixBoxData(A, i, 2);
xp0 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
xp1 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
xp2 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
break;
case 5:
Ap0 = hypre_StructMatrixBoxData(A, i, 0);
Ap1 = hypre_StructMatrixBoxData(A, i, 1);
Ap2 = hypre_StructMatrixBoxData(A, i, 2);
Ap3 = hypre_StructMatrixBoxData(A, i, 3);
Ap4 = hypre_StructMatrixBoxData(A, i, 4);
xp0 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
xp1 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
xp2 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
xp3 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
xp4 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
break;
case 7:
Ap0 = hypre_StructMatrixBoxData(A, i, 0);
Ap1 = hypre_StructMatrixBoxData(A, i, 1);
Ap2 = hypre_StructMatrixBoxData(A, i, 2);
Ap3 = hypre_StructMatrixBoxData(A, i, 3);
Ap4 = hypre_StructMatrixBoxData(A, i, 4);
Ap5 = hypre_StructMatrixBoxData(A, i, 5);
Ap6 = hypre_StructMatrixBoxData(A, i, 6);
xp0 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
xp1 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
xp2 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
xp3 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
xp4 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
xp5 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
xp6 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
break;
case 9:
Ap0 = hypre_StructMatrixBoxData(A, i, 0);
Ap1 = hypre_StructMatrixBoxData(A, i, 1);
Ap2 = hypre_StructMatrixBoxData(A, i, 2);
Ap3 = hypre_StructMatrixBoxData(A, i, 3);
Ap4 = hypre_StructMatrixBoxData(A, i, 4);
Ap5 = hypre_StructMatrixBoxData(A, i, 5);
Ap6 = hypre_StructMatrixBoxData(A, i, 6);
Ap7 = hypre_StructMatrixBoxData(A, i, 7);
Ap8 = hypre_StructMatrixBoxData(A, i, 8);
xp0 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
xp1 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
xp2 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
xp3 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
xp4 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
xp5 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
xp6 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
xp7 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
xp8 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
break;
case 15:
Ap0 = hypre_StructMatrixBoxData(A, i, 0);
Ap1 = hypre_StructMatrixBoxData(A, i, 1);
Ap2 = hypre_StructMatrixBoxData(A, i, 2);
Ap3 = hypre_StructMatrixBoxData(A, i, 3);
Ap4 = hypre_StructMatrixBoxData(A, i, 4);
Ap5 = hypre_StructMatrixBoxData(A, i, 5);
Ap6 = hypre_StructMatrixBoxData(A, i, 6);
Ap7 = hypre_StructMatrixBoxData(A, i, 7);
Ap8 = hypre_StructMatrixBoxData(A, i, 8);
Ap9 = hypre_StructMatrixBoxData(A, i, 9);
Ap10 = hypre_StructMatrixBoxData(A, i, 10);
Ap11 = hypre_StructMatrixBoxData(A, i, 11);
Ap12 = hypre_StructMatrixBoxData(A, i, 12);
Ap13 = hypre_StructMatrixBoxData(A, i, 13);
Ap14 = hypre_StructMatrixBoxData(A, i, 14);
xp0 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
xp1 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
xp2 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
xp3 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
xp4 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
xp5 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
xp6 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
xp7 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
xp8 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
xp9 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
xp10 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
xp11 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
xp12 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
xp13 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
xp14 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
break;
case 19:
Ap0 = hypre_StructMatrixBoxData(A, i, 0);
Ap1 = hypre_StructMatrixBoxData(A, i, 1);
Ap2 = hypre_StructMatrixBoxData(A, i, 2);
Ap3 = hypre_StructMatrixBoxData(A, i, 3);
Ap4 = hypre_StructMatrixBoxData(A, i, 4);
Ap5 = hypre_StructMatrixBoxData(A, i, 5);
Ap6 = hypre_StructMatrixBoxData(A, i, 6);
Ap7 = hypre_StructMatrixBoxData(A, i, 7);
Ap8 = hypre_StructMatrixBoxData(A, i, 8);
Ap9 = hypre_StructMatrixBoxData(A, i, 9);
Ap10 = hypre_StructMatrixBoxData(A, i, 10);
Ap11 = hypre_StructMatrixBoxData(A, i, 11);
Ap12 = hypre_StructMatrixBoxData(A, i, 12);
Ap13 = hypre_StructMatrixBoxData(A, i, 13);
Ap14 = hypre_StructMatrixBoxData(A, i, 14);
Ap15 = hypre_StructMatrixBoxData(A, i, 15);
Ap16 = hypre_StructMatrixBoxData(A, i, 16);
Ap17 = hypre_StructMatrixBoxData(A, i, 17);
Ap18 = hypre_StructMatrixBoxData(A, i, 18);
xp0 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
xp1 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
xp2 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
xp3 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
xp4 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
xp5 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
xp6 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
xp7 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
xp8 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
xp9 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
xp10 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
xp11 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
xp12 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
xp13 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
xp14 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
xp15 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]);
xp16 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]);
xp17 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]);
xp18 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]);
break;
case 27:
Ap0 = hypre_StructMatrixBoxData(A, i, 0);
Ap1 = hypre_StructMatrixBoxData(A, i, 1);
Ap2 = hypre_StructMatrixBoxData(A, i, 2);
Ap3 = hypre_StructMatrixBoxData(A, i, 3);
Ap4 = hypre_StructMatrixBoxData(A, i, 4);
Ap5 = hypre_StructMatrixBoxData(A, i, 5);
Ap6 = hypre_StructMatrixBoxData(A, i, 6);
Ap7 = hypre_StructMatrixBoxData(A, i, 7);
Ap8 = hypre_StructMatrixBoxData(A, i, 8);
Ap9 = hypre_StructMatrixBoxData(A, i, 9);
Ap10 = hypre_StructMatrixBoxData(A, i, 10);
Ap11 = hypre_StructMatrixBoxData(A, i, 11);
Ap12 = hypre_StructMatrixBoxData(A, i, 12);
Ap13 = hypre_StructMatrixBoxData(A, i, 13);
Ap14 = hypre_StructMatrixBoxData(A, i, 14);
Ap15 = hypre_StructMatrixBoxData(A, i, 15);
Ap16 = hypre_StructMatrixBoxData(A, i, 16);
Ap17 = hypre_StructMatrixBoxData(A, i, 17);
Ap18 = hypre_StructMatrixBoxData(A, i, 18);
Ap19 = hypre_StructMatrixBoxData(A, i, 19);
Ap20 = hypre_StructMatrixBoxData(A, i, 20);
Ap21 = hypre_StructMatrixBoxData(A, i, 21);
Ap22 = hypre_StructMatrixBoxData(A, i, 22);
Ap23 = hypre_StructMatrixBoxData(A, i, 23);
Ap24 = hypre_StructMatrixBoxData(A, i, 24);
Ap25 = hypre_StructMatrixBoxData(A, i, 25);
Ap26 = hypre_StructMatrixBoxData(A, i, 26);
xp0 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
xp1 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
xp2 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
xp3 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
xp4 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
xp5 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
xp6 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
xp7 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
xp8 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
xp9 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
xp10 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
xp11 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
xp12 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
xp13 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
xp14 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
xp15 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]);
xp16 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]);
xp17 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]);
xp18 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]);
xp19 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[19]);
xp20 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[20]);
xp21 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[21]);
xp22 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[22]);
xp23 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[23]);
xp24 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[24]);
xp25 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[25]);
xp26 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[26]);
break;
default:
;
}
hypre_ForBoxI(j, compute_box_a)
{
compute_box = hypre_BoxArrayBox(compute_box_a, j);
start = hypre_BoxIMin(compute_box);
/*------------------------------------------------------
* Switch statement to direct control to appropriate
* box loop depending on stencil size
*------------------------------------------------------*/
switch (stencil_size)
{
case 1:
hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
hypre_BoxLoop3Begin(loop_size,
A_data_box, start, base_stride, Ai,
x_data_box, start, base_stride, xi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
{
rp[ri] = rp[ri]
- Ap0[Ai] * xp0[xi];
}
hypre_BoxLoop3End(Ai, xi, ri);
break;
case 3:
hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
hypre_BoxLoop3Begin(loop_size,
A_data_box, start, base_stride, Ai,
x_data_box, start, base_stride, xi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
{
rp[ri] = rp[ri]
- Ap0[Ai] * xp0[xi]
- Ap1[Ai] * xp1[xi]
- Ap2[Ai] * xp2[xi];
}
hypre_BoxLoop3End(Ai, xi, ri);
break;
case 5:
hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
hypre_BoxLoop3Begin(loop_size,
A_data_box, start, base_stride, Ai,
x_data_box, start, base_stride, xi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
{
rp[ri] = rp[ri]
- Ap0[Ai] * xp0[xi]
- Ap1[Ai] * xp1[xi]
- Ap2[Ai] * xp2[xi]
- Ap3[Ai] * xp3[xi]
- Ap4[Ai] * xp4[xi];
}
hypre_BoxLoop3End(Ai, xi, ri);
break;
case 7:
hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
hypre_BoxLoop3Begin(loop_size,
A_data_box, start, base_stride, Ai,
x_data_box, start, base_stride, xi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
{
rp[ri] = rp[ri]
- Ap0[Ai] * xp0[xi]
- Ap1[Ai] * xp1[xi]
- Ap2[Ai] * xp2[xi]
- Ap3[Ai] * xp3[xi]
- Ap4[Ai] * xp4[xi]
- Ap5[Ai] * xp5[xi]
- Ap6[Ai] * xp6[xi];
}
hypre_BoxLoop3End(Ai, xi, ri);
break;
case 9:
hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
hypre_BoxLoop3Begin(loop_size,
A_data_box, start, base_stride, Ai,
x_data_box, start, base_stride, xi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
{
rp[ri] = rp[ri]
- Ap0[Ai] * xp0[xi]
- Ap1[Ai] * xp1[xi]
- Ap2[Ai] * xp2[xi]
- Ap3[Ai] * xp3[xi]
- Ap4[Ai] * xp4[xi]
- Ap5[Ai] * xp5[xi]
- Ap6[Ai] * xp6[xi]
- Ap7[Ai] * xp7[xi]
- Ap8[Ai] * xp8[xi];
}
hypre_BoxLoop3End(Ai, xi, ri);
break;
case 15:
hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
hypre_BoxLoop3Begin(loop_size,
A_data_box, start, base_stride, Ai,
x_data_box, start, base_stride, xi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
{
rp[ri] = rp[ri]
- Ap0[Ai] * xp0[xi]
- Ap1[Ai] * xp1[xi]
- Ap2[Ai] * xp2[xi]
- Ap3[Ai] * xp3[xi]
- Ap4[Ai] * xp4[xi]
- Ap5[Ai] * xp5[xi]
- Ap6[Ai] * xp6[xi]
- Ap7[Ai] * xp7[xi]
- Ap8[Ai] * xp8[xi]
- Ap9[Ai] * xp9[xi]
- Ap10[Ai] * xp10[xi]
- Ap11[Ai] * xp11[xi]
- Ap12[Ai] * xp12[xi]
- Ap13[Ai] * xp13[xi]
- Ap14[Ai] * xp14[xi];
}
hypre_BoxLoop3End(Ai, xi, ri);
break;
case 19:
hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
hypre_BoxLoop3Begin(loop_size,
A_data_box, start, base_stride, Ai,
x_data_box, start, base_stride, xi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
{
rp[ri] = rp[ri]
- Ap0[Ai] * xp0[xi]
- Ap1[Ai] * xp1[xi]
- Ap2[Ai] * xp2[xi]
- Ap3[Ai] * xp3[xi]
- Ap4[Ai] * xp4[xi]
- Ap5[Ai] * xp5[xi]
- Ap6[Ai] * xp6[xi]
- Ap7[Ai] * xp7[xi]
- Ap8[Ai] * xp8[xi]
- Ap9[Ai] * xp9[xi]
- Ap10[Ai] * xp10[xi]
- Ap11[Ai] * xp11[xi]
- Ap12[Ai] * xp12[xi]
- Ap13[Ai] * xp13[xi]
- Ap14[Ai] * xp14[xi]
- Ap15[Ai] * xp15[xi]
- Ap16[Ai] * xp16[xi]
- Ap17[Ai] * xp17[xi]
- Ap18[Ai] * xp18[xi];
}
hypre_BoxLoop3End(Ai, xi, ri);
break;
case 27:
hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
hypre_BoxLoop3Begin(loop_size,
A_data_box, start, base_stride, Ai,
x_data_box, start, base_stride, xi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
{
rp[ri] = rp[ri]
- Ap0[Ai] * xp0[xi]
- Ap1[Ai] * xp1[xi]
- Ap2[Ai] * xp2[xi]
- Ap3[Ai] * xp3[xi]
- Ap4[Ai] * xp4[xi]
- Ap5[Ai] * xp5[xi]
- Ap6[Ai] * xp6[xi]
- Ap7[Ai] * xp7[xi]
- Ap8[Ai] * xp8[xi]
- Ap9[Ai] * xp9[xi]
- Ap10[Ai] * xp10[xi]
- Ap11[Ai] * xp11[xi]
- Ap12[Ai] * xp12[xi]
- Ap13[Ai] * xp13[xi]
- Ap14[Ai] * xp14[xi]
- Ap15[Ai] * xp15[xi]
- Ap16[Ai] * xp16[xi]
- Ap17[Ai] * xp17[xi]
- Ap18[Ai] * xp18[xi]
- Ap19[Ai] * xp19[xi]
- Ap20[Ai] * xp20[xi]
- Ap21[Ai] * xp21[xi]
- Ap22[Ai] * xp22[xi]
- Ap23[Ai] * xp23[xi]
- Ap24[Ai] * xp24[xi]
- Ap25[Ai] * xp25[xi]
- Ap26[Ai] * xp26[xi];
}
hypre_BoxLoop3End(Ai, xi, ri);
break;
default:
for (si = 0; si < stencil_size; si++)
{
Ap0 = hypre_StructMatrixBoxData(A, i, si);
xp0 = hypre_StructVectorBoxData(x, i) +
hypre_BoxOffsetDistance(x_data_box, stencil_shape[si]);
hypre_BoxGetStrideSize(compute_box, base_stride,
loop_size);
hypre_BoxLoop3Begin(loop_size,
A_data_box, start, base_stride, Ai,
x_data_box, start, base_stride, xi,
r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
{
rp[ri] -= Ap0[Ai] * xp0[xi];
}
hypre_BoxLoop3End(Ai, xi, ri);
}
}
}
}
}
/*-----------------------------------------------------------------------
* Return
*-----------------------------------------------------------------------*/
hypre_IncFLOPCount(residual_data -> flops);
hypre_EndTiming(residual_data -> time_index);
return ierr;
}
/*--------------------------------------------------------------------------
* hypre_SMGResidualSetBase
*--------------------------------------------------------------------------*/
int
hypre_SMGResidualSetBase( void *residual_vdata,
hypre_Index base_index,
hypre_Index base_stride )
{
hypre_SMGResidualData *residual_data = residual_vdata;
int d;
int ierr = 0;
for (d = 0; d < 3; d++)
{
hypre_IndexD((residual_data -> base_index), d)
= hypre_IndexD(base_index, d);
hypre_IndexD((residual_data -> base_stride), d)
= hypre_IndexD(base_stride, d);
}
return ierr;
}
/*--------------------------------------------------------------------------
* hypre_SMGResidualDestroy
*--------------------------------------------------------------------------*/
int
hypre_SMGResidualDestroy( void *residual_vdata )
{
int ierr;
hypre_SMGResidualData *residual_data = residual_vdata;
if (residual_data)
{
hypre_StructMatrixDestroy(residual_data -> A);
hypre_StructVectorDestroy(residual_data -> x);
hypre_StructVectorDestroy(residual_data -> b);
hypre_StructVectorDestroy(residual_data -> r);
hypre_BoxArrayDestroy(residual_data -> base_points);
hypre_ComputePkgDestroy(residual_data -> compute_pkg );
hypre_FinalizeTiming(residual_data -> time_index);
hypre_TFree(residual_data);
}
return ierr;
}