1102 lines
36 KiB
C
1102 lines
36 KiB
C
/*--------------------------------------------------------------------------
|
|
* Test driver for the StructIJ version of the structured grid interface.
|
|
* Here, the structgrid interface is implemented on top of the IJ interface,
|
|
* which in turn constructs a parcsr matrix under the hood.
|
|
*--------------------------------------------------------------------------*/
|
|
|
|
/*------------------------------------------------------------------------
|
|
* The problem is the standard 7-point laplacian in 3D with grid and
|
|
* anisotropy determined as command line arguments, as in the
|
|
* struct_linear_solvers test driver. 1D and 2D problems are possible
|
|
* too, but these have not been tested. Do `driver -help' for usage info.
|
|
*------------------------------------------------------------------------*/
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
|
|
#include "utilities.h"
|
|
#include "HYPRE_structIJ_mv.h"
|
|
#include "HYPRE_parcsr_ls.h"
|
|
|
|
int
|
|
main( int argc,
|
|
char *argv[] )
|
|
{
|
|
int arg_index;
|
|
int print_usage;
|
|
int nx, ny, nz;
|
|
int P, Q, R;
|
|
int bx, by, bz;
|
|
double cx, cy, cz;
|
|
int solver_id;
|
|
int ierr;
|
|
|
|
int p, q, r;
|
|
int dim;
|
|
int nblocks, volume;
|
|
|
|
int istart[3];
|
|
int **iupper;
|
|
int **ilower;
|
|
|
|
int **offsets;
|
|
|
|
HYPRE_StructGrid grid;
|
|
HYPRE_StructStencil stencil;
|
|
|
|
int *stencil_indices;
|
|
double *values;
|
|
|
|
int i, s;
|
|
int ix, iy, iz, ib;
|
|
|
|
int ioutdat;
|
|
int debug_flag;
|
|
int max_levels = 25;
|
|
int num_iterations;
|
|
double final_res_norm;
|
|
|
|
HYPRE_StructIJMatrix A_structij;
|
|
HYPRE_StructIJVector b_structij;
|
|
HYPRE_StructIJVector x_structij;
|
|
HYPRE_ParCSRMatrix A_parcsr;
|
|
HYPRE_ParVector b_parcsr;
|
|
HYPRE_ParVector x_parcsr;
|
|
|
|
HYPRE_Solver amg_solver;
|
|
HYPRE_Solver pcg_solver;
|
|
HYPRE_Solver pcg_precond;
|
|
|
|
int num_procs, myid;
|
|
int *partitioning;
|
|
int *part_b;
|
|
int *part_x;
|
|
int *row_sizes;
|
|
int *diag_sizes;
|
|
int *offdiag_sizes;
|
|
|
|
int time_index;
|
|
|
|
/* parameters for BoomerAMG */
|
|
double strong_threshold;
|
|
double trunc_factor;
|
|
int cycle_type;
|
|
int coarsen_type = 0;
|
|
int hybrid = 1;
|
|
int measure_type = 0;
|
|
int *num_grid_sweeps;
|
|
int *grid_relax_type;
|
|
int **grid_relax_points;
|
|
int relax_default;
|
|
double *relax_weight;
|
|
double tol = 1.0e-6;
|
|
|
|
/* parameters for PILUT */
|
|
double drop_tol = -1;
|
|
int nonzeros_to_keep = -1;
|
|
|
|
/* parameters for GMRES */
|
|
int k_dim;
|
|
|
|
MPI_Init(&argc, &argv);
|
|
|
|
MPI_Comm_size(MPI_COMM_WORLD, &num_procs );
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &myid );
|
|
|
|
/*-----------------------------------------------------------
|
|
* Set defaults
|
|
*-----------------------------------------------------------*/
|
|
|
|
dim = 3;
|
|
|
|
nx = 10;
|
|
ny = 10;
|
|
nz = 10;
|
|
|
|
P = num_procs;
|
|
Q = 1;
|
|
R = 1;
|
|
|
|
bx = 1;
|
|
by = 1;
|
|
bz = 1;
|
|
|
|
cx = 1.0;
|
|
cy = 1.0;
|
|
cz = 1.0;
|
|
|
|
solver_id = 0;
|
|
|
|
istart[0] = -17;
|
|
istart[1] = 0;
|
|
istart[2] = 32;
|
|
|
|
relax_default = 3;
|
|
debug_flag = 0;
|
|
|
|
ioutdat = 3;
|
|
|
|
/*-----------------------------------------------------------
|
|
* Parse command line
|
|
*-----------------------------------------------------------*/
|
|
|
|
print_usage = 0;
|
|
arg_index = 1;
|
|
|
|
while ( (arg_index < argc) && (!print_usage) )
|
|
{
|
|
if ( strcmp(argv[arg_index], "-n") == 0 )
|
|
{
|
|
arg_index++;
|
|
nx = atoi(argv[arg_index++]);
|
|
ny = atoi(argv[arg_index++]);
|
|
nz = atoi(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-P") == 0 )
|
|
{
|
|
arg_index++;
|
|
P = atoi(argv[arg_index++]);
|
|
Q = atoi(argv[arg_index++]);
|
|
R = atoi(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-b") == 0 )
|
|
{
|
|
arg_index++;
|
|
bx = atoi(argv[arg_index++]);
|
|
by = atoi(argv[arg_index++]);
|
|
bz = atoi(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-c") == 0 )
|
|
{
|
|
arg_index++;
|
|
cx = atof(argv[arg_index++]);
|
|
cy = atof(argv[arg_index++]);
|
|
cz = atof(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-d") == 0 )
|
|
{
|
|
arg_index++;
|
|
dim = atoi(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-solver") == 0 )
|
|
{
|
|
arg_index++;
|
|
solver_id = atoi(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-ruge") == 0 )
|
|
{
|
|
arg_index++;
|
|
coarsen_type = 1;
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-ruge2b") == 0 )
|
|
{
|
|
arg_index++;
|
|
coarsen_type = 2;
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-ruge3") == 0 )
|
|
{
|
|
arg_index++;
|
|
coarsen_type = 3;
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-ruge3c") == 0 )
|
|
{
|
|
arg_index++;
|
|
coarsen_type = 4;
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-rugerlx") == 0 )
|
|
{
|
|
arg_index++;
|
|
coarsen_type = 5;
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-falgout") == 0 )
|
|
{
|
|
arg_index++;
|
|
coarsen_type = 6;
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-nohybrid") == 0 )
|
|
{
|
|
arg_index++;
|
|
hybrid = -1;
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-gm") == 0 )
|
|
{
|
|
arg_index++;
|
|
measure_type = 1;
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-rlx") == 0 )
|
|
{
|
|
arg_index++;
|
|
relax_default = atoi(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-dbg") == 0 )
|
|
{
|
|
arg_index++;
|
|
debug_flag = atoi(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-help") == 0 )
|
|
{
|
|
print_usage = 1;
|
|
}
|
|
else
|
|
{
|
|
arg_index++;
|
|
}
|
|
}
|
|
|
|
/* for CGNR preconditioned with BoomerAMG, only relaxation scheme 2 is
|
|
implemented, i.e. Jacobi relaxation with Matvec */
|
|
if (solver_id == 5) relax_default = 2;
|
|
|
|
/* defaults for BoomerAMG */
|
|
if (solver_id == 0 || solver_id == 1 || solver_id == 3 || solver_id == 5)
|
|
{
|
|
strong_threshold = 0.25;
|
|
trunc_factor = 0.0;
|
|
cycle_type = 1;
|
|
|
|
num_grid_sweeps = hypre_CTAlloc(int,4);
|
|
grid_relax_type = hypre_CTAlloc(int,4);
|
|
grid_relax_points = hypre_CTAlloc(int *,4);
|
|
relax_weight = hypre_CTAlloc(double, max_levels);
|
|
|
|
for (i=0; i < max_levels; i++)
|
|
relax_weight[i] = 0.0;
|
|
|
|
if (coarsen_type == 5)
|
|
{
|
|
/* fine grid */
|
|
num_grid_sweeps[0] = 3;
|
|
grid_relax_type[0] = relax_default;
|
|
grid_relax_points[0] = hypre_CTAlloc(int, 4);
|
|
grid_relax_points[0][0] = -2;
|
|
grid_relax_points[0][1] = -1;
|
|
grid_relax_points[0][2] = 1;
|
|
|
|
/* down cycle */
|
|
num_grid_sweeps[1] = 4;
|
|
grid_relax_type[1] = relax_default;
|
|
grid_relax_points[1] = hypre_CTAlloc(int, 4);
|
|
grid_relax_points[1][0] = -1;
|
|
grid_relax_points[1][1] = 1;
|
|
grid_relax_points[1][2] = -2;
|
|
grid_relax_points[1][3] = -2;
|
|
|
|
/* up cycle */
|
|
num_grid_sweeps[2] = 4;
|
|
grid_relax_type[2] = relax_default;
|
|
grid_relax_points[2] = hypre_CTAlloc(int, 4);
|
|
grid_relax_points[2][0] = -2;
|
|
grid_relax_points[2][1] = -2;
|
|
grid_relax_points[2][2] = 1;
|
|
grid_relax_points[2][3] = -1;
|
|
}
|
|
else
|
|
{
|
|
/* fine grid */
|
|
num_grid_sweeps[0] = 2;
|
|
grid_relax_type[0] = relax_default;
|
|
grid_relax_points[0] = hypre_CTAlloc(int, 2);
|
|
grid_relax_points[0][0] = 1;
|
|
grid_relax_points[0][1] = -1;
|
|
|
|
/* down cycle */
|
|
num_grid_sweeps[1] = 2;
|
|
grid_relax_type[1] = relax_default;
|
|
grid_relax_points[1] = hypre_CTAlloc(int, 2);
|
|
grid_relax_points[1][0] = 1;
|
|
grid_relax_points[1][1] = -1;
|
|
|
|
/* up cycle */
|
|
num_grid_sweeps[2] = 2;
|
|
grid_relax_type[2] = relax_default;
|
|
grid_relax_points[2] = hypre_CTAlloc(int, 2);
|
|
grid_relax_points[2][0] = -1;
|
|
grid_relax_points[2][1] = 1;
|
|
}
|
|
/* coarsest grid */
|
|
num_grid_sweeps[3] = 1;
|
|
grid_relax_type[3] = 9;
|
|
grid_relax_points[3] = hypre_CTAlloc(int, 1);
|
|
grid_relax_points[3][0] = 0;
|
|
}
|
|
/* defaults for GMRES */
|
|
|
|
k_dim = 5;
|
|
|
|
arg_index = 0;
|
|
while (arg_index < argc)
|
|
{
|
|
if ( strcmp(argv[arg_index], "-k") == 0 )
|
|
{
|
|
arg_index++;
|
|
k_dim = atoi(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-w") == 0 )
|
|
{
|
|
arg_index++;
|
|
if (solver_id == 0 || solver_id == 1 || solver_id == 3
|
|
|| solver_id == 5 )
|
|
{
|
|
relax_weight[0] = atof(argv[arg_index++]);
|
|
for (i=1; i < max_levels; i++)
|
|
relax_weight[i] = relax_weight[0];
|
|
}
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-th") == 0 )
|
|
{
|
|
arg_index++;
|
|
strong_threshold = atof(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-tol") == 0 )
|
|
{
|
|
arg_index++;
|
|
tol = atof(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-drop_tol") == 0 )
|
|
{
|
|
arg_index++;
|
|
drop_tol = atof(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-nonzeros_to_keep") == 0 )
|
|
{
|
|
arg_index++;
|
|
nonzeros_to_keep = atoi(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-tr") == 0 )
|
|
{
|
|
arg_index++;
|
|
trunc_factor = atof(argv[arg_index++]);
|
|
}
|
|
else if ( strcmp(argv[arg_index], "-iout") == 0 )
|
|
{
|
|
arg_index++;
|
|
ioutdat = atoi(argv[arg_index++]);
|
|
}
|
|
else
|
|
{
|
|
arg_index++;
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------
|
|
* Print usage info
|
|
*-----------------------------------------------------------*/
|
|
|
|
if ( (print_usage) && (myid == 0) )
|
|
{
|
|
printf("\n");
|
|
printf("Usage: %s [<options>]\n", argv[0]);
|
|
printf("\n");
|
|
printf(" -n <nx> <ny> <nz> : problem size per block\n");
|
|
printf(" -b <bx> <by> <bz> : blocking per processor\n");
|
|
printf(" -P <Px> <Py> <Pz> : processor topology\n");
|
|
printf(" -c <cx> <cy> <cz> : diffusion coefficients\n");
|
|
printf("\n");
|
|
printf(" -rhsrand : rhs is random vector\n");
|
|
printf(" -xisone : solution of all ones\n");
|
|
printf("\n");
|
|
printf(" -solver <ID> : solver ID\n");
|
|
printf(" 0=AMG 1=AMG-PCG \n");
|
|
printf(" 2=DS-PCG 3=AMG-GMRES \n");
|
|
printf(" 4=DS-GMRES 5=AMG-CGNR \n");
|
|
printf(" 6=DS-CGNR 7=PILUT-GMRES \n");
|
|
printf(" 8=ParaSails-PCG \n");
|
|
printf("\n");
|
|
printf(" -ruge : Ruge coarsening (local)\n");
|
|
printf(" -ruge3 : third pass on boundary\n");
|
|
printf(" -ruge3c : third pass on boundary, keep c-points\n");
|
|
printf(" -ruge2b : 2nd pass is global\n");
|
|
printf(" -rugerlx : relaxes special points\n");
|
|
printf(" -falgout : local ruge followed by LJP\n");
|
|
printf(" -nohybrid : no switch in coarsening\n");
|
|
printf(" -gm : use global measures\n");
|
|
printf("\n");
|
|
printf(" -rlx <val> : relaxation type\n");
|
|
printf(" 0=Weighted Jacobi \n");
|
|
printf(" 1=Gauss-Seidel (very slow!) \n");
|
|
printf(" 3=Hybrid Jacobi/Gauss-Seidel \n");
|
|
printf("\n");
|
|
printf(" -th <val> : set AMG threshold Theta = val \n");
|
|
printf(" -tr <val> : set AMG interpolation truncation factor = val \n");
|
|
printf(" -tol <val> : set AMG convergence tolerance to val\n");
|
|
printf(" -w <val> : set Jacobi relax weight = val\n");
|
|
printf(" -k <val> : dimension Krylov space for GMRES\n");
|
|
printf("\n");
|
|
printf(" -drop_tol <val> : set threshold for dropping in PILUT\n");
|
|
printf(" -nonzeros_to_keep <val>: number of nonzeros in each row to keep\n");
|
|
printf("\n");
|
|
printf(" -iout <val> : set output flag\n");
|
|
printf(" 0=no output 1=matrix stats\n");
|
|
printf(" 2=cycle stats 3=matrix & cycle stats\n");
|
|
printf("\n");
|
|
printf(" -dbg <val> : set debug flag\n");
|
|
printf(" 0=no debugging\n 1=internal timing\n 2=interpolation truncation\n 3=more detailed timing in coarsening routine\n");
|
|
exit(1);
|
|
}
|
|
|
|
if ((P*Q*R) != num_procs)
|
|
{
|
|
printf("Error: Invalid number of processors or processor topology \n");
|
|
exit(1);
|
|
}
|
|
|
|
/*-----------------------------------------------------------
|
|
* Print driver parameters
|
|
*-----------------------------------------------------------*/
|
|
|
|
if (myid == 0)
|
|
{
|
|
printf("Running with these driver parameters:\n");
|
|
printf(" (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
|
|
printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
|
|
printf(" (bx, by, bz) = (%d, %d, %d)\n", bx, by, bz);
|
|
printf(" (cx, cy, cz) = (%f, %f, %f)\n", cx, cy, cz);
|
|
printf(" dim = %d\n", dim);
|
|
printf(" solver ID = %d\n", solver_id);
|
|
}
|
|
|
|
/*-----------------------------------------------------------
|
|
* Set up the grid structure
|
|
*-----------------------------------------------------------*/
|
|
|
|
switch (dim)
|
|
{
|
|
case 1:
|
|
volume = nx;
|
|
nblocks = bx;
|
|
stencil_indices = hypre_CTAlloc(int, 2);
|
|
offsets = hypre_CTAlloc(int*, 2);
|
|
offsets[0] = hypre_CTAlloc(int, 1);
|
|
offsets[0][0] = -1;
|
|
offsets[1] = hypre_CTAlloc(int, 1);
|
|
offsets[1][0] = 0;
|
|
/* compute p from P and myid */
|
|
p = myid % P;
|
|
break;
|
|
case 2:
|
|
volume = nx*ny;
|
|
nblocks = bx*by;
|
|
stencil_indices = hypre_CTAlloc(int, 3);
|
|
offsets = hypre_CTAlloc(int*, 3);
|
|
offsets[0] = hypre_CTAlloc(int, 2);
|
|
offsets[0][0] = -1;
|
|
offsets[0][1] = 0;
|
|
offsets[1] = hypre_CTAlloc(int, 2);
|
|
offsets[1][0] = 0;
|
|
offsets[1][1] = -1;
|
|
offsets[2] = hypre_CTAlloc(int, 2);
|
|
offsets[2][0] = 0;
|
|
offsets[2][1] = 0;
|
|
/* compute p,q from P,Q and myid */
|
|
p = myid % P;
|
|
q = (( myid - p)/P) % Q;
|
|
break;
|
|
case 3:
|
|
volume = nx*ny*nz;
|
|
nblocks = bx*by*bz;
|
|
stencil_indices = hypre_CTAlloc(int, 7);
|
|
offsets = hypre_CTAlloc(int*, 7);
|
|
offsets[0] = hypre_CTAlloc(int, 3);
|
|
offsets[0][0] = -1;
|
|
offsets[0][1] = 0;
|
|
offsets[0][2] = 0;
|
|
offsets[1] = hypre_CTAlloc(int, 3);
|
|
offsets[1][0] = 0;
|
|
offsets[1][1] = -1;
|
|
offsets[1][2] = 0;
|
|
offsets[2] = hypre_CTAlloc(int, 3);
|
|
offsets[2][0] = 0;
|
|
offsets[2][1] = 0;
|
|
offsets[2][2] = -1;
|
|
offsets[3] = hypre_CTAlloc(int, 3);
|
|
offsets[3][0] = 0;
|
|
offsets[3][1] = 0;
|
|
offsets[3][2] = 0;
|
|
offsets[4] = hypre_CTAlloc(int, 3);
|
|
offsets[4][0] = 1;
|
|
offsets[4][1] = 0;
|
|
offsets[4][2] = 0;
|
|
offsets[5] = hypre_CTAlloc(int, 3);
|
|
offsets[5][0] = 0;
|
|
offsets[5][1] = 1;
|
|
offsets[5][2] = 0;
|
|
offsets[6] = hypre_CTAlloc(int, 3);
|
|
offsets[6][0] = 0;
|
|
offsets[6][1] = 0;
|
|
offsets[6][2] = 1;
|
|
/* compute p,q,r from P,Q,R and myid */
|
|
p = myid % P;
|
|
q = (( myid - p)/P) % Q;
|
|
r = ( myid - p - P*q)/( P*Q );
|
|
break;
|
|
}
|
|
|
|
ilower = hypre_CTAlloc(int*, nblocks);
|
|
iupper = hypre_CTAlloc(int*, nblocks);
|
|
for (i = 0; i < nblocks; i++)
|
|
{
|
|
ilower[i] = hypre_CTAlloc(int, dim);
|
|
iupper[i] = hypre_CTAlloc(int, dim);
|
|
}
|
|
|
|
/* compute ilower and iupper from (p,q,r), (bx,by,bz), and (nx,ny,nz) */
|
|
ib = 0;
|
|
switch (dim)
|
|
{
|
|
case 1:
|
|
for (ix = 0; ix < bx; ix++)
|
|
{
|
|
ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
|
|
iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
|
|
ib++;
|
|
}
|
|
break;
|
|
case 2:
|
|
for (iy = 0; iy < by; iy++)
|
|
for (ix = 0; ix < bx; ix++)
|
|
{
|
|
ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
|
|
iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
|
|
ilower[ib][1] = istart[1]+ ny*(by*q+iy);
|
|
iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1;
|
|
ib++;
|
|
}
|
|
break;
|
|
case 3:
|
|
for (iz = 0; iz < bz; iz++)
|
|
for (iy = 0; iy < by; iy++)
|
|
for (ix = 0; ix < bx; ix++)
|
|
{
|
|
ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
|
|
iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
|
|
ilower[ib][1] = istart[1]+ ny*(by*q+iy);
|
|
iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1;
|
|
ilower[ib][2] = istart[2]+ nz*(bz*r+iz);
|
|
iupper[ib][2] = istart[2]+ nz*(bz*r+iz+1) - 1;
|
|
ib++;
|
|
}
|
|
break;
|
|
}
|
|
|
|
HYPRE_StructGridCreate(MPI_COMM_WORLD, dim, &grid);
|
|
for (ib = 0; ib < nblocks; ib++)
|
|
{
|
|
HYPRE_StructGridSetExtents(grid, ilower[ib], iupper[ib]);
|
|
}
|
|
HYPRE_StructGridAssemble(grid);
|
|
|
|
/*-----------------------------------------------------------
|
|
* Set up the stencil structure
|
|
*-----------------------------------------------------------*/
|
|
|
|
HYPRE_StructStencilCreate(dim, 7, &stencil);
|
|
for (s = 0; s < 7; s++)
|
|
{
|
|
HYPRE_StructStencilSetElement(stencil, s, offsets[s]);
|
|
}
|
|
|
|
/*-----------------------------------------------------------
|
|
* Set up the matrix structure
|
|
*-----------------------------------------------------------*/
|
|
|
|
ierr = HYPRE_StructIJMatrixCreate(MPI_COMM_WORLD, grid, stencil, &A_structij);
|
|
ierr += HYPRE_StructIJMatrixInitialize(A_structij);
|
|
|
|
/*-----------------------------------------------------------
|
|
* Fill in the matrix elements
|
|
*-----------------------------------------------------------*/
|
|
|
|
values = hypre_CTAlloc(double, 7*volume);
|
|
|
|
/* Set the coefficients for the grid */
|
|
for (i = 0; i < 7*volume; i += 7)
|
|
{
|
|
for (s = 0; s < 7; s++)
|
|
{
|
|
stencil_indices[s] = s;
|
|
switch (dim)
|
|
{
|
|
case 1:
|
|
values[i ] = -cx;
|
|
values[i+1] = 2.0*(cx);
|
|
break;
|
|
case 2:
|
|
values[i ] = -cx;
|
|
values[i+1] = -cy;
|
|
values[i+2] = 2.0*(cx+cy);
|
|
break;
|
|
case 3:
|
|
values[i ] = -cx;
|
|
values[i+1] = -cy;
|
|
values[i+2] = -cz;
|
|
values[i+3] = 2.0*(cx+cy+cz);
|
|
values[i+4] = -cx;
|
|
values[i+5] = -cy;
|
|
values[i+6] = -cz;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (ib = 0; ib < nblocks; ib++)
|
|
{
|
|
ierr += HYPRE_StructIJMatrixSetBoxValues (A_structij, ilower[ib], iupper[ib],
|
|
7, stencil_indices, values);
|
|
}
|
|
|
|
ierr += HYPRE_StructIJMatrixAssemble(A_structij);
|
|
|
|
if (ierr)
|
|
{
|
|
printf("Error in driver building IJMatrix.\n");
|
|
return(-1);
|
|
}
|
|
|
|
/*-----------------------------------------------------------
|
|
* Get the underlying matrix
|
|
*-----------------------------------------------------------*/
|
|
|
|
A_parcsr = (HYPRE_ParCSRMatrix) HYPRE_StructIJMatrixGetLocalStorage( A_structij );
|
|
|
|
/*-----------------------------------------------------------
|
|
* Need partitioning information for setting up vectors
|
|
*-----------------------------------------------------------*/
|
|
|
|
HYPRE_ParCSRMatrixGetRowPartitioning(A_parcsr, &partitioning);
|
|
|
|
part_b = hypre_CTAlloc(int, num_procs+1);
|
|
part_x = hypre_CTAlloc(int, num_procs+1);
|
|
for (i=0; i < num_procs+1; i++)
|
|
{
|
|
part_b[i] = partitioning[i];
|
|
part_x[i] = partitioning[i];
|
|
}
|
|
|
|
/*-----------------------------------------------------------
|
|
* Set up the RHS b = (1, 1, ..., 1)^T and initial guess x = 0
|
|
*-----------------------------------------------------------*/
|
|
|
|
hypre_TFree(values);
|
|
values = hypre_CTAlloc(double, volume);
|
|
|
|
ierr = HYPRE_StructIJVectorCreate(MPI_COMM_WORLD, grid, stencil, &b_structij);
|
|
ierr += HYPRE_StructIJVectorInitialize(b_structij);
|
|
/*
|
|
SetPartitioning comes after Initialize because Initialize sets
|
|
LocalStorageType, and that must be done before SetPartitioning.
|
|
*/
|
|
ierr += HYPRE_StructIJVectorSetPartitioning(b_structij, (const int *) part_b);
|
|
for (i = 0; i < volume; i++)
|
|
{
|
|
values[i] = 1.0;
|
|
}
|
|
for (ib = 0; ib < nblocks; ib++)
|
|
{
|
|
ierr += HYPRE_StructIJVectorSetBoxValues(b_structij, ilower[ib], iupper[ib],
|
|
values);
|
|
}
|
|
ierr += HYPRE_StructIJVectorAssemble(b_structij);
|
|
|
|
b_parcsr = (HYPRE_ParVector) HYPRE_StructIJVectorGetLocalStorage( b_structij );
|
|
|
|
ierr = HYPRE_StructIJVectorCreate(MPI_COMM_WORLD, grid, stencil, &x_structij);
|
|
ierr += HYPRE_StructIJVectorInitialize(x_structij);
|
|
ierr += HYPRE_StructIJVectorSetPartitioning(x_structij, (const int *) part_x);
|
|
for (i = 0; i < volume; i++)
|
|
{
|
|
values[i] = 0.0;
|
|
}
|
|
for (ib = 0; ib < nblocks; ib++)
|
|
{
|
|
ierr += HYPRE_StructIJVectorSetBoxValues(x_structij, ilower[ib], iupper[ib],
|
|
values);
|
|
}
|
|
ierr += HYPRE_StructIJVectorAssemble(x_structij);
|
|
|
|
x_parcsr = (HYPRE_ParVector) HYPRE_StructIJVectorGetLocalStorage( x_structij );
|
|
|
|
hypre_TFree(values);
|
|
|
|
/*-----------------------------------------------------------
|
|
* Solve the system using AMG
|
|
*-----------------------------------------------------------*/
|
|
|
|
if (solver_id == 0)
|
|
{
|
|
if (myid == 0) printf("Solver: AMG\n");
|
|
time_index = hypre_InitializeTiming("BoomerAMG Setup");
|
|
hypre_BeginTiming(time_index);
|
|
|
|
HYPRE_BoomerAMGCreate(&amg_solver);
|
|
HYPRE_BoomerAMGSetCoarsenType(amg_solver, (hybrid*coarsen_type));
|
|
HYPRE_BoomerAMGSetMeasureType(amg_solver, measure_type);
|
|
HYPRE_BoomerAMGSetTol(amg_solver, tol);
|
|
HYPRE_BoomerAMGSetStrongThreshold(amg_solver, strong_threshold);
|
|
HYPRE_BoomerAMGSetTruncFactor(amg_solver, trunc_factor);
|
|
/* note: log is written to standard output, not to file */
|
|
HYPRE_BoomerAMGSetLogging(amg_solver, ioutdat, "driver.out.log");
|
|
HYPRE_BoomerAMGSetCycleType(amg_solver, cycle_type);
|
|
HYPRE_BoomerAMGSetNumGridSweeps(amg_solver, num_grid_sweeps);
|
|
HYPRE_BoomerAMGSetGridRelaxType(amg_solver, grid_relax_type);
|
|
HYPRE_BoomerAMGSetRelaxWeight(amg_solver, relax_weight);
|
|
HYPRE_BoomerAMGSetGridRelaxPoints(amg_solver, grid_relax_points);
|
|
HYPRE_BoomerAMGSetMaxLevels(amg_solver, max_levels);
|
|
HYPRE_BoomerAMGSetDebugFlag(amg_solver, debug_flag);
|
|
|
|
HYPRE_BoomerAMGSetup(amg_solver, A_parcsr, b_parcsr, x_parcsr);
|
|
|
|
hypre_EndTiming(time_index);
|
|
hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD);
|
|
hypre_FinalizeTiming(time_index);
|
|
hypre_ClearTiming();
|
|
|
|
time_index = hypre_InitializeTiming("BoomerAMG Solve");
|
|
hypre_BeginTiming(time_index);
|
|
|
|
HYPRE_BoomerAMGSolve(amg_solver, A_parcsr, b_parcsr, x_parcsr);
|
|
|
|
hypre_EndTiming(time_index);
|
|
hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD);
|
|
hypre_FinalizeTiming(time_index);
|
|
hypre_ClearTiming();
|
|
|
|
HYPRE_BoomerAMGDestroy(amg_solver);
|
|
}
|
|
|
|
/*-----------------------------------------------------------
|
|
* Solve the system using PCG
|
|
*-----------------------------------------------------------*/
|
|
|
|
if (solver_id == 1 || solver_id == 2 || solver_id == 8)
|
|
{
|
|
time_index = hypre_InitializeTiming("PCG Setup");
|
|
hypre_BeginTiming(time_index);
|
|
|
|
HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &pcg_solver);
|
|
HYPRE_ParCSRPCGSetMaxIter(pcg_solver, 500);
|
|
HYPRE_ParCSRPCGSetTol(pcg_solver, tol);
|
|
HYPRE_ParCSRPCGSetTwoNorm(pcg_solver, 1);
|
|
HYPRE_ParCSRPCGSetRelChange(pcg_solver, 0);
|
|
HYPRE_ParCSRPCGSetLogging(pcg_solver, 1);
|
|
|
|
if (solver_id == 1)
|
|
{
|
|
/* use BoomerAMG as preconditioner */
|
|
if (myid == 0) printf("Solver: AMG-PCG\n");
|
|
HYPRE_BoomerAMGCreate(&pcg_precond);
|
|
HYPRE_BoomerAMGSetCoarsenType(pcg_precond, (hybrid*coarsen_type));
|
|
HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
|
|
HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
|
|
HYPRE_BoomerAMGSetLogging(pcg_precond, ioutdat, "driver.out.log");
|
|
HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
|
|
HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
|
|
HYPRE_BoomerAMGSetNumGridSweeps(pcg_precond, num_grid_sweeps);
|
|
HYPRE_BoomerAMGSetGridRelaxType(pcg_precond, grid_relax_type);
|
|
HYPRE_BoomerAMGSetRelaxWeight(pcg_precond, relax_weight);
|
|
HYPRE_BoomerAMGSetGridRelaxPoints(pcg_precond, grid_relax_points);
|
|
HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
|
|
HYPRE_ParCSRPCGSetPrecond(pcg_solver,
|
|
HYPRE_BoomerAMGSolve,
|
|
HYPRE_BoomerAMGSetup,
|
|
pcg_precond);
|
|
}
|
|
else if (solver_id == 2)
|
|
{
|
|
|
|
/* use diagonal scaling as preconditioner */
|
|
if (myid == 0) printf("Solver: DS-PCG\n");
|
|
pcg_precond = NULL;
|
|
|
|
HYPRE_ParCSRPCGSetPrecond(pcg_solver,
|
|
HYPRE_ParCSRDiagScale,
|
|
HYPRE_ParCSRDiagScaleSetup,
|
|
pcg_precond);
|
|
}
|
|
else if (solver_id == 8)
|
|
{
|
|
/* use ParaSails preconditioner */
|
|
if (myid == 0) printf("Solver: ParaSails-PCG\n");
|
|
|
|
HYPRE_ParCSRParaSailsCreate(MPI_COMM_WORLD, &pcg_precond);
|
|
HYPRE_ParCSRParaSailsSetParams(pcg_precond, 0.1, 1);
|
|
|
|
HYPRE_ParCSRPCGSetPrecond(pcg_solver,
|
|
HYPRE_ParCSRParaSailsSolve,
|
|
HYPRE_ParCSRParaSailsSetup,
|
|
pcg_precond);
|
|
}
|
|
|
|
HYPRE_ParCSRPCGSetup(pcg_solver, A_parcsr, b_parcsr, x_parcsr);
|
|
|
|
hypre_EndTiming(time_index);
|
|
hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD);
|
|
hypre_FinalizeTiming(time_index);
|
|
hypre_ClearTiming();
|
|
|
|
time_index = hypre_InitializeTiming("PCG Solve");
|
|
hypre_BeginTiming(time_index);
|
|
|
|
HYPRE_ParCSRPCGSolve(pcg_solver, A_parcsr, b_parcsr, x_parcsr);
|
|
|
|
hypre_EndTiming(time_index);
|
|
hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD);
|
|
hypre_FinalizeTiming(time_index);
|
|
hypre_ClearTiming();
|
|
|
|
HYPRE_ParCSRPCGGetNumIterations(pcg_solver, &num_iterations);
|
|
HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(pcg_solver, &final_res_norm);
|
|
HYPRE_ParCSRPCGDestroy(pcg_solver);
|
|
|
|
if (solver_id == 1)
|
|
{
|
|
HYPRE_BoomerAMGDestroy(pcg_precond);
|
|
}
|
|
else if (solver_id == 8)
|
|
{
|
|
HYPRE_ParCSRParaSailsDestroy(pcg_precond);
|
|
}
|
|
|
|
if (myid == 0)
|
|
{
|
|
printf("\n");
|
|
printf("Iterations = %d\n", num_iterations);
|
|
printf("Final Relative Residual Norm = %e\n", final_res_norm);
|
|
printf("\n");
|
|
}
|
|
|
|
}
|
|
|
|
/*-----------------------------------------------------------
|
|
* Solve the system using GMRES
|
|
*-----------------------------------------------------------*/
|
|
|
|
if (solver_id == 3 || solver_id == 4 || solver_id == 7)
|
|
{
|
|
time_index = hypre_InitializeTiming("GMRES Setup");
|
|
hypre_BeginTiming(time_index);
|
|
|
|
HYPRE_ParCSRGMRESCreate(MPI_COMM_WORLD, &pcg_solver);
|
|
HYPRE_ParCSRGMRESSetKDim(pcg_solver, k_dim);
|
|
HYPRE_ParCSRGMRESSetMaxIter(pcg_solver, 100);
|
|
HYPRE_ParCSRGMRESSetTol(pcg_solver, tol);
|
|
HYPRE_ParCSRGMRESSetLogging(pcg_solver, 1);
|
|
|
|
if (solver_id == 3)
|
|
{
|
|
/* use BoomerAMG as preconditioner */
|
|
if (myid == 0) printf("Solver: AMG-GMRES\n");
|
|
|
|
HYPRE_BoomerAMGCreate(&pcg_precond);
|
|
HYPRE_BoomerAMGSetCoarsenType(pcg_precond, (hybrid*coarsen_type));
|
|
HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
|
|
HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
|
|
HYPRE_BoomerAMGSetLogging(pcg_precond, ioutdat, "driver.out.log");
|
|
HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
|
|
HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
|
|
HYPRE_BoomerAMGSetNumGridSweeps(pcg_precond, num_grid_sweeps);
|
|
HYPRE_BoomerAMGSetGridRelaxType(pcg_precond, grid_relax_type);
|
|
HYPRE_BoomerAMGSetRelaxWeight(pcg_precond, relax_weight);
|
|
HYPRE_BoomerAMGSetGridRelaxPoints(pcg_precond, grid_relax_points);
|
|
HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
|
|
HYPRE_ParCSRGMRESSetPrecond(pcg_solver,
|
|
HYPRE_BoomerAMGSolve,
|
|
HYPRE_BoomerAMGSetup,
|
|
pcg_precond);
|
|
}
|
|
else if (solver_id == 4)
|
|
{
|
|
/* use diagonal scaling as preconditioner */
|
|
if (myid == 0) printf("Solver: DS-GMRES\n");
|
|
pcg_precond = NULL;
|
|
|
|
HYPRE_ParCSRGMRESSetPrecond(pcg_solver,
|
|
HYPRE_ParCSRDiagScale,
|
|
HYPRE_ParCSRDiagScaleSetup,
|
|
pcg_precond);
|
|
}
|
|
else if (solver_id == 7)
|
|
{
|
|
/* use PILUT as preconditioner */
|
|
if (myid == 0) printf("Solver: Pilut-GMRES\n");
|
|
|
|
ierr = HYPRE_ParCSRPilutCreate( MPI_COMM_WORLD, &pcg_precond );
|
|
if (ierr) {
|
|
printf("Error in ParPilutCreate\n");
|
|
}
|
|
|
|
HYPRE_ParCSRGMRESSetPrecond(pcg_solver,
|
|
HYPRE_ParCSRPilutSolve,
|
|
HYPRE_ParCSRPilutSetup,
|
|
pcg_precond);
|
|
|
|
if (drop_tol >= 0 )
|
|
HYPRE_ParCSRPilutSetDropTolerance( pcg_precond,
|
|
drop_tol );
|
|
|
|
if (nonzeros_to_keep >= 0 )
|
|
HYPRE_ParCSRPilutSetFactorRowSize( pcg_precond,
|
|
nonzeros_to_keep );
|
|
}
|
|
|
|
HYPRE_ParCSRGMRESSetup(pcg_solver, A_parcsr, b_parcsr, x_parcsr);
|
|
|
|
hypre_EndTiming(time_index);
|
|
hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD);
|
|
hypre_FinalizeTiming(time_index);
|
|
hypre_ClearTiming();
|
|
|
|
time_index = hypre_InitializeTiming("GMRES Solve");
|
|
hypre_BeginTiming(time_index);
|
|
|
|
HYPRE_ParCSRGMRESSolve(pcg_solver, A_parcsr, b_parcsr, x_parcsr);
|
|
|
|
hypre_EndTiming(time_index);
|
|
hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD);
|
|
hypre_FinalizeTiming(time_index);
|
|
hypre_ClearTiming();
|
|
|
|
HYPRE_ParCSRGMRESGetNumIterations(pcg_solver, &num_iterations);
|
|
HYPRE_ParCSRGMRESGetFinalRelativeResidualNorm(pcg_solver,&final_res_norm);
|
|
HYPRE_ParCSRGMRESDestroy(pcg_solver);
|
|
|
|
if (solver_id == 3)
|
|
{
|
|
HYPRE_BoomerAMGDestroy(pcg_precond);
|
|
}
|
|
|
|
if (solver_id == 7)
|
|
{
|
|
HYPRE_ParCSRPilutDestroy(pcg_precond);
|
|
}
|
|
|
|
if (myid == 0)
|
|
{
|
|
printf("\n");
|
|
printf("GMRES Iterations = %d\n", num_iterations);
|
|
printf("Final GMRES Relative Residual Norm = %e\n", final_res_norm);
|
|
printf("\n");
|
|
}
|
|
}
|
|
/*-----------------------------------------------------------
|
|
* Solve the system using CGNR
|
|
*-----------------------------------------------------------*/
|
|
|
|
if (solver_id == 5 || solver_id == 6)
|
|
{
|
|
time_index = hypre_InitializeTiming("CGNR Setup");
|
|
hypre_BeginTiming(time_index);
|
|
|
|
HYPRE_ParCSRCGNRCreate(MPI_COMM_WORLD, &pcg_solver);
|
|
HYPRE_ParCSRCGNRSetMaxIter(pcg_solver, 1000);
|
|
HYPRE_ParCSRCGNRSetTol(pcg_solver, tol);
|
|
HYPRE_ParCSRCGNRSetLogging(pcg_solver, 1);
|
|
|
|
if (solver_id == 5)
|
|
{
|
|
/* use BoomerAMG as preconditioner */
|
|
if (myid == 0) printf("Solver: AMG-CGNR\n");
|
|
|
|
HYPRE_BoomerAMGCreate(&pcg_precond);
|
|
HYPRE_BoomerAMGSetCoarsenType(pcg_precond, (hybrid*coarsen_type));
|
|
HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
|
|
HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
|
|
HYPRE_BoomerAMGSetLogging(pcg_precond, ioutdat, "driver.out.log");
|
|
HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
|
|
HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
|
|
HYPRE_BoomerAMGSetNumGridSweeps(pcg_precond, num_grid_sweeps);
|
|
HYPRE_BoomerAMGSetGridRelaxType(pcg_precond, grid_relax_type);
|
|
HYPRE_BoomerAMGSetRelaxWeight(pcg_precond, relax_weight);
|
|
HYPRE_BoomerAMGSetGridRelaxPoints(pcg_precond, grid_relax_points);
|
|
HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
|
|
HYPRE_ParCSRCGNRSetPrecond(pcg_solver,
|
|
HYPRE_BoomerAMGSolve,
|
|
HYPRE_BoomerAMGSolveT,
|
|
HYPRE_BoomerAMGSetup,
|
|
pcg_precond);
|
|
}
|
|
else if (solver_id == 6)
|
|
{
|
|
/* use diagonal scaling as preconditioner */
|
|
if (myid == 0) printf("Solver: DS-CGNR\n");
|
|
pcg_precond = NULL;
|
|
|
|
HYPRE_ParCSRCGNRSetPrecond(pcg_solver,
|
|
HYPRE_ParCSRDiagScale,
|
|
HYPRE_ParCSRDiagScale,
|
|
HYPRE_ParCSRDiagScaleSetup,
|
|
pcg_precond);
|
|
}
|
|
|
|
HYPRE_ParCSRCGNRSetup(pcg_solver, A_parcsr, b_parcsr, x_parcsr);
|
|
|
|
hypre_EndTiming(time_index);
|
|
hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD);
|
|
hypre_FinalizeTiming(time_index);
|
|
hypre_ClearTiming();
|
|
|
|
time_index = hypre_InitializeTiming("CGNR Solve");
|
|
hypre_BeginTiming(time_index);
|
|
|
|
HYPRE_ParCSRCGNRSolve(pcg_solver, A_parcsr, b_parcsr, x_parcsr);
|
|
|
|
hypre_EndTiming(time_index);
|
|
hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD);
|
|
hypre_FinalizeTiming(time_index);
|
|
hypre_ClearTiming();
|
|
|
|
HYPRE_ParCSRCGNRGetNumIterations(pcg_solver, &num_iterations);
|
|
HYPRE_ParCSRCGNRGetFinalRelativeResidualNorm(pcg_solver,&final_res_norm);
|
|
HYPRE_ParCSRCGNRDestroy(pcg_solver);
|
|
|
|
if (solver_id == 5)
|
|
{
|
|
HYPRE_BoomerAMGDestroy(pcg_precond);
|
|
}
|
|
if (myid == 0)
|
|
{
|
|
printf("\n");
|
|
printf("Iterations = %d\n", num_iterations);
|
|
printf("Final Relative Residual Norm = %e\n", final_res_norm);
|
|
printf("\n");
|
|
}
|
|
}
|
|
/*-----------------------------------------------------------
|
|
* Print the solution and other info
|
|
*-----------------------------------------------------------*/
|
|
|
|
#if 0
|
|
HYPRE_PrintCSRVector(x_parcsr, "driver.out.x");
|
|
#endif
|
|
|
|
/*-----------------------------------------------------------
|
|
* Clean up
|
|
*-----------------------------------------------------------*/
|
|
|
|
HYPRE_StructGridDestroy(grid);
|
|
HYPRE_StructStencilDestroy(stencil);
|
|
HYPRE_StructIJMatrixDestroy(A_structij);
|
|
HYPRE_StructIJVectorDestroy(b_structij);
|
|
HYPRE_StructIJVectorDestroy(x_structij);
|
|
HYPRE_ParCSRMatrixDestroy(A_parcsr);
|
|
HYPRE_ParVectorDestroy(b_parcsr);
|
|
HYPRE_ParVectorDestroy(x_parcsr);
|
|
|
|
for (i = 0; i < nblocks; i++)
|
|
{
|
|
hypre_TFree(iupper[i]);
|
|
hypre_TFree(ilower[i]);
|
|
}
|
|
hypre_TFree(ilower);
|
|
hypre_TFree(iupper);
|
|
hypre_TFree(stencil_indices);
|
|
|
|
for ( i = 0; i < 7; i++)
|
|
hypre_TFree(offsets[i]);
|
|
hypre_TFree(offsets);
|
|
|
|
MPI_Finalize();
|
|
|
|
return (0);
|
|
}
|