2 stage gs (#314)

This PR (by @PaulMullowney #282) includes updates/changes to the 2 stage GS preconditioner.

Co-authored-by: Paul Mullowney <pmullown@nrel.gov>
This commit is contained in:
Ruipeng Li 2021-03-24 15:14:00 -07:00 committed by GitHub
parent 8002200aa0
commit 366b80f89b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 115 additions and 93 deletions

View File

@ -1963,6 +1963,8 @@ HYPRE_Int hypre_BoomerAMGRelaxKaczmarz( hypre_ParCSRMatrix *A, hypre_ParVector *
HYPRE_Int hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *r, hypre_ParVector *z, HYPRE_Int choice);
HYPRE_Int hypre_BoomerAMGRelaxTwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp, HYPRE_Int num_inner_iters );
HYPRE_Int hypre_BoomerAMGRelax11TwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );
HYPRE_Int hypre_BoomerAMGRelax12TwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );

View File

@ -326,7 +326,7 @@ hypre_BoomerAMGCycle( void *amg_vdata,
/* If no coarsening occurred, apply a simple smoother once */
Aux_U = U_array[level];
Aux_F = F_array[level];
num_sweep = 1;
num_sweep = num_grid_sweeps[0];
/* TK: Use the user relax type (instead of 0) to allow for setting a
convergent smoother (e.g. in the solution of singular problems). */
relax_type = hypre_ParAMGDataUserRelaxType(amg_data);

View File

@ -55,8 +55,9 @@ hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A,
* triangular)
* relax_type = 11 -> Two Stage approximation to GS. Uses the strict lower
* part of the diagonal matrix
* relax_type = 12 -> Two Stage approximation to GS. Uses the full diagonal
* matrix
* relax_type = 12 -> Two Stage approximation to GS. Uses the strict lower
* part of the diagonal matrix and a second iteration
* for additional error approximation
* relax_type = 13 -> hybrid L1 Gauss-Seidel forward solve
* relax_type = 14 -> hybrid L1 Gauss-Seidel backward solve
* relax_type = 15 -> CG
@ -1450,13 +1451,16 @@ hypre_BoomerAMGRelaxKaczmarz( hypre_ParCSRMatrix *A,
return hypre_error_flag;
}
HYPRE_Int
hypre_BoomerAMGRelax11TwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
hypre_ParVector *f,
HYPRE_Real relax_weight,
HYPRE_Real omega,
hypre_ParVector *u,
hypre_ParVector *Vtemp )
hypre_BoomerAMGRelaxTwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
hypre_ParVector *f,
HYPRE_Real relax_weight,
HYPRE_Real omega,
hypre_ParVector *u,
hypre_ParVector *Vtemp,
HYPRE_Int num_inner_iters)
{
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
@ -1467,9 +1471,8 @@ hypre_BoomerAMGRelax11TwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
HYPRE_Complex *Vtemp_data = hypre_VectorData(Vtemp_local);
hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
HYPRE_Complex *u_data = hypre_VectorData(u_local);
HYPRE_Int i, jj, ii;
hypre_ParCSRMatrixMatvecOutOfPlace(-relax_weight, A, u, relax_weight, f, Vtemp);
HYPRE_Int i, k, jj, ii;
HYPRE_Complex multiplier = 1.0;
/* Need to check that EVERY diagonal is nonzero first. If any are, throw exception */
for (i = 0; i < num_rows; i++)
@ -1480,18 +1483,45 @@ hypre_BoomerAMGRelax11TwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
}
}
hypre_ParCSRMatrixMatvecOutOfPlace(-relax_weight, A, u, relax_weight, f, Vtemp);
for (i = 0; i < num_rows; i++) /* Run the smoother */
{
HYPRE_Complex res = 0.0;
for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
// V = V/D
Vtemp_data[i] /= A_diag_data[A_diag_i[i]];
// u = u + m*v
u_data[i] += multiplier * Vtemp_data[i];
}
// adjust for the alternating series
multiplier *= -1.0;
for (k = 0; k < num_inner_iters; ++k)
{
// By going from bottom to top, we can update Vtemp in place because
// we're operating with the strict, lower triangular matrix
for (i = num_rows-1; i >=0; i--) /* Run the smoother */
{
ii = A_diag_j[jj];
if (ii < i)
// spmv for the row first
HYPRE_Complex res = 0.0;
for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
{
res -= (A_diag_data[jj] / A_diag_data[A_diag_i[ii]]) * Vtemp_data[ii];
ii = A_diag_j[jj];
if (ii < i)
{
res += A_diag_data[jj] * Vtemp_data[ii];
}
}
// diagonal scaling has to come after the spmv accumulation. It's a row scaling
// not column
Vtemp_data[i] = res / A_diag_data[A_diag_i[i]];
u_data[i] += multiplier * Vtemp_data[i];
}
u_data[i] += (Vtemp_data[i] + omega*res) / A_diag_data[A_diag_i[i]];
// adjust for the alternating series
multiplier *= -1.0;
}
return hypre_error_flag;
@ -1520,51 +1550,7 @@ hypre_BoomerAMGRelax11TwoStageGaussSeidel( hypre_ParCSRMatrix *A,
else
#endif
{
hypre_BoomerAMGRelax11TwoStageGaussSeidelHost(A, f, relax_weight, omega, u, Vtemp);
}
return hypre_error_flag;
}
HYPRE_Int
hypre_BoomerAMGRelax12TwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
hypre_ParVector *f,
HYPRE_Real relax_weight,
HYPRE_Real omega,
hypre_ParVector *u,
hypre_ParVector *Vtemp )
{
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
HYPRE_Complex *Vtemp_data = hypre_VectorData(Vtemp_local);
hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
HYPRE_Complex *u_data = hypre_VectorData(u_local);
HYPRE_Int i, jj, ii;
hypre_ParCSRMatrixMatvecOutOfPlace(-relax_weight, A, u, relax_weight, f, Vtemp);
/* Need to check that EVERY diagonal is nonzero first. If any are, throw exception */
for (i = 0; i < num_rows; i++)
{
if (A_diag_data[A_diag_i[i]] == 0.0)
{
hypre_error_in_arg(1);
}
}
for (i = 0; i < num_rows; i++) /* Run the smoother */
{
HYPRE_Complex res = Vtemp_data[i];
for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
{
ii = A_diag_j[jj];
res -= (A_diag_data[jj] / A_diag_data[A_diag_i[ii]]) * Vtemp_data[ii];
}
u_data[i] += (Vtemp_data[i] + omega*res) / A_diag_data[A_diag_i[i]];
hypre_BoomerAMGRelaxTwoStageGaussSeidelHost(A, f, relax_weight, omega, u, Vtemp, 1);
}
return hypre_error_flag;
@ -1589,12 +1575,12 @@ hypre_BoomerAMGRelax12TwoStageGaussSeidel( hypre_ParCSRMatrix *A,
if (exec == HYPRE_EXEC_DEVICE)
{
hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice(A, f, relax_weight, omega, u, Vtemp, Ztemp, 0);
hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice(A, f, relax_weight, omega, u, Vtemp, Ztemp, 2);
}
else
#endif
{
hypre_BoomerAMGRelax12TwoStageGaussSeidelHost(A, f, relax_weight, omega, u, Vtemp);
hypre_BoomerAMGRelaxTwoStageGaussSeidelHost(A, f, relax_weight, omega, u, Vtemp, 2);
}
return hypre_error_flag;

View File

@ -74,32 +74,6 @@ hypre_BoomerAMGRelaxHybridGaussSeidelDevice( hypre_ParCSRMatrix *A,
return hypre_error_flag;
}
/* option 0: inout = inout + D^{-1}*[(1+w)*r - w*A*x]
* option 1: inout = inout + D^{-1}*[r - tril(A,-1)*x]
* Note: r is modified */
void
hypre_TwoStageGaussSeidelMatvec(hypre_CSRMatrix *A,
hypre_Vector *x,
hypre_Vector *r,
HYPRE_Complex omega,
hypre_Vector *inout,
HYPRE_Int option)
{
if (option == 0)
{
/* spmv with the full matrix */
hypre_CSRMatrixMatvecDevice(0.0, -omega, A, x, 1.0 + omega, r, r, 0.0);
}
else
{
/* spmv with L */
hypre_CSRMatrixSpMVDevice(-1.0, A, x, 1.0, r, -2);
}
hypreDevice_DiagScaleVector(hypre_CSRMatrixNumRows(A), hypre_CSRMatrixI(A), hypre_CSRMatrixData(A),
hypre_VectorData(r), 1.0, hypre_VectorData(inout));
}
HYPRE_Int
hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A,
hypre_ParVector *f,
@ -108,7 +82,7 @@ hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A,
hypre_ParVector *u,
hypre_ParVector *r,
hypre_ParVector *z,
HYPRE_Int choice)
HYPRE_Int num_inner_iters)
{
hypre_NvtxPushRange("BoomerAMGRelaxTwoStageGaussSeidelDevice");
@ -119,14 +93,36 @@ hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A,
hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
hypre_Vector *r_local = hypre_ParVectorLocalVector(r);
hypre_Vector *z_local = hypre_ParVectorLocalVector(z);
HYPRE_Complex *u_data = hypre_VectorData(u_local);
HYPRE_Complex *r_data = hypre_VectorData(r_local);
HYPRE_Complex *z_data = hypre_VectorData(z_local);
HYPRE_Int zsize = hypre_VectorSize(z_local);
HYPRE_Int rsize = hypre_VectorSize(r_local);
HYPRE_Complex multiplier = 1.0;
HYPRE_Int i;
hypre_ParCSRMatrixMatvecOutOfPlace(-relax_weight, A, u, relax_weight, f, r);
hypreDevice_DiagScaleVector(num_rows, A_diag_i, A_diag_data, r_data, 0.0, z_data);
hypre_TwoStageGaussSeidelMatvec(A_diag, z_local, r_local, omega, u_local, choice);
// set this so that axpy works out properly. Reset later.
hypre_VectorSize(z_local) = rsize;
// 1) u = u + z
hypre_SeqVectorAxpy(multiplier, z_local, u_local);
multiplier *= -1.0;
for (i = 0; i < num_inner_iters; ++i)
{
// 2) r = Lz
hypre_CSRMatrixSpMVDevice(1.0, A_diag, z_local, 0.0, r_local, -2);
// 3) z = r/D, u = u + m*z
hypreDevice_DiagScaleVector2(num_rows, A_diag_i, A_diag_data, r_data, multiplier, z_data, u_data);
multiplier *= -1.0;
}
// reset this
hypre_VectorSize(z_local) = zsize;
hypre_NvtxPopRange();
@ -134,3 +130,4 @@ hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A,
}
#endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */

View File

@ -1176,6 +1176,8 @@ HYPRE_Int hypre_BoomerAMGRelaxKaczmarz( hypre_ParCSRMatrix *A, hypre_ParVector *
HYPRE_Int hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *r, hypre_ParVector *z, HYPRE_Int choice);
HYPRE_Int hypre_BoomerAMGRelaxTwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp, HYPRE_Int num_inner_iters );
HYPRE_Int hypre_BoomerAMGRelax11TwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );
HYPRE_Int hypre_BoomerAMGRelax12TwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );

View File

@ -1605,6 +1605,7 @@ void hypre_big_sort_and_create_inverse_map(HYPRE_BigInt *in, HYPRE_Int len, HYPR
HYPRE_Int hypre_SyncCudaComputeStream(hypre_Handle *hypre_handle);
HYPRE_Int hypre_SyncCudaDevice(hypre_Handle *hypre_handle);
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y);
HYPRE_Int hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z);
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
HYPRE_Int hypreDevice_MaskedIVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y, HYPRE_Int *mask);
HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v);

View File

@ -614,6 +614,39 @@ hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
return hypre_error_flag;
}
__global__ void
hypreCUDAKernel_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z)
{
HYPRE_Int i = hypre_cuda_get_grid_thread_id<1,1>();
if (i < n)
{
HYPRE_Complex t = x[i] / A_data[A_i[i]];
y[i] = t;
z[i] += beta*t;
}
}
/* y = diag(A) \ x
* z = beta * (diag(A) \ x) + z
* Note: Assume A_i[i] points to the ith diagonal entry of A */
HYPRE_Int
hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z)
{
/* trivial case */
if (n <= 0)
{
return hypre_error_flag;
}
dim3 bDim = hypre_GetDefaultCUDABlockDimension();
dim3 gDim = hypre_GetDefaultCUDAGridDimension(n, "thread", bDim);
HYPRE_CUDA_LAUNCH( hypreCUDAKernel_DiagScaleVector2, gDim, bDim, n, A_i, A_data, x, beta, y, z );
return hypre_error_flag;
}
__global__ void
hypreCUDAKernel_BigToSmallCopy( HYPRE_Int* __restrict__ tgt,
const HYPRE_BigInt* __restrict__ src,

View File

@ -249,6 +249,7 @@ void hypre_big_sort_and_create_inverse_map(HYPRE_BigInt *in, HYPRE_Int len, HYPR
HYPRE_Int hypre_SyncCudaComputeStream(hypre_Handle *hypre_handle);
HYPRE_Int hypre_SyncCudaDevice(hypre_Handle *hypre_handle);
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y);
HYPRE_Int hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z);
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
HYPRE_Int hypreDevice_MaskedIVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y, HYPRE_Int *mask);
HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v);