2 stage gs (#314)
This PR (by @PaulMullowney #282) includes updates/changes to the 2 stage GS preconditioner. Co-authored-by: Paul Mullowney <pmullown@nrel.gov>
This commit is contained in:
parent
8002200aa0
commit
366b80f89b
@ -1963,6 +1963,8 @@ HYPRE_Int hypre_BoomerAMGRelaxKaczmarz( hypre_ParCSRMatrix *A, hypre_ParVector *
|
||||
|
||||
HYPRE_Int hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *r, hypre_ParVector *z, HYPRE_Int choice);
|
||||
|
||||
HYPRE_Int hypre_BoomerAMGRelaxTwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp, HYPRE_Int num_inner_iters );
|
||||
|
||||
HYPRE_Int hypre_BoomerAMGRelax11TwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );
|
||||
|
||||
HYPRE_Int hypre_BoomerAMGRelax12TwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );
|
||||
|
||||
@ -326,7 +326,7 @@ hypre_BoomerAMGCycle( void *amg_vdata,
|
||||
/* If no coarsening occurred, apply a simple smoother once */
|
||||
Aux_U = U_array[level];
|
||||
Aux_F = F_array[level];
|
||||
num_sweep = 1;
|
||||
num_sweep = num_grid_sweeps[0];
|
||||
/* TK: Use the user relax type (instead of 0) to allow for setting a
|
||||
convergent smoother (e.g. in the solution of singular problems). */
|
||||
relax_type = hypre_ParAMGDataUserRelaxType(amg_data);
|
||||
|
||||
@ -55,8 +55,9 @@ hypre_BoomerAMGRelax( hypre_ParCSRMatrix *A,
|
||||
* triangular)
|
||||
* relax_type = 11 -> Two Stage approximation to GS. Uses the strict lower
|
||||
* part of the diagonal matrix
|
||||
* relax_type = 12 -> Two Stage approximation to GS. Uses the full diagonal
|
||||
* matrix
|
||||
* relax_type = 12 -> Two Stage approximation to GS. Uses the strict lower
|
||||
* part of the diagonal matrix and a second iteration
|
||||
* for additional error approximation
|
||||
* relax_type = 13 -> hybrid L1 Gauss-Seidel forward solve
|
||||
* relax_type = 14 -> hybrid L1 Gauss-Seidel backward solve
|
||||
* relax_type = 15 -> CG
|
||||
@ -1450,13 +1451,16 @@ hypre_BoomerAMGRelaxKaczmarz( hypre_ParCSRMatrix *A,
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
|
||||
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGRelax11TwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
|
||||
hypre_ParVector *f,
|
||||
HYPRE_Real relax_weight,
|
||||
HYPRE_Real omega,
|
||||
hypre_ParVector *u,
|
||||
hypre_ParVector *Vtemp )
|
||||
hypre_BoomerAMGRelaxTwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
|
||||
hypre_ParVector *f,
|
||||
HYPRE_Real relax_weight,
|
||||
HYPRE_Real omega,
|
||||
hypre_ParVector *u,
|
||||
hypre_ParVector *Vtemp,
|
||||
HYPRE_Int num_inner_iters)
|
||||
{
|
||||
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
|
||||
HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
|
||||
@ -1467,9 +1471,8 @@ hypre_BoomerAMGRelax11TwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
|
||||
HYPRE_Complex *Vtemp_data = hypre_VectorData(Vtemp_local);
|
||||
hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
|
||||
HYPRE_Complex *u_data = hypre_VectorData(u_local);
|
||||
HYPRE_Int i, jj, ii;
|
||||
|
||||
hypre_ParCSRMatrixMatvecOutOfPlace(-relax_weight, A, u, relax_weight, f, Vtemp);
|
||||
HYPRE_Int i, k, jj, ii;
|
||||
HYPRE_Complex multiplier = 1.0;
|
||||
|
||||
/* Need to check that EVERY diagonal is nonzero first. If any are, throw exception */
|
||||
for (i = 0; i < num_rows; i++)
|
||||
@ -1480,18 +1483,45 @@ hypre_BoomerAMGRelax11TwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
|
||||
}
|
||||
}
|
||||
|
||||
hypre_ParCSRMatrixMatvecOutOfPlace(-relax_weight, A, u, relax_weight, f, Vtemp);
|
||||
|
||||
|
||||
for (i = 0; i < num_rows; i++) /* Run the smoother */
|
||||
{
|
||||
HYPRE_Complex res = 0.0;
|
||||
for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
|
||||
// V = V/D
|
||||
Vtemp_data[i] /= A_diag_data[A_diag_i[i]];
|
||||
|
||||
// u = u + m*v
|
||||
u_data[i] += multiplier * Vtemp_data[i];
|
||||
}
|
||||
|
||||
// adjust for the alternating series
|
||||
multiplier *= -1.0;
|
||||
|
||||
for (k = 0; k < num_inner_iters; ++k)
|
||||
{
|
||||
// By going from bottom to top, we can update Vtemp in place because
|
||||
// we're operating with the strict, lower triangular matrix
|
||||
for (i = num_rows-1; i >=0; i--) /* Run the smoother */
|
||||
{
|
||||
ii = A_diag_j[jj];
|
||||
if (ii < i)
|
||||
// spmv for the row first
|
||||
HYPRE_Complex res = 0.0;
|
||||
for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
|
||||
{
|
||||
res -= (A_diag_data[jj] / A_diag_data[A_diag_i[ii]]) * Vtemp_data[ii];
|
||||
ii = A_diag_j[jj];
|
||||
if (ii < i)
|
||||
{
|
||||
res += A_diag_data[jj] * Vtemp_data[ii];
|
||||
}
|
||||
}
|
||||
// diagonal scaling has to come after the spmv accumulation. It's a row scaling
|
||||
// not column
|
||||
Vtemp_data[i] = res / A_diag_data[A_diag_i[i]];
|
||||
u_data[i] += multiplier * Vtemp_data[i];
|
||||
}
|
||||
u_data[i] += (Vtemp_data[i] + omega*res) / A_diag_data[A_diag_i[i]];
|
||||
|
||||
// adjust for the alternating series
|
||||
multiplier *= -1.0;
|
||||
}
|
||||
|
||||
return hypre_error_flag;
|
||||
@ -1520,51 +1550,7 @@ hypre_BoomerAMGRelax11TwoStageGaussSeidel( hypre_ParCSRMatrix *A,
|
||||
else
|
||||
#endif
|
||||
{
|
||||
hypre_BoomerAMGRelax11TwoStageGaussSeidelHost(A, f, relax_weight, omega, u, Vtemp);
|
||||
}
|
||||
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGRelax12TwoStageGaussSeidelHost( hypre_ParCSRMatrix *A,
|
||||
hypre_ParVector *f,
|
||||
HYPRE_Real relax_weight,
|
||||
HYPRE_Real omega,
|
||||
hypre_ParVector *u,
|
||||
hypre_ParVector *Vtemp )
|
||||
{
|
||||
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
|
||||
HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
|
||||
HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
|
||||
HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
|
||||
HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
|
||||
hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp);
|
||||
HYPRE_Complex *Vtemp_data = hypre_VectorData(Vtemp_local);
|
||||
hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
|
||||
HYPRE_Complex *u_data = hypre_VectorData(u_local);
|
||||
HYPRE_Int i, jj, ii;
|
||||
|
||||
hypre_ParCSRMatrixMatvecOutOfPlace(-relax_weight, A, u, relax_weight, f, Vtemp);
|
||||
|
||||
/* Need to check that EVERY diagonal is nonzero first. If any are, throw exception */
|
||||
for (i = 0; i < num_rows; i++)
|
||||
{
|
||||
if (A_diag_data[A_diag_i[i]] == 0.0)
|
||||
{
|
||||
hypre_error_in_arg(1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < num_rows; i++) /* Run the smoother */
|
||||
{
|
||||
HYPRE_Complex res = Vtemp_data[i];
|
||||
for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++)
|
||||
{
|
||||
ii = A_diag_j[jj];
|
||||
res -= (A_diag_data[jj] / A_diag_data[A_diag_i[ii]]) * Vtemp_data[ii];
|
||||
}
|
||||
u_data[i] += (Vtemp_data[i] + omega*res) / A_diag_data[A_diag_i[i]];
|
||||
hypre_BoomerAMGRelaxTwoStageGaussSeidelHost(A, f, relax_weight, omega, u, Vtemp, 1);
|
||||
}
|
||||
|
||||
return hypre_error_flag;
|
||||
@ -1589,12 +1575,12 @@ hypre_BoomerAMGRelax12TwoStageGaussSeidel( hypre_ParCSRMatrix *A,
|
||||
|
||||
if (exec == HYPRE_EXEC_DEVICE)
|
||||
{
|
||||
hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice(A, f, relax_weight, omega, u, Vtemp, Ztemp, 0);
|
||||
hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice(A, f, relax_weight, omega, u, Vtemp, Ztemp, 2);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
hypre_BoomerAMGRelax12TwoStageGaussSeidelHost(A, f, relax_weight, omega, u, Vtemp);
|
||||
hypre_BoomerAMGRelaxTwoStageGaussSeidelHost(A, f, relax_weight, omega, u, Vtemp, 2);
|
||||
}
|
||||
|
||||
return hypre_error_flag;
|
||||
|
||||
@ -74,32 +74,6 @@ hypre_BoomerAMGRelaxHybridGaussSeidelDevice( hypre_ParCSRMatrix *A,
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
/* option 0: inout = inout + D^{-1}*[(1+w)*r - w*A*x]
|
||||
* option 1: inout = inout + D^{-1}*[r - tril(A,-1)*x]
|
||||
* Note: r is modified */
|
||||
void
|
||||
hypre_TwoStageGaussSeidelMatvec(hypre_CSRMatrix *A,
|
||||
hypre_Vector *x,
|
||||
hypre_Vector *r,
|
||||
HYPRE_Complex omega,
|
||||
hypre_Vector *inout,
|
||||
HYPRE_Int option)
|
||||
{
|
||||
if (option == 0)
|
||||
{
|
||||
/* spmv with the full matrix */
|
||||
hypre_CSRMatrixMatvecDevice(0.0, -omega, A, x, 1.0 + omega, r, r, 0.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* spmv with L */
|
||||
hypre_CSRMatrixSpMVDevice(-1.0, A, x, 1.0, r, -2);
|
||||
}
|
||||
|
||||
hypreDevice_DiagScaleVector(hypre_CSRMatrixNumRows(A), hypre_CSRMatrixI(A), hypre_CSRMatrixData(A),
|
||||
hypre_VectorData(r), 1.0, hypre_VectorData(inout));
|
||||
}
|
||||
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A,
|
||||
hypre_ParVector *f,
|
||||
@ -108,7 +82,7 @@ hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A,
|
||||
hypre_ParVector *u,
|
||||
hypre_ParVector *r,
|
||||
hypre_ParVector *z,
|
||||
HYPRE_Int choice)
|
||||
HYPRE_Int num_inner_iters)
|
||||
{
|
||||
hypre_NvtxPushRange("BoomerAMGRelaxTwoStageGaussSeidelDevice");
|
||||
|
||||
@ -119,14 +93,36 @@ hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A,
|
||||
hypre_Vector *u_local = hypre_ParVectorLocalVector(u);
|
||||
hypre_Vector *r_local = hypre_ParVectorLocalVector(r);
|
||||
hypre_Vector *z_local = hypre_ParVectorLocalVector(z);
|
||||
HYPRE_Complex *u_data = hypre_VectorData(u_local);
|
||||
HYPRE_Complex *r_data = hypre_VectorData(r_local);
|
||||
HYPRE_Complex *z_data = hypre_VectorData(z_local);
|
||||
HYPRE_Int zsize = hypre_VectorSize(z_local);
|
||||
HYPRE_Int rsize = hypre_VectorSize(r_local);
|
||||
HYPRE_Complex multiplier = 1.0;
|
||||
HYPRE_Int i;
|
||||
|
||||
hypre_ParCSRMatrixMatvecOutOfPlace(-relax_weight, A, u, relax_weight, f, r);
|
||||
|
||||
hypreDevice_DiagScaleVector(num_rows, A_diag_i, A_diag_data, r_data, 0.0, z_data);
|
||||
|
||||
hypre_TwoStageGaussSeidelMatvec(A_diag, z_local, r_local, omega, u_local, choice);
|
||||
// set this so that axpy works out properly. Reset later.
|
||||
hypre_VectorSize(z_local) = rsize;
|
||||
|
||||
// 1) u = u + z
|
||||
hypre_SeqVectorAxpy(multiplier, z_local, u_local);
|
||||
multiplier *= -1.0;
|
||||
|
||||
for (i = 0; i < num_inner_iters; ++i)
|
||||
{
|
||||
// 2) r = Lz
|
||||
hypre_CSRMatrixSpMVDevice(1.0, A_diag, z_local, 0.0, r_local, -2);
|
||||
// 3) z = r/D, u = u + m*z
|
||||
hypreDevice_DiagScaleVector2(num_rows, A_diag_i, A_diag_data, r_data, multiplier, z_data, u_data);
|
||||
multiplier *= -1.0;
|
||||
}
|
||||
|
||||
// reset this
|
||||
hypre_VectorSize(z_local) = zsize;
|
||||
|
||||
hypre_NvtxPopRange();
|
||||
|
||||
@ -134,3 +130,4 @@ hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A,
|
||||
}
|
||||
|
||||
#endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
|
||||
|
||||
|
||||
@ -1176,6 +1176,8 @@ HYPRE_Int hypre_BoomerAMGRelaxKaczmarz( hypre_ParCSRMatrix *A, hypre_ParVector *
|
||||
|
||||
HYPRE_Int hypre_BoomerAMGRelaxTwoStageGaussSeidelDevice ( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *r, hypre_ParVector *z, HYPRE_Int choice);
|
||||
|
||||
HYPRE_Int hypre_BoomerAMGRelaxTwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp, HYPRE_Int num_inner_iters );
|
||||
|
||||
HYPRE_Int hypre_BoomerAMGRelax11TwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );
|
||||
|
||||
HYPRE_Int hypre_BoomerAMGRelax12TwoStageGaussSeidel( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_points, HYPRE_Real relax_weight, HYPRE_Real omega, hypre_ParVector *u, hypre_ParVector *Vtemp, hypre_ParVector *Ztemp );
|
||||
|
||||
@ -1605,6 +1605,7 @@ void hypre_big_sort_and_create_inverse_map(HYPRE_BigInt *in, HYPRE_Int len, HYPR
|
||||
HYPRE_Int hypre_SyncCudaComputeStream(hypre_Handle *hypre_handle);
|
||||
HYPRE_Int hypre_SyncCudaDevice(hypre_Handle *hypre_handle);
|
||||
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y);
|
||||
HYPRE_Int hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z);
|
||||
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
|
||||
HYPRE_Int hypreDevice_MaskedIVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y, HYPRE_Int *mask);
|
||||
HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v);
|
||||
|
||||
@ -614,6 +614,39 @@ hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
hypreCUDAKernel_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z)
|
||||
{
|
||||
HYPRE_Int i = hypre_cuda_get_grid_thread_id<1,1>();
|
||||
|
||||
if (i < n)
|
||||
{
|
||||
HYPRE_Complex t = x[i] / A_data[A_i[i]];
|
||||
y[i] = t;
|
||||
z[i] += beta*t;
|
||||
}
|
||||
}
|
||||
|
||||
/* y = diag(A) \ x
|
||||
* z = beta * (diag(A) \ x) + z
|
||||
* Note: Assume A_i[i] points to the ith diagonal entry of A */
|
||||
HYPRE_Int
|
||||
hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z)
|
||||
{
|
||||
/* trivial case */
|
||||
if (n <= 0)
|
||||
{
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
dim3 bDim = hypre_GetDefaultCUDABlockDimension();
|
||||
dim3 gDim = hypre_GetDefaultCUDAGridDimension(n, "thread", bDim);
|
||||
|
||||
HYPRE_CUDA_LAUNCH( hypreCUDAKernel_DiagScaleVector2, gDim, bDim, n, A_i, A_data, x, beta, y, z );
|
||||
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
hypreCUDAKernel_BigToSmallCopy( HYPRE_Int* __restrict__ tgt,
|
||||
const HYPRE_BigInt* __restrict__ src,
|
||||
|
||||
@ -249,6 +249,7 @@ void hypre_big_sort_and_create_inverse_map(HYPRE_BigInt *in, HYPRE_Int len, HYPR
|
||||
HYPRE_Int hypre_SyncCudaComputeStream(hypre_Handle *hypre_handle);
|
||||
HYPRE_Int hypre_SyncCudaDevice(hypre_Handle *hypre_handle);
|
||||
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y);
|
||||
HYPRE_Int hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z);
|
||||
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
|
||||
HYPRE_Int hypreDevice_MaskedIVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y, HYPRE_Int *mask);
|
||||
HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user