Fix ILU solve (#970)

* Fix segfault in ILU solve when not using reordering and running on host (only affects block Jacobi case)

* Fix segfault in ILU solve when using iterative triangular solves with block Jacobi

---------

Co-authored-by: Daniel Osei-Kuffuor <oseikuffuor1@llnl.gov>
This commit is contained in:
Victor A. P. Magri 2023-11-02 21:21:00 +11:00 committed by GitHub
parent 93bc0a63f8
commit af8fba491b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -815,6 +815,12 @@ hypre_ILUSolveSchurNSH(hypre_ParCSRMatrix *A,
* L, D and U factors only have local scope (no off-diagterms) * L, D and U factors only have local scope (no off-diagterms)
* so apart from the residual calculation (which uses A), * so apart from the residual calculation (which uses A),
* the solves with the L and U factors are local. * the solves with the L and U factors are local.
*
* Note: perm contains the permutation of indexes corresponding to
* user-prescribed reordering strategy. In the block Jacobi case, perm
* may be NULL if no reordering is done (for performance, (perm == NULL)
* assumes identity mapping of indexes). Hence we need to check the local
* solves for this case and avoid segfaults. - DOK
*--------------------------------------------------------------------*/ *--------------------------------------------------------------------*/
HYPRE_Int HYPRE_Int
@ -858,35 +864,75 @@ hypre_ILUSolveLU(hypre_ParCSRMatrix *A,
/* L solve - Forward solve */ /* L solve - Forward solve */
/* copy rhs to account for diagonal of L (which is identity) */ /* copy rhs to account for diagonal of L (which is identity) */
for (i = 0; i < nLU; i++) if(perm)
{ {
utemp_data[perm[i]] = ftemp_data[perm[i]]; for (i = 0; i < nLU; i++)
{
utemp_data[perm[i]] = ftemp_data[perm[i]];
}
}
else
{
for (i = 0; i < nLU; i++)
{
utemp_data[i] = ftemp_data[i];
}
} }
/* Update with remaining (off-diagonal) entries of L */ /* Update with remaining (off-diagonal) entries of L */
for ( i = 0; i < nLU; i++ ) if(perm)
{ {
k1 = L_diag_i[i] ; k2 = L_diag_i[i + 1]; for ( i = 0; i < nLU; i++ )
for (j = k1; j < k2; j++)
{ {
utemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[L_diag_j[j]]]; k1 = L_diag_i[i] ; k2 = L_diag_i[i + 1];
for (j = k1; j < k2; j++)
{
utemp_data[perm[i]] -= L_diag_data[j] * utemp_data[perm[L_diag_j[j]]];
}
} }
} }
else
{
for ( i = 0; i < nLU; i++ )
{
k1 = L_diag_i[i] ; k2 = L_diag_i[i + 1];
for (j = k1; j < k2; j++)
{
utemp_data[i] -= L_diag_data[j] * utemp_data[L_diag_j[j]];
}
}
}
/*-------------------- U solve - Backward substitution */ /*-------------------- U solve - Backward substitution */
for ( i = nLU - 1; i >= 0; i-- ) if(perm)
{ {
/* first update with the remaining (off-diagonal) entries of U */ for ( i = nLU - 1; i >= 0; i-- )
k1 = U_diag_i[i] ; k2 = U_diag_i[i + 1];
for (j = k1; j < k2; j++)
{ {
utemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[U_diag_j[j]]]; /* first update with the remaining (off-diagonal) entries of U */
k1 = U_diag_i[i] ; k2 = U_diag_i[i + 1];
for (j = k1; j < k2; j++)
{
utemp_data[perm[i]] -= U_diag_data[j] * utemp_data[perm[U_diag_j[j]]];
}
/* diagonal scaling (contribution from D. Note: D is stored as its inverse) */
utemp_data[perm[i]] *= D[i];
} }
/* diagonal scaling (contribution from D. Note: D is stored as its inverse) */
utemp_data[perm[i]] *= D[i];
} }
else
{
for ( i = nLU - 1; i >= 0; i-- )
{
/* first update with the remaining (off-diagonal) entries of U */
k1 = U_diag_i[i] ; k2 = U_diag_i[i + 1];
for (j = k1; j < k2; j++)
{
utemp_data[i] -= U_diag_data[j] * utemp_data[U_diag_j[j]];
}
/* diagonal scaling (contribution from D. Note: D is stored as its inverse) */
utemp_data[i] *= D[i];
}
}
/* Update solution */ /* Update solution */
hypre_ParVectorAxpy(beta, utemp, u); hypre_ParVectorAxpy(beta, utemp, u);
@ -901,6 +947,12 @@ hypre_ILUSolveLU(hypre_ParCSRMatrix *A,
* L, D and U factors only have local scope (no off-diag terms) * L, D and U factors only have local scope (no off-diag terms)
* so apart from the residual calculation (which uses A), the solves * so apart from the residual calculation (which uses A), the solves
* with the L and U factors are local. * with the L and U factors are local.
*
* Note: perm contains the permutation of indexes corresponding to
* user-prescribed reordering strategy. In the block Jacobi case, perm
* may be NULL if no reordering is done (for performance, (perm == NULL)
* assumes identity mapping of indexes). Hence we need to check the local
* solves for this case and avoid segfaults. - DOK
*--------------------------------------------------------------------*/ *--------------------------------------------------------------------*/
HYPRE_Int HYPRE_Int
@ -933,8 +985,6 @@ hypre_ILUSolveLUIter(hypre_ParCSRMatrix *A,
HYPRE_Real *utemp_data = hypre_VectorData(utemp_local); HYPRE_Real *utemp_data = hypre_VectorData(utemp_local);
hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp); hypre_Vector *ftemp_local = hypre_ParVectorLocalVector(ftemp);
HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local); HYPRE_Real *ftemp_data = hypre_VectorData(ftemp_local);
hypre_Vector *xtemp_local = hypre_ParVectorLocalVector(xtemp);
HYPRE_Real *xtemp_data = hypre_VectorData(xtemp_local);
/* Local variables */ /* Local variables */
HYPRE_Real alpha = -1.0; HYPRE_Real alpha = -1.0;
@ -954,40 +1004,68 @@ hypre_ILUSolveLUIter(hypre_ParCSRMatrix *A,
/* copy rhs to account for diagonal of L (which is identity) */ /* copy rhs to account for diagonal of L (which is identity) */
/* Initialize iteration to 0 */ /* Initialize iteration to 0 */
for ( i = 0; i < nLU; i++ ) if(perm)
{ {
utemp_data[perm[i]] = 0.0; for ( i = 0; i < nLU; i++ )
{
utemp_data[perm[i]] = 0.0;
}
}
else
{
for ( i = 0; i < nLU; i++ )
{
utemp_data[i] = 0.0;
}
} }
/* Jacobi iteration loop */ /* Jacobi iteration loop */
for ( kk = 0; kk < lower_jacobi_iters; kk++ ) for ( kk = 0; kk < lower_jacobi_iters; kk++ )
{ {
/* u^{k+1} = f - Lu^k */ /* u^{k+1} = f - Lu^k */
/* Do a SpMV with L and save the results in xtemp */ /* Do a SpMV with L and save the results in xtemp */
for ( i = 0; i < nLU; i++ ) if(perm)
{ {
sum = 0.0; for ( i = nLU-1; i >= 0; i-- )
k1 = L_diag_i[i] ; k2 = L_diag_i[i + 1];
for (j = k1; j < k2; j++)
{ {
sum += L_diag_data[j] * utemp_data[perm[L_diag_j[j]]]; sum = 0.0;
k1 = L_diag_i[i] ; k2 = L_diag_i[i + 1];
for (j = k1; j < k2; j++)
{
sum += L_diag_data[j] * utemp_data[perm[L_diag_j[j]]];
}
utemp_data[perm[i]] = ftemp_data[perm[i]] - sum;
} }
xtemp_data[i] = sum;
} }
else
for ( i = 0; i < nLU; i++ )
{ {
utemp_data[perm[i]] = ftemp_data[perm[i]] - xtemp_data[i]; for ( i = nLU-1; i >= 0; i-- )
{
sum = 0.0;
k1 = L_diag_i[i] ; k2 = L_diag_i[i + 1];
for (j = k1; j < k2; j++)
{
sum += L_diag_data[j] * utemp_data[L_diag_j[j]];
}
utemp_data[i] = ftemp_data[i] - sum;
}
} }
} /* end jacobi loop */ } /* end jacobi loop */
/* Initialize iteration to 0 */ /* Initialize iteration to 0 */
for ( i = 0; i < nLU; i++ ) if(perm)
{ {
/* this should is doable without the permutation */ for ( i = 0; i < nLU; i++ )
//ftemp_data[perm[i]] = utemp_data[perm[i]]; {
ftemp_data[perm[i]] = 0.0; ftemp_data[perm[i]] = 0.0;
}
}
else
{
for ( i = 0; i < nLU; i++ )
{
ftemp_data[i] = 0.0;
}
} }
/* Jacobi iteration loop */ /* Jacobi iteration loop */
@ -996,20 +1074,31 @@ hypre_ILUSolveLUIter(hypre_ParCSRMatrix *A,
/* u^{k+1} = f - Uu^k */ /* u^{k+1} = f - Uu^k */
/* Do a SpMV with U and save the results in xtemp */ /* Do a SpMV with U and save the results in xtemp */
for ( i = 0; i < nLU; ++i ) if(perm)
{ {
sum = 0.0; for ( i = 0; i < nLU; ++i )
k1 = U_diag_i[i] ; k2 = U_diag_i[i + 1];
for (j = k1; j < k2; j++)
{ {
sum += U_diag_data[j] * ftemp_data[perm[U_diag_j[j]]]; sum = 0.0;
k1 = U_diag_i[i] ; k2 = U_diag_i[i + 1];
for (j = k1; j < k2; j++)
{
sum += U_diag_data[j] * ftemp_data[perm[U_diag_j[j]]];
}
ftemp_data[perm[i]] = D[i] * (utemp_data[perm[i]] - sum);
} }
xtemp_data[i] = sum;
} }
else
for ( i = 0; i < nLU; ++i )
{ {
ftemp_data[perm[i]] = D[i] * (utemp_data[perm[i]] - xtemp_data[i]); for ( i = 0; i < nLU; ++i )
{
sum = 0.0;
k1 = U_diag_i[i] ; k2 = U_diag_i[i + 1];
for (j = k1; j < k2; j++)
{
sum += U_diag_data[j] * ftemp_data[U_diag_j[j]];
}
ftemp_data[i] = D[i] * (utemp_data[i] - sum);
}
} }
} /* end jacobi loop */ } /* end jacobi loop */