hypre/distributed_linear_solvers/pilut/serilut.c

641 lines
19 KiB
C

/*
* serilut.c
*
* This file implements ILUT in the local part of the matrix
*
* Started 10/18/95
* George
*
* 7/8 MRG
* - added rrowlen and verified
* 7/22 MRG
* - removed SelectInterior function form SerILUT code
* - changed FindMinGreater to ExtractMinLR
* - changed lr to using permutation; this allows reorderings like RCM.
*
* 12/4 AJC
* - Changed code to handle modified matrix storage format with multiple blocks
*
* 1/13 AJC
* - Modified code with macros to allow both 0 and 1-based indexing
*
* $Id$
*
*/
#include "./DistributedMatrixPilutSolver.h"
#include "ilu.h"
/*************************************************************************
* This function takes a matrix and performs an ILUT of the internal nodes
**************************************************************************/
int SerILUT(DataDistType *ddist, HYPRE_DistributedMatrix matrix,
FactorMatType *ldu,
ReduceMatType *rmat, int maxnz, double tol,
hypre_PilutSolverGlobals *globals)
{
int i, ii, j, k, kk, l, m, ierr, diag_present;
int *perm, *iperm,
*usrowptr, *uerowptr, *ucolind,
*rnz, **rcolind;
int row_size, *col_ind;
double *values, *uvalues, *dvalues, *nrm2s, **rvalues;
int nlocal, nbnd;
double mult, rtol;
int *structural_union;
nrows = ddist->ddist_nrows;
lnrows = ddist->ddist_lnrows;
firstrow = ddist->ddist_rowdist[mype];
lastrow = ddist->ddist_rowdist[mype+1];
usrowptr = ldu->usrowptr;
uerowptr = ldu->uerowptr;
ucolind = ldu->ucolind;
uvalues = ldu->uvalues;
dvalues = ldu->dvalues;
nrm2s = ldu->nrm2s;
perm = ldu->perm;
iperm = ldu->iperm;
/* Allocate work space */
jr = idx_malloc_init(nrows, -1, "SerILUT: jr");
lr = idx_malloc_init(nrows, -1, "SerILUT: lr");
jw = idx_malloc(nrows, "SerILUT: jw");
w = fp_malloc(nrows, "SerILUT: w" );
/* Find structural union of local rows */
ierr = FindStructuralUnion( matrix, &structural_union, globals );
if(ierr) return(ierr);
/* Exchange structural unions with other processors */
ierr = ExchangeStructuralUnions( ddist, &structural_union, globals );
if(ierr) return(ierr);
/* Select the rows to be factored */
nlocal = SelectInterior( lnrows, matrix, structural_union,
perm, iperm, globals );
/* Structural Union no longer required */
hypre_TFree( structural_union );
nbnd = lnrows - nlocal ;
#ifdef HYPRE_DEBUG
printf("nbnd = %d, lnrows=%d, nlocal=%d\n", nbnd, lnrows, nlocal );
#endif
ldu->nnodes[0] = nlocal;
/* myprintf("Nlocal: %d, Nbnd: %d\n", nlocal, nbnd); */
/*******************************************************************/
/* Go and factor the nlocal rows */
/*******************************************************************/
for (ii=0; ii<nlocal; ii++) {
i = perm[ii];
rtol = nrm2s[i]*tol; /* Compute relative tolerance */
/* Initialize work space */
ierr = HYPRE_GetDistributedMatrixRow( matrix, firstrow+i, &row_size,
&col_ind, &values);
if (ierr) return(ierr);
for (lastjr=1, lastlr=0, j=0, diag_present=0; j<row_size; j++) {
if (iperm[ col_ind[j] - firstrow ] < iperm[i])
lr[lastlr++] = iperm[ col_ind[j]-firstrow]; /* Copy the L elements separately */
if (col_ind[j] != i+firstrow) { /* Off-diagonal element */
jr[col_ind[j]] = lastjr;
jw[lastjr] = col_ind[j];
w[lastjr] = values[j];
lastjr++;
}
else { /* Put the diagonal element at the beginning */
diag_present = 1;
jr[i+firstrow] = 0;
jw[0] = i+firstrow;
w[0] = values[j];
}
}
if( !diag_present ) /* No diagonal element was found; insert a zero */
{
jr[i+firstrow] = 0;
jw[0] = i+firstrow;
w[0] = 0.0;
}
ierr = HYPRE_RestoreDistributedMatrixRow( matrix, firstrow+i, &row_size,
&col_ind, &values);
k = -1;
while (lastlr != 0) {
/* since fill may create new L elements, and they must by done in order
* of the permutation, search for the min each time.
* Note that we depend on the permutation order following natural index
* order for the interior rows. */
kk = perm[ExtractMinLR( globals )];
k = kk+firstrow;
mult = w[jr[k]]*dvalues[kk];
w[jr[k]] = mult;
if (fabs(mult) < rtol)
continue; /* First drop test */
for (l=usrowptr[kk]; l<uerowptr[kk]; l++) {
m = jr[ucolind[l]];
if (m == -1 && fabs(mult*uvalues[l]) < rtol*0.5)
continue; /* Don't add fill if the element is too small */
if (m == -1) { /* Create fill */
if (iperm[ucolind[l]-firstrow] < iperm[i])
lr[lastlr++] = iperm[ucolind[l]-firstrow]; /* Copy the L elements separately */
jr[ucolind[l]] = lastjr;
jw[lastjr] = ucolind[l];
w[lastjr] = 0.0;
m = lastjr++;
}
w[m] -= mult*uvalues[l];
}
}
/* Apply 2nd dropping rule -- forms L and U */
SecondDrop(maxnz, rtol, i+firstrow, perm, iperm, ldu, globals );
}
/******************************************************************/
/* Form the reduced matrix */
/******************************************************************/
/* Allocate memory for the reduced matrix */
rnz =
rmat->rmat_rnz = idx_malloc(nbnd, "SerILUT: rmat->rmat_rnz" );
rmat->rmat_rrowlen = idx_malloc(nbnd, "SerILUT: rmat->rmat_rrowlen");
rcolind =
rmat->rmat_rcolind = (int **)mymalloc(sizeof(int *)*nbnd, "SerILUT: rmat->rmat_rcolind");
rvalues =
rmat->rmat_rvalues = (double **)mymalloc(sizeof(double *)*nbnd, "SerILUT: rmat->rmat_rvalues");
rmat->rmat_ndone = nlocal;
rmat->rmat_ntogo = nbnd;
for (ii=nlocal; ii<lnrows; ii++) {
i = perm[ii];
rtol = nrm2s[i]*tol; /* Compute relative tolerance */
/* Initialize work space */
ierr = HYPRE_GetDistributedMatrixRow( matrix, firstrow+i, &row_size,
&col_ind, &values);
if (ierr) return(ierr);
for (lastjr=1, lastlr=0, j=0, diag_present=0; j<row_size; j++) {
if (col_ind[j] >= firstrow &&
col_ind[j] < lastrow &&
iperm[col_ind[j]-firstrow] < nlocal)
lr[lastlr++] = iperm[col_ind[j]-firstrow]; /* Copy the L elements separately */
if (col_ind[j] != i+firstrow) { /* Off-diagonal element */
jr[col_ind[j]] = lastjr;
jw[lastjr] = col_ind[j];
w[lastjr] = values[j];
lastjr++;
}
else { /* Put the diagonal element at the begining */
diag_present = 1;
jr[i+firstrow] = 0;
jw[0] = i+firstrow;
w[0] = values[j];
}
}
if( !diag_present ) /* No diagonal element was found; insert a zero */
{
jr[i+firstrow] = 0;
jw[0] = i+firstrow;
w[0] = 0.0;
}
ierr = HYPRE_RestoreDistributedMatrixRow( matrix, firstrow+i, &row_size,
&col_ind, &values);
k = -1;
while (lastlr != 0) {
kk = perm[ExtractMinLR(globals)];
k = kk+firstrow;
mult = w[jr[k]]*dvalues[kk];
w[jr[k]] = mult;
if (fabs(mult) < rtol)
continue; /* First drop test */
for (l=usrowptr[kk]; l<uerowptr[kk]; l++) {
m = jr[ucolind[l]];
if (m == -1 && fabs(mult*uvalues[l]) < rtol*0.5)
continue; /* Don't add fill if the element is too small */
if (m == -1) { /* Create fill */
CheckBounds(firstrow, ucolind[l], lastrow, globals);
if (iperm[ucolind[l]-firstrow] < nlocal)
lr[lastlr++] = iperm[ucolind[l]-firstrow]; /* Copy the L elements separately */
jr[ucolind[l]] = lastjr;
jw[lastjr] = ucolind[l];
w[lastjr] = 0.0;
m = lastjr++;
}
w[m] -= mult*uvalues[l];
}
}
/* Apply 2nd dropping rule -- forms partial L and rmat */
SecondDropUpdate(maxnz, MAX(3*maxnz, row_size),
rtol, i+firstrow,
nlocal, perm, iperm, ldu, rmat, globals);
}
free_multi(jr, jw, lr, w, -1);
return(ierr);
}
/*************************************************************************
* This function selects the interior nodes (ones w/o nonzeros corresponding
* to other PEs) and permutes them first, then boundary nodes last.
* It takes a vector that marks rows as being forced to not be in the interior.
* For full generality this would also mark them in the map, but it doesn't.
**************************************************************************/
int SelectInterior( int local_num_rows,
HYPRE_DistributedMatrix matrix,
int *external_rows,
int *newperm, int *newiperm,
hypre_PilutSolverGlobals *globals )
{
int nbnd, nlocal, i, j, ierr;
int break_loop; /* marks finding an element making this row exterior. -AC */
int row_size, *col_ind;
double *values;
/* Determine which vertices are in the boundary,
* permuting interior rows first then boundary nodes. */
nbnd = 0;
nlocal = 0;
for (i=0; i<local_num_rows; i++)
{
if (external_rows[i])
{
newperm[local_num_rows-nbnd-1] = i;
newiperm[i] = local_num_rows-nbnd-1;
nbnd++;
} else
{
ierr = HYPRE_GetDistributedMatrixRow( matrix, firstrow+i, &row_size,
&col_ind, &values);
if (ierr) return(ierr);
for (j=0, break_loop=0; ( j<row_size )&& (break_loop == 0); j++)
{
if (col_ind[j] < firstrow || col_ind[j] >= lastrow)
{
newperm[local_num_rows-nbnd-1] = i;
newiperm[i] = local_num_rows-nbnd-1;
nbnd++;
break_loop = 1;
}
}
ierr = HYPRE_RestoreDistributedMatrixRow( matrix, firstrow+i, &row_size,
&col_ind, &values);
if ( break_loop == 0 )
{
newperm[nlocal] = i;
newiperm[i] = nlocal;
nlocal++;
}
}
}
return nlocal;
}
/*************************************************************************
* FindStructuralUnion
* Produces a vector of length n that marks the union of the nonzero
* structure of all locally stored rows, not including locally stored columns.
**************************************************************************/
int FindStructuralUnion( HYPRE_DistributedMatrix matrix,
int **structural_union,
hypre_PilutSolverGlobals *globals )
{
int ierr=0, i, j, row_size, *col_ind;
/* Allocate and clear structural_union vector */
*structural_union = hypre_CTAlloc( int, nrows );
/* Loop through rows */
for ( i=0; i< lnrows; i++ )
{
/* Get row structure; no values needed */
ierr = HYPRE_GetDistributedMatrixRow( matrix, firstrow+i, &row_size,
&col_ind, NULL );
if (ierr) return(ierr);
/* Loop through nonzeros in this row */
for ( j=0; j<row_size; j++)
{
if (col_ind[j] < firstrow || col_ind[j] >= lastrow)
{
(*structural_union)[ col_ind[j] ] = 1;
}
}
/* Restore row structure */
ierr = HYPRE_RestoreDistributedMatrixRow( matrix, firstrow+i, &row_size,
&col_ind, NULL );
if (ierr) return(ierr);
}
return(ierr);
}
/*************************************************************************
* ExchangeStructuralUnions
* Exchanges structural union vectors with other processors and produces
* a vector the size of the number of locally stored rows that marks
* whether any exterior processor has a nonzero in the column corresponding
* to each row. This is used to determine if a local row might have to
* update an off-processor row.
**************************************************************************/
int ExchangeStructuralUnions( DataDistType *ddist,
int **structural_union,
hypre_PilutSolverGlobals *globals )
{
int ierr=0, i, j, *recv_unions;
/* allocate space for receiving unions */
recv_unions = hypre_CTAlloc( int, nrows );
/* combine my structural_union with all other processors */
MPI_Allreduce( *structural_union, recv_unions, nrows,
MPI_INT, MPI_LOR, pilut_comm );
/* free and reallocate structural union so that is of local size */
hypre_TFree( *structural_union );
*structural_union = hypre_TAlloc( int, lnrows );
memcpy_int( *structural_union, &recv_unions[firstrow], lnrows );
/* deallocate recv_unions */
hypre_TFree( recv_unions );
return(ierr);
}
/*************************************************************************
* This function applies the second droping rule where maxnz elements
* greater than tol are kept. The elements are stored into LDU.
**************************************************************************/
void SecondDrop(int maxnz, double tol, int row,
int *perm, int *iperm,
FactorMatType *ldu, hypre_PilutSolverGlobals *globals)
{
int i, j;
int max, nz, diag, lrow;
int first, last, itmp;
double dtmp;
/* Reset the jr array, it is not needed any more */
for (i=0; i<lastjr; i++)
jr[jw[i]] = -1;
lrow = row-firstrow;
diag = iperm[lrow];
/* Deal with the diagonal element first */
assert(jw[0] == row);
if (w[0] != 0.0)
ldu->dvalues[lrow] = 1.0/w[0];
else { /* zero pivot */
printf("Zero pivot in row %d, adding e to proceed!\n", row);
ldu->dvalues[lrow] = 1.0/tol;
}
jw[0] = jw[--lastjr];
w[0] = w[lastjr];
/* First go and remove any off diagonal elements bellow the tolerance */
for (i=0; i<lastjr;) {
if (fabs(w[i]) < tol) {
jw[i] = jw[--lastjr];
w[i] = w[lastjr];
}
else
i++;
}
if (lastjr == 0)
last = first = 0;
else { /* Perform a Qsort type pass to seperate L and U entries */
last = 0, first = lastjr-1;
while (1) {
while (last < first && iperm[jw[last]-firstrow] < diag)
last++;
while (last < first && iperm[jw[first]-firstrow] > diag)
first--;
if (last < first) {
SWAP(jw[first], jw[last], itmp);
SWAP(w[first], w[last], dtmp);
last++; first--;
}
if (last == first) {
if (iperm[jw[last]-firstrow] < diag) {
first++;
last++;
}
break;
}
else if (last > first) {
first++;
break;
}
}
}
/*****************************************************************
* The entries between [0, last) are part of L
* The entries [first, lastjr) are part of U
******************************************************************/
/* Now, I want to keep maxnz elements of L. Go and extract them */
for (nz=0; nz<maxnz && last>0; nz++) {
for (max=0, j=1; j<last; j++) {
if (fabs(w[j]) > fabs(w[max]))
max = j;
}
ldu->lcolind[ldu->lerowptr[lrow]] = jw[max];
ldu->lvalues[ldu->lerowptr[lrow]] = w[max];
ldu->lerowptr[lrow]++;
jw[max] = jw[--last];
w[max] = w[last];
}
/* Now, I want to keep maxnz elements of U. Go and extract them */
for (nz=0; nz<maxnz && lastjr>first; nz++) {
for (max=first, j=first+1; j<lastjr; j++) {
if (fabs(w[j]) > fabs(w[max]))
max = j;
}
ldu->ucolind[ldu->uerowptr[lrow]] = jw[max];
ldu->uvalues[ldu->uerowptr[lrow]] = w[max];
ldu->uerowptr[lrow]++;
jw[max] = jw[--lastjr];
w[max] = w[lastjr];
}
}
/*************************************************************************
* This function applyies the second droping rule whre maxnz elements
* greater than tol are kept. The elements are stored into L and the Rmat.
* This version keeps only maxnzkeep
**************************************************************************/
void SecondDropUpdate(int maxnz, int maxnzkeep, double tol, int row,
int nlocal, int *perm, int *iperm,
FactorMatType *ldu, ReduceMatType *rmat,
hypre_PilutSolverGlobals *globals )
{
int i, j, k, nl;
int max, nz, lrow, rrow;
int last, first, itmp;
double dtmp;
/* Reset the jr array, it is not needed any more */
for (i=0; i<lastjr; i++)
jr[jw[i]] = -1;
lrow = row-firstrow;
rrow = iperm[lrow] - nlocal;
/* First go and remove any elements of the row bellow the tolerance */
for (i=1; i<lastjr;) {
if (fabs(w[i]) < tol) {
jw[i] = jw[--lastjr];
w[i] = w[lastjr];
}
else
i++;
}
if (lastjr == 1)
last = first = 1;
else { /* Perform a Qsort type pass to seperate L and U entries */
last = 1, first = lastjr-1;
while (1) {
while (last < first && /* and [last] is L */
jw[last] >= firstrow &&
jw[last] < lastrow &&
iperm[jw[last]-firstrow] < nlocal)
last++;
while (last < first && /* and [first] is not L */
!(jw[first] >= firstrow &&
jw[first] < lastrow &&
iperm[jw[first]-firstrow] < nlocal))
first--;
if (last < first) {
SWAP(jw[first], jw[last], itmp);
SWAP( w[first], w[last], dtmp);
last++; first--;
}
if (last == first) {
if (jw[last] >= firstrow &&
jw[last] < lastrow &&
iperm[jw[last]-firstrow] < nlocal) {
first++;
last++;
}
break;
}
else if (last > first) {
first++;
break;
}
}
}
/*****************************************************************
* The entries between [1, last) are part of L
* The entries [first, lastjr) are part of U
******************************************************************/
/* Keep large maxnz elements of L */
for (nz=0; nz<maxnz && last>1; nz++) {
for (max=1, j=2; j<last; j++) {
if (fabs(w[j]) > fabs(w[max]))
max = j;
}
ldu->lcolind[ldu->lerowptr[lrow]] = jw[max];
ldu->lvalues[ldu->lerowptr[lrow]] = w[max];
ldu->lerowptr[lrow]++;
jw[max] = jw[--last];
w[max] = w[last];
}
/* Allocate appropriate amount of memory for the reduced row */
nl = MIN(lastjr-first+1, maxnzkeep);
rmat->rmat_rnz[rrow] = nl;
rmat->rmat_rcolind[rrow] = idx_malloc(nl, "SecondDropUpdate: rmat->rmat_rcolind[rrow]");
rmat->rmat_rvalues[rrow] = fp_malloc(nl, "SecondDropUpdate: rmat->rmat_rvalues[rrow]");
rmat->rmat_rrowlen[rrow] = nl;
rmat->rmat_rcolind[rrow][0] = row; /* Put the diagonal at the begining */
rmat->rmat_rvalues[rrow][0] = w[0];
if (nl == lastjr-first+1) { /* Simple copy */
for (i=1,j=first; j<lastjr; j++,i++) {
rmat->rmat_rcolind[rrow][i] = jw[j];
rmat->rmat_rvalues[rrow][i] = w[j];
}
}
else { /* Keep large nl elements in the reduced row */
for (nz=1; nz<nl; nz++) {
for (max=first, j=first+1; j<lastjr; j++) {
if (fabs(w[j]) > fabs(w[max]))
max = j;
}
rmat->rmat_rcolind[rrow][nz] = jw[max];
rmat->rmat_rvalues[rrow][nz] = w[max];
jw[max] = jw[--lastjr];
w[max] = w[lastjr];
}
}
}