2242 lines
76 KiB
C
2242 lines
76 KiB
C
/*BHEADER**********************************************************************
|
|
* (c) 1999 The Regents of the University of California
|
|
*
|
|
* See the file COPYRIGHT_and_DISCLAIMER for a complete copyright
|
|
* notice, contact person, and disclaimer.
|
|
*
|
|
* $Revision$
|
|
*********************************************************************EHEADER*/
|
|
/******************************************************************************
|
|
*
|
|
* HYPRE_DDILUT interface
|
|
*
|
|
*****************************************************************************/
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
|
|
#include "utilities/utilities.h"
|
|
#include "HYPRE.h"
|
|
#include "IJ_matrix_vector/HYPRE_IJ_mv.h"
|
|
#include "parcsr_matrix_vector/HYPRE_parcsr_mv.h"
|
|
#include "parcsr_matrix_vector/parcsr_matrix_vector.h"
|
|
#include "parcsr_linear_solvers/HYPRE_parcsr_ls.h"
|
|
#include "HYPRE_MHMatrix.h"
|
|
|
|
#ifdef MLPACK
|
|
|
|
#include "ml_struct.h"
|
|
#include "ml_aggregate.h"
|
|
|
|
#endif
|
|
|
|
#include "HYPRE_MHMatrix.h"
|
|
|
|
typedef struct HYPRE_LSI_DDIlut_Struct
|
|
{
|
|
MPI_Comm comm;
|
|
MH_Matrix *mh_mat;
|
|
double thresh;
|
|
double fillin;
|
|
int Nrows;
|
|
int extNrows;
|
|
int *mat_ia;
|
|
int *mat_ja;
|
|
double *mat_aa;
|
|
int outputLevel;
|
|
}
|
|
HYPRE_LSI_DDIlut;
|
|
|
|
extern HYPRE_ParCSRMLConstructMHMatrix(HYPRE_ParCSRMatrix,MH_Matrix *,
|
|
MPI_Comm, int *, MH_Context *);
|
|
extern int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *, int *,
|
|
int **recv_lengths, int **int_buf, double **dble_buf,
|
|
int **sindex_array, int **sindex_array2, int *offset);
|
|
extern int HYPRE_LSI_DDIlutGetRowLengths(MH_Matrix *Amat, int *leng, int **);
|
|
extern int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *,
|
|
int Noffset, int *map, int *map2, int **int_buf,
|
|
double **dble_buf);
|
|
extern int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
|
|
int total_recv_leng, int *recv_lengths, int *ext_ja,
|
|
double *ext_aa, int *map, int *map2, int Noffset);
|
|
extern void HYPRE_LSI_Sort(int *, int, int *, double *);
|
|
extern int HYPRE_LSI_SplitDSort(double*,int,int*,int);
|
|
|
|
#define abs(x) ((x) > 0 ? (x) : -(x))
|
|
|
|
/*--------------------------------------------------------------------------
|
|
* HYPRE_LSI_DDIlutCreate - Return a DDIlut preconditioner object "solver".
|
|
*--------------------------------------------------------------------------*/
|
|
|
|
int HYPRE_LSI_DDIlutCreate( MPI_Comm comm, HYPRE_Solver *solver )
|
|
{
|
|
HYPRE_LSI_DDIlut *ilut_ptr;
|
|
|
|
ilut_ptr = (HYPRE_LSI_DDIlut *) malloc(sizeof(HYPRE_LSI_DDIlut));
|
|
|
|
if (ilut_ptr == NULL) return 1;
|
|
|
|
ilut_ptr->comm = comm;
|
|
ilut_ptr->mh_mat = NULL;
|
|
ilut_ptr->fillin = 0.0;
|
|
ilut_ptr->thresh = 0.0; /* defaults */
|
|
ilut_ptr->mat_ia = NULL;
|
|
ilut_ptr->mat_ja = NULL;
|
|
ilut_ptr->mat_aa = NULL;
|
|
ilut_ptr->outputLevel = 0;
|
|
|
|
*solver = (HYPRE_Solver) ilut_ptr;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------
|
|
* HYPRE_LSI_DDIlutDestroy - Destroy a DDIlut object.
|
|
*--------------------------------------------------------------------------*/
|
|
|
|
int HYPRE_LSI_DDIlutDestroy( HYPRE_Solver solver )
|
|
{
|
|
int i;
|
|
HYPRE_LSI_DDIlut *ilut_ptr;
|
|
|
|
ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
|
|
if ( ilut_ptr->mat_ia != NULL ) free(ilut_ptr->mat_ia);
|
|
if ( ilut_ptr->mat_ja != NULL ) free(ilut_ptr->mat_ja);
|
|
if ( ilut_ptr->mat_aa != NULL ) free(ilut_ptr->mat_aa);
|
|
ilut_ptr->mat_ia = NULL;
|
|
ilut_ptr->mat_ja = NULL;
|
|
ilut_ptr->mat_aa = NULL;
|
|
if ( ilut_ptr->mh_mat != NULL )
|
|
{
|
|
if (ilut_ptr->mh_mat->sendProc != NULL) free(ilut_ptr->mh_mat->sendProc);
|
|
if (ilut_ptr->mh_mat->sendLeng != NULL) free(ilut_ptr->mh_mat->sendLeng);
|
|
if (ilut_ptr->mh_mat->recvProc != NULL) free(ilut_ptr->mh_mat->recvProc);
|
|
if (ilut_ptr->mh_mat->recvLeng != NULL) free(ilut_ptr->mh_mat->recvLeng);
|
|
for ( i = 0; i < ilut_ptr->mh_mat->sendProcCnt; i++ )
|
|
if (ilut_ptr->mh_mat->sendList[i] != NULL)
|
|
free(ilut_ptr->mh_mat->sendList[i]);
|
|
if (ilut_ptr->mh_mat->sendList != NULL) free(ilut_ptr->mh_mat->sendList);
|
|
free( ilut_ptr->mh_mat );
|
|
}
|
|
ilut_ptr->mh_mat = NULL;
|
|
free(ilut_ptr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------
|
|
* HYPRE_LSI_DDIlutSetFillin - Set the fill-in parameter.
|
|
*--------------------------------------------------------------------------*/
|
|
|
|
int HYPRE_LSI_DDIlutSetFillin(HYPRE_Solver solver, double fillin)
|
|
{
|
|
HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
|
|
|
|
ilut_ptr->fillin = fillin;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------
|
|
* HYPRE_LSI_DDIlutSetDropTolerance - Set the threshold for dropping
|
|
*--------------------------------------------------------------------------*/
|
|
|
|
int HYPRE_LSI_DDIlutSetDropTolerance(HYPRE_Solver solver, double thresh)
|
|
{
|
|
HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
|
|
|
|
ilut_ptr->thresh = thresh;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------
|
|
* HYPRE_LSI_DDIlutSetOutputLevel - Set debug level
|
|
*--------------------------------------------------------------------------*/
|
|
|
|
int HYPRE_LSI_DDIlutSetOutputLevel(HYPRE_Solver solver, int level)
|
|
{
|
|
HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
|
|
|
|
ilut_ptr->outputLevel = level;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------
|
|
* HYPRE_LSI_DDIlutSolve - Solve function for DDILUT.
|
|
*--------------------------------------------------------------------------*/
|
|
|
|
int HYPRE_LSI_DDIlutSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A,
|
|
HYPRE_ParVector b, HYPRE_ParVector x )
|
|
{
|
|
int i, j, ierr = 0, *idiag, Nrows, extNrows, *mat_ia, *mat_ja;
|
|
int column;
|
|
double *rhs, *soln, *dbuffer, ddata, *mat_aa;
|
|
HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
|
|
MH_Context *context;
|
|
|
|
rhs = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) b));
|
|
soln = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) x));
|
|
|
|
Nrows = ilut_ptr->Nrows;
|
|
extNrows = ilut_ptr->extNrows;
|
|
mat_ia = ilut_ptr->mat_ia;
|
|
mat_ja = ilut_ptr->mat_ja;
|
|
mat_aa = ilut_ptr->mat_aa;
|
|
|
|
dbuffer = (double *) malloc(extNrows * sizeof(double));
|
|
idiag = (int *) malloc(extNrows * sizeof(int));
|
|
for ( i = 0; i < Nrows; i++ ) dbuffer[i] = rhs[i];
|
|
|
|
context = (MH_Context *) malloc(sizeof(MH_Context));
|
|
context->Amat = ilut_ptr->mh_mat;
|
|
context->comm = MPI_COMM_WORLD;
|
|
|
|
if ( extNrows > Nrows ) MH_ExchBdry(dbuffer, context);
|
|
|
|
for ( i = 0; i < extNrows; i++ )
|
|
{
|
|
ddata = 0.0;
|
|
for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
|
|
{
|
|
column = mat_ja[j];
|
|
if ( column == i ) { idiag[i] = j; break;}
|
|
ddata += mat_aa[j] * dbuffer[column];
|
|
}
|
|
dbuffer[i] -= ddata;
|
|
}
|
|
for ( i = extNrows-1; i >= 0; i-- )
|
|
{
|
|
ddata = 0.0;
|
|
for ( j = idiag[i]+1; j < mat_ia[i+1]; j++ )
|
|
{
|
|
column = mat_ja[j];
|
|
ddata += mat_aa[j] * dbuffer[column];
|
|
}
|
|
dbuffer[i] -= ddata;
|
|
dbuffer[i] /= mat_aa[idiag[i]];
|
|
}
|
|
for ( i = 0; i < Nrows; i++ ) soln[i] = dbuffer[i];
|
|
free(dbuffer);
|
|
free(idiag);
|
|
free(context);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*--------------------------------------------------------------------------
|
|
* HYPRE_LSI_DDIlutSetup - Set up function for LSI_DDIlut.
|
|
*--------------------------------------------------------------------------*/
|
|
|
|
int HYPRE_LSI_DDIlutSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr,
|
|
HYPRE_ParVector b, HYPRE_ParVector x )
|
|
{
|
|
int i, j, offset, total_recv_leng, *recv_lengths=NULL;
|
|
int *int_buf=NULL, mypid, nprocs, overlap_flag=1,*parray;
|
|
int *map=NULL, *map2=NULL, *row_partition=NULL,*parray2;
|
|
double *dble_buf=NULL;
|
|
HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
|
|
MH_Context *context=NULL;
|
|
MH_Matrix *mh_mat=NULL;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* get the row information in my processors */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &mypid);
|
|
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
|
HYPRE_ParCSRMatrixGetRowPartitioning(A_csr, &row_partition);
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* convert the incoming CSR matrix into a MH matrix */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
context = (MH_Context *) malloc(sizeof(MH_Context));
|
|
context->comm = MPI_COMM_WORLD;
|
|
context->globalEqns = row_partition[nprocs];
|
|
context->partition = (int *) malloc(sizeof(int)*(nprocs+1));
|
|
for (i=0; i<=nprocs; i++) context->partition[i] = row_partition[i];
|
|
hypre_TFree( row_partition );
|
|
mh_mat = ( MH_Matrix * ) malloc( sizeof( MH_Matrix) );
|
|
context->Amat = mh_mat;
|
|
HYPRE_ParCSRMLConstructMHMatrix(A_csr,mh_mat,MPI_COMM_WORLD,
|
|
context->partition,context);
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* compose the enlarged overlapped local matrix */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
if ( overlap_flag )
|
|
{
|
|
HYPRE_LSI_DDIlutComposeOverlappedMatrix(mh_mat, &total_recv_leng,
|
|
&recv_lengths, &int_buf, &dble_buf, &map, &map2,&offset);
|
|
}
|
|
else
|
|
{
|
|
total_recv_leng = 0;
|
|
recv_lengths = NULL;
|
|
int_buf = NULL;
|
|
dble_buf = NULL;
|
|
map = NULL;
|
|
map2 = NULL;
|
|
parray = (int *) malloc(nprocs * sizeof(int) );
|
|
parray2 = (int *) malloc(nprocs * sizeof(int) );
|
|
for ( i = 0; i < nprocs; i++ ) parray2[i] = 0;
|
|
parray2[mypid] = mh_mat->Nrows;
|
|
MPI_Allreduce(parray2,parray,nprocs,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
|
|
offset = 0;
|
|
for (i = 0; i < mypid; i++) offset += parray[i];
|
|
free(parray);
|
|
free(parray2);
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* perform ILUT decomposition on local matrix */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
if ( ilut_ptr->mat_ia == NULL )
|
|
HYPRE_LSI_DDIlutDecompose(ilut_ptr,mh_mat,total_recv_leng,recv_lengths,
|
|
int_buf, dble_buf, map,map2, offset);
|
|
else
|
|
{
|
|
HYPRE_LSI_DDIlutDecompose2(ilut_ptr,mh_mat,total_recv_leng,recv_lengths,
|
|
int_buf, dble_buf, map,map2, offset);
|
|
if ( mypid == 0 && ilut_ptr->outputLevel >= 1 )
|
|
printf("DDILUT : preconditioner pattern reused.\n");
|
|
}
|
|
if ( mypid == 0 && ilut_ptr->outputLevel > 2 )
|
|
{
|
|
for ( i = 0; i < ilut_ptr->extNrows; i++ )
|
|
for ( j = ilut_ptr->mat_ia[i]; j < ilut_ptr->mat_ia[i+1]; j++ )
|
|
printf("LA(%d,%d) = %e;\n", i+1, ilut_ptr->mat_ja[j]+1,
|
|
ilut_ptr->mat_aa[j]);
|
|
}
|
|
|
|
ilut_ptr->mh_mat = mh_mat;
|
|
if ( mh_mat->rowptr != NULL ) free (mh_mat->rowptr);
|
|
if ( mh_mat->colnum != NULL ) free (mh_mat->colnum);
|
|
if ( mh_mat->values != NULL ) free (mh_mat->values);
|
|
mh_mat->rowptr = NULL;
|
|
mh_mat->colnum = NULL;
|
|
mh_mat->values = NULL;
|
|
if ( map != NULL ) free(map);
|
|
if ( map2 != NULL ) free(map2);
|
|
if ( int_buf != NULL ) free(int_buf);
|
|
if ( dble_buf != NULL ) free(dble_buf);
|
|
if ( recv_lengths != NULL ) free(recv_lengths);
|
|
free( context->partition );
|
|
free( context );
|
|
}
|
|
|
|
/*****************************************************************************/
|
|
/* subroutines used for constructing overlapped matrix */
|
|
/*****************************************************************************/
|
|
|
|
int HYPRE_LSI_DDIlutGetRowLengths(MH_Matrix *Amat, int *leng, int **recv_leng)
|
|
{
|
|
int i, j, m, mypid, index, *temp_list, allocated_space, length;
|
|
int nRecv, *recvProc, *recvLeng, *cols, total_recv, mtype, msgtype;
|
|
int nSend, *sendProc, *sendLeng, **sendList, proc_id, offset;
|
|
double *vals;
|
|
MPI_Request *Request;
|
|
MPI_Status status;
|
|
MH_Context *context;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* fetch communication information */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &mypid);
|
|
nRecv = Amat->recvProcCnt;
|
|
nSend = Amat->sendProcCnt;
|
|
recvProc = Amat->recvProc;
|
|
recvLeng = Amat->recvLeng;
|
|
sendProc = Amat->sendProc;
|
|
sendLeng = Amat->sendLeng;
|
|
sendList = Amat->sendList;
|
|
total_recv = 0;
|
|
for ( i = 0; i < nRecv; i++ ) total_recv += recvLeng[i];
|
|
|
|
(*leng) = total_recv;
|
|
if ( nRecv <= 0 ) (*recv_leng) = NULL;
|
|
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* post receives for all messages */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
(*recv_leng) = (int *) malloc(total_recv * sizeof(int));
|
|
if (nRecv > 0) Request = (MPI_Request *) malloc(nRecv*sizeof(MPI_Request));
|
|
offset = 0;
|
|
mtype = 2001;
|
|
for (i = 0; i < nRecv; i++)
|
|
{
|
|
proc_id = recvProc[i];
|
|
msgtype = mtype;
|
|
length = recvLeng[i];
|
|
MPI_Irecv((void *) &((*recv_leng)[offset]), length, MPI_INT, proc_id,
|
|
msgtype, MPI_COMM_WORLD, &Request[i]);
|
|
offset += length;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* write out all messages */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
context = (MH_Context *) malloc(sizeof(MH_Context));
|
|
context->Amat = Amat;
|
|
allocated_space = 100;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
for (i = 0; i < nSend; i++)
|
|
{
|
|
proc_id = sendProc[i];
|
|
length = sendLeng[i];
|
|
temp_list = (int*) malloc(sendLeng[i] * sizeof(int));
|
|
for (j = 0; j < length; j++)
|
|
{
|
|
index = sendList[i][j];
|
|
while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0)
|
|
{
|
|
free(cols); free(vals);
|
|
allocated_space += 200 + 1;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
}
|
|
temp_list[j] = m;
|
|
}
|
|
msgtype = mtype;
|
|
MPI_Send((void*)temp_list,length,MPI_INT,proc_id,msgtype,MPI_COMM_WORLD);
|
|
free( temp_list );
|
|
}
|
|
free(cols);
|
|
free(vals);
|
|
free(context);
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* wait for messages */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
for ( i = 0; i < nRecv; i++ )
|
|
{
|
|
MPI_Wait( &Request[i], &status );
|
|
}
|
|
|
|
if (nRecv > 0) free( Request );
|
|
return 0;
|
|
}
|
|
|
|
/*****************************************************************************/
|
|
/* needed for overlapped smoothers */
|
|
/*****************************************************************************/
|
|
|
|
int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng,
|
|
int Noffset, int *map, int *map2, int **int_buf,
|
|
double **dble_buf)
|
|
{
|
|
int i, j, k, m, *temp_list, length, offset, allocated_space, proc_id;
|
|
int nRecv, nSend, *recvProc, *sendProc, total_recv, mtype, msgtype;
|
|
int *sendLeng, *recvLeng, **sendList, *cols, *isend_buf, Nrows;
|
|
int nnz, nnz_offset, index, mypid;
|
|
double *vals, *send_buf;
|
|
MPI_Request *request;
|
|
MPI_Status status;
|
|
MH_Context *context;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* fetch communication information */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &mypid);
|
|
Nrows = Amat->Nrows;
|
|
nRecv = Amat->recvProcCnt;
|
|
nSend = Amat->sendProcCnt;
|
|
recvProc = Amat->recvProc;
|
|
recvLeng = Amat->recvLeng;
|
|
sendProc = Amat->sendProc;
|
|
sendLeng = Amat->sendLeng;
|
|
sendList = Amat->sendList;
|
|
if ( nRecv <= 0 ) { (*int_buf) = NULL; (*dble_buf) = NULL;}
|
|
total_recv = 0;
|
|
for ( i = 0; i < leng; i++ ) total_recv += recv_leng[i];
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* allocate buffer space */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
if ( nRecv > 0 )
|
|
request = (MPI_Request *) malloc(nRecv*sizeof(MPI_Request ));
|
|
else request = NULL;
|
|
|
|
if ( total_recv > 0 )
|
|
{
|
|
(*int_buf) = (int *) malloc(total_recv * sizeof(int));
|
|
(*dble_buf) = (double *) malloc(total_recv * sizeof(double));
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* post receives for all messages */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
offset = 0;
|
|
mtype = 2002;
|
|
nnz_offset = 0;
|
|
for (i = 0; i < nRecv; i++)
|
|
{
|
|
proc_id = recvProc[i];
|
|
msgtype = mtype;
|
|
length = recvLeng[i];
|
|
nnz = 0;
|
|
for (j = 0; j < length; j++) nnz += recv_leng[offset+j];
|
|
|
|
MPI_Irecv((void *) &((*dble_buf)[nnz_offset]), nnz, MPI_DOUBLE,
|
|
proc_id, msgtype, MPI_COMM_WORLD, request+i);
|
|
offset += length;
|
|
nnz_offset += nnz;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* send rows to other processors */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
context = (MH_Context *) malloc(sizeof(MH_Context));
|
|
context->Amat = Amat;
|
|
mtype = 2002;
|
|
allocated_space = 100;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
for (i = 0; i < nSend; i++)
|
|
{
|
|
proc_id = sendProc[i];
|
|
length = sendLeng[i];
|
|
nnz = 0;
|
|
for (j = 0; j < length; j++)
|
|
{
|
|
index = sendList[i][j];
|
|
while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0)
|
|
{
|
|
free(cols); free(vals);
|
|
allocated_space += 200 + 1;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
}
|
|
nnz += m;
|
|
}
|
|
if ( nnz > 0 ) send_buf = (double *) malloc( nnz * sizeof(double));
|
|
offset = 0;
|
|
for (j = 0; j < length; j++)
|
|
{
|
|
index = sendList[i][j];
|
|
MH_GetRow(context,1,&index,allocated_space,cols,vals,&m);
|
|
for (k = 0; k < m; k++) send_buf[offset+k] = vals[k];
|
|
offset += m;
|
|
}
|
|
msgtype = mtype;
|
|
MPI_Send((void*) send_buf, nnz, MPI_DOUBLE, proc_id, msgtype,
|
|
MPI_COMM_WORLD);
|
|
if ( nnz > 0 ) free( send_buf );
|
|
}
|
|
free(cols);
|
|
free(vals);
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* wait for all messages */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
for (i = 0; i < nRecv; i++)
|
|
{
|
|
MPI_Wait(request+i, &status);
|
|
}
|
|
|
|
/* ----------------------------------------------------------- */
|
|
/* post receives for all messages */
|
|
/* ----------------------------------------------------------- */
|
|
|
|
mtype = 2003;
|
|
offset = 0;
|
|
nnz_offset = 0;
|
|
for (i = 0; i < nRecv; i++)
|
|
{
|
|
proc_id = recvProc[i];
|
|
msgtype = mtype;
|
|
length = recvLeng[i];
|
|
nnz = 0;
|
|
for (j = 0; j < length; j++) nnz += recv_leng[offset+j];
|
|
MPI_Irecv((void *) &((*int_buf)[nnz_offset]), nnz, MPI_INT,
|
|
proc_id, msgtype, MPI_COMM_WORLD, request+i);
|
|
offset += length;
|
|
nnz_offset += nnz;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* send rows to other processors */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
mtype = 2003;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
for (i = 0; i < nSend; i++)
|
|
{
|
|
proc_id = sendProc[i];
|
|
length = sendLeng[i];
|
|
nnz = 0;
|
|
for (j = 0; j < length; j++)
|
|
{
|
|
index = sendList[i][j];
|
|
MH_GetRow(context,1,&index,allocated_space,cols,vals,&m);
|
|
nnz += m;
|
|
}
|
|
if ( nnz > 0 ) isend_buf = (int *) malloc( nnz * sizeof(int));
|
|
offset = 0;
|
|
for (j = 0; j < length; j++)
|
|
{
|
|
index = sendList[i][j];
|
|
MH_GetRow(context,1,&index,allocated_space,cols,vals,&m);
|
|
for (k = 0; k < m; k++)
|
|
{
|
|
if ( cols[k] < Nrows ) isend_buf[offset+k] = cols[k] + Noffset;
|
|
else isend_buf[offset+k] = map[cols[k]-Nrows];
|
|
}
|
|
offset += m;
|
|
}
|
|
msgtype = mtype;
|
|
MPI_Send((void*) isend_buf, nnz, MPI_INT, proc_id, msgtype,
|
|
MPI_COMM_WORLD);
|
|
if ( nnz > 0 ) free( isend_buf );
|
|
}
|
|
free(cols);
|
|
free(vals);
|
|
|
|
/* ----------------------------------------------------------- */
|
|
/* post receives for all messages */
|
|
/* ----------------------------------------------------------- */
|
|
|
|
for (i = 0; i < nRecv; i++)
|
|
{
|
|
MPI_Wait(request+i, &status);
|
|
}
|
|
|
|
free(request);
|
|
free(context);
|
|
return 0;
|
|
}
|
|
|
|
/*****************************************************************************/
|
|
/* construct an enlarged overlapped local matrix */
|
|
/*****************************************************************************/
|
|
|
|
int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *mh_mat,
|
|
int *total_recv_leng, int **recv_lengths, int **int_buf,
|
|
double **dble_buf, int **sindex_array, int **sindex_array2,
|
|
int *offset)
|
|
{
|
|
int i, j, nprocs, mypid, Nrows, *proc_array, *proc_array2;
|
|
int extNrows, NrowsOffset, *index_array, *index_array2;
|
|
int nRecv, *recvLeng;
|
|
double *dble_array;
|
|
MH_Context *context;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* fetch communication information */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &mypid);
|
|
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* fetch matrix information */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
nRecv = mh_mat->recvProcCnt;
|
|
recvLeng = mh_mat->recvLeng;
|
|
Nrows = mh_mat->Nrows;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* compute the enlarged matrix size */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
(*total_recv_leng) = 0;
|
|
for ( i = 0; i < nRecv; i++ ) (*total_recv_leng) += recvLeng[i];
|
|
extNrows = Nrows + (*total_recv_leng);
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* compose NrowsOffset and processor offsets proc_array */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
proc_array = (int *) malloc(nprocs * sizeof(int) );
|
|
proc_array2 = (int *) malloc(nprocs * sizeof(int) );
|
|
for ( i = 0; i < nprocs; i++ ) proc_array2[i] = 0;
|
|
proc_array2[mypid] = Nrows;
|
|
MPI_Allreduce(proc_array2,proc_array,nprocs,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
|
|
NrowsOffset = 0;
|
|
for (i = 0; i < mypid; i++) NrowsOffset += proc_array[i];
|
|
for (i = 1; i < nprocs; i++) proc_array[i] += proc_array[i-1];
|
|
free(proc_array2);
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* compose the column index map (index_array,index_array2) */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
context = (MH_Context *) malloc(sizeof(MH_Context));
|
|
context->comm = MPI_COMM_WORLD;
|
|
context->Amat = mh_mat;
|
|
dble_array = (double *) malloc(extNrows *sizeof(double));
|
|
for (i = Nrows; i < extNrows; i++) dble_array[i] = 0.0;
|
|
for (i = 0; i < Nrows; i++) dble_array[i] = 1.0 * ( i + NrowsOffset );
|
|
MH_ExchBdry(dble_array, context);
|
|
if ( extNrows-Nrows > 0 )
|
|
index_array = ( int *) malloc((extNrows-Nrows) * sizeof(int));
|
|
else
|
|
index_array = NULL;
|
|
for (i = Nrows; i < extNrows; i++) index_array[i-Nrows] = dble_array[i];
|
|
if ( extNrows-Nrows > 0 )
|
|
index_array2 = (int *) malloc((extNrows-Nrows) *sizeof(int));
|
|
else
|
|
index_array2 = NULL;
|
|
for (i = 0; i < extNrows-Nrows; i++) index_array2[i] = i;
|
|
free( dble_array );
|
|
free(context);
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* send the lengths of each row to remote processor */
|
|
/* at the end, additional row information should be given */
|
|
/* in total_recv_leng, recv_lengths, int_buf, dble_buf */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
HYPRE_LSI_DDIlutGetRowLengths(mh_mat, total_recv_leng, recv_lengths);
|
|
HYPRE_LSI_DDIlutGetOffProcRows(mh_mat, *total_recv_leng, *recv_lengths,
|
|
NrowsOffset,index_array,index_array2,int_buf, dble_buf);
|
|
|
|
free(proc_array);
|
|
HYPRE_LSI_Sort(index_array, extNrows-Nrows, index_array2, NULL);
|
|
(*sindex_array) = index_array;
|
|
(*sindex_array2) = index_array2;
|
|
(*offset) = NrowsOffset;
|
|
return 0;
|
|
}
|
|
|
|
/*****************************************************************************/
|
|
/* function for doing ILUT decomposition */
|
|
/* ( based on ILU(0) + ILUT based on magnitude) */
|
|
/*****************************************************************************/
|
|
|
|
int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
|
|
int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
|
|
int *map, int *map2, int Noffset)
|
|
{
|
|
int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
|
|
int index, first, Lcount, Ucount, ncnt, j, k, total_nnz;
|
|
int sortcnt, colIndex, offset, nnz, nnz_count, Nrows, extNrows;
|
|
int *track_array, track_leng, num_small_pivot, printstep;
|
|
int *sortcols;
|
|
double *vals, ddata, thresh, *mat_aa, *diagonal, *rowNorms;
|
|
double *dble_buf, fillin, tau, rel_tau, *sortvals;
|
|
MH_Context *context;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* fetch ILUT parameters */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
MPI_Comm_rank(ilut_ptr->comm, &mypid);
|
|
fillin = ilut_ptr->fillin;
|
|
tau = ilut_ptr->thresh;
|
|
Nrows = Amat->Nrows;
|
|
extNrows = Nrows + total_recv_leng;
|
|
ilut_ptr->Nrows = Nrows;
|
|
ilut_ptr->extNrows = extNrows;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* allocate temporary storage space */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
allocated_space = extNrows;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
sortcols = (int *) malloc(extNrows * sizeof(int));
|
|
sortvals = (double *) malloc(extNrows * sizeof(double));
|
|
dble_buf = (double *) malloc(extNrows * sizeof(double));
|
|
diagonal = (double *) malloc(extNrows * sizeof(double));
|
|
rowNorms = (double *) malloc(extNrows * sizeof(double));
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* compute the storage requirement for the ILU matrix */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
context = (MH_Context *) malloc(sizeof(MH_Context));
|
|
context->Amat = Amat;
|
|
total_nnz = 0;
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
rowNorms[i] = 0.0;
|
|
while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
|
|
{
|
|
free(vals); free(cols);
|
|
allocated_space += 200 + 1;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
}
|
|
total_nnz += m;
|
|
for ( j = 0; j < m; j++ ) rowNorms[i] += abs(vals[j]);
|
|
rowNorms[i] /= extNrows;
|
|
}
|
|
for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
|
|
total_nnz = (int) ((double) total_nnz * (fillin + 1.0));
|
|
ilut_ptr->mat_ia = (int *) malloc( (extNrows + 1 ) * sizeof(int));
|
|
ilut_ptr->mat_ja = (int *) malloc( total_nnz * sizeof(int));
|
|
ilut_ptr->mat_aa = (double *) malloc( total_nnz * sizeof(double));
|
|
mat_ia = ilut_ptr->mat_ia;
|
|
mat_ja = ilut_ptr->mat_ja;
|
|
mat_aa = ilut_ptr->mat_aa;
|
|
|
|
offset = 0;
|
|
for ( i = 0; i < total_recv_leng; i++ )
|
|
{
|
|
rowNorms[i+Nrows] = 0.0;
|
|
for ( j = offset; j < offset+recv_lengths[i]; j++ )
|
|
{
|
|
index = ext_ja[j];
|
|
if ( index >= Noffset && index < Noffset+Nrows )
|
|
ext_ja[j] = index - Noffset;
|
|
else
|
|
{
|
|
m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
|
|
if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
|
|
else ext_ja[j] = -1;
|
|
}
|
|
if ( ext_ja[j] != -1 ) rowNorms[i+Nrows] += abs(ext_aa[j]);
|
|
}
|
|
rowNorms[i+Nrows] /= extNrows;
|
|
offset += recv_lengths[i];
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* process the first Nrows */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
num_small_pivot = 0;
|
|
nnz_count = 0;
|
|
nnz = 0;
|
|
mat_ia[0] = 0;
|
|
track_array = (int *) malloc( extNrows * sizeof(int) );
|
|
for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
|
|
|
|
printstep = extNrows / 10;
|
|
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 )
|
|
printf("%4d : 0DDILUT Processing row %d(%d)\n",mypid,i,extNrows);
|
|
|
|
track_leng = 0;
|
|
MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
|
|
if ( m < 0 ) printf("IlutDecompose WARNING(1): row nnz = %d\n",m);
|
|
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] < extNrows )
|
|
{
|
|
dble_buf[cols[j]] = vals[j];
|
|
track_array[track_leng++] = cols[j];
|
|
}
|
|
}
|
|
Lcount = Ucount = first = 0;
|
|
first = extNrows;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( dble_buf[index] != 0 )
|
|
{
|
|
if ( index < i ) Lcount++;
|
|
else if ( index > i ) Ucount++;
|
|
else if ( index == i ) diagonal[i] = dble_buf[index];
|
|
if ( index < first ) first = index;
|
|
}
|
|
}
|
|
Lcount = Lcount * fillin;
|
|
Ucount = Ucount * fillin;
|
|
rel_tau = tau * rowNorms[i];
|
|
for ( j = first; j < i; j++ )
|
|
{
|
|
if ( abs(dble_buf[j]) > rel_tau )
|
|
{
|
|
ddata = dble_buf[j] / diagonal[j];
|
|
for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
|
|
{
|
|
colIndex = mat_ja[k];
|
|
if ( colIndex > j )
|
|
{
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
dble_buf[colIndex] -= (ddata * mat_aa[k]);
|
|
else
|
|
{
|
|
dble_buf[colIndex] = - (ddata * mat_aa[k]);
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
track_array[track_leng++] = colIndex;
|
|
}
|
|
}
|
|
}
|
|
dble_buf[j] = ddata;
|
|
}
|
|
else dble_buf[j] = 0.0;
|
|
}
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] < extNrows )
|
|
{
|
|
vals[j] = dble_buf[cols[j]];
|
|
if ( cols[j] != i ) dble_buf[cols[j]] = 0.0;
|
|
}
|
|
}
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i )
|
|
{
|
|
if ( dble_buf[index] < -rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = - dble_buf[index] * rowNorms[index];
|
|
}
|
|
else if ( dble_buf[index] > rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
if ( sortcnt > Lcount )
|
|
{
|
|
HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount);
|
|
for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] < i && vals[j] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = vals[j];
|
|
mat_ja[nnz_count++] = cols[j];
|
|
}
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
diagonal[i] = dble_buf[i];
|
|
if ( abs(diagonal[i]) < 1.0e-16 )
|
|
{
|
|
diagonal[i] = 1.0E-6;
|
|
num_small_pivot++;
|
|
}
|
|
mat_aa[nnz_count] = diagonal[i];
|
|
mat_ja[nnz_count++] = i;
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i )
|
|
{
|
|
if ( dble_buf[index] < -rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = - dble_buf[index] * rowNorms[index];
|
|
}
|
|
else if ( dble_buf[index] > rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
if ( sortcnt > Ucount )
|
|
{
|
|
HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount);
|
|
for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] > i && vals[j] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = vals[j];
|
|
mat_ja[nnz_count++] = cols[j];
|
|
}
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
dble_buf[i] = 0.0;
|
|
mat_ia[i+1] = nnz_count;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* process the off-processor rows */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
offset = 0;
|
|
for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
|
|
for ( i = 0; i < total_recv_leng; i++ )
|
|
{
|
|
if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 )
|
|
printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows);
|
|
|
|
track_leng = m = 0;
|
|
for ( j = offset; j < offset+recv_lengths[i]; j++ )
|
|
{
|
|
if ( ext_ja[j] != -1 )
|
|
{
|
|
dble_buf[ext_ja[j]] = ext_aa[j];
|
|
track_array[track_leng++] = ext_ja[j];
|
|
cols[m] = ext_ja[j];
|
|
vals[m++] = ext_aa[j];
|
|
}
|
|
}
|
|
Lcount = Ucount = 0;
|
|
first = extNrows;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( dble_buf[index] != 0 )
|
|
{
|
|
if ( index < i+Nrows ) Lcount++;
|
|
else if ( index > i+Nrows ) Ucount++;
|
|
else if ( i+Nrows == index ) diagonal[i+Nrows] = dble_buf[index];
|
|
if ( index < first ) first = index;
|
|
}
|
|
}
|
|
Lcount = Lcount * fillin;
|
|
Ucount = Ucount * fillin;
|
|
rel_tau = tau * rowNorms[i+Nrows];
|
|
for ( j = first; j < i+Nrows; j++ )
|
|
{
|
|
if ( abs(dble_buf[j]) > rel_tau )
|
|
{
|
|
ddata = dble_buf[j] / diagonal[j];
|
|
for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
|
|
{
|
|
colIndex = mat_ja[k];
|
|
if ( colIndex > j )
|
|
{
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
dble_buf[colIndex] -= (ddata * mat_aa[k]);
|
|
else
|
|
{
|
|
dble_buf[colIndex] = - (ddata * mat_aa[k]);
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
track_array[track_leng++] = colIndex;
|
|
}
|
|
}
|
|
}
|
|
dble_buf[j] = ddata;
|
|
}
|
|
else dble_buf[j] = 0.0;
|
|
}
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] < extNrows )
|
|
{
|
|
vals[j] = dble_buf[cols[j]];
|
|
if ( cols[j] != i+Nrows ) dble_buf[cols[j]] = 0.0;
|
|
}
|
|
}
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i+Nrows )
|
|
{
|
|
if ( dble_buf[index] < -rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = - dble_buf[index]*rowNorms[index];
|
|
}
|
|
else if ( dble_buf[index] > rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
if ( sortcnt > Lcount )
|
|
{
|
|
HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount);
|
|
for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] < i+Nrows && vals[j] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = vals[j];
|
|
mat_ja[nnz_count++] = cols[j];
|
|
}
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i+Nrows && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
diagonal[i+Nrows] = dble_buf[i+Nrows];
|
|
if ( abs(diagonal[i+Nrows]) < 1.0e-16 )
|
|
{
|
|
diagonal[i+Nrows] = 1.0E-6;
|
|
num_small_pivot++;
|
|
}
|
|
mat_aa[nnz_count] = diagonal[i+Nrows];
|
|
mat_ja[nnz_count++] = i+Nrows;
|
|
dble_buf[i+Nrows] = 0.0;
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i+Nrows )
|
|
{
|
|
if ( dble_buf[index] < -rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = - dble_buf[index] * rowNorms[index];
|
|
}
|
|
else if ( dble_buf[index] > rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
if ( sortcnt > Ucount )
|
|
{
|
|
HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount);
|
|
for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] > i+Nrows && cols[j] < extNrows && vals[j] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = vals[j];
|
|
mat_ja[nnz_count++] = cols[j];
|
|
}
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i+Nrows && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
mat_ia[i+Nrows+1] = nnz_count;
|
|
offset += recv_lengths[i];
|
|
}
|
|
if ( nnz_count > total_nnz )
|
|
printf("WARNING in ILUTDecomp : memory bound passed.\n");
|
|
if ( ilut_ptr->outputLevel > 0 )
|
|
{
|
|
printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count);
|
|
printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
|
|
}
|
|
|
|
/* ---------------------------------------------------------- */
|
|
/* deallocate temporary storage space */
|
|
/* ---------------------------------------------------------- */
|
|
|
|
free(cols);
|
|
free(vals);
|
|
free(sortcols);
|
|
free(sortvals);
|
|
free(dble_buf);
|
|
free(diagonal);
|
|
free(rowNorms);
|
|
free(context);
|
|
free(track_array);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*****************************************************************************/
|
|
/* function for doing ILUT decomposition */
|
|
/* (attempted for pattern reuse, but not done yet) */
|
|
/*****************************************************************************/
|
|
|
|
int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
|
|
int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
|
|
int *map, int *map2, int Noffset)
|
|
{
|
|
int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
|
|
int index, first, Lcount, Ucount, ncnt, j, k, total_nnz;
|
|
int sortcnt, colIndex, offset, nnz, nnz_count, Nrows, extNrows;
|
|
int *track_array, track_leng, num_small_pivot, printstep, ndisc;
|
|
int *sortcols;
|
|
double *vals, ddata, thresh, *mat_aa, *diagonal, *rowNorms;
|
|
double *dble_buf, fillin, tau, rel_tau, *sortvals, absval;
|
|
MH_Context *context;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* fetch ILUT parameters */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
MPI_Comm_rank(ilut_ptr->comm, &mypid);
|
|
fillin = ilut_ptr->fillin;
|
|
tau = ilut_ptr->thresh;
|
|
Nrows = Amat->Nrows;
|
|
extNrows = Nrows + total_recv_leng;
|
|
ilut_ptr->Nrows = Nrows;
|
|
ilut_ptr->extNrows = extNrows;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* allocate temporary storage space */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
allocated_space = extNrows;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
sortcols = (int *) malloc(extNrows * sizeof(int));
|
|
sortvals = (double *) malloc(extNrows * sizeof(double));
|
|
dble_buf = (double *) malloc(extNrows * sizeof(double));
|
|
diagonal = (double *) malloc(extNrows * sizeof(double));
|
|
rowNorms = (double *) malloc(extNrows * sizeof(double));
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* compute the storage requirement for the ILU matrix */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
context = (MH_Context *) malloc(sizeof(MH_Context));
|
|
context->Amat = Amat;
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
rowNorms[i] = 0.0;
|
|
while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
|
|
{
|
|
free(vals); free(cols);
|
|
allocated_space += 200 + 1;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
}
|
|
for ( j = 0; j < m; j++ ) rowNorms[i] += abs(vals[j]);
|
|
rowNorms[i] /= extNrows;
|
|
}
|
|
total_nnz = 0;
|
|
for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
|
|
total_nnz = (int) ((double) total_nnz*(fillin+1.0)) + ilut_ptr->mat_ia[Nrows];
|
|
mat_ia = ilut_ptr->mat_ia;
|
|
mat_ja = ilut_ptr->mat_ja;
|
|
mat_aa = ilut_ptr->mat_aa;
|
|
ilut_ptr->mat_ia = (int *) malloc( (extNrows + 1 ) * sizeof(int));
|
|
ilut_ptr->mat_ja = (int *) malloc( total_nnz * sizeof(int));
|
|
ilut_ptr->mat_aa = (double *) malloc( total_nnz * sizeof(double));
|
|
|
|
ncnt = 0;
|
|
ilut_ptr->mat_ia[0] = 0;
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
|
|
if ( mat_ja[j] >= 0 && mat_ja[j] < extNrows )
|
|
ilut_ptr->mat_ja[ncnt++] = mat_ja[j];
|
|
ilut_ptr->mat_ia[i+1] = ncnt;
|
|
}
|
|
if ( mat_ia != NULL ) free( mat_ia );
|
|
if ( mat_ja != NULL ) free( mat_ja );
|
|
if ( mat_aa != NULL ) free( mat_aa );
|
|
mat_ia = ilut_ptr->mat_ia;
|
|
mat_ja = ilut_ptr->mat_ja;
|
|
mat_aa = ilut_ptr->mat_aa;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* process the first Nrows */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
num_small_pivot = 0;
|
|
nnz = 0;
|
|
mat_ia[0] = 0;
|
|
track_array = (int *) malloc( extNrows * sizeof(int) );
|
|
for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
|
|
|
|
printstep = extNrows / 10;
|
|
|
|
ndisc = 0;
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 )
|
|
printf("%4d : 1DDILUT Processing row %d(%d,%d)\n",mypid,i,extNrows,Nrows);
|
|
|
|
MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
|
|
|
|
/* ------------------------------------------------------------- */
|
|
/* load the row into buffer */
|
|
/* ------------------------------------------------------------- */
|
|
|
|
track_leng = 0;
|
|
first = extNrows;
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
index = cols[j];
|
|
if ( index < extNrows )
|
|
{
|
|
dble_buf[index] = vals[j];
|
|
track_array[track_leng++] = index;
|
|
}
|
|
if ( index < first ) first = index;
|
|
}
|
|
for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
|
|
{
|
|
index = mat_ja[j];
|
|
if ( dble_buf[index] == 0.0 ) track_array[track_leng++] = index;
|
|
if ( index < first ) first = index;
|
|
}
|
|
if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++;
|
|
|
|
/* ------------------------------------------------------------- */
|
|
/* perform factorization */
|
|
/* ------------------------------------------------------------- */
|
|
|
|
rel_tau = tau * rowNorms[i];
|
|
for ( j = first; j < i; j++ )
|
|
{
|
|
if ( abs(dble_buf[j]) > rel_tau )
|
|
{
|
|
ddata = dble_buf[j] / diagonal[j];
|
|
for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
|
|
{
|
|
colIndex = mat_ja[k];
|
|
if ( colIndex > j )
|
|
{
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
dble_buf[colIndex] -= (ddata * mat_aa[k]);
|
|
else
|
|
{
|
|
dble_buf[colIndex] = - (ddata * mat_aa[k]);
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
track_array[track_leng++] = colIndex;
|
|
}
|
|
}
|
|
}
|
|
dble_buf[j] = ddata;
|
|
}
|
|
else dble_buf[j] = 0.0;
|
|
}
|
|
|
|
diagonal[i] = dble_buf[i];
|
|
if ( abs(diagonal[i]) < 1.0e-16 )
|
|
{
|
|
diagonal[i] = dble_buf[i] = 1.0E-6;
|
|
num_small_pivot++;
|
|
}
|
|
for (j = mat_ia[i]; j < mat_ia[i+1]; j++) mat_aa[j] = dble_buf[mat_ja[j]];
|
|
for ( j = 0; j < track_leng; j++ ) dble_buf[track_array[j]] = 0.0;
|
|
}
|
|
nnz_count = mat_ia[Nrows];
|
|
ncnt = 0;
|
|
k = 0;
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
for ( j = k; j < mat_ia[i+1]; j++ )
|
|
{
|
|
if ( mat_aa[j] != 0.0 )
|
|
{
|
|
mat_ja[ncnt] = mat_ja[j];
|
|
mat_aa[ncnt++] = mat_aa[j];
|
|
}
|
|
}
|
|
k = mat_ia[i+1];
|
|
mat_ia[i+1] = ncnt;
|
|
}
|
|
if ( ilut_ptr->outputLevel > 0 )
|
|
{
|
|
printf("%4d : DDILUT after Nrows - nnz = %d %d\n", mypid, nnz_count, ncnt);
|
|
printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
|
|
printf("%4d : DDILUT number of pattern mismatch = %d\n",mypid,ndisc);
|
|
}
|
|
nnz_count = ncnt;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* preparation for processing the off-processor rows */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
offset = 0;
|
|
for ( i = 0; i < total_recv_leng; i++ )
|
|
{
|
|
rowNorms[i+Nrows] = 0.0;
|
|
for ( j = offset; j < offset+recv_lengths[i]; j++ )
|
|
{
|
|
index = ext_ja[j];
|
|
if ( index >= Noffset && index < Noffset+Nrows )
|
|
ext_ja[j] = index - Noffset;
|
|
else
|
|
{
|
|
m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
|
|
if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
|
|
else ext_ja[j] = -1;
|
|
if ( ext_ja[j] >= extNrows ) ext_ja[j] = -1;
|
|
}
|
|
if ( ext_ja[j] != -1 ) rowNorms[i+Nrows] += abs(ext_aa[j]);
|
|
}
|
|
rowNorms[i+Nrows] /= extNrows;
|
|
offset += recv_lengths[i];
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* process the off-processor rows */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
offset = 0;
|
|
for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
|
|
for ( i = 0; i < total_recv_leng; i++ )
|
|
{
|
|
if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 )
|
|
printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows);
|
|
|
|
track_leng = m = 0;
|
|
for ( j = offset; j < offset+recv_lengths[i]; j++ )
|
|
{
|
|
index = ext_ja[j];
|
|
if ( index != -1 )
|
|
{
|
|
cols[m] = index;
|
|
vals[m++] = ext_aa[j];
|
|
track_array[track_leng++] = index;
|
|
dble_buf[index] = ext_aa[j];
|
|
}
|
|
}
|
|
Lcount = Ucount = 0;
|
|
first = extNrows;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( dble_buf[index] != 0 )
|
|
{
|
|
if ( index < i+Nrows ) Lcount++;
|
|
else if ( index > i+Nrows ) Ucount++;
|
|
else if ( i+Nrows == index ) diagonal[i+Nrows] = dble_buf[index];
|
|
if ( index < first ) first = index;
|
|
}
|
|
}
|
|
Lcount = Lcount * fillin;
|
|
Ucount = Ucount * fillin;
|
|
rel_tau = tau * rowNorms[i+Nrows];
|
|
for ( j = first; j < i+Nrows; j++ )
|
|
{
|
|
if ( abs(dble_buf[j]) > rel_tau )
|
|
{
|
|
ddata = dble_buf[j] / diagonal[j];
|
|
for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
|
|
{
|
|
colIndex = mat_ja[k];
|
|
if ( colIndex > j )
|
|
{
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
dble_buf[colIndex] -= (ddata * mat_aa[k]);
|
|
else
|
|
{
|
|
dble_buf[colIndex] = - (ddata * mat_aa[k]);
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
track_array[track_leng++] = colIndex;
|
|
}
|
|
}
|
|
}
|
|
dble_buf[j] = ddata;
|
|
}
|
|
else dble_buf[j] = 0.0;
|
|
}
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
index = cols[j];
|
|
vals[j] = dble_buf[index];
|
|
if ( index != i+Nrows ) dble_buf[index] = 0.0;
|
|
}
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i+Nrows )
|
|
{
|
|
absval = abs( dble_buf[index] );
|
|
if ( absval > rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = absval * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
if ( sortcnt > Lcount )
|
|
{
|
|
HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount);
|
|
for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] < i+Nrows && vals[j] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = vals[j];
|
|
mat_ja[nnz_count++] = cols[j];
|
|
}
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i+Nrows && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
diagonal[i+Nrows] = dble_buf[i+Nrows];
|
|
if ( abs(diagonal[i+Nrows]) < 1.0e-16 )
|
|
{
|
|
diagonal[i+Nrows] = 1.0E-6;
|
|
num_small_pivot++;
|
|
}
|
|
mat_aa[nnz_count] = diagonal[i+Nrows];
|
|
mat_ja[nnz_count++] = i+Nrows;
|
|
dble_buf[i+Nrows] = 0.0;
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i+Nrows )
|
|
{
|
|
absval = abs( dble_buf[index] );
|
|
if ( absval > rel_tau )
|
|
{
|
|
sortcols[sortcnt] = index;
|
|
sortvals[sortcnt++] = absval * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
if ( sortcnt > Ucount )
|
|
{
|
|
HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount);
|
|
for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] > i+Nrows && vals[j] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = vals[j];
|
|
mat_ja[nnz_count++] = cols[j];
|
|
}
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i+Nrows && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
mat_ia[i+Nrows+1] = nnz_count;
|
|
offset += recv_lengths[i];
|
|
}
|
|
|
|
if ( nnz_count > total_nnz )
|
|
printf("WARNING in ILUTDecomp : memory bound passed.\n");
|
|
if ( ilut_ptr->outputLevel > 0 )
|
|
{
|
|
printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count);
|
|
printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
|
|
}
|
|
|
|
/* ---------------------------------------------------------- */
|
|
/* deallocate temporary storage space */
|
|
/* ---------------------------------------------------------- */
|
|
|
|
free(cols);
|
|
free(vals);
|
|
free(sortcols);
|
|
free(sortvals);
|
|
free(dble_buf);
|
|
free(diagonal);
|
|
free(rowNorms);
|
|
free(context);
|
|
free(track_array);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*****************************************************************************/
|
|
/* function for doing ILUT decomposition */
|
|
/* (purely based on magnitude) */
|
|
/*****************************************************************************/
|
|
|
|
int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
|
|
int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
|
|
int *map, int *map2, int Noffset)
|
|
{
|
|
int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
|
|
int index, first, Lcount, Ucount, ncnt, j, k, total_nnz;
|
|
int sortcnt, colIndex, offset, nnz, nnz_count, Nrows, extNrows;
|
|
int *track_array, track_leng, num_small_pivot;
|
|
double *vals, ddata, thresh, *mat_aa, *diagonal, *rowNorms;
|
|
double *dble_buf, fillin, tau, rel_tau;
|
|
MH_Context *context;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* fetch ILUT parameters */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
MPI_Comm_rank(ilut_ptr->comm, &mypid);
|
|
fillin = ilut_ptr->fillin;
|
|
tau = ilut_ptr->thresh;
|
|
Nrows = Amat->Nrows;
|
|
extNrows = Nrows + total_recv_leng;
|
|
ilut_ptr->Nrows = Nrows;
|
|
ilut_ptr->extNrows = extNrows;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* allocate temporary storage space */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
allocated_space = extNrows;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
dble_buf = (double *) malloc(extNrows * sizeof(double));
|
|
diagonal = (double *) malloc(extNrows * sizeof(double));
|
|
rowNorms = (double *) malloc(extNrows * sizeof(double));
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* compute the storage requirement for the ILU matrix */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
context = (MH_Context *) malloc(sizeof(MH_Context));
|
|
context->Amat = Amat;
|
|
total_nnz = 0;
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
rowNorms[i] = 0.0;
|
|
while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
|
|
{
|
|
free(vals); free(cols);
|
|
allocated_space += 200 + 1;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
}
|
|
total_nnz += m;
|
|
for ( j = 0; j < m; j++ ) rowNorms[i] += abs(vals[j]);
|
|
rowNorms[i] /= extNrows;
|
|
}
|
|
for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
|
|
total_nnz = (int) ((double) total_nnz * (fillin + 1.0));
|
|
ilut_ptr->mat_ia = (int *) malloc( (extNrows + 1 ) * sizeof(int));
|
|
ilut_ptr->mat_ja = (int *) malloc( total_nnz * sizeof(int));
|
|
ilut_ptr->mat_aa = (double *) malloc( total_nnz * sizeof(double));
|
|
mat_ia = ilut_ptr->mat_ia;
|
|
mat_ja = ilut_ptr->mat_ja;
|
|
mat_aa = ilut_ptr->mat_aa;
|
|
|
|
offset = 0;
|
|
for ( i = 0; i < total_recv_leng; i++ )
|
|
{
|
|
rowNorms[i+Nrows] = 0.0;
|
|
for ( j = offset; j < offset+recv_lengths[i]; j++ )
|
|
{
|
|
index = ext_ja[j];
|
|
if ( index >= Noffset && index < Noffset+Nrows )
|
|
ext_ja[j] = index - Noffset;
|
|
else
|
|
{
|
|
m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
|
|
if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
|
|
else ext_ja[j] = -1;
|
|
}
|
|
if ( ext_ja[j] != -1 ) rowNorms[i+Nrows] += abs(ext_aa[j]);
|
|
}
|
|
rowNorms[i+Nrows] /= extNrows;
|
|
offset += recv_lengths[i];
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* process the first Nrows */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
num_small_pivot = 0;
|
|
nnz_count = 0;
|
|
nnz = 0;
|
|
mat_ia[0] = 0;
|
|
track_array = (int *) malloc( extNrows * sizeof(int) );
|
|
for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
|
|
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
if ( i % 1000 == 0 && ilut_ptr->outputLevel > 0 )
|
|
printf("%4d : 2DDILUT Processing row %d(%d)\n",mypid,i,extNrows);
|
|
|
|
track_leng = 0;
|
|
MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
|
|
if ( m < 0 )
|
|
printf("IlutDecompose WARNING(1): row nnz = %d\n",m);
|
|
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( cols[j] < extNrows )
|
|
{
|
|
dble_buf[cols[j]] = vals[j];
|
|
track_array[track_leng++] = cols[j];
|
|
}
|
|
}
|
|
Lcount = Ucount = first = 0;
|
|
first = extNrows;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( dble_buf[index] != 0 )
|
|
{
|
|
if ( index < i ) Lcount++;
|
|
else if ( index > i ) Ucount++;
|
|
else if ( index == i ) diagonal[i] = dble_buf[index];
|
|
if ( index < first ) first = index;
|
|
}
|
|
}
|
|
Lcount = Lcount * (fillin + 1);
|
|
Ucount = Ucount * (fillin + 1);
|
|
rel_tau = tau * rowNorms[i];
|
|
for ( j = first; j < i; j++ )
|
|
{
|
|
if ( abs(dble_buf[j]) > rel_tau )
|
|
{
|
|
ddata = dble_buf[j] / diagonal[j];
|
|
for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
|
|
{
|
|
colIndex = mat_ja[k];
|
|
if ( colIndex > j )
|
|
{
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
dble_buf[colIndex] -= (ddata * mat_aa[k]);
|
|
else
|
|
{
|
|
dble_buf[colIndex] = - (ddata * mat_aa[k]);
|
|
track_array[track_leng++] = colIndex;
|
|
}
|
|
}
|
|
}
|
|
dble_buf[j] = ddata;
|
|
}
|
|
else dble_buf[j] = 0.0;
|
|
}
|
|
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i )
|
|
{
|
|
if ( dble_buf[index] < -rel_tau )
|
|
{
|
|
cols[sortcnt] = index;
|
|
vals[sortcnt++] = - dble_buf[index] * rowNorms[index];
|
|
}
|
|
else if ( dble_buf[index] > rel_tau )
|
|
{
|
|
cols[sortcnt] = index;
|
|
vals[sortcnt++] = dble_buf[index] * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
|
|
if ( sortcnt > Lcount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Lcount);
|
|
if ( sortcnt > Lcount )
|
|
{
|
|
for ( j = Lcount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
diagonal[i] = dble_buf[i];
|
|
if ( abs(diagonal[i]) < 1.0e-16 )
|
|
{
|
|
diagonal[i] = 1.0E-6;
|
|
num_small_pivot++;
|
|
}
|
|
mat_aa[nnz_count] = diagonal[i];
|
|
mat_ja[nnz_count++] = i;
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i )
|
|
{
|
|
if ( dble_buf[index] < -rel_tau )
|
|
{
|
|
cols[sortcnt] = index;
|
|
vals[sortcnt++] = - dble_buf[index] * rowNorms[index];
|
|
}
|
|
else if ( dble_buf[index] > rel_tau )
|
|
{
|
|
cols[sortcnt] = index;
|
|
vals[sortcnt++] = dble_buf[index] * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
if ( sortcnt > Ucount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Ucount);
|
|
if ( sortcnt > Ucount )
|
|
{
|
|
for ( j = Ucount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
dble_buf[i] = 0.0;
|
|
mat_ia[i+1] = nnz_count;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* process the off-processor rows */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
offset = 0;
|
|
for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
|
|
for ( i = 0; i < total_recv_leng; i++ )
|
|
{
|
|
if ( (i+Nrows) % 1000 == 0 && ilut_ptr->outputLevel > 0 )
|
|
printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows);
|
|
|
|
track_leng = 0;
|
|
for ( j = offset; j < offset+recv_lengths[i]; j++ )
|
|
{
|
|
if ( ext_ja[j] != -1 )
|
|
{
|
|
dble_buf[ext_ja[j]] = ext_aa[j];
|
|
track_array[track_leng++] = ext_ja[j];
|
|
}
|
|
}
|
|
Lcount = Ucount = 0;
|
|
first = extNrows;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( dble_buf[index] != 0 )
|
|
{
|
|
if ( index < i+Nrows ) Lcount++;
|
|
else if ( index > i+Nrows ) Ucount++;
|
|
else if ( i+Nrows == index ) diagonal[i+Nrows] = dble_buf[index];
|
|
if ( index < first ) first = index;
|
|
}
|
|
}
|
|
Lcount = Lcount * (fillin + 1);
|
|
Ucount = Ucount * (fillin + 1);
|
|
rel_tau = tau * rowNorms[i+Nrows];
|
|
for ( j = first; j < i+Nrows; j++ )
|
|
{
|
|
if ( abs(dble_buf[j]) > rel_tau )
|
|
{
|
|
ddata = dble_buf[j] / diagonal[j];
|
|
for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
|
|
{
|
|
colIndex = mat_ja[k];
|
|
if ( colIndex > j )
|
|
{
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
dble_buf[colIndex] -= (ddata * mat_aa[k]);
|
|
else
|
|
{
|
|
dble_buf[colIndex] = - (ddata * mat_aa[k]);
|
|
track_array[track_leng++] = colIndex;
|
|
}
|
|
}
|
|
}
|
|
dble_buf[j] = ddata;
|
|
}
|
|
else dble_buf[j] = 0.0;
|
|
}
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i+Nrows )
|
|
{
|
|
if ( dble_buf[index] < -rel_tau )
|
|
{
|
|
cols[sortcnt] = index;
|
|
vals[sortcnt++] = - dble_buf[index]*rowNorms[index];
|
|
}
|
|
else if ( dble_buf[index] > rel_tau )
|
|
{
|
|
cols[sortcnt] = index;
|
|
vals[sortcnt++] = dble_buf[index] * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
if ( sortcnt > Lcount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Lcount);
|
|
if ( sortcnt > Lcount )
|
|
{
|
|
for ( j = Lcount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index < i+Nrows && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
diagonal[i+Nrows] = dble_buf[i+Nrows];
|
|
if ( abs(diagonal[i+Nrows]) < 1.0e-16 )
|
|
{
|
|
diagonal[i+Nrows] = 1.0E-6;
|
|
num_small_pivot++;
|
|
}
|
|
mat_aa[nnz_count] = diagonal[i+Nrows];
|
|
mat_ja[nnz_count++] = i+Nrows;
|
|
dble_buf[i+Nrows] = 0.0;
|
|
sortcnt = 0;
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i+Nrows )
|
|
{
|
|
if ( dble_buf[index] < -rel_tau )
|
|
{
|
|
cols[sortcnt] = index;
|
|
vals[sortcnt++] = - dble_buf[index] * rowNorms[index];
|
|
}
|
|
else if ( dble_buf[index] > rel_tau )
|
|
{
|
|
cols[sortcnt] = index;
|
|
vals[sortcnt++] = dble_buf[index] * rowNorms[index];
|
|
}
|
|
else dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
if ( sortcnt > Ucount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Ucount);
|
|
if ( sortcnt > Ucount )
|
|
{
|
|
for ( j = Ucount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
|
|
}
|
|
for ( j = 0; j < track_leng; j++ )
|
|
{
|
|
index = track_array[j];
|
|
if ( index > i+Nrows && dble_buf[index] != 0.0 )
|
|
{
|
|
mat_aa[nnz_count] = dble_buf[index];
|
|
mat_ja[nnz_count++] = index;
|
|
dble_buf[index] = 0.0;
|
|
}
|
|
}
|
|
mat_ia[i+Nrows+1] = nnz_count;
|
|
offset += recv_lengths[i];
|
|
}
|
|
if ( nnz_count > total_nnz )
|
|
printf("WARNING in ILUTDecomp : memory bound passed.\n");
|
|
if ( ilut_ptr->outputLevel > 0 )
|
|
{
|
|
printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count);
|
|
printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
|
|
}
|
|
|
|
/* ---------------------------------------------------------- */
|
|
/* deallocate temporary storage space */
|
|
/* ---------------------------------------------------------- */
|
|
|
|
free(cols);
|
|
free(vals);
|
|
free(dble_buf);
|
|
free(diagonal);
|
|
free(rowNorms);
|
|
free(context);
|
|
free(track_array);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*****************************************************************************/
|
|
/* function for doing ILUT decomposition */
|
|
/* (This version is based on ILU(1). It converges less well as the original */
|
|
/* ILUT based on ILU(0) and magnitude. Its setup time is not faster either)*/
|
|
/*****************************************************************************/
|
|
|
|
int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
|
|
int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
|
|
int *map, int *map2, int Noffset)
|
|
{
|
|
int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
|
|
int index, first, ncnt, j, k, total_nnz;
|
|
int colIndex, offset, nnz_count, Nrows, extNrows;
|
|
int *track_array, track_leng, num_small_pivot, printstep;
|
|
int fillin, *mat_ia2, *mat_ja2, *iarray;
|
|
double *vals, ddata, thresh, *mat_aa, *diagonal, *rowNorms, *darray;
|
|
double *dble_buf, tau, rel_tau, *mat_aa2;
|
|
MH_Context *context;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* fetch ILUT parameters */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
MPI_Comm_rank(ilut_ptr->comm, &mypid);
|
|
fillin = (int) ilut_ptr->fillin;
|
|
tau = ilut_ptr->thresh;
|
|
Nrows = Amat->Nrows;
|
|
extNrows = Nrows + total_recv_leng;
|
|
ilut_ptr->Nrows = Nrows;
|
|
ilut_ptr->extNrows = extNrows;
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* allocate temporary storage space */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
allocated_space = extNrows;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
dble_buf = (double *) malloc(extNrows * sizeof(double));
|
|
diagonal = (double *) malloc(extNrows * sizeof(double));
|
|
rowNorms = (double *) malloc(extNrows * sizeof(double));
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* compute the storage requirement for the ILU matrix */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
context = (MH_Context *) malloc(sizeof(MH_Context));
|
|
context->Amat = Amat;
|
|
total_nnz = 0;
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
rowNorms[i] = 0.0;
|
|
while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
|
|
{
|
|
free(vals); free(cols);
|
|
allocated_space += 200 + 1;
|
|
cols = (int *) malloc(allocated_space * sizeof(int));
|
|
vals = (double *) malloc(allocated_space * sizeof(double));
|
|
}
|
|
total_nnz += m;
|
|
for ( j = 0; j < m; j++ ) rowNorms[i] += abs(vals[j]);
|
|
rowNorms[i] /= extNrows;
|
|
}
|
|
for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
|
|
mat_ia = (int *) malloc( (extNrows + 1 ) * sizeof(int));
|
|
mat_ja = (int *) malloc( total_nnz * sizeof(int));
|
|
mat_aa = (double *) malloc( total_nnz * sizeof(double));
|
|
total_nnz = total_nnz * 7;
|
|
mat_ia2 = (int *) malloc( (extNrows + 1 ) * sizeof(int));
|
|
mat_ja2 = (int *) malloc( total_nnz * sizeof(int));
|
|
ncnt = 0;
|
|
mat_ia[0] = 0;
|
|
for ( i = 0; i < Nrows; i++ )
|
|
{
|
|
MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
|
|
for ( j = 0; j < m; j++ )
|
|
{
|
|
if ( vals[j] != 0.0 )
|
|
{
|
|
mat_ja[ncnt] = cols[j];
|
|
mat_aa[ncnt++] = vals[j];
|
|
}
|
|
}
|
|
mat_ia[i+1] = ncnt;
|
|
}
|
|
offset = 0;
|
|
for ( i = 0; i < total_recv_leng; i++ )
|
|
{
|
|
rowNorms[i+Nrows] = 0.0;
|
|
for ( j = offset; j < offset+recv_lengths[i]; j++ )
|
|
{
|
|
index = ext_ja[j];
|
|
if ( index >= Noffset && index < Noffset+Nrows )
|
|
ext_ja[j] = index - Noffset;
|
|
else
|
|
{
|
|
m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
|
|
if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
|
|
else ext_ja[j] = -1;
|
|
}
|
|
if ( ext_ja[j] != -1 && ext_aa[j] != 0.0 )
|
|
{
|
|
rowNorms[i+Nrows] += abs(ext_aa[j]);
|
|
mat_ja[ncnt] = ext_ja[j];
|
|
mat_aa[ncnt++] = ext_aa[j];
|
|
}
|
|
}
|
|
rowNorms[i+Nrows] /= extNrows;
|
|
offset += recv_lengths[i];
|
|
mat_ia[Nrows+i+1] = ncnt;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* process the pattern */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
ncnt = 0;
|
|
mat_ia2[0] = 0;
|
|
printstep = extNrows / 10;
|
|
for ( i = 0; i < extNrows; i++ )
|
|
{
|
|
if ( ( i % printstep == 0 ) && ilut_ptr->outputLevel > 0 )
|
|
printf("%4d : DDILUT Processing pattern row = %d (%d)\n",mypid,i,extNrows);
|
|
k = mat_ia[i+1] - mat_ia[i];
|
|
for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
|
|
{
|
|
index = mat_ja[j];
|
|
k += ( mat_ia[index+1] - mat_ia[index] );
|
|
}
|
|
if ( (k+ncnt) > total_nnz )
|
|
{
|
|
iarray = mat_ja2;
|
|
total_nnz += (extNrows - i ) * k;
|
|
mat_ja2 = (int *) malloc( total_nnz * sizeof(int) );
|
|
for ( j = 0; j < ncnt; j++ ) mat_ja2[j] = iarray[j];
|
|
free( iarray );
|
|
}
|
|
for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
|
|
{
|
|
index = mat_ja[j];
|
|
mat_ja2[ncnt++] = index;
|
|
for (k = mat_ia[index]; k < mat_ia[index+1]; k++)
|
|
mat_ja2[ncnt++] = mat_ja[k];
|
|
}
|
|
HYPRE_LSI_Sort(&mat_ja2[mat_ia2[i]],ncnt-mat_ia2[i],NULL,NULL);
|
|
k = mat_ia2[i] + 1;
|
|
for ( j = mat_ia2[i]+1; j < ncnt; j++ )
|
|
{
|
|
if ( mat_ja2[j] != mat_ja2[k-1] ) mat_ja2[k++] = mat_ja2[j];
|
|
}
|
|
mat_ia2[i+1] = k;
|
|
ncnt = k;
|
|
}
|
|
for ( i = 0; i < ncnt; i++ )
|
|
if ( mat_ja2[i] < 0 || mat_ja2[i] >= extNrows )
|
|
printf("%4d : DDILUT ERROR ja %d = %d \n",mypid,i,mat_ja2[i]);
|
|
|
|
mat_aa2 = (double *) malloc( ncnt * sizeof(double));
|
|
|
|
/* ---------------------------------------------------------------- */
|
|
/* process the rows */
|
|
/* ---------------------------------------------------------------- */
|
|
|
|
num_small_pivot = 0;
|
|
track_array = (int *) malloc( extNrows * sizeof(int) );
|
|
for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
|
|
|
|
for ( i = 0; i < extNrows; i++ )
|
|
{
|
|
if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 )
|
|
printf("%4d : $DDILUT Processing row %d(%d,%d)\n",mypid,i,extNrows,Nrows);
|
|
|
|
/* ------------------------------------------------------------- */
|
|
/* load the row into buffer */
|
|
/* ------------------------------------------------------------- */
|
|
|
|
track_leng = 0;
|
|
first = extNrows;
|
|
for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
|
|
{
|
|
index = mat_ja[j];
|
|
dble_buf[index] = mat_aa[j];
|
|
track_array[track_leng++] = index;
|
|
if ( index < first ) first = index;
|
|
}
|
|
|
|
/* ------------------------------------------------------------- */
|
|
/* perform factorization */
|
|
/* ------------------------------------------------------------- */
|
|
|
|
rel_tau = tau * rowNorms[i];
|
|
for ( j = first; j < i; j++ )
|
|
{
|
|
if ( abs(dble_buf[j]) > rel_tau )
|
|
{
|
|
ddata = dble_buf[j] / diagonal[j];
|
|
for ( k = mat_ia2[j]; k < mat_ia2[j+1]; k++ )
|
|
{
|
|
colIndex = mat_ja2[k];
|
|
if ( colIndex > j && mat_aa2[k] != 0.0 )
|
|
{
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
dble_buf[colIndex] -= (ddata * mat_aa2[k]);
|
|
else
|
|
{
|
|
dble_buf[colIndex] = - (ddata * mat_aa2[k]);
|
|
if ( dble_buf[colIndex] != 0.0 )
|
|
track_array[track_leng++] = colIndex;
|
|
}
|
|
}
|
|
}
|
|
dble_buf[j] = ddata;
|
|
}
|
|
else dble_buf[j] = 0.0;
|
|
}
|
|
diagonal[i] = dble_buf[i];
|
|
if ( abs(diagonal[i]) < 1.0e-16 )
|
|
{
|
|
diagonal[i] = dble_buf[i] = 1.0E-6;
|
|
num_small_pivot++;
|
|
}
|
|
for (j = mat_ia2[i]; j < mat_ia2[i+1]; j++)
|
|
mat_aa2[j] = dble_buf[mat_ja2[j]];
|
|
for ( j = 0; j < track_leng; j++ ) dble_buf[track_array[j]] = 0.0;
|
|
}
|
|
nnz_count = mat_ia2[extNrows];
|
|
|
|
if ( ilut_ptr->outputLevel > 0 )
|
|
{
|
|
printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count);
|
|
printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
|
|
}
|
|
|
|
/* ---------------------------------------------------------- */
|
|
/* deallocate temporary storage space */
|
|
/* ---------------------------------------------------------- */
|
|
|
|
ilut_ptr->mat_ia = mat_ia2;
|
|
ilut_ptr->mat_ja = mat_ja2;
|
|
ilut_ptr->mat_aa = mat_aa2;
|
|
free(mat_ia);
|
|
free(mat_ja);
|
|
free(mat_aa);
|
|
free(cols);
|
|
free(vals);
|
|
free(dble_buf);
|
|
free(diagonal);
|
|
free(rowNorms);
|
|
free(context);
|
|
free(track_array);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|