hypre/parcsr_ls/gen_redcs_mat.c
falgout b335ec98aa Fixed some strict checking errors associated with type hypre_MPI_User_function.
Updated the check-mpi autotest script to catch MPI_Comm_size, etc.
2012-05-25 21:00:52 +00:00

481 lines
14 KiB
C

#include "_hypre_parcsr_ls.h"
#include "par_amg.h"
#define USE_ALLTOALL 0
/* here we have the sequential setup and solve - called from the
* parallel one - for the coarser levels */
HYPRE_Int hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
HYPRE_Int p_level,
HYPRE_Int coarse_threshold)
{
/* Par Data Structure variables */
hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data);
MPI_Comm comm = hypre_ParCSRMatrixComm(Par_A_array[0]);
MPI_Comm new_comm, seq_comm;
hypre_ParCSRMatrix *A_seq = NULL;
hypre_CSRMatrix *A_seq_diag;
hypre_CSRMatrix *A_seq_offd;
hypre_ParVector *F_seq = NULL;
hypre_ParVector *U_seq = NULL;
hypre_ParCSRMatrix *A;
HYPRE_Int **dof_func_array;
HYPRE_Int num_procs, my_id;
HYPRE_Int not_finished_coarsening;
HYPRE_Int level;
HYPRE_Solver coarse_solver;
/* misc */
dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data);
/*MPI Stuff */
hypre_MPI_Comm_size(comm, &num_procs);
hypre_MPI_Comm_rank(comm,&my_id);
/*initial */
level = p_level;
not_finished_coarsening = 1;
/* convert A at this level to sequential */
A = Par_A_array[level];
{
double *A_seq_data = NULL;
HYPRE_Int *A_seq_i = NULL;
HYPRE_Int *A_seq_offd_i = NULL;
HYPRE_Int *A_seq_j = NULL;
double *A_tmp_data = NULL;
HYPRE_Int *A_tmp_i = NULL;
HYPRE_Int *A_tmp_j = NULL;
HYPRE_Int *info, *displs, *displs2;
HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt;
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
double *A_diag_data = hypre_CSRMatrixData(A_diag);
double *A_offd_data = hypre_CSRMatrixData(A_offd);
HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);
HYPRE_Int new_num_procs, *row_starts;
hypre_GenerateSubComm(comm, num_rows, &new_comm);
/*hypre_MPI_Group orig_group, new_group;
HYPRE_Int *ranks, new_num_procs, *row_starts;
info = hypre_CTAlloc(HYPRE_Int, num_procs);
hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);
ranks = hypre_CTAlloc(HYPRE_Int, num_procs);
new_num_procs = 0;
for (i=0; i < num_procs; i++)
if (info[i])
{
ranks[new_num_procs] = i;
info[new_num_procs++] = info[i];
}
hypre_MPI_Comm_group(comm, &orig_group);
hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
hypre_MPI_Comm_create(comm, new_group, &new_comm);
hypre_MPI_Group_free(&new_group);
hypre_MPI_Group_free(&orig_group); */
if (num_rows)
{
hypre_MPI_Comm_size(new_comm, &new_num_procs);
info = hypre_CTAlloc(HYPRE_Int, new_num_procs);
hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm);
/* alloc space in seq data structure only for participating procs*/
HYPRE_BoomerAMGCreate(&coarse_solver);
HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
hypre_ParAMGDataMaxRowSum(amg_data));
HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
hypre_ParAMGDataStrongThreshold(amg_data));
HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
hypre_ParAMGDataCoarsenType(amg_data));
HYPRE_BoomerAMGSetInterpType(coarse_solver,
hypre_ParAMGDataInterpType(amg_data));
HYPRE_BoomerAMGSetTruncFactor(coarse_solver,
hypre_ParAMGDataTruncFactor(amg_data));
HYPRE_BoomerAMGSetPMaxElmts(coarse_solver,
hypre_ParAMGDataPMaxElmts(amg_data));
if (hypre_ParAMGDataUserRelaxType(amg_data) > -1)
HYPRE_BoomerAMGSetRelaxType(coarse_solver,
hypre_ParAMGDataUserRelaxType(amg_data));
HYPRE_BoomerAMGSetRelaxOrder(coarse_solver,
hypre_ParAMGDataRelaxOrder(amg_data));
HYPRE_BoomerAMGSetRelaxWt(coarse_solver,
hypre_ParAMGDataUserRelaxWeight(amg_data));
if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1)
HYPRE_BoomerAMGSetNumSweeps(coarse_solver,
hypre_ParAMGDataUserNumSweeps(amg_data));
HYPRE_BoomerAMGSetNumFunctions(coarse_solver,
hypre_ParAMGDataNumFunctions(amg_data));
HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1);
HYPRE_BoomerAMGSetTol(coarse_solver, 0);
/* Create CSR Matrix, will be Diag part of new matrix */
A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1);
A_tmp_i[0] = 0;
for (i=1; i < num_rows+1; i++)
A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1];
num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows];
A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
A_tmp_data = hypre_CTAlloc(double, num_nonzeros);
cnt = 0;
for (i=0; i < num_rows; i++)
{
for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
{
A_tmp_j[cnt] = A_diag_j[j]+first_row_index;
A_tmp_data[cnt++] = A_diag_data[j];
}
for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
{
A_tmp_j[cnt] = col_map_offd[A_offd_j[j]];
A_tmp_data[cnt++] = A_offd_data[j];
}
}
displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
displs[0] = 0;
for (i=1; i < new_num_procs+1; i++)
displs[i] = displs[i-1]+info[i-1];
size = displs[new_num_procs];
A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1);
A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1);
hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info,
displs, HYPRE_MPI_INT, new_comm );
displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
A_seq_i[0] = 0;
displs2[0] = 0;
for (j=1; j < displs[1]; j++)
A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
for (i=1; i < new_num_procs; i++)
{
for (j=displs[i]; j < displs[i+1]; j++)
{
A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
}
}
A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1];
displs2[new_num_procs] = A_seq_i[size];
for (i=1; i < new_num_procs+1; i++)
{
displs2[i] = A_seq_i[displs[i]];
info[i-1] = displs2[i] - displs2[i-1];
}
total_nnz = displs2[new_num_procs];
A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz);
A_seq_data = hypre_CTAlloc(double, total_nnz);
hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
A_seq_j, info, displs2,
HYPRE_MPI_INT, new_comm );
hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE,
A_seq_data, info, displs2,
hypre_MPI_DOUBLE, new_comm );
hypre_TFree(info);
hypre_TFree(displs);
hypre_TFree(displs2);
hypre_TFree(A_tmp_i);
hypre_TFree(A_tmp_j);
hypre_TFree(A_tmp_data);
row_starts = hypre_CTAlloc(HYPRE_Int,2);
row_starts[0] = 0;
row_starts[1] = size;
/* Create 1 proc communicator */
seq_comm = hypre_MPI_COMM_SELF;
A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size,
row_starts, row_starts,
0,total_nnz,0);
A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);
hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
hypre_CSRMatrixI(A_seq_diag) = A_seq_i;
hypre_CSRMatrixJ(A_seq_diag) = A_seq_j;
hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i;
F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
hypre_ParVectorOwnsPartitioning(F_seq) = 0;
hypre_ParVectorOwnsPartitioning(U_seq) = 0;
hypre_ParVectorInitialize(F_seq);
hypre_ParVectorInitialize(U_seq);
hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq);
hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
hypre_ParAMGDataACoarse(amg_data) = A_seq;
hypre_ParAMGDataFCoarse(amg_data) = F_seq;
hypre_ParAMGDataUCoarse(amg_data) = U_seq;
hypre_ParAMGDataNewComm(amg_data) = new_comm;
}
}
return 0;
}
/*--------------------------------------------------------------------------
* hypre_seqAMGCycle
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
HYPRE_Int p_level,
hypre_ParVector **Par_F_array,
hypre_ParVector **Par_U_array )
{
hypre_ParVector *Aux_U;
hypre_ParVector *Aux_F;
/* Local variables */
HYPRE_Int Solve_err_flag = 0;
HYPRE_Int n;
HYPRE_Int i;
hypre_Vector *u_local;
double *u_data;
HYPRE_Int first_index;
/* Acquire seq data */
MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data);
HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data);
hypre_ParVector *F_coarse = hypre_ParAMGDataFCoarse(amg_data);
hypre_ParVector *U_coarse = hypre_ParAMGDataUCoarse(amg_data);
Aux_U = Par_U_array[p_level];
Aux_F = Par_F_array[p_level];
first_index = hypre_ParVectorFirstIndex(Aux_U);
u_local = hypre_ParVectorLocalVector(Aux_U);
u_data = hypre_VectorData(u_local);
n = hypre_VectorSize(u_local);
if (A_coarse)
{
double *f_data;
hypre_Vector *f_local;
hypre_Vector *tmp_vec;
HYPRE_Int nf;
HYPRE_Int local_info;
double *recv_buf;
HYPRE_Int *displs, *info;
HYPRE_Int size;
HYPRE_Int new_num_procs;
hypre_MPI_Comm_size(new_comm, &new_num_procs);
f_local = hypre_ParVectorLocalVector(Aux_F);
f_data = hypre_VectorData(f_local);
nf = hypre_VectorSize(f_local);
/* first f */
info = hypre_CTAlloc(HYPRE_Int, new_num_procs);
local_info = nf;
hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm);
displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
displs[0] = 0;
for (i=1; i < new_num_procs+1; i++)
displs[i] = displs[i-1]+info[i-1];
size = displs[new_num_procs];
tmp_vec = hypre_ParVectorLocalVector(F_coarse);
recv_buf = hypre_VectorData(tmp_vec);
hypre_MPI_Allgatherv ( f_data, nf, hypre_MPI_DOUBLE,
recv_buf, info, displs,
hypre_MPI_DOUBLE, new_comm );
tmp_vec = hypre_ParVectorLocalVector(U_coarse);
recv_buf = hypre_VectorData(tmp_vec);
/*then u */
hypre_MPI_Allgatherv ( u_data, n, hypre_MPI_DOUBLE,
recv_buf, info, displs,
hypre_MPI_DOUBLE, new_comm );
/* clean up */
hypre_TFree(displs);
hypre_TFree(info);
hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);
/*copy my part of U to parallel vector */
{
double *local_data;
local_data = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
for (i = 0; i < n; i++)
{
u_data[i] = local_data[first_index+i];
}
}
}
return(Solve_err_flag);
}
/* generate sub communicator, which contains only idle processors */
HYPRE_Int hypre_GenerateSubComm(MPI_Comm comm, HYPRE_Int participate, MPI_Comm *new_comm_ptr)
{
MPI_Comm new_comm;
hypre_MPI_Group orig_group, new_group;
hypre_MPI_Op hypre_MPI_MERGE;
HYPRE_Int *info, *ranks, new_num_procs, my_info, my_id, num_procs;
HYPRE_Int *list_len;
hypre_MPI_Comm_rank(comm,&my_id);
if (participate)
my_info = 1;
else
my_info = 0;
hypre_MPI_Allreduce(&my_info, &new_num_procs, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
if (new_num_procs == 0)
{
new_comm = hypre_MPI_COMM_NULL;
*new_comm_ptr = new_comm;
return 0;
}
ranks = hypre_CTAlloc(HYPRE_Int, new_num_procs+2);
if (new_num_procs == 1)
{
if (participate) my_info = my_id;
hypre_MPI_Allreduce(&my_info, &ranks[2], 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
}
else
{
info = hypre_CTAlloc(HYPRE_Int, new_num_procs+2);
list_len = hypre_CTAlloc(HYPRE_Int, 1);
if (participate)
{
info[0] = 1;
info[1] = 1;
info[2] = my_id;
}
else
info[0] = 0;
list_len[0] = new_num_procs + 2;
hypre_MPI_Op_create((hypre_MPI_User_function *)hypre_merge_lists, 0, &hypre_MPI_MERGE);
hypre_MPI_Allreduce(info, ranks, list_len[0], HYPRE_MPI_INT, hypre_MPI_MERGE, comm);
hypre_MPI_Op_free (&hypre_MPI_MERGE);
hypre_TFree(list_len);
hypre_TFree(info);
}
hypre_MPI_Comm_size(comm,&num_procs);
hypre_MPI_Comm_group(comm, &orig_group);
hypre_MPI_Group_incl(orig_group, new_num_procs, &ranks[2], &new_group);
hypre_MPI_Comm_create(comm, new_group, &new_comm);
hypre_MPI_Group_free(&new_group);
hypre_MPI_Group_free(&orig_group);
hypre_TFree(ranks);
*new_comm_ptr = new_comm;
return 0;
}
void hypre_merge_lists (HYPRE_Int *list1, HYPRE_Int* list2, hypre_int *np1, hypre_MPI_Datatype *dptr)
{
HYPRE_Int i, len1, len2, indx1, indx2;
if (list1[0] == 0 || (list2[0] == 0 && list1[0] == 0))
{
return;
}
else
{
list2[0] = 1;
len1 = list1[1];
len2 = list2[1];
list2[1] = len1+len2;
if ((hypre_int)(list2[1]) > *np1+2) printf("segfault in MPI User function merge_list\n");
indx1 = len1+1;
indx2 = len2+1;
for (i=len1+len2+1; i > 1; i--)
{
if (indx2 > 1 && indx1 > 1 && list1[indx1] > list2[indx2])
{
list2[i] = list1[indx1];
indx1--;
}
else if (indx2 > 1)
{
list2[i] = list2[indx2];
indx2--;
}
else if (indx1 > 1)
{
list2[i] = list1[indx1];
indx1--;
}
}
}
}