/*BHEADER**********************************************************************
 * Copyright (c) 2008, Lawrence Livermore National Security, LLC.
 * Produced at the Lawrence Livermore National Laboratory.
 * This file is part of HYPRE. See file COPYRIGHT for details.
 *
 * HYPRE is free software; you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License (as published by the Free
 * Software Foundation) version 2.1 dated February 1999.
 *
 * $Revision$
 ***********************************************************************EHEADER*/

#include "headers.h"

void RowsWithColumn_original
 ( int * rowmin, int * rowmax, int column, hypre_ParCSRMatrix * A )
/* Finds rows of A which have a nonzero at the given (global) column number.
   Sets rowmin to the minimum (local) row number of such rows, and rowmax
   to the max. If there are no such rows, will return rowmax<0<=rowmin */
{
   hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(A);
   int * mat_i, * mat_j;
   int i, j, num_rows;
   int firstColDiag;
   int * colMapOffd;

   mat_i = hypre_CSRMatrixI(diag);
   mat_j = hypre_CSRMatrixJ(diag);
   num_rows = hypre_CSRMatrixNumRows(diag);
   firstColDiag = hypre_ParCSRMatrixFirstColDiag(A);
   *rowmin = num_rows;
   *rowmax = -1;

   for ( i=0; i<num_rows; ++i ) {
      /* global number: row = i + firstRowIndex; */
      for ( j=mat_i[i]; j<mat_i[i+1]; ++j ) {
         if ( mat_j[j]+firstColDiag==column ) {
            /* row i (local row number) has column mat_j[j] (local column number) */
            *rowmin = i<*rowmin ? i : *rowmin;
            *rowmax = i>*rowmax ? i : *rowmax;
            break;
         }
      }
   }
   mat_i = hypre_CSRMatrixI(offd);
   mat_j = hypre_CSRMatrixJ(offd);
   num_rows = hypre_CSRMatrixNumRows(offd);
   colMapOffd = hypre_ParCSRMatrixColMapOffd(A);
   for ( i=0; i<num_rows; ++i ) {
      /* global number: row = i + firstRowIndex; */
      for ( j=mat_i[i]; j<mat_i[i+1]; ++j ) {
         if ( colMapOffd[ mat_j[j] ]==column ) {
            /* row i (local row number) has column mat_j[j] (local column number) */
            *rowmin = i<*rowmin ? i : *rowmin;
            *rowmax = i>*rowmax ? i : *rowmax;
            break;
         }
      }
   }

   /* global col no.: mat_j[j]+hypre_ParCSRMatrixFirstColDiag(A)
                   or hypre_ParCSRMatrixColMapOffd(A)[ mat_j[j] ]
      global row no.: i + hypre_ParCSRMatrixFirstRowIndex(A)
   */

}

void RowsWithColumn
 ( int * rowmin, int * rowmax, int column,
   int num_rows_diag, int firstColDiag, int * colMapOffd,
   int * mat_i_diag, int * mat_j_diag, int * mat_i_offd, int * mat_j_offd )
/* Finds rows of A which have a nonzero at the given (global) column number.
   Sets rowmin to the minimum (local) row number of such rows, and rowmax
   to the max. If there are no such rows, will return rowmax<0<=rowmin.
   The matrix A, normally a hypre_ParCSRMatrix or hypre_ParCSRBooleanMatrix,
   is specified by:
   num_rows_diag (number of rows in diag, assumed to be same in offd);
   firstColDiag, colMapOffd (to map CSR-type matrix columns to ParCSR-type columns);
   mat_i_diag, mat_j_diag: indices in the hypre_CSRMatrix or hypre_CSRBooleanMatrix for
     the diag block of A;
   mat_i_offd, mat_j_offd: indices in the hypre_CSRMatrix or hypre_CSRBooleanMatrix for
     the offd block of A
*/
{
   int i, j;

   *rowmin = num_rows_diag;
   *rowmax = -1;

   for ( i=0; i<num_rows_diag; ++i ) {
      /* global number: row = i + firstRowIndex; */
      for ( j=mat_i_diag[i]; j<mat_i_diag[i+1]; ++j ) {
         if ( mat_j_diag[j]+firstColDiag==column ) {
            /* row i (local row number) has column mat_j_diag[j] (local column number) */
            *rowmin = i<*rowmin ? i : *rowmin;
            *rowmax = i>*rowmax ? i : *rowmax;
            break;
         }
      }
   }
   for ( i=0; i<num_rows_diag; ++i ) {
      /* global number: row = i + firstRowIndex; */
      for ( j=mat_i_offd[i]; j<mat_i_offd[i+1]; ++j ) {
         if ( colMapOffd[ mat_j_offd[j] ]==column ) {
            /* row i (local row number) has column mat_j_offd[j] (local column number) */
            *rowmin = i<*rowmin ? i : *rowmin;
            *rowmax = i>*rowmax ? i : *rowmax;
            break;
         }
      }
   }

   /* global col no.: mat_j[j]+hypre_ParCSRMatrixFirstColDiag(A)
                   or hypre_ParCSRMatrixColMapOffd(A)[ mat_j[j] ]
      global row no.: i + hypre_ParCSRMatrixFirstRowIndex(A)
   */

}
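
/* A minimal usage sketch for RowsWithColumn, kept out of the build with #if 0
   (mirroring the debug block later in this file).  The fragment is assumed to
   live inside a caller that already has the matrix arrays in scope; the global
   column number 17 is made up.  It only shows that rowmax<rowmin means no local
   row has a nonzero in that column. */
#if 0
{
   int rowmin, rowmax;
   RowsWithColumn( &rowmin, &rowmax, 17,
                   num_rows_diag, firstColDiag, colMapOffd,
                   mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd );
   if ( rowmax < rowmin )
      printf("no local row has a nonzero in global column 17\n");
   else
      printf("local rows %d through %d have nonzeros in global column 17\n",
             rowmin, rowmax);
}
#endif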

/* hypre_MatTCommPkgCreate_core does all the communications and computations for
   hypre_MatTCommPkgCreate ( hypre_ParCSRMatrix *A)
   and hypre_BoolMatTCommPkgCreate ( hypre_ParCSRBooleanMatrix *A).
   To support both data types, it has hardly any data structures other than int*.
*/

void
hypre_MatTCommPkgCreate_core (

   /* input args: */
   MPI_Comm comm, int * col_map_offd, int first_col_diag, int * col_starts,
   int num_rows_diag, int num_cols_diag, int num_cols_offd, int * row_starts,
   int firstColDiag, int * colMapOffd,
   int * mat_i_diag, int * mat_j_diag, int * mat_i_offd, int * mat_j_offd,

   int data,   /* = 1 for a matrix with floating-point data, = 0 for a Boolean matrix */

   /* pointers to output args: */
   int * p_num_recvs, int ** p_recv_procs, int ** p_recv_vec_starts,
   int * p_num_sends, int ** p_send_procs, int ** p_send_map_starts,
   int ** p_send_map_elmts

   )
{
   int num_sends;
   int *send_procs;
   int *send_map_starts;
   int *send_map_elmts;
   int num_recvs;
   int *recv_procs;
   int *recv_vec_starts;
   int i, j, j2, k, ir, rowmin, rowmax;
   int *tmp, *recv_buf, *displs, *info, *send_buf, *all_num_sends3;
   int num_procs, my_id, num_elmts;
   int local_info, index, index2;
   int pmatch, col, kc, p;
   int * recv_sz_buf;
   int * row_marker;

   MPI_Comm_size(comm, &num_procs);
   MPI_Comm_rank(comm, &my_id);

   info = hypre_CTAlloc(int, num_procs);

   /* ----------------------------------------------------------------------
    * determine which processors to receive from (set proc_mark) and num_recvs,
    * at the end of the loop proc_mark[i] contains the number of elements to be
    * received from Proc. i
    *
    * - For A*b or A*B: for each off-diagonal column i of A, you want to identify
    * the processor which has the corresponding element i of b (row i of B)
    * (for columns in the local diagonal block, you just multiply local rows of B).
    * You do it by finding the processor which has that column of A in its
    * _diagonal_ block - assuming b or B is distributed the same way, which I
    * believe is evenly among processors, by row. There is a unique solution
    * because the diag/offd blocking is defined by which processor owns which
    * rows of A.
    *
    * - For A*A^T: A^T is not distributed by rows as B or any 'normal' matrix is.
    * For each off-diagonal row,column k,i element of A, you want to identify
    * the processors which have the corresponding row,column i,j elements of A^T,
    * i.e., row,column j,i elements of A (all i,j,k for which these entries are
    * nonzero, row k of A lives on this processor, and row j of A lives on
    * a different processor). So, given a column i in the local A-offd or A-diag,
    * we want to find all the processors which have column i, in diag or offd
    * blocks. Unlike the A*B case, I don't think you can eliminate looking at
    * any class of blocks.
    * ---------------------------------------------------------------------*/

   /* The algorithm for A*B was:
      For each of my columns i (in offd block), use known information on data
      distribution of columns in _diagonal_ blocks to find the processor p which
      owns row i. (Note that for i in diag block, I own the row, nothing to do.)
      Count up such i's for each processor in proc_mark. Construct a data
      structure, recv_buf, made by appending a structure tmp from each processor.
      The data structure tmp looks like (p, no. of i's, i1, i2,...) (p=0,...).
      There are two communication steps: gather size information (local_info) from
      all processors (into info), then gather the data (tmp) from all processors
      (into recv_buf). Then you go through recv_buf. For each (sink) processor p
      you search for the appearance of my (source) processor number
      (most of recv_buf pertains to other processors and is ignored).
      When you find the appropriate section, pull out the i's, count them and
      save them in send_map_elmts, save p in send_procs, and save index
      information in send_map_starts.
   */
   /* The algorithm for A*A^T:
      [ Originally I had planned to figure out approximately which processors
      had the information (for A*B it could be done exactly) to save on
      communication. But even for A*B where the data owner is known, all data is
      sent to all processors, so that's not worth worrying about on the first cut.
      One consequence is that proc_mark is not needed.]
      Construct a data structure, recv_buf, made by appending a structure tmp for
      each processor. It simply consists of (no. of i's, i1, i2,...) where i is
      the global number of a column in the offd block. There are still two
      communication steps: gather size information (local_info) from all processors
      (into info), then gather the data (tmp) from all processors (into recv_buf).
      Then you go through recv_buf. For each (sink) processor p you go through
      all its column numbers in recv_buf. Check each one for whether you have
      data in that column. If so, put it in send_map_elmts, put p in send_procs,
      and update the index information in send_map_starts. Note that these
      arrays don't mean quite the same thing as for A*B.
   */
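
   /* Hypothetical worked example of the layout described above (numbers made
      up, not taken from any real run).  Suppose num_procs=2, col_starts={0,4,8},
      this processor's diag block holds global columns 0..3 (first_col_diag=0),
      and its offd block references global columns 5 and 7, owned by proc 1.
      The loop below then builds
         tmp = { 4, 0, 1, 2, 3,   2, 5, 7 }
      i.e. one (count, columns...) group per destination processor, of total
      length local_info = num_procs + num_cols_offd + num_cols_diag = 8.  After
      the MPI_Allgatherv, recv_buf is the concatenation of every processor's
      tmp, with displs[p] marking where processor p's contribution starts. */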

   num_recvs = num_procs-1;
   local_info = num_procs + num_cols_offd + num_cols_diag;

   MPI_Allgather(&local_info, 1, MPI_INT, info, 1, MPI_INT, comm);

   /* ----------------------------------------------------------------------
    * generate information to be sent: tmp contains for each recv_proc:
    * {deleted: id of recv_procs}, number of elements to be received for this processor,
    * indices of elements (in this order)
    * ---------------------------------------------------------------------*/

   displs = hypre_CTAlloc(int, num_procs+1);
   displs[0] = 0;
   for (i=1; i < num_procs+1; i++)
      displs[i] = displs[i-1]+info[i-1];
   recv_buf = hypre_CTAlloc(int, displs[num_procs]);
   tmp = hypre_CTAlloc(int, local_info);

   j = 0;
   for (i=0; i < num_procs; i++) {
      j2 = j++;
      tmp[j2] = 0;
      for (k=0; k < num_cols_offd; k++)
         if (col_map_offd[k] >= col_starts[i] &&
             col_map_offd[k] < col_starts[i+1]) {
            tmp[j++] = col_map_offd[k];
            ++(tmp[j2]);
         }
      for (k=0; k < num_cols_diag; k++)
         if ( k+first_col_diag >= col_starts[i] &&
              k+first_col_diag < col_starts[i+1] ) {
            tmp[j++] = k + first_col_diag;
            ++(tmp[j2]);
         }
   }

   MPI_Allgatherv(tmp,local_info,MPI_INT,recv_buf,info,displs,MPI_INT,comm);


   /* ----------------------------------------------------------------------
    * determine send_procs and the actual elements to be sent (in send_map_elmts)
    * and send_map_starts whose i-th entry points to the beginning of the
    * elements to be sent to proc. i
    * ---------------------------------------------------------------------*/
   /* Meanings of arrays being set here, more verbosely stated:
      send_procs: processors p to send to
      send_map_starts: for each p, gives range of indices in send_map_elmts;
      send_map_elmts: Each element is a send_map_elmts[i], with i in a range given
      by send_map_starts[p..p+1], for some p. This element is the global
      column number for a column in the offd block of p which is to be multiplied
      by data from this processor.
      For A*B, send_map_elmts[i] is therefore a row of B belonging to this
      processor, to be sent to p. For A*A^T, send_map_elmts[i] is a row of A
      belonging to this processor, to be sent to p; this row was selected
      because it has a nonzero on a _column_ needed by p.
   */
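
   /* Hypothetical illustration of those three arrays (values made up): if
         send_procs      = { 1, 3 }
         send_map_starts = { 0, 2, 5 }
         send_map_elmts  = { 0, 4, 1, 2, 6 }
      then local rows 0 and 4 of A on this processor are to be sent to proc 1,
      and local rows 1, 2, and 6 are to be sent to proc 3. */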

   num_sends = num_procs;   /* may turn out to be less, but we can't know yet */
   num_elmts = (num_procs-1)*num_rows_diag;
   /* ... a crude upper bound; should try to do better even if more comm required */
   send_procs = hypre_CTAlloc(int, num_sends);
   send_map_starts = hypre_CTAlloc(int, num_sends+1);
   send_map_elmts = hypre_CTAlloc(int, num_elmts);
   row_marker = hypre_CTAlloc(int, num_rows_diag);

   index = 0;
   index2 = 0;
   send_map_starts[0] = 0;
   for (i=0; i < num_procs; i++) {
      send_map_starts[index+1] = send_map_starts[index];
      j = displs[i];
      pmatch = 0;
      for ( ir=0; ir<num_rows_diag; ++ir ) row_marker[ir] = 0;
      while ( j < displs[i+1] )
      {
         num_elmts = recv_buf[j++];   /* no. of columns proc. i wants */
         for ( k=0; k<num_elmts; k++ ) {
            col = recv_buf[j++];   /* a global column no. at proc. i */
            for ( kc=0; kc<num_cols_offd; kc++ ) {
               if ( col_map_offd[kc]==col && i!=my_id ) {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
                  /* this would be right if we could send columns, but we can't ...
                     offset = first_col_diag;
                     ++send_map_starts[index+1];
                     send_map_elmts[index2++] = col - offset; */
                  /* Plan to send all of my rows which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag,
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd );
                  for ( ir=rowmin; ir<=rowmax; ++ir ) {
                     if ( row_marker[ir]==0 ) {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
            /* alternative way of doing the following for-loop:
               for ( kc=0; kc<num_cols_diag; kc++ ) {
                  if ( kc+first_col_diag==col && i!=my_id ) {
                     / * this processor has the same column as proc. i (but is different) * /
                     pmatch = 1;
                     / * this would be right if we could send columns, but we can't ... >>> * /
                     send_procs[index] = i;
                     ++send_map_starts[index+1];
                     send_map_elmts[index2++] = col - offset;
                     / * Plan to send all of my rows which use this column... * /
                     / * NOT DONE * /
                  }
               }
            */
            for ( kc=row_starts[my_id]; kc<row_starts[my_id+1]; kc++ ) {
               if ( kc==col && i!=my_id ) {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
                  /* this would be right if we could send columns, but we can't ... >>>
                     ++send_map_starts[index+1];
                     send_map_elmts[index2++] = col - offset; */
                  /* Plan to send all of my rows which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag,
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd );
                  for ( ir=rowmin; ir<=rowmax; ++ir ) {
                     if ( row_marker[ir]==0 ) {
                        row_marker[ir] = 1;
                        ++send_map_starts[index+1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
         }
      }
      if ( pmatch ) index++;
   }
   num_sends = index;   /* no. of procs that rows will be sent to */

   /* Compute receive arrays recv_procs, recv_vec_starts ... */
   recv_procs = hypre_CTAlloc(int, num_recvs);
   recv_vec_starts = hypre_CTAlloc(int, num_recvs+1);
   j2 = 0;
   for (i=0; i < num_procs; i++) {
      if ( i!=my_id ) { recv_procs[j2] = i; j2++; }
   }

   /* Compute recv_vec_starts.
      The real job is, for each processor p, to figure out how many rows
      p will send to me (me = this processor). I now know how many (and which)
      rows I will send to each p. Indeed, if send_procs[index]=p, then the
      number is send_map_starts[index+1]-send_map_starts[index].
      More communication is needed.
      options:
      MPI_Allgather of communication sizes. <--- my choice, for now
        good: simple  bad: sends num_procs*num_sends data, only need num_procs
        but: not that much data compared to previous communication
      MPI_Allgatherv of communication sizes, only for pairs of procs. that communicate
        good: less data than above  bad: need extra commun. step to get recvcounts
      MPI_Isend, MPI_Irecv of each size, separately between each pair of procs.
        good: no excess data sent  bad: lots of little messages
        but: Allgather might be done the same way under the hood, and
        may be much slower than Allgather or a bit faster depending on
        implementations
   */
   send_buf = hypre_CTAlloc( int, 3*num_sends );
   all_num_sends3 = hypre_CTAlloc( int, num_procs );

   /* scatter-gather num_sends, to set up the size for the main comm. step */
   i = 3*num_sends;
   MPI_Allgather( &i, 1, MPI_INT, all_num_sends3, 1, MPI_INT, comm );
   displs[0] = 0;
   for ( p=0; p<num_procs; ++p ) {
      displs[p+1] = displs[p] + all_num_sends3[p];
   }
   recv_sz_buf = hypre_CTAlloc( int, displs[num_procs] );

   /* scatter-gather size of row info to send, and proc. to send to */
   index = 0;
   for ( i=0; i<num_sends; ++i ) {
      send_buf[index++] = send_procs[i];   /* processor to send to */
      send_buf[index++] = my_id;
      send_buf[index++] = send_map_starts[i+1] - send_map_starts[i];
      /* ... sizes of info to send */
   }
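
   /* Hypothetical example of the triples being exchanged (numbers made up):
      a processor with my_id=2 that sends rows to procs 0 and 3 contributes
         send_buf = { 0, 2, n0,   3, 2, n3 }
      where n0 and n3 are its row counts from send_map_starts.  After the
      MPI_Allgatherv below, every processor scans recv_sz_buf three entries at
      a time and keeps only the triples whose first entry equals its own rank,
      which yields its recv_procs and recv_vec_starts. */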

   MPI_Allgatherv( send_buf, 3*num_sends, MPI_INT,
                   recv_sz_buf, all_num_sends3, displs, MPI_INT, comm);

   recv_vec_starts[0] = 0;
   j2 = 0;  j = 0;
   for ( i=0; i<displs[num_procs]; i=i+3 ) {
      j = i;
      if ( recv_sz_buf[j++]==my_id ) {
         recv_procs[j2] = recv_sz_buf[j++];
         recv_vec_starts[j2+1] = recv_vec_starts[j2] + recv_sz_buf[j++];
         j2++;
      }
   }
   num_recvs = j2;

#if 0
   printf("num_procs=%i send_map_starts (%i):",num_procs,num_sends+1);
   for( i=0; i<=num_sends; ++i ) printf(" %i", send_map_starts[i] );
   printf(" send_procs (%i):",num_sends);
   for( i=0; i<num_sends; ++i ) printf(" %i", send_procs[i] );
   printf("\n");
   printf("my_id=%i num_sends=%i send_buf[0,1,2]=%i %i %i",
          my_id, num_sends, send_buf[0], send_buf[1], send_buf[2] );
   printf(" all_num_sends3[0,1]=%i %i\n", all_num_sends3[0], all_num_sends3[1] );
   printf("my_id=%i rcv_sz_buf (%i):", my_id, displs[num_procs] );
   for( i=0; i<displs[num_procs]; ++i ) printf(" %i", recv_sz_buf[i] );
   printf("\n");
   printf("my_id=%i recv_vec_starts (%i):",my_id,num_recvs+1);
   for( i=0; i<=num_recvs; ++i ) printf(" %i", recv_vec_starts[i] );
   printf(" recv_procs (%i):",num_recvs);
   for( i=0; i<num_recvs; ++i ) printf(" %i", recv_procs[i] );
   printf("\n");
   printf("my_id=%i num_recvs=%i recv_sz_buf[0,1,2]=%i %i %i\n",
          my_id, num_recvs, recv_sz_buf[0], recv_sz_buf[1], recv_sz_buf[2] );
#endif

   hypre_TFree(send_buf);
   hypre_TFree(all_num_sends3);
   hypre_TFree(tmp);
   hypre_TFree(recv_buf);
   hypre_TFree(displs);
   hypre_TFree(info);
   hypre_TFree(recv_sz_buf);
   hypre_TFree(row_marker);


   /* finish up with the hand-coded call-by-reference... */
   *p_num_recvs = num_recvs;
   *p_recv_procs = recv_procs;
   *p_recv_vec_starts = recv_vec_starts;
   *p_num_sends = num_sends;
   *p_send_procs = send_procs;
   *p_send_map_starts = send_map_starts;
   *p_send_map_elmts = send_map_elmts;

}

/* ----------------------------------------------------------------------
 * hypre_MatTCommPkgCreate
 * generates a special comm_pkg for A - for use in multiplying by its
 * transpose, A * A^T
 * if no row and/or column partitioning is given, the routine determines
 * them with MPE_Decomp1d
 * ---------------------------------------------------------------------*/

int
hypre_MatTCommPkgCreate ( hypre_ParCSRMatrix *A )
{
   hypre_ParCSRCommPkg *comm_pkg;

   MPI_Comm comm = hypre_ParCSRMatrixComm(A);
   /* MPI_Datatype *recv_mpi_types;
      MPI_Datatype *send_mpi_types;
   */
   int num_sends;
   int *send_procs;
   int *send_map_starts;
   int *send_map_elmts;
   int num_recvs;
   int *recv_procs;
   int *recv_vec_starts;

   int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
   int first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);
   int *col_starts = hypre_ParCSRMatrixColStarts(A);

   int ierr = 0;
   int num_rows_diag = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A));
   int num_cols_diag = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(A));
   int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
   int *row_starts = hypre_ParCSRMatrixRowStarts(A);

   hypre_MatTCommPkgCreate_core (
      comm, col_map_offd, first_col_diag, col_starts,
      num_rows_diag, num_cols_diag, num_cols_offd, row_starts,
      hypre_ParCSRMatrixFirstColDiag(A),
      hypre_ParCSRMatrixColMapOffd(A),
      hypre_CSRMatrixI( hypre_ParCSRMatrixDiag(A) ),
      hypre_CSRMatrixJ( hypre_ParCSRMatrixDiag(A) ),
      hypre_CSRMatrixI( hypre_ParCSRMatrixOffd(A) ),
      hypre_CSRMatrixJ( hypre_ParCSRMatrixOffd(A) ),
      1,
      &num_recvs, &recv_procs, &recv_vec_starts,
      &num_sends, &send_procs, &send_map_starts,
      &send_map_elmts
      );

   comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);

   hypre_ParCSRCommPkgComm(comm_pkg) = comm;

   hypre_ParCSRCommPkgNumRecvs(comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgRecvProcs(comm_pkg) = recv_procs;
   hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts;
   hypre_ParCSRCommPkgNumSends(comm_pkg) = num_sends;
   hypre_ParCSRCommPkgSendProcs(comm_pkg) = send_procs;
   hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts;
   hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = send_map_elmts;

   hypre_ParCSRMatrixCommPkgT(A) = comm_pkg;

   return ierr;
}
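
/* A minimal calling sketch, kept out of the build with #if 0.  It assumes a
   hypre_ParCSRMatrix *A has already been constructed; after
   hypre_MatTCommPkgCreate(A) returns, the transpose communication package is
   attached to A and can be read back with the accessors used above. */
#if 0
{
   hypre_ParCSRCommPkg *comm_pkg_T;
   hypre_MatTCommPkgCreate( A );                   /* build and attach the package */
   comm_pkg_T = hypre_ParCSRMatrixCommPkgT( A );   /* retrieve it from the matrix  */
   printf("sends to %d procs, receives from %d procs\n",
          hypre_ParCSRCommPkgNumSends(comm_pkg_T),
          hypre_ParCSRCommPkgNumRecvs(comm_pkg_T));
}
#endif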