Bug fixes involving mixedint option and CUDA build (#783)

This PR fixes a few variable types inconsistencies arisen from the mixedint build. Additionally, it fixes the CUDA-11.1.1 build.

* Fix cuSPARSE version tag for using generic SpMM and new SpMV algorithms
* Bug fixes on hypre_ILU: S_row_starts computation and m -> big_m
* Bug fix: HYPRE_MPI_REAL -> HYPRE_MPI_COMPLEX
* Bug fix: HYPRE_Int -> HYPRE_BigInt
* Bug fix: HYPRE_MPI_INT -> HYPRE_MPI_BIG_INT

Co-authored-by: TotoGaz <49004943+TotoGaz@users.noreply.github.com>
This commit is contained in:
Victor A. Paludetto Magri 2022-12-07 19:52:15 -08:00 committed by GitHub
parent af9c59cc34
commit 8f51e49402
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 90 additions and 95 deletions

View File

@ -2562,7 +2562,7 @@ hypre_BoomerAMGIndepPMISa( hypre_ParCSRMatrix *S,
HYPRE_BigInt big_graph_size = (HYPRE_BigInt) graph_size; HYPRE_BigInt big_graph_size = (HYPRE_BigInt) graph_size;
/* stop the coarsening if nothing left to be coarsened */ /* stop the coarsening if nothing left to be coarsened */
hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
if (global_graph_size == 0) if (global_graph_size == 0)
{ {

View File

@ -934,7 +934,7 @@ hypre_ILUSetup( void *ilu_vdata,
HYPRE_Int m = n - nLU; HYPRE_Int m = n - nLU;
HYPRE_BigInt S_total_rows, S_row_starts[2]; HYPRE_BigInt S_total_rows, S_row_starts[2];
HYPRE_BigInt big_m = (HYPRE_BigInt)m; HYPRE_BigInt big_m = (HYPRE_BigInt)m;
hypre_MPI_Allreduce( &big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
if ( S_total_rows > 0 ) if ( S_total_rows > 0 )
{ {
@ -952,8 +952,8 @@ hypre_ILUSetup( void *ilu_vdata,
/* only do so when we hae the Schur Complement */ /* only do so when we hae the Schur Complement */
{ {
HYPRE_BigInt global_start; HYPRE_BigInt global_start;
hypre_MPI_Scan( &big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
S_row_starts[0] = global_start - m; S_row_starts[0] = global_start - big_m;
S_row_starts[1] = global_start; S_row_starts[1] = global_start;
} }
@ -3353,20 +3353,17 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_Int *perm, HYPRE_Int n,
HYPRE_BigInt S_total_rows, S_row_starts[2]; HYPRE_BigInt S_total_rows, S_row_starts[2];
HYPRE_BigInt big_m = (HYPRE_BigInt)m; HYPRE_BigInt big_m = (HYPRE_BigInt)m;
hypre_MPI_Allreduce( &big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
if (S_total_rows > 0) if (S_total_rows > 0)
{ {
{ {
HYPRE_BigInt global_start; HYPRE_BigInt global_start;
hypre_MPI_Scan( &big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
S_row_starts[0] = global_start - m; S_row_starts[0] = global_start - big_m;
S_row_starts[1] = global_start; S_row_starts[1] = global_start;
} }
S_row_starts[1] = S_total_rows;
S_row_starts[0] = S_total_rows - m;
hypre_MPI_Allreduce(&m, &S_total_rows, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
S = hypre_ParCSRMatrixCreate( hypre_ParCSRMatrixComm(A), S = hypre_ParCSRMatrixCreate( hypre_ParCSRMatrixComm(A),
S_total_rows, S_total_rows,
S_total_rows, S_total_rows,

View File

@ -393,9 +393,10 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A,
num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), v_buf_data = hypre_CTAlloc(HYPRE_Complex,
hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
HYPRE_MEMORY_HOST); HYPRE_MEMORY_HOST);
v_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST); v_ext_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd, HYPRE_MEMORY_HOST);
status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST); status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST);
requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST); requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST);
@ -420,7 +421,8 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A,
{ {
v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]; v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)];
} }
hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]); hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
comm, &requests[jr++]);
} }
} }
hypre_MPI_Waitall(jr, requests, status); hypre_MPI_Waitall(jr, requests, status);
@ -435,7 +437,8 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A,
ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]); hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
comm, &requests[jr++]);
} }
hypre_MPI_Waitall(jr, requests, status); hypre_MPI_Waitall(jr, requests, status);
} }
@ -522,9 +525,10 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A,
num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), v_buf_data = hypre_CTAlloc(HYPRE_Complex,
hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
HYPRE_MEMORY_HOST); HYPRE_MEMORY_HOST);
v_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST); v_ext_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd, HYPRE_MEMORY_HOST);
status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST); status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST);
requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST); requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST);
@ -568,7 +572,8 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A,
{ {
v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]; v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)];
} }
hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]); hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
comm, &requests[jr++]);
} }
} }
hypre_MPI_Waitall(jr, requests, status); hypre_MPI_Waitall(jr, requests, status);
@ -583,7 +588,8 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A,
ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]); hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
comm, &requests[jr++]);
} }
hypre_MPI_Waitall(jr, requests, status); hypre_MPI_Waitall(jr, requests, status);
} }

View File

@ -194,7 +194,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
HYPRE_BigInt *big_buf_data = NULL; HYPRE_BigInt *big_buf_data = NULL;
HYPRE_Real *dbl_buf_data = NULL; HYPRE_Real *dbl_buf_data = NULL;
HYPRE_Int g_nc; HYPRE_BigInt g_nc;
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A); HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
@ -324,11 +324,12 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
/* for communication */ /* for communication */
num_sends_A = hypre_ParCSRCommPkgNumSends(comm_pkg_A); num_sends_A = hypre_ParCSRCommPkgNumSends(comm_pkg_A);
int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, int_buf_data = hypre_CTAlloc(HYPRE_Int,
num_sends_A), HYPRE_MEMORY_HOST); hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A),
big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, HYPRE_MEMORY_HOST);
num_sends_A), HYPRE_MEMORY_HOST); big_buf_data = hypre_CTAlloc(HYPRE_BigInt,
hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A),
HYPRE_MEMORY_HOST);
/*----------------------------------------------------------------------- /*-----------------------------------------------------------------------
* create and send and receive fine_to_coarse info. * create and send and receive fine_to_coarse info.
@ -338,12 +339,14 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
HYPRE_BigInt my_first_cpt; HYPRE_BigInt my_first_cpt;
HYPRE_Int tmp_i; HYPRE_Int tmp_i;
my_first_cpt = num_cpts_global[0]; my_first_cpt = num_cpts_global[0];
/* create the fine to coarse and coarse to fine*/ /* create the fine to coarse and coarse to fine */
fine_to_coarse = hypre_CTAlloc(HYPRE_Int, num_rows_P, HYPRE_MEMORY_HOST); fine_to_coarse = hypre_CTAlloc(HYPRE_Int, num_rows_P, HYPRE_MEMORY_HOST);
for (i = 0; i < num_rows_P; i++) { fine_to_coarse[i] = -1; } for (i = 0; i < num_rows_P; i++)
{
fine_to_coarse[i] = -1;
}
coarse_to_fine = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST); coarse_to_fine = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST);
@ -367,14 +370,12 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i); start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i);
for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++) for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++)
{ {
tmp_i = fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j)]; tmp_i = fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j)];
big_buf_data[index++] = (HYPRE_BigInt)tmp_i + my_first_cpt; /* makes it global*/ big_buf_data[index++] = (HYPRE_BigInt)tmp_i + my_first_cpt; /* makes it global*/
} }
} }
comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg_A, big_buf_data, comm_handle = hypre_ParCSRCommHandleCreate(21, comm_pkg_A, big_buf_data,
fine_to_coarse_offd); fine_to_coarse_offd);
hypre_ParCSRCommHandleDestroy(comm_handle); hypre_ParCSRCommHandleDestroy(comm_handle);
@ -384,7 +385,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
* Get the CF_marker data for the off-processor columns of A * Get the CF_marker data for the off-processor columns of A
*-------------------------------------------------------------------*/ *-------------------------------------------------------------------*/
{ {
if (num_cols_A_offd) if (num_cols_A_offd)
{ {
CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST); CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
@ -405,7 +405,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
} }
} }
comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg_A, int_buf_data, comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg_A, int_buf_data,
CF_marker_offd); CF_marker_offd);
hypre_ParCSRCommHandleDestroy(comm_handle); hypre_ParCSRCommHandleDestroy(comm_handle);
@ -422,7 +422,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
} }
} }
comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg_A, int_buf_data, comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg_A, int_buf_data,
dof_func_offd); dof_func_offd);
hypre_ParCSRCommHandleDestroy(comm_handle); hypre_ParCSRCommHandleDestroy(comm_handle);
@ -437,7 +437,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
HYPRE_Int kc; HYPRE_Int kc;
HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstColDiag(*P); HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstColDiag(*P);
HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)hypre_CSRMatrixNumCols(P_diag); HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) hypre_CSRMatrixNumCols(P_diag);
if (num_procs > 1) if (num_procs > 1)
{ {
@ -482,9 +482,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
P_ext_i[i] = P_ext_i[i - 1]; P_ext_i[i] = P_ext_i[i - 1];
} }
if (num_procs > 1) { P_ext_i[0] = 0; } if (num_procs > 1)
{
P_ext_i[0] = 0;
}
} /* end of ghost rows */ } /* end of ghost rows */
/*------------------------------------------------------------------- /*-------------------------------------------------------------------
@ -517,7 +518,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
new_nnz_diag = nnz_diag + nnz_diag * num_smooth_vecs; new_nnz_diag = nnz_diag + nnz_diag * num_smooth_vecs;
new_nnz_offd = nnz_offd + nnz_offd * num_smooth_vecs; new_nnz_offd = nnz_offd + nnz_offd * num_smooth_vecs;
/* new number of coarse variables */ /* new number of coarse variables */
if (level == interp_vec_first_level ) if (level == interp_vec_first_level )
{ {
@ -528,7 +528,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
new_ncv = ncv; /* unchanged on level > first_level */ new_ncv = ncv; /* unchanged on level > first_level */
} }
/* allocations */ /* allocations */
P_diag_j_new = hypre_CTAlloc(HYPRE_Int, new_nnz_diag, memory_location_P); P_diag_j_new = hypre_CTAlloc(HYPRE_Int, new_nnz_diag, memory_location_P);
P_diag_data_new = hypre_CTAlloc(HYPRE_Real, new_nnz_diag, memory_location_P); P_diag_data_new = hypre_CTAlloc(HYPRE_Real, new_nnz_diag, memory_location_P);
@ -542,7 +541,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
P_diag_i_new[0] = P_diag_i[0]; P_diag_i_new[0] = P_diag_i[0];
P_offd_i_new[0] = P_offd_i[0]; P_offd_i_new[0] = P_offd_i[0];
/* doing truncation? if so, need some more allocations*/ /* doing truncation? if so, need some more allocations*/
if (q_max > 0 || abs_trunc > 0.0) if (q_max > 0 || abs_trunc > 0.0)
{ {
@ -567,7 +565,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
{ {
HYPRE_Int fine_index; HYPRE_Int fine_index;
smooth_vec_offd = hypre_CTAlloc(HYPRE_Real, num_cols_A_offd * num_smooth_vecs, HYPRE_MEMORY_HOST); smooth_vec_offd = hypre_CTAlloc(HYPRE_Real,
num_cols_A_offd * num_smooth_vecs,
HYPRE_MEMORY_HOST);
/* for now, do a seperate comm for each smooth vector */ /* for now, do a seperate comm for each smooth vector */
for (k = 0; k < num_smooth_vecs; k++) for (k = 0; k < num_smooth_vecs; k++)
@ -576,8 +576,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
vector = smooth_vecs[k]; vector = smooth_vecs[k];
vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector)); vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector));
dbl_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, dbl_buf_data = hypre_CTAlloc(HYPRE_Real,
num_sends_A), HYPRE_MEMORY_HOST); hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A),
HYPRE_MEMORY_HOST);
/* point into smooth_vec_offd */ /* point into smooth_vec_offd */
offd_vec_data = smooth_vec_offd + k * num_cols_A_offd; offd_vec_data = smooth_vec_offd + k * num_cols_A_offd;
@ -587,23 +589,19 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i); start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i);
for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++) for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++)
{ {
fine_index = hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j); fine_index = hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j);
dbl_buf_data[index++] = vec_data[fine_index]; dbl_buf_data[index++] = vec_data[fine_index];
} }
} }
comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg_A, dbl_buf_data, comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg_A, dbl_buf_data, offd_vec_data);
offd_vec_data);
hypre_ParCSRCommHandleDestroy(comm_handle); hypre_ParCSRCommHandleDestroy(comm_handle);
hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST); hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST);
} /* end of smooth vecs */ } /* end of smooth vecs */
}/*end num procs > 1 */ }/*end num procs > 1 */
/*------------------------------------------------------------------- /*-------------------------------------------------------------------
* Get smooth vec components for the off-processor columns of P * Get smooth vec components for the off-processor columns of P
* TO Do: would be less storage to get the offd coarse to fine * TO Do: would be less storage to get the offd coarse to fine
@ -613,19 +611,21 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
if (num_procs > 1) if (num_procs > 1)
{ {
HYPRE_Int c_index, fine_index; HYPRE_Int c_index, fine_index;
smooth_vec_offd_P = hypre_CTAlloc(HYPRE_Real, num_cols_P_offd * num_smooth_vecs, smooth_vec_offd_P = hypre_CTAlloc(HYPRE_Real,
num_cols_P_offd * num_smooth_vecs,
HYPRE_MEMORY_HOST); HYPRE_MEMORY_HOST);
/* for now, do a seperate comm for each smooth vector */ /* for now, do a seperate comm for each smooth vector */
for (k = 0; k < num_smooth_vecs; k++) for (k = 0; k < num_smooth_vecs; k++)
{ {
vector = smooth_vecs[k]; vector = smooth_vecs[k];
vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector)); vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector));
num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg_P); num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg_P);
dbl_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg_P, dbl_buf_data = hypre_CTAlloc(HYPRE_Real,
num_sends), HYPRE_MEMORY_HOST); hypre_ParCSRCommPkgSendMapStart(comm_pkg_P, num_sends),
HYPRE_MEMORY_HOST);
/* point into smooth_vec_offd_P */ /* point into smooth_vec_offd_P */
offd_vec_data_P = smooth_vec_offd_P + k * num_cols_P_offd; offd_vec_data_P = smooth_vec_offd_P + k * num_cols_P_offd;
@ -640,24 +640,20 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
fine_index = coarse_to_fine[c_index]; fine_index = coarse_to_fine[c_index];
dbl_buf_data[index++] = vec_data[fine_index]; dbl_buf_data[index++] = vec_data[fine_index];
} }
} }
comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg_P, dbl_buf_data, comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg_P, dbl_buf_data,
offd_vec_data_P); offd_vec_data_P);
hypre_ParCSRCommHandleDestroy(comm_handle); hypre_ParCSRCommHandleDestroy(comm_handle);
hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST); hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST);
} }
} /*end num procs > 1 */
}/*end num procs > 1 */
/*------------------------------------------------------------------- /*-------------------------------------------------------------------
* Main loop! * Main loop!
*-------------------------------------------------------------------*/ *-------------------------------------------------------------------*/
/******** loop through rows - only operate on rows of original functions******/ /******** loop through rows - only operate on rows of original functions******/
j_diag_pos = 0; j_diag_pos = 0;
@ -699,12 +695,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
orig_diag_start = P_diag_i[i]; orig_diag_start = P_diag_i[i];
orig_offd_start = P_offd_i[i]; orig_offd_start = P_offd_i[i];
/* if original function dofs? or a new one that we don't want /* if original function dofs? or a new one that we don't want
* to modify*/ * to modify*/
if (fcn_num < orig_nf || modify == 0 ) if (fcn_num < orig_nf || modify == 0 )
{ {
/* for this row, will we add q entries ? */ /* for this row, will we add q entries ? */
if (fcn_num < orig_nf && num_smooth_vecs) if (fcn_num < orig_nf && num_smooth_vecs)
{ {
@ -748,7 +742,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
* use col_map[index]*/ * use col_map[index]*/
if (num_smooth_vecs && (level == interp_vec_first_level)) if (num_smooth_vecs && (level == interp_vec_first_level))
{ {
big_new_col = big_index + (big_index / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)num_smooth_vecs; big_new_col = big_index +
(big_index / (HYPRE_BigInt)num_functions) *
(HYPRE_BigInt)num_smooth_vecs;
} }
else /* no adjustment */ else /* no adjustment */
{ {
@ -772,7 +768,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
/* orig_row_sum += P_diag_data[orig_diag_start + j]; */ /* orig_row_sum += P_diag_data[orig_diag_start + j]; */
P_diag_data_new[j_diag_pos] = 0.0; P_diag_data_new[j_diag_pos] = 0.0;
new_col = col_map[ P_diag_j[orig_diag_start + j]]; new_col = col_map[P_diag_j[orig_diag_start + j]];
P_diag_j_new[j_diag_pos] = new_col; P_diag_j_new[j_diag_pos] = new_col;
j_diag_pos++; j_diag_pos++;
@ -800,10 +796,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
num_new_p_diag++; num_new_p_diag++;
q_count++; q_count++;
p_count_diag++; p_count_diag++;
}
}
}
}
}
}
/* offd */ /* offd */
p_count_offd = p_count_diag; /* for indexing into is_q*/ p_count_offd = p_count_diag; /* for indexing into is_q*/
for (j = 0; j < p_num_offd_elements; j++) for (j = 0; j < p_num_offd_elements; j++)
@ -858,7 +854,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
num_new_p_offd++; num_new_p_offd++;
q_count++; q_count++;
p_count_offd++; p_count_offd++;
} }
} }
} }
@ -884,7 +879,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
{ {
r_extra[k] += A_diag_data[jj] * vec_data[i1]; r_extra[k] += A_diag_data[jj] * vec_data[i1];
} }
} }
offd_vec_data = smooth_vec_offd + k * num_cols_A_offd; offd_vec_data = smooth_vec_offd + k * num_cols_A_offd;
@ -910,7 +904,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
{ {
af_sum += A_diag_data[jj]; af_sum += A_diag_data[jj];
} }
} }
for (jj = A_offd_i[i]; jj < A_offd_i[i + 1]; jj++) for (jj = A_offd_i[i]; jj < A_offd_i[i + 1]; jj++)
{ {
@ -919,7 +912,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
{ {
af_sum += A_offd_data[jj]; af_sum += A_offd_data[jj];
} }
} }
if (af_sum != 0.0) if (af_sum != 0.0)
@ -935,7 +927,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
} }
/* now we will use the adjustment later */ /* now we will use the adjustment later */
/* now if we have any coarse connections with no /* now if we have any coarse connections with no
corresponding point in orig p, then these we have to corresponding point in orig p, then these we have to
distibute and treat as fine, basically*/ distibute and treat as fine, basically*/
@ -985,6 +976,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
} }
} }
} /* end diag loop */ } /* end diag loop */
/* now offd loop */ /* now offd loop */
for (jj = A_offd_i[i]; jj < A_offd_i[i + 1]; jj++) for (jj = A_offd_i[i]; jj < A_offd_i[i + 1]; jj++)
{ {
@ -1103,7 +1095,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
/* if (a_ij*d_sign > 0) /* if (a_ij*d_sign > 0)
continue;*/ continue;*/
found = 0; found = 0;
if (CF_marker[jj_point] >= 0) /*coarse*/ if (CF_marker[jj_point] >= 0) /*coarse*/
{ {
@ -1226,7 +1217,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
P_diag_data_new[cur_spot + k + 1] += q_val; P_diag_data_new[cur_spot + k + 1] += q_val;
} }
} }
kk_count++; kk_count++;
} /* did each element of p_diag */ } /* did each element of p_diag */
@ -2479,7 +2469,8 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
HYPRE_Int new_nf; HYPRE_Int new_nf;
c_dof_func = hypre_TReAlloc_v2(c_dof_func, HYPRE_Int, hypre_IntArraySize(*coarse_dof_func), c_dof_func = hypre_TReAlloc_v2(c_dof_func, HYPRE_Int, hypre_IntArraySize(*coarse_dof_func),
HYPRE_Int, new_ncv, hypre_IntArrayMemoryLocation(*coarse_dof_func)); HYPRE_Int, new_ncv,
hypre_IntArrayMemoryLocation(*coarse_dof_func));
cur_spot = 0; cur_spot = 0;
for (i = 0; i < ncv_peru; i++) for (i = 0; i < ncv_peru; i++)
{ {
@ -2494,15 +2485,17 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
hypre_IntArrayData(*coarse_dof_func) = c_dof_func; hypre_IntArrayData(*coarse_dof_func) = c_dof_func;
hypre_IntArraySize(*coarse_dof_func) = new_ncv; hypre_IntArraySize(*coarse_dof_func) = new_ncv;
/* also we need to update the col starts and global num columns*/ /* also we need to update the col starts and global num columns*/
/* assumes that unknowns are together on a procsessor with /* assumes that unknowns are together on a procsessor with
* nodal coarsening */ * nodal coarsening */
new_col_starts[0] = (col_starts[0] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf ; new_col_starts[0] = (col_starts[0] / (HYPRE_BigInt) num_functions) * (HYPRE_BigInt) new_nf;
new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf; new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt) num_functions) * (HYPRE_BigInt) new_nf;
if (myid == (num_procs - 1)) { g_nc = new_col_starts[1]; } if (myid == (num_procs - 1))
{
g_nc = new_col_starts[1];
}
hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm);
} }
else /* not first level */ else /* not first level */

View File

@ -272,10 +272,10 @@ hypre_MatTCommPkgCreate_core (
} }
} }
hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT, recv_buf, info, displs, HYPRE_MPI_INT, hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT,
recv_buf, info, displs, HYPRE_MPI_BIG_INT,
comm); comm);
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
* determine send_procs and actual elements to be send (in send_map_elmts) * determine send_procs and actual elements to be send (in send_map_elmts)
* and send_map_starts whose i-th entry points to the beginning of the * and send_map_starts whose i-th entry points to the beginning of the

View File

@ -851,7 +851,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix
a_i = hypre_CSRBooleanMatrix_Get_I(A); a_i = hypre_CSRBooleanMatrix_Get_I(A);
a_j = hypre_CSRBooleanMatrix_Get_J(A); a_j = hypre_CSRBooleanMatrix_Get_J(A);
} }
hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_INT, 0, comm); hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_BIG_INT, 0, comm);
global_num_rows = global_data[0]; global_num_rows = global_data[0];
global_num_cols = global_data[1]; global_num_cols = global_data[1];

View File

@ -535,7 +535,7 @@ hypre_VectorToParVector ( MPI_Comm comm,
global_vecstride = hypre_VectorVectorStride(v); global_vecstride = hypre_VectorVectorStride(v);
} }
hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_INT, 0, comm); hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_BIG_INT, 0, comm);
hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm); hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm);
hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, comm); hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, comm);

View File

@ -108,8 +108,8 @@ void hypre_ParChordMatrix_RowStarts(
/* Global number of columns */ /* Global number of columns */
/* hypre_MPI_Allreduce( &num_rdofs, global_num_cols, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm );*/ /* hypre_MPI_Allreduce( &num_rdofs, global_num_cols, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm );*/
hypre_MPI_Allreduce( &min_rdof, &global_min_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MIN, comm ); hypre_MPI_Allreduce( &min_rdof, &global_min_rdof, 1, HYPRE_MPI_BIG_INT, hypre_MPI_MIN, comm );
hypre_MPI_Allreduce( &max_rdof, &global_max_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm ); hypre_MPI_Allreduce( &max_rdof, &global_max_rdof, 1, HYPRE_MPI_BIG_INT, hypre_MPI_MAX, comm );
*global_num_cols = global_max_rdof - global_min_rdof; *global_num_cols = global_max_rdof - global_min_rdof;
} }

View File

@ -91,7 +91,7 @@ using hypre_DeviceItem = void*;
#endif #endif
#define CUSPARSE_NEWAPI_VERSION 11000 #define CUSPARSE_NEWAPI_VERSION 11000
#define CUSPARSE_NEWSPMM_VERSION 11201 #define CUSPARSE_NEWSPMM_VERSION 11401
#define CUDA_MALLOCASYNC_VERSION 11020 #define CUDA_MALLOCASYNC_VERSION 11020
#define THRUST_CALL_BLOCKING 1 #define THRUST_CALL_BLOCKING 1
@ -2793,4 +2793,3 @@ struct hypre_cub_CachingDeviceAllocator
#endif #endif
#endif #endif

View File

@ -39,7 +39,7 @@ using hypre_DeviceItem = void*;
#endif #endif
#define CUSPARSE_NEWAPI_VERSION 11000 #define CUSPARSE_NEWAPI_VERSION 11000
#define CUSPARSE_NEWSPMM_VERSION 11201 #define CUSPARSE_NEWSPMM_VERSION 11401
#define CUDA_MALLOCASYNC_VERSION 11020 #define CUDA_MALLOCASYNC_VERSION 11020
#define THRUST_CALL_BLOCKING 1 #define THRUST_CALL_BLOCKING 1