From 8f51e494024be1f9738f7b2360cfb7fa73c3e13a Mon Sep 17 00:00:00 2001 From: "Victor A. Paludetto Magri" <50467563+victorapm@users.noreply.github.com> Date: Wed, 7 Dec 2022 19:52:15 -0800 Subject: [PATCH] Bug fixes involving mixedint option and CUDA build (#783) This PR fixes a few variable type inconsistencies arising from the mixedint build. Additionally, it fixes the CUDA-11.1.1 build. * Fix cuSPARSE version tag for using generic SpMM and new SpMV algorithms * Bug fixes on hypre_ILU: S_row_starts computation and m -> big_m * Bug fix: HYPRE_MPI_REAL -> HYPRE_MPI_COMPLEX * Bug fix: HYPRE_Int -> HYPRE_BigInt * Bug fix: HYPRE_MPI_INT -> HYPRE_MPI_BIG_INT Co-authored-by: TotoGaz <49004943+TotoGaz@users.noreply.github.com> --- src/parcsr_ls/par_cr.c | 2 +- src/parcsr_ls/par_ilu_setup.c | 15 ++-- src/parcsr_ls/par_relax.c | 22 +++-- src/parcsr_ls/par_sv_interp_ln.c | 129 +++++++++++++--------------- src/parcsr_mv/communicationT.c | 4 +- src/parcsr_mv/par_csr_bool_matrix.c | 2 +- src/parcsr_mv/par_vector.c | 2 +- src/parcsr_mv/parchord_to_parcsr.c | 4 +- src/utilities/_hypre_utilities.hpp | 3 +- src/utilities/device_utils.h | 2 +- 10 files changed, 90 insertions(+), 95 deletions(-) diff --git a/src/parcsr_ls/par_cr.c b/src/parcsr_ls/par_cr.c index cd11ac908..42f16d22f 100644 --- a/src/parcsr_ls/par_cr.c +++ b/src/parcsr_ls/par_cr.c @@ -2562,7 +2562,7 @@ hypre_BoomerAMGIndepPMISa( hypre_ParCSRMatrix *S, HYPRE_BigInt big_graph_size = (HYPRE_BigInt) graph_size; /* stop the coarsening if nothing left to be coarsened */ - hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); if (global_graph_size == 0) { diff --git a/src/parcsr_ls/par_ilu_setup.c b/src/parcsr_ls/par_ilu_setup.c index aea83ff6c..fc7e6d5e2 100644 --- a/src/parcsr_ls/par_ilu_setup.c +++ b/src/parcsr_ls/par_ilu_setup.c @@ -934,7 +934,7 @@ hypre_ILUSetup( void 
*ilu_vdata, HYPRE_Int m = n - nLU; HYPRE_BigInt S_total_rows, S_row_starts[2]; HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce( &big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); if ( S_total_rows > 0 ) { @@ -952,8 +952,8 @@ hypre_ILUSetup( void *ilu_vdata, /* only do so when we hae the Schur Complement */ { HYPRE_BigInt global_start; - hypre_MPI_Scan( &big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); - S_row_starts[0] = global_start - m; + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + S_row_starts[0] = global_start - big_m; S_row_starts[1] = global_start; } @@ -3353,20 +3353,17 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_Int *perm, HYPRE_Int n, HYPRE_BigInt S_total_rows, S_row_starts[2]; HYPRE_BigInt big_m = (HYPRE_BigInt)m; - hypre_MPI_Allreduce( &big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); if (S_total_rows > 0) { { HYPRE_BigInt global_start; - hypre_MPI_Scan( &big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); - S_row_starts[0] = global_start - m; + hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm); + S_row_starts[0] = global_start - big_m; S_row_starts[1] = global_start; } - S_row_starts[1] = S_total_rows; - S_row_starts[0] = S_total_rows - m; - hypre_MPI_Allreduce(&m, &S_total_rows, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); S = hypre_ParCSRMatrixCreate( hypre_ParCSRMatrixComm(A), S_total_rows, S_total_rows, diff --git a/src/parcsr_ls/par_relax.c b/src/parcsr_ls/par_relax.c index 60b6d6a6d..1012babef 100644 --- a/src/parcsr_ls/par_relax.c +++ b/src/parcsr_ls/par_relax.c @@ -393,9 +393,10 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A, num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = 
hypre_ParCSRCommPkgNumRecvs(comm_pkg); - v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + v_buf_data = hypre_CTAlloc(HYPRE_Complex, + hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST); - v_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST); + v_ext_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd, HYPRE_MEMORY_HOST); status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST); requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST); @@ -420,7 +421,8 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A, { v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]; } - hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]); + hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, + comm, &requests[jr++]); } } hypre_MPI_Waitall(jr, requests, status); @@ -435,7 +437,8 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A, ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]); + hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, + comm, &requests[jr++]); } hypre_MPI_Waitall(jr, requests, status); } @@ -522,9 +525,10 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A, num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); - v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), + v_buf_data = hypre_CTAlloc(HYPRE_Complex, + hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends), HYPRE_MEMORY_HOST); - v_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST); + v_ext_data = hypre_CTAlloc(HYPRE_Complex, 
num_cols_offd, HYPRE_MEMORY_HOST); status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST); requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST); @@ -568,7 +572,8 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A, { v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]; } - hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]); + hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, + comm, &requests[jr++]); } } hypre_MPI_Waitall(jr, requests, status); @@ -583,7 +588,8 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A, ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i); vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i); vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start; - hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]); + hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0, + comm, &requests[jr++]); } hypre_MPI_Waitall(jr, requests, status); } diff --git a/src/parcsr_ls/par_sv_interp_ln.c b/src/parcsr_ls/par_sv_interp_ln.c index b9f58f672..f28c1d032 100644 --- a/src/parcsr_ls/par_sv_interp_ln.c +++ b/src/parcsr_ls/par_sv_interp_ln.c @@ -194,7 +194,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, HYPRE_BigInt *big_buf_data = NULL; HYPRE_Real *dbl_buf_data = NULL; - HYPRE_Int g_nc; + HYPRE_BigInt g_nc; HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A); @@ -324,11 +324,12 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, /* for communication */ num_sends_A = hypre_ParCSRCommPkgNumSends(comm_pkg_A); - int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, - num_sends_A), HYPRE_MEMORY_HOST); - big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, - num_sends_A), HYPRE_MEMORY_HOST); - + int_buf_data = 
hypre_CTAlloc(HYPRE_Int, + hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A), + HYPRE_MEMORY_HOST); + big_buf_data = hypre_CTAlloc(HYPRE_BigInt, + hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A), + HYPRE_MEMORY_HOST); /*----------------------------------------------------------------------- * create and send and receive fine_to_coarse info. @@ -338,14 +339,16 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, HYPRE_BigInt my_first_cpt; HYPRE_Int tmp_i; - my_first_cpt = num_cpts_global[0]; - /* create the fine to coarse and coarse to fine*/ - fine_to_coarse = hypre_CTAlloc(HYPRE_Int, num_rows_P, HYPRE_MEMORY_HOST); - for (i = 0; i < num_rows_P; i++) { fine_to_coarse[i] = -1; } + /* create the fine to coarse and coarse to fine */ + fine_to_coarse = hypre_CTAlloc(HYPRE_Int, num_rows_P, HYPRE_MEMORY_HOST); + for (i = 0; i < num_rows_P; i++) + { + fine_to_coarse[i] = -1; + } - coarse_to_fine = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST); + coarse_to_fine = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST); coarse_counter = 0; for (i = 0; i < num_rows_P; i++) @@ -367,15 +370,13 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++) { - tmp_i = fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j)]; big_buf_data[index++] = (HYPRE_BigInt)tmp_i + my_first_cpt; /* makes it global*/ } - } - comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg_A, big_buf_data, - fine_to_coarse_offd); + comm_handle = hypre_ParCSRCommHandleCreate(21, comm_pkg_A, big_buf_data, + fine_to_coarse_offd); hypre_ParCSRCommHandleDestroy(comm_handle); } /* end fine to coarse {} */ @@ -384,7 +385,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, * Get the CF_marker data for the off-processor columns of A *-------------------------------------------------------------------*/ { - if 
(num_cols_A_offd) { CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST); @@ -405,8 +405,8 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, } } - comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg_A, int_buf_data, - CF_marker_offd); + comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg_A, int_buf_data, + CF_marker_offd); hypre_ParCSRCommHandleDestroy(comm_handle); if (num_functions > 1) @@ -422,8 +422,8 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, } } - comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg_A, int_buf_data, - dof_func_offd); + comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg_A, int_buf_data, + dof_func_offd); hypre_ParCSRCommHandleDestroy(comm_handle); } @@ -437,7 +437,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, HYPRE_Int kc; HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstColDiag(*P); - HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)hypre_CSRMatrixNumCols(P_diag); + HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) hypre_CSRMatrixNumCols(P_diag); if (num_procs > 1) { @@ -482,9 +482,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, P_ext_i[i] = P_ext_i[i - 1]; } - if (num_procs > 1) { P_ext_i[0] = 0; } - - + if (num_procs > 1) + { + P_ext_i[0] = 0; + } } /* end of ghost rows */ /*------------------------------------------------------------------- @@ -493,7 +494,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, /* if level = first_level, we need to fix the col numbering to leave * space for the new unknowns */ - col_map = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST); + col_map = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST); if (num_smooth_vecs && level == interp_vec_first_level) { @@ -517,7 +518,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, new_nnz_diag = nnz_diag + nnz_diag * num_smooth_vecs; new_nnz_offd = nnz_offd + nnz_offd * num_smooth_vecs; - /* new number of coarse variables 
*/ if (level == interp_vec_first_level ) { @@ -528,7 +528,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, new_ncv = ncv; /* unchanged on level > first_level */ } - /* allocations */ P_diag_j_new = hypre_CTAlloc(HYPRE_Int, new_nnz_diag, memory_location_P); P_diag_data_new = hypre_CTAlloc(HYPRE_Real, new_nnz_diag, memory_location_P); @@ -542,7 +541,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, P_diag_i_new[0] = P_diag_i[0]; P_offd_i_new[0] = P_offd_i[0]; - /* doing truncation? if so, need some more allocations*/ if (q_max > 0 || abs_trunc > 0.0) { @@ -567,7 +565,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, { HYPRE_Int fine_index; - smooth_vec_offd = hypre_CTAlloc(HYPRE_Real, num_cols_A_offd * num_smooth_vecs, HYPRE_MEMORY_HOST); + smooth_vec_offd = hypre_CTAlloc(HYPRE_Real, + num_cols_A_offd * num_smooth_vecs, + HYPRE_MEMORY_HOST); /* for now, do a seperate comm for each smooth vector */ for (k = 0; k < num_smooth_vecs; k++) @@ -576,8 +576,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, vector = smooth_vecs[k]; vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector)); - dbl_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, - num_sends_A), HYPRE_MEMORY_HOST); + dbl_buf_data = hypre_CTAlloc(HYPRE_Real, + hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A), + HYPRE_MEMORY_HOST); + /* point into smooth_vec_offd */ offd_vec_data = smooth_vec_offd + k * num_cols_A_offd; @@ -587,23 +589,19 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++) { - fine_index = hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j); dbl_buf_data[index++] = vec_data[fine_index]; } - } - comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg_A, dbl_buf_data, - offd_vec_data); + comm_handle = hypre_ParCSRCommHandleCreate(1, 
comm_pkg_A, dbl_buf_data, offd_vec_data); hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST); } /* end of smooth vecs */ }/*end num procs > 1 */ - /*------------------------------------------------------------------- * Get smooth vec components for the off-processor columns of P * TO Do: would be less storage to get the offd coarse to fine @@ -613,19 +611,21 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, if (num_procs > 1) { HYPRE_Int c_index, fine_index; - smooth_vec_offd_P = hypre_CTAlloc(HYPRE_Real, num_cols_P_offd * num_smooth_vecs, - HYPRE_MEMORY_HOST); + smooth_vec_offd_P = hypre_CTAlloc(HYPRE_Real, + num_cols_P_offd * num_smooth_vecs, + HYPRE_MEMORY_HOST); /* for now, do a seperate comm for each smooth vector */ for (k = 0; k < num_smooth_vecs; k++) { - vector = smooth_vecs[k]; vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector)); num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg_P); - dbl_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg_P, - num_sends), HYPRE_MEMORY_HOST); + dbl_buf_data = hypre_CTAlloc(HYPRE_Real, + hypre_ParCSRCommPkgSendMapStart(comm_pkg_P, num_sends), + HYPRE_MEMORY_HOST); + /* point into smooth_vec_offd_P */ offd_vec_data_P = smooth_vec_offd_P + k * num_cols_P_offd; @@ -640,24 +640,20 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, fine_index = coarse_to_fine[c_index]; dbl_buf_data[index++] = vec_data[fine_index]; } - } - comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg_P, dbl_buf_data, - offd_vec_data_P); + comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg_P, dbl_buf_data, + offd_vec_data_P); hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST); } - - }/*end num procs > 1 */ - + } /*end num procs > 1 */ /*------------------------------------------------------------------- * Main loop! 
*-------------------------------------------------------------------*/ - /******** loop through rows - only operate on rows of original functions******/ j_diag_pos = 0; @@ -699,12 +695,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, orig_diag_start = P_diag_i[i]; orig_offd_start = P_offd_i[i]; - /* if original function dofs? or a new one that we don't want * to modify*/ if (fcn_num < orig_nf || modify == 0 ) { - /* for this row, will we add q entries ? */ if (fcn_num < orig_nf && num_smooth_vecs) { @@ -748,7 +742,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, * use col_map[index]*/ if (num_smooth_vecs && (level == interp_vec_first_level)) { - big_new_col = big_index + (big_index / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)num_smooth_vecs; + big_new_col = big_index + + (big_index / (HYPRE_BigInt)num_functions) * + (HYPRE_BigInt)num_smooth_vecs; } else /* no adjustment */ { @@ -772,7 +768,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, /* orig_row_sum += P_diag_data[orig_diag_start + j]; */ P_diag_data_new[j_diag_pos] = 0.0; - new_col = col_map[ P_diag_j[orig_diag_start + j]]; + new_col = col_map[P_diag_j[orig_diag_start + j]]; P_diag_j_new[j_diag_pos] = new_col; j_diag_pos++; @@ -800,10 +796,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, num_new_p_diag++; q_count++; p_count_diag++; - } } } + /* offd */ p_count_offd = p_count_diag; /* for indexing into is_q*/ for (j = 0; j < p_num_offd_elements; j++) @@ -858,7 +854,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, num_new_p_offd++; q_count++; p_count_offd++; - } } } @@ -884,7 +879,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, { r_extra[k] += A_diag_data[jj] * vec_data[i1]; } - } offd_vec_data = smooth_vec_offd + k * num_cols_A_offd; @@ -910,7 +904,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, { af_sum += A_diag_data[jj]; } - } for (jj = A_offd_i[i]; jj < 
A_offd_i[i + 1]; jj++) { @@ -919,7 +912,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, { af_sum += A_offd_data[jj]; } - } if (af_sum != 0.0) @@ -935,7 +927,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, } /* now we will use the adjustment later */ - /* now if we have any coarse connections with no corresponding point in orig p, then these we have to distibute and treat as fine, basically*/ @@ -985,6 +976,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, } } } /* end diag loop */ + /* now offd loop */ for (jj = A_offd_i[i]; jj < A_offd_i[i + 1]; jj++) { @@ -1103,7 +1095,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, /* if (a_ij*d_sign > 0) continue;*/ - found = 0; if (CF_marker[jj_point] >= 0) /*coarse*/ { @@ -1200,11 +1191,11 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, if (add_q) { - cur_spot = P_diag_i_new[i] + kk_count * (num_smooth_vecs + 1); + cur_spot = P_diag_i_new[i] + kk_count * (num_smooth_vecs + 1); } else { - cur_spot = P_diag_i_new[i] + kk_count; + cur_spot = P_diag_i_new[i] + kk_count; } P_diag_data_new[cur_spot] += aw; @@ -1226,7 +1217,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, P_diag_data_new[cur_spot + k + 1] += q_val; } - } kk_count++; } /* did each element of p_diag */ @@ -2478,8 +2468,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, { HYPRE_Int new_nf; - c_dof_func = hypre_TReAlloc_v2(c_dof_func, HYPRE_Int, hypre_IntArraySize(*coarse_dof_func), - HYPRE_Int, new_ncv, hypre_IntArrayMemoryLocation(*coarse_dof_func)); + c_dof_func = hypre_TReAlloc_v2(c_dof_func, HYPRE_Int, hypre_IntArraySize(*coarse_dof_func), + HYPRE_Int, new_ncv, + hypre_IntArrayMemoryLocation(*coarse_dof_func)); cur_spot = 0; for (i = 0; i < ncv_peru; i++) { @@ -2494,15 +2485,17 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A, hypre_IntArrayData(*coarse_dof_func) = c_dof_func; 
hypre_IntArraySize(*coarse_dof_func) = new_ncv; - /* also we need to update the col starts and global num columns*/ /* assumes that unknowns are together on a procsessor with * nodal coarsening */ - new_col_starts[0] = (col_starts[0] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf ; - new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf; + new_col_starts[0] = (col_starts[0] / (HYPRE_BigInt) num_functions) * (HYPRE_BigInt) new_nf; + new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt) num_functions) * (HYPRE_BigInt) new_nf; - if (myid == (num_procs - 1)) { g_nc = new_col_starts[1]; } + if (myid == (num_procs - 1)) + { + g_nc = new_col_starts[1]; + } hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm); } else /* not first level */ diff --git a/src/parcsr_mv/communicationT.c b/src/parcsr_mv/communicationT.c index 0c1d0f3c6..d5244a5a6 100644 --- a/src/parcsr_mv/communicationT.c +++ b/src/parcsr_mv/communicationT.c @@ -272,10 +272,10 @@ hypre_MatTCommPkgCreate_core ( } } - hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT, recv_buf, info, displs, HYPRE_MPI_INT, + hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT, + recv_buf, info, displs, HYPRE_MPI_BIG_INT, comm); - /* ---------------------------------------------------------------------- * determine send_procs and actual elements to be send (in send_map_elmts) * and send_map_starts whose i-th entry points to the beginning of the diff --git a/src/parcsr_mv/par_csr_bool_matrix.c b/src/parcsr_mv/par_csr_bool_matrix.c index e00b1f37a..1f56344b0 100644 --- a/src/parcsr_mv/par_csr_bool_matrix.c +++ b/src/parcsr_mv/par_csr_bool_matrix.c @@ -851,7 +851,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix a_i = hypre_CSRBooleanMatrix_Get_I(A); a_j = hypre_CSRBooleanMatrix_Get_J(A); } - hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_BIG_INT, 0, comm); global_num_rows = global_data[0]; global_num_cols = 
global_data[1]; diff --git a/src/parcsr_mv/par_vector.c b/src/parcsr_mv/par_vector.c index 8e3ba365a..37905977e 100644 --- a/src/parcsr_mv/par_vector.c +++ b/src/parcsr_mv/par_vector.c @@ -535,7 +535,7 @@ hypre_VectorToParVector ( MPI_Comm comm, global_vecstride = hypre_VectorVectorStride(v); } - hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_INT, 0, comm); + hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_BIG_INT, 0, comm); hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm); hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, comm); diff --git a/src/parcsr_mv/parchord_to_parcsr.c b/src/parcsr_mv/parchord_to_parcsr.c index db70f861c..ea0237553 100644 --- a/src/parcsr_mv/parchord_to_parcsr.c +++ b/src/parcsr_mv/parchord_to_parcsr.c @@ -108,8 +108,8 @@ void hypre_ParChordMatrix_RowStarts( /* Global number of columns */ /* hypre_MPI_Allreduce( &num_rdofs, global_num_cols, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm );*/ - hypre_MPI_Allreduce( &min_rdof, &global_min_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MIN, comm ); - hypre_MPI_Allreduce( &max_rdof, &global_max_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm ); + hypre_MPI_Allreduce( &min_rdof, &global_min_rdof, 1, HYPRE_MPI_BIG_INT, hypre_MPI_MIN, comm ); + hypre_MPI_Allreduce( &max_rdof, &global_max_rdof, 1, HYPRE_MPI_BIG_INT, hypre_MPI_MAX, comm ); *global_num_cols = global_max_rdof - global_min_rdof; } diff --git a/src/utilities/_hypre_utilities.hpp b/src/utilities/_hypre_utilities.hpp index 98ec12c02..d03b6a878 100644 --- a/src/utilities/_hypre_utilities.hpp +++ b/src/utilities/_hypre_utilities.hpp @@ -91,7 +91,7 @@ using hypre_DeviceItem = void*; #endif #define CUSPARSE_NEWAPI_VERSION 11000 -#define CUSPARSE_NEWSPMM_VERSION 11201 +#define CUSPARSE_NEWSPMM_VERSION 11401 #define CUDA_MALLOCASYNC_VERSION 11020 #define THRUST_CALL_BLOCKING 1 @@ -2793,4 +2793,3 @@ struct hypre_cub_CachingDeviceAllocator #endif #endif - diff --git a/src/utilities/device_utils.h b/src/utilities/device_utils.h index 0ccd0580a..a15302975 100644 --- 
a/src/utilities/device_utils.h +++ b/src/utilities/device_utils.h @@ -39,7 +39,7 @@ using hypre_DeviceItem = void*; #endif #define CUSPARSE_NEWAPI_VERSION 11000 -#define CUSPARSE_NEWSPMM_VERSION 11201 +#define CUSPARSE_NEWSPMM_VERSION 11401 #define CUDA_MALLOCASYNC_VERSION 11020 #define THRUST_CALL_BLOCKING 1