Bug fixes involving mixedint option and CUDA build (#783)

This PR fixes a few variable types inconsistencies arisen from the mixedint build. Additionally, it fixes the CUDA-11.1.1 build.

* Fix cuSPARSE version tag for using generic SpMM and new SpMV algorithms
* Bug fixes on hypre_ILU: S_row_starts computation and m -> big_m
* Bug fix: HYPRE_MPI_REAL -> HYPRE_MPI_COMPLEX
* Bug fix: HYPRE_Int -> HYPRE_BigInt
* Bug fix: HYPRE_MPI_INT -> HYPRE_MPI_BIG_INT

Co-authored-by: TotoGaz <49004943+TotoGaz@users.noreply.github.com>
This commit is contained in:
Victor A. Paludetto Magri 2022-12-07 19:52:15 -08:00 committed by GitHub
parent af9c59cc34
commit 8f51e49402
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 90 additions and 95 deletions

View File

@ -2562,7 +2562,7 @@ hypre_BoomerAMGIndepPMISa( hypre_ParCSRMatrix *S,
HYPRE_BigInt big_graph_size = (HYPRE_BigInt) graph_size;
/* stop the coarsening if nothing left to be coarsened */
hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
if (global_graph_size == 0)
{

View File

@ -934,7 +934,7 @@ hypre_ILUSetup( void *ilu_vdata,
HYPRE_Int m = n - nLU;
HYPRE_BigInt S_total_rows, S_row_starts[2];
HYPRE_BigInt big_m = (HYPRE_BigInt)m;
hypre_MPI_Allreduce( &big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
if ( S_total_rows > 0 )
{
@ -952,8 +952,8 @@ hypre_ILUSetup( void *ilu_vdata,
/* only do so when we hae the Schur Complement */
{
HYPRE_BigInt global_start;
hypre_MPI_Scan( &big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
S_row_starts[0] = global_start - m;
hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
S_row_starts[0] = global_start - big_m;
S_row_starts[1] = global_start;
}
@ -3353,20 +3353,17 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_Int *perm, HYPRE_Int n,
HYPRE_BigInt S_total_rows, S_row_starts[2];
HYPRE_BigInt big_m = (HYPRE_BigInt)m;
hypre_MPI_Allreduce( &big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
if (S_total_rows > 0)
{
{
HYPRE_BigInt global_start;
hypre_MPI_Scan( &big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
S_row_starts[0] = global_start - m;
hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
S_row_starts[0] = global_start - big_m;
S_row_starts[1] = global_start;
}
S_row_starts[1] = S_total_rows;
S_row_starts[0] = S_total_rows - m;
hypre_MPI_Allreduce(&m, &S_total_rows, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
S = hypre_ParCSRMatrixCreate( hypre_ParCSRMatrixComm(A),
S_total_rows,
S_total_rows,

View File

@ -393,9 +393,10 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A,
num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
v_buf_data = hypre_CTAlloc(HYPRE_Complex,
hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
HYPRE_MEMORY_HOST);
v_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
v_ext_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd, HYPRE_MEMORY_HOST);
status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST);
requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST);
@ -420,7 +421,8 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A,
{
v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)];
}
hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]);
hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
comm, &requests[jr++]);
}
}
hypre_MPI_Waitall(jr, requests, status);
@ -435,7 +437,8 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A,
ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]);
hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
comm, &requests[jr++]);
}
hypre_MPI_Waitall(jr, requests, status);
}
@ -522,9 +525,10 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A,
num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
v_buf_data = hypre_CTAlloc(HYPRE_Complex,
hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
HYPRE_MEMORY_HOST);
v_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
v_ext_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd, HYPRE_MEMORY_HOST);
status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST);
requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST);
@ -568,7 +572,8 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A,
{
v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)];
}
hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]);
hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
comm, &requests[jr++]);
}
}
hypre_MPI_Waitall(jr, requests, status);
@ -583,7 +588,8 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A,
ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]);
hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
comm, &requests[jr++]);
}
hypre_MPI_Waitall(jr, requests, status);
}

View File

@ -194,7 +194,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
HYPRE_BigInt *big_buf_data = NULL;
HYPRE_Real *dbl_buf_data = NULL;
HYPRE_Int g_nc;
HYPRE_BigInt g_nc;
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
@ -324,11 +324,12 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
/* for communication */
num_sends_A = hypre_ParCSRCommPkgNumSends(comm_pkg_A);
int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A,
num_sends_A), HYPRE_MEMORY_HOST);
big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A,
num_sends_A), HYPRE_MEMORY_HOST);
int_buf_data = hypre_CTAlloc(HYPRE_Int,
hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A),
HYPRE_MEMORY_HOST);
big_buf_data = hypre_CTAlloc(HYPRE_BigInt,
hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A),
HYPRE_MEMORY_HOST);
/*-----------------------------------------------------------------------
* create and send and receive fine_to_coarse info.
@ -338,12 +339,14 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
HYPRE_BigInt my_first_cpt;
HYPRE_Int tmp_i;
my_first_cpt = num_cpts_global[0];
/* create the fine to coarse and coarse to fine*/
/* create the fine to coarse and coarse to fine */
fine_to_coarse = hypre_CTAlloc(HYPRE_Int, num_rows_P, HYPRE_MEMORY_HOST);
for (i = 0; i < num_rows_P; i++) { fine_to_coarse[i] = -1; }
for (i = 0; i < num_rows_P; i++)
{
fine_to_coarse[i] = -1;
}
coarse_to_fine = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST);
@ -367,14 +370,12 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i);
for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++)
{
tmp_i = fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j)];
big_buf_data[index++] = (HYPRE_BigInt)tmp_i + my_first_cpt; /* makes it global*/
}
}
comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg_A, big_buf_data,
comm_handle = hypre_ParCSRCommHandleCreate(21, comm_pkg_A, big_buf_data,
fine_to_coarse_offd);
hypre_ParCSRCommHandleDestroy(comm_handle);
@ -384,7 +385,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
* Get the CF_marker data for the off-processor columns of A
*-------------------------------------------------------------------*/
{
if (num_cols_A_offd)
{
CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
@ -405,7 +405,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
}
}
comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg_A, int_buf_data,
comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg_A, int_buf_data,
CF_marker_offd);
hypre_ParCSRCommHandleDestroy(comm_handle);
@ -422,7 +422,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
}
}
comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg_A, int_buf_data,
comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg_A, int_buf_data,
dof_func_offd);
hypre_ParCSRCommHandleDestroy(comm_handle);
@ -437,7 +437,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
HYPRE_Int kc;
HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstColDiag(*P);
HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)hypre_CSRMatrixNumCols(P_diag);
HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) hypre_CSRMatrixNumCols(P_diag);
if (num_procs > 1)
{
@ -482,9 +482,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
P_ext_i[i] = P_ext_i[i - 1];
}
if (num_procs > 1) { P_ext_i[0] = 0; }
if (num_procs > 1)
{
P_ext_i[0] = 0;
}
} /* end of ghost rows */
/*-------------------------------------------------------------------
@ -517,7 +518,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
new_nnz_diag = nnz_diag + nnz_diag * num_smooth_vecs;
new_nnz_offd = nnz_offd + nnz_offd * num_smooth_vecs;
/* new number of coarse variables */
if (level == interp_vec_first_level )
{
@ -528,7 +528,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
new_ncv = ncv; /* unchanged on level > first_level */
}
/* allocations */
P_diag_j_new = hypre_CTAlloc(HYPRE_Int, new_nnz_diag, memory_location_P);
P_diag_data_new = hypre_CTAlloc(HYPRE_Real, new_nnz_diag, memory_location_P);
@ -542,7 +541,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
P_diag_i_new[0] = P_diag_i[0];
P_offd_i_new[0] = P_offd_i[0];
/* doing truncation? if so, need some more allocations*/
if (q_max > 0 || abs_trunc > 0.0)
{
@ -567,7 +565,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
{
HYPRE_Int fine_index;
smooth_vec_offd = hypre_CTAlloc(HYPRE_Real, num_cols_A_offd * num_smooth_vecs, HYPRE_MEMORY_HOST);
smooth_vec_offd = hypre_CTAlloc(HYPRE_Real,
num_cols_A_offd * num_smooth_vecs,
HYPRE_MEMORY_HOST);
/* for now, do a seperate comm for each smooth vector */
for (k = 0; k < num_smooth_vecs; k++)
@ -576,8 +576,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
vector = smooth_vecs[k];
vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector));
dbl_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A,
num_sends_A), HYPRE_MEMORY_HOST);
dbl_buf_data = hypre_CTAlloc(HYPRE_Real,
hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A),
HYPRE_MEMORY_HOST);
/* point into smooth_vec_offd */
offd_vec_data = smooth_vec_offd + k * num_cols_A_offd;
@ -587,23 +589,19 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i);
for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++)
{
fine_index = hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j);
dbl_buf_data[index++] = vec_data[fine_index];
}
}
comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg_A, dbl_buf_data,
offd_vec_data);
comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg_A, dbl_buf_data, offd_vec_data);
hypre_ParCSRCommHandleDestroy(comm_handle);
hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST);
} /* end of smooth vecs */
}/*end num procs > 1 */
/*-------------------------------------------------------------------
* Get smooth vec components for the off-processor columns of P
* TO Do: would be less storage to get the offd coarse to fine
@ -613,19 +611,21 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
if (num_procs > 1)
{
HYPRE_Int c_index, fine_index;
smooth_vec_offd_P = hypre_CTAlloc(HYPRE_Real, num_cols_P_offd * num_smooth_vecs,
smooth_vec_offd_P = hypre_CTAlloc(HYPRE_Real,
num_cols_P_offd * num_smooth_vecs,
HYPRE_MEMORY_HOST);
/* for now, do a seperate comm for each smooth vector */
for (k = 0; k < num_smooth_vecs; k++)
{
vector = smooth_vecs[k];
vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector));
num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg_P);
dbl_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg_P,
num_sends), HYPRE_MEMORY_HOST);
dbl_buf_data = hypre_CTAlloc(HYPRE_Real,
hypre_ParCSRCommPkgSendMapStart(comm_pkg_P, num_sends),
HYPRE_MEMORY_HOST);
/* point into smooth_vec_offd_P */
offd_vec_data_P = smooth_vec_offd_P + k * num_cols_P_offd;
@ -640,24 +640,20 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
fine_index = coarse_to_fine[c_index];
dbl_buf_data[index++] = vec_data[fine_index];
}
}
comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg_P, dbl_buf_data,
comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg_P, dbl_buf_data,
offd_vec_data_P);
hypre_ParCSRCommHandleDestroy(comm_handle);
hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST);
}
}/*end num procs > 1 */
} /*end num procs > 1 */
/*-------------------------------------------------------------------
* Main loop!
*-------------------------------------------------------------------*/
/******** loop through rows - only operate on rows of original functions******/
j_diag_pos = 0;
@ -699,12 +695,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
orig_diag_start = P_diag_i[i];
orig_offd_start = P_offd_i[i];
/* if original function dofs? or a new one that we don't want
* to modify*/
if (fcn_num < orig_nf || modify == 0 )
{
/* for this row, will we add q entries ? */
if (fcn_num < orig_nf && num_smooth_vecs)
{
@ -748,7 +742,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
* use col_map[index]*/
if (num_smooth_vecs && (level == interp_vec_first_level))
{
big_new_col = big_index + (big_index / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)num_smooth_vecs;
big_new_col = big_index +
(big_index / (HYPRE_BigInt)num_functions) *
(HYPRE_BigInt)num_smooth_vecs;
}
else /* no adjustment */
{
@ -772,7 +768,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
/* orig_row_sum += P_diag_data[orig_diag_start + j]; */
P_diag_data_new[j_diag_pos] = 0.0;
new_col = col_map[ P_diag_j[orig_diag_start + j]];
new_col = col_map[P_diag_j[orig_diag_start + j]];
P_diag_j_new[j_diag_pos] = new_col;
j_diag_pos++;
@ -800,10 +796,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
num_new_p_diag++;
q_count++;
p_count_diag++;
}
}
}
}
}
}
/* offd */
p_count_offd = p_count_diag; /* for indexing into is_q*/
for (j = 0; j < p_num_offd_elements; j++)
@ -858,7 +854,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
num_new_p_offd++;
q_count++;
p_count_offd++;
}
}
}
@ -884,7 +879,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
{
r_extra[k] += A_diag_data[jj] * vec_data[i1];
}
}
offd_vec_data = smooth_vec_offd + k * num_cols_A_offd;
@ -910,7 +904,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
{
af_sum += A_diag_data[jj];
}
}
for (jj = A_offd_i[i]; jj < A_offd_i[i + 1]; jj++)
{
@ -919,7 +912,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
{
af_sum += A_offd_data[jj];
}
}
if (af_sum != 0.0)
@ -935,7 +927,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
}
/* now we will use the adjustment later */
/* now if we have any coarse connections with no
corresponding point in orig p, then these we have to
distibute and treat as fine, basically*/
@ -985,6 +976,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
}
}
} /* end diag loop */
/* now offd loop */
for (jj = A_offd_i[i]; jj < A_offd_i[i + 1]; jj++)
{
@ -1103,7 +1095,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
/* if (a_ij*d_sign > 0)
continue;*/
found = 0;
if (CF_marker[jj_point] >= 0) /*coarse*/
{
@ -1226,7 +1217,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
P_diag_data_new[cur_spot + k + 1] += q_val;
}
}
kk_count++;
} /* did each element of p_diag */
@ -2479,7 +2469,8 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
HYPRE_Int new_nf;
c_dof_func = hypre_TReAlloc_v2(c_dof_func, HYPRE_Int, hypre_IntArraySize(*coarse_dof_func),
HYPRE_Int, new_ncv, hypre_IntArrayMemoryLocation(*coarse_dof_func));
HYPRE_Int, new_ncv,
hypre_IntArrayMemoryLocation(*coarse_dof_func));
cur_spot = 0;
for (i = 0; i < ncv_peru; i++)
{
@ -2494,15 +2485,17 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
hypre_IntArrayData(*coarse_dof_func) = c_dof_func;
hypre_IntArraySize(*coarse_dof_func) = new_ncv;
/* also we need to update the col starts and global num columns*/
/* assumes that unknowns are together on a procsessor with
* nodal coarsening */
new_col_starts[0] = (col_starts[0] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf ;
new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf;
new_col_starts[0] = (col_starts[0] / (HYPRE_BigInt) num_functions) * (HYPRE_BigInt) new_nf;
new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt) num_functions) * (HYPRE_BigInt) new_nf;
if (myid == (num_procs - 1)) { g_nc = new_col_starts[1]; }
if (myid == (num_procs - 1))
{
g_nc = new_col_starts[1];
}
hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm);
}
else /* not first level */

View File

@ -272,10 +272,10 @@ hypre_MatTCommPkgCreate_core (
}
}
hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT, recv_buf, info, displs, HYPRE_MPI_INT,
hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT,
recv_buf, info, displs, HYPRE_MPI_BIG_INT,
comm);
/* ----------------------------------------------------------------------
* determine send_procs and actual elements to be send (in send_map_elmts)
* and send_map_starts whose i-th entry points to the beginning of the

View File

@ -851,7 +851,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix
a_i = hypre_CSRBooleanMatrix_Get_I(A);
a_j = hypre_CSRBooleanMatrix_Get_J(A);
}
hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_INT, 0, comm);
hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_BIG_INT, 0, comm);
global_num_rows = global_data[0];
global_num_cols = global_data[1];

View File

@ -535,7 +535,7 @@ hypre_VectorToParVector ( MPI_Comm comm,
global_vecstride = hypre_VectorVectorStride(v);
}
hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_INT, 0, comm);
hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_BIG_INT, 0, comm);
hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm);
hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, comm);

View File

@ -108,8 +108,8 @@ void hypre_ParChordMatrix_RowStarts(
/* Global number of columns */
/* hypre_MPI_Allreduce( &num_rdofs, global_num_cols, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm );*/
hypre_MPI_Allreduce( &min_rdof, &global_min_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MIN, comm );
hypre_MPI_Allreduce( &max_rdof, &global_max_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm );
hypre_MPI_Allreduce( &min_rdof, &global_min_rdof, 1, HYPRE_MPI_BIG_INT, hypre_MPI_MIN, comm );
hypre_MPI_Allreduce( &max_rdof, &global_max_rdof, 1, HYPRE_MPI_BIG_INT, hypre_MPI_MAX, comm );
*global_num_cols = global_max_rdof - global_min_rdof;
}

View File

@ -91,7 +91,7 @@ using hypre_DeviceItem = void*;
#endif
#define CUSPARSE_NEWAPI_VERSION 11000
#define CUSPARSE_NEWSPMM_VERSION 11201
#define CUSPARSE_NEWSPMM_VERSION 11401
#define CUDA_MALLOCASYNC_VERSION 11020
#define THRUST_CALL_BLOCKING 1
@ -2793,4 +2793,3 @@ struct hypre_cub_CachingDeviceAllocator
#endif
#endif

View File

@ -39,7 +39,7 @@ using hypre_DeviceItem = void*;
#endif
#define CUSPARSE_NEWAPI_VERSION 11000
#define CUSPARSE_NEWSPMM_VERSION 11201
#define CUSPARSE_NEWSPMM_VERSION 11401
#define CUDA_MALLOCASYNC_VERSION 11020
#define THRUST_CALL_BLOCKING 1