Bug fixes involving mixedint option and CUDA build (#783)
This PR fixes a few variable-type inconsistencies that arose from the mixedint build. Additionally, it fixes the CUDA 11.1.1 build.

* Fix cuSPARSE version tag for using generic SpMM and new SpMV algorithms
* Bug fixes on hypre_ILU: S_row_starts computation and m -> big_m
* Bug fix: HYPRE_MPI_REAL -> HYPRE_MPI_COMPLEX
* Bug fix: HYPRE_Int -> HYPRE_BigInt
* Bug fix: HYPRE_MPI_INT -> HYPRE_MPI_BIG_INT

Co-authored-by: TotoGaz <49004943+TotoGaz@users.noreply.github.com>
Parent: af9c59cc34
Commit: 8f51e49402
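Note: a minimal sketch (illustrative, not code from this PR) of the class of bug these changes address. With the mixedint option, HYPRE_BigInt is a 64-bit type while HYPRE_Int stays 32-bit, so any MPI call that touches a HYPRE_BigInt buffer must pass HYPRE_MPI_BIG_INT; passing HYPRE_MPI_INT makes MPI move the wrong number of bytes per element. The function and variable names below are assumptions for the sketch.

#include "_hypre_utilities.h"

/* Sketch: reduce a local count to a global count under mixedint.
 * The MPI datatype must describe the buffer actually passed in. */
void global_count_sketch(hypre_MPI_Comm comm, HYPRE_Int local_n)
{
   HYPRE_BigInt big_n = (HYPRE_BigInt) local_n;   /* widen before communicating */
   HYPRE_BigInt global_n;

   /* Wrong under mixedint: HYPRE_MPI_INT describes 32-bit elements,
    * but &big_n points at 64-bit storage, so ranks exchange garbage. */
   /* hypre_MPI_Allreduce(&big_n, &global_n, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); */

   /* Correct: the datatype matches the 64-bit buffer. */
   hypre_MPI_Allreduce(&big_n, &global_n, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
}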
@@ -2562,7 +2562,7 @@ hypre_BoomerAMGIndepPMISa( hypre_ParCSRMatrix *S,
    HYPRE_BigInt big_graph_size = (HYPRE_BigInt) graph_size;

    /* stop the coarsening if nothing left to be coarsened */
-   hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
+   hypre_MPI_Allreduce(&big_graph_size, &global_graph_size, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);

    if (global_graph_size == 0)
    {
@@ -934,7 +934,7 @@ hypre_ILUSetup( void *ilu_vdata,
    HYPRE_Int m = n - nLU;
    HYPRE_BigInt S_total_rows, S_row_starts[2];
    HYPRE_BigInt big_m = (HYPRE_BigInt)m;
-   hypre_MPI_Allreduce( &big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
+   hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);

    if ( S_total_rows > 0 )
    {
@@ -952,8 +952,8 @@ hypre_ILUSetup( void *ilu_vdata,
       /* only do so when we hae the Schur Complement */
       {
          HYPRE_BigInt global_start;
-         hypre_MPI_Scan( &big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
-         S_row_starts[0] = global_start - m;
+         hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
+         S_row_starts[0] = global_start - big_m;
          S_row_starts[1] = global_start;
       }
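Note: the S_row_starts computation above is the standard prefix-sum ownership pattern; a hedged sketch of the idea, reusing the names from the hunk. Arithmetically `global_start - m` promotes to HYPRE_BigInt anyway; the PR switches the operand to the already-widened big_m for type consistency.

/* Sketch: each rank owns big_m rows of the Schur complement S.
 * An inclusive MPI scan of big_m yields this rank's end offset;
 * subtracting the local count gives the start. */
HYPRE_BigInt global_start;   /* inclusive prefix sum over ranks */
hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
S_row_starts[0] = global_start - big_m;   /* first global row owned here */
S_row_starts[1] = global_start;           /* one past the last row owned */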
@@ -3353,20 +3353,17 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_Int *perm, HYPRE_Int n,

    HYPRE_BigInt S_total_rows, S_row_starts[2];
    HYPRE_BigInt big_m = (HYPRE_BigInt)m;
-   hypre_MPI_Allreduce( &big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
+   hypre_MPI_Allreduce(&big_m, &S_total_rows, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);

    if (S_total_rows > 0)
    {
       {
          HYPRE_BigInt global_start;
-         hypre_MPI_Scan( &big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
-         S_row_starts[0] = global_start - m;
+         hypre_MPI_Scan(&big_m, &global_start, 1, HYPRE_MPI_BIG_INT, hypre_MPI_SUM, comm);
+         S_row_starts[0] = global_start - big_m;
          S_row_starts[1] = global_start;
       }

-      S_row_starts[1] = S_total_rows;
-      S_row_starts[0] = S_total_rows - m;
-      hypre_MPI_Allreduce(&m, &S_total_rows, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
       S = hypre_ParCSRMatrixCreate( hypre_ParCSRMatrixComm(A),
                                     S_total_rows,
                                     S_total_rows,
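Note: the three deleted lines assigned the same range ending at the global total on every rank, which is only correct on the last rank (or with a single rank). A worked example under assumed rank counts:

/* Sketch: why the deleted Allreduce-only computation was wrong.
 * Two ranks with local m = 3 and 5, so S_total_rows = 8 after the reduce.
 *
 *   old code on rank 0: S_row_starts = { 8 - 3, 8 } = { 5, 8 }  (wrong)
 *   old code on rank 1: S_row_starts = { 8 - 5, 8 } = { 3, 8 }  (wrong, overlaps)
 *
 *   scan-based, rank 0: global_start = 3 -> { 0, 3 }            (correct)
 *   scan-based, rank 1: global_start = 8 -> { 3, 8 }            (correct, disjoint)
 */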
@@ -393,9 +393,10 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A,
    num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
    num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);

-   v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
+   v_buf_data = hypre_CTAlloc(HYPRE_Complex,
+                              hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
                               HYPRE_MEMORY_HOST);
-   v_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
+   v_ext_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd, HYPRE_MEMORY_HOST);

    status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST);
    requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST);
@@ -420,7 +421,8 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A,
         {
            v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)];
         }
-        hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]);
+        hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
+                        comm, &requests[jr++]);
      }
   }
   hypre_MPI_Waitall(jr, requests, status);
@@ -435,7 +437,8 @@ hypre_BoomerAMGRelax1GaussSeidel( hypre_ParCSRMatrix *A,
      ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
      vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
      vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
-     hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]);
+     hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
+                     comm, &requests[jr++]);
   }
   hypre_MPI_Waitall(jr, requests, status);
}
@@ -522,9 +525,10 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A,
    num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
    num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);

-   v_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
+   v_buf_data = hypre_CTAlloc(HYPRE_Complex,
+                              hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
                               HYPRE_MEMORY_HOST);
-   v_ext_data = hypre_CTAlloc(HYPRE_Real, num_cols_offd, HYPRE_MEMORY_HOST);
+   v_ext_data = hypre_CTAlloc(HYPRE_Complex, num_cols_offd, HYPRE_MEMORY_HOST);

    status = hypre_CTAlloc(hypre_MPI_Status, num_recvs + num_sends, HYPRE_MEMORY_HOST);
    requests = hypre_CTAlloc(hypre_MPI_Request, num_recvs + num_sends, HYPRE_MEMORY_HOST);
@@ -568,7 +572,8 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A,
         {
            v_buf_data[j] = u_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)];
         }
-        hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]);
+        hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
+                        comm, &requests[jr++]);
      }
   }
   hypre_MPI_Waitall(jr, requests, status);
@@ -583,7 +588,8 @@ hypre_BoomerAMGRelax2GaussSeidel( hypre_ParCSRMatrix *A,
      ip = hypre_ParCSRCommPkgRecvProc(comm_pkg, i);
      vec_start = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i);
      vec_len = hypre_ParCSRCommPkgRecvVecStart(comm_pkg, i + 1) - vec_start;
-     hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_REAL, ip, 0, comm, &requests[jr++]);
+     hypre_MPI_Irecv(&v_ext_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
+                     comm, &requests[jr++]);
   }
   hypre_MPI_Waitall(jr, requests, status);
}
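Note: the Gauss-Seidel fixes above concern the value type rather than the index type. HYPRE_Complex is, to my understanding, typedef'd to HYPRE_Real in real builds but is a complex type when hypre is configured for complex arithmetic, so the halo-exchange buffer and the MPI datatype both have to name it. A hedged fragment of the corrected pattern, reusing the names from the hunks:

/* Sketch: buffer element type and MPI datatype agree.  In a real build
 * HYPRE_Complex == HYPRE_Real and this changes nothing; in a complex
 * build the old HYPRE_Real/HYPRE_MPI_REAL pair would have allocated
 * and transferred only part of each value. */
v_buf_data = hypre_CTAlloc(HYPRE_Complex,
                           hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends),
                           HYPRE_MEMORY_HOST);

hypre_MPI_Isend(&v_buf_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, ip, 0,
                comm, &requests[jr++]);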
@@ -194,7 +194,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
    HYPRE_BigInt *big_buf_data = NULL;
    HYPRE_Real *dbl_buf_data = NULL;

-   HYPRE_Int g_nc;
+   HYPRE_BigInt g_nc;

    HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
@@ -324,11 +324,12 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,

    /* for communication */
    num_sends_A = hypre_ParCSRCommPkgNumSends(comm_pkg_A);
-   int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A,
-                                                                           num_sends_A), HYPRE_MEMORY_HOST);
-   big_buf_data = hypre_CTAlloc(HYPRE_BigInt, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A,
-                                                                              num_sends_A), HYPRE_MEMORY_HOST);
+   int_buf_data = hypre_CTAlloc(HYPRE_Int,
+                                hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A),
+                                HYPRE_MEMORY_HOST);
+   big_buf_data = hypre_CTAlloc(HYPRE_BigInt,
+                                hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A),
+                                HYPRE_MEMORY_HOST);

    /*-----------------------------------------------------------------------
     * create and send and receive fine_to_coarse info.
@@ -338,14 +339,16 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
       HYPRE_BigInt my_first_cpt;
       HYPRE_Int tmp_i;

       my_first_cpt = num_cpts_global[0];

-      /* create the fine to coarse and coarse to fine*/
-      fine_to_coarse = hypre_CTAlloc(HYPRE_Int, num_rows_P, HYPRE_MEMORY_HOST);
-      for (i = 0; i < num_rows_P; i++) { fine_to_coarse[i] = -1; }
+      /* create the fine to coarse and coarse to fine */
+      fine_to_coarse = hypre_CTAlloc(HYPRE_Int, num_rows_P, HYPRE_MEMORY_HOST);
+      for (i = 0; i < num_rows_P; i++)
+      {
+         fine_to_coarse[i] = -1;
+      }

-      coarse_to_fine = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST);
+      coarse_to_fine = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST);

       coarse_counter = 0;
       for (i = 0; i < num_rows_P; i++)
@@ -367,15 +370,13 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
          start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i);
          for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++)
          {
-
             tmp_i = fine_to_coarse[hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j)];
             big_buf_data[index++] = (HYPRE_BigInt)tmp_i + my_first_cpt; /* makes it global*/
          }
-
       }

-      comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg_A, big_buf_data,
-                                                  fine_to_coarse_offd);
+      comm_handle = hypre_ParCSRCommHandleCreate(21, comm_pkg_A, big_buf_data,
+                                                 fine_to_coarse_offd);

       hypre_ParCSRCommHandleDestroy(comm_handle);
    } /* end fine to coarse {} */
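Note: the exchange above is the usual local-to-global index translation: each rank stores 0-based local coarse indices and widens them by its first global coarse point before they cross a process boundary. A hedged illustrative helper (not in the PR) capturing the pattern:

/* Sketch: widen a local coarse index to a global column number.
 * The addition is formed in HYPRE_BigInt so a 32-bit local index
 * plus a large 64-bit offset cannot overflow under mixedint. */
static HYPRE_BigInt
local_to_global_cpt(HYPRE_Int local_index, HYPRE_BigInt my_first_cpt)
{
   return (HYPRE_BigInt) local_index + my_first_cpt;
}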
@@ -384,7 +385,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
     * Get the CF_marker data for the off-processor columns of A
     *-------------------------------------------------------------------*/
    {
-
       if (num_cols_A_offd)
       {
          CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_A_offd, HYPRE_MEMORY_HOST);
@@ -405,8 +405,8 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
          }
       }

-      comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg_A, int_buf_data,
-                                                  CF_marker_offd);
+      comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg_A, int_buf_data,
+                                                 CF_marker_offd);

       hypre_ParCSRCommHandleDestroy(comm_handle);
       if (num_functions > 1)
@@ -422,8 +422,8 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
          }
       }

-      comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg_A, int_buf_data,
-                                                  dof_func_offd);
+      comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg_A, int_buf_data,
+                                                 dof_func_offd);

       hypre_ParCSRCommHandleDestroy(comm_handle);
    }
@@ -437,7 +437,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,

       HYPRE_Int kc;
       HYPRE_BigInt col_1 = hypre_ParCSRMatrixFirstColDiag(*P);
-      HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt)hypre_CSRMatrixNumCols(P_diag);
+      HYPRE_BigInt col_n = col_1 + (HYPRE_BigInt) hypre_CSRMatrixNumCols(P_diag);

       if (num_procs > 1)
       {
@@ -482,9 +482,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
          P_ext_i[i] = P_ext_i[i - 1];
       }

-      if (num_procs > 1) { P_ext_i[0] = 0; }
-
+      if (num_procs > 1)
+      {
+         P_ext_i[0] = 0;
+      }
    } /* end of ghost rows */

    /*-------------------------------------------------------------------
@@ -493,7 +494,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,

    /* if level = first_level, we need to fix the col numbering to leave
     * space for the new unknowns */
-   col_map = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST);
+   col_map = hypre_CTAlloc(HYPRE_Int, ncv, HYPRE_MEMORY_HOST);

    if (num_smooth_vecs && level == interp_vec_first_level)
    {
@@ -517,7 +518,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
    new_nnz_diag = nnz_diag + nnz_diag * num_smooth_vecs;
    new_nnz_offd = nnz_offd + nnz_offd * num_smooth_vecs;

-
    /* new number of coarse variables */
    if (level == interp_vec_first_level )
    {
@@ -528,7 +528,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
       new_ncv = ncv; /* unchanged on level > first_level */
    }

-
    /* allocations */
    P_diag_j_new = hypre_CTAlloc(HYPRE_Int, new_nnz_diag, memory_location_P);
    P_diag_data_new = hypre_CTAlloc(HYPRE_Real, new_nnz_diag, memory_location_P);
@@ -542,7 +541,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
    P_diag_i_new[0] = P_diag_i[0];
    P_offd_i_new[0] = P_offd_i[0];

-
    /* doing truncation? if so, need some more allocations*/
    if (q_max > 0 || abs_trunc > 0.0)
    {
@@ -567,7 +565,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
    {
       HYPRE_Int fine_index;

-      smooth_vec_offd = hypre_CTAlloc(HYPRE_Real, num_cols_A_offd * num_smooth_vecs, HYPRE_MEMORY_HOST);
+      smooth_vec_offd = hypre_CTAlloc(HYPRE_Real,
+                                      num_cols_A_offd * num_smooth_vecs,
+                                      HYPRE_MEMORY_HOST);

       /* for now, do a seperate comm for each smooth vector */
       for (k = 0; k < num_smooth_vecs; k++)
@@ -576,8 +576,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
         vector = smooth_vecs[k];
         vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector));

-        dbl_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg_A,
-                                                                                 num_sends_A), HYPRE_MEMORY_HOST);
+        dbl_buf_data = hypre_CTAlloc(HYPRE_Real,
+                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, num_sends_A),
+                                     HYPRE_MEMORY_HOST);

         /* point into smooth_vec_offd */
         offd_vec_data = smooth_vec_offd + k * num_cols_A_offd;
@@ -587,23 +589,19 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg_A, i + 1); j++)
            {
-
               fine_index = hypre_ParCSRCommPkgSendMapElmt(comm_pkg_A, j);
-
               dbl_buf_data[index++] = vec_data[fine_index];
            }
-
         }

-        comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg_A, dbl_buf_data,
-                                                    offd_vec_data);
+        comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg_A, dbl_buf_data, offd_vec_data);
         hypre_ParCSRCommHandleDestroy(comm_handle);

         hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST);
      } /* end of smooth vecs */
   }/*end num procs > 1 */

   /*-------------------------------------------------------------------
    * Get smooth vec components for the off-processor columns of P
    * TO Do: would be less storage to get the offd coarse to fine
@@ -613,19 +611,21 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
   if (num_procs > 1)
   {
      HYPRE_Int c_index, fine_index;
-     smooth_vec_offd_P = hypre_CTAlloc(HYPRE_Real, num_cols_P_offd * num_smooth_vecs,
-                                       HYPRE_MEMORY_HOST);
+     smooth_vec_offd_P = hypre_CTAlloc(HYPRE_Real,
+                                       num_cols_P_offd * num_smooth_vecs,
+                                       HYPRE_MEMORY_HOST);

      /* for now, do a seperate comm for each smooth vector */
      for (k = 0; k < num_smooth_vecs; k++)
      {
         vector = smooth_vecs[k];
         vec_data = hypre_VectorData(hypre_ParVectorLocalVector(vector));

         num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg_P);
-        dbl_buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg_P,
-                                                                                 num_sends), HYPRE_MEMORY_HOST);
+        dbl_buf_data = hypre_CTAlloc(HYPRE_Real,
+                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg_P, num_sends),
+                                     HYPRE_MEMORY_HOST);

         /* point into smooth_vec_offd_P */
         offd_vec_data_P = smooth_vec_offd_P + k * num_cols_P_offd;
@@ -640,24 +640,20 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
               fine_index = coarse_to_fine[c_index];
               dbl_buf_data[index++] = vec_data[fine_index];
            }
-
         }

-        comm_handle = hypre_ParCSRCommHandleCreate( 1, comm_pkg_P, dbl_buf_data,
-                                                    offd_vec_data_P);
+        comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg_P, dbl_buf_data,
+                                                   offd_vec_data_P);
         hypre_ParCSRCommHandleDestroy(comm_handle);

         hypre_TFree(dbl_buf_data, HYPRE_MEMORY_HOST);
      }
-
-  }/*end num procs > 1 */
-
+  } /*end num procs > 1 */

   /*-------------------------------------------------------------------
    * Main loop!
    *-------------------------------------------------------------------*/

-
   /******** loop through rows - only operate on rows of original functions******/

   j_diag_pos = 0;
@@ -699,12 +695,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
      orig_diag_start = P_diag_i[i];
      orig_offd_start = P_offd_i[i];

-
      /* if original function dofs? or a new one that we don't want
       * to modify*/
      if (fcn_num < orig_nf || modify == 0 )
      {
-
         /* for this row, will we add q entries ? */
         if (fcn_num < orig_nf && num_smooth_vecs)
         {
@@ -748,7 +742,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
             * use col_map[index]*/
            if (num_smooth_vecs && (level == interp_vec_first_level))
            {
-              big_new_col = big_index + (big_index / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)num_smooth_vecs;
+              big_new_col = big_index +
+                            (big_index / (HYPRE_BigInt)num_functions) *
+                            (HYPRE_BigInt)num_smooth_vecs;
            }
            else /* no adjustment */
            {
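Note: the reflowed big_new_col line encodes how columns shift when num_smooth_vecs new unknowns are appended after each group of num_functions original unknowns: every complete group before a column pushes it forward by num_smooth_vecs slots, which is why the division must happen on the HYPRE_BigInt index. A worked example under assumed values:

/* Sketch: num_functions = 2, num_smooth_vecs = 1, so the coarse grid
 * interleaves (u, v, q) where q is the new smooth-vector dof.
 *
 *   old global column:  0  1  2  3  4  5   (u0 v0 u1 v1 u2 v2)
 *   groups before it:   0  0  1  1  2  2   (big_index / num_functions)
 *   new global column:  0  1  3  4  6  7   (old + groups * num_smooth_vecs)
 *
 * leaving columns 2, 5, 8, ... free for the q unknowns. */
big_new_col = big_index +
              (big_index / (HYPRE_BigInt) num_functions) *
              (HYPRE_BigInt) num_smooth_vecs;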
@@ -772,7 +768,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
               /* orig_row_sum += P_diag_data[orig_diag_start + j]; */
               P_diag_data_new[j_diag_pos] = 0.0;

-              new_col = col_map[ P_diag_j[orig_diag_start + j]];
+              new_col = col_map[P_diag_j[orig_diag_start + j]];
               P_diag_j_new[j_diag_pos] = new_col;

               j_diag_pos++;
@@ -800,10 +796,10 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
                  num_new_p_diag++;
                  q_count++;
                  p_count_diag++;
-
               }
            }
         }
+
         /* offd */
         p_count_offd = p_count_diag; /* for indexing into is_q*/
         for (j = 0; j < p_num_offd_elements; j++)
@@ -858,7 +854,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
                  num_new_p_offd++;
                  q_count++;
                  p_count_offd++;
-
               }
            }
         }
@@ -884,7 +879,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
               {
                  r_extra[k] += A_diag_data[jj] * vec_data[i1];
               }
-
            }

            offd_vec_data = smooth_vec_offd + k * num_cols_A_offd;
@@ -910,7 +904,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
               {
                  af_sum += A_diag_data[jj];
               }
-
            }
            for (jj = A_offd_i[i]; jj < A_offd_i[i + 1]; jj++)
            {
@@ -919,7 +912,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
               {
                  af_sum += A_offd_data[jj];
               }
-
            }

            if (af_sum != 0.0)
@@ -935,7 +927,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
            }
            /* now we will use the adjustment later */

-
            /* now if we have any coarse connections with no
               corresponding point in orig p, then these we have to
               distibute and treat as fine, basically*/
@@ -985,6 +976,7 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
               }
            }
         } /* end diag loop */
+
         /* now offd loop */
         for (jj = A_offd_i[i]; jj < A_offd_i[i + 1]; jj++)
         {
@@ -1103,7 +1095,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
            /* if (a_ij*d_sign > 0)
               continue;*/

-
            found = 0;
            if (CF_marker[jj_point] >= 0) /*coarse*/
            {
@@ -1200,11 +1191,11 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,

               if (add_q)
               {
-                 cur_spot = P_diag_i_new[i] + kk_count * (num_smooth_vecs + 1);
+                 cur_spot = P_diag_i_new[i] + kk_count * (num_smooth_vecs + 1);
               }
               else
               {
-                 cur_spot = P_diag_i_new[i] + kk_count;
+                 cur_spot = P_diag_i_new[i] + kk_count;
               }

               P_diag_data_new[cur_spot] += aw;
@@ -1226,7 +1217,6 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,

                  P_diag_data_new[cur_spot + k + 1] += q_val;
               }
-
            }
            kk_count++;
         } /* did each element of p_diag */
@@ -2478,8 +2468,9 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
   {
      HYPRE_Int new_nf;

-     c_dof_func = hypre_TReAlloc_v2(c_dof_func, HYPRE_Int, hypre_IntArraySize(*coarse_dof_func),
-                                    HYPRE_Int, new_ncv, hypre_IntArrayMemoryLocation(*coarse_dof_func));
+     c_dof_func = hypre_TReAlloc_v2(c_dof_func, HYPRE_Int, hypre_IntArraySize(*coarse_dof_func),
+                                    HYPRE_Int, new_ncv,
+                                    hypre_IntArrayMemoryLocation(*coarse_dof_func));
     cur_spot = 0;
     for (i = 0; i < ncv_peru; i++)
     {
@@ -2494,15 +2485,17 @@ HYPRE_Int hypre_BoomerAMG_LNExpandInterp( hypre_ParCSRMatrix *A,
     hypre_IntArrayData(*coarse_dof_func) = c_dof_func;
     hypre_IntArraySize(*coarse_dof_func) = new_ncv;

-
     /* also we need to update the col starts and global num columns*/

     /* assumes that unknowns are together on a procsessor with
      * nodal coarsening */
-    new_col_starts[0] = (col_starts[0] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf ;
-    new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt)num_functions) * (HYPRE_BigInt)new_nf;
+    new_col_starts[0] = (col_starts[0] / (HYPRE_BigInt) num_functions) * (HYPRE_BigInt) new_nf;
+    new_col_starts[1] = (col_starts[1] / (HYPRE_BigInt) num_functions) * (HYPRE_BigInt) new_nf;

-    if (myid == (num_procs - 1)) { g_nc = new_col_starts[1]; }
+    if (myid == (num_procs - 1))
+    {
+       g_nc = new_col_starts[1];
+    }
     hypre_MPI_Bcast(&g_nc, 1, HYPRE_MPI_BIG_INT, num_procs - 1, comm);
  }
  else /* not first level */
@@ -272,10 +272,10 @@ hypre_MatTCommPkgCreate_core (
      }
   }

-  hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT, recv_buf, info, displs, HYPRE_MPI_INT,
-                       comm);
+  hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT,
+                       recv_buf, info, displs, HYPRE_MPI_BIG_INT,
+                       comm);

-
   /* ----------------------------------------------------------------------
    * determine send_procs and actual elements to be send (in send_map_elmts)
    * and send_map_starts whose i-th entry points to the beginning of the
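Note: an Allgatherv's send-side and receive-side datatypes must describe the same element type; tmp and recv_buf are HYPRE_BigInt arrays here, so the old HYPRE_MPI_INT on the receive side mismatched both the element width and recv_buf's layout. A hedged sketch of the corrected call shape, reusing the names from the hunk:

/* Sketch: each rank contributes local_info HYPRE_BigInt elements;
 * info[p] is how many elements rank p sends and displs[p] is where
 * they land in recv_buf.  Both datatypes name the 64-bit element. */
hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_BIG_INT,
                     recv_buf, info, displs, HYPRE_MPI_BIG_INT,
                     comm);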
@@ -851,7 +851,7 @@ hypre_CSRBooleanMatrixToParCSRBooleanMatrix
      a_i = hypre_CSRBooleanMatrix_Get_I(A);
      a_j = hypre_CSRBooleanMatrix_Get_J(A);
   }
-  hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_INT, 0, comm);
+  hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_BIG_INT, 0, comm);
   global_num_rows = global_data[0];
   global_num_cols = global_data[1];
@@ -535,7 +535,7 @@ hypre_VectorToParVector ( MPI_Comm comm,
      global_vecstride = hypre_VectorVectorStride(v);
   }

-  hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_INT, 0, comm);
+  hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_BIG_INT, 0, comm);
   hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm);
   hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, comm);
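Note: a broadcast writes into the same variable on every rank, so the datatype must match that variable's declared type everywhere; global_size is a HYPRE_BigInt while num_vectors and global_vecstride remain HYPRE_Int, which is why only the first Bcast changes. A minimal sketch with illustrative declarations:

/* Sketch: rank 0 owns the authoritative sizes; each Bcast's datatype
 * matches the C type of the variable it fills. */
HYPRE_BigInt global_size;   /* 64-bit under mixedint */
HYPRE_Int    num_vectors;   /* stays 32-bit */

hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_BIG_INT, 0, comm);
hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm);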
@@ -108,8 +108,8 @@ void hypre_ParChordMatrix_RowStarts(

   /* Global number of columns */
   /* hypre_MPI_Allreduce( &num_rdofs, global_num_cols, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm );*/
-  hypre_MPI_Allreduce( &min_rdof, &global_min_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MIN, comm );
-  hypre_MPI_Allreduce( &max_rdof, &global_max_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm );
+  hypre_MPI_Allreduce( &min_rdof, &global_min_rdof, 1, HYPRE_MPI_BIG_INT, hypre_MPI_MIN, comm );
+  hypre_MPI_Allreduce( &max_rdof, &global_max_rdof, 1, HYPRE_MPI_BIG_INT, hypre_MPI_MAX, comm );
   *global_num_cols = global_max_rdof - global_min_rdof;
}
@@ -91,7 +91,7 @@ using hypre_DeviceItem = void*;
#endif

#define CUSPARSE_NEWAPI_VERSION 11000
-#define CUSPARSE_NEWSPMM_VERSION 11201
+#define CUSPARSE_NEWSPMM_VERSION 11401
#define CUDA_MALLOCASYNC_VERSION 11020
#define THRUST_CALL_BLOCKING 1
@@ -2793,4 +2793,3 @@ struct hypre_cub_CachingDeviceAllocator
#endif

-
#endif
@@ -39,7 +39,7 @@ using hypre_DeviceItem = void*;
#endif

#define CUSPARSE_NEWAPI_VERSION 11000
-#define CUSPARSE_NEWSPMM_VERSION 11201
+#define CUSPARSE_NEWSPMM_VERSION 11401
#define CUDA_MALLOCASYNC_VERSION 11020
#define THRUST_CALL_BLOCKING 1
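Note: CUSPARSE_NEWSPMM_VERSION gates hypre's use of the newer generic-API SpMM algorithms; raising it from 11201 to 11401 keeps that path off for older cuSPARSE releases such as the one shipped with CUDA 11.1.1. A hedged sketch of how such a gate is typically consumed; EXAMPLE_SPMM_ALG is illustrative and the exact algorithm choice is an assumption, not hypre's:

#include <cusparse.h>

#define CUSPARSE_NEWSPMM_VERSION 11401   /* value set by this PR */

/* Sketch: compile-time dispatch on cuSPARSE's own numeric version macro. */
#if defined(CUSPARSE_VERSION) && (CUSPARSE_VERSION >= CUSPARSE_NEWSPMM_VERSION)
/* new enough: allowed to pick a newer generic SpMM algorithm */
#define EXAMPLE_SPMM_ALG CUSPARSE_SPMM_CSR_ALG3
#else
/* older cuSPARSE (e.g. the CUDA 11.1.1 toolkit): keep the default */
#define EXAMPLE_SPMM_ALG CUSPARSE_SPMM_ALG_DEFAULT
#endif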