Fix colmap (#328)
When building the colmap array for prolongation operators with a large number of nonzero coefficients in P_offd (see hypre_build_interp_colmap), the code performs P_offd_size * num_cols_P_offd comparisons, which can become very expensive depending on the test case. This PR eliminates that code path and uses a hash table to accomplish the same purpose, lowering the algorithmic complexity.
parent c5110654ce
commit 521ef12ff1
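To make the intent of the change concrete, here is a small, self-contained sketch of the renumbering pattern it moves to. This is illustrative code only, not taken from hypre: the types and helpers (BigInt, IdMap, idmap_put, idmap_get) are invented for this example, while hypre itself uses its hopscotch-hash containers (hypre_UnorderedBigIntMap), as the diff below shows. The idea is to build the unique sorted column map once, build a hash map from global column id to local index, and renumber each off-diagonal entry with a single lookup instead of scanning the whole column map per entry.

/*
 * Illustrative sketch only -- not hypre code.
 */
#include <stdio.h>
#include <stdlib.h>

typedef long long BigInt;

/* minimal open-addressing hash map: global id -> local index */
typedef struct { BigInt *keys; int *vals; size_t cap; } IdMap;

static size_t id_hash(BigInt k, size_t cap)
{
   return (size_t)((unsigned long long)k * 11400714819323198485ULL % cap);
}

static IdMap idmap_create(size_t n)
{
   IdMap m;
   m.cap  = 2 * n + 1;                     /* keep the load factor below 1/2 */
   m.keys = malloc(m.cap * sizeof(BigInt));
   m.vals = malloc(m.cap * sizeof(int));
   for (size_t i = 0; i < m.cap; i++) { m.vals[i] = -1; }   /* -1 marks empty */
   return m;
}

static void idmap_put(IdMap *m, BigInt key, int val)
{
   size_t h = id_hash(key, m->cap);
   while (m->vals[h] != -1 && m->keys[h] != key) { h = (h + 1) % m->cap; }
   m->keys[h] = key;
   m->vals[h] = val;
}

static int idmap_get(const IdMap *m, BigInt key)
{
   size_t h = id_hash(key, m->cap);
   while (m->vals[h] != -1 && m->keys[h] != key) { h = (h + 1) % m->cap; }
   return m->vals[h];                      /* -1 if the key is absent */
}

static int cmp_big(const void *a, const void *b)
{
   BigInt x = *(const BigInt *)a, y = *(const BigInt *)b;
   return (x > y) - (x < y);
}

int main(void)
{
   /* global column ids referenced by the off-diagonal entries (with repeats) */
   BigInt entries[] = { 907, 13, 512, 13, 512, 907, 2 };
   int    nnz       = 7;
   BigInt col_map[7];
   int    ncols     = 0;

   /* 1. collect the distinct ids and sort them: this is the column map */
   IdMap seen = idmap_create((size_t)nnz);
   for (int i = 0; i < nnz; i++)
   {
      if (idmap_get(&seen, entries[i]) == -1)
      {
         idmap_put(&seen, entries[i], 1);
         col_map[ncols++] = entries[i];
      }
   }
   qsort(col_map, (size_t)ncols, sizeof(BigInt), cmp_big);

   /* 2. build the inverse map once: global id -> position in col_map */
   IdMap inverse = idmap_create((size_t)ncols);
   for (int j = 0; j < ncols; j++) { idmap_put(&inverse, col_map[j], j); }

   /* 3. renumber each entry with one lookup; the removed code path instead
    *    compared each entry against every column map value */
   for (int i = 0; i < nnz; i++)
   {
      printf("%lld -> %d\n", entries[i], idmap_get(&inverse, entries[i]));
   }

   free(seen.keys); free(seen.vals);
   free(inverse.keys); free(inverse.vals);
   return 0;
}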
@@ -102,26 +102,26 @@ HYPRE_Int hypre_alt_insert_new_nodes(hypre_ParCSRCommPkg *comm_pkg,
   return hypre_error_flag;
}

HYPRE_Int hypre_big_insert_new_nodes(hypre_ParCSRCommPkg *comm_pkg,
                                     hypre_ParCSRCommPkg *extend_comm_pkg,
                                     HYPRE_Int *IN_marker,
                                     HYPRE_Int full_off_procNodes,
                                     HYPRE_BigInt offset,
                                     HYPRE_BigInt *OUT_marker)
{
   hypre_ParCSRCommHandle *comm_handle;

   HYPRE_Int i, index, shift;

   HYPRE_Int num_sends, num_recvs;

   HYPRE_Int *recv_vec_starts;

   HYPRE_Int e_num_sends;

   HYPRE_BigInt *int_buf_data;
   HYPRE_BigInt *e_out_marker;

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);

@@ -137,29 +137,29 @@ HYPRE_Int hypre_big_insert_new_nodes(hypre_ParCSRCommPkg *comm_pkg,

   /* orig commpkg data*/
   index = 0;

   HYPRE_Int begin = hypre_ParCSRCommPkgSendMapStart(comm_pkg, 0);
   HYPRE_Int end = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
   for (i = begin; i < end; ++i) {
      int_buf_data[i - begin] = offset +
         (HYPRE_BigInt) IN_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, i)];
   }

   comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, int_buf_data,
                                               OUT_marker);

   hypre_ParCSRCommHandleDestroy(comm_handle);
   comm_handle = NULL;

   /* now do the extend commpkg */

   /* first we need to shift our position in the OUT_marker */
   shift = recv_vec_starts[num_recvs];
   e_out_marker = OUT_marker + shift;

   index = 0;

   begin = hypre_ParCSRCommPkgSendMapStart(extend_comm_pkg, 0);

@@ -171,17 +171,17 @@ HYPRE_Int hypre_big_insert_new_nodes(hypre_ParCSRCommPkg *comm_pkg,
      int_buf_data[i - begin] = offset +
         (HYPRE_BigInt) IN_marker[hypre_ParCSRCommPkgSendMapElmt(extend_comm_pkg, i)];
   }

   comm_handle = hypre_ParCSRCommHandleCreate( 21, extend_comm_pkg, int_buf_data,
                                               e_out_marker);

   hypre_ParCSRCommHandleDestroy(comm_handle);
   comm_handle = NULL;

   hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);

   return hypre_error_flag;
}

/* sort for non-ordered arrays */
HYPRE_Int hypre_ssort(HYPRE_BigInt *data, HYPRE_Int n)

@@ -227,7 +227,7 @@ void hypre_swap_int(HYPRE_BigInt *data, HYPRE_Int a, HYPRE_Int b)
}

/* Initialize CF_marker_offd, CF_marker, P_marker, P_marker_offd, tmp */
void hypre_initialize_vecs(HYPRE_Int diag_n, HYPRE_Int offd_n, HYPRE_Int *diag_ftc, HYPRE_BigInt *offd_ftc,
                           HYPRE_Int *diag_pm, HYPRE_Int *offd_pm, HYPRE_Int *tmp_CF)
{
   HYPRE_Int i;

@@ -289,9 +289,9 @@ void hypre_initialize_vecs(HYPRE_Int diag_n, HYPRE_Int offd_n, HYPRE_Int *diag_f

/* Find nodes that are offd and are not contained in original offd
 * (neighbors of neighbors) */
static HYPRE_Int hypre_new_offd_nodes(HYPRE_BigInt **found, HYPRE_Int num_cols_A_offd,
                                      HYPRE_Int *A_ext_i, HYPRE_BigInt *A_ext_j,
                                      HYPRE_Int num_cols_S_offd, HYPRE_BigInt *col_map_offd, HYPRE_BigInt col_1,
                                      HYPRE_BigInt col_n, HYPRE_Int *Sop_i, HYPRE_BigInt *Sop_j,
                                      HYPRE_Int *CF_marker_offd)
{

@@ -674,147 +674,105 @@ void hypre_build_interp_colmap(hypre_ParCSRMatrix *P, HYPRE_Int full_off_procNod
#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_RENUMBER_COLIDX] -= hypre_MPI_Wtime();
#endif
   HYPRE_Int n_fine = hypre_CSRMatrixNumRows(P->diag);

   HYPRE_Int i, index;

   HYPRE_Int P_offd_size = P->offd->i[n_fine];
   HYPRE_Int *P_offd_j = P->offd->j;
   HYPRE_BigInt *col_map_offd_P = NULL;

   HYPRE_Int *P_marker = NULL;

   if (full_off_procNodes)
   {
      P_marker = hypre_TAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
   }

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < full_off_procNodes; i++)
   {
      P_marker[i] = 0;
   }

#ifdef HYPRE_CONCURRENT_HOPSCOTCH
   /* These two loops set P_marker[i] to 1 if it appears in P_offd_j and if
    * tmp_CF_marker_offd has i marked. num_cols_P_offd is then set to the
    * total number of times P_marker is set */
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,index) HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < P_offd_size; i++)
   {
      index = P_offd_j[i];
      if (tmp_CF_marker_offd[index] >= 0)
      {
         P_marker[index] = 1;
      }
   }

   HYPRE_Int prefix_sum_workspace[hypre_NumThreads() + 1];
   HYPRE_Int num_cols_P_offd = 0;

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i)
#endif
   {
      HYPRE_Int i_begin, i_end;
      hypre_GetSimpleThreadPartition(&i_begin, &i_end, full_off_procNodes);

      HYPRE_Int local_num_cols_P_offd = 0;
      for (i = i_begin; i < i_end; i++)
      {
         if (P_marker[i] == 1) local_num_cols_P_offd++;
      }

      hypre_prefix_sum(&local_num_cols_P_offd, &num_cols_P_offd, prefix_sum_workspace);

#ifdef HYPRE_USING_OPENMP
#pragma omp master
#endif
      {
         if (num_cols_P_offd)
         {
            col_map_offd_P = hypre_TAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
         }
      }
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif

      for (i = i_begin; i < i_end; i++)
      {
         if (P_marker[i] == 1)
         {
            col_map_offd_P[local_num_cols_P_offd++] = fine_to_coarse_offd[i];
         }
      }
   }

   hypre_UnorderedBigIntMap col_map_offd_P_inverse;
   hypre_big_sort_and_create_inverse_map(col_map_offd_P, num_cols_P_offd, &col_map_offd_P, &col_map_offd_P_inverse);

   // find old idx -> new idx map
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for
#endif
   for (i = 0; i < full_off_procNodes; i++)
   {
      P_marker[i] = hypre_UnorderedBigIntMapGet(&col_map_offd_P_inverse, fine_to_coarse_offd[i]);
   }

   if (num_cols_P_offd)
   {
      hypre_UnorderedBigIntMapDestroy(&col_map_offd_P_inverse);
   }

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for
#endif
   for (i = 0; i < P_offd_size; i++)
   {
      P_offd_j[i] = P_marker[P_offd_j[i]];
   }

#else /* HYPRE_CONCURRENT_HOPSCOTCH */
   HYPRE_Int num_cols_P_offd = 0;
   HYPRE_Int j;
   for (i = 0; i < P_offd_size; i++)
   {
      index = P_offd_j[i];
      if (!P_marker[index])
      {
         if (tmp_CF_marker_offd[index] >= 0)
         {
            num_cols_P_offd++;
            P_marker[index] = 1;
         }
      }
   }

   if (num_cols_P_offd)
   {
      HYPRE_Int *tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
      HYPRE_BigInt *tmp_marker = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
      col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);

      index = 0;
      for (i = 0; i < num_cols_P_offd; i++)
      {
         while (P_marker[index] == 0) index++;
         tmp_map_offd[i] = index++;
      }
      for (i = 0; i < P_offd_size; i++)
         P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
                                          P_offd_j[i],
                                          num_cols_P_offd);

      index = 0;
      for (i = 0; i < num_cols_P_offd; i++)
      {
         while (P_marker[index] == 0) index++;
         col_map_offd_P[i] = fine_to_coarse_offd[index];
         index++;
      }

      /* Sort the col_map_offd_P and P_offd_j correctly */
      for (i = 0; i < num_cols_P_offd; i++)
         tmp_marker[i] = col_map_offd_P[i];

      /* Check if sort actually changed anything */
      if (hypre_ssort(col_map_offd_P, num_cols_P_offd))
      {
         for (i = 0; i < P_offd_size; i++)
            for (j = 0; j < num_cols_P_offd; j++)
               if (tmp_marker[P_offd_j[i]] == col_map_offd_P[j])
               {
                  P_offd_j[i] = j;
                  j = num_cols_P_offd;
               }
      }
      hypre_TFree(tmp_marker, HYPRE_MEMORY_HOST);
      hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
   }
#endif /* HYPRE_CONCURRENT_HOPSCOTCH */

   hypre_TFree(P_marker, HYPRE_MEMORY_HOST);

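Condensed for readability, the hash-table path shown in the hunk above boils down to the following steps. This is assembled from the lines visible there, not a verbatim copy of the file, and the surrounding declarations (i, P_marker, P_offd_j, and so on) are assumed from that hunk.

   /* sort col_map_offd_P and build a hash map from each global column id
    * to its position in the sorted column map */
   hypre_UnorderedBigIntMap col_map_offd_P_inverse;
   hypre_big_sort_and_create_inverse_map(col_map_offd_P, num_cols_P_offd,
                                         &col_map_offd_P, &col_map_offd_P_inverse);

   /* one hash lookup per off-processor node: old local index -> new local index */
   for (i = 0; i < full_off_procNodes; i++)
   {
      P_marker[i] = hypre_UnorderedBigIntMapGet(&col_map_offd_P_inverse,
                                                fine_to_coarse_offd[i]);
   }

   if (num_cols_P_offd)
   {
      hypre_UnorderedBigIntMapDestroy(&col_map_offd_P_inverse);
   }

   /* renumber the column indices of P_offd in place */
   for (i = 0; i < P_offd_size; i++)
   {
      P_offd_j[i] = P_marker[P_offd_j[i]];
   }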
@@ -613,7 +613,7 @@ void hypre_sort_and_create_inverse_map(HYPRE_Int *in, HYPRE_Int len, HYPRE_Int *
   hypre_merge_sort(in, temp, len, out);
   hypre_UnorderedIntMapCreate(inverse_map, 2*len, 16*hypre_NumThreads());
   HYPRE_Int i;
#ifdef HYPRE_USING_OPENMP
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
#pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < len; i++)

@@ -762,7 +762,7 @@ void hypre_big_sort_and_create_inverse_map(HYPRE_BigInt *in, HYPRE_Int len, HYPR
   hypre_big_merge_sort(in, temp, len, out);
   hypre_UnorderedBigIntMapCreate(inverse_map, 2*len, 16*hypre_NumThreads());
   HYPRE_Int i;
#ifdef HYPRE_USING_OPENMP
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
#pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < len; i++)