Fix colmap (#328)

When building the colmap array for prolongation operators that contain a high number of nonzero coefficients living in P_offd, see hypre_build_interp_colmap, the code will make P_offd_size * num_cols_P_offd comparisons, and this can be very high depending on the test case. This PR eliminates such code path and uses a hash table to accomplish the same purpose, thus lowering the algorithmic complexity.
This commit is contained in:
Victor A. Paludetto Magri 2021-04-20 10:28:49 -07:00 committed by GitHub
parent c5110654ce
commit 521ef12ff1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 84 additions and 126 deletions

View File

@ -102,26 +102,26 @@ HYPRE_Int hypre_alt_insert_new_nodes(hypre_ParCSRCommPkg *comm_pkg,
return hypre_error_flag;
}
HYPRE_Int hypre_big_insert_new_nodes(hypre_ParCSRCommPkg *comm_pkg,
HYPRE_Int hypre_big_insert_new_nodes(hypre_ParCSRCommPkg *comm_pkg,
hypre_ParCSRCommPkg *extend_comm_pkg,
HYPRE_Int *IN_marker,
HYPRE_Int *IN_marker,
HYPRE_Int full_off_procNodes,
HYPRE_BigInt offset,
HYPRE_BigInt *OUT_marker)
{
{
hypre_ParCSRCommHandle *comm_handle;
HYPRE_Int i, index, shift;
HYPRE_Int num_sends, num_recvs;
HYPRE_Int *recv_vec_starts;
HYPRE_Int e_num_sends;
HYPRE_BigInt *int_buf_data;
HYPRE_BigInt *e_out_marker;
num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
@ -137,29 +137,29 @@ HYPRE_Int hypre_big_insert_new_nodes(hypre_ParCSRCommPkg *comm_pkg,
/* orig commpkg data*/
index = 0;
HYPRE_Int begin = hypre_ParCSRCommPkgSendMapStart(comm_pkg, 0);
HYPRE_Int end = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
for (i = begin; i < end; ++i) {
int_buf_data[i - begin] = offset +
int_buf_data[i - begin] = offset +
(HYPRE_BigInt) IN_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, i)];
}
comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, int_buf_data,
comm_handle = hypre_ParCSRCommHandleCreate( 21, comm_pkg, int_buf_data,
OUT_marker);
hypre_ParCSRCommHandleDestroy(comm_handle);
comm_handle = NULL;
/* now do the extend commpkg */
/* first we need to shift our position in the OUT_marker */
shift = recv_vec_starts[num_recvs];
e_out_marker = OUT_marker + shift;
index = 0;
begin = hypre_ParCSRCommPkgSendMapStart(extend_comm_pkg, 0);
@ -171,17 +171,17 @@ HYPRE_Int hypre_big_insert_new_nodes(hypre_ParCSRCommPkg *comm_pkg,
int_buf_data[i - begin] = offset +
(HYPRE_BigInt) IN_marker[hypre_ParCSRCommPkgSendMapElmt(extend_comm_pkg, i)];
}
comm_handle = hypre_ParCSRCommHandleCreate( 21, extend_comm_pkg, int_buf_data,
comm_handle = hypre_ParCSRCommHandleCreate( 21, extend_comm_pkg, int_buf_data,
e_out_marker);
hypre_ParCSRCommHandleDestroy(comm_handle);
comm_handle = NULL;
hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST);
return hypre_error_flag;
}
}
/* sort for non-ordered arrays */
HYPRE_Int hypre_ssort(HYPRE_BigInt *data, HYPRE_Int n)
@ -227,7 +227,7 @@ void hypre_swap_int(HYPRE_BigInt *data, HYPRE_Int a, HYPRE_Int b)
}
/* Initialize CF_marker_offd, CF_marker, P_marker, P_marker_offd, tmp */
void hypre_initialize_vecs(HYPRE_Int diag_n, HYPRE_Int offd_n, HYPRE_Int *diag_ftc, HYPRE_BigInt *offd_ftc,
void hypre_initialize_vecs(HYPRE_Int diag_n, HYPRE_Int offd_n, HYPRE_Int *diag_ftc, HYPRE_BigInt *offd_ftc,
HYPRE_Int *diag_pm, HYPRE_Int *offd_pm, HYPRE_Int *tmp_CF)
{
HYPRE_Int i;
@ -289,9 +289,9 @@ void hypre_initialize_vecs(HYPRE_Int diag_n, HYPRE_Int offd_n, HYPRE_Int *diag_f
/* Find nodes that are offd and are not contained in original offd
* (neighbors of neighbors) */
static HYPRE_Int hypre_new_offd_nodes(HYPRE_BigInt **found, HYPRE_Int num_cols_A_offd,
HYPRE_Int *A_ext_i, HYPRE_BigInt *A_ext_j,
HYPRE_Int num_cols_S_offd, HYPRE_BigInt *col_map_offd, HYPRE_BigInt col_1,
static HYPRE_Int hypre_new_offd_nodes(HYPRE_BigInt **found, HYPRE_Int num_cols_A_offd,
HYPRE_Int *A_ext_i, HYPRE_BigInt *A_ext_j,
HYPRE_Int num_cols_S_offd, HYPRE_BigInt *col_map_offd, HYPRE_BigInt col_1,
HYPRE_BigInt col_n, HYPRE_Int *Sop_i, HYPRE_BigInt *Sop_j,
HYPRE_Int *CF_marker_offd)
{
@ -674,147 +674,105 @@ void hypre_build_interp_colmap(hypre_ParCSRMatrix *P, HYPRE_Int full_off_procNod
#ifdef HYPRE_PROFILE
hypre_profile_times[HYPRE_TIMER_ID_RENUMBER_COLIDX] -= hypre_MPI_Wtime();
#endif
HYPRE_Int n_fine = hypre_CSRMatrixNumRows(P->diag);
HYPRE_Int i, index;
HYPRE_Int n_fine = hypre_CSRMatrixNumRows(P->diag);
HYPRE_Int P_offd_size = P->offd->i[n_fine];
HYPRE_Int *P_offd_j = P->offd->j;
HYPRE_Int P_offd_size = P->offd->i[n_fine];
HYPRE_Int *P_offd_j = P->offd->j;
HYPRE_BigInt *col_map_offd_P = NULL;
HYPRE_Int *P_marker = NULL;
HYPRE_Int *P_marker = NULL;
HYPRE_Int prefix_sum_workspace[hypre_NumThreads() + 1];
HYPRE_Int num_cols_P_offd = 0;
HYPRE_Int i, index;
if (full_off_procNodes)
{
P_marker = hypre_TAlloc(HYPRE_Int, full_off_procNodes, HYPRE_MEMORY_HOST);
}
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
for (i=0; i < full_off_procNodes; i++)
P_marker[i] = 0;
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
for (i = 0; i < full_off_procNodes; i++)
{
P_marker[i] = 0;
}
/* These two loops set P_marker[i] to 1 if it appears in P_offd_j and if
* tmp_CF_marker_offd has i marked. num_cols_P_offd is then set to the
* total number of times P_marker is set */
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,index) HYPRE_SMP_SCHEDULE
for (i=0; i < P_offd_size; i++)
#endif
for (i = 0; i < P_offd_size; i++)
{
index = P_offd_j[i];
if(tmp_CF_marker_offd[index] >= 0)
{ P_marker[index] = 1; }
index = P_offd_j[i];
if (tmp_CF_marker_offd[index] >= 0)
{
P_marker[index] = 1;
}
}
HYPRE_Int prefix_sum_workspace[hypre_NumThreads() + 1];
HYPRE_Int num_cols_P_offd = 0;
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i)
#endif
{
HYPRE_Int i_begin, i_end;
hypre_GetSimpleThreadPartition(&i_begin, &i_end, full_off_procNodes);
HYPRE_Int i_begin, i_end;
hypre_GetSimpleThreadPartition(&i_begin, &i_end, full_off_procNodes);
HYPRE_Int local_num_cols_P_offd = 0;
for (i = i_begin; i < i_end; i++)
{
if (P_marker[i] == 1) local_num_cols_P_offd++;
}
HYPRE_Int local_num_cols_P_offd = 0;
for (i = i_begin; i < i_end; i++)
{
if (P_marker[i] == 1) local_num_cols_P_offd++;
}
hypre_prefix_sum(&local_num_cols_P_offd, &num_cols_P_offd, prefix_sum_workspace);
hypre_prefix_sum(&local_num_cols_P_offd, &num_cols_P_offd, prefix_sum_workspace);
#ifdef HYPRE_USING_OPENMP
#pragma omp master
{
if (num_cols_P_offd)
col_map_offd_P = hypre_TAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
}
#endif
{
if (num_cols_P_offd)
{
col_map_offd_P = hypre_TAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
}
}
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif
for (i = i_begin; i < i_end; i++)
{
if (P_marker[i] == 1)
{
col_map_offd_P[local_num_cols_P_offd++] = fine_to_coarse_offd[i];
}
}
for (i = i_begin; i < i_end; i++)
{
if (P_marker[i] == 1)
{
col_map_offd_P[local_num_cols_P_offd++] = fine_to_coarse_offd[i];
}
}
}
hypre_UnorderedBigIntMap col_map_offd_P_inverse;
hypre_big_sort_and_create_inverse_map(col_map_offd_P, num_cols_P_offd, &col_map_offd_P, &col_map_offd_P_inverse);
// find old idx -> new idx map
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for
#endif
for (i = 0; i < full_off_procNodes; i++)
P_marker[i] = hypre_UnorderedBigIntMapGet(&col_map_offd_P_inverse, fine_to_coarse_offd[i]);
{
P_marker[i] = hypre_UnorderedBigIntMapGet(&col_map_offd_P_inverse, fine_to_coarse_offd[i]);
}
if (num_cols_P_offd)
{
hypre_UnorderedBigIntMapDestroy(&col_map_offd_P_inverse);
hypre_UnorderedBigIntMapDestroy(&col_map_offd_P_inverse);
}
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for
#endif
for(i = 0; i < P_offd_size; i++)
P_offd_j[i] = P_marker[P_offd_j[i]];
#else /* HYPRE_CONCURRENT_HOPSCOTCH */
HYPRE_Int num_cols_P_offd = 0;
HYPRE_Int j;
for (i=0; i < P_offd_size; i++)
{
index = P_offd_j[i];
if (!P_marker[index])
{
if(tmp_CF_marker_offd[index] >= 0)
{
num_cols_P_offd++;
P_marker[index] = 1;
}
}
}
if (num_cols_P_offd)
{
HYPRE_Int *tmp_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
HYPRE_BigInt *tmp_marker = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
col_map_offd_P = hypre_CTAlloc(HYPRE_BigInt, num_cols_P_offd, HYPRE_MEMORY_HOST);
index = 0;
for(i = 0; i < num_cols_P_offd; i++)
{
while( P_marker[index] == 0) index++;
tmp_map_offd[i] = index++;
}
for(i = 0; i < P_offd_size; i++)
P_offd_j[i] = hypre_BinarySearch(tmp_map_offd,
P_offd_j[i],
num_cols_P_offd);
index = 0;
for(i = 0; i < num_cols_P_offd; i++)
{
while (P_marker[index] == 0) index++;
col_map_offd_P[i] = fine_to_coarse_offd[index];
index++;
}
/* Sort the col_map_offd_P and P_offd_j correctly */
for(i = 0; i < num_cols_P_offd; i++)
tmp_marker[i] = col_map_offd_P[i];
/* Check if sort actually changed anything */
if(hypre_ssort(col_map_offd_P,num_cols_P_offd))
{
for(i = 0; i < P_offd_size; i++)
for(j = 0; j < num_cols_P_offd; j++)
if(tmp_marker[P_offd_j[i]] == col_map_offd_P[j])
{
P_offd_j[i] = j;
j = num_cols_P_offd;
}
}
hypre_TFree(tmp_marker, HYPRE_MEMORY_HOST);
hypre_TFree(tmp_map_offd, HYPRE_MEMORY_HOST);
}
#endif /* HYPRE_CONCURRENT_HOPSCOTCH */
{
P_offd_j[i] = P_marker[P_offd_j[i]];
}
hypre_TFree(P_marker, HYPRE_MEMORY_HOST);

View File

@ -613,7 +613,7 @@ void hypre_sort_and_create_inverse_map(HYPRE_Int *in, HYPRE_Int len, HYPRE_Int *
hypre_merge_sort(in, temp, len, out);
hypre_UnorderedIntMapCreate(inverse_map, 2*len, 16*hypre_NumThreads());
HYPRE_Int i;
#ifdef HYPRE_USING_OPENMP
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
#pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < len; i++)
@ -762,7 +762,7 @@ void hypre_big_sort_and_create_inverse_map(HYPRE_BigInt *in, HYPRE_Int len, HYPR
hypre_big_merge_sort(in, temp, len, out);
hypre_UnorderedBigIntMapCreate(inverse_map, 2*len, 16*hypre_NumThreads());
HYPRE_Int i;
#ifdef HYPRE_USING_OPENMP
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
#pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < len; i++)