/******************************************************************************
 * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
 *
 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
 ******************************************************************************/

#include "_hypre_utilities.h"
#include "../seq_mv/HYPRE_seq_mv.h"

/* Uncomment to enable the (C++-only) self-checks scattered through this file */
//#define DBG_MERGE_SORT
#ifdef DBG_MERGE_SORT
#include <algorithm>
#include <unordered_map>
#endif

/* Swap two lvalues a and b of type T */
#define SWAP(T, a, b) do { T tmp = a; a = b; b = tmp; } while (0)

/*--------------------------------------------------------------------------
 * hypre_MergeOrderedArrays: merge two ordered arrays
 *
 * Merges the ascending arrays array1 and array2 into array3. A value that
 * is found at the current position of both inputs is emitted only once.
 *
 * The output buffer is allocated here in the memory location of array3,
 * first with room for size1 + size2 entries and then reallocated to the
 * number of entries actually written. The size and data fields of array3
 * are overwritten; a data pointer already stored in array3 is not freed
 * by this function.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_MergeOrderedArrays( hypre_IntArray *array1,
                          hypre_IntArray *array2,
                          hypre_IntArray *array3 )
{
   HYPRE_Int i = 0, j = 0, k = 0;
   const HYPRE_Int size1 = hypre_IntArraySize(array1);
   const HYPRE_Int size2 = hypre_IntArraySize(array2);

   HYPRE_MemoryLocation memory_location = hypre_IntArrayMemoryLocation(array3);

   HYPRE_Int *array1_data = hypre_IntArrayData(array1);
   HYPRE_Int *array2_data = hypre_IntArrayData(array2);
   HYPRE_Int *array3_data = hypre_TAlloc(HYPRE_Int, size1 + size2, memory_location);

   while (i < size1 && j < size2)
   {
      if (array1_data[i] > array2_data[j])
      {
         array3_data[k++] = array2_data[j++];
      }
      else if (array1_data[i] < array2_data[j])
      {
         array3_data[k++] = array1_data[i++];
      }
      else
      {
         /* Same value in both inputs: keep one copy, advance both cursors */
         array3_data[k++] = array1_data[i++];
         j++;
      }
   }

   /* At most one of the two tails is non-empty */
   while (i < size1)
   {
      array3_data[k++] = array1_data[i++];
   }

   while (j < size2)
   {
      array3_data[k++] = array2_data[j++];
   }

   /* Shrink the buffer to the k entries that were written */
   array3_data = hypre_TReAlloc_v2(array3_data, HYPRE_Int, size1 + size2,
                                   HYPRE_Int, k, memory_location);

   hypre_IntArraySize(array3) = k;
   hypre_IntArrayData(array3) = array3_data;

   return hypre_error_flag;
}

/*--------------------------------------------------------------------------
 * hypre_union2
 *
 * Union of two sorted (in ascending order) array arr1 and arr2 into arr3
 *
 * Assumptions:
 *    1) no duplicate entries in arr1 and arr2. But an entry is
 *       allowed to appear in both arr1 and arr2
 *    2) arr3 should have enough space on entry
 *    3) map1 and map2 map arr1 and arr2 to arr3
 *
 * On return *n3 holds the number of entries written to arr3.
 * map1 and/or map2 may be NULL, in which case that map is not filled;
 * otherwise map1[i] (map2[j]) is the position in arr3 of arr1[i] (arr2[j]).
 *--------------------------------------------------------------------------*/

void
hypre_union2( HYPRE_Int     n1,
              HYPRE_BigInt *arr1,
              HYPRE_Int     n2,
              HYPRE_BigInt *arr2,
              HYPRE_Int    *n3,
              HYPRE_BigInt *arr3,
              HYPRE_Int    *map1,
              HYPRE_Int    *map2 )
{
   HYPRE_Int i = 0, j = 0, k = 0;

   while (i < n1 && j < n2)
   {
      if (arr1[i] < arr2[j])
      {
         if (map1) { map1[i] = k; }
         arr3[k++] = arr1[i++];
      }
      else if (arr1[i] > arr2[j])
      {
         if (map2) { map2[j] = k; }
         arr3[k++] = arr2[j++];
      }
      else /* == */
      {
         /* Shared entry: both maps point at the single copy in arr3 */
         if (map1) { map1[i] = k; }
         if (map2) { map2[j] = k; }
         arr3[k++] = arr1[i++];
         j++;
      }
   }

   while (i < n1)
   {
      if (map1) { map1[i] = k; }
      arr3[k++] = arr1[i++];
   }

   while (j < n2)
   {
      if (map2) { map2[j] = k; }
      arr3[k++] = arr2[j++];
   }

   *n3 = k;
}

/*--------------------------------------------------------------------------
 * hypre_merge
 *
 * Merge the sorted ranges [first1, last1) and [first2, last2) into out.
 * All elements are kept (no duplicate removal). On ties the element of
 * the first range is written first, because an element of the second
 * range is taken only when it is strictly smaller.
 *--------------------------------------------------------------------------*/

static void
hypre_merge( HYPRE_Int *first1, HYPRE_Int *last1,
             HYPRE_Int *first2, HYPRE_Int *last2,
             HYPRE_Int *out )
{
   for ( ; first1 != last1; ++out)
   {
      if (first2 == last2)
      {
         /* Second range exhausted: copy the remainder of the first range */
         for ( ; first1 != last1; ++first1, ++out)
         {
            *out = *first1;
         }
         return;
      }
      if (*first2 < *first1)
      {
         *out = *first2;
         ++first2;
      }
      else
      {
         *out = *first1;
         ++first1;
      }
   }

   /* First range exhausted: copy the remainder of the second range */
   for ( ; first2 != last2; ++first2, ++out)
   {
      *out = *first2;
   }
}

/*--------------------------------------------------------------------------
 * hypre_big_merge
 *
 * HYPRE_BigInt counterpart of hypre_merge: merge the sorted ranges
 * [first1, last1) and [first2, last2) into out, keeping all elements and
 * writing the element of the first range first on ties.
 *--------------------------------------------------------------------------*/

static void
hypre_big_merge( HYPRE_BigInt *first1, HYPRE_BigInt *last1,
                 HYPRE_BigInt *first2, HYPRE_BigInt *last2,
                 HYPRE_BigInt *out )
{
   for ( ; first1 != last1; ++out)
   {
      if (first2 == last2)
      {
         /* Second range exhausted: copy the remainder of the first range */
         for ( ; first1 != last1; ++first1, ++out)
         {
            *out = *first1;
         }
         return;
      }
      if (*first2 < *first1)
      {
         *out = *first2;
         ++first2;
      }
      else
      {
         *out = *first1;
         ++first1;
      }
   }

   /* First range exhausted: copy the remainder of the second range */
   for ( ; first2 != last2; ++first2, ++out)
   {
      *out = *first2;
   }
}

/*--------------------------------------------------------------------------
 * kth_element_
 *
 * Binary search helper of kth_element. Searches an index i of a1 in
 * [left, right], paired with j = k - i - 1 in a2, until one of the two
 * split conditions below holds. Both exits satisfy *out1 + *out2 == k.
 *
 * The loop has no explicit termination test; it relies on the caller
 * (kth_element) having removed the trivial cases beforehand.
 *--------------------------------------------------------------------------*/

static void
kth_element_( HYPRE_Int *out1, HYPRE_Int *out2,
              HYPRE_Int *a1,   HYPRE_Int *a2,
              HYPRE_Int  left, HYPRE_Int  right,
              HYPRE_Int  n1,   HYPRE_Int  n2,
              HYPRE_Int  k)
{
   while (1)
   {
      HYPRE_Int i = (left + right) / 2; // right < k -> i < k
      HYPRE_Int j = k - i - 1;

#ifdef DBG_MERGE_SORT
      hypre_assert(left <= right && right <= k);
      hypre_assert(i < k); // i == k implies left == right == k that can never happen
      hypre_assert(j >= 0 && j < n2);
#endif

      if ((j == -1 || a1[i] >= a2[j]) && (j == n2 - 1 || a1[i] <= a2[j + 1]))
      {
         /* a1[i] lies between a2[j] and a2[j + 1]: take a1[0:i) and a2[0:j] */
         *out1 = i; *out2 = j + 1;
         return;
      }
      else if (j >= 0 && a2[j] >= a1[i] && (i == n1 - 1 || a2[j] <= a1[i + 1]))
      {
         /* a2[j] lies between a1[i] and a1[i + 1]: take a1[0:i] and a2[0:j) */
         *out1 = i + 1; *out2 = j;
         return;
      }
      else if (a1[i] > a2[j] && j != n2 - 1 && a1[i] > a2[j + 1])
      {
         // search in left half of a1
         right = i - 1;
      }
      else
      {
         // search in right half of a1
         left = i + 1;
      }
   }
}

/*--------------------------------------------------------------------------
 * kth_element
 *
 * Partition the input so that
 * a1[0:*out1) and a2[0:*out2) contain the smallest k elements
 *
 * a1 (length n1) and a2 (length n2) are the two sorted inputs. Trivial
 * cases (an empty input, k >= n1 + n2, or one input lying entirely on one
 * side of the split) are answered directly; the remaining cases are
 * handed to kth_element_.
 *--------------------------------------------------------------------------*/

static void
kth_element( HYPRE_Int *out1, HYPRE_Int *out2,
             HYPRE_Int *a1,   HYPRE_Int *a2,
             HYPRE_Int  n1,   HYPRE_Int  n2,
             HYPRE_Int  k)
{
   // either of the inputs is empty
   if (n1 == 0)
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (n2 == 0)
   {
      *out1 = k; *out2 = 0;
      return;
   }
   if (k >= n1 + n2)
   {
      *out1 = n1; *out2 = n2;
      return;
   }

   // one is greater than the other
   if (k < n1 && a1[k] <= a2[0])
   {
      *out1 = k; *out2 = 0;
      return;
   }
   if (k - n1 >= 0 && a2[k - n1] >= a1[n1 - 1])
   {
      *out1 = n1; *out2 = k - n1;
      return;
   }
   if (k < n2 && a2[k] <= a1[0])
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (k - n2 >= 0 && a1[k - n2] >= a2[n2 - 1])
   {
      *out1 = k - n2; *out2 = n2;
      return;
   }
   // now k > 0

   // faster to do binary search on the shorter sequence
   if (n1 > n2)
   {
      /* The output pointers are swapped too, so the results still land in
       * the caller's out1/out2 in the original order */
      SWAP(HYPRE_Int, n1, n2);
      SWAP(HYPRE_Int *, a1, a2);
      SWAP(HYPRE_Int *, out1, out2);
   }

   if (k < (n1 + n2) / 2)
   {
      kth_element_(out1, out2, a1, a2, 0, hypre_min(n1 - 1, k), n1, n2, k);
   }
   else
   {
      // when k is big, faster to find (n1 + n2 - k)th biggest element
      HYPRE_Int offset1 = hypre_max(k - n2, 0), offset2 = hypre_max(k - n1, 0);
      HYPRE_Int new_k = k - offset1 - offset2;

      HYPRE_Int new_n1 = hypre_min(n1 - offset1, new_k + 1);
      HYPRE_Int new_n2 = hypre_min(n2 - offset2, new_k + 1);
      kth_element_(out1, out2, a1 + offset1, a2 + offset2, 0, new_n1 - 1,
                   new_n1, new_n2, new_k);

      /* Translate the result back to indices of the full arrays */
      *out1 += offset1;
      *out2 += offset2;
   }

#ifdef DBG_MERGE_SORT
   hypre_assert(*out1 + *out2 == k);
#endif
}

/*--------------------------------------------------------------------------
 * big_kth_element_
 *
 * HYPRE_BigInt counterpart of kth_element_: binary search of an index i
 * of a1 in [left, right], paired with j = k - i - 1 in a2. Both exits
 * satisfy *out1 + *out2 == k. The loop has no explicit termination test;
 * it relies on the caller (big_kth_element) having removed the trivial
 * cases beforehand.
 *--------------------------------------------------------------------------*/

static void
big_kth_element_( HYPRE_Int    *out1, HYPRE_Int    *out2,
                  HYPRE_BigInt *a1,   HYPRE_BigInt *a2,
                  HYPRE_Int     left, HYPRE_Int     right,
                  HYPRE_Int     n1,   HYPRE_Int     n2,
                  HYPRE_Int     k)
{
   while (1)
   {
      HYPRE_Int i = (left + right) / 2; // right < k -> i < k
      HYPRE_Int j = k - i - 1;

#ifdef DBG_MERGE_SORT
      hypre_assert(left <= right && right <= k);
      hypre_assert(i < k); // i == k implies left == right == k that can never happen
      hypre_assert(j >= 0 && j < n2);
#endif

      if ((j == -1 || a1[i] >= a2[j]) && (j == n2 - 1 || a1[i] <= a2[j + 1]))
      {
         /* a1[i] lies between a2[j] and a2[j + 1]: take a1[0:i) and a2[0:j] */
         *out1 = i; *out2 = j + 1;
         return;
      }
      else if (j >= 0 && a2[j] >= a1[i] && (i == n1 - 1 || a2[j] <= a1[i + 1]))
      {
         /* a2[j] lies between a1[i] and a1[i + 1]: take a1[0:i] and a2[0:j) */
         *out1 = i + 1; *out2 = j;
         return;
      }
      else if (a1[i] > a2[j] && j != n2 - 1 && a1[i] > a2[j + 1])
      {
         // search in left half of a1
         right = i - 1;
      }
      else
      {
         // search in right half of a1
         left = i + 1;
      }
   }
}

/*--------------------------------------------------------------------------
 * big_kth_element
 *
 * Partition the input so that
 * a1[0:*out1) and a2[0:*out2) contain the smallest k elements
 *
 * HYPRE_BigInt counterpart of kth_element. Trivial cases are answered
 * directly; the remaining cases are handed to big_kth_element_.
 *--------------------------------------------------------------------------*/

static void
big_kth_element( HYPRE_Int    *out1, HYPRE_Int    *out2,
                 HYPRE_BigInt *a1,   HYPRE_BigInt *a2,
                 HYPRE_Int     n1,   HYPRE_Int     n2,
                 HYPRE_Int     k)
{
   // either of the inputs is empty
   if (n1 == 0)
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (n2 == 0)
   {
      *out1 = k; *out2 = 0;
      return;
   }
   if (k >= n1 + n2)
   {
      *out1 = n1; *out2 = n2;
      return;
   }

   // one is greater than the other
   if (k < n1 && a1[k] <= a2[0])
   {
      *out1 = k; *out2 = 0;
      return;
   }
   if (k - n1 >= 0 && a2[k - n1] >= a1[n1 - 1])
   {
      *out1 = n1; *out2 = k - n1;
      return;
   }
   if (k < n2 && a2[k] <= a1[0])
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (k - n2 >= 0 && a1[k - n2] >= a2[n2 - 1])
   {
      *out1 = k - n2; *out2 = n2;
      return;
   }
   // now k > 0

   // faster to do binary search on the shorter sequence
   if (n1 > n2)
   {
      /* The output pointers are swapped too, so the results still land in
       * the caller's out1/out2 in the original order */
      SWAP(HYPRE_Int, n1, n2);
      SWAP(HYPRE_BigInt *, a1, a2);
      SWAP(HYPRE_Int *, out1, out2);
   }

   if (k < (n1 + n2) / 2)
   {
      big_kth_element_(out1, out2, a1, a2, 0, hypre_min(n1 - 1, k), n1, n2, k);
   }
   else
   {
      // when k is big, faster to find (n1 + n2 - k)th biggest element
      HYPRE_Int offset1 = hypre_max(k - n2, 0), offset2 = hypre_max(k - n1, 0);
      HYPRE_Int new_k = k - offset1 - offset2;

      HYPRE_Int new_n1 = hypre_min(n1 - offset1, new_k + 1);
      HYPRE_Int new_n2 = hypre_min(n2 - offset2, new_k + 1);
      big_kth_element_(out1, out2, a1 + (HYPRE_BigInt)offset1, a2 + (HYPRE_BigInt)offset2, 0,
                       new_n1 - 1, new_n1, new_n2, new_k);

      /* Translate the result back to indices of the full arrays */
      *out1 += offset1;
      *out2 += offset2;
   }

#ifdef DBG_MERGE_SORT
   hypre_assert(*out1 + *out2 == k);
#endif
}

/*--------------------------------------------------------------------------
 * hypre_parallel_merge
 *
 * Merge the sorted ranges [first1, last1) and [first2, last2) into out,
 * with the work split among several threads. Each calling thread merges
 * only the slice of the output between ranks begin_rank and end_rank,
 * located in the inputs with kth_element.
 *
 * @param num_threads number of threads that participate in this merge
 * @param my_thread_num thread id (zero-based) among the threads that
 *                      participate in this merge
 *--------------------------------------------------------------------------*/

static void
hypre_parallel_merge( HYPRE_Int *first1,
                      HYPRE_Int *last1,
                      HYPRE_Int *first2,
                      HYPRE_Int *last2,
                      HYPRE_Int *out,
                      HYPRE_Int  num_threads,
                      HYPRE_Int  my_thread_num )
{
   HYPRE_Int n1 = (HYPRE_Int)(last1 - first1);
   HYPRE_Int n2 = (HYPRE_Int)(last2 - first2);
   HYPRE_Int n = n1 + n2;
   /* Ceiling division; trailing threads may get a shorter or empty slice */
   HYPRE_Int n_per_thread = (n + num_threads - 1) / num_threads;
   HYPRE_Int begin_rank = hypre_min(n_per_thread * my_thread_num, n);
   HYPRE_Int end_rank = hypre_min(begin_rank + n_per_thread, n);

#ifdef DBG_MERGE_SORT
   hypre_assert(std::is_sorted(first1, last1));
   hypre_assert(std::is_sorted(first2, last2));
#endif

   HYPRE_Int begin1, begin2, end1, end2;
   kth_element(&begin1, &begin2, first1, first2, n1, n2, begin_rank);
   kth_element(&end1, &end2, first1, first2, n1, n2, end_rank);

   /* With equal values straddling the split, the two kth_element calls can
    * return inconsistent splits (begin past end in one input). Trade equal
    * elements between the inputs until the slice is well-formed; the sums
    * begin1 + begin2 and end1 + end2 are preserved. */
   while (begin1 > end1 && begin1 > 0 && begin2 < n2 && first1[begin1 - 1] == first2[begin2])
   {
#ifdef DBG_MERGE_SORT
      printf("%s:%d\n", __FILE__, __LINE__);
#endif
      begin1--; begin2++;
   }
   while (begin2 > end2 && end1 > 0 && end2 < n2 && first1[end1 - 1] == first2[end2])
   {
#ifdef DBG_MERGE_SORT
      printf("%s:%d\n", __FILE__, __LINE__);
#endif
      end1--; end2++;
   }

#ifdef DBG_MERGE_SORT
   hypre_assert(begin1 <= end1);
   hypre_assert(begin2 <= end2);
#endif

   hypre_merge(
      first1 + begin1, first1 + end1,
      first2 + begin2, first2 + end2,
      out + begin1 + begin2);

#ifdef DBG_MERGE_SORT
   hypre_assert(std::is_sorted(out + begin1 + begin2, out + end1 + end2));
#endif
}

/*--------------------------------------------------------------------------
 * hypre_big_parallel_merge
 *
 * HYPRE_BigInt counterpart of hypre_parallel_merge: each calling thread
 * merges only its own slice of the output, located in the inputs with
 * big_kth_element.
 *
 * @param num_threads number of threads that participate in this merge
 * @param my_thread_num thread id (zero-based) among the threads that
 *                      participate in this merge
 *--------------------------------------------------------------------------*/

static void
hypre_big_parallel_merge( HYPRE_BigInt *first1,
                          HYPRE_BigInt *last1,
                          HYPRE_BigInt *first2,
                          HYPRE_BigInt *last2,
                          HYPRE_BigInt *out,
                          HYPRE_Int     num_threads,
                          HYPRE_Int     my_thread_num)
{
   HYPRE_Int n1 = (HYPRE_Int)(last1 - first1);
   HYPRE_Int n2 = (HYPRE_Int)(last2 - first2);
   HYPRE_Int n = n1 + n2;
   /* Ceiling division; trailing threads may get a shorter or empty slice */
   HYPRE_Int n_per_thread = (n + num_threads - 1) / num_threads;
   HYPRE_Int begin_rank = hypre_min(n_per_thread * my_thread_num, n);
   HYPRE_Int end_rank = hypre_min(begin_rank + n_per_thread, n);

#ifdef DBG_MERGE_SORT
   hypre_assert(std::is_sorted(first1, last1));
   hypre_assert(std::is_sorted(first2, last2));
#endif

   HYPRE_Int begin1, begin2, end1, end2;
   big_kth_element(&begin1, &begin2, first1, first2, n1, n2, begin_rank);
   big_kth_element(&end1, &end2, first1, first2, n1, n2, end_rank);

   /* With equal values straddling the split, the two big_kth_element calls
    * can return inconsistent splits (begin past end in one input). Trade
    * equal elements between the inputs until the slice is well-formed; the
    * sums begin1 + begin2 and end1 + end2 are preserved. */
   while (begin1 > end1 && begin1 > 0 && begin2 < n2 && first1[begin1 - 1] == first2[begin2])
   {
#ifdef DBG_MERGE_SORT
      printf("%s:%d\n", __FILE__, __LINE__);
#endif
      begin1--; begin2++;
   }
   while (begin2 > end2 && end1 > 0 && end2 < n2 && first1[end1 - 1] == first2[end2])
   {
#ifdef DBG_MERGE_SORT
      printf("%s:%d\n", __FILE__, __LINE__);
#endif
      end1--; end2++;
   }

#ifdef DBG_MERGE_SORT
   hypre_assert(begin1 <= end1);
   hypre_assert(begin2 <= end2);
#endif

   hypre_big_merge(
      first1 + (HYPRE_BigInt)begin1, first1 + (HYPRE_BigInt)end1,
      first2 + (HYPRE_BigInt)begin2, first2 + (HYPRE_BigInt)end2,
      out + (HYPRE_BigInt)(begin1 + begin2));

#ifdef DBG_MERGE_SORT
   hypre_assert(std::is_sorted(out + begin1 + begin2, out + end1 + end2));
#endif
}

/*--------------------------------------------------------------------------
 * hypre_merge_sort
 *
 * Sort the array "in" of length len, using "temp" (also of length len)
 * as scratch space. Each thread first sorts its own contiguous chunk of
 * "in"; the chunks are then merged pairwise in rounds, alternating
 * between the two buffers.
 *
 * On return *out points to the buffer holding the sorted result, which
 * is either "in" or "temp" depending on the number of merge rounds.
 * Both buffers are modified. If len == 0 the function returns
 * immediately and *out is not written.
 *--------------------------------------------------------------------------*/

void
hypre_merge_sort( HYPRE_Int  *in,
                  HYPRE_Int  *temp,
                  HYPRE_Int   len,
                  HYPRE_Int **out )
{
   if (0 == len) { return; }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

#ifdef DBG_MERGE_SORT
   HYPRE_Int *dbg_buf = new HYPRE_Int[len];
   std::copy(in, in + len, dbg_buf);
   std::sort(dbg_buf, dbg_buf + len);
#endif

   // HYPRE_Int thread_private_len[hypre_NumThreads()];
   // HYPRE_Int out_len = 0;

#ifdef HYPRE_USING_OPENMP
   #pragma omp parallel
#endif
   {
      HYPRE_Int num_threads = hypre_NumActiveThreads();
      HYPRE_Int my_thread_num = hypre_GetThreadNum();

      // thread-private sort
      HYPRE_Int i_per_thread = (len + num_threads - 1) / num_threads;
      HYPRE_Int i_begin = hypre_min(i_per_thread * my_thread_num, len);
      HYPRE_Int i_end = hypre_min(i_begin + i_per_thread, len);

      hypre_qsort0(in, i_begin, i_end - 1);

      // merge sorted sequences
      HYPRE_Int in_group_size;
      HYPRE_Int *in_buf = in;
      HYPRE_Int *out_buf = temp;
      for (in_group_size = 1; in_group_size < num_threads; in_group_size *= 2)
      {
         /* All writes of the previous round (or of the chunk sort) must be
          * complete before any thread reads them in this round */
#ifdef HYPRE_USING_OPENMP
         #pragma omp barrier
#endif

         // merge 2 in-groups into 1 out-group
         HYPRE_Int out_group_size = in_group_size * 2;
         HYPRE_Int group_leader = my_thread_num / out_group_size * out_group_size;
         // HYPRE_Int group_sub_leader = hypre_min(group_leader + in_group_size, num_threads - 1);
         HYPRE_Int id_in_group = my_thread_num % out_group_size;
         HYPRE_Int num_threads_in_group =
            hypre_min(group_leader + out_group_size, num_threads) - group_leader;

         HYPRE_Int in_group1_begin = hypre_min(i_per_thread * group_leader, len);
         HYPRE_Int in_group1_end = hypre_min(in_group1_begin + i_per_thread * in_group_size, len);

         HYPRE_Int in_group2_begin = hypre_min(in_group1_begin + i_per_thread * in_group_size, len);
         HYPRE_Int in_group2_end = hypre_min(in_group2_begin + i_per_thread * in_group_size, len);

         hypre_parallel_merge(
            in_buf + in_group1_begin, in_buf + in_group1_end,
            in_buf + in_group2_begin, in_buf + in_group2_end,
            out_buf + in_group1_begin,
            num_threads_in_group,
            id_in_group);

         /* The output of this round is the input of the next one */
         HYPRE_Int *swap_buf = in_buf;
         in_buf = out_buf;
         out_buf = swap_buf;
      }

      /* The number of rounds depends only on num_threads, so every thread
       * ends up with the same in_buf (assuming num_threads is the same in
       * all threads of the region). Let a single thread publish it instead
       * of having all threads store to *out concurrently. */
      if (my_thread_num == 0)
      {
         *out = in_buf;
      }
   } /* omp parallel */

#ifdef DBG_MERGE_SORT
   hypre_assert(std::equal(*out, *out + len, dbg_buf));

   delete[] dbg_buf;
#endif

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}

/*--------------------------------------------------------------------------
 * hypre_sort_and_create_inverse_map
 *
 * Sort array "in" with length len and put result in array "out"
 * "in" will be deallocated unless in == *out
 * inverse_map is an inverse hash table s.t.
 * inverse_map[i] = j iff (*out)[j] = i
 *
 * A scratch buffer of length len is allocated on the host for the sort;
 * whichever of "in" and the scratch buffer does not hold the result is
 * freed (as host memory) before returning. Entries are inserted with
 * PutIfAbsent and the previous value is asserted to be empty.
 *
 * If len == 0 the function returns immediately: *out and inverse_map are
 * not touched and "in" is not freed.
 *--------------------------------------------------------------------------*/

void
hypre_sort_and_create_inverse_map(HYPRE_Int              *in,
                                  HYPRE_Int               len,
                                  HYPRE_Int             **out,
                                  hypre_UnorderedIntMap  *inverse_map)
{
   if (len == 0)
   {
      return;
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

   HYPRE_Int *temp = hypre_TAlloc(HYPRE_Int, len, HYPRE_MEMORY_HOST);
   hypre_merge_sort(in, temp, len, out);
   hypre_UnorderedIntMapCreate(inverse_map, 2 * len, 16 * hypre_NumThreads());
   HYPRE_Int i;
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
   #pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < len; i++)
   {
      HYPRE_Int old = hypre_UnorderedIntMapPutIfAbsent(inverse_map, (*out)[i], i);
      hypre_assert(old == HYPRE_HOPSCOTCH_HASH_EMPTY);
#ifdef DBG_MERGE_SORT
      if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i)
      {
         fprintf(stderr, "%d %d\n", i, (*out)[i]);
         hypre_assert(false);
      }
#endif
   }

#ifdef DBG_MERGE_SORT
   std::unordered_map<HYPRE_Int, HYPRE_Int> inverse_map2(len);
   for (HYPRE_Int i = 0; i < len; ++i)
   {
      inverse_map2[(*out)[i]] = i;
      if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i)
      {
         fprintf(stderr, "%d %d\n", i, (*out)[i]);
         hypre_assert(false);
      }
   }
   hypre_assert(hypre_UnorderedIntMapSize(inverse_map) == len);
#endif

   /* Free whichever buffer does not hold the sorted result */
   if (*out == in)
   {
      hypre_TFree(temp, HYPRE_MEMORY_HOST);
   }
   else
   {
      hypre_TFree(in, HYPRE_MEMORY_HOST);
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}

/*--------------------------------------------------------------------------
 * hypre_big_merge_sort
 *
 * HYPRE_BigInt counterpart of hypre_merge_sort. Sort the array "in" of
 * length len, using "temp" (also of length len) as scratch space.
 *
 * On return *out points to the buffer holding the sorted result, which
 * is either "in" or "temp" depending on the number of merge rounds.
 * Both buffers are modified. If len == 0 the function returns
 * immediately and *out is not written.
 *--------------------------------------------------------------------------*/

void
hypre_big_merge_sort(HYPRE_BigInt  *in,
                     HYPRE_BigInt  *temp,
                     HYPRE_Int      len,
                     HYPRE_BigInt **out)
{
   if (0 == len) { return; }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

#ifdef DBG_MERGE_SORT
   /* Reference copy must have the element type of the data being sorted */
   HYPRE_BigInt *dbg_buf = new HYPRE_BigInt[len];
   std::copy(in, in + len, dbg_buf);
   std::sort(dbg_buf, dbg_buf + len);
#endif

   // HYPRE_Int thread_private_len[hypre_NumThreads()];
   // HYPRE_Int out_len = 0;

#ifdef HYPRE_USING_OPENMP
   #pragma omp parallel
#endif
   {
      HYPRE_Int num_threads = hypre_NumActiveThreads();
      HYPRE_Int my_thread_num = hypre_GetThreadNum();

      // thread-private sort
      HYPRE_Int i_per_thread = (len + num_threads - 1) / num_threads;
      HYPRE_Int i_begin = hypre_min(i_per_thread * my_thread_num, len);
      HYPRE_Int i_end = hypre_min(i_begin + i_per_thread, len);

      hypre_BigQsort0(in, i_begin, i_end - 1);

      // merge sorted sequences
      HYPRE_Int in_group_size;
      HYPRE_BigInt *in_buf = in;
      HYPRE_BigInt *out_buf = temp;
      for (in_group_size = 1; in_group_size < num_threads; in_group_size *= 2)
      {
         /* All writes of the previous round (or of the chunk sort) must be
          * complete before any thread reads them in this round */
#ifdef HYPRE_USING_OPENMP
         #pragma omp barrier
#endif

         // merge 2 in-groups into 1 out-group
         HYPRE_Int out_group_size = in_group_size * 2;
         HYPRE_Int group_leader = my_thread_num / out_group_size * out_group_size;
         // HYPRE_Int group_sub_leader = hypre_min(group_leader + in_group_size, num_threads - 1);
         HYPRE_Int id_in_group = my_thread_num % out_group_size;
         HYPRE_Int num_threads_in_group =
            hypre_min(group_leader + out_group_size, num_threads) - group_leader;

         HYPRE_Int in_group1_begin = hypre_min(i_per_thread * group_leader, len);
         HYPRE_Int in_group1_end = hypre_min(in_group1_begin + i_per_thread * in_group_size, len);

         HYPRE_Int in_group2_begin = hypre_min(in_group1_begin + i_per_thread * in_group_size, len);
         HYPRE_Int in_group2_end = hypre_min(in_group2_begin + i_per_thread * in_group_size, len);

         hypre_big_parallel_merge(
            in_buf + (HYPRE_BigInt)in_group1_begin, in_buf + (HYPRE_BigInt)in_group1_end,
            in_buf + (HYPRE_BigInt)in_group2_begin, in_buf + (HYPRE_BigInt)in_group2_end,
            out_buf + (HYPRE_BigInt)in_group1_begin,
            num_threads_in_group,
            id_in_group);

         /* The output of this round is the input of the next one */
         HYPRE_BigInt *swap_buf = in_buf;
         in_buf = out_buf;
         out_buf = swap_buf;
      }

      /* The number of rounds depends only on num_threads, so every thread
       * ends up with the same in_buf (assuming num_threads is the same in
       * all threads of the region). Let a single thread publish it instead
       * of having all threads store to *out concurrently. */
      if (my_thread_num == 0)
      {
         *out = in_buf;
      }
   } /* omp parallel */

#ifdef DBG_MERGE_SORT
   hypre_assert(std::equal(*out, *out + len, dbg_buf));

   delete[] dbg_buf;
#endif

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}

/*--------------------------------------------------------------------------
 * hypre_big_sort_and_create_inverse_map
 *
 * HYPRE_BigInt counterpart of hypre_sort_and_create_inverse_map.
 *
 * Sort array "in" with length len and put result in array "out"
 * "in" will be deallocated unless in == *out
 * inverse_map is an inverse hash table s.t.
 * inverse_map[i] = j iff (*out)[j] = i
 *
 * If len == 0 the function returns immediately: *out and inverse_map are
 * not touched and "in" is not freed.
 *--------------------------------------------------------------------------*/

void
hypre_big_sort_and_create_inverse_map(HYPRE_BigInt              *in,
                                      HYPRE_Int                  len,
                                      HYPRE_BigInt             **out,
                                      hypre_UnorderedBigIntMap  *inverse_map)
{
   if (len == 0)
   {
      return;
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

   HYPRE_BigInt *temp = hypre_TAlloc(HYPRE_BigInt, len, HYPRE_MEMORY_HOST);
   hypre_big_merge_sort(in, temp, len, out);
   hypre_UnorderedBigIntMapCreate(inverse_map, 2 * len, 16 * hypre_NumThreads());
   HYPRE_Int i;
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
   #pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < len; i++)
   {
      HYPRE_Int old = hypre_UnorderedBigIntMapPutIfAbsent(inverse_map, (*out)[i], i);
      hypre_assert(old == HYPRE_HOPSCOTCH_HASH_EMPTY);
#ifdef DBG_MERGE_SORT
      if (hypre_UnorderedBigIntMapGet(inverse_map, (*out)[i]) != i)
      {
         fprintf(stderr, "%d %d\n", i, (*out)[i]);
         hypre_assert(false);
      }
#endif
   }

#ifdef DBG_MERGE_SORT
   std::unordered_map<HYPRE_BigInt, HYPRE_Int> inverse_map2(len);
   for (HYPRE_Int i = 0; i < len; ++i)
   {
      inverse_map2[(*out)[i]] = i;
      if (hypre_UnorderedBigIntMapGet(inverse_map, (*out)[i]) != i)
      {
         fprintf(stderr, "%d %d\n", i, (*out)[i]);
         hypre_assert(false);
      }
   }
   hypre_assert(hypre_UnorderedBigIntMapSize(inverse_map) == len);
#endif

   /* Free whichever buffer does not hold the sorted result */
   if (*out == in)
   {
      hypre_TFree(temp, HYPRE_MEMORY_HOST);
   }
   else
   {
      hypre_TFree(in, HYPRE_MEMORY_HOST);
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}

/* vim: set tabstop=8 softtabstop=3 sw=3 expandtab: */