hypre/src/utilities/merge_sort.c
2022-06-07 12:08:43 -07:00

866 lines
25 KiB
C

/******************************************************************************
* Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
* HYPRE Project Developers. See the top-level COPYRIGHT file for details.
*
* SPDX-License-Identifier: (Apache-2.0 OR MIT)
******************************************************************************/
#include "_hypre_utilities.h"
#include "../seq_mv/HYPRE_seq_mv.h"
//#define DBG_MERGE_SORT
#ifdef DBG_MERGE_SORT
#include <algorithm>
#include <unordered_map>
#endif
/* Exchange the values of the lvalues a and b, both of type T, through a
 * temporary. The do { } while (0) wrapper makes the expansion behave as a
 * single statement. The arguments are pasted unparenthesized and each one
 * is evaluated twice, so pass plain variables only (and none named tmp). */
#define SWAP(T, a, b) do { T tmp = a; a = b; b = tmp; } while (0)
/*--------------------------------------------------------------------------
 * hypre_MergeOrderedArrays
 *
 * Merge the entries of array1 and array2 into array3, always emitting the
 * smaller head entry first. A value that sits at the head of both inputs at
 * the same time is stored only once.
 *
 * The result buffer is allocated in array3's memory location with room for
 * size1 + size2 entries and is then shrunk to the number actually written.
 * The size and data fields of array3 are overwritten; this routine does not
 * release whatever data pointer array3 held before the call.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_MergeOrderedArrays( hypre_IntArray *array1,
                          hypre_IntArray *array2,
                          hypre_IntArray *array3 )
{
   const HYPRE_Int       size1           = hypre_IntArraySize(array1);
   const HYPRE_Int       size2           = hypre_IntArraySize(array2);
   const HYPRE_Int       capacity        = size1 + size2;
   HYPRE_MemoryLocation  memory_location = hypre_IntArrayMemoryLocation(array3);
   HYPRE_Int            *src1            = hypre_IntArrayData(array1);
   HYPRE_Int            *src2            = hypre_IntArrayData(array2);
   HYPRE_Int            *merged          = hypre_TAlloc(HYPRE_Int, capacity, memory_location);
   HYPRE_Int             pos1            = 0;
   HYPRE_Int             pos2            = 0;
   HYPRE_Int             count           = 0;

   /* Both inputs still have entries: compare their heads */
   while (pos1 < size1 && pos2 < size2)
   {
      const HYPRE_Int head1 = src1[pos1];
      const HYPRE_Int head2 = src2[pos2];

      if (head2 < head1)
      {
         merged[count++] = head2;
         pos2++;
      }
      else
      {
         /* head1 <= head2: take array1's entry; on a tie also consume
          * array2's copy so the value is written only once */
         merged[count++] = head1;
         pos1++;
         pos2 += (head1 == head2);
      }
   }

   /* At most one of the inputs has entries left: copy the tail */
   for ( ; pos1 < size1; pos1++)
   {
      merged[count++] = src1[pos1];
   }
   for ( ; pos2 < size2; pos2++)
   {
      merged[count++] = src2[pos2];
   }

   /* Shrink the buffer to the number of entries actually produced */
   merged = hypre_TReAlloc_v2(merged, HYPRE_Int, capacity, HYPRE_Int, count, memory_location);

   hypre_IntArraySize(array3) = count;
   hypre_IntArrayData(array3) = merged;

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_union2
 *
 * Form the union of two ascending arrays arr1 (n1 entries) and arr2
 * (n2 entries) in arr3 and store the number of entries written in *n3.
 *
 * Assumptions:
 *    1) neither arr1 nor arr2 contains duplicates, although the same value
 *       may appear once in each of them (it is then written once)
 *    2) arr3 has enough space on entry
 *    3) map1 and map2, when not NULL, receive for every entry of arr1 and
 *       arr2 the position it ended up at in arr3
 *--------------------------------------------------------------------------*/
void hypre_union2( HYPRE_Int  n1, HYPRE_BigInt *arr1,
                   HYPRE_Int  n2, HYPRE_BigInt *arr2,
                   HYPRE_Int *n3, HYPRE_BigInt *arr3,
                   HYPRE_Int *map1, HYPRE_Int *map2 )
{
   HYPRE_Int pos1  = 0;
   HYPRE_Int pos2  = 0;
   HYPRE_Int count = 0;

   /* Both inputs still have entries: emit the smaller head; on a tie both
    * heads map to the same output slot and both are consumed */
   while (pos1 < n1 && pos2 < n2)
   {
      const HYPRE_BigInt head1 = arr1[pos1];
      const HYPRE_BigInt head2 = arr2[pos2];
      const HYPRE_Int    use1  = (head1 <= head2);
      const HYPRE_Int    use2  = (head2 <= head1);

      if (use1 && map1) { map1[pos1] = count; }
      if (use2 && map2) { map2[pos2] = count; }

      arr3[count++] = use1 ? head1 : head2;
      pos1 += use1;
      pos2 += use2;
   }

   /* Copy whatever is left of arr1 ... */
   for ( ; pos1 < n1; pos1++, count++)
   {
      if (map1) { map1[pos1] = count; }
      arr3[count] = arr1[pos1];
   }

   /* ... or of arr2 */
   for ( ; pos2 < n2; pos2++, count++)
   {
      if (map2) { map2[pos2] = count; }
      arr3[count] = arr2[pos2];
   }

   *n3 = count;
}
/*--------------------------------------------------------------------------
 * hypre_merge
 *
 * Merge the ranges [first1, last1) and [first2, last2) into out, always
 * emitting the smaller head element first. When the heads compare equal the
 * element of the first range is emitted first. Exactly
 * (last1 - first1) + (last2 - first2) elements are written to out.
 *--------------------------------------------------------------------------*/
static void hypre_merge( HYPRE_Int *first1, HYPRE_Int *last1,
                         HYPRE_Int *first2, HYPRE_Int *last2,
                         HYPRE_Int *out )
{
   /* Both ranges non-empty: pick the smaller head */
   while (first1 != last1 && first2 != last2)
   {
      if (*first2 < *first1)
      {
         *out++ = *first2++;
      }
      else
      {
         *out++ = *first1++;
      }
   }

   /* At most one range still has elements: copy its tail */
   while (first1 != last1)
   {
      *out++ = *first1++;
   }
   while (first2 != last2)
   {
      *out++ = *first2++;
   }
}
/*--------------------------------------------------------------------------
 * hypre_big_merge
 *
 * HYPRE_BigInt counterpart of hypre_merge: merge the ranges [first1, last1)
 * and [first2, last2) into out, always emitting the smaller head element
 * first. When the heads compare equal the element of the first range is
 * emitted first. Exactly (last1 - first1) + (last2 - first2) elements are
 * written to out.
 *--------------------------------------------------------------------------*/
static void hypre_big_merge( HYPRE_BigInt *first1, HYPRE_BigInt *last1,
                             HYPRE_BigInt *first2, HYPRE_BigInt *last2,
                             HYPRE_BigInt *out )
{
   /* Both ranges non-empty: pick the smaller head */
   while (first1 != last1 && first2 != last2)
   {
      if (*first2 < *first1)
      {
         *out++ = *first2++;
      }
      else
      {
         *out++ = *first1++;
      }
   }

   /* At most one range still has elements: copy its tail */
   while (first1 != last1)
   {
      *out++ = *first1++;
   }
   while (first2 != last2)
   {
      *out++ = *first2++;
   }
}
/*--------------------------------------------------------------------------
* kth_element_
*
* Binary-search helper of kth_element. Searches an index i into a1 within
* [left, right] and pairs it with j = k - i - 1 in a2, until a split is found
* such that a1[0:*out1) and a2[0:*out2) hold the k smallest elements, with
* *out1 + *out2 == k.
*
* out1, out2   receive the number of elements taken from a1 and from a2
* a1, a2       input sequences of length n1 and n2 (the comparisons assume
*              both are sorted in ascending order)
* left, right  inclusive search bounds for the index into a1
* k            number of smallest elements to split off
*
* The loop is only left through the two return statements, so the caller is
* expected to pass bounds that contain a valid split.
* NOTE(review): left + right is evaluated in HYPRE_Int; assumes the sum does
* not overflow.
*--------------------------------------------------------------------------*/
static void kth_element_( HYPRE_Int *out1, HYPRE_Int *out2,
HYPRE_Int *a1, HYPRE_Int *a2,
HYPRE_Int left, HYPRE_Int right,
HYPRE_Int n1, HYPRE_Int n2, HYPRE_Int k)
{
while (1)
{
// candidate pair: a1[i] and a2[j] with i + j + 1 == k
HYPRE_Int i = (left + right) / 2; // right < k -> i < k
HYPRE_Int j = k - i - 1;
#ifdef DBG_MERGE_SORT
hypre_assert(left <= right && right <= k);
hypre_assert(i < k); // i == k implies left == right == k that can never happen
// NOTE(review): this assertion requires j >= 0 while the test below
// explicitly handles j == -1 -- confirm which one is intended
hypre_assert(j >= 0 && j < n2);
#endif
if ((j == -1 || a1[i] >= a2[j]) && (j == n2 - 1 || a1[i] <= a2[j + 1]))
{
// a2[j] <= a1[i] <= a2[j + 1], where a neighbor that does not exist
// counts as satisfied: take i elements of a1 and j + 1 of a2
*out1 = i; *out2 = j + 1;
return;
}
else if (j >= 0 && a2[j] >= a1[i] && (i == n1 - 1 || a2[j] <= a1[i + 1]))
{
// a1[i] <= a2[j] <= a1[i + 1] (or a1[i] is the last element of a1):
// take i + 1 elements of a1 and j of a2
*out1 = i + 1; *out2 = j;
return;
}
else if (a1[i] > a2[j] && j != n2 - 1 && a1[i] > a2[j + 1])
{
// a1[i] is larger than both a2[j] and a2[j + 1], so i is too far right
// search in left half of a1
right = i - 1;
}
else
{
// search in right half of a1
left = i + 1;
}
}
}
/*--------------------------------------------------------------------------
* kth_element
*
* Partition the input so that
* a1[0:*out1) and a2[0:*out2) contain the smallest k elements
*
* out1, out2  receive the number of elements taken from a1 and from a2
* a1, a2      input sequences of length n1 and n2 (the comparisons assume
*             both are sorted in ascending order)
* k           number of smallest elements to split off
*
* Trivial and one-sided splits are answered directly; every other case is
* delegated to the binary search in kth_element_.
*--------------------------------------------------------------------------*/
static void kth_element( HYPRE_Int *out1, HYPRE_Int *out2,
HYPRE_Int *a1, HYPRE_Int *a2,
HYPRE_Int n1, HYPRE_Int n2, HYPRE_Int k)
{
// either of the inputs is empty
if (n1 == 0)
{
*out1 = 0; *out2 = k;
return;
}
if (n2 == 0)
{
*out1 = k; *out2 = 0;
return;
}
// k covers everything: take both inputs completely
if (k >= n1 + n2)
{
*out1 = n1; *out2 = n2;
return;
}
// one is greater than the other
// a1[k] <= a2[0]: the first k elements of a1 suffice
if (k < n1 && a1[k] <= a2[0])
{
*out1 = k; *out2 = 0;
return;
}
// a2[k - n1] >= a1[n1 - 1]: all of a1 plus the first k - n1 elements of a2
if (k - n1 >= 0 && a2[k - n1] >= a1[n1 - 1])
{
*out1 = n1; *out2 = k - n1;
return;
}
// the same two shortcuts with the roles of a1 and a2 exchanged
if (k < n2 && a2[k] <= a1[0])
{
*out1 = 0; *out2 = k;
return;
}
if (k - n2 >= 0 && a1[k - n2] >= a2[n2 - 1])
{
*out1 = k - n2; *out2 = n2;
return;
}
// now k > 0
// faster to do binary search on the shorter sequence
// (the output pointers are swapped along with the inputs, so each result
// still lands in the slot that belongs to the caller's a1 / a2)
if (n1 > n2)
{
SWAP(HYPRE_Int, n1, n2);
SWAP(HYPRE_Int *, a1, a2);
SWAP(HYPRE_Int *, out1, out2);
}
if (k < (n1 + n2) / 2)
{
// search index range [0, min(n1 - 1, k)] of a1
kth_element_(out1, out2, a1, a2, 0, hypre_min(n1 - 1, k), n1, n2, k);
}
else
{
// when k is big, faster to find (n1 + n2 - k)th biggest element
// offset1 / offset2 leading elements of a1 / a2 are skipped and k is
// reduced by the same amount; the search then runs on the shortened
// inputs, each limited to new_k + 1 elements, and the offsets are added
// back to the result afterwards
HYPRE_Int offset1 = hypre_max(k - n2, 0), offset2 = hypre_max(k - n1, 0);
HYPRE_Int new_k = k - offset1 - offset2;
HYPRE_Int new_n1 = hypre_min(n1 - offset1, new_k + 1);
HYPRE_Int new_n2 = hypre_min(n2 - offset2, new_k + 1);
kth_element_(out1, out2, a1 + offset1, a2 + offset2, 0, new_n1 - 1, new_n1, new_n2, new_k);
*out1 += offset1;
*out2 += offset2;
}
#ifdef DBG_MERGE_SORT
hypre_assert(*out1 + *out2 == k);
#endif
}
/*--------------------------------------------------------------------------
* big_kth_element_
*
* HYPRE_BigInt counterpart of kth_element_ and binary-search helper of
* big_kth_element. Searches an index i into a1 within [left, right] and
* pairs it with j = k - i - 1 in a2, until a split is found such that
* a1[0:*out1) and a2[0:*out2) hold the k smallest elements, with
* *out1 + *out2 == k.
*
* out1, out2   receive the number of elements taken from a1 and from a2
* a1, a2       input sequences of length n1 and n2 (the comparisons assume
*              both are sorted in ascending order)
* left, right  inclusive search bounds for the index into a1
* k            number of smallest elements to split off
*
* The loop is only left through the two return statements, so the caller is
* expected to pass bounds that contain a valid split.
* NOTE(review): left + right is evaluated in HYPRE_Int; assumes the sum does
* not overflow.
*--------------------------------------------------------------------------*/
static void big_kth_element_( HYPRE_Int *out1, HYPRE_Int *out2,
HYPRE_BigInt *a1, HYPRE_BigInt *a2,
HYPRE_Int left, HYPRE_Int right,
HYPRE_Int n1, HYPRE_Int n2, HYPRE_Int k)
{
while (1)
{
// candidate pair: a1[i] and a2[j] with i + j + 1 == k
HYPRE_Int i = (left + right) / 2; // right < k -> i < k
HYPRE_Int j = k - i - 1;
#ifdef DBG_MERGE_SORT
hypre_assert(left <= right && right <= k);
hypre_assert(i < k); // i == k implies left == right == k that can never happen
// NOTE(review): this assertion requires j >= 0 while the test below
// explicitly handles j == -1 -- confirm which one is intended
hypre_assert(j >= 0 && j < n2);
#endif
if ((j == -1 || a1[i] >= a2[j]) && (j == n2 - 1 || a1[i] <= a2[j + 1]))
{
// a2[j] <= a1[i] <= a2[j + 1], where a neighbor that does not exist
// counts as satisfied: take i elements of a1 and j + 1 of a2
*out1 = i; *out2 = j + 1;
return;
}
else if (j >= 0 && a2[j] >= a1[i] && (i == n1 - 1 || a2[j] <= a1[i + 1]))
{
// a1[i] <= a2[j] <= a1[i + 1] (or a1[i] is the last element of a1):
// take i + 1 elements of a1 and j of a2
*out1 = i + 1; *out2 = j;
return;
}
else if (a1[i] > a2[j] && j != n2 - 1 && a1[i] > a2[j + 1])
{
// a1[i] is larger than both a2[j] and a2[j + 1], so i is too far right
// search in left half of a1
right = i - 1;
}
else
{
// search in right half of a1
left = i + 1;
}
}
}
/*--------------------------------------------------------------------------
* big_kth_element
*
* Partition the input so that
* a1[0:*out1) and a2[0:*out2) contain the smallest k elements
*
* HYPRE_BigInt counterpart of kth_element.
*
* out1, out2  receive the number of elements taken from a1 and from a2
* a1, a2      input sequences of length n1 and n2 (the comparisons assume
*             both are sorted in ascending order)
* k           number of smallest elements to split off
*
* Trivial and one-sided splits are answered directly; every other case is
* delegated to the binary search in big_kth_element_.
*--------------------------------------------------------------------------*/
static void big_kth_element( HYPRE_Int *out1, HYPRE_Int *out2,
HYPRE_BigInt *a1, HYPRE_BigInt *a2,
HYPRE_Int n1, HYPRE_Int n2, HYPRE_Int k)
{
// either of the inputs is empty
if (n1 == 0)
{
*out1 = 0; *out2 = k;
return;
}
if (n2 == 0)
{
*out1 = k; *out2 = 0;
return;
}
// k covers everything: take both inputs completely
if (k >= n1 + n2)
{
*out1 = n1; *out2 = n2;
return;
}
// one is greater than the other
// a1[k] <= a2[0]: the first k elements of a1 suffice
if (k < n1 && a1[k] <= a2[0])
{
*out1 = k; *out2 = 0;
return;
}
// a2[k - n1] >= a1[n1 - 1]: all of a1 plus the first k - n1 elements of a2
if (k - n1 >= 0 && a2[k - n1] >= a1[n1 - 1])
{
*out1 = n1; *out2 = k - n1;
return;
}
// the same two shortcuts with the roles of a1 and a2 exchanged
if (k < n2 && a2[k] <= a1[0])
{
*out1 = 0; *out2 = k;
return;
}
if (k - n2 >= 0 && a1[k - n2] >= a2[n2 - 1])
{
*out1 = k - n2; *out2 = n2;
return;
}
// now k > 0
// faster to do binary search on the shorter sequence
// (the output pointers are swapped along with the inputs, so each result
// still lands in the slot that belongs to the caller's a1 / a2)
if (n1 > n2)
{
SWAP(HYPRE_Int, n1, n2);
SWAP(HYPRE_BigInt *, a1, a2);
SWAP(HYPRE_Int *, out1, out2);
}
if (k < (n1 + n2) / 2)
{
// search index range [0, min(n1 - 1, k)] of a1
big_kth_element_(out1, out2, a1, a2, 0, hypre_min(n1 - 1, k), n1, n2, k);
}
else
{
// when k is big, faster to find (n1 + n2 - k)th biggest element
// offset1 / offset2 leading elements of a1 / a2 are skipped and k is
// reduced by the same amount; the search then runs on the shortened
// inputs, each limited to new_k + 1 elements, and the offsets are added
// back to the result afterwards
HYPRE_Int offset1 = hypre_max(k - n2, 0), offset2 = hypre_max(k - n1, 0);
HYPRE_Int new_k = k - offset1 - offset2;
HYPRE_Int new_n1 = hypre_min(n1 - offset1, new_k + 1);
HYPRE_Int new_n2 = hypre_min(n2 - offset2, new_k + 1);
big_kth_element_(out1, out2, a1 + (HYPRE_BigInt)offset1, a2 + (HYPRE_BigInt)offset2, 0, new_n1 - 1,
new_n1, new_n2, new_k);
*out1 += offset1;
*out2 += offset2;
}
#ifdef DBG_MERGE_SORT
hypre_assert(*out1 + *out2 == k);
#endif
}
/*--------------------------------------------------------------------------
* hypre_parallel_merge
*
* Cooperative merge of [first1, last1) and [first2, last2) into out. Each
* participating thread calls this routine and produces only its own slice
* of the merged output, out[begin_rank : end_rank).
*
* @param first1, last1 first input range
* @param first2, last2 second input range
* @param out start of the output for the complete merge
* @param num_threads number of threads that participate in this merge
* @param my_thread_num thread id (zero-based) among the threads that
* participate in this merge
*--------------------------------------------------------------------------*/
static void hypre_parallel_merge( HYPRE_Int *first1, HYPRE_Int *last1,
HYPRE_Int *first2, HYPRE_Int *last2,
HYPRE_Int *out, HYPRE_Int num_threads,
HYPRE_Int my_thread_num )
{
HYPRE_Int n1 = last1 - first1;
HYPRE_Int n2 = last2 - first2;
HYPRE_Int n = n1 + n2;
// output slice of this thread: ceil(n / num_threads) elements, clamped to n
HYPRE_Int n_per_thread = (n + num_threads - 1) / num_threads;
HYPRE_Int begin_rank = hypre_min(n_per_thread * my_thread_num, n);
HYPRE_Int end_rank = hypre_min(begin_rank + n_per_thread, n);
#ifdef DBG_MERGE_SORT
hypre_assert(std::is_sorted(first1, last1));
hypre_assert(std::is_sorted(first2, last2));
#endif
// split both inputs at the two ranks: begin1 + begin2 == begin_rank and
// end1 + end2 == end_rank
HYPRE_Int begin1, begin2, end1, end2;
kth_element(&begin1, &begin2, first1, first2, n1, n2, begin_rank);
kth_element(&end1, &end2, first1, first2, n1, n2, end_rank);
// The two splits are computed independently and may disagree (begin1 > end1
// or begin2 > end2) -- presumably only when equal values straddle a split.
// While the last element of input 1 before a split equals the first element
// of input 2 after it, one element can be traded across the split without
// changing the sum of the two indices.
while (begin1 > end1 && begin1 > 0 && begin2 < n2 && first1[begin1 - 1] == first2[begin2])
{
#ifdef DBG_MERGE_SORT
printf("%s:%d\n", __FILE__, __LINE__);
#endif
begin1--; begin2++;
}
while (begin2 > end2 && end1 > 0 && end2 < n2 && first1[end1 - 1] == first2[end2])
{
#ifdef DBG_MERGE_SORT
printf("%s:%d\n", __FILE__, __LINE__);
#endif
end1--; end2++;
}
#ifdef DBG_MERGE_SORT
hypre_assert(begin1 <= end1);
hypre_assert(begin2 <= end2);
#endif
// merge this thread's sub-ranges; the output starts at offset
// begin1 + begin2
hypre_merge(
first1 + begin1, first1 + end1,
first2 + begin2, first2 + end2,
out + begin1 + begin2);
#ifdef DBG_MERGE_SORT
hypre_assert(std::is_sorted(out + begin1 + begin2, out + end1 + end2));
#endif
}
/*--------------------------------------------------------------------------
* hypre_big_parallel_merge
*
* HYPRE_BigInt counterpart of hypre_parallel_merge: cooperative merge of
* [first1, last1) and [first2, last2) into out. Each participating thread
* calls this routine and produces only its own slice of the merged output,
* out[begin_rank : end_rank).
*
* @param first1, last1 first input range
* @param first2, last2 second input range
* @param out start of the output for the complete merge
* @param num_threads number of threads that participate in this merge
* @param my_thread_num thread id (zero-based) among the threads that
* participate in this merge
*--------------------------------------------------------------------------*/
static void hypre_big_parallel_merge(
HYPRE_BigInt *first1, HYPRE_BigInt *last1, HYPRE_BigInt *first2, HYPRE_BigInt *last2,
HYPRE_BigInt *out,
HYPRE_Int num_threads, HYPRE_Int my_thread_num)
{
HYPRE_Int n1 = (HYPRE_Int)(last1 - first1);
HYPRE_Int n2 = (HYPRE_Int)(last2 - first2);
HYPRE_Int n = n1 + n2;
// output slice of this thread: ceil(n / num_threads) elements, clamped to n
HYPRE_Int n_per_thread = (n + num_threads - 1) / num_threads;
HYPRE_Int begin_rank = hypre_min(n_per_thread * my_thread_num, n);
HYPRE_Int end_rank = hypre_min(begin_rank + n_per_thread, n);
#ifdef DBG_MERGE_SORT
hypre_assert(std::is_sorted(first1, last1));
hypre_assert(std::is_sorted(first2, last2));
#endif
// split both inputs at the two ranks: begin1 + begin2 == begin_rank and
// end1 + end2 == end_rank
HYPRE_Int begin1, begin2, end1, end2;
big_kth_element(&begin1, &begin2, first1, first2, n1, n2, begin_rank);
big_kth_element(&end1, &end2, first1, first2, n1, n2, end_rank);
// The two splits are computed independently and may disagree (begin1 > end1
// or begin2 > end2) -- presumably only when equal values straddle a split.
// While the last element of input 1 before a split equals the first element
// of input 2 after it, one element can be traded across the split without
// changing the sum of the two indices.
while (begin1 > end1 && begin1 > 0 && begin2 < n2 && first1[begin1 - 1] == first2[begin2])
{
#ifdef DBG_MERGE_SORT
printf("%s:%d\n", __FILE__, __LINE__);
#endif
begin1--; begin2++;
}
while (begin2 > end2 && end1 > 0 && end2 < n2 && first1[end1 - 1] == first2[end2])
{
#ifdef DBG_MERGE_SORT
printf("%s:%d\n", __FILE__, __LINE__);
#endif
end1--; end2++;
}
#ifdef DBG_MERGE_SORT
hypre_assert(begin1 <= end1);
hypre_assert(begin2 <= end2);
#endif
// merge this thread's sub-ranges; the output starts at offset
// begin1 + begin2
hypre_big_merge(
first1 + (HYPRE_BigInt)begin1, first1 + (HYPRE_BigInt)end1,
first2 + (HYPRE_BigInt)begin2, first2 + (HYPRE_BigInt)end2,
out + (HYPRE_BigInt)(begin1 + begin2));
#ifdef DBG_MERGE_SORT
hypre_assert(std::is_sorted(out + begin1 + begin2, out + end1 + end2));
#endif
}
/*--------------------------------------------------------------------------
 * hypre_merge_sort
 *
 * Multi-threaded merge sort of the len entries of "in" (ascending order).
 *
 * Every thread first sorts its own contiguous chunk of "in" with
 * hypre_qsort0. The sorted chunks are then merged pairwise in rounds (group
 * size 1, 2, 4, ...), each round reading from one buffer and writing to the
 * other, so the data ping-pongs between "in" and "temp".
 *
 * in    array to sort; its contents are overwritten
 * temp  scratch array with room for len entries
 * len   number of entries; when it is 0 the routine returns immediately and
 *       *out is left untouched
 * out   receives the buffer that holds the sorted result, which is either
 *       "in" or "temp" depending on the number of merge rounds
 *--------------------------------------------------------------------------*/
void hypre_merge_sort( HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **out )
{
   if (0 == len) { return; }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

#ifdef DBG_MERGE_SORT
   HYPRE_Int *dbg_buf = new HYPRE_Int[len];
   std::copy(in, in + len, dbg_buf);
   std::sort(dbg_buf, dbg_buf + len);
#endif

#ifdef HYPRE_USING_OPENMP
   #pragma omp parallel
#endif
   {
      HYPRE_Int num_threads = hypre_NumActiveThreads();
      HYPRE_Int my_thread_num = hypre_GetThreadNum();

      // thread-private sort of this thread's chunk [i_begin, i_end)
      HYPRE_Int i_per_thread = (len + num_threads - 1) / num_threads;
      HYPRE_Int i_begin = hypre_min(i_per_thread * my_thread_num, len);
      HYPRE_Int i_end = hypre_min(i_begin + i_per_thread, len);

      hypre_qsort0(in, i_begin, i_end - 1);

      // merge sorted sequences
      HYPRE_Int in_group_size;
      HYPRE_Int *in_buf = in;
      HYPRE_Int *out_buf = temp;

      for (in_group_size = 1; in_group_size < num_threads; in_group_size *= 2)
      {
         // the previous stage must be complete on all threads before its
         // output is read as the input of this round
#ifdef HYPRE_USING_OPENMP
         #pragma omp barrier
#endif

         // merge 2 in-groups into 1 out-group
         HYPRE_Int out_group_size = in_group_size * 2;
         HYPRE_Int group_leader = my_thread_num / out_group_size * out_group_size;
         HYPRE_Int id_in_group = my_thread_num % out_group_size;
         // the last out-group may contain fewer than out_group_size threads
         HYPRE_Int num_threads_in_group =
            hypre_min(group_leader + out_group_size, num_threads) - group_leader;

         // element ranges of the two in-groups; the second one starts where
         // the first one ends
         HYPRE_Int in_group1_begin = hypre_min(i_per_thread * group_leader, len);
         HYPRE_Int in_group1_end = hypre_min(in_group1_begin + i_per_thread * in_group_size, len);
         HYPRE_Int in_group2_begin = in_group1_end;
         HYPRE_Int in_group2_end = hypre_min(in_group2_begin + i_per_thread * in_group_size, len);

         hypre_parallel_merge(
            in_buf + in_group1_begin, in_buf + in_group1_end,
            in_buf + in_group2_begin, in_buf + in_group2_end,
            out_buf + in_group1_begin,
            num_threads_in_group,
            id_in_group);

         // exchange the roles of the two buffers for the next round
         // (named swap_buf so that it does not shadow the parameter "temp")
         HYPRE_Int *swap_buf = in_buf;
         in_buf = out_buf;
         out_buf = swap_buf;
      }

      // All threads run the same number of rounds and therefore end up with
      // the same in_buf. Let a single thread publish it instead of having
      // the whole team store to *out concurrently.
      if (my_thread_num == 0)
      {
         *out = in_buf;
      }
   } /* omp parallel */

#ifdef DBG_MERGE_SORT
   hypre_assert(std::equal(*out, *out + len, dbg_buf));
   delete[] dbg_buf;
#endif

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}
/*--------------------------------------------------------------------------
* hypre_sort_and_create_inverse_map
*
* Sort array "in" with length len and put result in array "out"
* "in" will be deallocated unless in == *out
* inverse_map is an inverse hash table s.t.
* inverse_map[i] = j iff (*out)[j] = i
*
* When len == 0 the routine returns immediately: *out and inverse_map are
* not touched and "in" is not deallocated.
* The assertion in the insertion loop expects every sorted value to be
* inserted for the first time, i.e. the entries of "in" to be distinct.
*--------------------------------------------------------------------------*/
void hypre_sort_and_create_inverse_map(HYPRE_Int *in, HYPRE_Int len, HYPRE_Int **out,
hypre_UnorderedIntMap *inverse_map)
{
if (len == 0)
{
return;
}
#ifdef HYPRE_PROFILE
hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif
/* scratch buffer for the merge sort; the result ends up in "in" or in temp */
HYPRE_Int *temp = hypre_TAlloc(HYPRE_Int, len, HYPRE_MEMORY_HOST);
hypre_merge_sort(in, temp, len, out);
/* NOTE(review): 2 * len and 16 * hypre_NumThreads() are presumably the
* initial capacity and the concurrency level of the map -- confirm against
* hypre_UnorderedIntMapCreate */
hypre_UnorderedIntMapCreate(inverse_map, 2 * len, 16 * hypre_NumThreads());
HYPRE_Int i;
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
#pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < len; i++)
{
/* map the sorted value to its position i */
HYPRE_Int old = hypre_UnorderedIntMapPutIfAbsent(inverse_map, (*out)[i], i);
hypre_assert(old == HYPRE_HOPSCOTCH_HASH_EMPTY);
#ifdef DBG_MERGE_SORT
if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i)
{
fprintf(stderr, "%d %d\n", i, (*out)[i]);
hypre_assert(false);
}
#endif
}
#ifdef DBG_MERGE_SORT
std::unordered_map<HYPRE_Int, HYPRE_Int> inverse_map2(len);
for (HYPRE_Int i = 0; i < len; ++i)
{
inverse_map2[(*out)[i]] = i;
if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i)
{
fprintf(stderr, "%d %d\n", i, (*out)[i]);
hypre_assert(false);
}
}
hypre_assert(hypre_UnorderedIntMapSize(inverse_map) == len);
#endif
/* release whichever of the two buffers does not hold the sorted result */
if (*out == in)
{
hypre_TFree(temp, HYPRE_MEMORY_HOST);
}
else
{
hypre_TFree(in, HYPRE_MEMORY_HOST);
}
#ifdef HYPRE_PROFILE
hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}
/*--------------------------------------------------------------------------
 * hypre_big_merge_sort
 *
 * HYPRE_BigInt counterpart of hypre_merge_sort: multi-threaded merge sort of
 * the len entries of "in" (ascending order).
 *
 * Every thread first sorts its own contiguous chunk of "in" with
 * hypre_BigQsort0. The sorted chunks are then merged pairwise in rounds
 * (group size 1, 2, 4, ...), each round reading from one buffer and writing
 * to the other, so the data ping-pongs between "in" and "temp".
 *
 * in    array to sort; its contents are overwritten
 * temp  scratch array with room for len entries
 * len   number of entries; when it is 0 the routine returns immediately and
 *       *out is left untouched
 * out   receives the buffer that holds the sorted result, which is either
 *       "in" or "temp" depending on the number of merge rounds
 *--------------------------------------------------------------------------*/
void hypre_big_merge_sort(HYPRE_BigInt *in, HYPRE_BigInt *temp, HYPRE_Int len,
                          HYPRE_BigInt **out)
{
   if (0 == len) { return; }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

#ifdef DBG_MERGE_SORT
   // the reference copy must use the element type of the data; a HYPRE_Int
   // buffer would narrow the values whenever HYPRE_BigInt is wider
   HYPRE_BigInt *dbg_buf = new HYPRE_BigInt[len];
   std::copy(in, in + len, dbg_buf);
   std::sort(dbg_buf, dbg_buf + len);
#endif

#ifdef HYPRE_USING_OPENMP
   #pragma omp parallel
#endif
   {
      HYPRE_Int num_threads = hypre_NumActiveThreads();
      HYPRE_Int my_thread_num = hypre_GetThreadNum();

      // thread-private sort of this thread's chunk [i_begin, i_end)
      HYPRE_Int i_per_thread = (len + num_threads - 1) / num_threads;
      HYPRE_Int i_begin = hypre_min(i_per_thread * my_thread_num, len);
      HYPRE_Int i_end = hypre_min(i_begin + i_per_thread, len);

      hypre_BigQsort0(in, i_begin, i_end - 1);

      // merge sorted sequences
      HYPRE_Int in_group_size;
      HYPRE_BigInt *in_buf = in;
      HYPRE_BigInt *out_buf = temp;

      for (in_group_size = 1; in_group_size < num_threads; in_group_size *= 2)
      {
         // the previous stage must be complete on all threads before its
         // output is read as the input of this round
#ifdef HYPRE_USING_OPENMP
         #pragma omp barrier
#endif

         // merge 2 in-groups into 1 out-group
         HYPRE_Int out_group_size = in_group_size * 2;
         HYPRE_Int group_leader = my_thread_num / out_group_size * out_group_size;
         HYPRE_Int id_in_group = my_thread_num % out_group_size;
         // the last out-group may contain fewer than out_group_size threads
         HYPRE_Int num_threads_in_group =
            hypre_min(group_leader + out_group_size, num_threads) - group_leader;

         // element ranges of the two in-groups; the second one starts where
         // the first one ends
         HYPRE_Int in_group1_begin = hypre_min(i_per_thread * group_leader, len);
         HYPRE_Int in_group1_end = hypre_min(in_group1_begin + i_per_thread * in_group_size, len);
         HYPRE_Int in_group2_begin = in_group1_end;
         HYPRE_Int in_group2_end = hypre_min(in_group2_begin + i_per_thread * in_group_size, len);

         hypre_big_parallel_merge(
            in_buf + in_group1_begin, in_buf + in_group1_end,
            in_buf + in_group2_begin, in_buf + in_group2_end,
            out_buf + in_group1_begin,
            num_threads_in_group,
            id_in_group);

         // exchange the roles of the two buffers for the next round
         // (named swap_buf so that it does not shadow the parameter "temp")
         HYPRE_BigInt *swap_buf = in_buf;
         in_buf = out_buf;
         out_buf = swap_buf;
      }

      // All threads run the same number of rounds and therefore end up with
      // the same in_buf. Let a single thread publish it instead of having
      // the whole team store to *out concurrently.
      if (my_thread_num == 0)
      {
         *out = in_buf;
      }
   } /* omp parallel */

#ifdef DBG_MERGE_SORT
   hypre_assert(std::equal(*out, *out + len, dbg_buf));
   delete[] dbg_buf;
#endif

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}
/*--------------------------------------------------------------------------
 * hypre_big_sort_and_create_inverse_map
 *
 * HYPRE_BigInt counterpart of hypre_sort_and_create_inverse_map.
 *
 * Sort array "in" with length len and store the buffer that holds the
 * sorted result in *out. "in" is deallocated unless in == *out; otherwise
 * the internal scratch buffer is deallocated.
 * inverse_map is created here and filled such that
 *    inverse_map[v] = j iff (*out)[j] = v
 *
 * When len == 0 the routine returns immediately: *out and inverse_map are
 * not touched and "in" is not deallocated.
 * The assertion in the insertion loop expects every sorted value to be
 * inserted for the first time, i.e. the entries of "in" to be distinct.
 *--------------------------------------------------------------------------*/
void hypre_big_sort_and_create_inverse_map(HYPRE_BigInt *in, HYPRE_Int len, HYPRE_BigInt **out,
                                           hypre_UnorderedBigIntMap *inverse_map)
{
   if (len == 0)
   {
      return;
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

   /* scratch buffer for the merge sort; the result ends up in "in" or in temp */
   HYPRE_BigInt *temp = hypre_TAlloc(HYPRE_BigInt, len, HYPRE_MEMORY_HOST);
   hypre_big_merge_sort(in, temp, len, out);

   /* NOTE(review): 2 * len and 16 * hypre_NumThreads() are presumably the
    * initial capacity and the concurrency level of the map -- confirm
    * against hypre_UnorderedBigIntMapCreate */
   hypre_UnorderedBigIntMapCreate(inverse_map, 2 * len, 16 * hypre_NumThreads());

   HYPRE_Int i;
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
   #pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < len; i++)
   {
      /* map the sorted value to its position i */
      HYPRE_Int old = hypre_UnorderedBigIntMapPutIfAbsent(inverse_map, (*out)[i], i);
      hypre_assert(old == HYPRE_HOPSCOTCH_HASH_EMPTY);
#ifdef DBG_MERGE_SORT
      if (hypre_UnorderedBigIntMapGet(inverse_map, (*out)[i]) != i)
      {
         /* cast explicitly: the integer widths are configuration dependent,
          * so the arguments must match the conversion specifiers */
         fprintf(stderr, "%lld %lld\n", (long long) i, (long long) (*out)[i]);
         hypre_assert(false);
      }
#endif
   }

#ifdef DBG_MERGE_SORT
   /* the keys are the sorted values, hence HYPRE_BigInt */
   std::unordered_map<HYPRE_BigInt, HYPRE_Int> inverse_map2(len);
   for (HYPRE_Int i = 0; i < len; ++i)
   {
      inverse_map2[(*out)[i]] = i;
      if (hypre_UnorderedBigIntMapGet(inverse_map, (*out)[i]) != i)
      {
         fprintf(stderr, "%lld %lld\n", (long long) i, (long long) (*out)[i]);
         hypre_assert(false);
      }
   }
   hypre_assert(hypre_UnorderedBigIntMapSize(inverse_map) == len);
#endif

   /* release whichever of the two buffers does not hold the sorted result */
   if (*out == in)
   {
      hypre_TFree(temp, HYPRE_MEMORY_HOST);
   }
   else
   {
      hypre_TFree(in, HYPRE_MEMORY_HOST);
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}
/* vim: set tabstop=8 softtabstop=3 sw=3 expandtab: */