Gpu mixedInt (#380)

This PR adds GPU support for mixedInt. 

Co-authored-by: Rob Falgout <rfalgout@llnl.gov>
This commit is contained in:
Ruipeng Li 2021-06-10 11:10:13 -07:00 committed by GitHub
parent 1d9411c7ab
commit ad5d7e009f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 109 additions and 74 deletions

View File

@ -11,7 +11,7 @@ case $1 in
-h|-help)
cat <<EOF
**** Only run this script on the lassen/ray cluster ****
**** Only run this script on the lassen cluster ****
$0 [-h|-help] {src_dir}
@ -53,6 +53,12 @@ eo="-gpu -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro -eo: $eo
./renametest.sh basic $output_dir/basic-cuda-um
#CUDA with UM and mixed-int
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda-um-mixedint
# CUDA with UM with shared library [no run]
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo

View File

@ -11,7 +11,7 @@ case $1 in
-h|-help)
cat <<EOF
**** Only run this script on the lassen/ray cluster ****
**** Only run this script on the ray cluster ****
$0 [-h|-help] {src_dir}
@ -53,6 +53,12 @@ eo="-gpu -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro -eo: $eo
./renametest.sh basic $output_dir/basic-cuda-um
#CUDA with UM and mixed-int
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda-um-mixedint
# CUDA with UM with shared library [no run]
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo

View File

@ -307,8 +307,8 @@ hypre_IJMatrixAssembleSortAndReduce3(HYPRE_Int N0, HYPRE_BigInt *I0, HYPRE_Big
{
hypreDevice_StableSortTupleByTupleKey(N0, I0, J0, X0, A0, 0);
HYPRE_Int *I = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE);
HYPRE_Int *J = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE);
HYPRE_BigInt *I = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE);
HYPRE_BigInt *J = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE);
HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE);
/* output in X0: 0: keep, 1: zero-out */

View File

@ -309,7 +309,7 @@ hypre_IJVectorAssembleSortAndReduce3(HYPRE_Int N0, HYPRE_BigInt *I0, char *X0,
I0 + N0,
thrust::make_zip_iterator(thrust::make_tuple(X0, A0)) );
HYPRE_Int *I = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE);
HYPRE_BigInt *I = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE);
HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE);
/* output in X0: 0: keep, 1: zero-out */

View File

@ -3444,7 +3444,7 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_Int *perm, HYPRE_Int n,
S_row_starts[1] = global_start;
}
S_row_starts = hypre_CTAlloc(HYPRE_Int,2,HYPRE_MEMORY_HOST);
S_row_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
S_row_starts[1] = S_total_rows;
S_row_starts[0] = S_total_rows - m;
hypre_MPI_Allreduce(&m, &S_total_rows, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);

View File

@ -43,7 +43,7 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A,
HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A);
HYPRE_BigInt *row_starts = hypre_ParCSRMatrixRowStarts(A);
HYPRE_Int num_variables = hypre_CSRMatrixNumRows(A_diag);
HYPRE_Int global_num_vars = hypre_ParCSRMatrixGlobalNumRows(A);
HYPRE_Int num_nonzeros_diag;
@ -178,9 +178,9 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A,
hypre_ParCSRMatrixCommPkg(S) = NULL;
hypre_ParCSRMatrixColMapOffd(S) = hypre_TAlloc(HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST);
hypre_ParCSRMatrixColMapOffd(S) = hypre_TAlloc(HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST);
hypre_TMemcpy(hypre_ParCSRMatrixColMapOffd(S), hypre_ParCSRMatrixColMapOffd(A),
HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
hypre_ParCSRMatrixSocDiagJ(S) = S_temp_diag_j;
hypre_ParCSRMatrixSocOffdJ(S) = S_temp_offd_j;

View File

@ -450,13 +450,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
tmp_j,
AFF_offd_j );
col_map_offd_AFF = hypre_TAlloc(HYPRE_BigInt, num_cols_AFF_offd, HYPRE_MEMORY_DEVICE);
tmp_end = HYPRE_THRUST_CALL( copy_if,
thrust::make_transform_iterator(recv_buf, -_1-1),
thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd,
offd_mark,
col_map_offd_AFF,
thrust::identity<HYPRE_Int>() );
hypre_assert(tmp_end - col_map_offd_AFF == num_cols_AFF_offd);
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
thrust::make_transform_iterator(recv_buf, -_1-1),
thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd,
offd_mark,
col_map_offd_AFF,
thrust::identity<HYPRE_Int>() );
hypre_assert(tmp_end_big - col_map_offd_AFF == num_cols_AFF_offd);
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
AFF = hypre_ParCSRMatrixCreate(comm,
@ -597,13 +597,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
tmp_j,
AFC_offd_j );
col_map_offd_AFC = hypre_TAlloc(HYPRE_BigInt, num_cols_AFC_offd, HYPRE_MEMORY_DEVICE);
tmp_end = HYPRE_THRUST_CALL( copy_if,
recv_buf,
recv_buf + num_cols_A_offd,
offd_mark,
col_map_offd_AFC,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end - col_map_offd_AFC == num_cols_AFC_offd);
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
recv_buf,
recv_buf + num_cols_A_offd,
offd_mark,
col_map_offd_AFC,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end_big - col_map_offd_AFC == num_cols_AFC_offd);
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
/* AFC */
@ -745,13 +745,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
tmp_j,
ACF_offd_j );
col_map_offd_ACF = hypre_TAlloc(HYPRE_BigInt, num_cols_ACF_offd, HYPRE_MEMORY_DEVICE);
tmp_end = HYPRE_THRUST_CALL( copy_if,
thrust::make_transform_iterator(recv_buf, -_1-1),
thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd,
offd_mark,
col_map_offd_ACF,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end - col_map_offd_ACF == num_cols_ACF_offd);
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
thrust::make_transform_iterator(recv_buf, -_1-1),
thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd,
offd_mark,
col_map_offd_ACF,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end_big - col_map_offd_ACF == num_cols_ACF_offd);
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
/* ACF */
@ -894,13 +894,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
tmp_j,
ACC_offd_j );
col_map_offd_ACC = hypre_TAlloc(HYPRE_BigInt, num_cols_ACC_offd, HYPRE_MEMORY_DEVICE);
tmp_end = HYPRE_THRUST_CALL( copy_if,
recv_buf,
recv_buf + num_cols_A_offd,
offd_mark,
col_map_offd_ACC,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end - col_map_offd_ACC == num_cols_ACC_offd);
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
recv_buf,
recv_buf + num_cols_A_offd,
offd_mark,
col_map_offd_ACC,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end_big - col_map_offd_ACC == num_cols_ACC_offd);
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
/* ACC */
@ -1217,13 +1217,13 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A,
tmp_j,
ACX_offd_j );
col_map_offd_ACX = hypre_TAlloc(HYPRE_BigInt, num_cols_ACX_offd, HYPRE_MEMORY_DEVICE);
tmp_end = HYPRE_THRUST_CALL( copy_if,
col_map_offd_A,
col_map_offd_A + num_cols_A_offd,
offd_mark,
col_map_offd_ACX,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end - col_map_offd_ACX == num_cols_ACX_offd);
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
col_map_offd_A,
col_map_offd_A + num_cols_A_offd,
offd_mark,
col_map_offd_ACX,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end_big - col_map_offd_ACX == num_cols_ACX_offd);
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
/* ACX */
@ -1354,13 +1354,13 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A,
tmp_j,
AXC_offd_j );
col_map_offd_AXC = hypre_TAlloc(HYPRE_BigInt, num_cols_AXC_offd, HYPRE_MEMORY_DEVICE);
tmp_end = HYPRE_THRUST_CALL( copy_if,
recv_buf,
recv_buf + num_cols_A_offd,
offd_mark,
col_map_offd_AXC,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end - col_map_offd_AXC == num_cols_AXC_offd);
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
recv_buf,
recv_buf + num_cols_A_offd,
offd_mark,
col_map_offd_AXC,
thrust::identity<HYPRE_Int>());
hypre_assert(tmp_end_big - col_map_offd_AXC == num_cols_AXC_offd);
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
/* AXC */

View File

@ -14,7 +14,7 @@
HYPRE_Int
hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix *A,
HYPRE_Int indices_len,
HYPRE_Int *indices,
HYPRE_BigInt *indices,
hypre_ParCSRCommPkg *comm_pkg,
HYPRE_Int want_data,
void **request_ptr)

View File

@ -432,9 +432,9 @@ hypre_CSRMatrixSplitDevice_core( HYPRE_Int job, /* 0: qu
col_map_offd_C,
col_map_offd_C + B_ext_offd_nnz + num_cols_offd_B );
HYPRE_Int *new_end = HYPRE_THRUST_CALL( unique,
col_map_offd_C,
col_map_offd_C + B_ext_offd_nnz + num_cols_offd_B );
HYPRE_BigInt *new_end = HYPRE_THRUST_CALL( unique,
col_map_offd_C,
col_map_offd_C + B_ext_offd_nnz + num_cols_offd_B );
num_cols_offd_C = new_end - col_map_offd_C;

View File

@ -17,6 +17,7 @@ C_COMPILE_FLAGS = \
HEADERS =\
HYPRE_struct_mv.h\
_hypre_struct_mv.h\
_hypre_struct_mv.hpp\
assumed_part.h\
box.h\
box_manager.h\

View File

@ -29,7 +29,8 @@ HEADERS =\
mpistubs.h\
threading.h\
timing.h\
_hypre_utilities.h
_hypre_utilities.h\
_hypre_utilities.hpp
FILES =\
F90_HYPRE_error.c\

View File

@ -940,7 +940,7 @@ HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v);
HYPRE_Int hypreDevice_GetRowNnz(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz);
HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_Int first_col, HYPRE_Int *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab);
HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab);
HYPRE_Int hypreDevice_IntegerReduceSum(HYPRE_Int m, HYPRE_Int *d_i);

View File

@ -160,11 +160,20 @@ hypreDevice_GetRowNnz(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_di
}
__global__ void
hypreCUDAKernel_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int has_offd,
HYPRE_BigInt first_col, HYPRE_Int *d_col_map_offd_A,
HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a,
HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a,
HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab)
hypreCUDAKernel_CopyParCSRRows(HYPRE_Int nrows,
HYPRE_Int *d_row_indices,
HYPRE_Int has_offd,
HYPRE_BigInt first_col,
HYPRE_BigInt *d_col_map_offd_A,
HYPRE_Int *d_diag_i,
HYPRE_Int *d_diag_j,
HYPRE_Complex *d_diag_a,
HYPRE_Int *d_offd_i,
HYPRE_Int *d_offd_j,
HYPRE_Complex *d_offd_a,
HYPRE_Int *d_ib,
HYPRE_BigInt *d_jb,
HYPRE_Complex *d_ab)
{
const HYPRE_Int global_warp_id = hypre_cuda_get_grid_warp_id<1,1>();
@ -251,11 +260,21 @@ hypreCUDAKernel_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_
* If col_map_offd_A == NULL, use (-1 - d_offd_j) as column id
* If nrows == 1 and d_ib == NULL, it means d_ib[0] = 0 */
HYPRE_Int
hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd,
HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A,
HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a,
HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a,
HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab)
hypreDevice_CopyParCSRRows(HYPRE_Int nrows,
HYPRE_Int *d_row_indices,
HYPRE_Int job,
HYPRE_Int has_offd,
HYPRE_BigInt first_col,
HYPRE_BigInt *d_col_map_offd_A,
HYPRE_Int *d_diag_i,
HYPRE_Int *d_diag_j,
HYPRE_Complex *d_diag_a,
HYPRE_Int *d_offd_i,
HYPRE_Int *d_offd_j,
HYPRE_Complex *d_offd_a,
HYPRE_Int *d_ib,
HYPRE_BigInt *d_jb,
HYPRE_Complex *d_ab)
{
/* trivial case */
if (nrows <= 0)
@ -368,6 +387,8 @@ hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_
return hypre_error_flag;
}
/* Input: d_row_num, of size nrows, contains the rows indices that can be BigInt or Int
* Output: d_row_ind */
template <typename T>
HYPRE_Int
hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, T *d_row_num, T *d_row_ind)
@ -390,7 +411,7 @@ hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_
}
template HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_num, HYPRE_Int *d_row_ind);
#if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT)
#if defined(HYPRE_MIXEDINT)
template HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_BigInt *d_row_num, HYPRE_BigInt *d_row_ind);
#endif
@ -731,9 +752,9 @@ hypreDevice_StableSortByTupleKey(HYPRE_Int N, T1 *keys1, T2 *keys2, T3 *vals, HY
return hypre_error_flag;
}
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, HYPRE_Int *vals, HYPRE_Int opt);
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Real *keys2, HYPRE_Int *vals, HYPRE_Int opt);
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_BigInt *keys1, HYPRE_BigInt *keys2, HYPRE_Complex *vals, HYPRE_Int opt);
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, HYPRE_Int *vals, HYPRE_Int opt);
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Real *keys2, HYPRE_Int *vals, HYPRE_Int opt);
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, HYPRE_Complex *vals, HYPRE_Int opt);
/* opt:
* 0, (a,b) < (a',b') iff a < a' or (a = a' and b < b') [normal tupe comp]
@ -759,10 +780,10 @@ hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, T1 *keys1, T2 *keys2, T3 *val
return hypre_error_flag;
}
#if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT)
template HYPRE_Int hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, char *vals1, HYPRE_Complex *vals2, HYPRE_Int opt);
#if defined(HYPRE_MIXEDINT)
template HYPRE_Int hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, HYPRE_BigInt *keys1, HYPRE_BigInt *keys2, char *vals1, HYPRE_Complex *vals2, HYPRE_Int opt);
#endif
template HYPRE_Int hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, char *vals1, HYPRE_Complex *vals2, HYPRE_Int opt);
template <typename T1, typename T2, typename T3>
HYPRE_Int

View File

@ -875,7 +875,7 @@ HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v);
HYPRE_Int hypreDevice_GetRowNnz(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz);
HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_Int first_col, HYPRE_Int *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab);
HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab);
HYPRE_Int hypreDevice_IntegerReduceSum(HYPRE_Int m, HYPRE_Int *d_i);