Gpu mixedInt (#380)
This PR adds GPU support for mixedInt. Co-authored-by: Rob Falgout <rfalgout@llnl.gov>
This commit is contained in:
parent
1d9411c7ab
commit
ad5d7e009f
@ -11,7 +11,7 @@ case $1 in
|
||||
-h|-help)
|
||||
cat <<EOF
|
||||
|
||||
**** Only run this script on the lassen/ray cluster ****
|
||||
**** Only run this script on the lassen cluster ****
|
||||
|
||||
$0 [-h|-help] {src_dir}
|
||||
|
||||
@ -53,6 +53,12 @@ eo="-gpu -rt -mpibind -save ${save}"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro -eo: $eo
|
||||
./renametest.sh basic $output_dir/basic-cuda-um
|
||||
|
||||
#CUDA with UM and mixed-int
|
||||
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save}"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||
./renametest.sh basic $output_dir/basic-cuda-um-mixedint
|
||||
|
||||
# CUDA with UM with shared library [no run]
|
||||
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||
|
||||
@ -11,7 +11,7 @@ case $1 in
|
||||
-h|-help)
|
||||
cat <<EOF
|
||||
|
||||
**** Only run this script on the lassen/ray cluster ****
|
||||
**** Only run this script on the ray cluster ****
|
||||
|
||||
$0 [-h|-help] {src_dir}
|
||||
|
||||
@ -53,6 +53,12 @@ eo="-gpu -rt -mpibind -save ${save}"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro -eo: $eo
|
||||
./renametest.sh basic $output_dir/basic-cuda-um
|
||||
|
||||
#CUDA with UM and mixed-int
|
||||
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save}"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
|
||||
./renametest.sh basic $output_dir/basic-cuda-um-mixedint
|
||||
|
||||
# CUDA with UM with shared library [no run]
|
||||
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||
|
||||
@ -307,8 +307,8 @@ hypre_IJMatrixAssembleSortAndReduce3(HYPRE_Int N0, HYPRE_BigInt *I0, HYPRE_Big
|
||||
{
|
||||
hypreDevice_StableSortTupleByTupleKey(N0, I0, J0, X0, A0, 0);
|
||||
|
||||
HYPRE_Int *I = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_Int *J = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_BigInt *I = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_BigInt *J = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
/* output in X0: 0: keep, 1: zero-out */
|
||||
|
||||
@ -309,7 +309,7 @@ hypre_IJVectorAssembleSortAndReduce3(HYPRE_Int N0, HYPRE_BigInt *I0, char *X0,
|
||||
I0 + N0,
|
||||
thrust::make_zip_iterator(thrust::make_tuple(X0, A0)) );
|
||||
|
||||
HYPRE_Int *I = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_BigInt *I = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
/* output in X0: 0: keep, 1: zero-out */
|
||||
|
||||
@ -3444,7 +3444,7 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_Int *perm, HYPRE_Int n,
|
||||
S_row_starts[1] = global_start;
|
||||
}
|
||||
|
||||
S_row_starts = hypre_CTAlloc(HYPRE_Int,2,HYPRE_MEMORY_HOST);
|
||||
S_row_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST);
|
||||
S_row_starts[1] = S_total_rows;
|
||||
S_row_starts[0] = S_total_rows - m;
|
||||
hypre_MPI_Allreduce(&m, &S_total_rows, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
|
||||
|
||||
@ -43,7 +43,7 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
|
||||
HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
|
||||
HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
|
||||
HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A);
|
||||
HYPRE_BigInt *row_starts = hypre_ParCSRMatrixRowStarts(A);
|
||||
HYPRE_Int num_variables = hypre_CSRMatrixNumRows(A_diag);
|
||||
HYPRE_Int global_num_vars = hypre_ParCSRMatrixGlobalNumRows(A);
|
||||
HYPRE_Int num_nonzeros_diag;
|
||||
@ -178,9 +178,9 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A,
|
||||
|
||||
hypre_ParCSRMatrixCommPkg(S) = NULL;
|
||||
|
||||
hypre_ParCSRMatrixColMapOffd(S) = hypre_TAlloc(HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST);
|
||||
hypre_ParCSRMatrixColMapOffd(S) = hypre_TAlloc(HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST);
|
||||
hypre_TMemcpy(hypre_ParCSRMatrixColMapOffd(S), hypre_ParCSRMatrixColMapOffd(A),
|
||||
HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
|
||||
HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
|
||||
|
||||
hypre_ParCSRMatrixSocDiagJ(S) = S_temp_diag_j;
|
||||
hypre_ParCSRMatrixSocOffdJ(S) = S_temp_offd_j;
|
||||
|
||||
@ -450,13 +450,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
|
||||
tmp_j,
|
||||
AFF_offd_j );
|
||||
col_map_offd_AFF = hypre_TAlloc(HYPRE_BigInt, num_cols_AFF_offd, HYPRE_MEMORY_DEVICE);
|
||||
tmp_end = HYPRE_THRUST_CALL( copy_if,
|
||||
thrust::make_transform_iterator(recv_buf, -_1-1),
|
||||
thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_AFF,
|
||||
thrust::identity<HYPRE_Int>() );
|
||||
hypre_assert(tmp_end - col_map_offd_AFF == num_cols_AFF_offd);
|
||||
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
|
||||
thrust::make_transform_iterator(recv_buf, -_1-1),
|
||||
thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_AFF,
|
||||
thrust::identity<HYPRE_Int>() );
|
||||
hypre_assert(tmp_end_big - col_map_offd_AFF == num_cols_AFF_offd);
|
||||
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
AFF = hypre_ParCSRMatrixCreate(comm,
|
||||
@ -597,13 +597,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
|
||||
tmp_j,
|
||||
AFC_offd_j );
|
||||
col_map_offd_AFC = hypre_TAlloc(HYPRE_BigInt, num_cols_AFC_offd, HYPRE_MEMORY_DEVICE);
|
||||
tmp_end = HYPRE_THRUST_CALL( copy_if,
|
||||
recv_buf,
|
||||
recv_buf + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_AFC,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end - col_map_offd_AFC == num_cols_AFC_offd);
|
||||
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
|
||||
recv_buf,
|
||||
recv_buf + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_AFC,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end_big - col_map_offd_AFC == num_cols_AFC_offd);
|
||||
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
/* AFC */
|
||||
@ -745,13 +745,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
|
||||
tmp_j,
|
||||
ACF_offd_j );
|
||||
col_map_offd_ACF = hypre_TAlloc(HYPRE_BigInt, num_cols_ACF_offd, HYPRE_MEMORY_DEVICE);
|
||||
tmp_end = HYPRE_THRUST_CALL( copy_if,
|
||||
thrust::make_transform_iterator(recv_buf, -_1-1),
|
||||
thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_ACF,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end - col_map_offd_ACF == num_cols_ACF_offd);
|
||||
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
|
||||
thrust::make_transform_iterator(recv_buf, -_1-1),
|
||||
thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_ACF,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end_big - col_map_offd_ACF == num_cols_ACF_offd);
|
||||
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
/* ACF */
|
||||
@ -894,13 +894,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
|
||||
tmp_j,
|
||||
ACC_offd_j );
|
||||
col_map_offd_ACC = hypre_TAlloc(HYPRE_BigInt, num_cols_ACC_offd, HYPRE_MEMORY_DEVICE);
|
||||
tmp_end = HYPRE_THRUST_CALL( copy_if,
|
||||
recv_buf,
|
||||
recv_buf + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_ACC,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end - col_map_offd_ACC == num_cols_ACC_offd);
|
||||
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
|
||||
recv_buf,
|
||||
recv_buf + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_ACC,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end_big - col_map_offd_ACC == num_cols_ACC_offd);
|
||||
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
/* ACC */
|
||||
@ -1217,13 +1217,13 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A,
|
||||
tmp_j,
|
||||
ACX_offd_j );
|
||||
col_map_offd_ACX = hypre_TAlloc(HYPRE_BigInt, num_cols_ACX_offd, HYPRE_MEMORY_DEVICE);
|
||||
tmp_end = HYPRE_THRUST_CALL( copy_if,
|
||||
col_map_offd_A,
|
||||
col_map_offd_A + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_ACX,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end - col_map_offd_ACX == num_cols_ACX_offd);
|
||||
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
|
||||
col_map_offd_A,
|
||||
col_map_offd_A + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_ACX,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end_big - col_map_offd_ACX == num_cols_ACX_offd);
|
||||
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
/* ACX */
|
||||
@ -1354,13 +1354,13 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A,
|
||||
tmp_j,
|
||||
AXC_offd_j );
|
||||
col_map_offd_AXC = hypre_TAlloc(HYPRE_BigInt, num_cols_AXC_offd, HYPRE_MEMORY_DEVICE);
|
||||
tmp_end = HYPRE_THRUST_CALL( copy_if,
|
||||
recv_buf,
|
||||
recv_buf + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_AXC,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end - col_map_offd_AXC == num_cols_AXC_offd);
|
||||
HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if,
|
||||
recv_buf,
|
||||
recv_buf + num_cols_A_offd,
|
||||
offd_mark,
|
||||
col_map_offd_AXC,
|
||||
thrust::identity<HYPRE_Int>());
|
||||
hypre_assert(tmp_end_big - col_map_offd_AXC == num_cols_AXC_offd);
|
||||
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
/* AXC */
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
HYPRE_Int
|
||||
hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int indices_len,
|
||||
HYPRE_Int *indices,
|
||||
HYPRE_BigInt *indices,
|
||||
hypre_ParCSRCommPkg *comm_pkg,
|
||||
HYPRE_Int want_data,
|
||||
void **request_ptr)
|
||||
|
||||
@ -432,9 +432,9 @@ hypre_CSRMatrixSplitDevice_core( HYPRE_Int job, /* 0: qu
|
||||
col_map_offd_C,
|
||||
col_map_offd_C + B_ext_offd_nnz + num_cols_offd_B );
|
||||
|
||||
HYPRE_Int *new_end = HYPRE_THRUST_CALL( unique,
|
||||
col_map_offd_C,
|
||||
col_map_offd_C + B_ext_offd_nnz + num_cols_offd_B );
|
||||
HYPRE_BigInt *new_end = HYPRE_THRUST_CALL( unique,
|
||||
col_map_offd_C,
|
||||
col_map_offd_C + B_ext_offd_nnz + num_cols_offd_B );
|
||||
|
||||
num_cols_offd_C = new_end - col_map_offd_C;
|
||||
|
||||
|
||||
@ -17,6 +17,7 @@ C_COMPILE_FLAGS = \
|
||||
HEADERS =\
|
||||
HYPRE_struct_mv.h\
|
||||
_hypre_struct_mv.h\
|
||||
_hypre_struct_mv.hpp\
|
||||
assumed_part.h\
|
||||
box.h\
|
||||
box_manager.h\
|
||||
|
||||
@ -29,7 +29,8 @@ HEADERS =\
|
||||
mpistubs.h\
|
||||
threading.h\
|
||||
timing.h\
|
||||
_hypre_utilities.h
|
||||
_hypre_utilities.h\
|
||||
_hypre_utilities.hpp
|
||||
|
||||
FILES =\
|
||||
F90_HYPRE_error.c\
|
||||
|
||||
@ -940,7 +940,7 @@ HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v);
|
||||
|
||||
HYPRE_Int hypreDevice_GetRowNnz(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz);
|
||||
|
||||
HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_Int first_col, HYPRE_Int *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab);
|
||||
HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab);
|
||||
|
||||
HYPRE_Int hypreDevice_IntegerReduceSum(HYPRE_Int m, HYPRE_Int *d_i);
|
||||
|
||||
|
||||
@ -160,11 +160,20 @@ hypreDevice_GetRowNnz(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_di
|
||||
}
|
||||
|
||||
__global__ void
|
||||
hypreCUDAKernel_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int has_offd,
|
||||
HYPRE_BigInt first_col, HYPRE_Int *d_col_map_offd_A,
|
||||
HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a,
|
||||
HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a,
|
||||
HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab)
|
||||
hypreCUDAKernel_CopyParCSRRows(HYPRE_Int nrows,
|
||||
HYPRE_Int *d_row_indices,
|
||||
HYPRE_Int has_offd,
|
||||
HYPRE_BigInt first_col,
|
||||
HYPRE_BigInt *d_col_map_offd_A,
|
||||
HYPRE_Int *d_diag_i,
|
||||
HYPRE_Int *d_diag_j,
|
||||
HYPRE_Complex *d_diag_a,
|
||||
HYPRE_Int *d_offd_i,
|
||||
HYPRE_Int *d_offd_j,
|
||||
HYPRE_Complex *d_offd_a,
|
||||
HYPRE_Int *d_ib,
|
||||
HYPRE_BigInt *d_jb,
|
||||
HYPRE_Complex *d_ab)
|
||||
{
|
||||
const HYPRE_Int global_warp_id = hypre_cuda_get_grid_warp_id<1,1>();
|
||||
|
||||
@ -251,11 +260,21 @@ hypreCUDAKernel_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_
|
||||
* If col_map_offd_A == NULL, use (-1 - d_offd_j) as column id
|
||||
* If nrows == 1 and d_ib == NULL, it means d_ib[0] = 0 */
|
||||
HYPRE_Int
|
||||
hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd,
|
||||
HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A,
|
||||
HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a,
|
||||
HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a,
|
||||
HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab)
|
||||
hypreDevice_CopyParCSRRows(HYPRE_Int nrows,
|
||||
HYPRE_Int *d_row_indices,
|
||||
HYPRE_Int job,
|
||||
HYPRE_Int has_offd,
|
||||
HYPRE_BigInt first_col,
|
||||
HYPRE_BigInt *d_col_map_offd_A,
|
||||
HYPRE_Int *d_diag_i,
|
||||
HYPRE_Int *d_diag_j,
|
||||
HYPRE_Complex *d_diag_a,
|
||||
HYPRE_Int *d_offd_i,
|
||||
HYPRE_Int *d_offd_j,
|
||||
HYPRE_Complex *d_offd_a,
|
||||
HYPRE_Int *d_ib,
|
||||
HYPRE_BigInt *d_jb,
|
||||
HYPRE_Complex *d_ab)
|
||||
{
|
||||
/* trivial case */
|
||||
if (nrows <= 0)
|
||||
@ -368,6 +387,8 @@ hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
/* Input: d_row_num, of size nrows, contains the row indices, which can be BigInt or Int
|
||||
* Output: d_row_ind */
|
||||
template <typename T>
|
||||
HYPRE_Int
|
||||
hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, T *d_row_num, T *d_row_ind)
|
||||
@ -390,7 +411,7 @@ hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_
|
||||
}
|
||||
|
||||
template HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_num, HYPRE_Int *d_row_ind);
|
||||
#if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT)
|
||||
#if defined(HYPRE_MIXEDINT)
|
||||
template HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_BigInt *d_row_num, HYPRE_BigInt *d_row_ind);
|
||||
#endif
|
||||
|
||||
@ -731,9 +752,9 @@ hypreDevice_StableSortByTupleKey(HYPRE_Int N, T1 *keys1, T2 *keys2, T3 *vals, HY
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, HYPRE_Int *vals, HYPRE_Int opt);
|
||||
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Real *keys2, HYPRE_Int *vals, HYPRE_Int opt);
|
||||
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_BigInt *keys1, HYPRE_BigInt *keys2, HYPRE_Complex *vals, HYPRE_Int opt);
|
||||
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, HYPRE_Int *vals, HYPRE_Int opt);
|
||||
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Real *keys2, HYPRE_Int *vals, HYPRE_Int opt);
|
||||
template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, HYPRE_Complex *vals, HYPRE_Int opt);
|
||||
|
||||
/* opt:
|
||||
* 0, (a,b) < (a',b') iff a < a' or (a = a' and b < b') [normal tuple comparison]
|
||||
@ -759,10 +780,10 @@ hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, T1 *keys1, T2 *keys2, T3 *val
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
#if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT)
|
||||
template HYPRE_Int hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, char *vals1, HYPRE_Complex *vals2, HYPRE_Int opt);
|
||||
#if defined(HYPRE_MIXEDINT)
|
||||
template HYPRE_Int hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, HYPRE_BigInt *keys1, HYPRE_BigInt *keys2, char *vals1, HYPRE_Complex *vals2, HYPRE_Int opt);
|
||||
#endif
|
||||
template HYPRE_Int hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, char *vals1, HYPRE_Complex *vals2, HYPRE_Int opt);
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
HYPRE_Int
|
||||
|
||||
@ -875,7 +875,7 @@ HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v);
|
||||
|
||||
HYPRE_Int hypreDevice_GetRowNnz(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz);
|
||||
|
||||
HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_Int first_col, HYPRE_Int *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab);
|
||||
HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab);
|
||||
|
||||
HYPRE_Int hypreDevice_IntegerReduceSum(HYPRE_Int m, HYPRE_Int *d_i);
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user