Gpu mixedInt (#380)
This PR adds GPU support for mixedInt. Co-authored-by: Rob Falgout <rfalgout@llnl.gov>
This commit is contained in:
		
							parent
							
								
									1d9411c7ab
								
							
						
					
					
						commit
						ad5d7e009f
					
				| @ -11,7 +11,7 @@ case $1 in | ||||
|    -h|-help) | ||||
|       cat <<EOF | ||||
| 
 | ||||
|    **** Only run this script on the lassen/ray cluster **** | ||||
|    **** Only run this script on the lassen cluster **** | ||||
| 
 | ||||
|    $0 [-h|-help] {src_dir} | ||||
| 
 | ||||
| @ -53,6 +53,12 @@ eo="-gpu -rt -mpibind -save ${save}" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro -eo: $eo | ||||
| ./renametest.sh basic $output_dir/basic-cuda-um | ||||
| 
 | ||||
| #CUDA with UM and mixed-int | ||||
| co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'" | ||||
| ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save}" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro | ||||
| ./renametest.sh basic $output_dir/basic-cuda-um-mixedint | ||||
| 
 | ||||
| # CUDA with UM with shared library [no run] | ||||
| co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
|  | ||||
| @ -11,7 +11,7 @@ case $1 in | ||||
|    -h|-help) | ||||
|       cat <<EOF | ||||
| 
 | ||||
|    **** Only run this script on the lassen/ray cluster **** | ||||
|    **** Only run this script on the ray cluster **** | ||||
| 
 | ||||
|    $0 [-h|-help] {src_dir} | ||||
| 
 | ||||
| @ -53,6 +53,12 @@ eo="-gpu -rt -mpibind -save ${save}" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro -eo: $eo | ||||
| ./renametest.sh basic $output_dir/basic-cuda-um | ||||
| 
 | ||||
| #CUDA with UM and mixed-int | ||||
| co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'" | ||||
| ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save}" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro | ||||
| ./renametest.sh basic $output_dir/basic-cuda-um-mixedint | ||||
| 
 | ||||
| # CUDA with UM with shared library [no run] | ||||
| co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'" | ||||
| ./test.sh basic.sh $src_dir -co: $co -mo: $mo | ||||
|  | ||||
| @ -307,8 +307,8 @@ hypre_IJMatrixAssembleSortAndReduce3(HYPRE_Int  N0, HYPRE_BigInt  *I0, HYPRE_Big | ||||
| { | ||||
|    hypreDevice_StableSortTupleByTupleKey(N0, I0, J0, X0, A0, 0); | ||||
| 
 | ||||
|    HYPRE_Int     *I = hypre_TAlloc(HYPRE_Int,     N0, HYPRE_MEMORY_DEVICE); | ||||
|    HYPRE_Int     *J = hypre_TAlloc(HYPRE_Int,     N0, HYPRE_MEMORY_DEVICE); | ||||
|    HYPRE_BigInt  *I = hypre_TAlloc(HYPRE_BigInt,  N0, HYPRE_MEMORY_DEVICE); | ||||
|    HYPRE_BigInt  *J = hypre_TAlloc(HYPRE_BigInt,  N0, HYPRE_MEMORY_DEVICE); | ||||
|    HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE); | ||||
| 
 | ||||
|    /* output in X0: 0: keep, 1: zero-out */ | ||||
|  | ||||
| @ -309,7 +309,7 @@ hypre_IJVectorAssembleSortAndReduce3(HYPRE_Int  N0, HYPRE_BigInt  *I0, char *X0, | ||||
|                       I0 + N0, | ||||
|                       thrust::make_zip_iterator(thrust::make_tuple(X0, A0)) ); | ||||
| 
 | ||||
|    HYPRE_Int     *I = hypre_TAlloc(HYPRE_Int,     N0, HYPRE_MEMORY_DEVICE); | ||||
|    HYPRE_BigInt  *I = hypre_TAlloc(HYPRE_BigInt,  N0, HYPRE_MEMORY_DEVICE); | ||||
|    HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE); | ||||
| 
 | ||||
|    /* output in X0: 0: keep, 1: zero-out */ | ||||
|  | ||||
| @ -3444,7 +3444,7 @@ hypre_ILUSetupRAPILU0Device(hypre_ParCSRMatrix *A, HYPRE_Int *perm, HYPRE_Int n, | ||||
|          S_row_starts[1] = global_start; | ||||
|       } | ||||
| 
 | ||||
|       S_row_starts = hypre_CTAlloc(HYPRE_Int,2,HYPRE_MEMORY_HOST); | ||||
|       S_row_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); | ||||
|       S_row_starts[1] = S_total_rows; | ||||
|       S_row_starts[0] = S_total_rows - m; | ||||
|       hypre_MPI_Allreduce(&m, &S_total_rows, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); | ||||
|  | ||||
| @ -43,7 +43,7 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix    *A, | ||||
|    HYPRE_Real              *A_offd_data     = hypre_CSRMatrixData(A_offd); | ||||
|    HYPRE_Int               *A_diag_j        = hypre_CSRMatrixJ(A_diag); | ||||
|    HYPRE_Int               *A_offd_j        = hypre_CSRMatrixJ(A_offd); | ||||
|    HYPRE_Int               *row_starts      = hypre_ParCSRMatrixRowStarts(A); | ||||
|    HYPRE_BigInt            *row_starts      = hypre_ParCSRMatrixRowStarts(A); | ||||
|    HYPRE_Int                num_variables   = hypre_CSRMatrixNumRows(A_diag); | ||||
|    HYPRE_Int                global_num_vars = hypre_ParCSRMatrixGlobalNumRows(A); | ||||
|    HYPRE_Int                num_nonzeros_diag; | ||||
| @ -178,9 +178,9 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix    *A, | ||||
| 
 | ||||
|    hypre_ParCSRMatrixCommPkg(S) = NULL; | ||||
| 
 | ||||
|    hypre_ParCSRMatrixColMapOffd(S) = hypre_TAlloc(HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST); | ||||
|    hypre_ParCSRMatrixColMapOffd(S) = hypre_TAlloc(HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST); | ||||
|    hypre_TMemcpy(hypre_ParCSRMatrixColMapOffd(S), hypre_ParCSRMatrixColMapOffd(A), | ||||
|                  HYPRE_Int, num_cols_offd, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); | ||||
|                  HYPRE_BigInt, num_cols_offd, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); | ||||
| 
 | ||||
|    hypre_ParCSRMatrixSocDiagJ(S) = S_temp_diag_j; | ||||
|    hypre_ParCSRMatrixSocOffdJ(S) = S_temp_offd_j; | ||||
|  | ||||
| @ -450,13 +450,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix  *A, | ||||
|                          tmp_j, | ||||
|                          AFF_offd_j ); | ||||
|       col_map_offd_AFF = hypre_TAlloc(HYPRE_BigInt, num_cols_AFF_offd, HYPRE_MEMORY_DEVICE); | ||||
|       tmp_end = HYPRE_THRUST_CALL( copy_if, | ||||
|                                    thrust::make_transform_iterator(recv_buf, -_1-1), | ||||
|                                    thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd, | ||||
|                                    offd_mark, | ||||
|                                    col_map_offd_AFF, | ||||
|                                    thrust::identity<HYPRE_Int>() ); | ||||
|       hypre_assert(tmp_end - col_map_offd_AFF == num_cols_AFF_offd); | ||||
|       HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if, | ||||
|                                                      thrust::make_transform_iterator(recv_buf, -_1-1), | ||||
|                                                      thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd, | ||||
|                                                      offd_mark, | ||||
|                                                      col_map_offd_AFF, | ||||
|                                                      thrust::identity<HYPRE_Int>() ); | ||||
|       hypre_assert(tmp_end_big - col_map_offd_AFF == num_cols_AFF_offd); | ||||
|       hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE); | ||||
| 
 | ||||
|       AFF = hypre_ParCSRMatrixCreate(comm, | ||||
| @ -597,13 +597,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix  *A, | ||||
|                          tmp_j, | ||||
|                          AFC_offd_j ); | ||||
|       col_map_offd_AFC = hypre_TAlloc(HYPRE_BigInt, num_cols_AFC_offd, HYPRE_MEMORY_DEVICE); | ||||
|       tmp_end = HYPRE_THRUST_CALL( copy_if, | ||||
|                                    recv_buf, | ||||
|                                    recv_buf + num_cols_A_offd, | ||||
|                                    offd_mark, | ||||
|                                    col_map_offd_AFC, | ||||
|                                    thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end - col_map_offd_AFC == num_cols_AFC_offd); | ||||
|       HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if, | ||||
|                                                      recv_buf, | ||||
|                                                      recv_buf + num_cols_A_offd, | ||||
|                                                      offd_mark, | ||||
|                                                      col_map_offd_AFC, | ||||
|                                                      thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end_big - col_map_offd_AFC == num_cols_AFC_offd); | ||||
|       hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE); | ||||
| 
 | ||||
|       /* AFC */ | ||||
| @ -745,13 +745,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix  *A, | ||||
|                          tmp_j, | ||||
|                          ACF_offd_j ); | ||||
|       col_map_offd_ACF = hypre_TAlloc(HYPRE_BigInt, num_cols_ACF_offd, HYPRE_MEMORY_DEVICE); | ||||
|       tmp_end = HYPRE_THRUST_CALL( copy_if, | ||||
|                                    thrust::make_transform_iterator(recv_buf, -_1-1), | ||||
|                                    thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd, | ||||
|                                    offd_mark, | ||||
|                                    col_map_offd_ACF, | ||||
|                                    thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end - col_map_offd_ACF == num_cols_ACF_offd); | ||||
|       HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if, | ||||
|                                                      thrust::make_transform_iterator(recv_buf, -_1-1), | ||||
|                                                      thrust::make_transform_iterator(recv_buf, -_1-1) + num_cols_A_offd, | ||||
|                                                      offd_mark, | ||||
|                                                      col_map_offd_ACF, | ||||
|                                                      thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end_big - col_map_offd_ACF == num_cols_ACF_offd); | ||||
|       hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE); | ||||
| 
 | ||||
|       /* ACF */ | ||||
| @ -894,13 +894,13 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix  *A, | ||||
|                          tmp_j, | ||||
|                          ACC_offd_j ); | ||||
|       col_map_offd_ACC = hypre_TAlloc(HYPRE_BigInt, num_cols_ACC_offd, HYPRE_MEMORY_DEVICE); | ||||
|       tmp_end = HYPRE_THRUST_CALL( copy_if, | ||||
|                                    recv_buf, | ||||
|                                    recv_buf + num_cols_A_offd, | ||||
|                                    offd_mark, | ||||
|                                    col_map_offd_ACC, | ||||
|                                    thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end - col_map_offd_ACC == num_cols_ACC_offd); | ||||
|       HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if, | ||||
|                                                      recv_buf, | ||||
|                                                      recv_buf + num_cols_A_offd, | ||||
|                                                      offd_mark, | ||||
|                                                      col_map_offd_ACC, | ||||
|                                                      thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end_big - col_map_offd_ACC == num_cols_ACC_offd); | ||||
|       hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE); | ||||
| 
 | ||||
|       /* ACC */ | ||||
| @ -1217,13 +1217,13 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix  *A, | ||||
|                          tmp_j, | ||||
|                          ACX_offd_j ); | ||||
|       col_map_offd_ACX = hypre_TAlloc(HYPRE_BigInt, num_cols_ACX_offd, HYPRE_MEMORY_DEVICE); | ||||
|       tmp_end = HYPRE_THRUST_CALL( copy_if, | ||||
|                                    col_map_offd_A, | ||||
|                                    col_map_offd_A + num_cols_A_offd, | ||||
|                                    offd_mark, | ||||
|                                    col_map_offd_ACX, | ||||
|                                    thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end - col_map_offd_ACX == num_cols_ACX_offd); | ||||
|       HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if, | ||||
|                                                      col_map_offd_A, | ||||
|                                                      col_map_offd_A + num_cols_A_offd, | ||||
|                                                      offd_mark, | ||||
|                                                      col_map_offd_ACX, | ||||
|                                                      thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end_big - col_map_offd_ACX == num_cols_ACX_offd); | ||||
|       hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE); | ||||
| 
 | ||||
|       /* ACX */ | ||||
| @ -1354,13 +1354,13 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix  *A, | ||||
|                          tmp_j, | ||||
|                          AXC_offd_j ); | ||||
|       col_map_offd_AXC = hypre_TAlloc(HYPRE_BigInt, num_cols_AXC_offd, HYPRE_MEMORY_DEVICE); | ||||
|       tmp_end = HYPRE_THRUST_CALL( copy_if, | ||||
|                                    recv_buf, | ||||
|                                    recv_buf + num_cols_A_offd, | ||||
|                                    offd_mark, | ||||
|                                    col_map_offd_AXC, | ||||
|                                    thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end - col_map_offd_AXC == num_cols_AXC_offd); | ||||
|       HYPRE_BigInt *tmp_end_big = HYPRE_THRUST_CALL( copy_if, | ||||
|                                                      recv_buf, | ||||
|                                                      recv_buf + num_cols_A_offd, | ||||
|                                                      offd_mark, | ||||
|                                                      col_map_offd_AXC, | ||||
|                                                      thrust::identity<HYPRE_Int>()); | ||||
|       hypre_assert(tmp_end_big - col_map_offd_AXC == num_cols_AXC_offd); | ||||
|       hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE); | ||||
| 
 | ||||
|       /* AXC */ | ||||
|  | ||||
| @ -14,7 +14,7 @@ | ||||
| HYPRE_Int | ||||
| hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix   *A, | ||||
|                                        HYPRE_Int             indices_len, | ||||
|                                        HYPRE_Int            *indices, | ||||
|                                        HYPRE_BigInt         *indices, | ||||
|                                        hypre_ParCSRCommPkg  *comm_pkg, | ||||
|                                        HYPRE_Int             want_data, | ||||
|                                        void                **request_ptr) | ||||
|  | ||||
| @ -432,9 +432,9 @@ hypre_CSRMatrixSplitDevice_core( HYPRE_Int         job,                 /* 0: qu | ||||
|                       col_map_offd_C, | ||||
|                       col_map_offd_C + B_ext_offd_nnz + num_cols_offd_B ); | ||||
| 
 | ||||
|    HYPRE_Int *new_end = HYPRE_THRUST_CALL( unique, | ||||
|                                            col_map_offd_C, | ||||
|                                            col_map_offd_C + B_ext_offd_nnz + num_cols_offd_B ); | ||||
|    HYPRE_BigInt *new_end = HYPRE_THRUST_CALL( unique, | ||||
|                                               col_map_offd_C, | ||||
|                                               col_map_offd_C + B_ext_offd_nnz + num_cols_offd_B ); | ||||
| 
 | ||||
|    num_cols_offd_C = new_end - col_map_offd_C; | ||||
| 
 | ||||
|  | ||||
| @ -17,6 +17,7 @@ C_COMPILE_FLAGS = \ | ||||
| HEADERS =\
 | ||||
|  HYPRE_struct_mv.h\
 | ||||
|  _hypre_struct_mv.h\
 | ||||
|  _hypre_struct_mv.hpp\
 | ||||
|  assumed_part.h\
 | ||||
|  box.h\
 | ||||
|  box_manager.h\
 | ||||
|  | ||||
| @ -29,7 +29,8 @@ HEADERS =\ | ||||
|  mpistubs.h\
 | ||||
|  threading.h\
 | ||||
|  timing.h\
 | ||||
|  _hypre_utilities.h | ||||
|  _hypre_utilities.h\
 | ||||
|  _hypre_utilities.hpp | ||||
| 
 | ||||
| FILES =\
 | ||||
|  F90_HYPRE_error.c\
 | ||||
|  | ||||
| @ -940,7 +940,7 @@ HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v); | ||||
| 
 | ||||
| HYPRE_Int hypreDevice_GetRowNnz(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz); | ||||
| 
 | ||||
| HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_Int first_col, HYPRE_Int *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab); | ||||
| HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab); | ||||
| 
 | ||||
| HYPRE_Int hypreDevice_IntegerReduceSum(HYPRE_Int m, HYPRE_Int *d_i); | ||||
| 
 | ||||
|  | ||||
| @ -160,11 +160,20 @@ hypreDevice_GetRowNnz(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_di | ||||
| } | ||||
| 
 | ||||
| __global__ void | ||||
| hypreCUDAKernel_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int has_offd, | ||||
|                                HYPRE_BigInt first_col, HYPRE_Int *d_col_map_offd_A, | ||||
|                                HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, | ||||
|                                HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, | ||||
|                                HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab) | ||||
| hypreCUDAKernel_CopyParCSRRows(HYPRE_Int      nrows, | ||||
|                                HYPRE_Int     *d_row_indices, | ||||
|                                HYPRE_Int      has_offd, | ||||
|                                HYPRE_BigInt   first_col, | ||||
|                                HYPRE_BigInt  *d_col_map_offd_A, | ||||
|                                HYPRE_Int     *d_diag_i, | ||||
|                                HYPRE_Int     *d_diag_j, | ||||
|                                HYPRE_Complex *d_diag_a, | ||||
|                                HYPRE_Int     *d_offd_i, | ||||
|                                HYPRE_Int     *d_offd_j, | ||||
|                                HYPRE_Complex *d_offd_a, | ||||
|                                HYPRE_Int     *d_ib, | ||||
|                                HYPRE_BigInt  *d_jb, | ||||
|                                HYPRE_Complex *d_ab) | ||||
| { | ||||
|    const HYPRE_Int global_warp_id = hypre_cuda_get_grid_warp_id<1,1>(); | ||||
| 
 | ||||
| @ -251,11 +260,21 @@ hypreCUDAKernel_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_ | ||||
|  *    If col_map_offd_A == NULL, use (-1 - d_offd_j) as column id | ||||
|  *    If nrows == 1 and d_ib == NULL, it means d_ib[0] = 0 */ | ||||
| HYPRE_Int | ||||
| hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, | ||||
|                            HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, | ||||
|                            HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, | ||||
|                            HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, | ||||
|                            HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab) | ||||
| hypreDevice_CopyParCSRRows(HYPRE_Int      nrows, | ||||
|                            HYPRE_Int     *d_row_indices, | ||||
|                            HYPRE_Int      job, | ||||
|                            HYPRE_Int      has_offd, | ||||
|                            HYPRE_BigInt   first_col, | ||||
|                            HYPRE_BigInt  *d_col_map_offd_A, | ||||
|                            HYPRE_Int     *d_diag_i, | ||||
|                            HYPRE_Int     *d_diag_j, | ||||
|                            HYPRE_Complex *d_diag_a, | ||||
|                            HYPRE_Int     *d_offd_i, | ||||
|                            HYPRE_Int     *d_offd_j, | ||||
|                            HYPRE_Complex *d_offd_a, | ||||
|                            HYPRE_Int     *d_ib, | ||||
|                            HYPRE_BigInt  *d_jb, | ||||
|                            HYPRE_Complex *d_ab) | ||||
| { | ||||
|    /* trivial case */ | ||||
|    if (nrows <= 0) | ||||
| @ -368,6 +387,8 @@ hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_ | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /* Input: d_row_num, of size nrows, contains the rows indices that can be BigInt or Int
 | ||||
|  * Output: d_row_ind */ | ||||
| template <typename T> | ||||
| HYPRE_Int | ||||
| hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, T *d_row_num, T *d_row_ind) | ||||
| @ -390,7 +411,7 @@ hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_ | ||||
| } | ||||
| 
 | ||||
| template HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_num, HYPRE_Int *d_row_ind); | ||||
| #if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT) | ||||
| #if defined(HYPRE_MIXEDINT) | ||||
| template HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_BigInt *d_row_num, HYPRE_BigInt *d_row_ind); | ||||
| #endif | ||||
| 
 | ||||
| @ -731,9 +752,9 @@ hypreDevice_StableSortByTupleKey(HYPRE_Int N, T1 *keys1, T2 *keys2, T3 *vals, HY | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int  *keys2, HYPRE_Int *vals, HYPRE_Int opt); | ||||
| template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Real *keys2, HYPRE_Int *vals, HYPRE_Int opt); | ||||
| template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_BigInt *keys1, HYPRE_BigInt *keys2, HYPRE_Complex *vals, HYPRE_Int opt); | ||||
| template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int  *keys2, HYPRE_Int     *vals, HYPRE_Int opt); | ||||
| template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Real *keys2, HYPRE_Int     *vals, HYPRE_Int opt); | ||||
| template HYPRE_Int hypreDevice_StableSortByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int  *keys2, HYPRE_Complex *vals, HYPRE_Int opt); | ||||
| 
 | ||||
| /* opt:
 | ||||
|  *      0, (a,b) < (a',b') iff a < a' or (a = a' and  b  <  b')                       [normal tupe comp] | ||||
| @ -759,10 +780,10 @@ hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, T1 *keys1, T2 *keys2, T3 *val | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| #if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT) | ||||
| template HYPRE_Int hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, char *vals1, HYPRE_Complex *vals2, HYPRE_Int opt); | ||||
| #if defined(HYPRE_MIXEDINT) | ||||
| template HYPRE_Int hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, HYPRE_BigInt *keys1, HYPRE_BigInt *keys2, char *vals1, HYPRE_Complex *vals2, HYPRE_Int opt); | ||||
| #endif | ||||
| template HYPRE_Int hypreDevice_StableSortTupleByTupleKey(HYPRE_Int N, HYPRE_Int *keys1, HYPRE_Int *keys2, char *vals1, HYPRE_Complex *vals2, HYPRE_Int opt); | ||||
| 
 | ||||
| template <typename T1, typename T2, typename T3> | ||||
| HYPRE_Int | ||||
|  | ||||
| @ -875,7 +875,7 @@ HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v); | ||||
| 
 | ||||
| HYPRE_Int hypreDevice_GetRowNnz(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int *d_diag_ia, HYPRE_Int *d_offd_ia, HYPRE_Int *d_rownnz); | ||||
| 
 | ||||
| HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_Int first_col, HYPRE_Int *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab); | ||||
| HYPRE_Int hypreDevice_CopyParCSRRows(HYPRE_Int nrows, HYPRE_Int *d_row_indices, HYPRE_Int job, HYPRE_Int has_offd, HYPRE_BigInt first_col, HYPRE_BigInt *d_col_map_offd_A, HYPRE_Int *d_diag_i, HYPRE_Int *d_diag_j, HYPRE_Complex *d_diag_a, HYPRE_Int *d_offd_i, HYPRE_Int *d_offd_j, HYPRE_Complex *d_offd_a, HYPRE_Int *d_ib, HYPRE_BigInt *d_jb, HYPRE_Complex *d_ab); | ||||
| 
 | ||||
| HYPRE_Int hypreDevice_IntegerReduceSum(HYPRE_Int m, HYPRE_Int *d_i); | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Ruipeng Li
						Ruipeng Li