diff --git a/src/parcsr_ls/par_mgr.c b/src/parcsr_ls/par_mgr.c index 06d2d19b5..1cba60001 100644 --- a/src/parcsr_ls/par_mgr.c +++ b/src/parcsr_ls/par_mgr.c @@ -871,7 +871,8 @@ hypre_MGRCoarsen(hypre_ParCSRMatrix *S, HYPRE_Int *CF_marker = NULL; HYPRE_Int *cindexes = fixed_coarse_indexes; HYPRE_Int i, row, nc; - HYPRE_Int nloc = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A)); + HYPRE_Int nloc = hypre_ParCSRMatrixNumRows(A); + HYPRE_MemoryLocation memory_location; /* If this is the last level, coarsen onto fixed coarse set */ if (cflag) @@ -883,6 +884,12 @@ hypre_MGRCoarsen(hypre_ParCSRMatrix *S, *CF_marker_ptr = hypre_IntArrayCreate(nloc); hypre_IntArrayInitialize(*CF_marker_ptr); hypre_IntArraySetConstantValues(*CF_marker_ptr, FMRK); + memory_location = hypre_IntArrayMemoryLocation(*CF_marker_ptr); + + if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE) + { + hypre_IntArrayMigrate(*CF_marker_ptr, HYPRE_MEMORY_HOST); + } CF_marker = hypre_IntArrayData(*CF_marker_ptr); /* first mark fixed coarse set */ @@ -891,6 +898,11 @@ hypre_MGRCoarsen(hypre_ParCSRMatrix *S, { CF_marker[cindexes[i]] = CMRK; } + + if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE) + { + hypre_IntArrayMigrate(*CF_marker_ptr, HYPRE_MEMORY_DEVICE); + } } else { diff --git a/src/parcsr_ls/par_mgr_setup.c b/src/parcsr_ls/par_mgr_setup.c index 98b4aa5f0..6c22f0bf8 100644 --- a/src/parcsr_ls/par_mgr_setup.c +++ b/src/parcsr_ls/par_mgr_setup.c @@ -1550,19 +1550,14 @@ hypre_MGRSetup( void *mgr_vdata, hypre_ParPrintf(comm, "Lev = %d, proc = %d - SetupAFF: %f\n", lev, my_id, wall_time); #endif - /* TODO: refactor this block. Add hypre_IntArrayScale (VPM) */ + /* TODO: refactor this block (VPM) */ #if defined (HYPRE_USING_CUDA) || defined (HYPRE_USING_HIP) - hypre_IntArray *F_marker = hypre_IntArrayCreate(nloc); - hypre_IntArrayInitialize(F_marker); + hypre_ParCSRMatrix *P_FF_ptr; + hypre_IntArray *F_marker = hypre_IntArrayCloneDeep(CF_marker_array[lev]); - HYPRE_Int *F_marker_data = hypre_IntArrayData(F_marker); - for (j = 0; j < nloc; j++) - { - F_marker_data[j] = -CF_marker[j]; - } - - hypre_ParCSRMatrix *P_FF_ptr; - hypre_MGRBuildPDevice(A_array[lev], F_marker_data, row_starts_fpts, 0, &P_FF_ptr); + hypre_IntArrayNegate(F_marker); + hypre_MGRBuildPDevice(A_array[lev], hypre_IntArrayData(F_marker), + row_starts_fpts, 0, &P_FF_ptr); P_FF_array[lev] = P_FF_ptr; hypre_IntArrayDestroy(F_marker); @@ -1631,6 +1626,13 @@ hypre_MGRSetup( void *mgr_vdata, * skip if we reduce the reserved C-points before the coarse grid solve */ if (mgr_data -> lvl_to_keep_cpoints == 0) { + memory_location = hypre_IntArrayMemoryLocation(CF_marker_array[lev]); + if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE) + { + hypre_IntArrayMigrate(CF_marker_array[lev], HYPRE_MEMORY_HOST); + } + CF_marker = hypre_IntArrayData(CF_marker_array[lev]); + for (i = 0; i < reserved_coarse_size; i++) { CF_marker[reserved_Cpoint_local_indexes[i]] = S_CMRK; @@ -1653,6 +1655,11 @@ hypre_MGRSetup( void *mgr_vdata, CF_marker[i] = CMRK; } } + + if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE) + { + hypre_IntArrayMigrate(CF_marker_array[lev], HYPRE_MEMORY_DEVICE); + } } /* allocate space for solution and rhs arrays */ diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index a05e6b44e..25df53fe2 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -1259,4 +1259,3 @@ HYPRE_Int hypre_ParVectorGetValuesDevice(hypre_ParVector *vector, HYPRE_Int num_ #endif #endif - diff --git a/src/seq_mv/csr_matop_device.c b/src/seq_mv/csr_matop_device.c index 527a84f29..d2d4a4887 100644 --- a/src/seq_mv/csr_matop_device.c +++ b/src/seq_mv/csr_matop_device.c @@ -2047,8 +2047,10 @@ hypre_CSRMatrixSortRowOutOfPlace(hypre_CSRMatrix *A) hypre_CSRMatrixSortedJ(A) = hypre_TAlloc(HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE); hypre_CSRMatrixSortedData(A) = hypre_TAlloc(HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE); - hypre_TMemcpy(hypre_CSRMatrixSortedJ(A), A_j, HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); - hypre_TMemcpy(hypre_CSRMatrixSortedData(A), A_a, HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + hypre_TMemcpy(hypre_CSRMatrixSortedJ(A), A_j, HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE, + HYPRE_MEMORY_DEVICE); + hypre_TMemcpy(hypre_CSRMatrixSortedData(A), A_a, HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE, + HYPRE_MEMORY_DEVICE); hypre_CSRMatrixJ(A) = hypre_CSRMatrixSortedJ(A); hypre_CSRMatrixData(A) = hypre_CSRMatrixSortedData(A); @@ -2129,7 +2131,7 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, hypre_int buffer_size; hypre_int structural_zero; #endif - cusparseDiagType_t DiagType = unit_diag ? CUSPARSE_DIAG_TYPE_UNIT: CUSPARSE_DIAG_TYPE_NON_UNIT; + cusparseDiagType_t DiagType = unit_diag ? CUSPARSE_DIAG_TYPE_UNIT : CUSPARSE_DIAG_TYPE_NON_UNIT; if (nrow != ncol) { @@ -2189,8 +2191,10 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, HYPRE_Complex *A_ma = hypre_CsrsvDataMatData(csrsv_data); #if CUSPARSE_VERSION >= CUSPARSE_SPSV_VERSION - cusparseSpMatDescr_t matA = hypre_CSRMatrixToCusparseSpMat_core(nrow, ncol, 0, nnzA, A_i, A_j, A_ma); - HYPRE_CUSPARSE_CALL( cusparseSpMatSetAttribute(matA, CUSPARSE_SPMAT_DIAG_TYPE, &DiagType, sizeof(cusparseDiagType_t)) ); + cusparseSpMatDescr_t matA = hypre_CSRMatrixToCusparseSpMat_core(nrow, ncol, 0, nnzA, A_i, A_j, + A_ma); + HYPRE_CUSPARSE_CALL( cusparseSpMatSetAttribute(matA, CUSPARSE_SPMAT_DIAG_TYPE, &DiagType, + sizeof(cusparseDiagType_t)) ); cusparseDnVecDescr_t vecF = hypre_VectorToCusparseDnVec_core(f_data, nrow); cusparseDnVecDescr_t vecU = hypre_VectorToCusparseDnVec_core(u_data, ncol); #else @@ -2455,7 +2459,8 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo, HYPRE_Complex alpha = 1.0; size_t buffer_size; hypre_int structural_zero; - rocsparse_diag_type diag_type = unit_diag ? rocsparse_diag_type_unit: rocsparse_diag_type_non_unit; + rocsparse_diag_type diag_type = unit_diag ? rocsparse_diag_type_unit : + rocsparse_diag_type_non_unit; if (nrow != ncol) { diff --git a/src/seq_mv/csr_spgemm_device.c b/src/seq_mv/csr_spgemm_device.c index b6398b3c0..c6153b97d 100644 --- a/src/seq_mv/csr_spgemm_device.c +++ b/src/seq_mv/csr_spgemm_device.c @@ -68,9 +68,9 @@ hypreDevice_CSRSpGemm(hypre_CSRMatrix *A, hypre_CSRMatrixGPUMatDescr(C), hypre_CSRMatrixGPUMatInfo(C), &nnzC, &d_ic, &d_jc, &d_c); #elif defined(HYPRE_USING_ONEMKLSPARSE) hypreDevice_CSRSpGemmOnemklsparse( m, k, n, - hypre_CSRMatrixGPUMatHandle(A), nnza, d_ia, d_ja, d_a, - hypre_CSRMatrixGPUMatHandle(B), nnzb, d_ib, d_jb, d_b, - hypre_CSRMatrixGPUMatHandle(C), &nnzC, &d_ic, &d_jc, &d_c); + hypre_CSRMatrixGPUMatHandle(A), nnza, d_ia, d_ja, d_a, + hypre_CSRMatrixGPUMatHandle(B), nnzb, d_ib, d_jb, d_b, + hypre_CSRMatrixGPUMatHandle(C), &nnzC, &d_ic, &d_jc, &d_c); #else hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Attempting to use device sparse matrix library for SpGEMM without having compiled support for it!\n"); diff --git a/src/seq_mv/csr_spgemm_device_numer.h b/src/seq_mv/csr_spgemm_device_numer.h index f8c28d541..21ee6ada5 100644 --- a/src/seq_mv/csr_spgemm_device_numer.h +++ b/src/seq_mv/csr_spgemm_device_numer.h @@ -39,7 +39,7 @@ hypre_spgemm_hash_insert_numer( HYPRE_Int old = -1; #if defined(HYPRE_USING_HIP) && (HIP_VERSION == 50422804) -/* VPM: see https://github.com/hypre-space/hypre/issues/875 */ + /* VPM: see https://github.com/hypre-space/hypre/issues/875 */ #pragma unroll 8 #else #pragma unroll UNROLL_FACTOR diff --git a/src/seq_mv/csr_spgemm_device_symbl.h b/src/seq_mv/csr_spgemm_device_symbl.h index a5b5602bd..e73398800 100644 --- a/src/seq_mv/csr_spgemm_device_symbl.h +++ b/src/seq_mv/csr_spgemm_device_symbl.h @@ -34,7 +34,7 @@ hypre_spgemm_hash_insert_symbl( HYPRE_Int old = -1; #if defined(HYPRE_USING_HIP) && (HIP_VERSION == 50422804) -/* VPM: see https://github.com/hypre-space/hypre/issues/875 */ + /* VPM: see https://github.com/hypre-space/hypre/issues/875 */ #pragma unroll 8 #else #pragma unroll UNROLL_FACTOR diff --git a/src/seq_mv/protos.h b/src/seq_mv/protos.h index 5de5dc1f3..9ccfec134 100644 --- a/src/seq_mv/protos.h +++ b/src/seq_mv/protos.h @@ -87,10 +87,15 @@ HYPRE_Int hypre_CSRMatrixPermuteDevice( hypre_CSRMatrix *A, HYPRE_Int *perm, HYPRE_Int *rqperm, hypre_CSRMatrix *B ); HYPRE_Int hypre_CSRMatrixSortRow(hypre_CSRMatrix *A); HYPRE_Int hypre_CSRMatrixSortRowOutOfPlace(hypre_CSRMatrix *A); -HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice_core(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, HYPRE_Int offset_u); -HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, hypre_Vector *u ); -HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); -HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); +HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice_core(char uplo, HYPRE_Int unit_diag, + hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, + HYPRE_Int offset_u); +HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice(char uplo, HYPRE_Int unit_diag, + hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, hypre_Vector *u ); +HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo, HYPRE_Int unit_diag, + hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); +HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, HYPRE_Int unit_diag, + hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); HYPRE_Int hypre_CSRMatrixIntersectPattern(hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA, HYPRE_Int diag_option); HYPRE_Int hypre_CSRMatrixDiagScaleDevice( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd); diff --git a/src/seq_mv/seq_mv.h b/src/seq_mv/seq_mv.h index 9003036cd..4156a04ee 100644 --- a/src/seq_mv/seq_mv.h +++ b/src/seq_mv/seq_mv.h @@ -363,10 +363,15 @@ HYPRE_Int hypre_CSRMatrixPermuteDevice( hypre_CSRMatrix *A, HYPRE_Int *perm, HYPRE_Int *rqperm, hypre_CSRMatrix *B ); HYPRE_Int hypre_CSRMatrixSortRow(hypre_CSRMatrix *A); HYPRE_Int hypre_CSRMatrixSortRowOutOfPlace(hypre_CSRMatrix *A); -HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice_core(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, HYPRE_Int offset_u); -HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, hypre_Vector *u ); -HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); -HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); +HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice_core(char uplo, HYPRE_Int unit_diag, + hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, + HYPRE_Int offset_u); +HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice(char uplo, HYPRE_Int unit_diag, + hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, hypre_Vector *u ); +HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo, HYPRE_Int unit_diag, + hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); +HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, HYPRE_Int unit_diag, + hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); HYPRE_Int hypre_CSRMatrixIntersectPattern(hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA, HYPRE_Int diag_option); HYPRE_Int hypre_CSRMatrixDiagScaleDevice( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd); diff --git a/src/seq_mv/seq_mv.hpp b/src/seq_mv/seq_mv.hpp index 8d79463cf..744e55e84 100644 --- a/src/seq_mv/seq_mv.hpp +++ b/src/seq_mv/seq_mv.hpp @@ -22,7 +22,8 @@ cusparseDnVecDescr_t hypre_VectorToCusparseDnVec_core(HYPRE_Complex *x_data, HYP cusparseDnVecDescr_t hypre_VectorToCusparseDnVec(const hypre_Vector *x, HYPRE_Int offset, HYPRE_Int size_override); -cusparseDnMatDescr_t hypre_VectorToCusparseDnMat_core(HYPRE_Complex *x_data, HYPRE_Int nrow, HYPRE_Int ncol, HYPRE_Int order); +cusparseDnMatDescr_t hypre_VectorToCusparseDnMat_core(HYPRE_Complex *x_data, HYPRE_Int nrow, + HYPRE_Int ncol, HYPRE_Int order); cusparseDnMatDescr_t hypre_VectorToCusparseDnMat(const hypre_Vector *x); diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index c6e21e8cb..52307ba03 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -2108,14 +2108,15 @@ HYPRE_Int hypre_IntArrayCountHost( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int hypre_IntArrayCount( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int *num_values_ptr ); HYPRE_Int hypre_IntArrayInverseMapping( hypre_IntArray *v, hypre_IntArray **w_ptr ); +HYPRE_Int hypre_IntArrayNegate( hypre_IntArray *v ); /* int_array_device.c */ -#if defined(HYPRE_USING_GPU) +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_IntArraySetConstantValuesDevice( hypre_IntArray *v, HYPRE_Int value ); -HYPRE_Int hypre_IntArrayReverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w ); HYPRE_Int hypre_IntArrayCountDevice ( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int *num_values_ptr ); HYPRE_Int hypre_IntArrayInverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w ); +HYPRE_Int hypre_IntArrayNegateDevice( hypre_IntArray *v ); #endif /* memory_tracker.c */ diff --git a/src/utilities/int_array.c b/src/utilities/int_array.c index 034c81b6d..09b689b40 100644 --- a/src/utilities/int_array.c +++ b/src/utilities/int_array.c @@ -303,7 +303,7 @@ hypre_IntArraySetConstantValues( hypre_IntArray *v, return hypre_error_flag; } -#if defined(HYPRE_USING_GPU) +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_IntArrayMemoryLocation(v)); if (exec == HYPRE_EXEC_DEVICE) @@ -361,7 +361,7 @@ hypre_IntArrayCount( hypre_IntArray *v, return hypre_error_flag; } -#if defined(HYPRE_USING_GPU) +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_IntArrayMemoryLocation(v)); if (exec == HYPRE_EXEC_DEVICE) @@ -446,3 +446,41 @@ hypre_IntArrayInverseMapping( hypre_IntArray *v, return hypre_error_flag; } + +/*-------------------------------------------------------------------------- + * hypre_IntArrayNegate + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IntArrayNegate( hypre_IntArray *v ) +{ + HYPRE_Int *array_data = hypre_IntArrayData(v); + HYPRE_Int size = hypre_IntArraySize(v); + HYPRE_Int i; + + if (size <= 0) + { + return hypre_error_flag; + } + +#if defined(HYPRE_USING_GPU) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_IntArrayMemoryLocation(v)); + + if (exec == HYPRE_EXEC_DEVICE) + { + hypre_IntArrayNegateDevice(v); + } + else +#endif + { +#if defined(HYPRE_USING_OPENMP) + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < size; i++) + { + array_data[i] = - array_data[i]; + } + } + + return hypre_error_flag; +} diff --git a/src/utilities/int_array_device.c b/src/utilities/int_array_device.c index 05edde75d..4b8c3c476 100644 --- a/src/utilities/int_array_device.c +++ b/src/utilities/int_array_device.c @@ -5,11 +5,11 @@ * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ -#include "_hypre_onedpl.hpp" #include "_hypre_utilities.h" #include "_hypre_utilities.hpp" +#include "_hypre_onedpl.hpp" -#if defined(HYPRE_USING_GPU) +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) /*-------------------------------------------------------------------------- * hypre_IntArraySetConstantValuesDevice @@ -39,7 +39,7 @@ hypre_IntArraySetConstantValuesDevice( hypre_IntArray *v, return hypre_error_flag; } -#if !defined(HYPRE_USING_DEVICE_OPENMP) +#if defined(HYPRE_USING_GPU) /*-------------------------------------------------------------------------- * hypreGPUKernel_IntArrayInverseMapping *--------------------------------------------------------------------------*/ @@ -122,4 +122,26 @@ hypre_IntArrayCountDevice( hypre_IntArray *v, return hypre_error_flag; } +/*-------------------------------------------------------------------------- + * hypre_IntArrayNegateDevice + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_IntArrayNegateDevice( hypre_IntArray *v ) +{ +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_THRUST_CALL( transform, + hypre_IntArrayData(v), + hypre_IntArrayData(v) + hypre_IntArraySize(v), + hypre_IntArrayData(v), + thrust::negate() ); + +#elif defined(HYPRE_USING_SYCL) || defined(HYPRE_USING_DEVICE_OPENMP) + hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Not implemented yet!"); + +#endif + + return hypre_error_flag; +} + #endif diff --git a/src/utilities/memory.c b/src/utilities/memory.c index e74353ba8..7c0c25c03 100644 --- a/src/utilities/memory.c +++ b/src/utilities/memory.c @@ -55,6 +55,7 @@ hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location, /*-------------------------------------------------------------------------- * hypre_OutOfMemory *--------------------------------------------------------------------------*/ + static inline void hypre_OutOfMemory(size_t size) { @@ -74,8 +75,7 @@ hypre_WrongMemoryLocation(void) void hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location) { -#if defined(HYPRE_DEBUG) -#if defined(HYPRE_USING_GPU) +#if defined(HYPRE_DEBUG) && defined(HYPRE_USING_GPU) if (!ptr) { return; @@ -87,7 +87,6 @@ hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location) * will create an endless loop otherwise */ assert(location == location_ptr); #endif -#endif } /*========================================================================== diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 8770dc545..c289aba3a 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -401,14 +401,15 @@ HYPRE_Int hypre_IntArrayCountHost( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int hypre_IntArrayCount( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int *num_values_ptr ); HYPRE_Int hypre_IntArrayInverseMapping( hypre_IntArray *v, hypre_IntArray **w_ptr ); +HYPRE_Int hypre_IntArrayNegate( hypre_IntArray *v ); /* int_array_device.c */ -#if defined(HYPRE_USING_GPU) +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) HYPRE_Int hypre_IntArraySetConstantValuesDevice( hypre_IntArray *v, HYPRE_Int value ); -HYPRE_Int hypre_IntArrayReverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w ); HYPRE_Int hypre_IntArrayCountDevice ( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int *num_values_ptr ); HYPRE_Int hypre_IntArrayInverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w ); +HYPRE_Int hypre_IntArrayNegateDevice( hypre_IntArray *v ); #endif /* memory_tracker.c */