MGR non-UVM device support (#906)

Allow MGR to work without UVM in device runs
This commit is contained in:
Victor A. P. Magri 2023-06-02 17:40:18 -04:00 committed by GitHub
parent 7ff7f2f60d
commit bd1073ad70
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 140 additions and 45 deletions

View File

@ -871,7 +871,8 @@ hypre_MGRCoarsen(hypre_ParCSRMatrix *S,
HYPRE_Int *CF_marker = NULL; HYPRE_Int *CF_marker = NULL;
HYPRE_Int *cindexes = fixed_coarse_indexes; HYPRE_Int *cindexes = fixed_coarse_indexes;
HYPRE_Int i, row, nc; HYPRE_Int i, row, nc;
HYPRE_Int nloc = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A)); HYPRE_Int nloc = hypre_ParCSRMatrixNumRows(A);
HYPRE_MemoryLocation memory_location;
/* If this is the last level, coarsen onto fixed coarse set */ /* If this is the last level, coarsen onto fixed coarse set */
if (cflag) if (cflag)
@ -883,6 +884,12 @@ hypre_MGRCoarsen(hypre_ParCSRMatrix *S,
*CF_marker_ptr = hypre_IntArrayCreate(nloc); *CF_marker_ptr = hypre_IntArrayCreate(nloc);
hypre_IntArrayInitialize(*CF_marker_ptr); hypre_IntArrayInitialize(*CF_marker_ptr);
hypre_IntArraySetConstantValues(*CF_marker_ptr, FMRK); hypre_IntArraySetConstantValues(*CF_marker_ptr, FMRK);
memory_location = hypre_IntArrayMemoryLocation(*CF_marker_ptr);
if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE)
{
hypre_IntArrayMigrate(*CF_marker_ptr, HYPRE_MEMORY_HOST);
}
CF_marker = hypre_IntArrayData(*CF_marker_ptr); CF_marker = hypre_IntArrayData(*CF_marker_ptr);
/* first mark fixed coarse set */ /* first mark fixed coarse set */
@ -891,6 +898,11 @@ hypre_MGRCoarsen(hypre_ParCSRMatrix *S,
{ {
CF_marker[cindexes[i]] = CMRK; CF_marker[cindexes[i]] = CMRK;
} }
if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE)
{
hypre_IntArrayMigrate(*CF_marker_ptr, HYPRE_MEMORY_DEVICE);
}
} }
else else
{ {

View File

@ -1550,19 +1550,14 @@ hypre_MGRSetup( void *mgr_vdata,
hypre_ParPrintf(comm, "Lev = %d, proc = %d - SetupAFF: %f\n", lev, my_id, wall_time); hypre_ParPrintf(comm, "Lev = %d, proc = %d - SetupAFF: %f\n", lev, my_id, wall_time);
#endif #endif
/* TODO: refactor this block. Add hypre_IntArrayScale (VPM) */ /* TODO: refactor this block (VPM) */
#if defined (HYPRE_USING_CUDA) || defined (HYPRE_USING_HIP) #if defined (HYPRE_USING_CUDA) || defined (HYPRE_USING_HIP)
hypre_IntArray *F_marker = hypre_IntArrayCreate(nloc); hypre_ParCSRMatrix *P_FF_ptr;
hypre_IntArrayInitialize(F_marker); hypre_IntArray *F_marker = hypre_IntArrayCloneDeep(CF_marker_array[lev]);
HYPRE_Int *F_marker_data = hypre_IntArrayData(F_marker); hypre_IntArrayNegate(F_marker);
for (j = 0; j < nloc; j++) hypre_MGRBuildPDevice(A_array[lev], hypre_IntArrayData(F_marker),
{ row_starts_fpts, 0, &P_FF_ptr);
F_marker_data[j] = -CF_marker[j];
}
hypre_ParCSRMatrix *P_FF_ptr;
hypre_MGRBuildPDevice(A_array[lev], F_marker_data, row_starts_fpts, 0, &P_FF_ptr);
P_FF_array[lev] = P_FF_ptr; P_FF_array[lev] = P_FF_ptr;
hypre_IntArrayDestroy(F_marker); hypre_IntArrayDestroy(F_marker);
@ -1631,6 +1626,13 @@ hypre_MGRSetup( void *mgr_vdata,
* skip if we reduce the reserved C-points before the coarse grid solve */ * skip if we reduce the reserved C-points before the coarse grid solve */
if (mgr_data -> lvl_to_keep_cpoints == 0) if (mgr_data -> lvl_to_keep_cpoints == 0)
{ {
memory_location = hypre_IntArrayMemoryLocation(CF_marker_array[lev]);
if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE)
{
hypre_IntArrayMigrate(CF_marker_array[lev], HYPRE_MEMORY_HOST);
}
CF_marker = hypre_IntArrayData(CF_marker_array[lev]);
for (i = 0; i < reserved_coarse_size; i++) for (i = 0; i < reserved_coarse_size; i++)
{ {
CF_marker[reserved_Cpoint_local_indexes[i]] = S_CMRK; CF_marker[reserved_Cpoint_local_indexes[i]] = S_CMRK;
@ -1653,6 +1655,11 @@ hypre_MGRSetup( void *mgr_vdata,
CF_marker[i] = CMRK; CF_marker[i] = CMRK;
} }
} }
if (hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_DEVICE)
{
hypre_IntArrayMigrate(CF_marker_array[lev], HYPRE_MEMORY_DEVICE);
}
} }
/* allocate space for solution and rhs arrays */ /* allocate space for solution and rhs arrays */

View File

@ -1259,4 +1259,3 @@ HYPRE_Int hypre_ParVectorGetValuesDevice(hypre_ParVector *vector, HYPRE_Int num_
#endif #endif
#endif #endif

View File

@ -2047,8 +2047,10 @@ hypre_CSRMatrixSortRowOutOfPlace(hypre_CSRMatrix *A)
hypre_CSRMatrixSortedJ(A) = hypre_TAlloc(HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE); hypre_CSRMatrixSortedJ(A) = hypre_TAlloc(HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE);
hypre_CSRMatrixSortedData(A) = hypre_TAlloc(HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE); hypre_CSRMatrixSortedData(A) = hypre_TAlloc(HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE);
hypre_TMemcpy(hypre_CSRMatrixSortedJ(A), A_j, HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); hypre_TMemcpy(hypre_CSRMatrixSortedJ(A), A_j, HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE,
hypre_TMemcpy(hypre_CSRMatrixSortedData(A), A_a, HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); HYPRE_MEMORY_DEVICE);
hypre_TMemcpy(hypre_CSRMatrixSortedData(A), A_a, HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE,
HYPRE_MEMORY_DEVICE);
hypre_CSRMatrixJ(A) = hypre_CSRMatrixSortedJ(A); hypre_CSRMatrixJ(A) = hypre_CSRMatrixSortedJ(A);
hypre_CSRMatrixData(A) = hypre_CSRMatrixSortedData(A); hypre_CSRMatrixData(A) = hypre_CSRMatrixSortedData(A);
@ -2129,7 +2131,7 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
hypre_int buffer_size; hypre_int buffer_size;
hypre_int structural_zero; hypre_int structural_zero;
#endif #endif
cusparseDiagType_t DiagType = unit_diag ? CUSPARSE_DIAG_TYPE_UNIT: CUSPARSE_DIAG_TYPE_NON_UNIT; cusparseDiagType_t DiagType = unit_diag ? CUSPARSE_DIAG_TYPE_UNIT : CUSPARSE_DIAG_TYPE_NON_UNIT;
if (nrow != ncol) if (nrow != ncol)
{ {
@ -2189,8 +2191,10 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo,
HYPRE_Complex *A_ma = hypre_CsrsvDataMatData(csrsv_data); HYPRE_Complex *A_ma = hypre_CsrsvDataMatData(csrsv_data);
#if CUSPARSE_VERSION >= CUSPARSE_SPSV_VERSION #if CUSPARSE_VERSION >= CUSPARSE_SPSV_VERSION
cusparseSpMatDescr_t matA = hypre_CSRMatrixToCusparseSpMat_core(nrow, ncol, 0, nnzA, A_i, A_j, A_ma); cusparseSpMatDescr_t matA = hypre_CSRMatrixToCusparseSpMat_core(nrow, ncol, 0, nnzA, A_i, A_j,
HYPRE_CUSPARSE_CALL( cusparseSpMatSetAttribute(matA, CUSPARSE_SPMAT_DIAG_TYPE, &DiagType, sizeof(cusparseDiagType_t)) ); A_ma);
HYPRE_CUSPARSE_CALL( cusparseSpMatSetAttribute(matA, CUSPARSE_SPMAT_DIAG_TYPE, &DiagType,
sizeof(cusparseDiagType_t)) );
cusparseDnVecDescr_t vecF = hypre_VectorToCusparseDnVec_core(f_data, nrow); cusparseDnVecDescr_t vecF = hypre_VectorToCusparseDnVec_core(f_data, nrow);
cusparseDnVecDescr_t vecU = hypre_VectorToCusparseDnVec_core(u_data, ncol); cusparseDnVecDescr_t vecU = hypre_VectorToCusparseDnVec_core(u_data, ncol);
#else #else
@ -2455,7 +2459,8 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo,
HYPRE_Complex alpha = 1.0; HYPRE_Complex alpha = 1.0;
size_t buffer_size; size_t buffer_size;
hypre_int structural_zero; hypre_int structural_zero;
rocsparse_diag_type diag_type = unit_diag ? rocsparse_diag_type_unit: rocsparse_diag_type_non_unit; rocsparse_diag_type diag_type = unit_diag ? rocsparse_diag_type_unit :
rocsparse_diag_type_non_unit;
if (nrow != ncol) if (nrow != ncol)
{ {

View File

@ -68,9 +68,9 @@ hypreDevice_CSRSpGemm(hypre_CSRMatrix *A,
hypre_CSRMatrixGPUMatDescr(C), hypre_CSRMatrixGPUMatInfo(C), &nnzC, &d_ic, &d_jc, &d_c); hypre_CSRMatrixGPUMatDescr(C), hypre_CSRMatrixGPUMatInfo(C), &nnzC, &d_ic, &d_jc, &d_c);
#elif defined(HYPRE_USING_ONEMKLSPARSE) #elif defined(HYPRE_USING_ONEMKLSPARSE)
hypreDevice_CSRSpGemmOnemklsparse( m, k, n, hypreDevice_CSRSpGemmOnemklsparse( m, k, n,
hypre_CSRMatrixGPUMatHandle(A), nnza, d_ia, d_ja, d_a, hypre_CSRMatrixGPUMatHandle(A), nnza, d_ia, d_ja, d_a,
hypre_CSRMatrixGPUMatHandle(B), nnzb, d_ib, d_jb, d_b, hypre_CSRMatrixGPUMatHandle(B), nnzb, d_ib, d_jb, d_b,
hypre_CSRMatrixGPUMatHandle(C), &nnzC, &d_ic, &d_jc, &d_c); hypre_CSRMatrixGPUMatHandle(C), &nnzC, &d_ic, &d_jc, &d_c);
#else #else
hypre_error_w_msg(HYPRE_ERROR_GENERIC, hypre_error_w_msg(HYPRE_ERROR_GENERIC,
"Attempting to use device sparse matrix library for SpGEMM without having compiled support for it!\n"); "Attempting to use device sparse matrix library for SpGEMM without having compiled support for it!\n");

View File

@ -39,7 +39,7 @@ hypre_spgemm_hash_insert_numer(
HYPRE_Int old = -1; HYPRE_Int old = -1;
#if defined(HYPRE_USING_HIP) && (HIP_VERSION == 50422804) #if defined(HYPRE_USING_HIP) && (HIP_VERSION == 50422804)
/* VPM: see https://github.com/hypre-space/hypre/issues/875 */ /* VPM: see https://github.com/hypre-space/hypre/issues/875 */
#pragma unroll 8 #pragma unroll 8
#else #else
#pragma unroll UNROLL_FACTOR #pragma unroll UNROLL_FACTOR

View File

@ -34,7 +34,7 @@ hypre_spgemm_hash_insert_symbl(
HYPRE_Int old = -1; HYPRE_Int old = -1;
#if defined(HYPRE_USING_HIP) && (HIP_VERSION == 50422804) #if defined(HYPRE_USING_HIP) && (HIP_VERSION == 50422804)
/* VPM: see https://github.com/hypre-space/hypre/issues/875 */ /* VPM: see https://github.com/hypre-space/hypre/issues/875 */
#pragma unroll 8 #pragma unroll 8
#else #else
#pragma unroll UNROLL_FACTOR #pragma unroll UNROLL_FACTOR

View File

@ -87,10 +87,15 @@ HYPRE_Int hypre_CSRMatrixPermuteDevice( hypre_CSRMatrix *A, HYPRE_Int *perm,
HYPRE_Int *rqperm, hypre_CSRMatrix *B ); HYPRE_Int *rqperm, hypre_CSRMatrix *B );
HYPRE_Int hypre_CSRMatrixSortRow(hypre_CSRMatrix *A); HYPRE_Int hypre_CSRMatrixSortRow(hypre_CSRMatrix *A);
HYPRE_Int hypre_CSRMatrixSortRowOutOfPlace(hypre_CSRMatrix *A); HYPRE_Int hypre_CSRMatrixSortRowOutOfPlace(hypre_CSRMatrix *A);
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice_core(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, HYPRE_Int offset_u); HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice_core(char uplo, HYPRE_Int unit_diag,
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, hypre_Vector *u ); hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u,
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); HYPRE_Int offset_u);
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice(char uplo, HYPRE_Int unit_diag,
hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, hypre_Vector *u );
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo, HYPRE_Int unit_diag,
hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u );
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, HYPRE_Int unit_diag,
hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u );
HYPRE_Int hypre_CSRMatrixIntersectPattern(hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA, HYPRE_Int hypre_CSRMatrixIntersectPattern(hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA,
HYPRE_Int diag_option); HYPRE_Int diag_option);
HYPRE_Int hypre_CSRMatrixDiagScaleDevice( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd); HYPRE_Int hypre_CSRMatrixDiagScaleDevice( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd);

View File

@ -363,10 +363,15 @@ HYPRE_Int hypre_CSRMatrixPermuteDevice( hypre_CSRMatrix *A, HYPRE_Int *perm,
HYPRE_Int *rqperm, hypre_CSRMatrix *B ); HYPRE_Int *rqperm, hypre_CSRMatrix *B );
HYPRE_Int hypre_CSRMatrixSortRow(hypre_CSRMatrix *A); HYPRE_Int hypre_CSRMatrixSortRow(hypre_CSRMatrix *A);
HYPRE_Int hypre_CSRMatrixSortRowOutOfPlace(hypre_CSRMatrix *A); HYPRE_Int hypre_CSRMatrixSortRowOutOfPlace(hypre_CSRMatrix *A);
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice_core(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u, HYPRE_Int offset_u); HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice_core(char uplo, HYPRE_Int unit_diag,
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, hypre_Vector *u ); hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, HYPRE_Int offset_f, hypre_Vector *u,
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); HYPRE_Int offset_u);
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, HYPRE_Int unit_diag, hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u ); HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveDevice(char uplo, HYPRE_Int unit_diag,
hypre_CSRMatrix *A, HYPRE_Real *l1_norms, hypre_Vector *f, hypre_Vector *u );
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveRocsparse(char uplo, HYPRE_Int unit_diag,
hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u );
HYPRE_Int hypre_CSRMatrixTriLowerUpperSolveCusparse(char uplo, HYPRE_Int unit_diag,
hypre_CSRMatrix *A, HYPRE_Real *l1_norms, HYPRE_Complex *f, HYPRE_Complex *u );
HYPRE_Int hypre_CSRMatrixIntersectPattern(hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA, HYPRE_Int hypre_CSRMatrixIntersectPattern(hypre_CSRMatrix *A, hypre_CSRMatrix *B, HYPRE_Int *markA,
HYPRE_Int diag_option); HYPRE_Int diag_option);
HYPRE_Int hypre_CSRMatrixDiagScaleDevice( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd); HYPRE_Int hypre_CSRMatrixDiagScaleDevice( hypre_CSRMatrix *A, hypre_Vector *ld, hypre_Vector *rd);

View File

@ -22,7 +22,8 @@ cusparseDnVecDescr_t hypre_VectorToCusparseDnVec_core(HYPRE_Complex *x_data, HYP
cusparseDnVecDescr_t hypre_VectorToCusparseDnVec(const hypre_Vector *x, HYPRE_Int offset, cusparseDnVecDescr_t hypre_VectorToCusparseDnVec(const hypre_Vector *x, HYPRE_Int offset,
HYPRE_Int size_override); HYPRE_Int size_override);
cusparseDnMatDescr_t hypre_VectorToCusparseDnMat_core(HYPRE_Complex *x_data, HYPRE_Int nrow, HYPRE_Int ncol, HYPRE_Int order); cusparseDnMatDescr_t hypre_VectorToCusparseDnMat_core(HYPRE_Complex *x_data, HYPRE_Int nrow,
HYPRE_Int ncol, HYPRE_Int order);
cusparseDnMatDescr_t hypre_VectorToCusparseDnMat(const hypre_Vector *x); cusparseDnMatDescr_t hypre_VectorToCusparseDnMat(const hypre_Vector *x);

View File

@ -2108,14 +2108,15 @@ HYPRE_Int hypre_IntArrayCountHost( hypre_IntArray *v, HYPRE_Int value,
HYPRE_Int hypre_IntArrayCount( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int hypre_IntArrayCount( hypre_IntArray *v, HYPRE_Int value,
HYPRE_Int *num_values_ptr ); HYPRE_Int *num_values_ptr );
HYPRE_Int hypre_IntArrayInverseMapping( hypre_IntArray *v, hypre_IntArray **w_ptr ); HYPRE_Int hypre_IntArrayInverseMapping( hypre_IntArray *v, hypre_IntArray **w_ptr );
HYPRE_Int hypre_IntArrayNegate( hypre_IntArray *v );
/* int_array_device.c */ /* int_array_device.c */
#if defined(HYPRE_USING_GPU) #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
HYPRE_Int hypre_IntArraySetConstantValuesDevice( hypre_IntArray *v, HYPRE_Int value ); HYPRE_Int hypre_IntArraySetConstantValuesDevice( hypre_IntArray *v, HYPRE_Int value );
HYPRE_Int hypre_IntArrayReverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w );
HYPRE_Int hypre_IntArrayCountDevice ( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int hypre_IntArrayCountDevice ( hypre_IntArray *v, HYPRE_Int value,
HYPRE_Int *num_values_ptr ); HYPRE_Int *num_values_ptr );
HYPRE_Int hypre_IntArrayInverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w ); HYPRE_Int hypre_IntArrayInverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w );
HYPRE_Int hypre_IntArrayNegateDevice( hypre_IntArray *v );
#endif #endif
/* memory_tracker.c */ /* memory_tracker.c */

View File

@ -303,7 +303,7 @@ hypre_IntArraySetConstantValues( hypre_IntArray *v,
return hypre_error_flag; return hypre_error_flag;
} }
#if defined(HYPRE_USING_GPU) #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_IntArrayMemoryLocation(v)); HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_IntArrayMemoryLocation(v));
if (exec == HYPRE_EXEC_DEVICE) if (exec == HYPRE_EXEC_DEVICE)
@ -361,7 +361,7 @@ hypre_IntArrayCount( hypre_IntArray *v,
return hypre_error_flag; return hypre_error_flag;
} }
#if defined(HYPRE_USING_GPU) #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_IntArrayMemoryLocation(v)); HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_IntArrayMemoryLocation(v));
if (exec == HYPRE_EXEC_DEVICE) if (exec == HYPRE_EXEC_DEVICE)
@ -446,3 +446,41 @@ hypre_IntArrayInverseMapping( hypre_IntArray *v,
return hypre_error_flag; return hypre_error_flag;
} }
/*--------------------------------------------------------------------------
* hypre_IntArrayNegate
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_IntArrayNegate( hypre_IntArray *v )
{
HYPRE_Int *array_data = hypre_IntArrayData(v);
HYPRE_Int size = hypre_IntArraySize(v);
HYPRE_Int i;
if (size <= 0)
{
return hypre_error_flag;
}
#if defined(HYPRE_USING_GPU)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_IntArrayMemoryLocation(v));
if (exec == HYPRE_EXEC_DEVICE)
{
hypre_IntArrayNegateDevice(v);
}
else
#endif
{
#if defined(HYPRE_USING_OPENMP)
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < size; i++)
{
array_data[i] = - array_data[i];
}
}
return hypre_error_flag;
}

View File

@ -5,11 +5,11 @@
* SPDX-License-Identifier: (Apache-2.0 OR MIT) * SPDX-License-Identifier: (Apache-2.0 OR MIT)
******************************************************************************/ ******************************************************************************/
#include "_hypre_onedpl.hpp"
#include "_hypre_utilities.h" #include "_hypre_utilities.h"
#include "_hypre_utilities.hpp" #include "_hypre_utilities.hpp"
#include "_hypre_onedpl.hpp"
#if defined(HYPRE_USING_GPU) #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
/*-------------------------------------------------------------------------- /*--------------------------------------------------------------------------
* hypre_IntArraySetConstantValuesDevice * hypre_IntArraySetConstantValuesDevice
@ -39,7 +39,7 @@ hypre_IntArraySetConstantValuesDevice( hypre_IntArray *v,
return hypre_error_flag; return hypre_error_flag;
} }
#if !defined(HYPRE_USING_DEVICE_OPENMP) #if defined(HYPRE_USING_GPU)
/*-------------------------------------------------------------------------- /*--------------------------------------------------------------------------
* hypreGPUKernel_IntArrayInverseMapping * hypreGPUKernel_IntArrayInverseMapping
*--------------------------------------------------------------------------*/ *--------------------------------------------------------------------------*/
@ -122,4 +122,26 @@ hypre_IntArrayCountDevice( hypre_IntArray *v,
return hypre_error_flag; return hypre_error_flag;
} }
/*--------------------------------------------------------------------------
* hypre_IntArrayNegateDevice
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_IntArrayNegateDevice( hypre_IntArray *v )
{
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
HYPRE_THRUST_CALL( transform,
hypre_IntArrayData(v),
hypre_IntArrayData(v) + hypre_IntArraySize(v),
hypre_IntArrayData(v),
thrust::negate<HYPRE_Int>() );
#elif defined(HYPRE_USING_SYCL) || defined(HYPRE_USING_DEVICE_OPENMP)
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Not implemented yet!");
#endif
return hypre_error_flag;
}
#endif #endif

View File

@ -55,6 +55,7 @@ hypre_GetMemoryLocationName(hypre_MemoryLocation memory_location,
/*-------------------------------------------------------------------------- /*--------------------------------------------------------------------------
* hypre_OutOfMemory * hypre_OutOfMemory
*--------------------------------------------------------------------------*/ *--------------------------------------------------------------------------*/
static inline void static inline void
hypre_OutOfMemory(size_t size) hypre_OutOfMemory(size_t size)
{ {
@ -74,8 +75,7 @@ hypre_WrongMemoryLocation(void)
void void
hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location) hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location)
{ {
#if defined(HYPRE_DEBUG) #if defined(HYPRE_DEBUG) && defined(HYPRE_USING_GPU)
#if defined(HYPRE_USING_GPU)
if (!ptr) if (!ptr)
{ {
return; return;
@ -87,7 +87,6 @@ hypre_CheckMemoryLocation(void *ptr, hypre_MemoryLocation location)
* will create an endless loop otherwise */ * will create an endless loop otherwise */
assert(location == location_ptr); assert(location == location_ptr);
#endif #endif
#endif
} }
/*========================================================================== /*==========================================================================

View File

@ -401,14 +401,15 @@ HYPRE_Int hypre_IntArrayCountHost( hypre_IntArray *v, HYPRE_Int value,
HYPRE_Int hypre_IntArrayCount( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int hypre_IntArrayCount( hypre_IntArray *v, HYPRE_Int value,
HYPRE_Int *num_values_ptr ); HYPRE_Int *num_values_ptr );
HYPRE_Int hypre_IntArrayInverseMapping( hypre_IntArray *v, hypre_IntArray **w_ptr ); HYPRE_Int hypre_IntArrayInverseMapping( hypre_IntArray *v, hypre_IntArray **w_ptr );
HYPRE_Int hypre_IntArrayNegate( hypre_IntArray *v );
/* int_array_device.c */ /* int_array_device.c */
#if defined(HYPRE_USING_GPU) #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
HYPRE_Int hypre_IntArraySetConstantValuesDevice( hypre_IntArray *v, HYPRE_Int value ); HYPRE_Int hypre_IntArraySetConstantValuesDevice( hypre_IntArray *v, HYPRE_Int value );
HYPRE_Int hypre_IntArrayReverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w );
HYPRE_Int hypre_IntArrayCountDevice ( hypre_IntArray *v, HYPRE_Int value, HYPRE_Int hypre_IntArrayCountDevice ( hypre_IntArray *v, HYPRE_Int value,
HYPRE_Int *num_values_ptr ); HYPRE_Int *num_values_ptr );
HYPRE_Int hypre_IntArrayInverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w ); HYPRE_Int hypre_IntArrayInverseMappingDevice( hypre_IntArray *v, hypre_IntArray *w );
HYPRE_Int hypre_IntArrayNegateDevice( hypre_IntArray *v );
#endif #endif
/* memory_tracker.c */ /* memory_tracker.c */