SYCL support for AMG solve phase (#549)

This adds matvec, matrix transpose, and vector operations (axpy, inner product, etc.)
to the SYCL backend (via oneMKL and oneDPL) for running on Intel GPUs. With these pieces
in place, the AMG solve phase can now execute entirely on Intel GPUs.
Wayne Mitchell 2022-01-31 16:15:30 -08:00 committed by GitHub
parent b159c7dd58
commit a7bb784a45
41 changed files with 803 additions and 256 deletions
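
As a quick orientation (not part of the diff): the two libraries named in the description are driven from the same SYCL queue over USM device pointers -- oneMKL supplies the BLAS/sparse kernels and oneDPL supplies STL-style device algorithms. Below is a minimal standalone sketch of that pattern; the function and variable names are illustrative only, and hypre's actual wrappers appear in the hunks that follow.

/* oneDPL headers must come before standard C++ headers (see the new _hypre_onedpl.hpp below) */
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#include <CL/sycl.hpp>
#include <oneapi/mkl/blas.hpp>

/* y := alpha*x + y with oneMKL, then y := 2*y with a oneDPL device policy */
void axpy_then_scale(sycl::queue &q, int n, double alpha, const double *x, double *y)
{
   oneapi::mkl::blas::axpy(q, n, alpha, x, 1, y, 1).wait();

   auto policy = oneapi::dpl::execution::make_device_policy(q);
   std::transform(policy, y, y + n, y, [](double v) { return 2.0 * v; });
}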


@ -113,6 +113,10 @@ option(HYPRE_ENABLE_CUBLAS "Use cuBLAS" OFF)
option(HYPRE_ENABLE_CURAND "Use cuRAND" ON)
option(HYPRE_ENABLE_GPU_PROFILING "Use NVTX on CUDA" OFF)
set(HYPRE_CUDA_SM "70" CACHE STRING "Target CUDA architecture.")
# oneAPI options
option(HYPRE_ENABLE_ONEMKLSPARSE "Use oneMKL sparse" ON)
option(HYPRE_ENABLE_ONEMKLBLAS "Use oneMKL blas" ON)
option(HYPRE_ENABLE_ONEMKLRAND "Use oneMKL rand" ON)
option(TPL_DSUPERLU_LIBRARIES "List of absolute paths to SuperLU_Dist link libraries [].")
option(TPL_DSUPERLU_INCLUDE_DIRS "List of absolute paths to SuperLU_Dist include directories [].")
@ -421,6 +425,26 @@ if (HYPRE_USING_CUDA)
endif ()
endif ()
if (HYPRE_USING_SYCL)
if (HYPRE_ENABLE_ONEMKLSPARSE)
set(HYPRE_USING_ONEMKLSPARSE ON CACHE BOOL "" FORCE)
endif()
if (HYPRE_ENABLE_ONEMKLBLAS)
set(HYPRE_USING_ONEMKLBLAS ON CACHE BOOL "" FORCE)
endif()
if (HYPRE_ENABLE_ONEMKLRAND)
set(HYPRE_USING_ONEMKLRAND ON CACHE BOOL "" FORCE)
endif()
if (HYPRE_USING_ONEMKLSPARSE OR HYPRE_USING_ONEMKLBLAS OR HYPRE_USING_ONEMKLRAND)
set(MKL_LINK static)
set(MKL_THREADING sequential)
find_package(MKL CONFIG REQUIRED HINTS "$ENV{MKLROOT}/lib/cmake/mkl")
target_compile_options(${PROJECT_NAME} PUBLIC $<TARGET_PROPERTY:MKL::MKL_DPCPP,INTERFACE_COMPILE_OPTIONS>)
target_include_directories(${PROJECT_NAME} PUBLIC $<TARGET_PROPERTY:MKL::MKL_DPCPP,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_libraries(${PROJECT_NAME} PUBLIC $<LINK_ONLY:MKL::MKL_DPCPP>)
endif()
endif()
if (HYPRE_USING_CALIPER)
find_package(caliper REQUIRED)
target_link_libraries(${PROJECT_NAME} PUBLIC caliper)


@ -282,7 +282,10 @@ hypre_IJMatrixInitializeParCSR_v2(hypre_IJMatrix *matrix, HYPRE_MemoryLocation m
hypre_ParCSRMatrixInitialize_v2(par_matrix, memory_location);
hypre_AuxParCSRMatrixInitialize_v2(aux_matrix, memory_location_aux);
if (memory_location_aux == HYPRE_MEMORY_HOST)
/* WM: TODO - implement for sycl... is this available for other non-cuda/hip gpu implementations? */
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
if (hypre_GetExecPolicy1(memory_location_aux) == HYPRE_EXEC_HOST)
#endif
{
if (hypre_AuxParCSRMatrixDiagSizes(aux_matrix))
{


@ -264,12 +264,15 @@ hypre_AuxParCSRMatrixInitialize_v2( hypre_AuxParCSRMatrix *matrix,
return 0;
}
/* WM: Q - added the macro guards here (since IJ assembly not yet ported to sycl)... is this OK/correct? */
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
if (memory_location != HYPRE_MEMORY_HOST)
{
/* GPU assembly */
hypre_AuxParCSRMatrixNeedAux(matrix) = 1;
}
else
#endif
{
/* CPU assembly */
/* allocate stash for setting or adding off processor values */


@ -36,6 +36,9 @@ set(HYPRE_ENABLE_DEVICE_POOL @HYPRE_ENABLE_DEVICE_POOL@)
set(HYPRE_ENABLE_CUBLAS @HYPRE_ENABLE_CUBLAS@)
set(HYPRE_ENABLE_CURAND @HYPRE_ENABLE_CURAND@)
set(HYPRE_ENABLE_GPU_PROFILING @HYPRE_ENABLE_GPU_PROFILING@)
set(HYPRE_ENABLE_ONEMKLSPARSE @HYPRE_ENABLE_ONEMKLSPARSE@)
set(HYPRE_ENABLE_ONEMKLBLAS @HYPRE_ENABLE_ONEMKLBLAS@)
set(HYPRE_ENABLE_ONEMKLRAND @HYPRE_ENABLE_ONEMKLRAND@)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")


@ -103,6 +103,15 @@
/* Use NVTX */
#cmakedefine HYPRE_USING_NVTX 1
/* Use oneMKL sparse */
#cmakedefine HYPRE_USING_ONEMKLSPARSE 1
/* Use oneMKL blas */
#cmakedefine HYPRE_USING_ONEMKLBLAS 1
/* Use oneMKL rand */
#cmakedefine HYPRE_USING_ONEMKLRAND 1
/* Use SuperLU_Dist */
#cmakedefine HYPRE_USING_DSUPERLU 1


@ -211,6 +211,7 @@ hypre_using_onemklsparse=no
hypre_using_onemklblas=no
hypre_using_onemklrand=no
hypre_found_mkl=no
dnl *********************************************************************
dnl * Initialize flag-check variables
@ -519,6 +520,39 @@ AS_HELP_STRING([--enable-gpu-aware-mpi],
[hypre_gpu_mpi=no]
)
AC_ARG_ENABLE(onemklsparse,
AS_HELP_STRING([--enable-onemklsparse],
[Use oneMKL sparse (default is YES).]),
[case "${enableval}" in
yes) hypre_using_onemklsparse=yes ;;
no) hypre_using_onemklsparse=no ;;
*) hypre_using_onemklsparse=yes ;;
esac],
[hypre_using_onemklsparse=yes]
)
AC_ARG_ENABLE(onemklblas,
AS_HELP_STRING([--enable-onemklblas],
[Use oneMKL blas (default is YES).]),
[case "${enableval}" in
yes) hypre_using_onemklblas=yes ;;
no) hypre_using_onemklblas=no ;;
*) hypre_using_onemklblas=yes ;;
esac],
[hypre_using_onemklblas=yes]
)
AC_ARG_ENABLE(onemklrand,
AS_HELP_STRING([--enable-onemklrand],
[Use oneMKL rand (default is YES).]),
[case "${enableval}" in
yes) hypre_using_onemklrand=yes ;;
no) hypre_using_onemklrand=no ;;
*) hypre_using_onemklrand=yes ;;
esac],
[hypre_using_onemklrand=yes]
)
dnl * The AC_DEFINE is below, after hypre_using_mpi is completely set
dnl * Need to change to a new approach that always defines variable to some value
@ -2311,45 +2345,37 @@ AS_IF([test x"$hypre_using_sycl" == x"yes"],
AC_DEFINE(HYPRE_USING_GPU, 1, [Define to 1 if executing on GPU device])
AC_DEFINE(HYPRE_USING_SYCL, 1, [SYCL being used])
dnl (Ab)Using CUCC when compiling HIP
LINK_CC=${CUCC}
LINK_CXX=${CUCC}
if test "$hypre_user_chose_cuflags" = "no"
SYCLFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel"
if test "$hypre_using_debug" = "yes"
then
CUFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel"
if test "$hypre_using_debug" = "yes"
then
CUFLAGS="-O0 -Wall -g ${CUFLAGS}"
else
CUFLAGS="-g -O3 ${CUFLAGS}"
fi
SYCLFLAGS="-Wall -g ${SYCLFLAGS}"
else
SYCLFLAGS="-g -O3 ${SYCLFLAGS}"
fi
dnl (Ab)Use CUFLAGS to capture SYCL compilation flags
if test "$hypre_user_chose_cuflags" = "no"
then
CUFLAGS="${SYCLFLAGS}"
fi
dnl If not in debug mode, at least -O2, but the user can override
dnl with SYCLFLAGS on the configure line. If in debug mode, -O0 -Wall
dnl plus flags for debugging symbols
AS_IF([test x"$hypre_using_debug" == x"yes"],
[SYCLFLAGS="-O0 -Wall -g ${SYCLFLAGS}"],
[SYCLFLAGS="-O2 ${SYCLFLAGS}"],)
AS_IF([test x"$hypre_using_onemklsparse" == x"yes"],
[AC_DEFINE(HYPRE_USING_ONEMKLSPARSE, 1, [onemkl::SPARSE being used])
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} -lmkl_sycl"
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include/mkl/spblas.hpp"
AS_IF([test x"$hypre_using_onemklsparse" == x"yes" || test x"$hypre_using_onemklblas" == x"yes" || test x"$hypre_using_onemklrand" == x"yes"],
[AC_CHECK_HEADERS(["${MKLROOT}/include/mkl.h"],
[hypre_found_mkl=yes],
AC_MSG_ERROR([unable to find oneMKL ... Ensure that MKLROOT is set]))
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} ${MKLROOT}/lib/intel64/libmkl_sycl.a -Wl,-export-dynamic -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_sequential.a ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -lsycl -lOpenCL -lpthread -lm -ldl"
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include"
])
AS_IF([test x"$hypre_using_onemklsparse" == x"yes"], [AC_DEFINE(HYPRE_USING_ONEMKLSPARSE, 1, [onemkl::SPARSE being used])])
AS_IF([test x"$hypre_using_onemklblas" == x"yes"],
[AC_DEFINE(HYPRE_USING_ONEMKLBLAS, 1, [onemkl::BLAS being used])
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} -lmkl_sycl"
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include/oneapi/mkl/blas.hpp"
])
AS_IF([test x"$hypre_using_onemklblas" == x"yes"], [AC_DEFINE(HYPRE_USING_ONEMKLBLAS, 1, [onemkl::BLAS being used])])
AS_IF([test x"$hypre_using_onemklrand" == x"yes"],
[AC_DEFINE(HYPRE_USING_ONEMKLRAND, 1, [onemkl::rng being used])
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} -lmkl_sycl"
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include/oneapi/mkl/rng.hpp"
])
AS_IF([test x"$hypre_using_onemklrand" == x"yes"], [AC_DEFINE(HYPRE_USING_ONEMKLRAND, 1, [onemkl::rng being used])])
]) dnl AS_IF([test x"$hypre_using_sycl" == x"yes"]

src/configure (vendored)

@ -792,6 +792,9 @@ enable_rocblas
enable_rocrand
enable_gpu_profiling
enable_gpu_aware_mpi
enable_onemklsparse
enable_onemklblas
enable_onemklrand
with_LD
with_LDFLAGS
with_extra_CFLAGS
@ -1520,6 +1523,9 @@ Optional Features:
--enable-rocrand Use rocRAND (default is YES).
--enable-gpu-profiling Use NVTX on CUDA, rocTX on HIP (default is NO).
--enable-gpu-aware-mpi Use GPU memory aware MPI
--enable-onemklsparse Use oneMKL sparse (default is YES).
--enable-onemklblas Use oneMKL blas (default is YES).
--enable-onemklrand Use oneMKL rand (default is YES).
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@ -2819,6 +2825,7 @@ hypre_using_onemklsparse=no
hypre_using_onemklblas=no
hypre_using_onemklrand=no
hypre_found_mkl=no
hypre_blas_lib_old_style=no
hypre_blas_lib_dir_old_style=no
@ -3247,6 +3254,45 @@ else
fi
# Check whether --enable-onemklsparse was given.
if test "${enable_onemklsparse+set}" = set; then :
enableval=$enable_onemklsparse; case "${enableval}" in
yes) hypre_using_onemklsparse=yes ;;
no) hypre_using_onemklsparse=no ;;
*) hypre_using_onemklsparse=yes ;;
esac
else
hypre_using_onemklsparse=yes
fi
# Check whether --enable-onemklblas was given.
if test "${enable_onemklblas+set}" = set; then :
enableval=$enable_onemklblas; case "${enableval}" in
yes) hypre_using_onemklblas=yes ;;
no) hypre_using_onemklblas=no ;;
*) hypre_using_onemklblas=yes ;;
esac
else
hypre_using_onemklblas=yes
fi
# Check whether --enable-onemklrand was given.
if test "${enable_onemklrand+set}" = set; then :
enableval=$enable_onemklrand; case "${enableval}" in
yes) hypre_using_onemklrand=yes ;;
no) hypre_using_onemklrand=no ;;
*) hypre_using_onemklrand=yes ;;
esac
else
hypre_using_onemklrand=yes
fi
if test "x$CC" = "x"
then
@ -9138,24 +9184,40 @@ $as_echo "#define HYPRE_USING_GPU 1" >>confdefs.h
$as_echo "#define HYPRE_USING_SYCL 1" >>confdefs.h
LINK_CC=${CUCC}
LINK_CC=${CUCC}
LINK_CXX=${CUCC}
if test "$hypre_user_chose_cuflags" = "no"
SYCLFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel"
if test "$hypre_using_debug" = "yes"
then
CUFLAGS="-fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel"
if test "$hypre_using_debug" = "yes"
then
CUFLAGS="-O0 -Wall -g ${CUFLAGS}"
else
CUFLAGS="-g -O3 ${CUFLAGS}"
fi
SYCLFLAGS="-Wall -g ${SYCLFLAGS}"
else
SYCLFLAGS="-g -O3 ${SYCLFLAGS}"
fi
if test "$hypre_user_chose_cuflags" = "no"
then
CUFLAGS="${SYCLFLAGS}"
fi
if test x"$hypre_using_debug" == x"yes"; then :
SYCLFLAGS="-O0 -Wall -g ${SYCLFLAGS}"
elif SYCLFLAGS="-O2 ${SYCLFLAGS}"; then :
if test x"$hypre_using_onemklsparse" == x"yes" || test x"$hypre_using_onemklblas" == x"yes" || test x"$hypre_using_onemklrand" == x"yes"; then :
for ac_header in "${MKLROOT}/include/mkl.h"
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
cat >>confdefs.h <<_ACEOF
#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
_ACEOF
hypre_found_mkl=yes
else
as_fn_error $? "unable to find oneMKL ... Ensure that MKLROOT is set" "$LINENO" 5
fi
done
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} ${MKLROOT}/lib/intel64/libmkl_sycl.a -Wl,-export-dynamic -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_sequential.a ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -lsycl -lOpenCL -lpthread -lm -ldl"
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include"
fi
@ -9163,27 +9225,18 @@ fi
$as_echo "#define HYPRE_USING_ONEMKLSPARSE 1" >>confdefs.h
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} -lmkl_sycl"
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include/mkl/spblas.hpp"
fi
if test x"$hypre_using_onemklblas" == x"yes"; then :
$as_echo "#define HYPRE_USING_ONEMKLBLAS 1" >>confdefs.h
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} -lmkl_sycl"
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include/oneapi/mkl/blas.hpp"
fi
if test x"$hypre_using_onemklrand" == x"yes"; then :
$as_echo "#define HYPRE_USING_ONEMKLRAND 1" >>confdefs.h
HYPRE_SYCL_LIBS="${HYPRE_SYCL_LIBS} -lmkl_sycl"
HYPRE_SYCL_INCL="${HYPRE_SYCL_INCL} -I${MKLROOT}/include/oneapi/mkl/rng.hpp"
fi


@ -102,7 +102,7 @@ hypre_DistributedMatrixGetRowParCSR( hypre_DistributedMatrix *matrix,
// RL: if HYPRE_ParCSRMatrixGetRow was on device, need the next line to guarantee it's done
#if defined(HYPRE_USING_GPU)
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
return(ierr);


@ -459,7 +459,7 @@ HYPRE_Int hypre_ParCSRMatrixFixZeroRowsDevice(hypre_ParCSRMatrix *A)
HYPRE_CUDA_LAUNCH(hypreCUDAKernel_ParCSRMatrixFixZeroRows, gDim, bDim,
nrows, A_diag_i, A_diag_j, A_diag_data, A_offd_i, A_offd_data, num_cols_offd);
//hypre_SyncCudaComputeStream(hypre_handle());
//hypre_SyncComputeStream(hypre_handle());
return hypre_error_flag;
}


@ -632,25 +632,24 @@ hypre_BoomerAMGBuildModPartialExtInterp( hypre_ParCSRMatrix *A,
hypre_GpuProfilingPushRange("PartialExtInterp");
#endif
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
HYPRE_Int ierr = 0;
if (exec == HYPRE_EXEC_HOST)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
if (exec == HYPRE_EXEC_DEVICE)
{
ierr = hypre_BoomerAMGBuildModPartialExtInterpDevice(A, CF_marker, S, num_cpts_global,
num_old_cpts_global,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
else
#endif
{
ierr = hypre_BoomerAMGBuildModPartialExtInterpHost(A, CF_marker, S, num_cpts_global,
num_old_cpts_global,
num_functions, dof_func,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
else
{
ierr = hypre_BoomerAMGBuildModPartialExtInterpDevice(A, CF_marker, S, num_cpts_global,
num_old_cpts_global,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#endif
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_GpuProfilingPopRange();
@ -1338,25 +1337,24 @@ hypre_BoomerAMGBuildModPartialExtPEInterp( hypre_ParCSRMatrix *A,
hypre_GpuProfilingPushRange("PartialExtPEInterp");
#endif
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
HYPRE_Int ierr = 0;
if (exec == HYPRE_EXEC_HOST)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
if (exec == HYPRE_EXEC_DEVICE)
{
ierr = hypre_BoomerAMGBuildModPartialExtPEInterpDevice(A, CF_marker, S, num_cpts_global,
num_old_cpts_global,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
else
#endif
{
ierr = hypre_BoomerAMGBuildModPartialExtPEInterpHost(A, CF_marker, S, num_cpts_global,
num_old_cpts_global,
num_functions, dof_func,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
else
{
ierr = hypre_BoomerAMGBuildModPartialExtPEInterpDevice(A, CF_marker, S, num_cpts_global,
num_old_cpts_global,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#endif
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_GpuProfilingPopRange();


@ -217,7 +217,7 @@ hypre_BoomerAMGDD_FAC_Jacobi( void *amgdd_vdata,
HYPRE_Int level,
HYPRE_Int cycle_param )
{
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
hypre_ParAMGDDData *amgdd_data = (hypre_ParAMGDDData*) amgdd_vdata;
hypre_AMGDDCompGrid *compGrid = hypre_ParAMGDDDataCompGrid(amgdd_data)[level];
HYPRE_MemoryLocation memory_location = hypre_AMGDDCompGridMemoryLocation(compGrid);
@ -558,7 +558,7 @@ hypre_BoomerAMGDD_FAC_CFL1Jacobi( void *amgdd_vdata,
HYPRE_Int level,
HYPRE_Int cycle_param )
{
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
hypre_ParAMGDDData *amgdd_data = (hypre_ParAMGDDData*) amgdd_vdata;
hypre_AMGDDCompGrid *compGrid = hypre_ParAMGDDDataCompGrid(amgdd_data)[level];
HYPRE_MemoryLocation memory_location = hypre_AMGDDCompGridMemoryLocation(compGrid);


@ -8,7 +8,7 @@
#include "_hypre_parcsr_ls.h"
#include "_hypre_utilities.hpp"
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
HYPRE_Int
hypre_BoomerAMGDD_FAC_JacobiDevice( void *amgdd_vdata,
@ -149,4 +149,4 @@ hypre_BoomerAMGDD_FAC_CFL1JacobiDevice( void *amgdd_vdata,
return hypre_error_flag;
}
#endif // defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#endif // defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)


@ -554,23 +554,22 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A,
hypre_GpuProfilingPushRange("ModExtInterp");
#endif
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
HYPRE_Int ierr = 0;
if (exec == HYPRE_EXEC_HOST)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
if (exec == HYPRE_EXEC_DEVICE)
{
ierr = hypre_BoomerAMGBuildExtInterpDevice(A, CF_marker, S, num_cpts_global, 1, NULL,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
else
#endif
{
ierr = hypre_BoomerAMGBuildModExtInterpHost(A, CF_marker, S, num_cpts_global, num_functions,
dof_func,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
else
{
ierr = hypre_BoomerAMGBuildExtInterpDevice(A, CF_marker, S, num_cpts_global, 1, NULL,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#endif
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_GpuProfilingPopRange();
@ -1869,23 +1868,22 @@ hypre_BoomerAMGBuildModExtPEInterp(hypre_ParCSRMatrix *A,
hypre_GpuProfilingPushRange("ModExtPEInterp");
#endif
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
HYPRE_Int ierr = 0;
if (exec == HYPRE_EXEC_HOST)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
if (exec == HYPRE_EXEC_DEVICE)
{
ierr = hypre_BoomerAMGBuildExtPEInterpDevice(A, CF_marker, S, num_cpts_global, 1, NULL,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
else
#endif
{
ierr = hypre_BoomerAMGBuildModExtPEInterpHost(A, CF_marker, S, num_cpts_global,
num_functions, dof_func,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
else
{
ierr = hypre_BoomerAMGBuildExtPEInterpDevice(A, CF_marker, S, num_cpts_global, 1, NULL,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#endif
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_GpuProfilingPopRange();


@ -122,8 +122,8 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A,
pass_starts = hypre_CTAlloc(HYPRE_Int, 10, HYPRE_MEMORY_HOST);
/* contains beginning for each pass in pass_order field, assume no more than 10 passes */
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine + 1, HYPRE_MEMORY_HOST);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine + 1, HYPRE_MEMORY_HOST);
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine + 1, HYPRE_MEMORY_DEVICE);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine + 1, HYPRE_MEMORY_DEVICE);
cnt = 0;
remaining = 0;
@ -351,10 +351,10 @@ hypre_BoomerAMGBuildModMultipassHost( hypre_ParCSRMatrix *A,
P_offd_i[i + 1] += P_offd_i[i];
}
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_i[n_fine], HYPRE_MEMORY_HOST);
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_i[n_fine], HYPRE_MEMORY_HOST);
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_i[n_fine], HYPRE_MEMORY_HOST);
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_i[n_fine], HYPRE_MEMORY_HOST);
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_i[n_fine], HYPRE_MEMORY_DEVICE);
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_i[n_fine], HYPRE_MEMORY_DEVICE);
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_i[n_fine], HYPRE_MEMORY_DEVICE);
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_i[n_fine], HYPRE_MEMORY_DEVICE);
/* insert weights for coarse points */
for (i = 0; i < pass_starts[1]; i++)


@ -255,8 +255,8 @@ hypre_BoomerAMGBuildMultipassHost( hypre_ParCSRMatrix *A,
if (pass_array_size) { pass_array = hypre_CTAlloc(HYPRE_Int, pass_array_size, HYPRE_MEMORY_HOST); }
pass_pointer = hypre_CTAlloc(HYPRE_Int, max_num_passes + 1, HYPRE_MEMORY_HOST);
if (n_fine) { assigned = hypre_CTAlloc(HYPRE_Int, n_fine, HYPRE_MEMORY_HOST); }
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine + 1, HYPRE_MEMORY_HOST);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine + 1, HYPRE_MEMORY_HOST);
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine + 1, HYPRE_MEMORY_DEVICE);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine + 1, HYPRE_MEMORY_DEVICE);
if (n_coarse) { C_array = hypre_CTAlloc(HYPRE_Int, n_coarse, HYPRE_MEMORY_HOST); }
if (num_cols_offd)
@ -1138,14 +1138,14 @@ hypre_BoomerAMGBuildMultipassHost( hypre_ParCSRMatrix *A,
hypre_TFree(cnt_nz_offd_per_thread, HYPRE_MEMORY_HOST);
hypre_TFree(max_num_threads, HYPRE_MEMORY_HOST);
P_diag_j = hypre_CTAlloc(HYPRE_Int, total_nz, HYPRE_MEMORY_HOST);
P_diag_data = hypre_CTAlloc(HYPRE_Real, total_nz, HYPRE_MEMORY_HOST);
P_diag_j = hypre_CTAlloc(HYPRE_Int, total_nz, HYPRE_MEMORY_DEVICE);
P_diag_data = hypre_CTAlloc(HYPRE_Real, total_nz, HYPRE_MEMORY_DEVICE);
if (total_nz_offd)
{
P_offd_j = hypre_CTAlloc(HYPRE_Int, total_nz_offd, HYPRE_MEMORY_HOST);
P_offd_data = hypre_CTAlloc(HYPRE_Real, total_nz_offd, HYPRE_MEMORY_HOST);
P_offd_j = hypre_CTAlloc(HYPRE_Int, total_nz_offd, HYPRE_MEMORY_DEVICE);
P_offd_data = hypre_CTAlloc(HYPRE_Real, total_nz_offd, HYPRE_MEMORY_DEVICE);
}
for (i = 0; i < n_fine; i++)


@ -1145,7 +1145,7 @@ hypre_BoomerAMGRelax7Jacobi( hypre_ParCSRMatrix *A,
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_SetSyncCudaCompute(sync_stream);
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
return hypre_error_flag;


@ -169,7 +169,7 @@ hypre_ParCSRMaxEigEstimateDevice( hypre_ParCSRMatrix *A,
rowsums_upper,
scale);
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
e_min = HYPRE_THRUST_CALL(reduce, rowsums_lower, rowsums_lower + A_num_rows, (HYPRE_Real)0,
thrust::minimum<HYPRE_Real>());
@ -323,7 +323,7 @@ hypre_ParCSRMaxEigEstimateCGDevice(hypre_ParCSRMatrix *A, /* matrix to relax
/* set residual to random */
hypre_CurandUniform(local_size, r_data, 0, 0, 0, 0);
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
HYPRE_THRUST_CALL(transform,
r_data, r_data + local_size, r_data,


@ -434,7 +434,7 @@ hypre_ParCSRCommHandleCreate_v2 ( HYPRE_Int job,
recv_data = recv_data_in;
// TODO RL: it seems that we need to sync the CUDA stream before doing GPU-GPU MPI.
// Need to check MPI documentation whether this is actually true
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
num_requests = num_sends + num_recvs;


@ -4113,7 +4113,7 @@ hypre_ParTMatmul( hypre_ParCSRMatrix *A,
if ( hypre_GetExecPolicy2(memory_location_A, memory_location_B) == HYPRE_EXEC_DEVICE )
{
hypre_CSRMatrixMoveDiagFirstDevice(hypre_ParCSRMatrixDiag(C));
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
}
#endif


@ -306,7 +306,7 @@ hypre_MergeDiagAndOffdDevice(hypre_ParCSRMatrix *A)
hypre_CSRMatrixData(B) = B_a;
hypre_CSRMatrixMemoryLocation(B) = HYPRE_MEMORY_DEVICE;
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return B;
}
@ -1044,7 +1044,7 @@ hypre_ParCSRMatrixGetRowDevice( hypre_ParCSRMatrix *mat,
*values = hypre_ParCSRMatrixRowvalues(mat);
}
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return hypre_error_flag;
}
@ -1603,7 +1603,7 @@ hypre_ParCSRDiagScale( HYPRE_ParCSRMatrix HA,
HYPRE_Int ierr = 0;
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypreDevice_DiagScaleVector(local_size, A_i, A_data, y_data, 0.0, x_data);
//hypre_SyncCudaComputeStream(hypre_handle());
//hypre_SyncComputeStream(hypre_handle());
#else /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
HYPRE_Int i;
#if defined(HYPRE_USING_DEVICE_OPENMP)


@ -349,7 +349,7 @@ hypre_ParCSRMatrixMatvecOutOfPlace( HYPRE_Complex alpha,
#if defined(HYPRE_USING_GPU)
hypre_SetSyncCudaCompute(sync_stream);
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
#ifdef HYPRE_PROFILE
@ -725,7 +725,7 @@ hypre_ParCSRMatrixMatvecT( HYPRE_Complex alpha,
#if defined(HYPRE_USING_GPU)
hypre_SetSyncCudaCompute(sync_stream);
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
#ifdef HYPRE_PROFILE


@ -497,7 +497,7 @@ hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A,
hypre_assert(!hypre_CSRMatrixCheckDiagFirstDevice(hypre_ParCSRMatrixDiag(C)));
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return C;
}
@ -817,7 +817,7 @@ hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R,
hypre_assert(!hypre_CSRMatrixCheckDiagFirstDevice(hypre_ParCSRMatrixDiag(C)));
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return C;
}


@ -1269,7 +1269,7 @@ hypre_CSRMatrixTranspose(hypre_CSRMatrix *A,
{
HYPRE_Int ierr = 0;
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_CSRMatrixMemoryLocation(A) );
if (exec == HYPRE_EXEC_DEVICE)


@ -15,7 +15,7 @@
#include "_hypre_utilities.hpp"
#include "seq_mv.hpp"
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
hypre_CsrsvData*
hypre_CsrsvDataCreate()
{
@ -82,9 +82,33 @@ hypre_GpuMatDataCreate()
hypre_GpuMatDataMatInfo(data) = info;
#endif
#if defined(HYPRE_USING_ONEMKLSPARSE)
oneapi::mkl::sparse::matrix_handle_t mat_handle;
HYPRE_SYCL_CALL( oneapi::mkl::sparse::init_matrix_handle(&mat_handle) );
hypre_GpuMatDataMatHandle(data) = mat_handle;
#endif
return data;
}
void
hypre_GPUMatDataSetCSRData( hypre_GpuMatData *data,
hypre_CSRMatrix *matrix)
{
#if defined(HYPRE_USING_ONEMKLSPARSE)
oneapi::mkl::sparse::matrix_handle_t mat_handle = hypre_GpuMatDataMatHandle(data);
HYPRE_SYCL_CALL( oneapi::mkl::sparse::set_csr_data(mat_handle,
hypre_CSRMatrixNumRows(matrix),
hypre_CSRMatrixNumCols(matrix),
oneapi::mkl::index_base::zero,
hypre_CSRMatrixI(matrix),
hypre_CSRMatrixJ(matrix),
hypre_CSRMatrixData(matrix)) );
#endif
}
void
hypre_GpuMatDataDestroy(hypre_GpuMatData *data)
{
@ -103,6 +127,10 @@ hypre_GpuMatDataDestroy(hypre_GpuMatData *data)
HYPRE_ROCSPARSE_CALL( rocsparse_destroy_mat_info(hypre_GpuMatDataMatInfo(data)) );
#endif
#if defined(HYPRE_USING_ONEMKLSPARSE)
HYPRE_SYCL_CALL( oneapi::mkl::sparse::release_matrix_handle(&hypre_GpuMatDataMatHandle(data)) );
#endif
hypre_TFree(data, HYPRE_MEMORY_HOST);
}
@ -151,7 +179,7 @@ hypre_CSRMatrixAddDevice ( HYPRE_Complex alpha,
hypre_CSRMatrixData(C) = C_data;
hypre_CSRMatrixMemoryLocation(C) = HYPRE_MEMORY_DEVICE;
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return C;
}
@ -174,7 +202,7 @@ hypre_CSRMatrixMultiplyDevice( hypre_CSRMatrix *A,
hypreDevice_CSRSpGemm(A, B, &C);
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return C;
}
@ -326,7 +354,7 @@ hypre_CSRMatrixSplitDevice( hypre_CSRMatrix *B_ext,
*B_ext_diag_ptr = B_ext_diag;
*B_ext_offd_ptr = B_ext_offd;
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return ierr;
}
@ -576,7 +604,7 @@ hypre_CSRMatrixAddPartialDevice( hypre_CSRMatrix *A,
hypre_CSRMatrixData(C) = C_data;
hypre_CSRMatrixMemoryLocation(C) = HYPRE_MEMORY_DEVICE;
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return C;
}
@ -618,7 +646,7 @@ hypre_CSRMatrixColNNzRealDevice( hypre_CSRMatrix *A,
hypre_TFree(reduced_col_indices, HYPRE_MEMORY_DEVICE);
hypre_TFree(reduced_col_nnz, HYPRE_MEMORY_DEVICE);
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return hypre_error_flag;
}
@ -681,7 +709,7 @@ hypre_CSRMatrixMoveDiagFirstDevice( hypre_CSRMatrix *A )
HYPRE_CUDA_LAUNCH(hypreCUDAKernel_CSRMoveDiagFirst, gDim, bDim,
nrows, A_i, A_j, A_data);
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return hypre_error_flag;
}
@ -725,7 +753,7 @@ hypre_CSRMatrixCheckDiagFirstDevice( hypre_CSRMatrix *A )
hypre_TFree(result, HYPRE_MEMORY_DEVICE);
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return ierr;
}
@ -822,7 +850,7 @@ hypre_CSRMatrixFixZeroDiagDevice( hypre_CSRMatrix *A,
hypre_TFree(result, HYPRE_MEMORY_DEVICE);
#endif
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return ierr;
}
@ -917,7 +945,7 @@ hypre_CSRMatrixReplaceDiagDevice( hypre_CSRMatrix *A,
hypre_TFree(result, HYPRE_MEMORY_DEVICE);
#endif
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return ierr;
}
@ -1113,7 +1141,7 @@ hypre_CSRMatrixComputeRowSumDevice( hypre_CSRMatrix *A,
row_sum, scal, set_or_add[0] == 's' );
}
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
}
/* type 0: diag
@ -1206,7 +1234,7 @@ hypre_CSRMatrixExtractDiagonalDevice( hypre_CSRMatrix *A,
HYPRE_CUDA_LAUNCH( hypreCUDAKernel_CSRExtractDiag, gDim, bDim, nrows, A_i, A_j, A_data, d, type );
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
}
/* return C = [A; B] */
@ -1530,9 +1558,6 @@ hypre_CSRMatrixTransposeDevice(hypre_CSRMatrix *A,
#elif defined(HYPRE_USING_ROCSPARSE)
hypreDevice_CSRSpTransRocsparse(nrows_A, ncols_A, nnz_A, A_i, A_j, A_data, &C_i, &C_j, &C_data,
data);
#elif defined(HYPRE_USING_ONEMKLSPARSE)
hypreDevice_CSRSpTransOnemklsparse(nrows_A, ncols_A, nnz_A, A_i, A_j, A_data, &C_i, &C_j, &C_data,
data);
#else
hypreDevice_CSRSpTrans(nrows_A, ncols_A, nnz_A, A_i, A_j, A_data, &C_i, &C_j, &C_data, data);
#endif
@ -1546,7 +1571,7 @@ hypre_CSRMatrixTransposeDevice(hypre_CSRMatrix *A,
*AT_ptr = C;
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
return hypre_error_flag;
}


@ -44,7 +44,7 @@ hypre_CSRMatrixCreate( HYPRE_Int num_rows,
/* set defaults */
hypre_CSRMatrixOwnsData(matrix) = 1;
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
hypre_CSRMatrixSortedJ(matrix) = NULL;
hypre_CSRMatrixSortedData(matrix) = NULL;
hypre_CSRMatrixCsrsvData(matrix) = NULL;
@ -78,7 +78,7 @@ hypre_CSRMatrixDestroy( hypre_CSRMatrix *matrix )
hypre_TFree(hypre_CSRMatrixBigJ(matrix), memory_location);
}
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
hypre_TFree(hypre_CSRMatrixSortedData(matrix), memory_location);
hypre_TFree(hypre_CSRMatrixSortedJ(matrix), memory_location);
hypre_CsrsvDataDestroy(hypre_CSRMatrixCsrsvData(matrix));
@ -1074,7 +1074,7 @@ hypre_CSRMatrixPrefetch( hypre_CSRMatrix *A, HYPRE_MemoryLocation memory_locatio
return ierr;
}
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
hypre_GpuMatData *
hypre_CSRMatrixGetGPUMatData(hypre_CSRMatrix *matrix)
{
@ -1086,6 +1086,7 @@ hypre_CSRMatrixGetGPUMatData(hypre_CSRMatrix *matrix)
if (!hypre_CSRMatrixGPUMatData(matrix))
{
hypre_CSRMatrixGPUMatData(matrix) = hypre_GpuMatDataCreate();
hypre_GPUMatDataSetCSRData(hypre_CSRMatrixGPUMatData(matrix), matrix);
}
return hypre_CSRMatrixGPUMatData(matrix);


@ -16,12 +16,12 @@
#ifndef hypre_CSR_MATRIX_HEADER
#define hypre_CSR_MATRIX_HEADER
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
struct hypre_CsrsvData;
typedef struct hypre_CsrsvData hypre_CsrsvData;
#endif
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
struct hypre_GpuMatData;
typedef struct hypre_GpuMatData hypre_GpuMatData;
#endif
@ -45,7 +45,7 @@ typedef struct
HYPRE_Int *rownnz; /* for compressing rows in matrix multiplication */
HYPRE_Int num_rownnz;
HYPRE_MemoryLocation memory_location; /* memory location of arrays i, j, data */
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
HYPRE_Int *sorted_j; /* some cusparse routines require sorted CSR */
HYPRE_Complex *sorted_data;
hypre_CsrsvData *csrsv_data;
@ -69,7 +69,7 @@ typedef struct
#define hypre_CSRMatrixOwnsData(matrix) ((matrix) -> owns_data)
#define hypre_CSRMatrixMemoryLocation(matrix) ((matrix) -> memory_location)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
#define hypre_CSRMatrixSortedJ(matrix) ((matrix) -> sorted_j)
#define hypre_CSRMatrixSortedData(matrix) ((matrix) -> sorted_data)
#define hypre_CSRMatrixCsrsvData(matrix) ((matrix) -> csrsv_data)


@ -712,10 +712,6 @@ hypre_CSRMatrixMatvecOutOfPlace( HYPRE_Complex alpha,
#if defined(HYPRE_USING_GPU)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_CSRMatrixMemoryLocation(A) );
/* WM: TODO - remove after sycl implementation in place */
#if defined(HYPRE_USING_SYCL)
exec = HYPRE_EXEC_HOST;
#endif
if (exec == HYPRE_EXEC_DEVICE)
{
ierr = hypre_CSRMatrixMatvecDevice(0, alpha, A, x, beta, b, y, offset);


@ -13,7 +13,6 @@
#include "seq_mv.h"
#include "_hypre_utilities.hpp"
#include "seq_mv.hpp"
#if defined(HYPRE_USING_GPU)
@ -53,8 +52,6 @@ hypre_CSRMatrixMatvecDevice2( HYPRE_Int trans,
hypre_CSRMatrixMatvecRocsparse(trans, alpha, A, x, beta, y, offset);
#elif defined(HYPRE_USING_ONEMKLSPARSE)
hypre_CSRMatrixMatvecOnemklsparse(trans, alpha, A, x, beta, y, offset);
// WM: TODO: remove trivial HYPRE_USING_SYCL branch after onemklsparse implementation is in
#elif defined(HYPRE_USING_SYCL)
#else // #ifdef HYPRE_USING_CUSPARSE
#error HYPRE SPMV TODO
#endif
@ -117,7 +114,7 @@ hypre_CSRMatrixMatvecDevice( HYPRE_Int trans,
hypre_CSRMatrixMatvecDevice2(trans, alpha, A, x, beta, y, offset);
}
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_GpuProfilingPopRange();
@ -201,7 +198,7 @@ hypre_CSRMatrixMatvecCusparseNewAPI( HYPRE_Int trans,
#endif
dBuffer) );
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
if (trans)
{
@ -328,7 +325,29 @@ hypre_CSRMatrixMatvecOnemklsparse( HYPRE_Int trans,
hypre_Vector *y,
HYPRE_Int offset )
{
/* WM: TODO */
sycl::queue *compute_queue = hypre_HandleComputeStream(hypre_handle());
hypre_CSRMatrix *AT;
oneapi::mkl::sparse::matrix_handle_t matA_handle = hypre_CSRMatrixGPUMatHandle(A);
if (trans)
{
hypre_CSRMatrixTransposeDevice(A, &AT, 1);
matA_handle = hypre_CSRMatrixGPUMatHandle(AT);
}
HYPRE_SYCL_CALL( oneapi::mkl::sparse::gemv(*compute_queue,
oneapi::mkl::transpose::nontrans,
alpha,
matA_handle,
hypre_VectorData(x),
beta,
hypre_VectorData(y) + offset).wait() );
if (trans)
{
hypre_CSRMatrixDestroy(AT);
}
return hypre_error_flag;
}
#endif // #if defined(HYPRE_USING_ROCSPARSE)
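
For reference (not part of the diff), the oneMKL sparse handle created in hypre_GpuMatDataCreate and filled in hypre_GPUMatDataSetCSRData earlier in this commit, and consumed by the gemv call above, follows a simple init / set_csr_data / gemv / release lifecycle. A standalone sketch using the same entry points; names are illustrative and exact signatures vary slightly across oneMKL releases:

#include <CL/sycl.hpp>
#include <oneapi/mkl/spblas.hpp>

/* y := alpha*A*x + beta*y for a CSR matrix held in USM device arrays ia/ja/a */
void csr_gemv(sycl::queue &q, int nrows, int ncols,
              int *ia, int *ja, double *a,
              double alpha, double *x, double beta, double *y)
{
   oneapi::mkl::sparse::matrix_handle_t h;
   oneapi::mkl::sparse::init_matrix_handle(&h);
   oneapi::mkl::sparse::set_csr_data(h, nrows, ncols, oneapi::mkl::index_base::zero,
                                     ia, ja, a);
   oneapi::mkl::sparse::gemv(q, oneapi::mkl::transpose::nontrans,
                             alpha, h, x, beta, y).wait();
   oneapi::mkl::sparse::release_matrix_handle(&h);
}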


@ -89,7 +89,7 @@ hypreDevice_CSRSpGemm(hypre_CSRMatrix *A,
#endif
hypreDevice_CSRSpGemmRownnz(m, k, n, d_ia, d_ja, d_ib, d_jb, 0 /* without input rc */, d_rc);
#ifdef HYPRE_SPGEMM_TIMING
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
t2 = hypre_MPI_Wtime() - t1;
hypre_printf("Rownnz time %f\n", t2);
#endif
@ -101,7 +101,7 @@ hypreDevice_CSRSpGemm(hypre_CSRMatrix *A,
1 /* exact row nnz */,
&d_ic, &d_jc, &d_c, &nnzC);
#ifdef HYPRE_SPGEMM_TIMING
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
t2 = hypre_MPI_Wtime() - t1;
hypre_printf("SpGemmNumerical time %f\n", t2);
#endif
@ -115,7 +115,7 @@ hypreDevice_CSRSpGemm(hypre_CSRMatrix *A,
#endif
hypreDevice_CSRSpGemmRownnzEstimate(m, k, n, d_ia, d_ja, d_ib, d_jb, d_rc);
#ifdef HYPRE_SPGEMM_TIMING
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
t2 = hypre_MPI_Wtime() - t1;
hypre_printf("RownnzEst time %f\n", t2);
#endif
@ -126,7 +126,7 @@ hypreDevice_CSRSpGemm(hypre_CSRMatrix *A,
hypreDevice_CSRSpGemmNumerWithRownnzEstimate(m, k, n, d_ia, d_ja, d_a, d_ib, d_jb, d_b, d_rc,
&d_ic, &d_jc, &d_c, &nnzC);
#ifdef HYPRE_SPGEMM_TIMING
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
t2 = hypre_MPI_Wtime() - t1;
hypre_printf("SpGemmNumerical time %f\n", t2);
#endif
@ -140,7 +140,7 @@ hypreDevice_CSRSpGemm(hypre_CSRMatrix *A,
#endif
hypreDevice_CSRSpGemmRownnzEstimate(m, k, n, d_ia, d_ja, d_ib, d_jb, d_rc);
#ifdef HYPRE_SPGEMM_TIMING
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
t2 = hypre_MPI_Wtime() - t1;
hypre_printf("RownnzEst time %f\n", t2);
#endif
@ -157,7 +157,7 @@ hypreDevice_CSRSpGemm(hypre_CSRMatrix *A,
d_rc + 2 * m,
thrust::identity<HYPRE_Int>() );
#ifdef HYPRE_SPGEMM_TIMING
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
t2 = hypre_MPI_Wtime() - t1;
hypre_printf("RownnzBound time %f\n", t2);
#endif
@ -169,7 +169,7 @@ hypreDevice_CSRSpGemm(hypre_CSRMatrix *A,
rownnz_exact,
&d_ic, &d_jc, &d_c, &nnzC);
#ifdef HYPRE_SPGEMM_TIMING
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
t2 = hypre_MPI_Wtime() - t1;
hypre_printf("SpGemmNumerical time %f\n", t2);
#endif


@ -5,6 +5,7 @@
* SPDX-License-Identifier: (Apache-2.0 OR MIT)
******************************************************************************/
#include "_hypre_onedpl.hpp"
#include "seq_mv.h"
#include "_hypre_utilities.hpp"
@ -146,19 +147,6 @@ hypreDevice_CSRSpTransRocsparse(HYPRE_Int m, HYPRE_Int n, HYPR
#endif // #if defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_ONEMKLSPARSE)
HYPRE_Int
hypreDevice_CSRSpTransOnemklsparse(HYPRE_Int m, HYPRE_Int n, HYPRE_Int nnzA,
HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex *d_aa,
HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, HYPRE_Complex **d_ac_out,
HYPRE_Int want_data)
{
/* WM: TODO */
return hypre_error_flag;
}
#endif // #if defined(HYPRE_USING_ONEMKLSPARSE)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
HYPRE_Int
@ -242,7 +230,79 @@ hypreDevice_CSRSpTrans(HYPRE_Int m, HYPRE_Int n, HYPRE_Int
HYPRE_Int **d_ic_out, HYPRE_Int **d_jc_out, HYPRE_Complex **d_ac_out,
HYPRE_Int want_data)
{
/* WM: TODO */
#ifdef HYPRE_PROFILE
hypre_profile_times[HYPRE_TIMER_ID_SPTRANS] -= hypre_MPI_Wtime();
#endif
HYPRE_Int *d_jt, *d_it, *d_pm, *d_ic, *d_jc;
HYPRE_Complex *d_ac = NULL;
HYPRE_Int *mem_work = hypre_TAlloc(HYPRE_Int, 3 * nnzA, HYPRE_MEMORY_DEVICE);
/* allocate C */
d_jc = hypre_TAlloc(HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE);
if (want_data)
{
d_ac = hypre_TAlloc(HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE);
}
/* permutation vector */
d_pm = mem_work;
/* expansion: A's row idx */
d_it = d_pm + nnzA;
hypreDevice_CsrRowPtrsToIndices_v2(m, nnzA, d_ia, d_it);
/* a copy of col idx of A */
d_jt = d_it + nnzA;
hypre_TMemcpy(d_jt, d_ja, HYPRE_Int, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
/* sort: by col */
oneapi::dpl::counting_iterator<HYPRE_Int> count(0);
HYPRE_ONEDPL_CALL( std::copy,
count,
count + nnzA,
d_pm);
auto zip_jt_pm = oneapi::dpl::make_zip_iterator(d_jt, d_pm);
HYPRE_ONEDPL_CALL( std::stable_sort,
zip_jt_pm,
zip_jt_pm + nnzA,
[](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); } );
auto permuted_it = oneapi::dpl::make_permutation_iterator(d_it, d_pm);
HYPRE_ONEDPL_CALL( std::copy,
permuted_it,
permuted_it + nnzA,
d_jc );
if (want_data)
{
auto permuted_aa = oneapi::dpl::make_permutation_iterator(d_aa, d_pm);
HYPRE_ONEDPL_CALL( std::copy,
permuted_aa,
permuted_aa + nnzA,
d_ac );
}
/* convert into ic: row idx --> row ptrs */
d_ic = hypreDevice_CsrRowIndicesToPtrs(n, nnzA, d_jt);
#ifdef HYPRE_DEBUG
HYPRE_Int nnzC;
hypre_TMemcpy(&nnzC, &d_ic[n], HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE);
hypre_assert(nnzC == nnzA);
#endif
hypre_TFree(mem_work, HYPRE_MEMORY_DEVICE);
*d_ic_out = d_ic;
*d_jc_out = d_jc;
*d_ac_out = d_ac;
#ifdef HYPRE_PROFILE
hypre_profile_times[HYPRE_TIMER_ID_SPTRANS] += hypre_MPI_Wtime();
#endif
return hypre_error_flag;
}
#endif // #if defined(HYPRE_USING_SYCL)
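
The SYCL transpose above is built on a sort-by-key idiom: oneDPL has no direct sort_by_key, so the column indices and a permutation vector are fused with a zip iterator, stably sorted by column, and the row indices and values are then gathered through a permutation iterator. A stripped-down sketch of just that idiom (illustrative only; names are local to the example):

#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#include <oneapi/dpl/iterator>
#include <CL/sycl.hpp>

/* sort keys[0..n) ascending and apply the same permutation to vals[0..n) */
void sort_by_key(sycl::queue &q, int n, int *keys, double *vals, double *vals_out)
{
   auto policy = oneapi::dpl::execution::make_device_policy(q);
   int *perm = sycl::malloc_device<int>(n, q);

   oneapi::dpl::counting_iterator<int> count(0);
   std::copy(policy, count, count + n, perm);                     /* perm = identity */

   auto zip = oneapi::dpl::make_zip_iterator(keys, perm);
   std::stable_sort(policy, zip, zip + n,                         /* sort (key, perm) pairs by key */
                    [](auto l, auto r) { return std::get<0>(l) < std::get<0>(r); });

   auto gathered = oneapi::dpl::make_permutation_iterator(vals, perm);
   std::copy(policy, gathered, gathered + n, vals_out);           /* vals_out[i] = vals[perm[i]] */

   sycl::free(perm, q);
}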


@ -315,14 +315,16 @@ HYPRE_Int hypre_SeqVectorElmdivpyMarked( hypre_Vector *x, hypre_Vector *b, hypre
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x,
HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int fill );
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
hypre_CsrsvData* hypre_CsrsvDataCreate();
void hypre_CsrsvDataDestroy(hypre_CsrsvData *data);
hypre_GpuMatData* hypre_GpuMatDataCreate();
void hypre_GPUMatDataSetCSRData(hypre_GpuMatData *data, hypre_CSRMatrix *matrix);
void hypre_GpuMatDataDestroy(hypre_GpuMatData *data);
hypre_GpuMatData* hypre_CSRMatrixGetGPUMatData(hypre_CSRMatrix *matrix);
#define hypre_CSRMatrixGPUMatDescr(matrix) ( hypre_GpuMatDataMatDecsr(hypre_CSRMatrixGetGPUMatData(matrix)) )
#define hypre_CSRMatrixGPUMatInfo(matrix) ( hypre_GpuMatDataMatInfo (hypre_CSRMatrixGetGPUMatData(matrix)) )
#define hypre_CSRMatrixGPUMatHandle(matrix) ( hypre_GpuMatDataMatHandle (hypre_CSRMatrixGetGPUMatData(matrix)) )
#define hypre_CSRMatrixGPUMatSpMVBuffer(matrix) ( hypre_GpuMatDataSpMVBuffer (hypre_CSRMatrixGetGPUMatData(matrix)) )
#endif
void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix);


@ -36,12 +36,12 @@ extern "C" {
#ifndef hypre_CSR_MATRIX_HEADER
#define hypre_CSR_MATRIX_HEADER
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
struct hypre_CsrsvData;
typedef struct hypre_CsrsvData hypre_CsrsvData;
#endif
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
struct hypre_GpuMatData;
typedef struct hypre_GpuMatData hypre_GpuMatData;
#endif
@ -65,7 +65,7 @@ typedef struct
HYPRE_Int *rownnz; /* for compressing rows in matrix multiplication */
HYPRE_Int num_rownnz;
HYPRE_MemoryLocation memory_location; /* memory location of arrays i, j, data */
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
HYPRE_Int *sorted_j; /* some cusparse routines require sorted CSR */
HYPRE_Complex *sorted_data;
hypre_CsrsvData *csrsv_data;
@ -89,7 +89,7 @@ typedef struct
#define hypre_CSRMatrixOwnsData(matrix) ((matrix) -> owns_data)
#define hypre_CSRMatrixMemoryLocation(matrix) ((matrix) -> memory_location)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
#define hypre_CSRMatrixSortedJ(matrix) ((matrix) -> sorted_j)
#define hypre_CSRMatrixSortedData(matrix) ((matrix) -> sorted_data)
#define hypre_CSRMatrixCsrsvData(matrix) ((matrix) -> csrsv_data)
@ -587,14 +587,16 @@ HYPRE_Int hypre_SeqVectorElmdivpyMarked( hypre_Vector *x, hypre_Vector *b, hypre
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x,
HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int fill );
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
hypre_CsrsvData* hypre_CsrsvDataCreate();
void hypre_CsrsvDataDestroy(hypre_CsrsvData *data);
hypre_GpuMatData* hypre_GpuMatDataCreate();
void hypre_GPUMatDataSetCSRData(hypre_GpuMatData *data, hypre_CSRMatrix *matrix);
void hypre_GpuMatDataDestroy(hypre_GpuMatData *data);
hypre_GpuMatData* hypre_CSRMatrixGetGPUMatData(hypre_CSRMatrix *matrix);
#define hypre_CSRMatrixGPUMatDescr(matrix) ( hypre_GpuMatDataMatDecsr(hypre_CSRMatrixGetGPUMatData(matrix)) )
#define hypre_CSRMatrixGPUMatInfo(matrix) ( hypre_GpuMatDataMatInfo (hypre_CSRMatrixGetGPUMatData(matrix)) )
#define hypre_CSRMatrixGPUMatHandle(matrix) ( hypre_GpuMatDataMatHandle (hypre_CSRMatrixGetGPUMatData(matrix)) )
#define hypre_CSRMatrixGPUMatSpMVBuffer(matrix) ( hypre_GpuMatDataSpMVBuffer (hypre_CSRMatrixGetGPUMatData(matrix)) )
#endif
void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix);


@ -11,6 +11,7 @@
*
*****************************************************************************/
#include "_hypre_onedpl.hpp"
#include "seq_mv.h"
#include "_hypre_utilities.hpp" //RL: TODO vector_device.c, include cuda there
@ -286,6 +287,11 @@ hypre_SeqVectorSetConstantValues( hypre_Vector *v,
{
HYPRE_THRUST_CALL( fill_n, vector_data, size, value );
}
#elif defined(HYPRE_USING_SYCL)
if (size > 0)
{
HYPRE_ONEDPL_CALL( std::fill_n, vector_data, size, value );
}
#else
HYPRE_Int i;
#if defined(HYPRE_USING_DEVICE_OPENMP)
@ -300,7 +306,7 @@ hypre_SeqVectorSetConstantValues( hypre_Vector *v,
#endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
#if defined(HYPRE_USING_GPU)
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
#ifdef HYPRE_PROFILE
@ -467,13 +473,31 @@ hypre_SeqVectorScale( HYPRE_Complex alpha,
//hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUBLAS)
HYPRE_CUBLAS_CALL( cublasDscal(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, y_data, 1) );
#else
HYPRE_THRUST_CALL( transform, y_data, y_data + size, y_data, alpha * _1 );
#endif
#endif // #if defined(HYPRE_USING_CUBLAS)
#elif defined(HYPRE_USING_SYCL) // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_ONEMKLBLAS)
HYPRE_SYCL_CALL( oneapi::mkl::blas::scal(*hypre_HandleComputeStream(hypre_handle()),
size, alpha,
y_data, 1).wait() );
#else
HYPRE_ONEDPL_CALL( std::transform, y_data, y_data + size,
y_data, [alpha](HYPRE_Complex y) -> HYPRE_Complex { return alpha * y; } );
#endif // #if defined(HYPRE_USING_ONEMKLBLAS)
#endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#else // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
HYPRE_Int i;
#if defined(HYPRE_USING_DEVICE_OPENMP)
#pragma omp target teams distribute parallel for private(i) is_device_ptr(y_data)
@ -485,10 +509,10 @@ hypre_SeqVectorScale( HYPRE_Complex alpha,
y_data[i] *= alpha;
}
#endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
#endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
#if defined(HYPRE_USING_GPU)
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
#ifdef HYPRE_PROFILE
@ -520,14 +544,32 @@ hypre_SeqVectorAxpy( HYPRE_Complex alpha,
//hypre_SeqVectorPrefetch(x, HYPRE_MEMORY_DEVICE);
//hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUBLAS)
HYPRE_CUBLAS_CALL( cublasDaxpy(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, x_data, 1,
y_data, 1) );
#else
HYPRE_THRUST_CALL( transform, x_data, x_data + size, y_data, y_data, alpha * _1 + _2 );
#endif
#endif // #if defined(HYPRE_USING_CUBLAS)
#elif defined(HYPRE_USING_SYCL) // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_ONEMKLBLAS)
HYPRE_SYCL_CALL( oneapi::mkl::blas::axpy(*hypre_HandleComputeStream(hypre_handle()),
size, alpha,
x_data, 1, y_data, 1).wait() );
#else
HYPRE_ONEDPL_CALL( std::transform, x_data, x_data + size, y_data, y_data,
[alpha](HYPRE_Complex x, HYPRE_Complex y) -> HYPRE_Complex { return alpha * x + y; } );
#endif // #if defined(HYPRE_USING_ONEMKLBLAS)
#endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#else // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
HYPRE_Int i;
#if defined(HYPRE_USING_DEVICE_OPENMP)
#pragma omp target teams distribute parallel for private(i) is_device_ptr(y_data, x_data)
@ -539,10 +581,10 @@ hypre_SeqVectorAxpy( HYPRE_Complex alpha,
y_data[i] += alpha * x_data[i];
}
#endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
#endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
#if defined(HYPRE_USING_GPU)
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
#ifdef HYPRE_PROFILE
@ -567,7 +609,7 @@ hypre_SeqVectorElmdivpy( hypre_Vector *x,
HYPRE_Complex *y_data = hypre_VectorData(y);
HYPRE_Int size = hypre_VectorSize(b);
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
//HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_VectorMemoryLocation(x), hypre_VectorMemoryLocation(b) );
//RL: TODO back to hypre_GetExecPolicy2 later
HYPRE_ExecutionPolicy exec = HYPRE_EXEC_DEVICE;
@ -596,7 +638,7 @@ hypre_SeqVectorElmdivpy( hypre_Vector *x,
}
#if defined(HYPRE_USING_GPU)
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
#ifdef HYPRE_PROFILE
@ -647,7 +689,7 @@ hypre_SeqVectorElmdivpyMarked( hypre_Vector *x,
}
#if defined(HYPRE_USING_GPU)
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
#ifdef HYPRE_PROFILE
@ -678,19 +720,41 @@ hypre_SeqVectorInnerProd( hypre_Vector *x,
//hypre_SeqVectorPrefetch(x, HYPRE_MEMORY_DEVICE);
//hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
#ifndef HYPRE_COMPLEX
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUBLAS)
HYPRE_CUBLAS_CALL( cublasDdot(hypre_HandleCublasHandle(hypre_handle()), size, x_data, 1, y_data, 1,
&result) );
#else
result = HYPRE_THRUST_CALL( inner_product, x_data, x_data + size, y_data, 0.0 );
#endif
#endif // #if defined(HYPRE_USING_CUBLAS)
#elif defined(HYPRE_USING_SYCL) // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_ONEMKLBLAS)
HYPRE_Real *result_dev = hypre_CTAlloc(HYPRE_Real, 1, HYPRE_MEMORY_DEVICE);
HYPRE_SYCL_CALL( oneapi::mkl::blas::dot(*hypre_HandleComputeStream(hypre_handle()),
size, x_data, 1,
y_data, 1, result_dev).wait() );
hypre_TMemcpy(&result, result_dev, HYPRE_Real, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE);
hypre_TFree(result_dev, HYPRE_MEMORY_DEVICE);
#else
result = HYPRE_ONEDPL_CALL( std::transform_reduce, x_data, x_data + size, y_data, 0.0 );
#endif // #if defined(HYPRE_USING_ONEMKLBLAS)
#endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#else // #ifndef HYPRE_COMPLEX
/* TODO */
#error "Complex inner product"
#endif
#else /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
#endif // #ifndef HYPRE_COMPLEX
#else // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
HYPRE_Int i;
#if defined(HYPRE_USING_DEVICE_OPENMP)
#pragma omp target teams distribute parallel for private(i) reduction(+:result) is_device_ptr(y_data,x_data) map(result)
@ -701,10 +765,11 @@ hypre_SeqVectorInnerProd( hypre_Vector *x,
{
result += hypre_conj(y_data[i]) * x_data[i];
}
#endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
#endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
#if defined(HYPRE_USING_GPU)
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
#ifdef HYPRE_PROFILE
@ -806,7 +871,7 @@ hypre_SeqVectorMax( HYPRE_Complex alpha,
#endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#ifdef HYPRE_PROFILE
hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
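
One detail in the new SYCL inner-product path above: the USM variant of oneapi::mkl::blas::dot writes its result to a device-accessible pointer, which is why the diff allocates a one-element device buffer (result_dev) and copies it back to the host after the wait. A minimal standalone equivalent (illustrative only):

#include <CL/sycl.hpp>
#include <oneapi/mkl/blas.hpp>

double dot_host_result(sycl::queue &q, int n, const double *x, const double *y)
{
   double *d_result = sycl::malloc_device<double>(1, q);    /* device-side scalar */
   oneapi::mkl::blas::dot(q, n, x, 1, y, 1, d_result).wait();
   double result = 0.0;
   q.memcpy(&result, d_result, sizeof(double)).wait();      /* copy the scalar back to the host */
   sycl::free(d_result, q);
   return result;
}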


@ -30,6 +30,7 @@ HEADERS =\
mpistubs.h\
threading.h\
timing.h\
_hypre_onedpl.hpp\
_hypre_utilities.h\
_hypre_utilities.hpp
@ -81,6 +82,7 @@ SONAME = libHYPRE_utilities-${HYPRE_RELEASE_VERSION}${HYPRE_LIB_SUFFIX}
all: libHYPRE_utilities${HYPRE_LIB_SUFFIX}
cp -fR $(srcdir)/HYPRE_*.h $(HYPRE_BUILD_DIR)/include
cp -fR $(srcdir)/_hypre_onedpl.hpp $(HYPRE_BUILD_DIR)/include
cp -fR $(srcdir)/_hypre_utilities.h $(HYPRE_BUILD_DIR)/include
cp -fR $(srcdir)/_hypre_utilities.hpp $(HYPRE_BUILD_DIR)/include
cp -fR $(srcdir)/fortran*.h $(HYPRE_BUILD_DIR)/include


@ -0,0 +1,21 @@
/******************************************************************************
* Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
* HYPRE Project Developers. See the top-level COPYRIGHT file for details.
*
* SPDX-License-Identifier: (Apache-2.0 OR MIT)
******************************************************************************/
#include "HYPRE_config.h"
#if defined(HYPRE_USING_SYCL)
/* oneAPI DPL headers */
/* NOTE: these must be included before standard C++ headers */
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#include <oneapi/dpl/numeric>
#include <oneapi/dpl/iterator>
#include <oneapi/dpl/functional>
#endif
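
As the note in this new header says, the oneDPL headers must be included ahead of any standard C++ headers so that the device-policy overloads of the standard algorithms are set up correctly. A translation unit that uses them therefore starts the way the source files in this commit do, e.g.:

#include "_hypre_onedpl.hpp"      /* oneDPL headers first */
#include "seq_mv.h"               /* then hypre / standard C++ headers */
#include "_hypre_utilities.hpp"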


@ -1740,7 +1740,7 @@ void hypre_big_sort_and_create_inverse_map(HYPRE_BigInt *in, HYPRE_Int len, HYPR
hypre_UnorderedBigIntMap *inverse_map);
#if defined(HYPRE_USING_GPU)
HYPRE_Int hypre_SyncCudaComputeStream(hypre_Handle *hypre_handle);
HYPRE_Int hypre_SyncComputeStream(hypre_Handle *hypre_handle);
HYPRE_Int hypre_SyncCudaDevice(hypre_Handle *hypre_handle);
HYPRE_Int hypre_ResetCudaDevice(hypre_Handle *hypre_handle);
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
@ -1753,6 +1753,13 @@ HYPRE_Int hypreDevice_IVAXPYMarked(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex
HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v);
HYPRE_Int hypreDevice_Filln(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v);
HYPRE_Int hypreDevice_Scalen(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v);
HYPRE_Int* hypreDevice_CsrRowPtrsToIndices(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr);
HYPRE_Int hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr,
HYPRE_Int *d_row_ind);
HYPRE_Int* hypreDevice_CsrRowIndicesToPtrs(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind);
HYPRE_Int hypreDevice_CsrRowIndicesToPtrs_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind,
HYPRE_Int *d_row_ptr);
#endif
HYPRE_Int hypre_CurandUniform( HYPRE_Int n, HYPRE_Real *urand, HYPRE_Int set_seed,
@ -1775,7 +1782,7 @@ char *hypre_strcpy(char *destination, const char *source);
HYPRE_Int hypre_SetSyncCudaCompute(HYPRE_Int action);
HYPRE_Int hypre_RestoreSyncCudaCompute();
HYPRE_Int hypre_GetSyncCudaCompute(HYPRE_Int *cuda_compute_stream_sync_ptr);
HYPRE_Int hypre_SyncCudaComputeStream(hypre_Handle *hypre_handle);
HYPRE_Int hypre_SyncComputeStream(hypre_Handle *hypre_handle);
/* handle.c */
HYPRE_Int hypre_SetSpGemmUseCusparse( HYPRE_Int use_cusparse );


@ -104,6 +104,14 @@ struct hypre_device_allocator
#include <hip/hip_runtime.h>
#if defined(HYPRE_USING_ROCSPARSE)
#include <rocsparse.h>
#endif
#if defined(HYPRE_USING_ROCRAND)
#include <rocrand.h>
#endif
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* sycl includes
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
@ -112,17 +120,18 @@ struct hypre_device_allocator
/* WM: problems with this being inside extern C++ {} */
/* #include <CL/sycl.hpp> */
#if defined(HYPRE_USING_ONEMKLSPARSE)
#include <oneapi/mkl/spblas.hpp>
#endif
#if defined(HYPRE_USING_ONEMKLBLAS)
#include <oneapi/mkl/blas.hpp>
#endif
#if defined(HYPRE_USING_ONEMKLRAND)
#include <oneapi/mkl/rng.hpp>
#endif
#endif // defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_USING_ROCSPARSE)
#include <rocsparse.h>
#endif
#if defined(HYPRE_USING_ROCRAND)
#include <rocrand.h>
#endif
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* macros for wrapping cuda/hip/sycl calls for error reporting
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
@ -164,6 +173,29 @@ struct hypre_device_allocator
assert(0); exit(1); \
}
#define HYPRE_ONEDPL_CALL(func_name, ...) \
func_name(oneapi::dpl::execution::make_device_policy( \
*hypre_HandleComputeStream(hypre_handle())), __VA_ARGS__);
#define HYPRE_SYCL_LAUNCH(kernel_name, gridsize, blocksize, ...) \
{ \
if ( gridsize[0] == 0 || blocksize[0] == 0 ) \
{ \
hypre_printf("Error %s %d: Invalid SYCL 1D launch parameters grid/block (%d) (%d)\n", \
__FILE__, __LINE__, \
gridsize[0], blocksize[0]); \
assert(0); exit(1); \
} \
else \
{ \
hypre_HandleComputeStream(hypre_handle())->submit([&] (sycl::handler& cgh) { \
cgh.parallel_for(sycl::nd_range<1>(gridsize*blocksize, blocksize), \
[=] (sycl::nd_item<1> item) { (kernel_name)(item, __VA_ARGS__); \
}); \
}).wait_and_throw(); \
} \
}
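A minimal usage sketch of the two macros defined above, adapted from the calls added in device_utils.c later in this diff (the names n, a, x, y, d_row_ind, and nnz are the arguments used there): HYPRE_ONEDPL_CALL prepends a device execution policy built on the hypre compute stream to a oneDPL algorithm, and HYPRE_SYCL_LAUNCH submits a 1D nd_range kernel on that stream.
sycl::range<1> bDim = hypre_GetDefaultDeviceBlockDimension();
sycl::range<1> gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim);
HYPRE_ONEDPL_CALL( std::fill, d_row_ind, d_row_ind + nnz, 0 );
HYPRE_SYCL_LAUNCH( hypreSYCLKernel_IVAXPY, gDim, bDim, n, a, x, y );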
#endif // defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)
#define HYPRE_CUBLAS_CALL(call) do { \
@ -365,6 +397,10 @@ struct hypre_CsrsvData
#elif defined(HYPRE_USING_ROCSPARSE)
rocsparse_mat_info info_L;
rocsparse_mat_info info_U;
#elif defined(HYPRE_USING_ONEMKLSPARSE)
/* WM: todo - placeholders */
char info_L;
char info_U;
#endif
hypre_int BufferSize;
char *Buffer;
@ -386,10 +422,15 @@ struct hypre_GpuMatData
rocsparse_mat_descr mat_descr;
rocsparse_mat_info mat_info;
#endif
#if defined(HYPRE_USING_ONEMKLSPARSE)
oneapi::mkl::sparse::matrix_handle_t mat_handle;
#endif
};
#define hypre_GpuMatDataMatDecsr(data) ((data) -> mat_descr)
#define hypre_GpuMatDataMatInfo(data) ((data) -> mat_info)
#define hypre_GpuMatDataMatHandle(data) ((data) -> mat_handle)
#define hypre_GpuMatDataSpMVBuffer(data) ((data) -> spmv_buffer)
#endif //#if defined(HYPRE_USING_GPU)
@ -443,9 +484,9 @@ using namespace thrust::placeholders;
#if defined(HYPRE_DEBUG)
#if defined(HYPRE_USING_CUDA)
#define GPU_LAUNCH_SYNC { hypre_SyncCudaComputeStream(hypre_handle()); HYPRE_CUDA_CALL( cudaGetLastError() ); }
#define GPU_LAUNCH_SYNC { hypre_SyncComputeStream(hypre_handle()); HYPRE_CUDA_CALL( cudaGetLastError() ); }
#elif defined(HYPRE_USING_HIP)
#define GPU_LAUNCH_SYNC { hypre_SyncCudaComputeStream(hypre_handle()); HYPRE_HIP_CALL( hipGetLastError() ); }
#define GPU_LAUNCH_SYNC { hypre_SyncComputeStream(hypre_handle()); HYPRE_HIP_CALL( hipGetLastError() ); }
#endif
#else // #if defined(HYPRE_DEBUG)
#define GPU_LAUNCH_SYNC
@ -1027,16 +1068,6 @@ HYPRE_Int hypreDevice_IntegerInclusiveScan(HYPRE_Int n, HYPRE_Int *d_i);
HYPRE_Int hypreDevice_IntegerExclusiveScan(HYPRE_Int n, HYPRE_Int *d_i);
HYPRE_Int* hypreDevice_CsrRowPtrsToIndices(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr);
HYPRE_Int hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr,
HYPRE_Int *d_row_ind);
HYPRE_Int* hypreDevice_CsrRowIndicesToPtrs(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind);
HYPRE_Int hypreDevice_CsrRowIndicesToPtrs_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind,
HYPRE_Int *d_row_ptr);
HYPRE_Int hypreDevice_GenScatterAdd(HYPRE_Real *x, HYPRE_Int ny, HYPRE_Int *map, HYPRE_Real *y,
char *work);

View File

@ -5,6 +5,7 @@
* SPDX-License-Identifier: (Apache-2.0 OR MIT)
******************************************************************************/
#include "_hypre_onedpl.hpp"
#include "_hypre_utilities.h"
#include "_hypre_utilities.hpp"
@ -42,7 +43,156 @@ sycl::range<1> hypre_GetDefaultDeviceGridDimension(HYPRE_Int n,
return gDim;
}
#endif
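/* device kernel for hypreDevice_IVAXPY below: one work-item updates one entry of y */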
void
hypreSYCLKernel_IVAXPY(sycl::nd_item<1>& item,
HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y )
{
HYPRE_Int i = (HYPRE_Int) item.get_global_linear_id();
if (i < n)
{
y[i] += x[i] / a[i];
}
}
/* Inverse Vector AXPY: y[i] = x[i] / a[i] + y[i] */
HYPRE_Int
hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y)
{
/* trivial case */
if (n <= 0)
{
return hypre_error_flag;
}
sycl::range<1> bDim = hypre_GetDefaultDeviceBlockDimension();
sycl::range<1> gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim);
HYPRE_SYCL_LAUNCH( hypreSYCLKernel_IVAXPY, gDim, bDim, n, a, x, y );
return hypre_error_flag;
}
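/* e.g., with a = {2, 4}, x = {1, 8}, y = {1, 1}, the update y[i] += x[i] / a[i] gives y = {1.5, 3} */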
void
hypreSYCLKernel_IVAXPYMarked(sycl::nd_item<1>& item,
HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y,
HYPRE_Int *marker, HYPRE_Int marker_val)
{
HYPRE_Int i = (HYPRE_Int) item.get_global_linear_id();
if (i < n)
{
if (marker[i] == marker_val)
{
y[i] += x[i] / a[i];
}
}
}
/* Marked Inverse Vector AXPY: y[i] = x[i] / a[i] + y[i], applied only where marker[i] == marker_val */
HYPRE_Int
hypreDevice_IVAXPYMarked(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y,
HYPRE_Int *marker, HYPRE_Int marker_val)
{
/* trivial case */
if (n <= 0)
{
return hypre_error_flag;
}
sycl::range<1> bDim = hypre_GetDefaultDeviceBlockDimension();
sycl::range<1> gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim);
HYPRE_SYCL_LAUNCH( hypreSYCLKernel_IVAXPYMarked, gDim, bDim, n, a, x, y, marker, marker_val );
return hypre_error_flag;
}
HYPRE_Int*
hypreDevice_CsrRowPtrsToIndices(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr)
{
/* trivial case */
if (nrows <= 0 || nnz <= 0)
{
return NULL;
}
HYPRE_Int *d_row_ind = hypre_TAlloc(HYPRE_Int, nnz, HYPRE_MEMORY_DEVICE);
hypreDevice_CsrRowPtrsToIndices_v2(nrows, nnz, d_row_ptr, d_row_ind);
return d_row_ind;
}
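/* scatter kernel for hypreDevice_CsrRowPtrsToIndices_v2 below: each non-empty row
 * writes its row index at the position of its first nonzero */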
void
hypreSYCLKernel_ScatterRowPtr(sycl::nd_item<1>& item,
HYPRE_Int nrows, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_ind)
{
HYPRE_Int i = (HYPRE_Int) item.get_global_linear_id();
if (i < nrows)
{
HYPRE_Int row_start = d_row_ptr[i];
HYPRE_Int row_end = d_row_ptr[i + 1];
if (row_start != row_end)
{
d_row_ind[row_start] = i;
}
}
}
HYPRE_Int
hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr,
HYPRE_Int *d_row_ind)
{
/* trivial case */
if (nrows <= 0 || nnz <= 0)
{
return hypre_error_flag;
}
sycl::range<1> bDim = hypre_GetDefaultDeviceBlockDimension();
sycl::range<1> gDim = hypre_GetDefaultDeviceGridDimension(nrows, "thread", bDim);
HYPRE_ONEDPL_CALL( std::fill, d_row_ind, d_row_ind + nnz, 0 );
HYPRE_SYCL_LAUNCH( hypreSYCLKernel_ScatterRowPtr, gDim, bDim, nrows, d_row_ptr, d_row_ind );
HYPRE_ONEDPL_CALL( std::inclusive_scan, d_row_ind, d_row_ind + nnz, d_row_ind,
oneapi::dpl::maximum<HYPRE_Int>());
return hypre_error_flag;
}
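/* Worked example (hypothetical data): for row_ptr = {0, 2, 2, 5} with nrows = 3,
 * nnz = 5, and row 1 empty, the fill gives d_row_ind = {0, 0, 0, 0, 0}, the scatter
 * kernel writes each non-empty row's index at its first nonzero to give
 * {0, 0, 2, 0, 0}, and the inclusive max-scan propagates the indices forward to
 * {0, 0, 2, 2, 2}, i.e. entries 0-1 belong to row 0 and entries 2-4 to row 2. */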
HYPRE_Int*
hypreDevice_CsrRowIndicesToPtrs(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind)
{
HYPRE_Int *d_row_ptr = hypre_TAlloc(HYPRE_Int, nrows + 1, HYPRE_MEMORY_DEVICE);
oneapi::dpl::counting_iterator<HYPRE_Int> count(0);
HYPRE_ONEDPL_CALL( oneapi::dpl::lower_bound,
d_row_ind, d_row_ind + nnz,
count,
count + nrows + 1,
d_row_ptr);
return d_row_ptr;
}
HYPRE_Int
hypreDevice_CsrRowIndicesToPtrs_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind,
HYPRE_Int *d_row_ptr)
{
oneapi::dpl::counting_iterator<HYPRE_Int> count(0);
HYPRE_ONEDPL_CALL( oneapi::dpl::lower_bound,
d_row_ind, d_row_ind + nnz,
count,
count + nrows + 1,
d_row_ptr);
return hypre_error_flag;
}
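/* The reverse conversion relies on d_row_ind being sorted: oneapi::dpl::lower_bound
 * of each value 0..nrows taken from the counting iterator recovers the row pointer.
 * Continuing the hypothetical example above, lower_bound of {0, 1, 2, 3} in
 * {0, 0, 2, 2, 2} yields {0, 2, 2, 5}, reproducing the original row_ptr. */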
#endif // #if defined(HYPRE_USING_SYCL)
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
@ -1381,6 +1531,17 @@ hypre_SyncCudaDevice(hypre_Handle *hypre_handle)
HYPRE_CUDA_CALL( cudaDeviceSynchronize() );
#elif defined(HYPRE_USING_HIP)
HYPRE_HIP_CALL( hipDeviceSynchronize() );
#elif defined(HYPRE_USING_SYCL)
try
{
HYPRE_SYCL_CALL( hypre_HandleComputeStream(hypre_handle)->wait_and_throw() );
}
catch (sycl::exception const &exc)
{
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}
#endif
return hypre_error_flag;
}
@ -1404,9 +1565,9 @@ hypre_ResetCudaDevice(hypre_Handle *hypre_handle)
* 4: sync stream based on cuda_compute_stream_sync
*/
HYPRE_Int
hypre_SyncCudaComputeStream_core(HYPRE_Int action,
hypre_Handle *hypre_handle,
HYPRE_Int *cuda_compute_stream_sync_ptr)
hypre_SyncComputeStream_core(HYPRE_Int action,
hypre_Handle *hypre_handle,
HYPRE_Int *cuda_compute_stream_sync_ptr)
{
/* with UVM the default is to sync at kernel completions, since host is also able to
* touch GPU memory */
@ -1448,7 +1609,7 @@ hypre_SyncCudaComputeStream_core(HYPRE_Int action,
#endif
break;
default:
hypre_printf("hypre_SyncCudaComputeStream_core invalid action\n");
hypre_printf("hypre_SyncComputeStream_core invalid action\n");
hypre_error_in_arg(1);
}
@ -1460,7 +1621,7 @@ hypre_SetSyncCudaCompute(HYPRE_Int action)
{
/* convert to 1/0 */
action = action != 0;
hypre_SyncCudaComputeStream_core(action, NULL, NULL);
hypre_SyncComputeStream_core(action, NULL, NULL);
return hypre_error_flag;
}
@ -1468,7 +1629,7 @@ hypre_SetSyncCudaCompute(HYPRE_Int action)
HYPRE_Int
hypre_RestoreSyncCudaCompute()
{
hypre_SyncCudaComputeStream_core(2, NULL, NULL);
hypre_SyncComputeStream_core(2, NULL, NULL);
return hypre_error_flag;
}
@ -1476,15 +1637,15 @@ hypre_RestoreSyncCudaCompute()
HYPRE_Int
hypre_GetSyncCudaCompute(HYPRE_Int *cuda_compute_stream_sync_ptr)
{
hypre_SyncCudaComputeStream_core(3, NULL, cuda_compute_stream_sync_ptr);
hypre_SyncComputeStream_core(3, NULL, cuda_compute_stream_sync_ptr);
return hypre_error_flag;
}
HYPRE_Int
hypre_SyncCudaComputeStream(hypre_Handle *hypre_handle)
hypre_SyncComputeStream(hypre_Handle *hypre_handle)
{
hypre_SyncCudaComputeStream_core(4, hypre_handle, NULL);
hypre_SyncComputeStream_core(4, hypre_handle, NULL);
return hypre_error_flag;
}

View File

@ -47,6 +47,14 @@
#include <hip/hip_runtime.h>
#if defined(HYPRE_USING_ROCSPARSE)
#include <rocsparse.h>
#endif
#if defined(HYPRE_USING_ROCRAND)
#include <rocrand.h>
#endif
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* sycl includes
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
@ -55,17 +63,18 @@
/* WM: including <CL/sycl.hpp> here causes problems when it ends up inside an extern "C++" {} block */
/* #include <CL/sycl.hpp> */
#if defined(HYPRE_USING_ONEMKLSPARSE)
#include <oneapi/mkl/spblas.hpp>
#endif
#if defined(HYPRE_USING_ONEMKLBLAS)
#include <oneapi/mkl/blas.hpp>
#endif
#if defined(HYPRE_USING_ONEMKLRAND)
#include <oneapi/mkl/rng.hpp>
#endif
#endif // defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_USING_ROCSPARSE)
#include <rocsparse.h>
#endif
#if defined(HYPRE_USING_ROCRAND)
#include <rocrand.h>
#endif
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* macros for wrapping cuda/hip/sycl calls for error reporting
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
@ -107,6 +116,29 @@
assert(0); exit(1); \
}
#define HYPRE_ONEDPL_CALL(func_name, ...) \
func_name(oneapi::dpl::execution::make_device_policy( \
*hypre_HandleComputeStream(hypre_handle())), __VA_ARGS__);
#define HYPRE_SYCL_LAUNCH(kernel_name, gridsize, blocksize, ...) \
{ \
if ( gridsize[0] == 0 || blocksize[0] == 0 ) \
{ \
hypre_printf("Error %s %d: Invalid SYCL 1D launch parameters grid/block (%d) (%d)\n", \
__FILE__, __LINE__, \
gridsize[0], blocksize[0]); \
assert(0); exit(1); \
} \
else \
{ \
hypre_HandleComputeStream(hypre_handle())->submit([&] (sycl::handler& cgh) { \
cgh.parallel_for(sycl::nd_range<1>(gridsize*blocksize, blocksize), \
[=] (sycl::nd_item<1> item) { (kernel_name)(item, __VA_ARGS__); \
}); \
}).wait_and_throw(); \
} \
}
#endif // defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)
#define HYPRE_CUBLAS_CALL(call) do { \
@ -308,6 +340,10 @@ struct hypre_CsrsvData
#elif defined(HYPRE_USING_ROCSPARSE)
rocsparse_mat_info info_L;
rocsparse_mat_info info_U;
#elif defined(HYPRE_USING_ONEMKLSPARSE)
/* WM: todo - placeholders */
char info_L;
char info_U;
#endif
hypre_int BufferSize;
char *Buffer;
@ -329,10 +365,15 @@ struct hypre_GpuMatData
rocsparse_mat_descr mat_descr;
rocsparse_mat_info mat_info;
#endif
#if defined(HYPRE_USING_ONEMKLSPARSE)
oneapi::mkl::sparse::matrix_handle_t mat_handle;
#endif
};
#define hypre_GpuMatDataMatDecsr(data) ((data) -> mat_descr)
#define hypre_GpuMatDataMatInfo(data) ((data) -> mat_info)
#define hypre_GpuMatDataMatHandle(data) ((data) -> mat_handle)
#define hypre_GpuMatDataSpMVBuffer(data) ((data) -> spmv_buffer)
#endif //#if defined(HYPRE_USING_GPU)
@ -386,9 +427,9 @@ using namespace thrust::placeholders;
#if defined(HYPRE_DEBUG)
#if defined(HYPRE_USING_CUDA)
#define GPU_LAUNCH_SYNC { hypre_SyncCudaComputeStream(hypre_handle()); HYPRE_CUDA_CALL( cudaGetLastError() ); }
#define GPU_LAUNCH_SYNC { hypre_SyncComputeStream(hypre_handle()); HYPRE_CUDA_CALL( cudaGetLastError() ); }
#elif defined(HYPRE_USING_HIP)
#define GPU_LAUNCH_SYNC { hypre_SyncCudaComputeStream(hypre_handle()); HYPRE_HIP_CALL( hipGetLastError() ); }
#define GPU_LAUNCH_SYNC { hypre_SyncComputeStream(hypre_handle()); HYPRE_HIP_CALL( hipGetLastError() ); }
#endif
#else // #if defined(HYPRE_DEBUG)
#define GPU_LAUNCH_SYNC
@ -970,16 +1011,6 @@ HYPRE_Int hypreDevice_IntegerInclusiveScan(HYPRE_Int n, HYPRE_Int *d_i);
HYPRE_Int hypreDevice_IntegerExclusiveScan(HYPRE_Int n, HYPRE_Int *d_i);
HYPRE_Int* hypreDevice_CsrRowPtrsToIndices(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr);
HYPRE_Int hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr,
HYPRE_Int *d_row_ind);
HYPRE_Int* hypreDevice_CsrRowIndicesToPtrs(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind);
HYPRE_Int hypreDevice_CsrRowIndicesToPtrs_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind,
HYPRE_Int *d_row_ptr);
HYPRE_Int hypreDevice_GenScatterAdd(HYPRE_Real *x, HYPRE_Int ny, HYPRE_Int *map, HYPRE_Real *y,
char *work);

View File

@ -168,7 +168,7 @@ hypre_IntArraySetConstantValues( hypre_IntArray *v,
#endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
#if defined(HYPRE_USING_GPU)
hypre_SyncCudaComputeStream(hypre_handle());
hypre_SyncComputeStream(hypre_handle());
#endif
return ierr;

View File

@ -269,7 +269,7 @@ void hypre_big_sort_and_create_inverse_map(HYPRE_BigInt *in, HYPRE_Int len, HYPR
hypre_UnorderedBigIntMap *inverse_map);
#if defined(HYPRE_USING_GPU)
HYPRE_Int hypre_SyncCudaComputeStream(hypre_Handle *hypre_handle);
HYPRE_Int hypre_SyncComputeStream(hypre_Handle *hypre_handle);
HYPRE_Int hypre_SyncCudaDevice(hypre_Handle *hypre_handle);
HYPRE_Int hypre_ResetCudaDevice(hypre_Handle *hypre_handle);
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
@ -282,6 +282,13 @@ HYPRE_Int hypreDevice_IVAXPYMarked(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex
HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v);
HYPRE_Int hypreDevice_Filln(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v);
HYPRE_Int hypreDevice_Scalen(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v);
HYPRE_Int* hypreDevice_CsrRowPtrsToIndices(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr);
HYPRE_Int hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr,
HYPRE_Int *d_row_ind);
HYPRE_Int* hypreDevice_CsrRowIndicesToPtrs(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind);
HYPRE_Int hypreDevice_CsrRowIndicesToPtrs_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ind,
HYPRE_Int *d_row_ptr);
#endif
HYPRE_Int hypre_CurandUniform( HYPRE_Int n, HYPRE_Real *urand, HYPRE_Int set_seed,
@ -304,7 +311,7 @@ char *hypre_strcpy(char *destination, const char *source);
HYPRE_Int hypre_SetSyncCudaCompute(HYPRE_Int action);
HYPRE_Int hypre_RestoreSyncCudaCompute();
HYPRE_Int hypre_GetSyncCudaCompute(HYPRE_Int *cuda_compute_stream_sync_ptr);
HYPRE_Int hypre_SyncCudaComputeStream(hypre_Handle *hypre_handle);
HYPRE_Int hypre_SyncComputeStream(hypre_Handle *hypre_handle);
/* handle.c */
HYPRE_Int hypre_SetSpGemmUseCusparse( HYPRE_Int use_cusparse );