fix cub allocator; unify all allocators (#447)
This PR fixes the CUB allocator with UVM and also unifies the CUB and UMPIRE allocators.
This commit is contained in:
parent
6f0bdbbb11
commit
40dc7ce550
@ -1282,7 +1282,7 @@ typedef struct
|
||||
#define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateNsamples(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateMultFactor(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleSpgemmHashType(hypre_handle) hypre_CudaDataSpgemmHashType(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleUmpireDeviceAllocator(hypre_handle) hypre_CudaDataUmpireDeviceAllocator(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleDeviceAllocator(hypre_handle) hypre_CudaDataDeviceAllocator(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleUseGpuRand(hypre_handle) hypre_CudaDataUseGpuRand(hypre_HandleCudaData(hypre_handle))
|
||||
|
||||
#define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm)
|
||||
|
||||
@ -15,51 +15,38 @@ extern "C++" {
|
||||
* SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef HYPRE_UMPIRE_ALLOCATOR_H
|
||||
#define HYPRE_UMPIRE_ALLOCATOR_H
|
||||
#ifndef DEVICE_ALLOCATOR_H
|
||||
#define DEVICE_ALLOCATOR_H
|
||||
|
||||
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
|
||||
#if defined(HYPRE_USING_UMPIRE_DEVICE)
|
||||
|
||||
/*
|
||||
#include "umpire/Allocator.hpp"
|
||||
#include "umpire/ResourceManager.hpp"
|
||||
|
||||
#include "umpire/strategy/DynamicPool.hpp"
|
||||
#include "umpire/strategy/AllocationAdvisor.hpp"
|
||||
#include "umpire/strategy/MonotonicAllocationStrategy.hpp"
|
||||
#include "umpire/util/Macros.hpp"
|
||||
*/
|
||||
|
||||
struct hypre_umpire_device_allocator
|
||||
/* C++ style memory allocator for GPU **device** memory
|
||||
* Just wraps _hypre_TAlloc and _hypre_TFree */
|
||||
struct hypre_device_allocator
|
||||
{
|
||||
typedef char value_type;
|
||||
|
||||
hypre_umpire_device_allocator()
|
||||
hypre_device_allocator()
|
||||
{
|
||||
// constructor
|
||||
}
|
||||
|
||||
~hypre_umpire_device_allocator()
|
||||
~hypre_device_allocator()
|
||||
{
|
||||
// destructor
|
||||
}
|
||||
|
||||
char *allocate(std::ptrdiff_t num_bytes)
|
||||
{
|
||||
char *ptr = NULL;
|
||||
hypre_umpire_device_pooled_allocate((void**) &ptr, num_bytes);
|
||||
|
||||
return ptr;
|
||||
return _hypre_TAlloc(char, num_bytes, hypre_MEMORY_DEVICE);
|
||||
}
|
||||
|
||||
void deallocate(char *ptr, size_t n)
|
||||
{
|
||||
hypre_umpire_device_pooled_free(ptr);
|
||||
_hypre_TFree(ptr, hypre_MEMORY_DEVICE);
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* #ifdef HYPRE_USING_UMPIRE_DEVICE */
|
||||
#endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
|
||||
|
||||
#endif
|
||||
@ -228,9 +215,11 @@ struct hypre_CudaData
|
||||
hypre_cub_CachingDeviceAllocator *cub_dev_allocator;
|
||||
hypre_cub_CachingDeviceAllocator *cub_uvm_allocator;
|
||||
#endif
|
||||
#ifdef HYPRE_USING_UMPIRE_DEVICE
|
||||
hypre_umpire_device_allocator umpire_device_allocator;
|
||||
|
||||
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
|
||||
hypre_device_allocator device_allocator;
|
||||
#endif
|
||||
|
||||
HYPRE_Int cuda_device;
|
||||
/* by default, hypre puts GPU computations in this stream
|
||||
* Do not be confused with the default (null) CUDA stream */
|
||||
@ -272,7 +261,7 @@ struct hypre_CudaData
|
||||
#define hypre_CudaDataSpgemmRownnzEstimateNsamples(data) ((data) -> spgemm_rownnz_estimate_nsamples)
|
||||
#define hypre_CudaDataSpgemmRownnzEstimateMultFactor(data) ((data) -> spgemm_rownnz_estimate_mult_factor)
|
||||
#define hypre_CudaDataSpgemmHashType(data) ((data) -> spgemm_hash_type)
|
||||
#define hypre_CudaDataUmpireDeviceAllocator(data) ((data) -> umpire_device_allocator)
|
||||
#define hypre_CudaDataDeviceAllocator(data) ((data) -> device_allocator)
|
||||
#define hypre_CudaDataUseGpuRand(data) ((data) -> use_gpu_rand)
|
||||
|
||||
hypre_CudaData* hypre_CudaDataCreate();
|
||||
@ -435,35 +424,13 @@ using namespace thrust::placeholders;
|
||||
/* RL: TODO Want macro HYPRE_THRUST_CALL to return value but I don't know how to do it right
|
||||
* The following one works OK for now */
|
||||
|
||||
#ifdef HYPRE_USING_UMPIRE_DEVICE
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::cuda::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#elif defined(HYPRE_USING_HIP)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::hip::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#endif // HYPRE_USING_CUDA
|
||||
|
||||
#elif HYPRE_USING_DEVICE_POOL
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::cuda::par(*(hypre_HandleCubDevAllocator(hypre_handle()))).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::cuda::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#elif defined(HYPRE_USING_HIP) \
|
||||
thrust::func_name(thrust::hip::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::cuda::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#elif defined(HYPRE_USING_HIP)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::hip::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#endif // HYPRE_USING_CUDA
|
||||
|
||||
#endif // HYPRE_USING_UMPIRE_DEVICE
|
||||
|
||||
|
||||
/* return the number of threads in block */
|
||||
template <hypre_int dim>
|
||||
static __device__ __forceinline__
|
||||
|
||||
@ -163,9 +163,11 @@ struct hypre_CudaData
|
||||
hypre_cub_CachingDeviceAllocator *cub_dev_allocator;
|
||||
hypre_cub_CachingDeviceAllocator *cub_uvm_allocator;
|
||||
#endif
|
||||
#ifdef HYPRE_USING_UMPIRE_DEVICE
|
||||
hypre_umpire_device_allocator umpire_device_allocator;
|
||||
|
||||
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
|
||||
hypre_device_allocator device_allocator;
|
||||
#endif
|
||||
|
||||
HYPRE_Int cuda_device;
|
||||
/* by default, hypre puts GPU computations in this stream
|
||||
* Do not be confused with the default (null) CUDA stream */
|
||||
@ -207,7 +209,7 @@ struct hypre_CudaData
|
||||
#define hypre_CudaDataSpgemmRownnzEstimateNsamples(data) ((data) -> spgemm_rownnz_estimate_nsamples)
|
||||
#define hypre_CudaDataSpgemmRownnzEstimateMultFactor(data) ((data) -> spgemm_rownnz_estimate_mult_factor)
|
||||
#define hypre_CudaDataSpgemmHashType(data) ((data) -> spgemm_hash_type)
|
||||
#define hypre_CudaDataUmpireDeviceAllocator(data) ((data) -> umpire_device_allocator)
|
||||
#define hypre_CudaDataDeviceAllocator(data) ((data) -> device_allocator)
|
||||
#define hypre_CudaDataUseGpuRand(data) ((data) -> use_gpu_rand)
|
||||
|
||||
hypre_CudaData* hypre_CudaDataCreate();
|
||||
@ -370,35 +372,13 @@ using namespace thrust::placeholders;
|
||||
/* RL: TODO Want macro HYPRE_THRUST_CALL to return value but I don't know how to do it right
|
||||
* The following one works OK for now */
|
||||
|
||||
#ifdef HYPRE_USING_UMPIRE_DEVICE
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::cuda::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#elif defined(HYPRE_USING_HIP)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::hip::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#endif // HYPRE_USING_CUDA
|
||||
|
||||
#elif HYPRE_USING_DEVICE_POOL
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::cuda::par(*(hypre_HandleCubDevAllocator(hypre_handle()))).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::cuda::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#elif defined(HYPRE_USING_HIP) \
|
||||
thrust::func_name(thrust::hip::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::cuda::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#elif defined(HYPRE_USING_HIP)
|
||||
#define HYPRE_THRUST_CALL(func_name, ...) \
|
||||
thrust::func_name(thrust::hip::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
|
||||
#endif // HYPRE_USING_CUDA
|
||||
|
||||
#endif // HYPRE_USING_UMPIRE_DEVICE
|
||||
|
||||
|
||||
/* return the number of threads in block */
|
||||
template <hypre_int dim>
|
||||
static __device__ __forceinline__
|
||||
|
||||
@ -5,51 +5,38 @@
|
||||
* SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef HYPRE_UMPIRE_ALLOCATOR_H
|
||||
#define HYPRE_UMPIRE_ALLOCATOR_H
|
||||
#ifndef DEVICE_ALLOCATOR_H
|
||||
#define DEVICE_ALLOCATOR_H
|
||||
|
||||
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
|
||||
#if defined(HYPRE_USING_UMPIRE_DEVICE)
|
||||
|
||||
/*
|
||||
#include "umpire/Allocator.hpp"
|
||||
#include "umpire/ResourceManager.hpp"
|
||||
|
||||
#include "umpire/strategy/DynamicPool.hpp"
|
||||
#include "umpire/strategy/AllocationAdvisor.hpp"
|
||||
#include "umpire/strategy/MonotonicAllocationStrategy.hpp"
|
||||
#include "umpire/util/Macros.hpp"
|
||||
*/
|
||||
|
||||
struct hypre_umpire_device_allocator
|
||||
/* C++ style memory allocator for GPU **device** memory
|
||||
* Just wraps _hypre_TAlloc and _hypre_TFree */
|
||||
struct hypre_device_allocator
|
||||
{
|
||||
typedef char value_type;
|
||||
|
||||
hypre_umpire_device_allocator()
|
||||
hypre_device_allocator()
|
||||
{
|
||||
// constructor
|
||||
}
|
||||
|
||||
~hypre_umpire_device_allocator()
|
||||
~hypre_device_allocator()
|
||||
{
|
||||
// destructor
|
||||
}
|
||||
|
||||
char *allocate(std::ptrdiff_t num_bytes)
|
||||
{
|
||||
char *ptr = NULL;
|
||||
hypre_umpire_device_pooled_allocate((void**) &ptr, num_bytes);
|
||||
|
||||
return ptr;
|
||||
return _hypre_TAlloc(char, num_bytes, hypre_MEMORY_DEVICE);
|
||||
}
|
||||
|
||||
void deallocate(char *ptr, size_t n)
|
||||
{
|
||||
hypre_umpire_device_pooled_free(ptr);
|
||||
_hypre_TFree(ptr, hypre_MEMORY_DEVICE);
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* #ifdef HYPRE_USING_UMPIRE_DEVICE */
|
||||
#endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
|
||||
|
||||
#endif
|
||||
@ -224,7 +224,7 @@ HYPRE_Init()
|
||||
#ifdef HYPRE_USING_DEVICE_POOL
|
||||
/* Keep this check here at the end of HYPRE_Init()
|
||||
* Make sure that device pool allocator has not been setup in HYPRE_Init,
|
||||
* otherwise users are not able to set the parametersB
|
||||
* otherwise users are not able to set all the parameters
|
||||
*/
|
||||
if ( hypre_HandleCubDevAllocator(_hypre_handle) ||
|
||||
hypre_HandleCubUvmAllocator(_hypre_handle) )
|
||||
|
||||
@ -76,7 +76,7 @@ typedef struct
|
||||
#define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateNsamples(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateMultFactor(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleSpgemmHashType(hypre_handle) hypre_CudaDataSpgemmHashType(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleUmpireDeviceAllocator(hypre_handle) hypre_CudaDataUmpireDeviceAllocator(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleDeviceAllocator(hypre_handle) hypre_CudaDataDeviceAllocator(hypre_HandleCudaData(hypre_handle))
|
||||
#define hypre_HandleUseGpuRand(hypre_handle) hypre_CudaDataUseGpuRand(hypre_HandleCudaData(hypre_handle))
|
||||
|
||||
#define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm)
|
||||
|
||||
@ -88,10 +88,10 @@ extern "C++" {
|
||||
# Structures and prototypes
|
||||
#===========================================================================
|
||||
|
||||
cat umpire_allocator.h >> $INTERNAL_HEADER
|
||||
cat cuda_utils.h >> $INTERNAL_HEADER
|
||||
cat cuda_reducer.h >> $INTERNAL_HEADER
|
||||
cat cub_allocator.h >> $INTERNAL_HEADER
|
||||
cat device_allocator.h >> $INTERNAL_HEADER
|
||||
cat cuda_utils.h >> $INTERNAL_HEADER
|
||||
cat cuda_reducer.h >> $INTERNAL_HEADER
|
||||
cat cub_allocator.h >> $INTERNAL_HEADER
|
||||
|
||||
#===========================================================================
|
||||
# Include guards
|
||||
|
||||
@ -1272,6 +1272,7 @@ hypre_SetCubMemPoolSize(hypre_uint cub_bin_growth,
|
||||
hypre_HandleCubMaxBin(hypre_handle()) = cub_max_bin;
|
||||
hypre_HandleCubMaxCachedBytes(hypre_handle()) = cub_max_cached_bytes;
|
||||
|
||||
//TODO XXX RL: cub_min_bin, cub_max_bin are not (re)set
|
||||
if (hypre_HandleCubDevAllocator(hypre_handle()))
|
||||
{
|
||||
hypre_HandleCubDevAllocator(hypre_handle()) -> SetMaxCachedBytes(cub_max_cached_bytes);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user