fix cub allocator; unify all allocators (#447)

This PR fixes the CUB allocator when used with UVM and unifies the CUB and Umpire allocators behind a single device allocator.
Ruipeng Li 2021-08-11 13:01:51 -07:00 committed by GitHub
parent 6f0bdbbb11
commit 40dc7ce550
8 changed files with 44 additions and 109 deletions
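For context on the mechanism behind the unification: Thrust's par(...) execution policy accepts any object that exposes a char value_type plus allocate/deallocate members, and it routes its temporary scratch buffers through that object. Below is a minimal standalone sketch of that hook, not hypre code; example_device_allocator and the cudaMalloc/cudaFree backend are illustrative stand-ins for the hypre_device_allocator introduced here, which forwards to _hypre_TAlloc/_hypre_TFree instead.

// Minimal sketch (illustration only, not hypre code): a Thrust-compatible
// device allocator and how it plugs into thrust::cuda::par(...).on(stream).
#include <cstddef>
#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/system/cuda/execution_policy.h>

struct example_device_allocator
{
   typedef char value_type;

   char *allocate(std::ptrdiff_t num_bytes)
   {
      void *ptr = nullptr;
      cudaMalloc(&ptr, num_bytes);        // placeholder backend
      return static_cast<char *>(ptr);
   }

   void deallocate(char *ptr, size_t /*num_bytes*/)
   {
      cudaFree(ptr);
   }
};

int main()
{
   thrust::device_vector<int> v(1 << 20, 1);
   example_device_allocator alloc;
   cudaStream_t stream = 0;

   // Thrust pulls its scratch memory through alloc instead of calling
   // cudaMalloc itself, mirroring how HYPRE_THRUST_CALL passes
   // hypre_HandleDeviceAllocator(hypre_handle()) in the diff below.
   thrust::sort(thrust::cuda::par(alloc).on(stream), v.begin(), v.end());

   return 0;
}

Because the unified allocator is a thin wrapper over hypre's own memory layer, one allocator type can serve the CUB pool, the Umpire pool, or plain device memory, whichever backend _hypre_TAlloc is configured to use.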


@@ -1282,7 +1282,7 @@ typedef struct
 #define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateNsamples(hypre_HandleCudaData(hypre_handle))
 #define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateMultFactor(hypre_HandleCudaData(hypre_handle))
 #define hypre_HandleSpgemmHashType(hypre_handle) hypre_CudaDataSpgemmHashType(hypre_HandleCudaData(hypre_handle))
-#define hypre_HandleUmpireDeviceAllocator(hypre_handle) hypre_CudaDataUmpireDeviceAllocator(hypre_HandleCudaData(hypre_handle))
+#define hypre_HandleDeviceAllocator(hypre_handle) hypre_CudaDataDeviceAllocator(hypre_HandleCudaData(hypre_handle))
 #define hypre_HandleUseGpuRand(hypre_handle) hypre_CudaDataUseGpuRand(hypre_HandleCudaData(hypre_handle))
 #define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm)


@@ -15,51 +15,38 @@ extern "C++" {
  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
  ******************************************************************************/
-#ifndef HYPRE_UMPIRE_ALLOCATOR_H
-#define HYPRE_UMPIRE_ALLOCATOR_H
+#ifndef DEVICE_ALLOCATOR_H
+#define DEVICE_ALLOCATOR_H
 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
-#if defined(HYPRE_USING_UMPIRE_DEVICE)
-/*
-#include "umpire/Allocator.hpp"
-#include "umpire/ResourceManager.hpp"
-#include "umpire/strategy/DynamicPool.hpp"
-#include "umpire/strategy/AllocationAdvisor.hpp"
-#include "umpire/strategy/MonotonicAllocationStrategy.hpp"
-#include "umpire/util/Macros.hpp"
-*/
-struct hypre_umpire_device_allocator
+/* C++ style memory allocator for GPU **device** memory
+ * Just wraps _hypre_TAlloc and _hypre_TFree */
+struct hypre_device_allocator
 {
    typedef char value_type;
-   hypre_umpire_device_allocator()
+   hypre_device_allocator()
    {
       // constructor
    }
-   ~hypre_umpire_device_allocator()
+   ~hypre_device_allocator()
    {
       // destructor
    }
    char *allocate(std::ptrdiff_t num_bytes)
    {
-      char *ptr = NULL;
-      hypre_umpire_device_pooled_allocate((void**) &ptr, num_bytes);
-      return ptr;
+      return _hypre_TAlloc(char, num_bytes, hypre_MEMORY_DEVICE);
    }
    void deallocate(char *ptr, size_t n)
    {
-      hypre_umpire_device_pooled_free(ptr);
+      _hypre_TFree(ptr, hypre_MEMORY_DEVICE);
    }
 };
-#endif /* #ifdef HYPRE_USING_UMPIRE_DEVICE */
 #endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
 #endif
@@ -228,9 +215,11 @@ struct hypre_CudaData
    hypre_cub_CachingDeviceAllocator *cub_dev_allocator;
    hypre_cub_CachingDeviceAllocator *cub_uvm_allocator;
 #endif
-#ifdef HYPRE_USING_UMPIRE_DEVICE
-   hypre_umpire_device_allocator umpire_device_allocator;
+#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
+   hypre_device_allocator device_allocator;
 #endif
    HYPRE_Int cuda_device;
    /* by default, hypre puts GPU computations in this stream
    * Do not be confused with the default (null) CUDA stream */
@@ -272,7 +261,7 @@ struct hypre_CudaData
 #define hypre_CudaDataSpgemmRownnzEstimateNsamples(data) ((data) -> spgemm_rownnz_estimate_nsamples)
 #define hypre_CudaDataSpgemmRownnzEstimateMultFactor(data) ((data) -> spgemm_rownnz_estimate_mult_factor)
 #define hypre_CudaDataSpgemmHashType(data) ((data) -> spgemm_hash_type)
-#define hypre_CudaDataUmpireDeviceAllocator(data) ((data) -> umpire_device_allocator)
+#define hypre_CudaDataDeviceAllocator(data) ((data) -> device_allocator)
 #define hypre_CudaDataUseGpuRand(data) ((data) -> use_gpu_rand)
 hypre_CudaData* hypre_CudaDataCreate();
@@ -435,35 +424,13 @@ using namespace thrust::placeholders;
 /* RL: TODO Want macro HYPRE_THRUST_CALL to return value but I don't know how to do it right
  * The following one works OK for now */
-#ifdef HYPRE_USING_UMPIRE_DEVICE
 #if defined(HYPRE_USING_CUDA)
 #define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::cuda::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
+   thrust::func_name(thrust::cuda::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
 #elif defined(HYPRE_USING_HIP)
 #define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::hip::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
-#endif // HYPRE_USING_CUDA
-#elif HYPRE_USING_DEVICE_POOL
-#if defined(HYPRE_USING_CUDA)
-#define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::cuda::par(*(hypre_HandleCubDevAllocator(hypre_handle()))).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
-#endif
-#else
-#if defined(HYPRE_USING_CUDA)
-#define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::cuda::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
-#elif defined(HYPRE_USING_HIP)
-#define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::hip::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
-#endif // HYPRE_USING_CUDA
-#endif // HYPRE_USING_UMPIRE_DEVICE
+   thrust::func_name(thrust::hip::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
+#endif
 /* return the number of threads in block */
 template <hypre_int dim>
 static __device__ __forceinline__
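For reference, a hypothetical call-site sketch (not part of this diff) of how code built on this header invokes a Thrust algorithm through the macro. The function and variable names are made up; hypre_TAlloc, hypre_TFree, and HYPRE_MEMORY_DEVICE are hypre's existing memory API.

/* Hypothetical sketch: sort a device array through the unified macro. */
void example_sort_device_ints(HYPRE_Int n)
{
   HYPRE_Int *d_keys = hypre_TAlloc(HYPRE_Int, n, HYPRE_MEMORY_DEVICE);

   /* ... fill d_keys on the device ... */

   /* Under HYPRE_USING_CUDA this expands to
    * thrust::sort(thrust::cuda::par(hypre_HandleDeviceAllocator(hypre_handle()))
    *                 .on(hypre_HandleCudaComputeStream(hypre_handle())),
    *              d_keys, d_keys + n); */
   HYPRE_THRUST_CALL(sort, d_keys, d_keys + n);

   hypre_TFree(d_keys, HYPRE_MEMORY_DEVICE);
}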


@@ -163,9 +163,11 @@ struct hypre_CudaData
    hypre_cub_CachingDeviceAllocator *cub_dev_allocator;
    hypre_cub_CachingDeviceAllocator *cub_uvm_allocator;
 #endif
-#ifdef HYPRE_USING_UMPIRE_DEVICE
-   hypre_umpire_device_allocator umpire_device_allocator;
+#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
+   hypre_device_allocator device_allocator;
 #endif
    HYPRE_Int cuda_device;
    /* by default, hypre puts GPU computations in this stream
    * Do not be confused with the default (null) CUDA stream */
@@ -207,7 +209,7 @@ struct hypre_CudaData
 #define hypre_CudaDataSpgemmRownnzEstimateNsamples(data) ((data) -> spgemm_rownnz_estimate_nsamples)
 #define hypre_CudaDataSpgemmRownnzEstimateMultFactor(data) ((data) -> spgemm_rownnz_estimate_mult_factor)
 #define hypre_CudaDataSpgemmHashType(data) ((data) -> spgemm_hash_type)
-#define hypre_CudaDataUmpireDeviceAllocator(data) ((data) -> umpire_device_allocator)
+#define hypre_CudaDataDeviceAllocator(data) ((data) -> device_allocator)
 #define hypre_CudaDataUseGpuRand(data) ((data) -> use_gpu_rand)
 hypre_CudaData* hypre_CudaDataCreate();
@@ -370,35 +372,13 @@ using namespace thrust::placeholders;
 /* RL: TODO Want macro HYPRE_THRUST_CALL to return value but I don't know how to do it right
  * The following one works OK for now */
-#ifdef HYPRE_USING_UMPIRE_DEVICE
 #if defined(HYPRE_USING_CUDA)
 #define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::cuda::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
+   thrust::func_name(thrust::cuda::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
 #elif defined(HYPRE_USING_HIP)
 #define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::hip::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
-#endif // HYPRE_USING_CUDA
-#elif HYPRE_USING_DEVICE_POOL
-#if defined(HYPRE_USING_CUDA)
-#define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::cuda::par(*(hypre_HandleCubDevAllocator(hypre_handle()))).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
-#endif
-#else
-#if defined(HYPRE_USING_CUDA)
-#define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::cuda::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
-#elif defined(HYPRE_USING_HIP)
-#define HYPRE_THRUST_CALL(func_name, ...) \
-   thrust::func_name(thrust::hip::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
-#endif // HYPRE_USING_CUDA
-#endif // HYPRE_USING_UMPIRE_DEVICE
+   thrust::func_name(thrust::hip::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
+#endif
 /* return the number of threads in block */
 template <hypre_int dim>
 static __device__ __forceinline__


@@ -5,51 +5,38 @@
  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
  ******************************************************************************/
-#ifndef HYPRE_UMPIRE_ALLOCATOR_H
-#define HYPRE_UMPIRE_ALLOCATOR_H
+#ifndef DEVICE_ALLOCATOR_H
+#define DEVICE_ALLOCATOR_H
 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
-#if defined(HYPRE_USING_UMPIRE_DEVICE)
-/*
-#include "umpire/Allocator.hpp"
-#include "umpire/ResourceManager.hpp"
-#include "umpire/strategy/DynamicPool.hpp"
-#include "umpire/strategy/AllocationAdvisor.hpp"
-#include "umpire/strategy/MonotonicAllocationStrategy.hpp"
-#include "umpire/util/Macros.hpp"
-*/
-struct hypre_umpire_device_allocator
+/* C++ style memory allocator for GPU **device** memory
+ * Just wraps _hypre_TAlloc and _hypre_TFree */
+struct hypre_device_allocator
 {
    typedef char value_type;
-   hypre_umpire_device_allocator()
+   hypre_device_allocator()
    {
       // constructor
    }
-   ~hypre_umpire_device_allocator()
+   ~hypre_device_allocator()
    {
       // destructor
    }
    char *allocate(std::ptrdiff_t num_bytes)
    {
-      char *ptr = NULL;
-      hypre_umpire_device_pooled_allocate((void**) &ptr, num_bytes);
-      return ptr;
+      return _hypre_TAlloc(char, num_bytes, hypre_MEMORY_DEVICE);
    }
    void deallocate(char *ptr, size_t n)
    {
-      hypre_umpire_device_pooled_free(ptr);
+      _hypre_TFree(ptr, hypre_MEMORY_DEVICE);
    }
 };
-#endif /* #ifdef HYPRE_USING_UMPIRE_DEVICE */
 #endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
 #endif


@@ -224,7 +224,7 @@ HYPRE_Init()
 #ifdef HYPRE_USING_DEVICE_POOL
    /* Keep this check here at the end of HYPRE_Init()
     * Make sure that device pool allocator has not been setup in HYPRE_Init,
-    * otherwise users are not able to set the parametersB
+    * otherwise users are not able to set all the parameters
     */
    if ( hypre_HandleCubDevAllocator(_hypre_handle) ||
        hypre_HandleCubUvmAllocator(_hypre_handle) )
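To illustrate why this check sits at the end of HYPRE_Init(): the pool parameters only take effect if the user sets them after initialization and before the first device allocation creates the pool. A hypothetical usage sketch follows (not from this PR); HYPRE_SetGPUMemoryPoolSize and its argument order are assumptions here, so consult HYPRE_utilities.h for the exact prototype.

/* Hypothetical call order for configuring the device memory pool. */
#include <mpi.h>
#include "HYPRE_utilities.h"

int main(int argc, char *argv[])
{
   MPI_Init(&argc, &argv);
   HYPRE_Init();

   /* bin growth, min bin, max bin, max cached bytes (~1 GB); must happen
    * before any device allocation instantiates the pool */
   HYPRE_SetGPUMemoryPoolSize(8, 1, 32, (size_t) 1 << 30);

   /* ... assemble matrices, call solvers ... */

   HYPRE_Finalize();
   MPI_Finalize();
   return 0;
}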


@@ -76,7 +76,7 @@ typedef struct
 #define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateNsamples(hypre_HandleCudaData(hypre_handle))
 #define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateMultFactor(hypre_HandleCudaData(hypre_handle))
 #define hypre_HandleSpgemmHashType(hypre_handle) hypre_CudaDataSpgemmHashType(hypre_HandleCudaData(hypre_handle))
-#define hypre_HandleUmpireDeviceAllocator(hypre_handle) hypre_CudaDataUmpireDeviceAllocator(hypre_HandleCudaData(hypre_handle))
+#define hypre_HandleDeviceAllocator(hypre_handle) hypre_CudaDataDeviceAllocator(hypre_HandleCudaData(hypre_handle))
 #define hypre_HandleUseGpuRand(hypre_handle) hypre_CudaDataUseGpuRand(hypre_HandleCudaData(hypre_handle))
 #define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm)


@@ -88,10 +88,10 @@ extern "C++" {
 # Structures and prototypes
 #===========================================================================
-cat umpire_allocator.h >> $INTERNAL_HEADER
-cat cuda_utils.h >> $INTERNAL_HEADER
-cat cuda_reducer.h >> $INTERNAL_HEADER
-cat cub_allocator.h >> $INTERNAL_HEADER
+cat device_allocator.h >> $INTERNAL_HEADER
+cat cuda_utils.h >> $INTERNAL_HEADER
+cat cuda_reducer.h >> $INTERNAL_HEADER
+cat cub_allocator.h >> $INTERNAL_HEADER
 #===========================================================================
 # Include guards

@@ -1272,6 +1272,7 @@ hypre_SetCubMemPoolSize(hypre_uint cub_bin_growth,
    hypre_HandleCubMaxBin(hypre_handle()) = cub_max_bin;
    hypre_HandleCubMaxCachedBytes(hypre_handle()) = cub_max_cached_bytes;
+   //TODO XXX RL: cub_min_bin, cub_max_bin are not (re)set
    if (hypre_HandleCubDevAllocator(hypre_handle()))
    {
       hypre_HandleCubDevAllocator(hypre_handle()) -> SetMaxCachedBytes(cub_max_cached_bytes);
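For background, hypre_cub_CachingDeviceAllocator is hypre's embedded copy of CUB's caching allocator, so the cub_* parameters above map onto CUB's bin-growth / min-bin / max-bin / max-cached-bytes model. A small illustrative sketch against the upstream cub::CachingDeviceAllocator interface follows; the numeric values are arbitrary.

// Illustration of the underlying CUB interface; not hypre code.
#include <cub/util_allocator.cuh>

void example_cub_pool()
{
   // bins are geometric: bin_growth^min_bin ... bin_growth^max_bin bytes
   cub::CachingDeviceAllocator pool(/*bin_growth*/ 2,
                                    /*min_bin*/ 10,
                                    /*max_bin*/ 30,
                                    /*max_cached_bytes*/ (size_t) 1 << 31);

   void *d_buf = NULL;
   pool.DeviceAllocate(&d_buf, 1024);        // served from (or added to) the pool
   pool.DeviceFree(d_buf);                   // cached for reuse, not freed immediately

   pool.SetMaxCachedBytes((size_t) 2 << 30); // the same knob hypre adjusts above
}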