fix cub allocator; unify all allocators (#447)

This PR fixes CUB allocator with UVM and also unifies the CUB and UMPIRE allocators.
This commit is contained in:
Ruipeng Li 2021-08-11 13:01:51 -07:00 committed by GitHub
parent 6f0bdbbb11
commit 40dc7ce550
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 44 additions and 109 deletions

View File

@ -1282,7 +1282,7 @@ typedef struct
#define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateNsamples(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateNsamples(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateMultFactor(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateMultFactor(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleSpgemmHashType(hypre_handle) hypre_CudaDataSpgemmHashType(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleSpgemmHashType(hypre_handle) hypre_CudaDataSpgemmHashType(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleUmpireDeviceAllocator(hypre_handle) hypre_CudaDataUmpireDeviceAllocator(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleDeviceAllocator(hypre_handle) hypre_CudaDataDeviceAllocator(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleUseGpuRand(hypre_handle) hypre_CudaDataUseGpuRand(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleUseGpuRand(hypre_handle) hypre_CudaDataUseGpuRand(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm) #define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm)

View File

@ -15,51 +15,38 @@ extern "C++" {
* SPDX-License-Identifier: (Apache-2.0 OR MIT) * SPDX-License-Identifier: (Apache-2.0 OR MIT)
******************************************************************************/ ******************************************************************************/
#ifndef HYPRE_UMPIRE_ALLOCATOR_H #ifndef DEVICE_ALLOCATOR_H
#define HYPRE_UMPIRE_ALLOCATOR_H #define DEVICE_ALLOCATOR_H
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_UMPIRE_DEVICE)
/* /* C++ style memory allocator for GPU **device** memory
#include "umpire/Allocator.hpp" * Just wraps _hypre_TAlloc and _hypre_TFree */
#include "umpire/ResourceManager.hpp" struct hypre_device_allocator
#include "umpire/strategy/DynamicPool.hpp"
#include "umpire/strategy/AllocationAdvisor.hpp"
#include "umpire/strategy/MonotonicAllocationStrategy.hpp"
#include "umpire/util/Macros.hpp"
*/
struct hypre_umpire_device_allocator
{ {
typedef char value_type; typedef char value_type;
hypre_umpire_device_allocator() hypre_device_allocator()
{ {
// constructor // constructor
} }
~hypre_umpire_device_allocator() ~hypre_device_allocator()
{ {
// destructor // destructor
} }
char *allocate(std::ptrdiff_t num_bytes) char *allocate(std::ptrdiff_t num_bytes)
{ {
char *ptr = NULL; return _hypre_TAlloc(char, num_bytes, hypre_MEMORY_DEVICE);
hypre_umpire_device_pooled_allocate((void**) &ptr, num_bytes);
return ptr;
} }
void deallocate(char *ptr, size_t n) void deallocate(char *ptr, size_t n)
{ {
hypre_umpire_device_pooled_free(ptr); _hypre_TFree(ptr, hypre_MEMORY_DEVICE);
} }
}; };
#endif /* #ifdef HYPRE_USING_UMPIRE_DEVICE */
#endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */ #endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
#endif #endif
@ -228,9 +215,11 @@ struct hypre_CudaData
hypre_cub_CachingDeviceAllocator *cub_dev_allocator; hypre_cub_CachingDeviceAllocator *cub_dev_allocator;
hypre_cub_CachingDeviceAllocator *cub_uvm_allocator; hypre_cub_CachingDeviceAllocator *cub_uvm_allocator;
#endif #endif
#ifdef HYPRE_USING_UMPIRE_DEVICE
hypre_umpire_device_allocator umpire_device_allocator; #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_device_allocator device_allocator;
#endif #endif
HYPRE_Int cuda_device; HYPRE_Int cuda_device;
/* by default, hypre puts GPU computations in this stream /* by default, hypre puts GPU computations in this stream
* Do not be confused with the default (null) CUDA stream */ * Do not be confused with the default (null) CUDA stream */
@ -272,7 +261,7 @@ struct hypre_CudaData
#define hypre_CudaDataSpgemmRownnzEstimateNsamples(data) ((data) -> spgemm_rownnz_estimate_nsamples) #define hypre_CudaDataSpgemmRownnzEstimateNsamples(data) ((data) -> spgemm_rownnz_estimate_nsamples)
#define hypre_CudaDataSpgemmRownnzEstimateMultFactor(data) ((data) -> spgemm_rownnz_estimate_mult_factor) #define hypre_CudaDataSpgemmRownnzEstimateMultFactor(data) ((data) -> spgemm_rownnz_estimate_mult_factor)
#define hypre_CudaDataSpgemmHashType(data) ((data) -> spgemm_hash_type) #define hypre_CudaDataSpgemmHashType(data) ((data) -> spgemm_hash_type)
#define hypre_CudaDataUmpireDeviceAllocator(data) ((data) -> umpire_device_allocator) #define hypre_CudaDataDeviceAllocator(data) ((data) -> device_allocator)
#define hypre_CudaDataUseGpuRand(data) ((data) -> use_gpu_rand) #define hypre_CudaDataUseGpuRand(data) ((data) -> use_gpu_rand)
hypre_CudaData* hypre_CudaDataCreate(); hypre_CudaData* hypre_CudaDataCreate();
@ -435,35 +424,13 @@ using namespace thrust::placeholders;
/* RL: TODO Want macro HYPRE_THRUST_CALL to return value but I don't know how to do it right /* RL: TODO Want macro HYPRE_THRUST_CALL to return value but I don't know how to do it right
* The following one works OK for now */ * The following one works OK for now */
#ifdef HYPRE_USING_UMPIRE_DEVICE
#if defined(HYPRE_USING_CUDA) #if defined(HYPRE_USING_CUDA)
#define HYPRE_THRUST_CALL(func_name, ...) \ #define HYPRE_THRUST_CALL(func_name, ...) \
thrust::func_name(thrust::cuda::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__); thrust::func_name(thrust::cuda::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#elif defined(HYPRE_USING_HIP) #elif defined(HYPRE_USING_HIP) \
#define HYPRE_THRUST_CALL(func_name, ...) \ thrust::func_name(thrust::hip::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
thrust::func_name(thrust::hip::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#endif // HYPRE_USING_CUDA
#elif HYPRE_USING_DEVICE_POOL
#if defined(HYPRE_USING_CUDA)
#define HYPRE_THRUST_CALL(func_name, ...) \
thrust::func_name(thrust::cuda::par(*(hypre_HandleCubDevAllocator(hypre_handle()))).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#endif #endif
#else
#if defined(HYPRE_USING_CUDA)
#define HYPRE_THRUST_CALL(func_name, ...) \
thrust::func_name(thrust::cuda::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#elif defined(HYPRE_USING_HIP)
#define HYPRE_THRUST_CALL(func_name, ...) \
thrust::func_name(thrust::hip::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#endif // HYPRE_USING_CUDA
#endif // HYPRE_USING_UMPIRE_DEVICE
/* return the number of threads in block */ /* return the number of threads in block */
template <hypre_int dim> template <hypre_int dim>
static __device__ __forceinline__ static __device__ __forceinline__

View File

@ -163,9 +163,11 @@ struct hypre_CudaData
hypre_cub_CachingDeviceAllocator *cub_dev_allocator; hypre_cub_CachingDeviceAllocator *cub_dev_allocator;
hypre_cub_CachingDeviceAllocator *cub_uvm_allocator; hypre_cub_CachingDeviceAllocator *cub_uvm_allocator;
#endif #endif
#ifdef HYPRE_USING_UMPIRE_DEVICE
hypre_umpire_device_allocator umpire_device_allocator; #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_device_allocator device_allocator;
#endif #endif
HYPRE_Int cuda_device; HYPRE_Int cuda_device;
/* by default, hypre puts GPU computations in this stream /* by default, hypre puts GPU computations in this stream
* Do not be confused with the default (null) CUDA stream */ * Do not be confused with the default (null) CUDA stream */
@ -207,7 +209,7 @@ struct hypre_CudaData
#define hypre_CudaDataSpgemmRownnzEstimateNsamples(data) ((data) -> spgemm_rownnz_estimate_nsamples) #define hypre_CudaDataSpgemmRownnzEstimateNsamples(data) ((data) -> spgemm_rownnz_estimate_nsamples)
#define hypre_CudaDataSpgemmRownnzEstimateMultFactor(data) ((data) -> spgemm_rownnz_estimate_mult_factor) #define hypre_CudaDataSpgemmRownnzEstimateMultFactor(data) ((data) -> spgemm_rownnz_estimate_mult_factor)
#define hypre_CudaDataSpgemmHashType(data) ((data) -> spgemm_hash_type) #define hypre_CudaDataSpgemmHashType(data) ((data) -> spgemm_hash_type)
#define hypre_CudaDataUmpireDeviceAllocator(data) ((data) -> umpire_device_allocator) #define hypre_CudaDataDeviceAllocator(data) ((data) -> device_allocator)
#define hypre_CudaDataUseGpuRand(data) ((data) -> use_gpu_rand) #define hypre_CudaDataUseGpuRand(data) ((data) -> use_gpu_rand)
hypre_CudaData* hypre_CudaDataCreate(); hypre_CudaData* hypre_CudaDataCreate();
@ -370,35 +372,13 @@ using namespace thrust::placeholders;
/* RL: TODO Want macro HYPRE_THRUST_CALL to return value but I don't know how to do it right /* RL: TODO Want macro HYPRE_THRUST_CALL to return value but I don't know how to do it right
* The following one works OK for now */ * The following one works OK for now */
#ifdef HYPRE_USING_UMPIRE_DEVICE
#if defined(HYPRE_USING_CUDA) #if defined(HYPRE_USING_CUDA)
#define HYPRE_THRUST_CALL(func_name, ...) \ #define HYPRE_THRUST_CALL(func_name, ...) \
thrust::func_name(thrust::cuda::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__); thrust::func_name(thrust::cuda::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#elif defined(HYPRE_USING_HIP) #elif defined(HYPRE_USING_HIP) \
#define HYPRE_THRUST_CALL(func_name, ...) \ thrust::func_name(thrust::hip::par(hypre_HandleDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
thrust::func_name(thrust::hip::par(hypre_HandleUmpireDeviceAllocator(hypre_handle())).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#endif // HYPRE_USING_CUDA
#elif HYPRE_USING_DEVICE_POOL
#if defined(HYPRE_USING_CUDA)
#define HYPRE_THRUST_CALL(func_name, ...) \
thrust::func_name(thrust::cuda::par(*(hypre_HandleCubDevAllocator(hypre_handle()))).on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#endif #endif
#else
#if defined(HYPRE_USING_CUDA)
#define HYPRE_THRUST_CALL(func_name, ...) \
thrust::func_name(thrust::cuda::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#elif defined(HYPRE_USING_HIP)
#define HYPRE_THRUST_CALL(func_name, ...) \
thrust::func_name(thrust::hip::par.on(hypre_HandleCudaComputeStream(hypre_handle())), __VA_ARGS__);
#endif // HYPRE_USING_CUDA
#endif // HYPRE_USING_UMPIRE_DEVICE
/* return the number of threads in block */ /* return the number of threads in block */
template <hypre_int dim> template <hypre_int dim>
static __device__ __forceinline__ static __device__ __forceinline__

View File

@ -5,51 +5,38 @@
* SPDX-License-Identifier: (Apache-2.0 OR MIT) * SPDX-License-Identifier: (Apache-2.0 OR MIT)
******************************************************************************/ ******************************************************************************/
#ifndef HYPRE_UMPIRE_ALLOCATOR_H #ifndef DEVICE_ALLOCATOR_H
#define HYPRE_UMPIRE_ALLOCATOR_H #define DEVICE_ALLOCATOR_H
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_UMPIRE_DEVICE)
/* /* C++ style memory allocator for GPU **device** memory
#include "umpire/Allocator.hpp" * Just wraps _hypre_TAlloc and _hypre_TFree */
#include "umpire/ResourceManager.hpp" struct hypre_device_allocator
#include "umpire/strategy/DynamicPool.hpp"
#include "umpire/strategy/AllocationAdvisor.hpp"
#include "umpire/strategy/MonotonicAllocationStrategy.hpp"
#include "umpire/util/Macros.hpp"
*/
struct hypre_umpire_device_allocator
{ {
typedef char value_type; typedef char value_type;
hypre_umpire_device_allocator() hypre_device_allocator()
{ {
// constructor // constructor
} }
~hypre_umpire_device_allocator() ~hypre_device_allocator()
{ {
// destructor // destructor
} }
char *allocate(std::ptrdiff_t num_bytes) char *allocate(std::ptrdiff_t num_bytes)
{ {
char *ptr = NULL; return _hypre_TAlloc(char, num_bytes, hypre_MEMORY_DEVICE);
hypre_umpire_device_pooled_allocate((void**) &ptr, num_bytes);
return ptr;
} }
void deallocate(char *ptr, size_t n) void deallocate(char *ptr, size_t n)
{ {
hypre_umpire_device_pooled_free(ptr); _hypre_TFree(ptr, hypre_MEMORY_DEVICE);
} }
}; };
#endif /* #ifdef HYPRE_USING_UMPIRE_DEVICE */
#endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */ #endif /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
#endif #endif

View File

@ -224,7 +224,7 @@ HYPRE_Init()
#ifdef HYPRE_USING_DEVICE_POOL #ifdef HYPRE_USING_DEVICE_POOL
/* Keep this check here at the end of HYPRE_Init() /* Keep this check here at the end of HYPRE_Init()
* Make sure that device pool allocator has not been setup in HYPRE_Init, * Make sure that device pool allocator has not been setup in HYPRE_Init,
* otherwise users are not able to set the parametersB * otherwise users are not able to set all the parameters
*/ */
if ( hypre_HandleCubDevAllocator(_hypre_handle) || if ( hypre_HandleCubDevAllocator(_hypre_handle) ||
hypre_HandleCubUvmAllocator(_hypre_handle) ) hypre_HandleCubUvmAllocator(_hypre_handle) )

View File

@ -76,7 +76,7 @@ typedef struct
#define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateNsamples(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateNsamples(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateMultFactor(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle) hypre_CudaDataSpgemmRownnzEstimateMultFactor(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleSpgemmHashType(hypre_handle) hypre_CudaDataSpgemmHashType(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleSpgemmHashType(hypre_handle) hypre_CudaDataSpgemmHashType(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleUmpireDeviceAllocator(hypre_handle) hypre_CudaDataUmpireDeviceAllocator(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleDeviceAllocator(hypre_handle) hypre_CudaDataDeviceAllocator(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleUseGpuRand(hypre_handle) hypre_CudaDataUseGpuRand(hypre_HandleCudaData(hypre_handle)) #define hypre_HandleUseGpuRand(hypre_handle) hypre_CudaDataUseGpuRand(hypre_HandleCudaData(hypre_handle))
#define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm) #define hypre_HandleUmpireResourceMan(hypre_handle) ((hypre_handle) -> umpire_rm)

View File

@ -88,10 +88,10 @@ extern "C++" {
# Structures and prototypes # Structures and prototypes
#=========================================================================== #===========================================================================
cat umpire_allocator.h >> $INTERNAL_HEADER cat device_allocator.h >> $INTERNAL_HEADER
cat cuda_utils.h >> $INTERNAL_HEADER cat cuda_utils.h >> $INTERNAL_HEADER
cat cuda_reducer.h >> $INTERNAL_HEADER cat cuda_reducer.h >> $INTERNAL_HEADER
cat cub_allocator.h >> $INTERNAL_HEADER cat cub_allocator.h >> $INTERNAL_HEADER
#=========================================================================== #===========================================================================
# Include guards # Include guards

View File

@ -1272,6 +1272,7 @@ hypre_SetCubMemPoolSize(hypre_uint cub_bin_growth,
hypre_HandleCubMaxBin(hypre_handle()) = cub_max_bin; hypre_HandleCubMaxBin(hypre_handle()) = cub_max_bin;
hypre_HandleCubMaxCachedBytes(hypre_handle()) = cub_max_cached_bytes; hypre_HandleCubMaxCachedBytes(hypre_handle()) = cub_max_cached_bytes;
//TODO XXX RL: cub_min_bin, cub_max_bin are not (re)set
if (hypre_HandleCubDevAllocator(hypre_handle())) if (hypre_HandleCubDevAllocator(hypre_handle()))
{ {
hypre_HandleCubDevAllocator(hypre_handle()) -> SetMaxCachedBytes(cub_max_cached_bytes); hypre_HandleCubDevAllocator(hypre_handle()) -> SetMaxCachedBytes(cub_max_cached_bytes);