This PR (by @pbauman #428) adds support for PRNG on AMD GPUs via rocRAND. 

Co-authored-by: Paul T. Bauman <ptbauman@gmail.com>
This commit is contained in:
Ruipeng Li 2021-07-23 09:22:00 -07:00 committed by GitHub
parent 5cf30f95b9
commit a522bfdb91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 93 additions and 13 deletions

View File

@ -196,8 +196,8 @@ hypre_BoomerAMGIndepSetDevice( hypre_ParCSRMatrix *S,
}
/* Augments measures by some random value between 0 and 1
* aug_rand: 1: GPU CURAND; 11: GPU SEQ CURAND
* 2: CPU RAND; 12: CPU SEQ RAND
* aug_rand: 1: GPU CURAND/ROCRAND; 11: GPU SEQ CURAND/ROCRAND
* 2: CPU RAND; 12: CPU SEQ RAND
*/
HYPRE_Int
hypre_BoomerAMGIndepSetInitDevice( hypre_ParCSRMatrix *S,
@ -212,12 +212,6 @@ hypre_BoomerAMGIndepSetInitDevice( hypre_ParCSRMatrix *S,
hypre_MPI_Comm_rank(comm, &my_id);
// RL: TODO
#if defined(HYPRE_USING_ROCRAND)
if (aug_rand == 1) { aug_rand = 2; }
if (aug_rand == 11) { aug_rand = 12; }
#endif
urand = hypre_TAlloc(HYPRE_Real, num_rows_diag, HYPRE_MEMORY_DEVICE);
if (aug_rand == 2 || aug_rand == 12)

View File

@ -110,6 +110,10 @@ struct hypre_umpire_device_allocator
#include <rocsparse.h>
#endif
#if defined(HYPRE_USING_ROCRAND)
#include <rocrand.h>
#endif
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)
#define HYPRE_CUDA_CALL(call) do { \
@ -163,6 +167,13 @@ struct hypre_umpire_device_allocator
hypre_assert(0); exit(1); \
} } while(0)
/* Error-checking wrapper for rocRAND API calls.
 *
 * Evaluates `call` once and stores its result in a local `rocrand_status`.
 * If the result is not ROCRAND_STATUS_SUCCESS, the numeric status code is
 * printed together with the file and line of the call site, then
 * hypre_assert(0) is raised and the process exits with status 1.
 *
 * The do { ... } while(0) wrapper lets the macro be used as a single
 * statement (e.g. inside an unbraced if/else).
 *
 * NOTE(review): the expansion declares a local named `err`; an argument
 * expression that itself refers to a caller variable called `err` would
 * bind to this local instead. */
#define HYPRE_ROCRAND_CALL(call) do { \
rocrand_status err = call; \
if (ROCRAND_STATUS_SUCCESS != err) { \
hypre_printf("ROCRAND ERROR (code = %d) at %s:%d\n", err, __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
struct hypre_cub_CachingDeviceAllocator;
typedef struct hypre_cub_CachingDeviceAllocator hypre_cub_CachingDeviceAllocator;
@ -187,6 +198,10 @@ struct hypre_CudaData
curandGenerator_t curand_generator;
#endif
#if defined(HYPRE_USING_ROCRAND)
rocrand_generator curand_generator;
#endif
#if defined(HYPRE_USING_CUBLAS)
cublasHandle_t cublas_handle;
#endif
@ -267,6 +282,10 @@ void hypre_CudaDataDestroy(hypre_CudaData* data);
curandGenerator_t hypre_CudaDataCurandGenerator(hypre_CudaData *data);
#endif
#if defined(HYPRE_USING_ROCRAND)
rocrand_generator hypre_CudaDataCurandGenerator(hypre_CudaData *data);
#endif
#if defined(HYPRE_USING_CUBLAS)
cublasHandle_t hypre_CudaDataCublasHandle(hypre_CudaData *data);
#endif

View File

@ -990,8 +990,25 @@ hypre_CurandUniform_core( HYPRE_Int n,
}
#endif /* #if defined(HYPRE_USING_CURAND) */
// RL: TODO
#if defined(HYPRE_USING_ROCRAND)
/* Returns the rocRAND generator stored in `data`, creating it on first use.
 *
 * When `data->curand_generator` is still unset, a generator of type
 * ROCRAND_RNG_PSEUDO_DEFAULT is created, seeded with 1234, bound to the
 * stream returned by hypre_CudaDataCudaComputeStream(data), and cached in
 * `data` so later calls return the same object. Any failing rocRAND call
 * is handled by HYPRE_ROCRAND_CALL. */
rocrand_generator
hypre_CudaDataCurandGenerator(hypre_CudaData *data)
{
   if (!data->curand_generator)
   {
      rocrand_generator new_gen;

      HYPRE_ROCRAND_CALL( rocrand_create_generator(&new_gen, ROCRAND_RNG_PSEUDO_DEFAULT) );
      HYPRE_ROCRAND_CALL( rocrand_set_seed(new_gen, 1234ULL) );
      HYPRE_ROCRAND_CALL( rocrand_set_stream(new_gen, hypre_CudaDataCudaComputeStream(data)) );

      /* cache only after the generator is fully configured */
      data->curand_generator = new_gen;
   }

   return data->curand_generator;
}
template <typename T>
HYPRE_Int
hypre_CurandUniform_core( HYPRE_Int n,
@ -1001,8 +1018,32 @@ hypre_CurandUniform_core( HYPRE_Int n,
HYPRE_Int set_offset,
hypre_ulonglongint offset)
{
hypre_error_w_msg(1, "ROCRand has not been available");
exit(0);
hypre_GpuProfilingPushRange("hypre_CurandUniform_core");
rocrand_generator gen = hypre_HandleCurandGenerator(hypre_handle());
if (set_seed)
{
HYPRE_ROCRAND_CALL( rocrand_set_seed(gen, seed) );
}
if (set_offset)
{
HYPRE_ROCRAND_CALL( rocrand_set_offset(gen, offset) );
}
if (sizeof(T) == sizeof(hypre_double))
{
HYPRE_ROCRAND_CALL( rocrand_generate_uniform_double(gen, (hypre_double *) urand, n) );
}
else if (sizeof(T) == sizeof(float))
{
HYPRE_ROCRAND_CALL( rocrand_generate_uniform(gen, (float *) urand, n) );
}
hypre_GpuProfilingPopRange();
return hypre_error_flag;
}
#endif /* #if defined(HYPRE_USING_ROCRAND) */
@ -1117,7 +1158,7 @@ hypre_CudaDataCreate()
hypre_CudaDataSpgemmHashType(data) = 'L';
/* pmis */
#ifdef HYPRE_USING_CURAND
#if defined(HYPRE_USING_CURAND) || defined(HYPRE_USING_ROCRAND)
hypre_CudaDataUseGpuRand(data) = 1;
#else
hypre_CudaDataUseGpuRand(data) = 0;
@ -1155,6 +1196,13 @@ hypre_CudaDataDestroy(hypre_CudaData *data)
}
#endif
#if defined(HYPRE_USING_ROCRAND)
if (data->curand_generator)
{
HYPRE_ROCRAND_CALL( rocrand_destroy_generator(data->curand_generator) );
}
#endif
#if defined(HYPRE_USING_CUBLAS)
if (data->cublas_handle)
{

View File

@ -45,6 +45,10 @@
#include <rocsparse.h>
#endif
#if defined(HYPRE_USING_ROCRAND)
#include <rocrand.h>
#endif
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_DEVICE_OPENMP)
#define HYPRE_CUDA_CALL(call) do { \
@ -98,6 +102,13 @@
hypre_assert(0); exit(1); \
} } while(0)
/* Error-checking wrapper for rocRAND API calls.
 *
 * Evaluates `call` once and stores its result in a local `rocrand_status`.
 * If the result is not ROCRAND_STATUS_SUCCESS, the numeric status code is
 * printed together with the file and line of the call site, then
 * hypre_assert(0) is raised and the process exits with status 1.
 *
 * The do { ... } while(0) wrapper lets the macro be used as a single
 * statement (e.g. inside an unbraced if/else).
 *
 * NOTE(review): the expansion declares a local named `err`; an argument
 * expression that itself refers to a caller variable called `err` would
 * bind to this local instead. */
#define HYPRE_ROCRAND_CALL(call) do { \
rocrand_status err = call; \
if (ROCRAND_STATUS_SUCCESS != err) { \
hypre_printf("ROCRAND ERROR (code = %d) at %s:%d\n", err, __FILE__, __LINE__); \
hypre_assert(0); exit(1); \
} } while(0)
struct hypre_cub_CachingDeviceAllocator;
typedef struct hypre_cub_CachingDeviceAllocator hypre_cub_CachingDeviceAllocator;
@ -122,6 +133,10 @@ struct hypre_CudaData
curandGenerator_t curand_generator;
#endif
#if defined(HYPRE_USING_ROCRAND)
rocrand_generator curand_generator;
#endif
#if defined(HYPRE_USING_CUBLAS)
cublasHandle_t cublas_handle;
#endif
@ -202,6 +217,10 @@ void hypre_CudaDataDestroy(hypre_CudaData* data);
curandGenerator_t hypre_CudaDataCurandGenerator(hypre_CudaData *data);
#endif
#if defined(HYPRE_USING_ROCRAND)
rocrand_generator hypre_CudaDataCurandGenerator(hypre_CudaData *data);
#endif
#if defined(HYPRE_USING_CUBLAS)
cublasHandle_t hypre_CudaDataCublasHandle(hypre_CudaData *data);
#endif

View File

@ -208,7 +208,7 @@ HYPRE_Init()
hypre_HandleCusparseHandle(_hypre_handle);
#endif
#if defined(HYPRE_USING_CURAND)
#if defined(HYPRE_USING_CURAND) || defined(HYPRE_USING_ROCRAND)
hypre_HandleCurandGenerator(_hypre_handle);
#endif