code style

This commit is contained in:
Ruipeng Li 2022-06-08 10:53:39 -07:00
parent a436b670db
commit 0eab14592b

View File

@ -19,9 +19,13 @@
*- - - - - - - - - - - - - - - - - - - - - - - - - - */ *- - - - - - - - - - - - - - - - - - - - - - - - - - */
template <char type> template <char type>
static __device__ __forceinline__ static __device__ __forceinline__
void rownnz_naive_rowi(HYPRE_Int rowi, HYPRE_Int lane_id, HYPRE_Int *ia, HYPRE_Int *ja, void rownnz_naive_rowi(HYPRE_Int rowi,
HYPRE_Int lane_id,
HYPRE_Int *ia,
HYPRE_Int *ja,
HYPRE_Int *ib, HYPRE_Int *ib,
HYPRE_Int &row_nnz_sum, HYPRE_Int &row_nnz_max) HYPRE_Int &row_nnz_sum,
HYPRE_Int &row_nnz_max)
{ {
/* load the start and end position of row i of A */ /* load the start and end position of row i of A */
HYPRE_Int j = -1; HYPRE_Int j = -1;
@ -57,8 +61,14 @@ void rownnz_naive_rowi(HYPRE_Int rowi, HYPRE_Int lane_id, HYPRE_Int *ia, HYPRE_I
template <char type, HYPRE_Int NUM_WARPS_PER_BLOCK> template <char type, HYPRE_Int NUM_WARPS_PER_BLOCK>
__global__ __global__
void csr_spmm_rownnz_naive(HYPRE_Int M, /*HYPRE_Int K,*/ HYPRE_Int N, HYPRE_Int *ia, HYPRE_Int *ja, void csr_spmm_rownnz_naive(HYPRE_Int M,
HYPRE_Int *ib, HYPRE_Int *jb, HYPRE_Int *rcL, HYPRE_Int *rcU) HYPRE_Int N,
HYPRE_Int *ia,
HYPRE_Int *ja,
HYPRE_Int *ib,
HYPRE_Int *jb,
HYPRE_Int *rcL,
HYPRE_Int *rcU)
{ {
const HYPRE_Int num_warps = NUM_WARPS_PER_BLOCK * gridDim.x; const HYPRE_Int num_warps = NUM_WARPS_PER_BLOCK * gridDim.x;
/* warp id inside the block */ /* warp id inside the block */
@ -106,7 +116,8 @@ void csr_spmm_rownnz_naive(HYPRE_Int M, /*HYPRE_Int K,*/ HYPRE_Int N, HYPRE_Int
COHEN COHEN
*- - - - - - - - - - - - - - - - - - - - - - - - - - */ *- - - - - - - - - - - - - - - - - - - - - - - - - - */
__global__ __global__
void expdistfromuniform(HYPRE_Int n, float *x) void expdistfromuniform(HYPRE_Int n,
float *x)
{ {
const HYPRE_Int global_thread_id = blockIdx.x * get_block_size() + get_thread_id(); const HYPRE_Int global_thread_id = blockIdx.x * get_block_size() + get_thread_id();
const HYPRE_Int total_num_threads = gridDim.x * get_block_size(); const HYPRE_Int total_num_threads = gridDim.x * get_block_size();
@ -122,8 +133,16 @@ void expdistfromuniform(HYPRE_Int n, float *x)
/* T = float: single precision should be enough */ /* T = float: single precision should be enough */
template <typename T, HYPRE_Int NUM_WARPS_PER_BLOCK, HYPRE_Int SHMEM_SIZE_PER_WARP, HYPRE_Int layer> template <typename T, HYPRE_Int NUM_WARPS_PER_BLOCK, HYPRE_Int SHMEM_SIZE_PER_WARP, HYPRE_Int layer>
__global__ __global__
void cohen_rowest_kernel(HYPRE_Int nrow, HYPRE_Int *rowptr, HYPRE_Int *colidx, T *V_in, T *V_out, void cohen_rowest_kernel(HYPRE_Int nrow,
HYPRE_Int *rc, HYPRE_Int nsamples, HYPRE_Int *low, HYPRE_Int *upp, T mult) HYPRE_Int *rowptr,
HYPRE_Int *colidx,
T *V_in,
T *V_out,
HYPRE_Int *rc,
HYPRE_Int nsamples,
HYPRE_Int *low,
HYPRE_Int *upp,
T mult)
{ {
const HYPRE_Int num_warps = NUM_WARPS_PER_BLOCK * gridDim.x; const HYPRE_Int num_warps = NUM_WARPS_PER_BLOCK * gridDim.x;
/* warp id inside the block */ /* warp id inside the block */
@ -268,9 +287,19 @@ void cohen_rowest_kernel(HYPRE_Int nrow, HYPRE_Int *rowptr, HYPRE_Int *colidx, T
} }
template <typename T, HYPRE_Int BDIMX, HYPRE_Int BDIMY, HYPRE_Int NUM_WARPS_PER_BLOCK, HYPRE_Int SHMEM_SIZE_PER_WARP> template <typename T, HYPRE_Int BDIMX, HYPRE_Int BDIMY, HYPRE_Int NUM_WARPS_PER_BLOCK, HYPRE_Int SHMEM_SIZE_PER_WARP>
void csr_spmm_rownnz_cohen(HYPRE_Int M, HYPRE_Int K, HYPRE_Int N, HYPRE_Int *d_ia, HYPRE_Int *d_ja, void csr_spmm_rownnz_cohen(HYPRE_Int M,
HYPRE_Int *d_ib, HYPRE_Int *d_jb, HYPRE_Int *d_low, HYPRE_Int *d_upp, HYPRE_Int *d_rc, HYPRE_Int K,
HYPRE_Int nsamples, T mult_factor, T *work) HYPRE_Int N,
HYPRE_Int *d_ia,
HYPRE_Int *d_ja,
HYPRE_Int *d_ib,
HYPRE_Int *d_jb,
HYPRE_Int *d_low,
HYPRE_Int *d_upp,
HYPRE_Int *d_rc,
HYPRE_Int nsamples,
T mult_factor,
T *work)
{ {
dim3 bDim(BDIMX, BDIMY, NUM_WARPS_PER_BLOCK); dim3 bDim(BDIMX, BDIMY, NUM_WARPS_PER_BLOCK);
hypre_assert(bDim.x * bDim.y == HYPRE_WARP_SIZE); hypre_assert(bDim.x * bDim.y == HYPRE_WARP_SIZE);
@ -355,8 +384,8 @@ hypreDevice_CSRSpGemmRownnzEstimate( HYPRE_Int m,
// for cases where one WARP works on a row // for cases where one WARP works on a row
dim3 gDim( (m + bDim.z - 1) / bDim.z ); dim3 gDim( (m + bDim.z - 1) / bDim.z );
size_t cohen_nsamples = hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle()); size_t cohen_nsamples = hypre_HandleSpgemmRownnzEstimateNsamples(hypre_handle());
float cohen_mult = hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle()); float cohen_mult = hypre_HandleSpgemmRownnzEstimateMultFactor(hypre_handle());
//hypre_printf("Cohen Nsamples %d, mult %f\n", cohen_nsamples, cohen_mult); //hypre_printf("Cohen Nsamples %d, mult %f\n", cohen_nsamples, cohen_mult);
@ -377,7 +406,7 @@ hypreDevice_CSRSpGemmRownnzEstimate( HYPRE_Int m,
/* [optional] first run naive estimate for naive lower and upper bounds, /* [optional] first run naive estimate for naive lower and upper bounds,
which will be given to Cohen's alg as corrections */ which will be given to Cohen's alg as corrections */
char *work_mem = hypre_TAlloc(char, char *work_mem = hypre_TAlloc(char,
cohen_nsamples * (n + k) * sizeof(float) +2 * m * sizeof(HYPRE_Int), cohen_nsamples * (n + k) * sizeof(float) + 2 * m * sizeof(HYPRE_Int),
HYPRE_MEMORY_DEVICE); HYPRE_MEMORY_DEVICE);
char *work_mem_saved = work_mem; char *work_mem_saved = work_mem;