tune for AMD GPUs
This commit is contained in:
		
							parent
							
								
									ee88d0b1a4
								
							
						
					
					
						commit
						a5b10499cb
					
				| @ -17,7 +17,12 @@ | ||||
| static const char HYPRE_SPGEMM_HASH_TYPE = 'D'; | ||||
| 
 | ||||
| /* default settings associated with bin 5 */ | ||||
| #if defined(HYPRE_USING_CUDA) | ||||
| #define HYPRE_SPGEMM_NUMER_HASH_SIZE 256 | ||||
| #endif | ||||
| #if defined(HYPRE_USING_HIP) | ||||
| #define HYPRE_SPGEMM_NUMER_HASH_SIZE 128 | ||||
| #endif | ||||
| #define HYPRE_SPGEMM_SYMBL_HASH_SIZE 512 | ||||
| #define HYPRE_SPGEMM_BASE_GROUP_SIZE 32 | ||||
| /* unroll factor in the kernels */ | ||||
|  | ||||
| @ -31,9 +31,16 @@ hypreDevice_CSRSpGemmNumerWithRownnzUpperboundNoBin( HYPRE_Int       m, | ||||
|                                                      HYPRE_Complex **d_c_out, | ||||
|                                                      HYPRE_Int      *nnzC_out ) | ||||
| { | ||||
| #if defined(HYPRE_USING_CUDA) | ||||
|    const HYPRE_Int SHMEM_HASH_SIZE = HYPRE_SPGEMM_NUMER_HASH_SIZE; | ||||
|    const HYPRE_Int GROUP_SIZE = HYPRE_SPGEMM_BASE_GROUP_SIZE; | ||||
|    const HYPRE_Int BIN = 5; | ||||
| #endif | ||||
| #if defined(HYPRE_USING_HIP) | ||||
|    const HYPRE_Int SHMEM_HASH_SIZE = 2 * HYPRE_SPGEMM_NUMER_HASH_SIZE; | ||||
|    const HYPRE_Int GROUP_SIZE = 2 * HYPRE_SPGEMM_BASE_GROUP_SIZE; | ||||
|    const HYPRE_Int BIN = 6; | ||||
| #endif | ||||
| 
 | ||||
| #ifdef HYPRE_SPGEMM_PRINTF | ||||
|    HYPRE_Int max_rc = HYPRE_THRUST_CALL(reduce, d_rc, d_rc + m, 0,      thrust::maximum<HYPRE_Int>()); | ||||
| @ -122,7 +129,12 @@ hypreDevice_CSRSpGemmNumerWithRownnzUpperboundBinned( HYPRE_Int       m, | ||||
|    HYPRE_Int  h_bin_ptr[HYPRE_SPGEMM_MAX_NBIN + 1]; | ||||
|    //HYPRE_Int  num_bins = hypre_HandleSpgemmNumBin(hypre_handle());
 | ||||
|    HYPRE_Int high_bin = hypre_HandleSpgemmHighestBin(hypre_handle())[1]; | ||||
| #if defined(HYPRE_USING_CUDA) | ||||
|    const char s = 8, t = 2, u = high_bin; | ||||
| #endif | ||||
| #if defined(HYPRE_USING_HIP) | ||||
|    const char s = 4, t = 2, u = high_bin; | ||||
| #endif | ||||
| 
 | ||||
|    hypre_SpGemmCreateBins(m, s, t, u, d_rc, false, d_rind, h_bin_ptr); | ||||
| 
 | ||||
|  | ||||
| @ -454,7 +454,7 @@ hypre_spgemm_numerical_with_rownnz( HYPRE_Int      m, | ||||
| #if defined(HYPRE_USING_CUDA) | ||||
|    const HYPRE_Int BDIMX                = hypre_min(4, GROUP_SIZE); | ||||
| #elif defined(HYPRE_USING_HIP) | ||||
|    const HYPRE_Int BDIMX                = hypre_min(2, GROUP_SIZE); | ||||
|    const HYPRE_Int BDIMX                = hypre_min(4, GROUP_SIZE); | ||||
| #endif | ||||
|    const HYPRE_Int BDIMY                = GROUP_SIZE / BDIMX; | ||||
| 
 | ||||
|  | ||||
| @ -22,6 +22,7 @@ hypre_spgemm_numerical_with_rownnz | ||||
|   HYPRE_Complex *d_b, | ||||
|   HYPRE_Int *d_rc, HYPRE_Int *d_ic, HYPRE_Int *d_jc, HYPRE_Complex *d_c ); | ||||
| 
 | ||||
| #if defined(HYPRE_USING_CUDA) | ||||
| template HYPRE_Int | ||||
| hypre_spgemm_numerical_with_rownnz | ||||
| < 5, HYPRE_SPGEMM_NUMER_HASH_SIZE, HYPRE_SPGEMM_BASE_GROUP_SIZE, false > | ||||
| @ -30,6 +31,7 @@ hypre_spgemm_numerical_with_rownnz | ||||
|   HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex *d_a, HYPRE_Int *d_ib, HYPRE_Int *d_jb, | ||||
|   HYPRE_Complex *d_b, | ||||
|   HYPRE_Int *d_rc, HYPRE_Int *d_ic, HYPRE_Int *d_jc, HYPRE_Complex *d_c ); | ||||
| #endif | ||||
| 
 | ||||
| template HYPRE_Int | ||||
| hypre_spgemm_numerical_max_num_blocks | ||||
|  | ||||
| @ -21,6 +21,17 @@ hypre_spgemm_numerical_with_rownnz | ||||
|   HYPRE_Complex *d_b, | ||||
|   HYPRE_Int *d_rc, HYPRE_Int *d_ic, HYPRE_Int *d_jc, HYPRE_Complex *d_c ); | ||||
| 
 | ||||
| #if defined(HYPRE_USING_HIP) | ||||
| template HYPRE_Int | ||||
| hypre_spgemm_numerical_with_rownnz | ||||
| < 6, HYPRE_SPGEMM_NUMER_HASH_SIZE * 2, HYPRE_SPGEMM_BASE_GROUP_SIZE * 2, false > | ||||
| ( HYPRE_Int m, HYPRE_Int *row_ind, HYPRE_Int k, HYPRE_Int n, bool need_ghash, | ||||
|   HYPRE_Int exact_rownnz, | ||||
|   HYPRE_Int *d_ia, HYPRE_Int *d_ja, HYPRE_Complex *d_a, HYPRE_Int *d_ib, HYPRE_Int *d_jb, | ||||
|   HYPRE_Complex *d_b, | ||||
|   HYPRE_Int *d_rc, HYPRE_Int *d_ic, HYPRE_Int *d_jc, HYPRE_Complex *d_c ); | ||||
| #endif | ||||
| 
 | ||||
| template HYPRE_Int | ||||
| hypre_spgemm_numerical_max_num_blocks | ||||
| < HYPRE_SPGEMM_NUMER_HASH_SIZE * 2, HYPRE_SPGEMM_BASE_GROUP_SIZE * 2 > | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Ruipeng Li
						Ruipeng Li