This commit is contained in:
Ruipeng Li 2022-03-31 17:48:19 -07:00
parent 6086b07a96
commit 1c35a59c1d
16 changed files with 74 additions and 46 deletions

View File

@ -1640,11 +1640,13 @@ HYPRE_ILUSetupCusparseCSRILU0SetupSolve(hypre_CSRMatrix *A, cusparseMatDescr_t m
HYPRE_CUSPARSE_CALL(cusparseCreateCsrsv2Info(&(matU_info)));
/* 2. Get working array size */
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnz_A,
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n,
nnz_A,
matL_des, A_data, A_i, A_j,
matL_info, &matL_buffersize));
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnz_A,
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n,
nnz_A,
matU_des, A_data, A_i, A_j,
matU_info, &matU_buffersize));
@ -2720,7 +2722,8 @@ hypre_ParILURAPBuildRP(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *BLUm, hypre_Pa
}
/* check buffer size and create buffer */
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsm2_bufferSizeExt(handle, algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsm2_bufferSizeExt(handle, algo,
CUSPARSE_OPERATION_NON_TRANSPOSE,
CUSPARSE_OPERATION_NON_TRANSPOSE,
n, m, nnz_BLUm, &alpha, matL_des, BLUm_diag_data, BLUm_diag_i,
BLUm_diag_j, rhs, n, malL_info, policy, &buffer_size));
@ -2749,7 +2752,8 @@ hypre_ParILURAPBuildRP(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *BLUm, hypre_Pa
/* check buffer size and create buffer */
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsm2_bufferSizeExt(handle, algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsm2_bufferSizeExt(handle, algo,
CUSPARSE_OPERATION_NON_TRANSPOSE,
CUSPARSE_OPERATION_NON_TRANSPOSE,
n, m, nnz_BLUm, &alpha, matU_des, BLUm_diag_data, BLUm_diag_i,
BLUm_diag_j, rhs, n, malU_info, policy, &buffer_size));

View File

@ -1430,7 +1430,7 @@ hypre_ILUSolveRAPGMRES(hypre_ParCSRMatrix *A, hypre_ParVector *f,
/* solve L^{-1} */
if (nLU > 0)
{
/* L solve */
/* L solve */
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
nLU, BLU_nnz, &one, matL_des,
BLU_data, BLU_i, BLU_j, matBL_info,

View File

@ -1085,7 +1085,12 @@ HYPRE_Int hypre_ParCSRMatrixMatvec_FF ( HYPRE_Complex alpha, hypre_ParCSRMatrix
hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y, HYPRE_Int *CF_marker, HYPRE_Int fpt );
/* par_csr_triplemat.c */
HYPRE_Int hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext, hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr );
HYPRE_Int hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg_A,
HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B,
HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B,
HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext,
hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr,
HYPRE_BigInt **col_map_offd_C_ptr );
hypre_ParCSRMatrix *hypre_ParCSRMatMat( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
hypre_ParCSRMatrix *hypre_ParCSRMatMatHost( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
hypre_ParCSRMatrix *hypre_ParCSRMatMatDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );

View File

@ -1045,7 +1045,7 @@ hypre_ParCSRMatrixMatvecT_unpack( HYPRE_Complex *locl_data,
hypre_ParCSRCommPkgSendMapRowInd(comm_pkg), 0);
#endif
return hypre_error_flag;
return hypre_error_flag;
}
#endif

View File

@ -125,7 +125,8 @@ hypre_ParCSRMatMatDevice( hypre_ParCSRMatrix *A,
hypre_ForceSyncComputeStream(hypre_handle());
t2 = hypre_MPI_Wtime() - t1 - t2;
hypre_ParPrintf(comm, "Time Bext %f\n", t2);
hypre_ParPrintf(comm, "Size Bext %d %d %d\n", hypre_CSRMatrixNumRows(Bext), hypre_CSRMatrixNumCols(Bext), hypre_CSRMatrixNumNonzeros(Bext));
hypre_ParPrintf(comm, "Size Bext %d %d %d\n", hypre_CSRMatrixNumRows(Bext),
hypre_CSRMatrixNumCols(Bext), hypre_CSRMatrixNumNonzeros(Bext));
#endif
#if PARCSRGEMM_TIMING > 1
@ -502,7 +503,8 @@ hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A,
hypre_ForceSyncComputeStream(hypre_handle());
t2 = hypre_MPI_Wtime() - t1;
hypre_ParPrintf(comm, "Time Cext %f\n", t2);
hypre_ParPrintf(comm, "Size Cext %d %d %d\n", hypre_CSRMatrixNumRows(Cext), hypre_CSRMatrixNumCols(Cext), hypre_CSRMatrixNumNonzeros(Cext));
hypre_ParPrintf(comm, "Size Cext %d %d %d\n", hypre_CSRMatrixNumRows(Cext),
hypre_CSRMatrixNumCols(Cext), hypre_CSRMatrixNumNonzeros(Cext));
#endif
/* add Cext to local part of Cbar */
@ -893,7 +895,7 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg,
tmp_j + local_nnz_Cbar + Cext_diag_nnz,
tmp_j + tmp_s,
tmp_j + local_nnz_Cbar + Cext_diag_nnz,
[const_val = num_cols] (const auto & x) {return x + const_val;} );
[const_val = num_cols] (const auto & x) {return x + const_val;} );
}
#else
HYPRE_THRUST_CALL( gather,
@ -951,7 +953,8 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg,
HYPRE_Int *zmp_j = hypre_TAlloc(HYPRE_Int, tmp_s, HYPRE_MEMORY_DEVICE);
HYPRE_Complex *zmp_a = hypre_TAlloc(HYPRE_Complex, tmp_s, HYPRE_MEMORY_DEVICE);
HYPRE_Int local_nnz_C = hypreDevice_ReduceByTupleKey(tmp_s, tmp_i, tmp_j, tmp_a, zmp_i, zmp_j, zmp_a);
HYPRE_Int local_nnz_C = hypreDevice_ReduceByTupleKey(tmp_s, tmp_i, tmp_j, tmp_a, zmp_i, zmp_j,
zmp_a);
hypre_TFree(tmp_i, HYPRE_MEMORY_DEVICE);
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
@ -1045,7 +1048,7 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg,
C_offd_j,
C_offd_j + nnz_C_offd,
C_offd_j,
[const_val = num_cols] (const auto & x) {return x - const_val;} );
[const_val = num_cols] (const auto & x) {return x - const_val;} );
}
#else
HYPRE_THRUST_CALL( transform,

View File

@ -452,7 +452,12 @@ HYPRE_Int hypre_ParCSRMatrixMatvec_FF ( HYPRE_Complex alpha, hypre_ParCSRMatrix
hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y, HYPRE_Int *CF_marker, HYPRE_Int fpt );
/* par_csr_triplemat.c */
HYPRE_Int hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext, hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr );
HYPRE_Int hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg_A,
HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B,
HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B,
HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext,
hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr,
HYPRE_BigInt **col_map_offd_C_ptr );
hypre_ParCSRMatrix *hypre_ParCSRMatMat( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
hypre_ParCSRMatrix *hypre_ParCSRMatMatHost( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
hypre_ParCSRMatrix *hypre_ParCSRMatMatDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );

View File

@ -78,14 +78,14 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
void *rs_buffer;
HYPRE_ROCSPARSE_CALL( hypre_rocsparse_csrgemm_buffer_size(handle,
transA, transB,
m, n, k,
&alpha, // \alpha = 1
descrA, nnzA, d_ia, d_ja_sorted,
descrB, nnzB, d_ib, d_jb_sorted,
NULL, // \beta = 0
NULL, 0, NULL, NULL, // D is nothing
infoC, &rs_buffer_size) );
transA, transB,
m, n, k,
&alpha, // \alpha = 1
descrA, nnzA, d_ia, d_ja_sorted,
descrB, nnzB, d_ib, d_jb_sorted,
NULL, // \beta = 0
NULL, 0, NULL, NULL, // D is nothing
infoC, &rs_buffer_size) );
rs_buffer = hypre_TAlloc(char, rs_buffer_size, HYPRE_MEMORY_DEVICE);

View File

@ -186,7 +186,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
}
else if (rownnz >= 32)
{
@ -194,7 +194,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
}
else if (rownnz >= 16)
{
@ -202,7 +202,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
}
else if (rownnz >= 8)
{
@ -210,7 +210,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
}
else
{
@ -218,7 +218,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
}
return hypre_error_flag;

View File

@ -320,7 +320,8 @@ HYPRE_Int hypre_SeqVectorElmdivpy( hypre_Vector *x, hypre_Vector *b, hypre_Vecto
HYPRE_Int hypre_SeqVectorElmdivpyMarked( hypre_Vector *x, hypre_Vector *b, hypre_Vector *y,
HYPRE_Int *marker, HYPRE_Int marker_val );
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x,
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A,
hypre_Vector *x,
HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int *y_ind, HYPRE_Int fill );
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)

View File

@ -592,7 +592,8 @@ HYPRE_Int hypre_SeqVectorElmdivpy( hypre_Vector *x, hypre_Vector *b, hypre_Vecto
HYPRE_Int hypre_SeqVectorElmdivpyMarked( hypre_Vector *x, hypre_Vector *b, hypre_Vector *y,
HYPRE_Int *marker, HYPRE_Int marker_val );
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x,
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A,
hypre_Vector *x,
HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int *y_ind, HYPRE_Int fill );
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)

View File

@ -478,7 +478,8 @@ hypre_SeqVectorScale( HYPRE_Complex alpha,
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUBLAS)
HYPRE_CUBLAS_CALL( hypre_cublas_scal(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, y_data, 1) );
HYPRE_CUBLAS_CALL( hypre_cublas_scal(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, y_data,
1) );
#else
hypreDevice_ComplexScalen( y_data, size, y_data, alpha );
#endif // #if defined(HYPRE_USING_CUBLAS)
@ -553,7 +554,8 @@ hypre_SeqVectorAxpy( HYPRE_Complex alpha,
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUBLAS)
HYPRE_CUBLAS_CALL( hypre_cublas_axpy(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, x_data, 1,
HYPRE_CUBLAS_CALL( hypre_cublas_axpy(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, x_data,
1,
y_data, 1) );
#else
hypreDevice_ComplexAxpyn(x_data, size, y_data, y_data, alpha);
@ -735,7 +737,8 @@ hypre_SeqVectorInnerProd( hypre_Vector *x,
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
#if defined(HYPRE_USING_CUBLAS)
HYPRE_CUBLAS_CALL( hypre_cublas_dot(hypre_HandleCublasHandle(hypre_handle()), size, x_data, 1, y_data, 1,
HYPRE_CUBLAS_CALL( hypre_cublas_dot(hypre_HandleCublasHandle(hypre_handle()), size, x_data, 1,
y_data, 1,
&result) );
#else
result = HYPRE_THRUST_CALL( inner_product, x_data, x_data + size, y_data, 0.0 );

View File

@ -332,7 +332,7 @@ void runjob3( HYPRE_ParCSRMatrix parcsr_A,
{
hypre_printf("--- rep %d (out of %d) ---\n", i, rep);
}
if (i == rep -1)
if (i == rep - 1)
{
time_index = hypre_InitializeTiming("Device Parcsr Matrix-by-Matrix, RAP2");
hypre_BeginTiming(time_index);
@ -389,7 +389,8 @@ void runjob3( HYPRE_ParCSRMatrix parcsr_A,
if (myid == 0)
{
hypre_printf("AH: %d x %d, nnz %d, CPU-GPU err %e\n", hypre_ParCSRMatrixGlobalNumRows(parcsr_AH_host_2),
hypre_printf("AH: %d x %d, nnz %d, CPU-GPU err %e\n",
hypre_ParCSRMatrixGlobalNumRows(parcsr_AH_host_2),
hypre_ParCSRMatrixGlobalNumCols(parcsr_AH_host_2),
hypre_ParCSRMatrixNumNonzeros(parcsr_AH_host_2),
rfnorm);
@ -773,7 +774,7 @@ main( hypre_int argc,
{
HYPRE_Int ierr1 = HYPRE_IJMatrixRead( argv[build_matrix_arg_index], comm,
HYPRE_PARCSR, &ij_A );
HYPRE_Int ierr2 = HYPRE_IJMatrixRead( argv[build_matrix_arg_index+1], comm,
HYPRE_Int ierr2 = HYPRE_IJMatrixRead( argv[build_matrix_arg_index + 1], comm,
HYPRE_PARCSR, &ij_P );
if (ierr1 || ierr2)
{

View File

@ -819,7 +819,8 @@ HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr);
hypre_MemoryTracker * hypre_MemoryTrackerCreate();
void hypre_MemoryTrackerDestroy(hypre_MemoryTracker *tracker);
void hypre_MemoryTrackerInsert(const char *action, void *ptr, void *ptr2, size_t nbytes,
hypre_MemoryLocation memory_location, hypre_MemoryLocation memory_location2, const char *filename, const char *function, HYPRE_Int line);
hypre_MemoryLocation memory_location, hypre_MemoryLocation memory_location2, const char *filename,
const char *function, HYPRE_Int line);
HYPRE_Int hypre_PrintMemoryTracker();
#endif

View File

@ -674,9 +674,11 @@ void hypre_CudaCompileFlagCheck()
hypre_int *cuda_arch_compile_d = NULL;
//cuda_arch_compile_d = hypre_TAlloc(hypre_int, 1, HYPRE_MEMORY_DEVICE);
HYPRE_CUDA_CALL( cudaMalloc(&cuda_arch_compile_d, sizeof(hypre_int)) );
HYPRE_CUDA_CALL( cudaMemcpy(cuda_arch_compile_d, &cuda_arch_compile, sizeof(hypre_int), cudaMemcpyHostToDevice) );
HYPRE_CUDA_CALL( cudaMemcpy(cuda_arch_compile_d, &cuda_arch_compile, sizeof(hypre_int),
cudaMemcpyHostToDevice) );
HYPRE_GPU_LAUNCH( hypreGPUKernel_CompileFlagSafetyCheck, gDim, bDim, cuda_arch_compile_d );
HYPRE_CUDA_CALL( cudaMemcpy(&cuda_arch_compile, cuda_arch_compile_d, sizeof(hypre_int), cudaMemcpyDeviceToHost) );
HYPRE_CUDA_CALL( cudaMemcpy(&cuda_arch_compile, cuda_arch_compile_d, sizeof(hypre_int),
cudaMemcpyDeviceToHost) );
//hypre_TFree(cuda_arch_compile_d, HYPRE_MEMORY_DEVICE);
HYPRE_CUDA_CALL( cudaFree(cuda_arch_compile_d) );
@ -1020,7 +1022,7 @@ template HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Complex *x, HYPRE_Int n, HY
__global__ void
hypreGPUKernel_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y)
HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y)
{
HYPRE_Int i = hypre_cuda_get_grid_thread_id<1, 1>();
@ -1059,7 +1061,7 @@ hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
__global__ void
hypreGPUKernel_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z)
HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z)
{
HYPRE_Int i = hypre_cuda_get_grid_thread_id<1, 1>();
@ -1093,9 +1095,9 @@ hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
}
__global__ void
hypreGPUKernel_BigToSmallCopy( HYPRE_Int* __restrict__ tgt,
const HYPRE_BigInt* __restrict__ src,
HYPRE_Int size)
hypreGPUKernel_BigToSmallCopy( HYPRE_Int* __restrict__ tgt,
const HYPRE_BigInt* __restrict__ src,
HYPRE_Int size )
{
HYPRE_Int i = hypre_cuda_get_grid_thread_id<1, 1>();

View File

@ -1383,7 +1383,8 @@ hypre_PrintMemoryTracker()
tracker->data[i]._memory_location == hypre_MEMORY_HOST &&
tracker->data[i]._memory_location2 == hypre_MEMORY_DEVICE )
{
fprintf(file, " %6zu %12s %16p %16p %10s %16s %16s %40s (%5d) %50s | %12zu %12zu %12zu %12zu\n",
fprintf(file,
" %6zu %12s %16p %16p %10s %16s %16s %40s (%5d) %50s | %12zu %12zu %12zu %12zu\n",
i,
tracker->data[i]._action,
tracker->data[i]._ptr,
@ -1398,7 +1399,7 @@ hypre_PrintMemoryTracker()
curr_bytes[hypre_MEMORY_HOST_PINNED],
curr_bytes[hypre_MEMORY_DEVICE],
curr_bytes[hypre_MEMORY_UNIFIED]
);
);
}
else
#endif
@ -1416,7 +1417,7 @@ hypre_PrintMemoryTracker()
curr_bytes[hypre_MEMORY_HOST_PINNED],
curr_bytes[hypre_MEMORY_DEVICE],
curr_bytes[hypre_MEMORY_UNIFIED]
);
);
}
}

View File

@ -305,7 +305,8 @@ HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr);
hypre_MemoryTracker * hypre_MemoryTrackerCreate();
void hypre_MemoryTrackerDestroy(hypre_MemoryTracker *tracker);
void hypre_MemoryTrackerInsert(const char *action, void *ptr, void *ptr2, size_t nbytes,
hypre_MemoryLocation memory_location, hypre_MemoryLocation memory_location2, const char *filename, const char *function, HYPRE_Int line);
hypre_MemoryLocation memory_location, hypre_MemoryLocation memory_location2, const char *filename,
const char *function, HYPRE_Int line);
HYPRE_Int hypre_PrintMemoryTracker();
#endif