astyle
This commit is contained in:
parent
6086b07a96
commit
1c35a59c1d
@ -1640,11 +1640,13 @@ HYPRE_ILUSetupCusparseCSRILU0SetupSolve(hypre_CSRMatrix *A, cusparseMatDescr_t m
|
||||
HYPRE_CUSPARSE_CALL(cusparseCreateCsrsv2Info(&(matU_info)));
|
||||
|
||||
/* 2. Get working array size */
|
||||
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnz_A,
|
||||
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n,
|
||||
nnz_A,
|
||||
matL_des, A_data, A_i, A_j,
|
||||
matL_info, &matL_buffersize));
|
||||
|
||||
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnz_A,
|
||||
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n,
|
||||
nnz_A,
|
||||
matU_des, A_data, A_i, A_j,
|
||||
matU_info, &matU_buffersize));
|
||||
|
||||
@ -2720,7 +2722,8 @@ hypre_ParILURAPBuildRP(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *BLUm, hypre_Pa
|
||||
}
|
||||
|
||||
/* check buffer size and create buffer */
|
||||
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsm2_bufferSizeExt(handle, algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsm2_bufferSizeExt(handle, algo,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
n, m, nnz_BLUm, &alpha, matL_des, BLUm_diag_data, BLUm_diag_i,
|
||||
BLUm_diag_j, rhs, n, malL_info, policy, &buffer_size));
|
||||
@ -2749,7 +2752,8 @@ hypre_ParILURAPBuildRP(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *BLUm, hypre_Pa
|
||||
|
||||
/* check buffer size and create buffer */
|
||||
|
||||
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsm2_bufferSizeExt(handle, algo, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsm2_bufferSizeExt(handle, algo,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
n, m, nnz_BLUm, &alpha, matU_des, BLUm_diag_data, BLUm_diag_i,
|
||||
BLUm_diag_j, rhs, n, malU_info, policy, &buffer_size));
|
||||
|
||||
@ -1430,7 +1430,7 @@ hypre_ILUSolveRAPGMRES(hypre_ParCSRMatrix *A, hypre_ParVector *f,
|
||||
/* solve L^{-1} */
|
||||
if (nLU > 0)
|
||||
{
|
||||
/* L solve */
|
||||
/* L solve */
|
||||
HYPRE_CUSPARSE_CALL(hypre_cusparse_csrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
||||
nLU, BLU_nnz, &one, matL_des,
|
||||
BLU_data, BLU_i, BLU_j, matBL_info,
|
||||
|
||||
@ -1085,7 +1085,12 @@ HYPRE_Int hypre_ParCSRMatrixMatvec_FF ( HYPRE_Complex alpha, hypre_ParCSRMatrix
|
||||
hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y, HYPRE_Int *CF_marker, HYPRE_Int fpt );
|
||||
|
||||
/* par_csr_triplemat.c */
|
||||
HYPRE_Int hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext, hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr );
|
||||
HYPRE_Int hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg_A,
|
||||
HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B,
|
||||
HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B,
|
||||
HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext,
|
||||
hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr,
|
||||
HYPRE_BigInt **col_map_offd_C_ptr );
|
||||
hypre_ParCSRMatrix *hypre_ParCSRMatMat( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
|
||||
hypre_ParCSRMatrix *hypre_ParCSRMatMatHost( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
|
||||
hypre_ParCSRMatrix *hypre_ParCSRMatMatDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
|
||||
|
||||
@ -1045,7 +1045,7 @@ hypre_ParCSRMatrixMatvecT_unpack( HYPRE_Complex *locl_data,
|
||||
hypre_ParCSRCommPkgSendMapRowInd(comm_pkg), 0);
|
||||
#endif
|
||||
|
||||
return hypre_error_flag;
|
||||
return hypre_error_flag;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -125,7 +125,8 @@ hypre_ParCSRMatMatDevice( hypre_ParCSRMatrix *A,
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
t2 = hypre_MPI_Wtime() - t1 - t2;
|
||||
hypre_ParPrintf(comm, "Time Bext %f\n", t2);
|
||||
hypre_ParPrintf(comm, "Size Bext %d %d %d\n", hypre_CSRMatrixNumRows(Bext), hypre_CSRMatrixNumCols(Bext), hypre_CSRMatrixNumNonzeros(Bext));
|
||||
hypre_ParPrintf(comm, "Size Bext %d %d %d\n", hypre_CSRMatrixNumRows(Bext),
|
||||
hypre_CSRMatrixNumCols(Bext), hypre_CSRMatrixNumNonzeros(Bext));
|
||||
#endif
|
||||
|
||||
#if PARCSRGEMM_TIMING > 1
|
||||
@ -502,7 +503,8 @@ hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A,
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
t2 = hypre_MPI_Wtime() - t1;
|
||||
hypre_ParPrintf(comm, "Time Cext %f\n", t2);
|
||||
hypre_ParPrintf(comm, "Size Cext %d %d %d\n", hypre_CSRMatrixNumRows(Cext), hypre_CSRMatrixNumCols(Cext), hypre_CSRMatrixNumNonzeros(Cext));
|
||||
hypre_ParPrintf(comm, "Size Cext %d %d %d\n", hypre_CSRMatrixNumRows(Cext),
|
||||
hypre_CSRMatrixNumCols(Cext), hypre_CSRMatrixNumNonzeros(Cext));
|
||||
#endif
|
||||
|
||||
/* add Cext to local part of Cbar */
|
||||
@ -893,7 +895,7 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg,
|
||||
tmp_j + local_nnz_Cbar + Cext_diag_nnz,
|
||||
tmp_j + tmp_s,
|
||||
tmp_j + local_nnz_Cbar + Cext_diag_nnz,
|
||||
[const_val = num_cols] (const auto & x) {return x + const_val;} );
|
||||
[const_val = num_cols] (const auto & x) {return x + const_val;} );
|
||||
}
|
||||
#else
|
||||
HYPRE_THRUST_CALL( gather,
|
||||
@ -951,7 +953,8 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg,
|
||||
HYPRE_Int *zmp_j = hypre_TAlloc(HYPRE_Int, tmp_s, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_Complex *zmp_a = hypre_TAlloc(HYPRE_Complex, tmp_s, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
HYPRE_Int local_nnz_C = hypreDevice_ReduceByTupleKey(tmp_s, tmp_i, tmp_j, tmp_a, zmp_i, zmp_j, zmp_a);
|
||||
HYPRE_Int local_nnz_C = hypreDevice_ReduceByTupleKey(tmp_s, tmp_i, tmp_j, tmp_a, zmp_i, zmp_j,
|
||||
zmp_a);
|
||||
|
||||
hypre_TFree(tmp_i, HYPRE_MEMORY_DEVICE);
|
||||
hypre_TFree(tmp_j, HYPRE_MEMORY_DEVICE);
|
||||
@ -1045,7 +1048,7 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg,
|
||||
C_offd_j,
|
||||
C_offd_j + nnz_C_offd,
|
||||
C_offd_j,
|
||||
[const_val = num_cols] (const auto & x) {return x - const_val;} );
|
||||
[const_val = num_cols] (const auto & x) {return x - const_val;} );
|
||||
}
|
||||
#else
|
||||
HYPRE_THRUST_CALL( transform,
|
||||
|
||||
@ -452,7 +452,12 @@ HYPRE_Int hypre_ParCSRMatrixMatvec_FF ( HYPRE_Complex alpha, hypre_ParCSRMatrix
|
||||
hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y, HYPRE_Int *CF_marker, HYPRE_Int fpt );
|
||||
|
||||
/* par_csr_triplemat.c */
|
||||
HYPRE_Int hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg_A, HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B, HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B, HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext, hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr, HYPRE_BigInt **col_map_offd_C_ptr );
|
||||
HYPRE_Int hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg_A,
|
||||
HYPRE_Int num_cols_A, HYPRE_Int num_cols_B, HYPRE_BigInt first_col_diag_B,
|
||||
HYPRE_BigInt last_col_diag_B, HYPRE_Int num_cols_offd_B, HYPRE_BigInt *col_map_offd_B,
|
||||
HYPRE_Int local_nnz_Cbar, hypre_CSRMatrix *Cbar, hypre_CSRMatrix *Cext,
|
||||
hypre_CSRMatrix **C_diag_ptr, hypre_CSRMatrix **C_offd_ptr, HYPRE_Int *num_cols_offd_C_ptr,
|
||||
HYPRE_BigInt **col_map_offd_C_ptr );
|
||||
hypre_ParCSRMatrix *hypre_ParCSRMatMat( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
|
||||
hypre_ParCSRMatrix *hypre_ParCSRMatMatHost( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
|
||||
hypre_ParCSRMatrix *hypre_ParCSRMatMatDevice( hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B );
|
||||
|
||||
@ -78,14 +78,14 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int m,
|
||||
void *rs_buffer;
|
||||
|
||||
HYPRE_ROCSPARSE_CALL( hypre_rocsparse_csrgemm_buffer_size(handle,
|
||||
transA, transB,
|
||||
m, n, k,
|
||||
&alpha, // \alpha = 1
|
||||
descrA, nnzA, d_ia, d_ja_sorted,
|
||||
descrB, nnzB, d_ib, d_jb_sorted,
|
||||
NULL, // \beta = 0
|
||||
NULL, 0, NULL, NULL, // D is nothing
|
||||
infoC, &rs_buffer_size) );
|
||||
transA, transB,
|
||||
m, n, k,
|
||||
&alpha, // \alpha = 1
|
||||
descrA, nnzA, d_ia, d_ja_sorted,
|
||||
descrB, nnzB, d_ib, d_jb_sorted,
|
||||
NULL, // \beta = 0
|
||||
NULL, 0, NULL, NULL, // D is nothing
|
||||
infoC, &rs_buffer_size) );
|
||||
|
||||
rs_buffer = hypre_TAlloc(char, rs_buffer_size, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
|
||||
@ -186,7 +186,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
|
||||
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
|
||||
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
|
||||
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
}
|
||||
else if (rownnz >= 32)
|
||||
{
|
||||
@ -194,7 +194,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
|
||||
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
|
||||
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
|
||||
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
}
|
||||
else if (rownnz >= 16)
|
||||
{
|
||||
@ -202,7 +202,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
|
||||
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
|
||||
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
|
||||
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
}
|
||||
else if (rownnz >= 8)
|
||||
{
|
||||
@ -210,7 +210,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
|
||||
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
|
||||
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
|
||||
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -218,7 +218,7 @@ hypreDevice_CSRMatrixMatvec( HYPRE_Int nrows,
|
||||
const HYPRE_Int num_groups_per_block = SPMV_BLOCKDIM / group_size;
|
||||
const dim3 gDim((nrows + num_groups_per_block - 1) / num_groups_per_block);
|
||||
HYPRE_GPU_LAUNCH( (hypre_csr_v_k_shuffle<F, group_size, HYPRE_Real>), gDim, bDim,
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
nrows, alpha, d_ia, d_ja, d_a, d_x, beta, d_y, d_yind );
|
||||
}
|
||||
|
||||
return hypre_error_flag;
|
||||
|
||||
@ -320,7 +320,8 @@ HYPRE_Int hypre_SeqVectorElmdivpy( hypre_Vector *x, hypre_Vector *b, hypre_Vecto
|
||||
HYPRE_Int hypre_SeqVectorElmdivpyMarked( hypre_Vector *x, hypre_Vector *b, hypre_Vector *y,
|
||||
HYPRE_Int *marker, HYPRE_Int marker_val );
|
||||
|
||||
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x,
|
||||
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A,
|
||||
hypre_Vector *x,
|
||||
HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int *y_ind, HYPRE_Int fill );
|
||||
|
||||
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
|
||||
|
||||
@ -592,7 +592,8 @@ HYPRE_Int hypre_SeqVectorElmdivpy( hypre_Vector *x, hypre_Vector *b, hypre_Vecto
|
||||
HYPRE_Int hypre_SeqVectorElmdivpyMarked( hypre_Vector *x, hypre_Vector *b, hypre_Vector *y,
|
||||
HYPRE_Int *marker, HYPRE_Int marker_val );
|
||||
|
||||
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x,
|
||||
HYPRE_Int hypre_CSRMatrixSpMVDevice( HYPRE_Int trans, HYPRE_Complex alpha, hypre_CSRMatrix *A,
|
||||
hypre_Vector *x,
|
||||
HYPRE_Complex beta, hypre_Vector *y, HYPRE_Int *y_ind, HYPRE_Int fill );
|
||||
|
||||
#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
|
||||
|
||||
@ -478,7 +478,8 @@ hypre_SeqVectorScale( HYPRE_Complex alpha,
|
||||
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
|
||||
|
||||
#if defined(HYPRE_USING_CUBLAS)
|
||||
HYPRE_CUBLAS_CALL( hypre_cublas_scal(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, y_data, 1) );
|
||||
HYPRE_CUBLAS_CALL( hypre_cublas_scal(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, y_data,
|
||||
1) );
|
||||
#else
|
||||
hypreDevice_ComplexScalen( y_data, size, y_data, alpha );
|
||||
#endif // #if defined(HYPRE_USING_CUBLAS)
|
||||
@ -553,7 +554,8 @@ hypre_SeqVectorAxpy( HYPRE_Complex alpha,
|
||||
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
|
||||
|
||||
#if defined(HYPRE_USING_CUBLAS)
|
||||
HYPRE_CUBLAS_CALL( hypre_cublas_axpy(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, x_data, 1,
|
||||
HYPRE_CUBLAS_CALL( hypre_cublas_axpy(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, x_data,
|
||||
1,
|
||||
y_data, 1) );
|
||||
#else
|
||||
hypreDevice_ComplexAxpyn(x_data, size, y_data, y_data, alpha);
|
||||
@ -735,7 +737,8 @@ hypre_SeqVectorInnerProd( hypre_Vector *x,
|
||||
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
|
||||
|
||||
#if defined(HYPRE_USING_CUBLAS)
|
||||
HYPRE_CUBLAS_CALL( hypre_cublas_dot(hypre_HandleCublasHandle(hypre_handle()), size, x_data, 1, y_data, 1,
|
||||
HYPRE_CUBLAS_CALL( hypre_cublas_dot(hypre_HandleCublasHandle(hypre_handle()), size, x_data, 1,
|
||||
y_data, 1,
|
||||
&result) );
|
||||
#else
|
||||
result = HYPRE_THRUST_CALL( inner_product, x_data, x_data + size, y_data, 0.0 );
|
||||
|
||||
@ -332,7 +332,7 @@ void runjob3( HYPRE_ParCSRMatrix parcsr_A,
|
||||
{
|
||||
hypre_printf("--- rep %d (out of %d) ---\n", i, rep);
|
||||
}
|
||||
if (i == rep -1)
|
||||
if (i == rep - 1)
|
||||
{
|
||||
time_index = hypre_InitializeTiming("Device Parcsr Matrix-by-Matrix, RAP2");
|
||||
hypre_BeginTiming(time_index);
|
||||
@ -389,7 +389,8 @@ void runjob3( HYPRE_ParCSRMatrix parcsr_A,
|
||||
|
||||
if (myid == 0)
|
||||
{
|
||||
hypre_printf("AH: %d x %d, nnz %d, CPU-GPU err %e\n", hypre_ParCSRMatrixGlobalNumRows(parcsr_AH_host_2),
|
||||
hypre_printf("AH: %d x %d, nnz %d, CPU-GPU err %e\n",
|
||||
hypre_ParCSRMatrixGlobalNumRows(parcsr_AH_host_2),
|
||||
hypre_ParCSRMatrixGlobalNumCols(parcsr_AH_host_2),
|
||||
hypre_ParCSRMatrixNumNonzeros(parcsr_AH_host_2),
|
||||
rfnorm);
|
||||
@ -773,7 +774,7 @@ main( hypre_int argc,
|
||||
{
|
||||
HYPRE_Int ierr1 = HYPRE_IJMatrixRead( argv[build_matrix_arg_index], comm,
|
||||
HYPRE_PARCSR, &ij_A );
|
||||
HYPRE_Int ierr2 = HYPRE_IJMatrixRead( argv[build_matrix_arg_index+1], comm,
|
||||
HYPRE_Int ierr2 = HYPRE_IJMatrixRead( argv[build_matrix_arg_index + 1], comm,
|
||||
HYPRE_PARCSR, &ij_P );
|
||||
if (ierr1 || ierr2)
|
||||
{
|
||||
|
||||
@ -819,7 +819,8 @@ HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr);
|
||||
hypre_MemoryTracker * hypre_MemoryTrackerCreate();
|
||||
void hypre_MemoryTrackerDestroy(hypre_MemoryTracker *tracker);
|
||||
void hypre_MemoryTrackerInsert(const char *action, void *ptr, void *ptr2, size_t nbytes,
|
||||
hypre_MemoryLocation memory_location, hypre_MemoryLocation memory_location2, const char *filename, const char *function, HYPRE_Int line);
|
||||
hypre_MemoryLocation memory_location, hypre_MemoryLocation memory_location2, const char *filename,
|
||||
const char *function, HYPRE_Int line);
|
||||
HYPRE_Int hypre_PrintMemoryTracker();
|
||||
#endif
|
||||
|
||||
|
||||
@ -674,9 +674,11 @@ void hypre_CudaCompileFlagCheck()
|
||||
hypre_int *cuda_arch_compile_d = NULL;
|
||||
//cuda_arch_compile_d = hypre_TAlloc(hypre_int, 1, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_CUDA_CALL( cudaMalloc(&cuda_arch_compile_d, sizeof(hypre_int)) );
|
||||
HYPRE_CUDA_CALL( cudaMemcpy(cuda_arch_compile_d, &cuda_arch_compile, sizeof(hypre_int), cudaMemcpyHostToDevice) );
|
||||
HYPRE_CUDA_CALL( cudaMemcpy(cuda_arch_compile_d, &cuda_arch_compile, sizeof(hypre_int),
|
||||
cudaMemcpyHostToDevice) );
|
||||
HYPRE_GPU_LAUNCH( hypreGPUKernel_CompileFlagSafetyCheck, gDim, bDim, cuda_arch_compile_d );
|
||||
HYPRE_CUDA_CALL( cudaMemcpy(&cuda_arch_compile, cuda_arch_compile_d, sizeof(hypre_int), cudaMemcpyDeviceToHost) );
|
||||
HYPRE_CUDA_CALL( cudaMemcpy(&cuda_arch_compile, cuda_arch_compile_d, sizeof(hypre_int),
|
||||
cudaMemcpyDeviceToHost) );
|
||||
//hypre_TFree(cuda_arch_compile_d, HYPRE_MEMORY_DEVICE);
|
||||
HYPRE_CUDA_CALL( cudaFree(cuda_arch_compile_d) );
|
||||
|
||||
@ -1020,7 +1022,7 @@ template HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Complex *x, HYPRE_Int n, HY
|
||||
|
||||
__global__ void
|
||||
hypreGPUKernel_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
|
||||
HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y)
|
||||
HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y)
|
||||
{
|
||||
HYPRE_Int i = hypre_cuda_get_grid_thread_id<1, 1>();
|
||||
|
||||
@ -1059,7 +1061,7 @@ hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
|
||||
|
||||
__global__ void
|
||||
hypreGPUKernel_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
|
||||
HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z)
|
||||
HYPRE_Complex *x, HYPRE_Complex beta, HYPRE_Complex *y, HYPRE_Complex *z)
|
||||
{
|
||||
HYPRE_Int i = hypre_cuda_get_grid_thread_id<1, 1>();
|
||||
|
||||
@ -1093,9 +1095,9 @@ hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data,
|
||||
}
|
||||
|
||||
__global__ void
|
||||
hypreGPUKernel_BigToSmallCopy( HYPRE_Int* __restrict__ tgt,
|
||||
const HYPRE_BigInt* __restrict__ src,
|
||||
HYPRE_Int size)
|
||||
hypreGPUKernel_BigToSmallCopy( HYPRE_Int* __restrict__ tgt,
|
||||
const HYPRE_BigInt* __restrict__ src,
|
||||
HYPRE_Int size )
|
||||
{
|
||||
HYPRE_Int i = hypre_cuda_get_grid_thread_id<1, 1>();
|
||||
|
||||
|
||||
@ -1383,7 +1383,8 @@ hypre_PrintMemoryTracker()
|
||||
tracker->data[i]._memory_location == hypre_MEMORY_HOST &&
|
||||
tracker->data[i]._memory_location2 == hypre_MEMORY_DEVICE )
|
||||
{
|
||||
fprintf(file, " %6zu %12s %16p %16p %10s %16s %16s %40s (%5d) %50s | %12zu %12zu %12zu %12zu\n",
|
||||
fprintf(file,
|
||||
" %6zu %12s %16p %16p %10s %16s %16s %40s (%5d) %50s | %12zu %12zu %12zu %12zu\n",
|
||||
i,
|
||||
tracker->data[i]._action,
|
||||
tracker->data[i]._ptr,
|
||||
@ -1398,7 +1399,7 @@ hypre_PrintMemoryTracker()
|
||||
curr_bytes[hypre_MEMORY_HOST_PINNED],
|
||||
curr_bytes[hypre_MEMORY_DEVICE],
|
||||
curr_bytes[hypre_MEMORY_UNIFIED]
|
||||
);
|
||||
);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@ -1416,7 +1417,7 @@ hypre_PrintMemoryTracker()
|
||||
curr_bytes[hypre_MEMORY_HOST_PINNED],
|
||||
curr_bytes[hypre_MEMORY_DEVICE],
|
||||
curr_bytes[hypre_MEMORY_UNIFIED]
|
||||
);
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -305,7 +305,8 @@ HYPRE_Int hypre_umpire_pinned_pooled_free(void *ptr);
|
||||
hypre_MemoryTracker * hypre_MemoryTrackerCreate();
|
||||
void hypre_MemoryTrackerDestroy(hypre_MemoryTracker *tracker);
|
||||
void hypre_MemoryTrackerInsert(const char *action, void *ptr, void *ptr2, size_t nbytes,
|
||||
hypre_MemoryLocation memory_location, hypre_MemoryLocation memory_location2, const char *filename, const char *function, HYPRE_Int line);
|
||||
hypre_MemoryLocation memory_location, hypre_MemoryLocation memory_location2, const char *filename,
|
||||
const char *function, HYPRE_Int line);
|
||||
HYPRE_Int hypre_PrintMemoryTracker();
|
||||
#endif
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user