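Minor changes: comment out the ie_a allocation, fill, and assignment in hypre_ParCSRTMatMatPartialAddDevice; replace HYPRE_CUDA_LAUNCH/HYPRE_CUDA_LAUNCH2 calls with HYPRE_GPU_LAUNCH/HYPRE_GPU_LAUNCH2 in the SpGEMM numeric and symbolic kernels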

This commit is contained in:
Ruipeng Li 2022-04-06 17:39:23 -07:00
parent 224bb78d4f
commit e21778682b
3 changed files with 12 additions and 14 deletions

View File

@@ -947,9 +947,8 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg,
HYPRE_Int *ie_ii = hypre_TAlloc(HYPRE_Int, num_rows + num_elemt, HYPRE_MEMORY_DEVICE);
HYPRE_Int *ie_j = hypre_TAlloc(HYPRE_Int, num_rows + num_elemt, HYPRE_MEMORY_DEVICE);
// FIXME
HYPRE_Complex *ie_a = hypre_TAlloc(HYPRE_Complex, num_rows + num_elemt, HYPRE_MEMORY_DEVICE);
HYPRE_THRUST_CALL(fill, ie_a, ie_a + num_rows + num_elemt, 1.0);
//HYPRE_Complex *ie_a = hypre_TAlloc(HYPRE_Complex, num_rows + num_elemt, HYPRE_MEMORY_DEVICE);
//HYPRE_THRUST_CALL(fill, ie_a, ie_a + num_rows + num_elemt, 1.0);
HYPRE_THRUST_CALL( sequence, ie_ii, ie_ii + num_rows);
HYPRE_THRUST_CALL( copy, send_map, send_map + num_elemt, ie_ii + num_rows);
@@ -961,8 +960,7 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg,
hypre_CSRMatrixI(IE) = ie_i;
hypre_CSRMatrixJ(IE) = ie_j;
// FIXME
hypre_CSRMatrixData(IE) = ie_a;
//hypre_CSRMatrixData(IE) = ie_a;
// CC = [Cbar_local; Cext]
hypre_CSRMatrix *CC = hypre_CSRMatrixStack2Device(Cbar_local, Cext);

View File

@@ -467,14 +467,14 @@ hypre_spgemm_numerical_with_rownnz( HYPRE_Int m,
{
if (ghash_size)
{
HYPRE_CUDA_LAUNCH2 (
HYPRE_GPU_LAUNCH2 (
(hypre_spgemm_numeric<num_groups_per_block, GROUP_SIZE, SHMEM_HASH_SIZE, HAS_RIND, false, HASH_TYPE, true>),
gDim, bDim, shmem_bytes,
m, row_ind, d_ia, d_ja, d_a, d_ib, d_jb, d_b, d_ic, d_jc, d_c, NULL, d_ghash_i, d_ghash_j, d_ghash_a );
}
else
{
HYPRE_CUDA_LAUNCH2 (
HYPRE_GPU_LAUNCH2 (
(hypre_spgemm_numeric<num_groups_per_block, GROUP_SIZE, SHMEM_HASH_SIZE, HAS_RIND, false, HASH_TYPE, false>),
gDim, bDim, shmem_bytes,
m, row_ind, d_ia, d_ja, d_a, d_ib, d_jb, d_b, d_ic, d_jc, d_c, NULL, d_ghash_i, d_ghash_j, d_ghash_a );
@@ -484,14 +484,14 @@ hypre_spgemm_numerical_with_rownnz( HYPRE_Int m,
{
if (ghash_size)
{
HYPRE_CUDA_LAUNCH2 (
HYPRE_GPU_LAUNCH2 (
(hypre_spgemm_numeric<num_groups_per_block, GROUP_SIZE, SHMEM_HASH_SIZE, HAS_RIND, true, HASH_TYPE, true>),
gDim, bDim, shmem_bytes,
m, row_ind, d_ia, d_ja, d_a, d_ib, d_jb, d_b, d_ic, d_jc, d_c, d_rc, d_ghash_i, d_ghash_j, d_ghash_a );
}
else
{
HYPRE_CUDA_LAUNCH2 (
HYPRE_GPU_LAUNCH2 (
(hypre_spgemm_numeric<num_groups_per_block, GROUP_SIZE, SHMEM_HASH_SIZE, HAS_RIND, true, HASH_TYPE, false>),
gDim, bDim, shmem_bytes,
m, row_ind, d_ia, d_ja, d_a, d_ib, d_jb, d_b, d_ic, d_jc, d_c, d_rc, d_ghash_i, d_ghash_j, d_ghash_a );
@@ -587,7 +587,7 @@ hypreDevice_CSRSpGemmNumerPostCopy( HYPRE_Int m,
dim3 bDim(GROUP_SIZE, 1, num_groups_per_block);
dim3 gDim( (m + bDim.z - 1) / bDim.z );
HYPRE_CUDA_LAUNCH( (hypre_spgemm_copy_from_Cext_into_C<GROUP_SIZE>), gDim, bDim,
HYPRE_GPU_LAUNCH( (hypre_spgemm_copy_from_Cext_into_C<GROUP_SIZE>), gDim, bDim,
m, *d_ic, *d_jc, *d_c, d_ic_new, d_jc_new, d_c_new );
hypre_TFree(*d_ic, HYPRE_MEMORY_DEVICE);

View File

@@ -390,14 +390,14 @@ hypre_spgemm_symbolic_rownnz( HYPRE_Int m,
{
if (ghash_size)
{
HYPRE_CUDA_LAUNCH2(
HYPRE_GPU_LAUNCH2(
(hypre_spgemm_symbolic<num_groups_per_block, GROUP_SIZE, SHMEM_HASH_SIZE, HAS_RIND, true, HASH_TYPE, true>),
gDim, bDim, shmem_bytes,
m, row_ind, d_ia, d_ja, d_ib, d_jb, d_ghash_i, d_ghash_j, d_rc, d_rf );
}
else
{
HYPRE_CUDA_LAUNCH2(
HYPRE_GPU_LAUNCH2(
(hypre_spgemm_symbolic<num_groups_per_block, GROUP_SIZE, SHMEM_HASH_SIZE, HAS_RIND, true, HASH_TYPE, false>),
gDim, bDim, shmem_bytes,
m, row_ind, d_ia, d_ja, d_ib, d_jb, d_ghash_i, d_ghash_j, d_rc, d_rf );
@@ -407,14 +407,14 @@ hypre_spgemm_symbolic_rownnz( HYPRE_Int m,
{
if (ghash_size)
{
HYPRE_CUDA_LAUNCH2(
HYPRE_GPU_LAUNCH2(
(hypre_spgemm_symbolic<num_groups_per_block, GROUP_SIZE, SHMEM_HASH_SIZE, HAS_RIND, false, HASH_TYPE, true>),
gDim, bDim, shmem_bytes,
m, row_ind, d_ia, d_ja, d_ib, d_jb, d_ghash_i, d_ghash_j, d_rc, d_rf );
}
else
{
HYPRE_CUDA_LAUNCH2(
HYPRE_GPU_LAUNCH2(
(hypre_spgemm_symbolic<num_groups_per_block, GROUP_SIZE, SHMEM_HASH_SIZE, HAS_RIND, false, HASH_TYPE, false>),
gDim, bDim, shmem_bytes,
m, row_ind, d_ia, d_ja, d_ib, d_jb, d_ghash_i, d_ghash_j, d_rc, d_rf );