From 33524991d06f5e5b4c0c088ccf117aa09288867a Mon Sep 17 00:00:00 2001 From: Rui Peng Li Date: Wed, 13 Dec 2023 15:09:24 -0800 Subject: [PATCH] Make GPU-aware MPI a runtime option (#1031) This PR makes GPU-aware MPI a runtime option. --- src/parcsr_ls/ame.c | 7 +- src/parcsr_ls/ams.c | 7 +- src/parcsr_ls/par_2s_interp_device.c | 21 ++++-- src/parcsr_ls/par_coarsen_device.c | 37 +++++++---- src/parcsr_ls/par_indepset_device.c | 7 +- src/parcsr_ls/par_interp_device.c | 28 +++++--- src/parcsr_ls/par_lr_interp_device.c | 14 ++-- src/parcsr_ls/par_lr_restr_device.c | 7 +- src/parcsr_ls/par_mod_multi_interp_device.c | 35 +++++++--- src/parcsr_ls/par_strength_device.c | 7 +- src/parcsr_mv/par_csr_fffc_device.c | 14 ++-- src/parcsr_mv/par_csr_matop_device.c | 21 ++++-- src/parcsr_mv/par_csr_matvec_device.c | 14 ++-- src/parcsr_mv/par_csr_triplemat_device.c | 14 ++-- src/struct_mv/struct_communication.c | 71 +++++++++++---------- src/test/ij.c | 9 +++ src/test/sstruct.c | 9 +++ src/test/struct.c | 8 +++ src/utilities/HYPRE_handle.c | 8 +++ src/utilities/HYPRE_utilities.h | 1 + src/utilities/_hypre_utilities.h | 8 +++ src/utilities/general.c | 8 +++ src/utilities/handle.c | 21 ++++++ src/utilities/handle.h | 6 ++ src/utilities/protos.h | 2 + 25 files changed, 280 insertions(+), 104 deletions(-) diff --git a/src/parcsr_ls/ame.c b/src/parcsr_ls/ame.c index e587dac41..8d90f9c8b 100644 --- a/src/parcsr_ls/ame.c +++ b/src/parcsr_ls/ame.c @@ -501,9 +501,12 @@ HYPRE_Int hypre_AMESetup(void *esolver) int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif } else diff --git a/src/parcsr_ls/ams.c b/src/parcsr_ls/ams.c index 12594b655..b40a76727 100644 --- a/src/parcsr_ls/ams.c +++ b/src/parcsr_ls/ams.c @@ -595,9 +595,12 @@ HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif } else diff --git a/src/parcsr_ls/par_2s_interp_device.c b/src/parcsr_ls/par_2s_interp_device.c index 0b6db66e8..680f560c7 100644 --- a/src/parcsr_ls/par_2s_interp_device.c +++ b/src/parcsr_ls/par_2s_interp_device.c @@ -105,9 +105,12 @@ hypre_BoomerAMGBuildModPartialExtInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -423,9 +426,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -445,9 +451,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, diff --git a/src/parcsr_ls/par_coarsen_device.c b/src/parcsr_ls/par_coarsen_device.c index f9b008b11..42253bcd2 100644 --- a/src/parcsr_ls/par_coarsen_device.c +++ b/src/parcsr_ls/par_coarsen_device.c @@ -169,9 +169,12 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S, (HYPRE_Int *) send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, @@ -254,10 +257,11 @@ hypre_GetGlobalMeasureDevice( hypre_ParCSRMatrix *S, /* compute local column nnz of the offd part */ hypre_CSRMatrixColNNzRealDevice(S_offd, measure_offd); -#if defined(HYPRE_WITH_GPU_AWARE_MPI) - /* RL: make sure measure_offd is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + /* RL: make sure measure_offd is ready before issuing GPU-GPU MPI */ + hypre_ForceSyncComputeStream(hypre_handle()); + } /* send local column nnz of the offd part to neighbors */ comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, HYPRE_MEMORY_DEVICE, measure_offd, @@ -381,9 +385,12 @@ hypre_PMISCoarseningInitDevice( hypre_ParCSRMatrix *S, /* in */ real_send_buf); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, @@ -576,9 +583,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in real_send_buf); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, @@ -597,9 +607,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in CF_marker_diag, int_send_buf); -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, diff --git a/src/parcsr_ls/par_indepset_device.c b/src/parcsr_ls/par_indepset_device.c index 1ba215082..bd7938fcd 100644 --- a/src/parcsr_ls/par_indepset_device.c +++ b/src/parcsr_ls/par_indepset_device.c @@ -179,10 +179,11 @@ hypre_BoomerAMGIndepSetDevice( hypre_ParCSRMatrix *S, /*-------------------------------------------------------------------- * Exchange boundary data for IS_marker: send external IS to internal *-------------------------------------------------------------------*/ -#if defined(HYPRE_WITH_GPU_AWARE_MPI) /* RL: make sure IS_marker_offd is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } comm_handle = hypre_ParCSRCommHandleCreate_v2(12, comm_pkg, HYPRE_MEMORY_DEVICE, IS_marker_offd, diff --git a/src/parcsr_ls/par_interp_device.c b/src/parcsr_ls/par_interp_device.c index 67528b6e4..0fa03f5b6 100644 --- a/src/parcsr_ls/par_interp_device.c +++ b/src/parcsr_ls/par_interp_device.c @@ -162,9 +162,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, @@ -194,9 +197,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, @@ -1149,9 +1155,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* create a handle to start communication. 11: for integer */ @@ -1216,9 +1225,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A, thrust::plus() ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure big_int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_int_buf_data, diff --git a/src/parcsr_ls/par_lr_interp_device.c b/src/parcsr_ls/par_lr_interp_device.c index bf537faed..1f1bb0487 100644 --- a/src/parcsr_ls/par_lr_interp_device.c +++ b/src/parcsr_ls/par_lr_interp_device.c @@ -1101,9 +1101,12 @@ hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -1398,9 +1401,12 @@ hypre_BoomerAMGBuildExtPEInterpDevice(hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, diff --git a/src/parcsr_ls/par_lr_restr_device.c b/src/parcsr_ls/par_lr_restr_device.c index 33aae8722..edca5b180 100644 --- a/src/parcsr_ls/par_lr_restr_device.c +++ b/src/parcsr_ls/par_lr_restr_device.c @@ -283,9 +283,12 @@ hypre_BoomerAMGBuildRestrNeumannAIRDevice( hypre_ParCSRMatrix *A, thrust::plus() ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf_i is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg_Z, HYPRE_MEMORY_DEVICE, send_buf_i, diff --git a/src/parcsr_ls/par_mod_multi_interp_device.c b/src/parcsr_ls/par_mod_multi_interp_device.c index cda32dd3f..034fbc15c 100644 --- a/src/parcsr_ls/par_mod_multi_interp_device.c +++ b/src/parcsr_ls/par_mod_multi_interp_device.c @@ -383,9 +383,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif dof_func_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_DEVICE); @@ -418,9 +421,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* allocate one more see comments in hypre_modmp_compute_num_cols_offd_fine_to_coarse */ @@ -572,9 +578,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* create a handle to start communication. 11: for integer */ @@ -1096,9 +1105,12 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A, big_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data, @@ -1392,9 +1404,12 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A, big_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data, diff --git a/src/parcsr_ls/par_strength_device.c b/src/parcsr_ls/par_strength_device.c index 9d5d2ec14..d28ad3ece 100644 --- a/src/parcsr_ls/par_strength_device.c +++ b/src/parcsr_ls/par_strength_device.c @@ -138,9 +138,12 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A, int_buf_data ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, diff --git a/src/parcsr_mv/par_csr_fffc_device.c b/src/parcsr_mv/par_csr_fffc_device.c index dbd7dd133..5a472b255 100644 --- a/src/parcsr_mv/par_csr_fffc_device.c +++ b/src/parcsr_mv/par_csr_fffc_device.c @@ -411,9 +411,12 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, @@ -1593,9 +1596,12 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A, send_buf ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure send_buf is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf, diff --git a/src/parcsr_mv/par_csr_matop_device.c b/src/parcsr_mv/par_csr_matop_device.c index a97d11cc4..9da117676 100644 --- a/src/parcsr_mv/par_csr_matop_device.c +++ b/src/parcsr_mv/par_csr_matop_device.c @@ -837,10 +837,11 @@ hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix *A, NULL, &comm_pkg_j); -#if defined(HYPRE_WITH_GPU_AWARE_MPI) /* RL: make sure d_send_j/d_send_a is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } /* init communication */ /* ja */ @@ -1491,9 +1492,12 @@ hypre_ParCSRMatrixTransposeDevice( hypre_ParCSRMatrix *A, thrust::plus() ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure A_offdT is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif if (!hypre_ParCSRMatrixCommPkg(A)) @@ -1927,9 +1931,12 @@ hypre_ParCSRMatrixDiagScaleDevice( hypre_ParCSRMatrix *par_A, #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* make sure send_rdbuf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* A_diag = diag(ld) * A_diag * diag(rd) */ diff --git a/src/parcsr_mv/par_csr_matvec_device.c b/src/parcsr_mv/par_csr_matvec_device.c index 66bd3229b..1636ef253 100644 --- a/src/parcsr_mv/par_csr_matvec_device.c +++ b/src/parcsr_mv/par_csr_matvec_device.c @@ -205,9 +205,12 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha, hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] += hypre_MPI_Wtime(); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure x_buf_data is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif /* when using GPUs, start local matvec first in order to overlap with communication */ @@ -426,10 +429,11 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha, } } -#if defined(HYPRE_WITH_GPU_AWARE_MPI) /* RL: make sure y_tmp is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); -#endif + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } /* when using GPUs, start local matvec first in order to overlap with communication */ if (diagT) diff --git a/src/parcsr_mv/par_csr_triplemat_device.c b/src/parcsr_mv/par_csr_triplemat_device.c index b9677cfc7..13071f0fc 100644 --- a/src/parcsr_mv/par_csr_triplemat_device.c +++ b/src/parcsr_mv/par_csr_triplemat_device.c @@ -492,9 +492,12 @@ hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A, func1 ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure Cint is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar; @@ -767,9 +770,12 @@ hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R, func1 ); #endif -#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC) +#if defined(HYPRE_USING_THRUST_NOSYNC) /* RL: make sure Cint is ready before issuing GPU-GPU MPI */ - hypre_ForceSyncComputeStream(hypre_handle()); + if (hypre_GetGpuAwareMPI()) + { + hypre_ForceSyncComputeStream(hypre_handle()); + } #endif hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar; diff --git a/src/struct_mv/struct_communication.c b/src/struct_mv/struct_communication.c index 600df8368..0c264a12c 100644 --- a/src/struct_mv/struct_communication.c +++ b/src/struct_mv/struct_communication.c @@ -963,38 +963,42 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg, #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST) { -#if defined(HYPRE_WITH_GPU_AWARE_MPI) - hypre_ForceSyncComputeStream(hypre_handle()); - - send_buffers_mpi = send_buffers; - recv_buffers_mpi = recv_buffers; -#else - memory_location_mpi = HYPRE_MEMORY_HOST; - - send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); - if (num_sends > 0) + if (hypre_GetGpuAwareMPI()) { - size = hypre_CommPkgSendBufsize(comm_pkg); - send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_sends; i++) - { - send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]); - } - hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, - memory_location); - } - - recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); - if (num_recvs > 0) - { - size = hypre_CommPkgRecvBufsize(comm_pkg); - recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); - for (i = 1; i < num_recvs; i++) - { - recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]); - } - } +#if defined(HYPRE_USING_GPU) + hypre_ForceSyncComputeStream(hypre_handle()); #endif + send_buffers_mpi = send_buffers; + recv_buffers_mpi = recv_buffers; + } + else + { + memory_location_mpi = HYPRE_MEMORY_HOST; + + send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST); + if (num_sends > 0) + { + size = hypre_CommPkgSendBufsize(comm_pkg); + send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); + for (i = 1; i < num_sends; i++) + { + send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]); + } + hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST, + memory_location); + } + + recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST); + if (num_recvs > 0) + { + size = hypre_CommPkgRecvBufsize(comm_pkg); + recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi); + for (i = 1; i < num_recvs; i++) + { + recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]); + } + } + } } else #endif @@ -1152,9 +1156,10 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle ) HYPRE_MemoryLocation memory_location_mpi = memory_location; #if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) -#if !defined(HYPRE_WITH_GPU_AWARE_MPI) - memory_location_mpi = HYPRE_MEMORY_HOST; -#endif + if (!hypre_GetGpuAwareMPI()) + { + memory_location_mpi = HYPRE_MEMORY_HOST; + } #endif /*-------------------------------------------------------------------- diff --git a/src/test/ij.c b/src/test/ij.c index 5e5fe3842..0ee26e3f7 100644 --- a/src/test/ij.c +++ b/src/test/ij.c @@ -499,6 +499,8 @@ main( hypre_int argc, char mem_tracker_name[HYPRE_MAX_FILE_NAME_LEN] = {0}; #endif + HYPRE_Int gpu_aware_mpi = 0; + /* Initialize MPI */ hypre_MPI_Init(&argc, &argv); @@ -1597,6 +1599,11 @@ main( hypre_int argc, snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]); } #endif + else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 ) + { + arg_index++; + gpu_aware_mpi = atoi(argv[arg_index++]); + } else { arg_index++; @@ -2716,6 +2723,8 @@ main( hypre_int argc, HYPRE_SetUseGpuRand(use_curand); #endif + HYPRE_SetGpuAwareMPI(gpu_aware_mpi); + /*----------------------------------------------------------- * Set up matrix *-----------------------------------------------------------*/ diff --git a/src/test/sstruct.c b/src/test/sstruct.c index fc7c186d5..542c45206 100644 --- a/src/test/sstruct.c +++ b/src/test/sstruct.c @@ -2460,6 +2460,8 @@ main( hypre_int argc, global_data.memory_location = memory_location; + HYPRE_Int gpu_aware_mpi = 0; + /*----------------------------------------------------------- * Initialize some stuff *-----------------------------------------------------------*/ @@ -2903,6 +2905,11 @@ main( hypre_int argc, snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]); } #endif + else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 ) + { + arg_index++; + gpu_aware_mpi = atoi(argv[arg_index++]); + } else { arg_index++; @@ -2924,6 +2931,8 @@ main( hypre_int argc, HYPRE_SetSpGemmUseVendor(spgemm_use_vendor); #endif + HYPRE_SetGpuAwareMPI(gpu_aware_mpi); + if ( solver_id == 39 && lobpcgFlag ) { solver_id = 10; diff --git a/src/test/struct.c b/src/test/struct.c index 4295f4aa0..a19214288 100644 --- a/src/test/struct.c +++ b/src/test/struct.c @@ -188,6 +188,7 @@ main( hypre_int argc, HYPRE_MemoryLocation memory_location = HYPRE_MEMORY_DEVICE; HYPRE_ExecutionPolicy default_exec_policy = HYPRE_EXEC_DEVICE; #endif + HYPRE_Int gpu_aware_mpi = 0; //HYPRE_Int device_level = -2; @@ -566,6 +567,11 @@ main( hypre_int argc, snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]); } #endif + else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 ) + { + arg_index++; + gpu_aware_mpi = atoi(argv[arg_index++]); + } /* end lobpcg */ else { @@ -584,6 +590,8 @@ main( hypre_int argc, /* default execution policy */ HYPRE_SetExecutionPolicy(default_exec_policy); + HYPRE_SetGpuAwareMPI(gpu_aware_mpi); + /* begin lobpcg */ if ( solver_id == 0 && lobpcgFlag ) diff --git a/src/utilities/HYPRE_handle.c b/src/utilities/HYPRE_handle.c index 9be97512c..2a12657b6 100644 --- a/src/utilities/HYPRE_handle.c +++ b/src/utilities/HYPRE_handle.c @@ -49,3 +49,11 @@ HYPRE_SetUseGpuRand( HYPRE_Int use_gpu_rand ) return hypre_SetUseGpuRand(use_gpu_rand); } +/*-------------------------------------------------------------------------- + * HYPRE_SetGPUAwareMPI + *--------------------------------------------------------------------------*/ +HYPRE_Int +HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ) +{ + return hypre_SetGpuAwareMPI(use_gpu_aware_mpi); +} diff --git a/src/utilities/HYPRE_utilities.h b/src/utilities/HYPRE_utilities.h index 1ce387a26..a92fa078f 100644 --- a/src/utilities/HYPRE_utilities.h +++ b/src/utilities/HYPRE_utilities.h @@ -343,6 +343,7 @@ HYPRE_Int HYPRE_SetSpMVUseVendor( HYPRE_Int use_vendor ); #define HYPRE_SetSpGemmUseCusparse(use_vendor) HYPRE_SetSpGemmUseVendor(use_vendor) HYPRE_Int HYPRE_SetSpGemmUseVendor( HYPRE_Int use_vendor ); HYPRE_Int HYPRE_SetUseGpuRand( HYPRE_Int use_curand ); +HYPRE_Int HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); /*-------------------------------------------------------------------------- * Base objects diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 513623a8d..faa40bedc 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1779,6 +1779,11 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; + /* GPU MPI */ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int use_gpu_aware_mpi; +#endif + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -1821,6 +1826,7 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) +#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) @@ -2373,6 +2379,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand ); HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method ); HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func); HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func); +HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); +HYPRE_Int hypre_GetGpuAwareMPI(void); /* int_array.c */ hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size ); diff --git a/src/utilities/general.c b/src/utilities/general.c index fd1795fb6..16491b04c 100644 --- a/src/utilities/general.c +++ b/src/utilities/general.c @@ -44,6 +44,14 @@ hypre_HandleCreate(void) hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */ #endif +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) +#if defined(HYPRE_WITH_GPU_AWARE_MPI) + hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1; +#else + hypre_HandleUseGpuAwareMPI(hypre_handle_) = 0; +#endif +#endif + return hypre_handle_; } diff --git a/src/utilities/handle.c b/src/utilities/handle.c index 792a17b03..faeb9f48c 100644 --- a/src/utilities/handle.c +++ b/src/utilities/handle.c @@ -182,3 +182,24 @@ hypre_SetUserDeviceMfree(GPUMfreeFunc func) return hypre_error_flag; } + +HYPRE_Int +hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ) +{ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + hypre_HandleUseGpuAwareMPI(hypre_handle()) = use_gpu_aware_mpi; +#else + HYPRE_UNUSED_VAR(use_gpu_aware_mpi); +#endif + return hypre_error_flag; +} + +HYPRE_Int +hypre_GetGpuAwareMPI(void) +{ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + return hypre_HandleUseGpuAwareMPI(hypre_handle()); +#else + return 0; +#endif +} diff --git a/src/utilities/handle.h b/src/utilities/handle.h index 44973dedc..88b16782d 100644 --- a/src/utilities/handle.h +++ b/src/utilities/handle.h @@ -29,6 +29,11 @@ typedef struct HYPRE_Int struct_comm_recv_buffer_size; HYPRE_Int struct_comm_send_buffer_size; + /* GPU MPI */ +#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int use_gpu_aware_mpi; +#endif + #if defined(HYPRE_USING_GPU) hypre_DeviceData *device_data; HYPRE_Int device_gs_method; /* device G-S options */ @@ -71,6 +76,7 @@ typedef struct #define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data) #define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method) +#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi) #define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle)) #define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle)) diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 2e77b7788..33414521e 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -389,6 +389,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand ); HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method ); HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func); HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func); +HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi ); +HYPRE_Int hypre_GetGpuAwareMPI(void); /* int_array.c */ hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size );