Make GPU-aware MPI a runtime option (#1031)

This PR makes GPU-aware MPI a runtime option.
This commit is contained in:
Rui Peng Li 2023-12-13 15:09:24 -08:00 committed by GitHub
parent 0dfe0fee56
commit 33524991d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 280 additions and 104 deletions

View File

@ -501,9 +501,12 @@ HYPRE_Int hypre_AMESetup(void *esolver)
int_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
}
else

View File

@ -595,9 +595,12 @@ HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A,
int_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
}
else

View File

@ -105,9 +105,12 @@ hypre_BoomerAMGBuildModPartialExtInterpDevice( hypre_ParCSRMatrix *A,
send_buf );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
@ -423,9 +426,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A,
send_buf );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
@ -445,9 +451,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A,
send_buf );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,

View File

@ -169,9 +169,12 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S,
(HYPRE_Int *) send_buf );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg,
@ -254,10 +257,11 @@ hypre_GetGlobalMeasureDevice( hypre_ParCSRMatrix *S,
/* compute local column nnz of the offd part */
hypre_CSRMatrixColNNzRealDevice(S_offd, measure_offd);
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
/* RL: make sure measure_offd is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
#endif
if (hypre_GetGpuAwareMPI())
{
/* RL: make sure measure_offd is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
}
/* send local column nnz of the offd part to neighbors */
comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, HYPRE_MEMORY_DEVICE, measure_offd,
@ -381,9 +385,12 @@ hypre_PMISCoarseningInitDevice( hypre_ParCSRMatrix *S, /* in */
real_send_buf);
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg,
@ -576,9 +583,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in
real_send_buf);
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg,
@ -597,9 +607,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in
CF_marker_diag,
int_send_buf);
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg,

View File

@ -179,10 +179,11 @@ hypre_BoomerAMGIndepSetDevice( hypre_ParCSRMatrix *S,
/*--------------------------------------------------------------------
* Exchange boundary data for IS_marker: send external IS to internal
*-------------------------------------------------------------------*/
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
/* RL: make sure IS_marker_offd is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
#endif
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
comm_handle = hypre_ParCSRCommHandleCreate_v2(12, comm_pkg,
HYPRE_MEMORY_DEVICE, IS_marker_offd,

View File

@ -162,9 +162,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A,
int_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
@ -194,9 +197,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A,
int_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
@ -1149,9 +1155,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A,
int_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
/* create a handle to start communication. 11: for integer */
@ -1216,9 +1225,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A,
thrust::plus<HYPRE_BigInt>() );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure big_int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_int_buf_data,

View File

@ -1101,9 +1101,12 @@ hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A,
send_buf );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
@ -1398,9 +1401,12 @@ hypre_BoomerAMGBuildExtPEInterpDevice(hypre_ParCSRMatrix *A,
send_buf );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,

View File

@ -283,9 +283,12 @@ hypre_BoomerAMGBuildRestrNeumannAIRDevice( hypre_ParCSRMatrix *A,
thrust::plus<HYPRE_BigInt>() );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure send_buf_i is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg_Z, HYPRE_MEMORY_DEVICE, send_buf_i,

View File

@ -383,9 +383,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A,
int_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
dof_func_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_DEVICE);
@ -418,9 +421,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A,
int_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
/* allocate one more see comments in hypre_modmp_compute_num_cols_offd_fine_to_coarse */
@ -572,9 +578,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A,
int_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
/* create a handle to start communication. 11: for integer */
@ -1096,9 +1105,12 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A,
big_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data,
@ -1392,9 +1404,12 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A,
big_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data,

View File

@ -138,9 +138,12 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A,
int_buf_data );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,

View File

@ -411,9 +411,12 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
send_buf );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
@ -1593,9 +1596,12 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A,
send_buf );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,

View File

@ -837,10 +837,11 @@ hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix *A,
NULL,
&comm_pkg_j);
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
/* RL: make sure d_send_j/d_send_a is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
#endif
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
/* init communication */
/* ja */
@ -1491,9 +1492,12 @@ hypre_ParCSRMatrixTransposeDevice( hypre_ParCSRMatrix *A,
thrust::plus<HYPRE_BigInt>() );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure A_offdT is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
if (!hypre_ParCSRMatrixCommPkg(A))
@ -1927,9 +1931,12 @@ hypre_ParCSRMatrixDiagScaleDevice( hypre_ParCSRMatrix *par_A,
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* make sure send_rdbuf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
/* A_diag = diag(ld) * A_diag * diag(rd) */

View File

@ -205,9 +205,12 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha,
hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] += hypre_MPI_Wtime();
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure x_buf_data is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
/* when using GPUs, start local matvec first in order to overlap with communication */
@ -426,10 +429,11 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha,
}
}
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
/* RL: make sure y_tmp is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
#endif
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
/* when using GPUs, start local matvec first in order to overlap with communication */
if (diagT)

View File

@ -492,9 +492,12 @@ hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A,
func1 );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure Cint is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar;
@ -767,9 +770,12 @@ hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R,
func1 );
#endif
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
#if defined(HYPRE_USING_THRUST_NOSYNC)
/* RL: make sure Cint is ready before issuing GPU-GPU MPI */
hypre_ForceSyncComputeStream(hypre_handle());
if (hypre_GetGpuAwareMPI())
{
hypre_ForceSyncComputeStream(hypre_handle());
}
#endif
hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar;

View File

@ -963,38 +963,42 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg,
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST)
{
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
hypre_ForceSyncComputeStream(hypre_handle());
send_buffers_mpi = send_buffers;
recv_buffers_mpi = recv_buffers;
#else
memory_location_mpi = HYPRE_MEMORY_HOST;
send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST);
if (num_sends > 0)
if (hypre_GetGpuAwareMPI())
{
size = hypre_CommPkgSendBufsize(comm_pkg);
send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi);
for (i = 1; i < num_sends; i++)
{
send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]);
}
hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST,
memory_location);
}
recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST);
if (num_recvs > 0)
{
size = hypre_CommPkgRecvBufsize(comm_pkg);
recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi);
for (i = 1; i < num_recvs; i++)
{
recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]);
}
}
#if defined(HYPRE_USING_GPU)
hypre_ForceSyncComputeStream(hypre_handle());
#endif
send_buffers_mpi = send_buffers;
recv_buffers_mpi = recv_buffers;
}
else
{
memory_location_mpi = HYPRE_MEMORY_HOST;
send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST);
if (num_sends > 0)
{
size = hypre_CommPkgSendBufsize(comm_pkg);
send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi);
for (i = 1; i < num_sends; i++)
{
send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]);
}
hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST,
memory_location);
}
recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST);
if (num_recvs > 0)
{
size = hypre_CommPkgRecvBufsize(comm_pkg);
recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi);
for (i = 1; i < num_recvs; i++)
{
recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]);
}
}
}
}
else
#endif
@ -1152,9 +1156,10 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle )
HYPRE_MemoryLocation memory_location_mpi = memory_location;
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
#if !defined(HYPRE_WITH_GPU_AWARE_MPI)
memory_location_mpi = HYPRE_MEMORY_HOST;
#endif
if (!hypre_GetGpuAwareMPI())
{
memory_location_mpi = HYPRE_MEMORY_HOST;
}
#endif
/*--------------------------------------------------------------------

View File

@ -499,6 +499,8 @@ main( hypre_int argc,
char mem_tracker_name[HYPRE_MAX_FILE_NAME_LEN] = {0};
#endif
HYPRE_Int gpu_aware_mpi = 0;
/* Initialize MPI */
hypre_MPI_Init(&argc, &argv);
@ -1597,6 +1599,11 @@ main( hypre_int argc,
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
}
#endif
else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 )
{
arg_index++;
gpu_aware_mpi = atoi(argv[arg_index++]);
}
else
{
arg_index++;
@ -2716,6 +2723,8 @@ main( hypre_int argc,
HYPRE_SetUseGpuRand(use_curand);
#endif
HYPRE_SetGpuAwareMPI(gpu_aware_mpi);
/*-----------------------------------------------------------
* Set up matrix
*-----------------------------------------------------------*/

View File

@ -2460,6 +2460,8 @@ main( hypre_int argc,
global_data.memory_location = memory_location;
HYPRE_Int gpu_aware_mpi = 0;
/*-----------------------------------------------------------
* Initialize some stuff
*-----------------------------------------------------------*/
@ -2903,6 +2905,11 @@ main( hypre_int argc,
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
}
#endif
else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 )
{
arg_index++;
gpu_aware_mpi = atoi(argv[arg_index++]);
}
else
{
arg_index++;
@ -2924,6 +2931,8 @@ main( hypre_int argc,
HYPRE_SetSpGemmUseVendor(spgemm_use_vendor);
#endif
HYPRE_SetGpuAwareMPI(gpu_aware_mpi);
if ( solver_id == 39 && lobpcgFlag )
{
solver_id = 10;

View File

@ -188,6 +188,7 @@ main( hypre_int argc,
HYPRE_MemoryLocation memory_location = HYPRE_MEMORY_DEVICE;
HYPRE_ExecutionPolicy default_exec_policy = HYPRE_EXEC_DEVICE;
#endif
HYPRE_Int gpu_aware_mpi = 0;
//HYPRE_Int device_level = -2;
@ -566,6 +567,11 @@ main( hypre_int argc,
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
}
#endif
else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 )
{
arg_index++;
gpu_aware_mpi = atoi(argv[arg_index++]);
}
/* end lobpcg */
else
{
@ -584,6 +590,8 @@ main( hypre_int argc,
/* default execution policy */
HYPRE_SetExecutionPolicy(default_exec_policy);
HYPRE_SetGpuAwareMPI(gpu_aware_mpi);
/* begin lobpcg */
if ( solver_id == 0 && lobpcgFlag )

View File

@ -49,3 +49,11 @@ HYPRE_SetUseGpuRand( HYPRE_Int use_gpu_rand )
return hypre_SetUseGpuRand(use_gpu_rand);
}
/*--------------------------------------------------------------------------
 * HYPRE_SetGpuAwareMPI
 *--------------------------------------------------------------------------*/
HYPRE_Int
HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi )
{
return hypre_SetGpuAwareMPI(use_gpu_aware_mpi);
}

View File

@ -343,6 +343,7 @@ HYPRE_Int HYPRE_SetSpMVUseVendor( HYPRE_Int use_vendor );
#define HYPRE_SetSpGemmUseCusparse(use_vendor) HYPRE_SetSpGemmUseVendor(use_vendor)
HYPRE_Int HYPRE_SetSpGemmUseVendor( HYPRE_Int use_vendor );
HYPRE_Int HYPRE_SetUseGpuRand( HYPRE_Int use_curand );
HYPRE_Int HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi );
/*--------------------------------------------------------------------------
* Base objects

View File

@ -1779,6 +1779,11 @@ typedef struct
HYPRE_Int struct_comm_recv_buffer_size;
HYPRE_Int struct_comm_send_buffer_size;
/* GPU MPI */
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
HYPRE_Int use_gpu_aware_mpi;
#endif
#if defined(HYPRE_USING_GPU)
hypre_DeviceData *device_data;
HYPRE_Int device_gs_method; /* device G-S options */
@ -1821,6 +1826,7 @@ typedef struct
#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data)
#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method)
#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi)
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
@ -2373,6 +2379,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand );
HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method );
HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func);
HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func);
HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi );
HYPRE_Int hypre_GetGpuAwareMPI(void);
/* int_array.c */
hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size );

View File

@ -44,6 +44,14 @@ hypre_HandleCreate(void)
hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */
#endif
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1;
#else
hypre_HandleUseGpuAwareMPI(hypre_handle_) = 0;
#endif
#endif
return hypre_handle_;
}

View File

@ -182,3 +182,24 @@ hypre_SetUserDeviceMfree(GPUMfreeFunc func)
return hypre_error_flag;
}
HYPRE_Int
hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi )
{
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
hypre_HandleUseGpuAwareMPI(hypre_handle()) = use_gpu_aware_mpi;
#else
HYPRE_UNUSED_VAR(use_gpu_aware_mpi);
#endif
return hypre_error_flag;
}
HYPRE_Int
hypre_GetGpuAwareMPI(void)
{
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
return hypre_HandleUseGpuAwareMPI(hypre_handle());
#else
return 0;
#endif
}

View File

@ -29,6 +29,11 @@ typedef struct
HYPRE_Int struct_comm_recv_buffer_size;
HYPRE_Int struct_comm_send_buffer_size;
/* GPU MPI */
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
HYPRE_Int use_gpu_aware_mpi;
#endif
#if defined(HYPRE_USING_GPU)
hypre_DeviceData *device_data;
HYPRE_Int device_gs_method; /* device G-S options */
@ -71,6 +76,7 @@ typedef struct
#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data)
#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method)
#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi)
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))

View File

@ -389,6 +389,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand );
HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method );
HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func);
HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func);
HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi );
HYPRE_Int hypre_GetGpuAwareMPI(void);
/* int_array.c */
hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size );