Make GPU-aware MPI a runtime option (#1031)
This PR makes GPU-aware MPI a runtime option.
This commit is contained in:
parent
0dfe0fee56
commit
33524991d0
@ -501,9 +501,12 @@ HYPRE_Int hypre_AMESetup(void *esolver)
|
||||
int_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
|
||||
@ -595,9 +595,12 @@ HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A,
|
||||
int_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
|
||||
@ -105,9 +105,12 @@ hypre_BoomerAMGBuildModPartialExtInterpDevice( hypre_ParCSRMatrix *A,
|
||||
send_buf );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||
@ -423,9 +426,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A,
|
||||
send_buf );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||
@ -445,9 +451,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A,
|
||||
send_buf );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||
|
||||
@ -169,9 +169,12 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S,
|
||||
(HYPRE_Int *) send_buf );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg,
|
||||
@ -254,10 +257,11 @@ hypre_GetGlobalMeasureDevice( hypre_ParCSRMatrix *S,
|
||||
/* compute local column nnz of the offd part */
|
||||
hypre_CSRMatrixColNNzRealDevice(S_offd, measure_offd);
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
/* RL: make sure measure_offd is ready before issuing GPU-GPU MPI */
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
#endif
|
||||
}
|
||||
|
||||
/* send local column nnz of the offd part to neighbors */
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, HYPRE_MEMORY_DEVICE, measure_offd,
|
||||
@ -381,9 +385,12 @@ hypre_PMISCoarseningInitDevice( hypre_ParCSRMatrix *S, /* in */
|
||||
real_send_buf);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg,
|
||||
@ -576,9 +583,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in
|
||||
real_send_buf);
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg,
|
||||
@ -597,9 +607,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in
|
||||
CF_marker_diag,
|
||||
int_send_buf);
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg,
|
||||
|
||||
@ -179,10 +179,11 @@ hypre_BoomerAMGIndepSetDevice( hypre_ParCSRMatrix *S,
|
||||
/*--------------------------------------------------------------------
|
||||
* Exchange boundary data for IS_marker: send external IS to internal
|
||||
*-------------------------------------------------------------------*/
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
||||
/* RL: make sure IS_marker_offd is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
#endif
|
||||
}
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(12, comm_pkg,
|
||||
HYPRE_MEMORY_DEVICE, IS_marker_offd,
|
||||
|
||||
@ -162,9 +162,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A,
|
||||
int_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
|
||||
@ -194,9 +197,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A,
|
||||
int_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
|
||||
@ -1149,9 +1155,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A,
|
||||
int_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
/* create a handle to start communication. 11: for integer */
|
||||
@ -1216,9 +1225,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A,
|
||||
thrust::plus<HYPRE_BigInt>() );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure big_int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_int_buf_data,
|
||||
|
||||
@ -1101,9 +1101,12 @@ hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A,
|
||||
send_buf );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||
@ -1398,9 +1401,12 @@ hypre_BoomerAMGBuildExtPEInterpDevice(hypre_ParCSRMatrix *A,
|
||||
send_buf );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||
|
||||
@ -283,9 +283,12 @@ hypre_BoomerAMGBuildRestrNeumannAIRDevice( hypre_ParCSRMatrix *A,
|
||||
thrust::plus<HYPRE_BigInt>() );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure send_buf_i is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg_Z, HYPRE_MEMORY_DEVICE, send_buf_i,
|
||||
|
||||
@ -383,9 +383,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A,
|
||||
int_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
dof_func_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_DEVICE);
|
||||
@ -418,9 +421,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A,
|
||||
int_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
/* allocate one more see comments in hypre_modmp_compute_num_cols_offd_fine_to_coarse */
|
||||
@ -572,9 +578,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A,
|
||||
int_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
/* create a handle to start communication. 11: for integer */
|
||||
@ -1096,9 +1105,12 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A,
|
||||
big_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data,
|
||||
@ -1392,9 +1404,12 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A,
|
||||
big_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data,
|
||||
|
||||
@ -138,9 +138,12 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A,
|
||||
int_buf_data );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
|
||||
|
||||
@ -411,9 +411,12 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
|
||||
send_buf );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||
@ -1593,9 +1596,12 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A,
|
||||
send_buf );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||
|
||||
@ -837,10 +837,11 @@ hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix *A,
|
||||
NULL,
|
||||
&comm_pkg_j);
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
||||
/* RL: make sure d_send_j/d_send_a is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
#endif
|
||||
}
|
||||
|
||||
/* init communication */
|
||||
/* ja */
|
||||
@ -1491,9 +1492,12 @@ hypre_ParCSRMatrixTransposeDevice( hypre_ParCSRMatrix *A,
|
||||
thrust::plus<HYPRE_BigInt>() );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure A_offdT is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!hypre_ParCSRMatrixCommPkg(A))
|
||||
@ -1927,9 +1931,12 @@ hypre_ParCSRMatrixDiagScaleDevice( hypre_ParCSRMatrix *par_A,
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* make sure send_rdbuf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
/* A_diag = diag(ld) * A_diag * diag(rd) */
|
||||
|
||||
@ -205,9 +205,12 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha,
|
||||
hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] += hypre_MPI_Wtime();
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure x_buf_data is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
/* when using GPUs, start local matvec first in order to overlap with communication */
|
||||
@ -426,10 +429,11 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha,
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
||||
/* RL: make sure y_tmp is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
#endif
|
||||
}
|
||||
|
||||
/* when using GPUs, start local matvec first in order to overlap with communication */
|
||||
if (diagT)
|
||||
|
||||
@ -492,9 +492,12 @@ hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A,
|
||||
func1 );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure Cint is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar;
|
||||
@ -767,9 +770,12 @@ hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R,
|
||||
func1 );
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||
/* RL: make sure Cint is ready before issuing GPU-GPU MPI */
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
}
|
||||
#endif
|
||||
|
||||
hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar;
|
||||
|
||||
@ -963,12 +963,16 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg,
|
||||
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||
if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST)
|
||||
{
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
||||
if (hypre_GetGpuAwareMPI())
|
||||
{
|
||||
#if defined(HYPRE_USING_GPU)
|
||||
hypre_ForceSyncComputeStream(hypre_handle());
|
||||
|
||||
#endif
|
||||
send_buffers_mpi = send_buffers;
|
||||
recv_buffers_mpi = recv_buffers;
|
||||
#else
|
||||
}
|
||||
else
|
||||
{
|
||||
memory_location_mpi = HYPRE_MEMORY_HOST;
|
||||
|
||||
send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST);
|
||||
@ -994,7 +998,7 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg,
|
||||
recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@ -1152,9 +1156,10 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle )
|
||||
HYPRE_MemoryLocation memory_location_mpi = memory_location;
|
||||
|
||||
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||
#if !defined(HYPRE_WITH_GPU_AWARE_MPI)
|
||||
if (!hypre_GetGpuAwareMPI())
|
||||
{
|
||||
memory_location_mpi = HYPRE_MEMORY_HOST;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/*--------------------------------------------------------------------
|
||||
|
||||
@ -499,6 +499,8 @@ main( hypre_int argc,
|
||||
char mem_tracker_name[HYPRE_MAX_FILE_NAME_LEN] = {0};
|
||||
#endif
|
||||
|
||||
HYPRE_Int gpu_aware_mpi = 0;
|
||||
|
||||
/* Initialize MPI */
|
||||
hypre_MPI_Init(&argc, &argv);
|
||||
|
||||
@ -1597,6 +1599,11 @@ main( hypre_int argc,
|
||||
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
|
||||
}
|
||||
#endif
|
||||
else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 )
|
||||
{
|
||||
arg_index++;
|
||||
gpu_aware_mpi = atoi(argv[arg_index++]);
|
||||
}
|
||||
else
|
||||
{
|
||||
arg_index++;
|
||||
@ -2716,6 +2723,8 @@ main( hypre_int argc,
|
||||
HYPRE_SetUseGpuRand(use_curand);
|
||||
#endif
|
||||
|
||||
HYPRE_SetGpuAwareMPI(gpu_aware_mpi);
|
||||
|
||||
/*-----------------------------------------------------------
|
||||
* Set up matrix
|
||||
*-----------------------------------------------------------*/
|
||||
|
||||
@ -2460,6 +2460,8 @@ main( hypre_int argc,
|
||||
|
||||
global_data.memory_location = memory_location;
|
||||
|
||||
HYPRE_Int gpu_aware_mpi = 0;
|
||||
|
||||
/*-----------------------------------------------------------
|
||||
* Initialize some stuff
|
||||
*-----------------------------------------------------------*/
|
||||
@ -2903,6 +2905,11 @@ main( hypre_int argc,
|
||||
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
|
||||
}
|
||||
#endif
|
||||
else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 )
|
||||
{
|
||||
arg_index++;
|
||||
gpu_aware_mpi = atoi(argv[arg_index++]);
|
||||
}
|
||||
else
|
||||
{
|
||||
arg_index++;
|
||||
@ -2924,6 +2931,8 @@ main( hypre_int argc,
|
||||
HYPRE_SetSpGemmUseVendor(spgemm_use_vendor);
|
||||
#endif
|
||||
|
||||
HYPRE_SetGpuAwareMPI(gpu_aware_mpi);
|
||||
|
||||
if ( solver_id == 39 && lobpcgFlag )
|
||||
{
|
||||
solver_id = 10;
|
||||
|
||||
@ -188,6 +188,7 @@ main( hypre_int argc,
|
||||
HYPRE_MemoryLocation memory_location = HYPRE_MEMORY_DEVICE;
|
||||
HYPRE_ExecutionPolicy default_exec_policy = HYPRE_EXEC_DEVICE;
|
||||
#endif
|
||||
HYPRE_Int gpu_aware_mpi = 0;
|
||||
|
||||
//HYPRE_Int device_level = -2;
|
||||
|
||||
@ -566,6 +567,11 @@ main( hypre_int argc,
|
||||
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
|
||||
}
|
||||
#endif
|
||||
else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 )
|
||||
{
|
||||
arg_index++;
|
||||
gpu_aware_mpi = atoi(argv[arg_index++]);
|
||||
}
|
||||
/* end lobpcg */
|
||||
else
|
||||
{
|
||||
@ -584,6 +590,8 @@ main( hypre_int argc,
|
||||
/* default execution policy */
|
||||
HYPRE_SetExecutionPolicy(default_exec_policy);
|
||||
|
||||
HYPRE_SetGpuAwareMPI(gpu_aware_mpi);
|
||||
|
||||
/* begin lobpcg */
|
||||
|
||||
if ( solver_id == 0 && lobpcgFlag )
|
||||
|
||||
@ -49,3 +49,11 @@ HYPRE_SetUseGpuRand( HYPRE_Int use_gpu_rand )
|
||||
return hypre_SetUseGpuRand(use_gpu_rand);
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
* HYPRE_SetGPUAwareMPI
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi )
|
||||
{
|
||||
return hypre_SetGpuAwareMPI(use_gpu_aware_mpi);
|
||||
}
|
||||
|
||||
@ -343,6 +343,7 @@ HYPRE_Int HYPRE_SetSpMVUseVendor( HYPRE_Int use_vendor );
|
||||
#define HYPRE_SetSpGemmUseCusparse(use_vendor) HYPRE_SetSpGemmUseVendor(use_vendor)
|
||||
HYPRE_Int HYPRE_SetSpGemmUseVendor( HYPRE_Int use_vendor );
|
||||
HYPRE_Int HYPRE_SetUseGpuRand( HYPRE_Int use_curand );
|
||||
HYPRE_Int HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi );
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
* Base objects
|
||||
|
||||
@ -1779,6 +1779,11 @@ typedef struct
|
||||
HYPRE_Int struct_comm_recv_buffer_size;
|
||||
HYPRE_Int struct_comm_send_buffer_size;
|
||||
|
||||
/* GPU MPI */
|
||||
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||
HYPRE_Int use_gpu_aware_mpi;
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_GPU)
|
||||
hypre_DeviceData *device_data;
|
||||
HYPRE_Int device_gs_method; /* device G-S options */
|
||||
@ -1821,6 +1826,7 @@ typedef struct
|
||||
|
||||
#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data)
|
||||
#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method)
|
||||
#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi)
|
||||
|
||||
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
|
||||
@ -2373,6 +2379,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand );
|
||||
HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method );
|
||||
HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func);
|
||||
HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func);
|
||||
HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi );
|
||||
HYPRE_Int hypre_GetGpuAwareMPI(void);
|
||||
|
||||
/* int_array.c */
|
||||
hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size );
|
||||
|
||||
@ -44,6 +44,14 @@ hypre_HandleCreate(void)
|
||||
hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
||||
hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1;
|
||||
#else
|
||||
hypre_HandleUseGpuAwareMPI(hypre_handle_) = 0;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return hypre_handle_;
|
||||
}
|
||||
|
||||
|
||||
@ -182,3 +182,24 @@ hypre_SetUserDeviceMfree(GPUMfreeFunc func)
|
||||
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
HYPRE_Int
|
||||
hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi )
|
||||
{
|
||||
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||
hypre_HandleUseGpuAwareMPI(hypre_handle()) = use_gpu_aware_mpi;
|
||||
#else
|
||||
HYPRE_UNUSED_VAR(use_gpu_aware_mpi);
|
||||
#endif
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
HYPRE_Int
|
||||
hypre_GetGpuAwareMPI(void)
|
||||
{
|
||||
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||
return hypre_HandleUseGpuAwareMPI(hypre_handle());
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -29,6 +29,11 @@ typedef struct
|
||||
HYPRE_Int struct_comm_recv_buffer_size;
|
||||
HYPRE_Int struct_comm_send_buffer_size;
|
||||
|
||||
/* GPU MPI */
|
||||
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||
HYPRE_Int use_gpu_aware_mpi;
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_GPU)
|
||||
hypre_DeviceData *device_data;
|
||||
HYPRE_Int device_gs_method; /* device G-S options */
|
||||
@ -71,6 +76,7 @@ typedef struct
|
||||
|
||||
#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data)
|
||||
#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method)
|
||||
#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi)
|
||||
|
||||
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
|
||||
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
|
||||
|
||||
@ -389,6 +389,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand );
|
||||
HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method );
|
||||
HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func);
|
||||
HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func);
|
||||
HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi );
|
||||
HYPRE_Int hypre_GetGpuAwareMPI(void);
|
||||
|
||||
/* int_array.c */
|
||||
hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size );
|
||||
|
||||
Loading…
Reference in New Issue
Block a user