Make GPU-aware MPI a runtime option (#1031)
This PR makes GPU-aware MPI a runtime option.
This commit is contained in:
parent
0dfe0fee56
commit
33524991d0
@ -501,9 +501,12 @@ HYPRE_Int hypre_AMESetup(void *esolver)
|
|||||||
int_buf_data );
|
int_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@ -595,9 +595,12 @@ HYPRE_Int hypre_ParCSRComputeL1Norms(hypre_ParCSRMatrix *A,
|
|||||||
int_buf_data );
|
int_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@ -105,9 +105,12 @@ hypre_BoomerAMGBuildModPartialExtInterpDevice( hypre_ParCSRMatrix *A,
|
|||||||
send_buf );
|
send_buf );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||||
@ -423,9 +426,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A,
|
|||||||
send_buf );
|
send_buf );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||||
@ -445,9 +451,12 @@ hypre_BoomerAMGBuildModPartialExtPEInterpDevice( hypre_ParCSRMatrix *A,
|
|||||||
send_buf );
|
send_buf );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||||
|
|||||||
@ -169,9 +169,12 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S,
|
|||||||
(HYPRE_Int *) send_buf );
|
(HYPRE_Int *) send_buf );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg,
|
||||||
@ -254,10 +257,11 @@ hypre_GetGlobalMeasureDevice( hypre_ParCSRMatrix *S,
|
|||||||
/* compute local column nnz of the offd part */
|
/* compute local column nnz of the offd part */
|
||||||
hypre_CSRMatrixColNNzRealDevice(S_offd, measure_offd);
|
hypre_CSRMatrixColNNzRealDevice(S_offd, measure_offd);
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
if (hypre_GetGpuAwareMPI())
|
||||||
/* RL: make sure measure_offd is ready before issuing GPU-GPU MPI */
|
{
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
/* RL: make sure measure_offd is ready before issuing GPU-GPU MPI */
|
||||||
#endif
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
|
|
||||||
/* send local column nnz of the offd part to neighbors */
|
/* send local column nnz of the offd part to neighbors */
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, HYPRE_MEMORY_DEVICE, measure_offd,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(2, comm_pkg, HYPRE_MEMORY_DEVICE, measure_offd,
|
||||||
@ -381,9 +385,12 @@ hypre_PMISCoarseningInitDevice( hypre_ParCSRMatrix *S, /* in */
|
|||||||
real_send_buf);
|
real_send_buf);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg,
|
||||||
@ -576,9 +583,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in
|
|||||||
real_send_buf);
|
real_send_buf);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure real_send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg,
|
||||||
@ -597,9 +607,12 @@ hypre_PMISCoarseningUpdateCFDevice( hypre_ParCSRMatrix *S, /* in
|
|||||||
CF_marker_diag,
|
CF_marker_diag,
|
||||||
int_send_buf);
|
int_send_buf);
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg,
|
||||||
|
|||||||
@ -179,10 +179,11 @@ hypre_BoomerAMGIndepSetDevice( hypre_ParCSRMatrix *S,
|
|||||||
/*--------------------------------------------------------------------
|
/*--------------------------------------------------------------------
|
||||||
* Exchange boundary data for IS_marker: send external IS to internal
|
* Exchange boundary data for IS_marker: send external IS to internal
|
||||||
*-------------------------------------------------------------------*/
|
*-------------------------------------------------------------------*/
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
|
||||||
/* RL: make sure IS_marker_offd is ready before issuing GPU-GPU MPI */
|
/* RL: make sure IS_marker_offd is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
#endif
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(12, comm_pkg,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(12, comm_pkg,
|
||||||
HYPRE_MEMORY_DEVICE, IS_marker_offd,
|
HYPRE_MEMORY_DEVICE, IS_marker_offd,
|
||||||
|
|||||||
@ -162,9 +162,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A,
|
|||||||
int_buf_data );
|
int_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
|
||||||
@ -194,9 +197,12 @@ hypre_BoomerAMGBuildDirInterpDevice( hypre_ParCSRMatrix *A,
|
|||||||
int_buf_data );
|
int_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
|
||||||
@ -1149,9 +1155,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A,
|
|||||||
int_buf_data );
|
int_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* create a handle to start communication. 11: for integer */
|
/* create a handle to start communication. 11: for integer */
|
||||||
@ -1216,9 +1225,12 @@ hypre_BoomerAMGBuildInterpOnePntDevice( hypre_ParCSRMatrix *A,
|
|||||||
thrust::plus<HYPRE_BigInt>() );
|
thrust::plus<HYPRE_BigInt>() );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure big_int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure big_int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_int_buf_data,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_int_buf_data,
|
||||||
|
|||||||
@ -1101,9 +1101,12 @@ hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A,
|
|||||||
send_buf );
|
send_buf );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||||
@ -1398,9 +1401,12 @@ hypre_BoomerAMGBuildExtPEInterpDevice(hypre_ParCSRMatrix *A,
|
|||||||
send_buf );
|
send_buf );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(1, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||||
|
|||||||
@ -283,9 +283,12 @@ hypre_BoomerAMGBuildRestrNeumannAIRDevice( hypre_ParCSRMatrix *A,
|
|||||||
thrust::plus<HYPRE_BigInt>() );
|
thrust::plus<HYPRE_BigInt>() );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure send_buf_i is ready before issuing GPU-GPU MPI */
|
/* RL: make sure send_buf_i is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg_Z, HYPRE_MEMORY_DEVICE, send_buf_i,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg_Z, HYPRE_MEMORY_DEVICE, send_buf_i,
|
||||||
|
|||||||
@ -383,9 +383,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A,
|
|||||||
int_buf_data );
|
int_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
dof_func_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_DEVICE);
|
dof_func_offd = hypre_TAlloc(HYPRE_Int, num_cols_offd_A, HYPRE_MEMORY_DEVICE);
|
||||||
@ -418,9 +421,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A,
|
|||||||
int_buf_data );
|
int_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* allocate one more see comments in hypre_modmp_compute_num_cols_offd_fine_to_coarse */
|
/* allocate one more see comments in hypre_modmp_compute_num_cols_offd_fine_to_coarse */
|
||||||
@ -572,9 +578,12 @@ hypre_BoomerAMGBuildModMultipassDevice( hypre_ParCSRMatrix *A,
|
|||||||
int_buf_data );
|
int_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* create a handle to start communication. 11: for integer */
|
/* create a handle to start communication. 11: for integer */
|
||||||
@ -1096,9 +1105,12 @@ hypre_GenerateMultipassPiDevice( hypre_ParCSRMatrix *A,
|
|||||||
big_buf_data );
|
big_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data,
|
||||||
@ -1392,9 +1404,12 @@ hypre_GenerateMultiPiDevice( hypre_ParCSRMatrix *A,
|
|||||||
big_buf_data );
|
big_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure big_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, big_buf_data,
|
||||||
|
|||||||
@ -138,9 +138,12 @@ hypre_BoomerAMGCreateSDevice(hypre_ParCSRMatrix *A,
|
|||||||
int_buf_data );
|
int_buf_data );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure int_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data,
|
||||||
|
|||||||
@ -411,9 +411,12 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
|
|||||||
send_buf );
|
send_buf );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||||
@ -1593,9 +1596,12 @@ hypre_ParCSRMatrixGenerate1DCFDevice( hypre_ParCSRMatrix *A,
|
|||||||
send_buf );
|
send_buf );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
/* RL: make sure send_buf is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
comm_handle = hypre_ParCSRCommHandleCreate_v2(21, comm_pkg, HYPRE_MEMORY_DEVICE, send_buf,
|
||||||
|
|||||||
@ -837,10 +837,11 @@ hypre_ParcsrGetExternalRowsDeviceInit( hypre_ParCSRMatrix *A,
|
|||||||
NULL,
|
NULL,
|
||||||
&comm_pkg_j);
|
&comm_pkg_j);
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
|
||||||
/* RL: make sure d_send_j/d_send_a is ready before issuing GPU-GPU MPI */
|
/* RL: make sure d_send_j/d_send_a is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
#endif
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
|
|
||||||
/* init communication */
|
/* init communication */
|
||||||
/* ja */
|
/* ja */
|
||||||
@ -1491,9 +1492,12 @@ hypre_ParCSRMatrixTransposeDevice( hypre_ParCSRMatrix *A,
|
|||||||
thrust::plus<HYPRE_BigInt>() );
|
thrust::plus<HYPRE_BigInt>() );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure A_offdT is ready before issuing GPU-GPU MPI */
|
/* RL: make sure A_offdT is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!hypre_ParCSRMatrixCommPkg(A))
|
if (!hypre_ParCSRMatrixCommPkg(A))
|
||||||
@ -1927,9 +1931,12 @@ hypre_ParCSRMatrixDiagScaleDevice( hypre_ParCSRMatrix *par_A,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* make sure send_rdbuf_data is ready before issuing GPU-GPU MPI */
|
/* make sure send_rdbuf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* A_diag = diag(ld) * A_diag * diag(rd) */
|
/* A_diag = diag(ld) * A_diag * diag(rd) */
|
||||||
|
|||||||
@ -205,9 +205,12 @@ hypre_ParCSRMatrixMatvecOutOfPlaceDevice( HYPRE_Complex alpha,
|
|||||||
hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] += hypre_MPI_Wtime();
|
hypre_profile_times[HYPRE_TIMER_ID_PACK_UNPACK] += hypre_MPI_Wtime();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure x_buf_data is ready before issuing GPU-GPU MPI */
|
/* RL: make sure x_buf_data is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* when using GPUs, start local matvec first in order to overlap with communication */
|
/* when using GPUs, start local matvec first in order to overlap with communication */
|
||||||
@ -426,10 +429,11 @@ hypre_ParCSRMatrixMatvecTDevice( HYPRE_Complex alpha,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
|
||||||
/* RL: make sure y_tmp is ready before issuing GPU-GPU MPI */
|
/* RL: make sure y_tmp is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
#endif
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
|
|
||||||
/* when using GPUs, start local matvec first in order to overlap with communication */
|
/* when using GPUs, start local matvec first in order to overlap with communication */
|
||||||
if (diagT)
|
if (diagT)
|
||||||
|
|||||||
@ -492,9 +492,12 @@ hypre_ParCSRTMatMatKTDevice( hypre_ParCSRMatrix *A,
|
|||||||
func1 );
|
func1 );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure Cint is ready before issuing GPU-GPU MPI */
|
/* RL: make sure Cint is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar;
|
hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar;
|
||||||
@ -767,9 +770,12 @@ hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R,
|
|||||||
func1 );
|
func1 );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI) && defined(HYPRE_USING_THRUST_NOSYNC)
|
#if defined(HYPRE_USING_THRUST_NOSYNC)
|
||||||
/* RL: make sure Cint is ready before issuing GPU-GPU MPI */
|
/* RL: make sure Cint is ready before issuing GPU-GPU MPI */
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
if (hypre_GetGpuAwareMPI())
|
||||||
|
{
|
||||||
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar;
|
hypre_CSRMatrixData(Cint) = hypre_CSRMatrixData(Cbar) + local_nnz_Cbar;
|
||||||
|
|||||||
@ -963,38 +963,42 @@ hypre_InitializeCommunication( hypre_CommPkg *comm_pkg,
|
|||||||
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||||
if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST)
|
if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST)
|
||||||
{
|
{
|
||||||
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
if (hypre_GetGpuAwareMPI())
|
||||||
hypre_ForceSyncComputeStream(hypre_handle());
|
|
||||||
|
|
||||||
send_buffers_mpi = send_buffers;
|
|
||||||
recv_buffers_mpi = recv_buffers;
|
|
||||||
#else
|
|
||||||
memory_location_mpi = HYPRE_MEMORY_HOST;
|
|
||||||
|
|
||||||
send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST);
|
|
||||||
if (num_sends > 0)
|
|
||||||
{
|
{
|
||||||
size = hypre_CommPkgSendBufsize(comm_pkg);
|
#if defined(HYPRE_USING_GPU)
|
||||||
send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi);
|
hypre_ForceSyncComputeStream(hypre_handle());
|
||||||
for (i = 1; i < num_sends; i++)
|
|
||||||
{
|
|
||||||
send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]);
|
|
||||||
}
|
|
||||||
hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST,
|
|
||||||
memory_location);
|
|
||||||
}
|
|
||||||
|
|
||||||
recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST);
|
|
||||||
if (num_recvs > 0)
|
|
||||||
{
|
|
||||||
size = hypre_CommPkgRecvBufsize(comm_pkg);
|
|
||||||
recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi);
|
|
||||||
for (i = 1; i < num_recvs; i++)
|
|
||||||
{
|
|
||||||
recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
send_buffers_mpi = send_buffers;
|
||||||
|
recv_buffers_mpi = recv_buffers;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
memory_location_mpi = HYPRE_MEMORY_HOST;
|
||||||
|
|
||||||
|
send_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_sends, HYPRE_MEMORY_HOST);
|
||||||
|
if (num_sends > 0)
|
||||||
|
{
|
||||||
|
size = hypre_CommPkgSendBufsize(comm_pkg);
|
||||||
|
send_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi);
|
||||||
|
for (i = 1; i < num_sends; i++)
|
||||||
|
{
|
||||||
|
send_buffers_mpi[i] = send_buffers_mpi[i - 1] + (send_buffers[i] - send_buffers[i - 1]);
|
||||||
|
}
|
||||||
|
hypre_TMemcpy(send_buffers_mpi[0], send_buffers[0], HYPRE_Complex, size, HYPRE_MEMORY_HOST,
|
||||||
|
memory_location);
|
||||||
|
}
|
||||||
|
|
||||||
|
recv_buffers_mpi = hypre_TAlloc(HYPRE_Complex *, num_recvs, HYPRE_MEMORY_HOST);
|
||||||
|
if (num_recvs > 0)
|
||||||
|
{
|
||||||
|
size = hypre_CommPkgRecvBufsize(comm_pkg);
|
||||||
|
recv_buffers_mpi[0] = hypre_CTAlloc(HYPRE_Complex, size, memory_location_mpi);
|
||||||
|
for (i = 1; i < num_recvs; i++)
|
||||||
|
{
|
||||||
|
recv_buffers_mpi[i] = recv_buffers_mpi[i - 1] + (recv_buffers[i] - recv_buffers[i - 1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
@ -1152,9 +1156,10 @@ hypre_FinalizeCommunication( hypre_CommHandle *comm_handle )
|
|||||||
HYPRE_MemoryLocation memory_location_mpi = memory_location;
|
HYPRE_MemoryLocation memory_location_mpi = memory_location;
|
||||||
|
|
||||||
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||||
#if !defined(HYPRE_WITH_GPU_AWARE_MPI)
|
if (!hypre_GetGpuAwareMPI())
|
||||||
memory_location_mpi = HYPRE_MEMORY_HOST;
|
{
|
||||||
#endif
|
memory_location_mpi = HYPRE_MEMORY_HOST;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*--------------------------------------------------------------------
|
/*--------------------------------------------------------------------
|
||||||
|
|||||||
@ -499,6 +499,8 @@ main( hypre_int argc,
|
|||||||
char mem_tracker_name[HYPRE_MAX_FILE_NAME_LEN] = {0};
|
char mem_tracker_name[HYPRE_MAX_FILE_NAME_LEN] = {0};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
HYPRE_Int gpu_aware_mpi = 0;
|
||||||
|
|
||||||
/* Initialize MPI */
|
/* Initialize MPI */
|
||||||
hypre_MPI_Init(&argc, &argv);
|
hypre_MPI_Init(&argc, &argv);
|
||||||
|
|
||||||
@ -1597,6 +1599,11 @@ main( hypre_int argc,
|
|||||||
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
|
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 )
|
||||||
|
{
|
||||||
|
arg_index++;
|
||||||
|
gpu_aware_mpi = atoi(argv[arg_index++]);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
arg_index++;
|
arg_index++;
|
||||||
@ -2716,6 +2723,8 @@ main( hypre_int argc,
|
|||||||
HYPRE_SetUseGpuRand(use_curand);
|
HYPRE_SetUseGpuRand(use_curand);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
HYPRE_SetGpuAwareMPI(gpu_aware_mpi);
|
||||||
|
|
||||||
/*-----------------------------------------------------------
|
/*-----------------------------------------------------------
|
||||||
* Set up matrix
|
* Set up matrix
|
||||||
*-----------------------------------------------------------*/
|
*-----------------------------------------------------------*/
|
||||||
|
|||||||
@ -2460,6 +2460,8 @@ main( hypre_int argc,
|
|||||||
|
|
||||||
global_data.memory_location = memory_location;
|
global_data.memory_location = memory_location;
|
||||||
|
|
||||||
|
HYPRE_Int gpu_aware_mpi = 0;
|
||||||
|
|
||||||
/*-----------------------------------------------------------
|
/*-----------------------------------------------------------
|
||||||
* Initialize some stuff
|
* Initialize some stuff
|
||||||
*-----------------------------------------------------------*/
|
*-----------------------------------------------------------*/
|
||||||
@ -2903,6 +2905,11 @@ main( hypre_int argc,
|
|||||||
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
|
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 )
|
||||||
|
{
|
||||||
|
arg_index++;
|
||||||
|
gpu_aware_mpi = atoi(argv[arg_index++]);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
arg_index++;
|
arg_index++;
|
||||||
@ -2924,6 +2931,8 @@ main( hypre_int argc,
|
|||||||
HYPRE_SetSpGemmUseVendor(spgemm_use_vendor);
|
HYPRE_SetSpGemmUseVendor(spgemm_use_vendor);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
HYPRE_SetGpuAwareMPI(gpu_aware_mpi);
|
||||||
|
|
||||||
if ( solver_id == 39 && lobpcgFlag )
|
if ( solver_id == 39 && lobpcgFlag )
|
||||||
{
|
{
|
||||||
solver_id = 10;
|
solver_id = 10;
|
||||||
|
|||||||
@ -188,6 +188,7 @@ main( hypre_int argc,
|
|||||||
HYPRE_MemoryLocation memory_location = HYPRE_MEMORY_DEVICE;
|
HYPRE_MemoryLocation memory_location = HYPRE_MEMORY_DEVICE;
|
||||||
HYPRE_ExecutionPolicy default_exec_policy = HYPRE_EXEC_DEVICE;
|
HYPRE_ExecutionPolicy default_exec_policy = HYPRE_EXEC_DEVICE;
|
||||||
#endif
|
#endif
|
||||||
|
HYPRE_Int gpu_aware_mpi = 0;
|
||||||
|
|
||||||
//HYPRE_Int device_level = -2;
|
//HYPRE_Int device_level = -2;
|
||||||
|
|
||||||
@ -566,6 +567,11 @@ main( hypre_int argc,
|
|||||||
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
|
snprintf(mem_tracker_name, HYPRE_MAX_FILE_NAME_LEN, "%s", argv[arg_index++]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
else if ( strcmp(argv[arg_index], "-gpu_mpi") == 0 )
|
||||||
|
{
|
||||||
|
arg_index++;
|
||||||
|
gpu_aware_mpi = atoi(argv[arg_index++]);
|
||||||
|
}
|
||||||
/* end lobpcg */
|
/* end lobpcg */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -584,6 +590,8 @@ main( hypre_int argc,
|
|||||||
/* default execution policy */
|
/* default execution policy */
|
||||||
HYPRE_SetExecutionPolicy(default_exec_policy);
|
HYPRE_SetExecutionPolicy(default_exec_policy);
|
||||||
|
|
||||||
|
HYPRE_SetGpuAwareMPI(gpu_aware_mpi);
|
||||||
|
|
||||||
/* begin lobpcg */
|
/* begin lobpcg */
|
||||||
|
|
||||||
if ( solver_id == 0 && lobpcgFlag )
|
if ( solver_id == 0 && lobpcgFlag )
|
||||||
|
|||||||
@ -49,3 +49,11 @@ HYPRE_SetUseGpuRand( HYPRE_Int use_gpu_rand )
|
|||||||
return hypre_SetUseGpuRand(use_gpu_rand);
|
return hypre_SetUseGpuRand(use_gpu_rand);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------
|
||||||
|
* HYPRE_SetGPUAwareMPI
|
||||||
|
*--------------------------------------------------------------------------*/
|
||||||
|
HYPRE_Int
|
||||||
|
HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi )
|
||||||
|
{
|
||||||
|
return hypre_SetGpuAwareMPI(use_gpu_aware_mpi);
|
||||||
|
}
|
||||||
|
|||||||
@ -343,6 +343,7 @@ HYPRE_Int HYPRE_SetSpMVUseVendor( HYPRE_Int use_vendor );
|
|||||||
#define HYPRE_SetSpGemmUseCusparse(use_vendor) HYPRE_SetSpGemmUseVendor(use_vendor)
|
#define HYPRE_SetSpGemmUseCusparse(use_vendor) HYPRE_SetSpGemmUseVendor(use_vendor)
|
||||||
HYPRE_Int HYPRE_SetSpGemmUseVendor( HYPRE_Int use_vendor );
|
HYPRE_Int HYPRE_SetSpGemmUseVendor( HYPRE_Int use_vendor );
|
||||||
HYPRE_Int HYPRE_SetUseGpuRand( HYPRE_Int use_curand );
|
HYPRE_Int HYPRE_SetUseGpuRand( HYPRE_Int use_curand );
|
||||||
|
HYPRE_Int HYPRE_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi );
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------
|
/*--------------------------------------------------------------------------
|
||||||
* Base objects
|
* Base objects
|
||||||
|
|||||||
@ -1779,6 +1779,11 @@ typedef struct
|
|||||||
HYPRE_Int struct_comm_recv_buffer_size;
|
HYPRE_Int struct_comm_recv_buffer_size;
|
||||||
HYPRE_Int struct_comm_send_buffer_size;
|
HYPRE_Int struct_comm_send_buffer_size;
|
||||||
|
|
||||||
|
/* GPU MPI */
|
||||||
|
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||||
|
HYPRE_Int use_gpu_aware_mpi;
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_USING_GPU)
|
#if defined(HYPRE_USING_GPU)
|
||||||
hypre_DeviceData *device_data;
|
hypre_DeviceData *device_data;
|
||||||
HYPRE_Int device_gs_method; /* device G-S options */
|
HYPRE_Int device_gs_method; /* device G-S options */
|
||||||
@ -1821,6 +1826,7 @@ typedef struct
|
|||||||
|
|
||||||
#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data)
|
#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data)
|
||||||
#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method)
|
#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method)
|
||||||
|
#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi)
|
||||||
|
|
||||||
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
|
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
|
||||||
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
|
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
|
||||||
@ -2373,6 +2379,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand );
|
|||||||
HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method );
|
HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method );
|
||||||
HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func);
|
HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func);
|
||||||
HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func);
|
HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func);
|
||||||
|
HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi );
|
||||||
|
HYPRE_Int hypre_GetGpuAwareMPI(void);
|
||||||
|
|
||||||
/* int_array.c */
|
/* int_array.c */
|
||||||
hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size );
|
hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size );
|
||||||
|
|||||||
@ -44,6 +44,14 @@ hypre_HandleCreate(void)
|
|||||||
hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */
|
hypre_HandleDeviceGSMethod(hypre_handle_) = 1; /* CPU: 0; Cusparse: 1 */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||||
|
#if defined(HYPRE_WITH_GPU_AWARE_MPI)
|
||||||
|
hypre_HandleUseGpuAwareMPI(hypre_handle_) = 1;
|
||||||
|
#else
|
||||||
|
hypre_HandleUseGpuAwareMPI(hypre_handle_) = 0;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
return hypre_handle_;
|
return hypre_handle_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -182,3 +182,24 @@ hypre_SetUserDeviceMfree(GPUMfreeFunc func)
|
|||||||
|
|
||||||
return hypre_error_flag;
|
return hypre_error_flag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HYPRE_Int
|
||||||
|
hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi )
|
||||||
|
{
|
||||||
|
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||||
|
hypre_HandleUseGpuAwareMPI(hypre_handle()) = use_gpu_aware_mpi;
|
||||||
|
#else
|
||||||
|
HYPRE_UNUSED_VAR(use_gpu_aware_mpi);
|
||||||
|
#endif
|
||||||
|
return hypre_error_flag;
|
||||||
|
}
|
||||||
|
|
||||||
|
HYPRE_Int
|
||||||
|
hypre_GetGpuAwareMPI(void)
|
||||||
|
{
|
||||||
|
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||||
|
return hypre_HandleUseGpuAwareMPI(hypre_handle());
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|||||||
@ -29,6 +29,11 @@ typedef struct
|
|||||||
HYPRE_Int struct_comm_recv_buffer_size;
|
HYPRE_Int struct_comm_recv_buffer_size;
|
||||||
HYPRE_Int struct_comm_send_buffer_size;
|
HYPRE_Int struct_comm_send_buffer_size;
|
||||||
|
|
||||||
|
/* GPU MPI */
|
||||||
|
#if defined(HYPRE_USING_GPU) || defined(HYPRE_USING_DEVICE_OPENMP)
|
||||||
|
HYPRE_Int use_gpu_aware_mpi;
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(HYPRE_USING_GPU)
|
#if defined(HYPRE_USING_GPU)
|
||||||
hypre_DeviceData *device_data;
|
hypre_DeviceData *device_data;
|
||||||
HYPRE_Int device_gs_method; /* device G-S options */
|
HYPRE_Int device_gs_method; /* device G-S options */
|
||||||
@ -71,6 +76,7 @@ typedef struct
|
|||||||
|
|
||||||
#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data)
|
#define hypre_HandleDeviceData(hypre_handle) ((hypre_handle) -> device_data)
|
||||||
#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method)
|
#define hypre_HandleDeviceGSMethod(hypre_handle) ((hypre_handle) -> device_gs_method)
|
||||||
|
#define hypre_HandleUseGpuAwareMPI(hypre_handle) ((hypre_handle) -> use_gpu_aware_mpi)
|
||||||
|
|
||||||
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
|
#define hypre_HandleCurandGenerator(hypre_handle) hypre_DeviceDataCurandGenerator(hypre_HandleDeviceData(hypre_handle))
|
||||||
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
|
#define hypre_HandleCublasHandle(hypre_handle) hypre_DeviceDataCublasHandle(hypre_HandleDeviceData(hypre_handle))
|
||||||
|
|||||||
@ -389,6 +389,8 @@ HYPRE_Int hypre_SetUseGpuRand( HYPRE_Int use_gpurand );
|
|||||||
HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method );
|
HYPRE_Int hypre_SetGaussSeidelMethod( HYPRE_Int gs_method );
|
||||||
HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func);
|
HYPRE_Int hypre_SetUserDeviceMalloc(GPUMallocFunc func);
|
||||||
HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func);
|
HYPRE_Int hypre_SetUserDeviceMfree(GPUMfreeFunc func);
|
||||||
|
HYPRE_Int hypre_SetGpuAwareMPI( HYPRE_Int use_gpu_aware_mpi );
|
||||||
|
HYPRE_Int hypre_GetGpuAwareMPI(void);
|
||||||
|
|
||||||
/* int_array.c */
|
/* int_array.c */
|
||||||
hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size );
|
hypre_IntArray* hypre_IntArrayCreate( HYPRE_Int size );
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user