commit
9791056f00
@ -38,9 +38,10 @@ shift
|
||||
# Basic build and run tests
|
||||
mo="-j test"
|
||||
eo=""
|
||||
roij="-ij -rt -mpibind -rtol 1e-3 -atol 1e-3"
|
||||
roij="-ij -ams -rt -mpibind -rtol 1e-3 -atol 1e-3"
|
||||
ross="-struct -sstruct -rt -mpibind -rtol 1e-6 -atol 1e-6"
|
||||
rost="-struct -rt -mpibind -rtol 1e-8 -atol 1e-8"
|
||||
rocuda="-cuda -rt -mpibind -rtol 1e-5 -atol 3e-5"
|
||||
|
||||
# CUDA with UM
|
||||
co="--with-cuda --enable-unified-memory --enable-persistent --enable-cub --enable-debug --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' HYPRE_CUDA_SM=70"
|
||||
@ -49,9 +50,9 @@ co="--with-cuda --enable-unified-memory --enable-persistent --enable-cub --enabl
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ross
|
||||
./renametest.sh basic $output_dir/basic-cuda-um-struct-sstruct
|
||||
|
||||
# CUDA with UM [shared library, no run]
|
||||
# CUDA with UM [shared library]
|
||||
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' HYPRE_CUDA_SM=70"
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo
|
||||
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $rocuda
|
||||
./renametest.sh basic $output_dir/basic-cuda-um-shared
|
||||
#./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $roij
|
||||
#./renametest.sh basic $output_dir/basic-cuda-um-shared-ij
|
||||
|
||||
@ -1567,6 +1567,10 @@ HYPRE_Int hypre_BoomerAMGBuildFFInterp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_m
|
||||
HYPRE_Int hypre_BoomerAMGBuildFF1Interp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_marker , hypre_ParCSRMatrix *S , HYPRE_BigInt *num_cpts_global , HYPRE_Int num_functions , HYPRE_Int *dof_func , HYPRE_Int debug_flag , HYPRE_Real trunc_factor , HYPRE_Int max_elmts , HYPRE_Int *col_offd_S_to_A , hypre_ParCSRMatrix **P_ptr );
|
||||
HYPRE_Int hypre_BoomerAMGBuildExtInterp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_marker , hypre_ParCSRMatrix *S , HYPRE_BigInt *num_cpts_global , HYPRE_Int num_functions , HYPRE_Int *dof_func , HYPRE_Int debug_flag , HYPRE_Real trunc_factor , HYPRE_Int max_elmts , HYPRE_Int *col_offd_S_to_A , hypre_ParCSRMatrix **P_ptr );
|
||||
|
||||
/* par_lr_interp_device.c */
|
||||
HYPRE_Int hypre_BoomerAMGBuildExtInterpDevice(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
|
||||
HYPRE_Int hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr);
|
||||
|
||||
/* par_mod_lr_interp.c */
|
||||
HYPRE_Int hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
|
||||
HYPRE_Int hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
|
||||
|
||||
@ -163,6 +163,9 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S,
|
||||
/*---------------------------------------------------
|
||||
* Clean up and return
|
||||
*---------------------------------------------------*/
|
||||
if( *CF_marker_ptr == NULL )
|
||||
*CF_marker_ptr = hypre_CTAlloc(HYPRE_Int, num_cols_diag, HYPRE_MEMORY_HOST);
|
||||
|
||||
hypre_TMemcpy( *CF_marker_ptr, CF_marker_diag, HYPRE_Int, num_cols_diag, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE );
|
||||
hypre_TFree(CF_marker_diag, HYPRE_MEMORY_DEVICE);
|
||||
|
||||
|
||||
@ -2688,7 +2688,7 @@ hypre_BoomerAMGBuildDirInterp( hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int ierr = 0;
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixDiag(A)) );
|
||||
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
|
||||
|
||||
if (exec == HYPRE_EXEC_DEVICE)
|
||||
{
|
||||
|
||||
@ -1038,29 +1038,26 @@ hypre_BoomerAMGBuildStdInterp(hypre_ParCSRMatrix *A,
|
||||
* Comment:
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildExtPIInterp(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions,
|
||||
HYPRE_Int *dof_func,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
hypre_BoomerAMGBuildExtPIInterpHost(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions,
|
||||
HYPRE_Int *dof_func,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
#ifdef HYPRE_PROFILE
|
||||
hypre_profile_times[HYPRE_TIMER_ID_EXTENDED_I_INTERP] -= hypre_MPI_Wtime();
|
||||
#endif
|
||||
|
||||
/* Communication Variables */
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
||||
|
||||
|
||||
HYPRE_Int my_id, num_procs;
|
||||
HYPRE_Int my_id, num_procs;
|
||||
|
||||
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
|
||||
@ -1932,19 +1929,24 @@ hypre_BoomerAMGBuildExtPIInterp(hypre_ParCSRMatrix *A,
|
||||
* Comment: Only use FF when there is no common c point.
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions,
|
||||
HYPRE_Int *dof_func,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
/* Communication Variables */
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
||||
HYPRE_Int my_id, num_procs;
|
||||
|
||||
|
||||
HYPRE_Int my_id, num_procs;
|
||||
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
|
||||
/* Variables to store input variables */
|
||||
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
|
||||
@ -2088,8 +2090,8 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
/*-----------------------------------------------------------------------
|
||||
* Initialize counters and allocate mapping vector.
|
||||
*-----------------------------------------------------------------------*/
|
||||
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_DEVICE);
|
||||
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_DEVICE);
|
||||
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
|
||||
if (n_fine)
|
||||
{
|
||||
@ -2369,14 +2371,14 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
|
||||
if (P_diag_size)
|
||||
{
|
||||
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_DEVICE);
|
||||
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_DEVICE);
|
||||
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, memory_location_P);
|
||||
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, memory_location_P);
|
||||
}
|
||||
|
||||
if (P_offd_size)
|
||||
{
|
||||
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_DEVICE);
|
||||
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_DEVICE);
|
||||
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, memory_location_P);
|
||||
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, memory_location_P);
|
||||
}
|
||||
|
||||
P_diag_i[n_fine] = jj_counter;
|
||||
@ -2826,6 +2828,9 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
hypre_CSRMatrixJ(P_offd) = P_offd_j;
|
||||
hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
|
||||
|
||||
hypre_CSRMatrixMemoryLocation(P_diag) = memory_location_P;
|
||||
hypre_CSRMatrixMemoryLocation(P_offd) = memory_location_P;
|
||||
|
||||
/* Compress P, removing coefficients smaller than trunc_factor * Max */
|
||||
if (trunc_factor != 0.0 || max_elmts > 0)
|
||||
{
|
||||
@ -2882,19 +2887,24 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
* Comment: Only use FF when there is no common c point.
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions,
|
||||
HYPRE_Int *dof_func,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
/* Communication Variables */
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
||||
HYPRE_Int my_id, num_procs;
|
||||
|
||||
|
||||
HYPRE_Int my_id, num_procs;
|
||||
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
|
||||
/* Variables to store input variables */
|
||||
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
|
||||
@ -3035,8 +3045,8 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
/*-----------------------------------------------------------------------
|
||||
* Initialize counters and allocate mapping vector.
|
||||
*-----------------------------------------------------------------------*/
|
||||
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
|
||||
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
|
||||
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
|
||||
if (n_fine)
|
||||
{
|
||||
@ -3281,14 +3291,14 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
|
||||
if (P_diag_size)
|
||||
{
|
||||
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
|
||||
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_HOST);
|
||||
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, memory_location_P);
|
||||
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, memory_location_P);
|
||||
}
|
||||
|
||||
if (P_offd_size)
|
||||
{
|
||||
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
|
||||
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_HOST);
|
||||
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, memory_location_P);
|
||||
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, memory_location_P);
|
||||
}
|
||||
|
||||
P_diag_i[n_fine] = jj_counter;
|
||||
@ -3710,6 +3720,9 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
hypre_CSRMatrixJ(P_offd) = P_offd_j;
|
||||
hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
|
||||
|
||||
hypre_CSRMatrixMemoryLocation(P_diag) = memory_location_P;
|
||||
hypre_CSRMatrixMemoryLocation(P_offd) = memory_location_P;
|
||||
|
||||
/* Compress P, removing coefficients smaller than trunc_factor * Max */
|
||||
if (trunc_factor != 0.0 || max_elmts > 0)
|
||||
{
|
||||
@ -3765,18 +3778,24 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
* Comment: Only use FF when there is no common c point.
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions,
|
||||
HYPRE_Int *dof_func,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
/* Communication Variables */
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
||||
HYPRE_Int my_id, num_procs;
|
||||
|
||||
HYPRE_Int my_id, num_procs;
|
||||
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
|
||||
/* Variables to store input variables */
|
||||
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
|
||||
@ -3916,8 +3935,8 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
/*-----------------------------------------------------------------------
|
||||
* Initialize counters and allocate mapping vector.
|
||||
*-----------------------------------------------------------------------*/
|
||||
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
|
||||
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
|
||||
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
|
||||
if (n_fine)
|
||||
{
|
||||
@ -4168,14 +4187,14 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
|
||||
if (P_diag_size)
|
||||
{
|
||||
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
|
||||
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_HOST);
|
||||
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, memory_location_P);
|
||||
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, memory_location_P);
|
||||
}
|
||||
|
||||
if (P_offd_size)
|
||||
{
|
||||
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
|
||||
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_HOST);
|
||||
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, memory_location_P);
|
||||
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, memory_location_P);
|
||||
}
|
||||
|
||||
P_diag_i[n_fine] = jj_counter;
|
||||
@ -4603,6 +4622,9 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
hypre_CSRMatrixJ(P_offd) = P_offd_j;
|
||||
hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
|
||||
|
||||
hypre_CSRMatrixMemoryLocation(P_diag) = memory_location_P;
|
||||
hypre_CSRMatrixMemoryLocation(P_offd) = memory_location_P;
|
||||
|
||||
/* Compress P, removing coefficients smaller than trunc_factor * Max */
|
||||
if (trunc_factor != 0.0 || max_elmts > 0)
|
||||
{
|
||||
@ -4661,23 +4683,22 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
* Comment:
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildExtInterp(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions,
|
||||
HYPRE_Int *dof_func,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
hypre_BoomerAMGBuildExtInterpHost(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions,
|
||||
HYPRE_Int *dof_func,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
/* Communication Variables */
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
||||
|
||||
HYPRE_Int my_id, num_procs;
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
||||
HYPRE_Int my_id, num_procs;
|
||||
|
||||
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
|
||||
@ -4825,8 +4846,8 @@ hypre_BoomerAMGBuildExtInterp(hypre_ParCSRMatrix *A,
|
||||
/*-----------------------------------------------------------------------
|
||||
* Initialize counters and allocate mapping vector.
|
||||
*-----------------------------------------------------------------------*/
|
||||
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
|
||||
|
||||
if (n_fine)
|
||||
{
|
||||
@ -5421,3 +5442,82 @@ hypre_BoomerAMGBuildExtInterp(hypre_ParCSRMatrix *A,
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
hypre_NvtxPushRange("ExtInterp");
|
||||
#endif
|
||||
|
||||
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
|
||||
|
||||
HYPRE_Int ierr = 0;
|
||||
|
||||
if (exec == HYPRE_EXEC_HOST)
|
||||
{
|
||||
ierr = hypre_BoomerAMGBuildExtInterpHost(A,CF_marker,S,num_cpts_global,num_functions,dof_func,
|
||||
debug_flag,trunc_factor,max_elmts,col_offd_S_to_A,P_ptr);
|
||||
}
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
else
|
||||
{
|
||||
ierr = hypre_BoomerAMGBuildExtInterpDevice(A,CF_marker,S,num_cpts_global,num_functions,dof_func,
|
||||
debug_flag,trunc_factor,max_elmts,col_offd_S_to_A,P_ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
hypre_NvtxPopRange();
|
||||
#endif
|
||||
|
||||
return ierr;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------*/
|
||||
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildExtPIInterp(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int num_functions,
|
||||
HYPRE_Int *dof_func,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
hypre_NvtxPushRange("ExtPIInterp");
|
||||
#endif
|
||||
|
||||
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
|
||||
|
||||
HYPRE_Int ierr = 0;
|
||||
|
||||
if (exec == HYPRE_EXEC_HOST)
|
||||
{
|
||||
ierr = hypre_BoomerAMGBuildExtPIInterpHost(A, CF_marker, S, num_cpts_global, num_functions, dof_func,
|
||||
debug_flag, trunc_factor, max_elmts, col_offd_S_to_A, P_ptr);
|
||||
}
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
else
|
||||
{
|
||||
ierr = hypre_BoomerAMGBuildExtPIInterpDevice(A, CF_marker, S, num_cpts_global, num_functions, dof_func,
|
||||
debug_flag, trunc_factor, max_elmts, P_ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
hypre_NvtxPopRange();
|
||||
#endif
|
||||
|
||||
return ierr;
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -13,18 +13,20 @@
|
||||
* Comment:
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
hypre_BoomerAMGBuildModExtInterpHost(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
/* Communication Variables */
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
|
||||
HYPRE_Int my_id, num_procs;
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
HYPRE_Int my_id, num_procs;
|
||||
|
||||
/* Variables to store input variables */
|
||||
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
|
||||
@ -152,7 +154,7 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
start_array[my_thread_num+1] = stop;
|
||||
for (i=start; i < stop; i++)
|
||||
{
|
||||
if (CF_marker[i] > 0)
|
||||
if (CF_marker[i] > 0)
|
||||
{
|
||||
cpt_array[my_thread_num]++;
|
||||
}
|
||||
@ -225,11 +227,11 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
|
||||
for (i=startf; i<stopf; i++)
|
||||
{
|
||||
j = As_FF_diag_i[i];
|
||||
j = As_FF_diag_i[i];
|
||||
if (D_w[i]) beta = 1.0/D_w[i];
|
||||
else beta = 1.0;
|
||||
As_FF_diag_data[j] = beta*D_q[i];
|
||||
if (D_q[i]) gamma = -1.0/D_q[i];
|
||||
if (D_q[i]) gamma = -1.0/D_q[i];
|
||||
else gamma = 1.0;
|
||||
for (j=As_FF_diag_i[i]+1; j < As_FF_diag_i[i+1]; j++)
|
||||
As_FF_diag_data[j] *= beta;
|
||||
@ -241,17 +243,17 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
As_FC_offd_data[j] *= gamma;
|
||||
}
|
||||
|
||||
} /* end parallel region */
|
||||
} /* end parallel region */
|
||||
|
||||
W = hypre_ParMatmul(As_FF, As_FC);
|
||||
W_diag = hypre_ParCSRMatrixDiag(W);
|
||||
W_offd = hypre_ParCSRMatrixOffd(W);
|
||||
W_diag_i = hypre_CSRMatrixI(W_diag);
|
||||
W_diag_j = hypre_CSRMatrixJ(W_diag);
|
||||
W_diag_data = hypre_CSRMatrixData(W_diag);
|
||||
W_offd_i = hypre_CSRMatrixI(W_offd);
|
||||
W_offd_j = hypre_CSRMatrixJ(W_offd);
|
||||
W_offd_data = hypre_CSRMatrixData(W_offd);
|
||||
W_diag_i = hypre_CSRMatrixI(W_diag);
|
||||
W_diag_j = hypre_CSRMatrixJ(W_diag);
|
||||
W_diag_data = hypre_CSRMatrixData(W_diag);
|
||||
W_offd_i = hypre_CSRMatrixI(W_offd);
|
||||
W_offd_j = hypre_CSRMatrixJ(W_offd);
|
||||
W_offd_data = hypre_CSRMatrixData(W_offd);
|
||||
num_cols_P_offd = hypre_CSRMatrixNumCols(W_offd);
|
||||
/*-----------------------------------------------------------------------
|
||||
* Initialize data for P
|
||||
@ -316,8 +318,8 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
P_offd_i[i+1] = cnt_offd;
|
||||
}
|
||||
|
||||
} /* end parallel region */
|
||||
|
||||
} /* end parallel region */
|
||||
|
||||
/*-----------------------------------------------------------------------
|
||||
* Create matrix
|
||||
*-----------------------------------------------------------------------*/
|
||||
@ -359,7 +361,7 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
P_offd_j = hypre_CSRMatrixJ(P_offd);
|
||||
P_diag_size = P_diag_i[n_fine];
|
||||
P_offd_size = P_offd_i[n_fine];
|
||||
|
||||
|
||||
col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
|
||||
if (num_cols_P_offd)
|
||||
{
|
||||
@ -368,7 +370,7 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
{
|
||||
P_marker[P_offd_j[i]] = 1;
|
||||
}
|
||||
|
||||
|
||||
new_ncols_P_offd = 0;
|
||||
for (i=0; i < num_cols_P_offd; i++)
|
||||
{
|
||||
@ -398,8 +400,8 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
}
|
||||
|
||||
hypre_TFree(col_map_offd_P, HYPRE_MEMORY_HOST);
|
||||
hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
|
||||
hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
|
||||
hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
|
||||
hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
|
||||
hypre_TFree(map, HYPRE_MEMORY_HOST);
|
||||
}
|
||||
}
|
||||
@ -421,21 +423,67 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------*
|
||||
* Modularized Extended Interpolation
|
||||
*-----------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
hypre_NvtxPushRange("ModExtInterp");
|
||||
#endif
|
||||
|
||||
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
|
||||
|
||||
HYPRE_Int ierr = 0;
|
||||
|
||||
if (exec == HYPRE_EXEC_HOST)
|
||||
{
|
||||
ierr = hypre_BoomerAMGBuildModExtInterpHost(A,CF_marker,S,num_cpts_global,
|
||||
debug_flag,trunc_factor,max_elmts,col_offd_S_to_A,P_ptr);
|
||||
}
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
else
|
||||
{
|
||||
ierr = hypre_BoomerAMGBuildExtInterpDevice(A,CF_marker,S,num_cpts_global,1,NULL,
|
||||
debug_flag,trunc_factor,max_elmts,col_offd_S_to_A,P_ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
hypre_NvtxPopRange();
|
||||
#endif
|
||||
|
||||
return ierr;
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------
|
||||
* hypre_BoomerAMGBuildModExtPIInterp
|
||||
* Comment:
|
||||
*--------------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
hypre_BoomerAMGBuildModExtPIInterpHost(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
/* Communication Variables */
|
||||
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
|
||||
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
|
||||
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
|
||||
hypre_ParCSRCommHandle *comm_handle = NULL;
|
||||
|
||||
@ -596,7 +644,7 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
start_array[my_thread_num+1] = stop;
|
||||
for (i=start; i < stop; i++)
|
||||
{
|
||||
if (CF_marker[i] > 0)
|
||||
if (CF_marker[i] > 0)
|
||||
{
|
||||
cpt_array[my_thread_num]++;
|
||||
}
|
||||
@ -752,7 +800,7 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
for (i=startf; i<stopf; i++)
|
||||
{
|
||||
theta = (D_theta[i]+D_w[i]);
|
||||
if (theta)
|
||||
if (theta)
|
||||
{
|
||||
theta = -1.0/theta;
|
||||
for (j=As_FF_diag_i[i]; j < As_FF_diag_i[i+1]; j++)
|
||||
@ -762,17 +810,17 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
}
|
||||
}
|
||||
|
||||
} /* end parallel region */
|
||||
} /* end parallel region */
|
||||
|
||||
W = hypre_ParMatmul(As_FF, As_FC);
|
||||
W_diag = hypre_ParCSRMatrixDiag(W);
|
||||
W_offd = hypre_ParCSRMatrixOffd(W);
|
||||
W_diag_i = hypre_CSRMatrixI(W_diag);
|
||||
W_diag_j = hypre_CSRMatrixJ(W_diag);
|
||||
W_diag_data = hypre_CSRMatrixData(W_diag);
|
||||
W_offd_i = hypre_CSRMatrixI(W_offd);
|
||||
W_offd_j = hypre_CSRMatrixJ(W_offd);
|
||||
W_offd_data = hypre_CSRMatrixData(W_offd);
|
||||
W_diag_i = hypre_CSRMatrixI(W_diag);
|
||||
W_diag_j = hypre_CSRMatrixJ(W_diag);
|
||||
W_diag_data = hypre_CSRMatrixData(W_diag);
|
||||
W_offd_i = hypre_CSRMatrixI(W_offd);
|
||||
W_offd_j = hypre_CSRMatrixJ(W_offd);
|
||||
W_offd_data = hypre_CSRMatrixData(W_offd);
|
||||
num_cols_P_offd = hypre_CSRMatrixNumCols(W_offd);
|
||||
/*-----------------------------------------------------------------------
|
||||
* Initialize data for P
|
||||
@ -837,8 +885,8 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
P_offd_i[i+1] = cnt_offd;
|
||||
}
|
||||
|
||||
} /* end parallel region */
|
||||
|
||||
} /* end parallel region */
|
||||
|
||||
/*-----------------------------------------------------------------------
|
||||
* Create matrix
|
||||
*-----------------------------------------------------------------------*/
|
||||
@ -887,7 +935,7 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
|
||||
for (i=0; i < P_offd_size; i++)
|
||||
P_marker[P_offd_j[i]] = 1;
|
||||
|
||||
|
||||
new_ncols_P_offd = 0;
|
||||
for (i=0; i < num_cols_P_offd; i++)
|
||||
if (P_marker[i]) new_ncols_P_offd++;
|
||||
@ -914,8 +962,8 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
new_ncols_P_offd);
|
||||
}
|
||||
hypre_TFree(col_map_offd_P, HYPRE_MEMORY_HOST);
|
||||
hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
|
||||
hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
|
||||
hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
|
||||
hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
|
||||
hypre_TFree(map, HYPRE_MEMORY_HOST);
|
||||
}
|
||||
}
|
||||
@ -942,3 +990,45 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------*
|
||||
* Modularized Extended+i Interpolation
|
||||
*-----------------------------------------------------------------------*/
|
||||
HYPRE_Int
|
||||
hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A,
|
||||
HYPRE_Int *CF_marker,
|
||||
hypre_ParCSRMatrix *S,
|
||||
HYPRE_BigInt *num_cpts_global,
|
||||
HYPRE_Int debug_flag,
|
||||
HYPRE_Real trunc_factor,
|
||||
HYPRE_Int max_elmts,
|
||||
HYPRE_Int *col_offd_S_to_A,
|
||||
hypre_ParCSRMatrix **P_ptr)
|
||||
{
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
hypre_NvtxPushRange("ExtPIInterp");
|
||||
#endif
|
||||
|
||||
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
|
||||
|
||||
HYPRE_Int ierr = 0;
|
||||
|
||||
if (exec == HYPRE_EXEC_HOST)
|
||||
{
|
||||
ierr = hypre_BoomerAMGBuildModExtPIInterpHost(A, CF_marker, S, num_cpts_global,
|
||||
debug_flag, trunc_factor, max_elmts, col_offd_S_to_A, P_ptr);
|
||||
}
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
else
|
||||
{
|
||||
ierr = hypre_BoomerAMGBuildExtPIInterpDevice(A, CF_marker, S, num_cpts_global, 1, NULL,
|
||||
debug_flag, trunc_factor, max_elmts, P_ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HYPRE_USING_CUDA)
|
||||
hypre_NvtxPopRange();
|
||||
#endif
|
||||
|
||||
return ierr;
|
||||
}
|
||||
|
||||
|
||||
@ -1053,6 +1053,10 @@ HYPRE_Int hypre_BoomerAMGBuildFFInterp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_m
|
||||
HYPRE_Int hypre_BoomerAMGBuildFF1Interp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_marker , hypre_ParCSRMatrix *S , HYPRE_BigInt *num_cpts_global , HYPRE_Int num_functions , HYPRE_Int *dof_func , HYPRE_Int debug_flag , HYPRE_Real trunc_factor , HYPRE_Int max_elmts , HYPRE_Int *col_offd_S_to_A , hypre_ParCSRMatrix **P_ptr );
|
||||
HYPRE_Int hypre_BoomerAMGBuildExtInterp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_marker , hypre_ParCSRMatrix *S , HYPRE_BigInt *num_cpts_global , HYPRE_Int num_functions , HYPRE_Int *dof_func , HYPRE_Int debug_flag , HYPRE_Real trunc_factor , HYPRE_Int max_elmts , HYPRE_Int *col_offd_S_to_A , hypre_ParCSRMatrix **P_ptr );
|
||||
|
||||
/* par_lr_interp_device.c */
|
||||
HYPRE_Int hypre_BoomerAMGBuildExtInterpDevice(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
|
||||
HYPRE_Int hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr);
|
||||
|
||||
/* par_mod_lr_interp.c */
|
||||
HYPRE_Int hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
|
||||
HYPRE_Int hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
|
||||
|
||||
@ -5671,11 +5671,13 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A,
|
||||
B_nnz_offd);
|
||||
|
||||
B_diag = hypre_ParCSRMatrixDiag(B);
|
||||
hypre_CSRMatrixMemoryLocation(B_diag) = HYPRE_MEMORY_HOST;
|
||||
hypre_CSRMatrixData(B_diag) = B_diag_a;
|
||||
hypre_CSRMatrixI(B_diag) = B_diag_i;
|
||||
hypre_CSRMatrixJ(B_diag) = B_diag_j;
|
||||
|
||||
B_offd = hypre_ParCSRMatrixOffd(B);
|
||||
hypre_CSRMatrixMemoryLocation(B_offd) = HYPRE_MEMORY_HOST;
|
||||
hypre_CSRMatrixData(B_offd) = B_offd_a;
|
||||
hypre_CSRMatrixI(B_offd) = B_offd_i;
|
||||
hypre_CSRMatrixJ(B_offd) = B_offd_j;
|
||||
|
||||
@ -8,12 +8,20 @@
|
||||
# ij: Test BoomerAMG on GPU options
|
||||
#=============================================================================
|
||||
|
||||
mpirun -np 4 ./ij -n 256 256 128 -P 2 2 1 -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -no_cuda_um 0 -mm_cusparse 0 -interptype 3 -solver 1 \
|
||||
mpirun -np 4 ./ij -n 256 256 128 -P 2 2 1 -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 3 -solver 1 \
|
||||
> gpu_boomer.out.1
|
||||
|
||||
mpirun -np 3 ./ij -n 128 128 384 -P 1 1 3 -pmis -keepT 1 -rlx 7 -w 0.85 -exec_device -rap 1 -mod_rap2 1 -no_cuda_um 1 -mm_cusparse 0 -interptype 3 -solver 1 \
|
||||
mpirun -np 3 ./ij -n 128 128 384 -P 1 1 3 -pmis -keepT 1 -rlx 7 -w 0.85 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 3 -solver 1 \
|
||||
>> gpu_boomer.out.2
|
||||
|
||||
mpirun -np 4 ./ij -n 4096 4096 1 -P 2 2 1 -9pt -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -no_cuda_um 1 -mm_cusparse 0 -interptype 3 -solver 1 \
|
||||
mpirun -np 4 ./ij -n 4096 4096 1 -P 2 2 1 -9pt -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 3 -solver 1 \
|
||||
>> gpu_boomer.out.3
|
||||
|
||||
mpirun -np 4 ./ij -n 256 256 128 -P 2 2 1 -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 14 -solver 1 \
|
||||
>> gpu_boomer.out.4
|
||||
|
||||
mpirun -np 4 ./ij -n 256 256 128 -P 2 2 1 -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 6 -solver 1 \
|
||||
>> gpu_boomer.out.5
|
||||
|
||||
mpirun -np 4 ./ij -n 4096 4096 1 -P 2 2 1 -9pt -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 15 -solver 1 \
|
||||
>> gpu_boomer.out.6
|
||||
|
||||
@ -22,3 +22,26 @@ Final Relative Residual Norm = 9.439905e-09
|
||||
Iterations = 91
|
||||
Final Relative Residual Norm = 9.334377e-09
|
||||
|
||||
# Output file: gpu_boomer.out.4
|
||||
Complexity: grid = 1.361991
|
||||
operator = 2.858223
|
||||
memory = 3.484106
|
||||
|
||||
Iterations = 20
|
||||
Final Relative Residual Norm = 9.758706e-09
|
||||
|
||||
# Output file: gpu_boomer.out.5
|
||||
Complexity: grid = 1.354135
|
||||
operator = 2.781538
|
||||
memory = 3.406330
|
||||
|
||||
Iterations = 19
|
||||
Final Relative Residual Norm = 8.539869e-09
|
||||
|
||||
# Output file: gpu_boomer.out.6
|
||||
Complexity: grid = 1.233945
|
||||
operator = 1.239473
|
||||
memory = 1.468494
|
||||
|
||||
Iterations = 84
|
||||
Final Relative Residual Norm = 9.747204e-09
|
||||
|
||||
@ -16,6 +16,9 @@ FILES="\
|
||||
${TNAME}.out.1\
|
||||
${TNAME}.out.2\
|
||||
${TNAME}.out.3\
|
||||
${TNAME}.out.4\
|
||||
${TNAME}.out.5\
|
||||
${TNAME}.out.6\
|
||||
"
|
||||
|
||||
for i in $FILES
|
||||
|
||||
@ -3166,6 +3166,9 @@ template <typename T1, typename T2, typename T3> HYPRE_Int hypreDevice_ReduceByT
|
||||
template <typename T>
|
||||
HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, T *d_row_num, T *d_row_ind);
|
||||
|
||||
template <typename T>
|
||||
HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@ -3190,8 +3193,6 @@ HYPRE_Int hypreDevice_CsrRowIndicesToPtrs_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYP
|
||||
|
||||
HYPRE_Int hypreDevice_GenScatterAdd(HYPRE_Real *x, HYPRE_Int ny, HYPRE_Int *map, HYPRE_Real *y, char *work);
|
||||
|
||||
HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v);
|
||||
|
||||
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
|
||||
|
||||
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex *y);
|
||||
|
||||
@ -553,8 +553,9 @@ hypreDevice_GenScatterAdd(HYPRE_Real *x, HYPRE_Int ny, HYPRE_Int *map, HYPRE_Rea
|
||||
}
|
||||
|
||||
/* x[map[i]] = v */
|
||||
template <typename T>
|
||||
__global__ void
|
||||
hypreCUDAKernel_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v)
|
||||
hypreCUDAKernel_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v)
|
||||
{
|
||||
HYPRE_Int global_thread_id = hypre_cuda_get_grid_thread_id<1,1>();
|
||||
|
||||
@ -567,8 +568,9 @@ hypreCUDAKernel_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE
|
||||
/* x[map[i]] = v
|
||||
* n is length of map
|
||||
* TODO: thrust? */
|
||||
template <typename T>
|
||||
HYPRE_Int
|
||||
hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v)
|
||||
hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v)
|
||||
{
|
||||
/* trivial case */
|
||||
if (n <= 0)
|
||||
@ -584,6 +586,9 @@ hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
template HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v);
|
||||
template HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Complex *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Complex v);
|
||||
|
||||
__global__ void
|
||||
hypreCUDAKernel_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y)
|
||||
{
|
||||
|
||||
@ -282,6 +282,9 @@ template <typename T1, typename T2, typename T3> HYPRE_Int hypreDevice_ReduceByT
|
||||
template <typename T>
|
||||
HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, T *d_row_num, T *d_row_ind);
|
||||
|
||||
template <typename T>
|
||||
HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@ -306,8 +309,6 @@ HYPRE_Int hypreDevice_CsrRowIndicesToPtrs_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYP
|
||||
|
||||
HYPRE_Int hypreDevice_GenScatterAdd(HYPRE_Real *x, HYPRE_Int ny, HYPRE_Int *map, HYPRE_Real *y, char *work);
|
||||
|
||||
HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v);
|
||||
|
||||
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
|
||||
|
||||
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex *y);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user