Merge pull request #121 from hypre-space/gpudev-bjorn

Gpudev bjorn
This commit is contained in:
Ruipeng Li 2020-05-20 13:09:09 -07:00 committed by GitHub
commit 9791056f00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 1247 additions and 511 deletions

View File

@ -38,9 +38,10 @@ shift
# Basic build and run tests
mo="-j test"
eo=""
roij="-ij -rt -mpibind -rtol 1e-3 -atol 1e-3"
roij="-ij -ams -rt -mpibind -rtol 1e-3 -atol 1e-3"
ross="-struct -sstruct -rt -mpibind -rtol 1e-6 -atol 1e-6"
rost="-struct -rt -mpibind -rtol 1e-8 -atol 1e-8"
rocuda="-cuda -rt -mpibind -rtol 1e-5 -atol 3e-5"
# CUDA with UM
co="--with-cuda --enable-unified-memory --enable-persistent --enable-cub --enable-debug --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' HYPRE_CUDA_SM=70"
@ -49,9 +50,9 @@ co="--with-cuda --enable-unified-memory --enable-persistent --enable-cub --enabl
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ross
./renametest.sh basic $output_dir/basic-cuda-um-struct-sstruct
# CUDA with UM [shared library, no run]
# CUDA with UM [shared library]
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' HYPRE_CUDA_SM=70"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $rocuda
./renametest.sh basic $output_dir/basic-cuda-um-shared
#./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $roij
#./renametest.sh basic $output_dir/basic-cuda-um-shared-ij

View File

@ -1567,6 +1567,10 @@ HYPRE_Int hypre_BoomerAMGBuildFFInterp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_m
HYPRE_Int hypre_BoomerAMGBuildFF1Interp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_marker , hypre_ParCSRMatrix *S , HYPRE_BigInt *num_cpts_global , HYPRE_Int num_functions , HYPRE_Int *dof_func , HYPRE_Int debug_flag , HYPRE_Real trunc_factor , HYPRE_Int max_elmts , HYPRE_Int *col_offd_S_to_A , hypre_ParCSRMatrix **P_ptr );
HYPRE_Int hypre_BoomerAMGBuildExtInterp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_marker , hypre_ParCSRMatrix *S , HYPRE_BigInt *num_cpts_global , HYPRE_Int num_functions , HYPRE_Int *dof_func , HYPRE_Int debug_flag , HYPRE_Real trunc_factor , HYPRE_Int max_elmts , HYPRE_Int *col_offd_S_to_A , hypre_ParCSRMatrix **P_ptr );
/* par_lr_interp_device.c */
HYPRE_Int hypre_BoomerAMGBuildExtInterpDevice(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
HYPRE_Int hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr);
/* par_mod_lr_interp.c */
HYPRE_Int hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
HYPRE_Int hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);

View File

@ -163,6 +163,9 @@ hypre_BoomerAMGCoarsenPMISDevice( hypre_ParCSRMatrix *S,
/*---------------------------------------------------
* Clean up and return
*---------------------------------------------------*/
if( *CF_marker_ptr == NULL )
*CF_marker_ptr = hypre_CTAlloc(HYPRE_Int, num_cols_diag, HYPRE_MEMORY_HOST);
hypre_TMemcpy( *CF_marker_ptr, CF_marker_diag, HYPRE_Int, num_cols_diag, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE );
hypre_TFree(CF_marker_diag, HYPRE_MEMORY_DEVICE);

View File

@ -2688,7 +2688,7 @@ hypre_BoomerAMGBuildDirInterp( hypre_ParCSRMatrix *A,
HYPRE_Int ierr = 0;
#if defined(HYPRE_USING_CUDA)
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_CSRMatrixMemoryLocation(hypre_ParCSRMatrixDiag(A)) );
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
if (exec == HYPRE_EXEC_DEVICE)
{

View File

@ -1038,29 +1038,26 @@ hypre_BoomerAMGBuildStdInterp(hypre_ParCSRMatrix *A,
* Comment:
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildExtPIInterp(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions,
HYPRE_Int *dof_func,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
hypre_BoomerAMGBuildExtPIInterpHost(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions,
HYPRE_Int *dof_func,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
#ifdef HYPRE_PROFILE
hypre_profile_times[HYPRE_TIMER_ID_EXTENDED_I_INTERP] -= hypre_MPI_Wtime();
#endif
/* Communication Variables */
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
HYPRE_Int my_id, num_procs;
HYPRE_Int my_id, num_procs;
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
@ -1932,19 +1929,24 @@ hypre_BoomerAMGBuildExtPIInterp(hypre_ParCSRMatrix *A,
* Comment: Only use FF when there is no common c point.
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag,
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions,
HYPRE_Int *dof_func,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
/* Communication Variables */
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
HYPRE_Int my_id, num_procs;
HYPRE_Int my_id, num_procs;
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
/* Variables to store input variables */
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
@ -2088,8 +2090,8 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
/*-----------------------------------------------------------------------
* Intialize counters and allocate mapping vector.
*-----------------------------------------------------------------------*/
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_DEVICE);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_DEVICE);
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
if (n_fine)
{
@ -2369,14 +2371,14 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
if (P_diag_size)
{
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_DEVICE);
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_DEVICE);
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, memory_location_P);
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, memory_location_P);
}
if (P_offd_size)
{
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_DEVICE);
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_DEVICE);
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, memory_location_P);
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, memory_location_P);
}
P_diag_i[n_fine] = jj_counter;
@ -2826,6 +2828,9 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
hypre_CSRMatrixJ(P_offd) = P_offd_j;
hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
hypre_CSRMatrixMemoryLocation(P_diag) = memory_location_P;
hypre_CSRMatrixMemoryLocation(P_offd) = memory_location_P;
/* Compress P, removing coefficients smaller than trunc_factor * Max */
if (trunc_factor != 0.0 || max_elmts > 0)
{
@ -2882,19 +2887,24 @@ hypre_BoomerAMGBuildExtPICCInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
* Comment: Only use FF when there is no common c point.
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag,
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions,
HYPRE_Int *dof_func,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
/* Communication Variables */
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
HYPRE_Int my_id, num_procs;
HYPRE_Int my_id, num_procs;
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
/* Variables to store input variables */
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
@ -3035,8 +3045,8 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
/*-----------------------------------------------------------------------
* Intialize counters and allocate mapping vector.
*-----------------------------------------------------------------------*/
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
if (n_fine)
{
@ -3281,14 +3291,14 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
if (P_diag_size)
{
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_HOST);
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, memory_location_P);
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, memory_location_P);
}
if (P_offd_size)
{
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_HOST);
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, memory_location_P);
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, memory_location_P);
}
P_diag_i[n_fine] = jj_counter;
@ -3710,6 +3720,9 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
hypre_CSRMatrixJ(P_offd) = P_offd_j;
hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
hypre_CSRMatrixMemoryLocation(P_diag) = memory_location_P;
hypre_CSRMatrixMemoryLocation(P_offd) = memory_location_P;
/* Compress P, removing coefficients smaller than trunc_factor * Max */
if (trunc_factor != 0.0 || max_elmts > 0)
{
@ -3765,18 +3778,24 @@ hypre_BoomerAMGBuildFFInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
* Comment: Only use FF when there is no common c point.
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag,
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions,
HYPRE_Int *dof_func,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
/* Communication Variables */
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
HYPRE_Int my_id, num_procs;
HYPRE_Int my_id, num_procs;
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
/* Variables to store input variables */
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
@ -3916,8 +3935,8 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
/*-----------------------------------------------------------------------
* Intialize counters and allocate mapping vector.
*-----------------------------------------------------------------------*/
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, HYPRE_MEMORY_HOST);
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
if (n_fine)
{
@ -4168,14 +4187,14 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
if (P_diag_size)
{
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, HYPRE_MEMORY_HOST);
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, HYPRE_MEMORY_HOST);
P_diag_j = hypre_CTAlloc(HYPRE_Int, P_diag_size, memory_location_P);
P_diag_data = hypre_CTAlloc(HYPRE_Real, P_diag_size, memory_location_P);
}
if (P_offd_size)
{
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, HYPRE_MEMORY_HOST);
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, HYPRE_MEMORY_HOST);
P_offd_j = hypre_CTAlloc(HYPRE_Int, P_offd_size, memory_location_P);
P_offd_data = hypre_CTAlloc(HYPRE_Real, P_offd_size, memory_location_P);
}
P_diag_i[n_fine] = jj_counter;
@ -4603,6 +4622,9 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
hypre_CSRMatrixJ(P_offd) = P_offd_j;
hypre_ParCSRMatrixOwnsRowStarts(P) = 0;
hypre_CSRMatrixMemoryLocation(P_diag) = memory_location_P;
hypre_CSRMatrixMemoryLocation(P_offd) = memory_location_P;
/* Compress P, removing coefficients smaller than trunc_factor * Max */
if (trunc_factor != 0.0 || max_elmts > 0)
{
@ -4661,23 +4683,22 @@ hypre_BoomerAMGBuildFF1Interp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
* Comment:
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildExtInterp(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions,
HYPRE_Int *dof_func,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
hypre_BoomerAMGBuildExtInterpHost(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions,
HYPRE_Int *dof_func,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
/* Communication Variables */
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
HYPRE_Int my_id, num_procs;
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
HYPRE_Int my_id, num_procs;
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
@ -4825,8 +4846,8 @@ hypre_BoomerAMGBuildExtInterp(hypre_ParCSRMatrix *A,
/*-----------------------------------------------------------------------
* Intialize counters and allocate mapping vector.
*-----------------------------------------------------------------------*/
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
P_diag_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
P_offd_i = hypre_CTAlloc(HYPRE_Int, n_fine+1, memory_location_P);
if (n_fine)
{
@ -5421,3 +5442,82 @@ hypre_BoomerAMGBuildExtInterp(hypre_ParCSRMatrix *A,
return hypre_error_flag;
}
HYPRE_Int
hypre_BoomerAMGBuildExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag,
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
#if defined(HYPRE_USING_CUDA)
hypre_NvtxPushRange("ExtInterp");
#endif
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
HYPRE_Int ierr = 0;
if (exec == HYPRE_EXEC_HOST)
{
ierr = hypre_BoomerAMGBuildExtInterpHost(A,CF_marker,S,num_cpts_global,num_functions,dof_func,
debug_flag,trunc_factor,max_elmts,col_offd_S_to_A,P_ptr);
}
#if defined(HYPRE_USING_CUDA)
else
{
ierr = hypre_BoomerAMGBuildExtInterpDevice(A,CF_marker,S,num_cpts_global,num_functions,dof_func,
debug_flag,trunc_factor,max_elmts,col_offd_S_to_A,P_ptr);
}
#endif
#if defined(HYPRE_USING_CUDA)
hypre_NvtxPopRange();
#endif
return ierr;
}
/*-----------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildExtPIInterp(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int num_functions,
HYPRE_Int *dof_func,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
#if defined(HYPRE_USING_CUDA)
hypre_NvtxPushRange("ExtPIInterp");
#endif
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
HYPRE_Int ierr = 0;
if (exec == HYPRE_EXEC_HOST)
{
ierr = hypre_BoomerAMGBuildExtPIInterpHost(A, CF_marker, S, num_cpts_global, num_functions, dof_func,
debug_flag, trunc_factor, max_elmts, col_offd_S_to_A, P_ptr);
}
#if defined(HYPRE_USING_CUDA)
else
{
ierr = hypre_BoomerAMGBuildExtPIInterpDevice(A, CF_marker, S, num_cpts_global, num_functions, dof_func,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#endif
#if defined(HYPRE_USING_CUDA)
hypre_NvtxPopRange();
#endif
return ierr;
}

File diff suppressed because it is too large Load Diff

View File

@ -13,18 +13,20 @@
* Comment:
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
hypre_BoomerAMGBuildModExtInterpHost(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
/* Communication Variables */
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
HYPRE_Int my_id, num_procs;
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
HYPRE_Int my_id, num_procs;
/* Variables to store input variables */
hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
@ -152,7 +154,7 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
start_array[my_thread_num+1] = stop;
for (i=start; i < stop; i++)
{
if (CF_marker[i] > 0)
if (CF_marker[i] > 0)
{
cpt_array[my_thread_num]++;
}
@ -225,11 +227,11 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
for (i=startf; i<stopf; i++)
{
j = As_FF_diag_i[i];
j = As_FF_diag_i[i];
if (D_w[i]) beta = 1.0/D_w[i];
else beta = 1.0;
As_FF_diag_data[j] = beta*D_q[i];
if (D_q[i]) gamma = -1.0/D_q[i];
if (D_q[i]) gamma = -1.0/D_q[i];
else gamma = 1.0;
for (j=As_FF_diag_i[i]+1; j < As_FF_diag_i[i+1]; j++)
As_FF_diag_data[j] *= beta;
@ -241,17 +243,17 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
As_FC_offd_data[j] *= gamma;
}
} /* end parallel region */
} /* end parallel region */
W = hypre_ParMatmul(As_FF, As_FC);
W_diag = hypre_ParCSRMatrixDiag(W);
W_offd = hypre_ParCSRMatrixOffd(W);
W_diag_i = hypre_CSRMatrixI(W_diag);
W_diag_j = hypre_CSRMatrixJ(W_diag);
W_diag_data = hypre_CSRMatrixData(W_diag);
W_offd_i = hypre_CSRMatrixI(W_offd);
W_offd_j = hypre_CSRMatrixJ(W_offd);
W_offd_data = hypre_CSRMatrixData(W_offd);
W_diag_i = hypre_CSRMatrixI(W_diag);
W_diag_j = hypre_CSRMatrixJ(W_diag);
W_diag_data = hypre_CSRMatrixData(W_diag);
W_offd_i = hypre_CSRMatrixI(W_offd);
W_offd_j = hypre_CSRMatrixJ(W_offd);
W_offd_data = hypre_CSRMatrixData(W_offd);
num_cols_P_offd = hypre_CSRMatrixNumCols(W_offd);
/*-----------------------------------------------------------------------
* Intialize data for P
@ -316,8 +318,8 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
P_offd_i[i+1] = cnt_offd;
}
} /* end parallel region */
} /* end parallel region */
/*-----------------------------------------------------------------------
* Create matrix
*-----------------------------------------------------------------------*/
@ -359,7 +361,7 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
P_offd_j = hypre_CSRMatrixJ(P_offd);
P_diag_size = P_diag_i[n_fine];
P_offd_size = P_offd_i[n_fine];
col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
if (num_cols_P_offd)
{
@ -368,7 +370,7 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
{
P_marker[P_offd_j[i]] = 1;
}
new_ncols_P_offd = 0;
for (i=0; i < num_cols_P_offd; i++)
{
@ -398,8 +400,8 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
}
hypre_TFree(col_map_offd_P, HYPRE_MEMORY_HOST);
hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
hypre_TFree(map, HYPRE_MEMORY_HOST);
}
}
@ -421,21 +423,67 @@ hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
return hypre_error_flag;
}
/*-----------------------------------------------------------------------*
* Modularized Extended Interpolation
*-----------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
#if defined(HYPRE_USING_CUDA)
hypre_NvtxPushRange("ModExtInterp");
#endif
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
HYPRE_Int ierr = 0;
if (exec == HYPRE_EXEC_HOST)
{
ierr = hypre_BoomerAMGBuildModExtInterpHost(A,CF_marker,S,num_cpts_global,
debug_flag,trunc_factor,max_elmts,col_offd_S_to_A,P_ptr);
}
#if defined(HYPRE_USING_CUDA)
else
{
ierr = hypre_BoomerAMGBuildExtInterpDevice(A,CF_marker,S,num_cpts_global,1,NULL,
debug_flag,trunc_factor,max_elmts,col_offd_S_to_A,P_ptr);
}
#endif
#if defined(HYPRE_USING_CUDA)
hypre_NvtxPopRange();
#endif
return ierr;
}
/*---------------------------------------------------------------------------
* hypre_BoomerAMGBuildModExtPIInterp
* Comment:
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor, HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
hypre_BoomerAMGBuildModExtPIInterpHost(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
/* Communication Variables */
MPI_Comm comm = hypre_ParCSRMatrixComm(A);
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
HYPRE_MemoryLocation memory_location_P = hypre_ParCSRMatrixMemoryLocation(A);
hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
hypre_ParCSRCommHandle *comm_handle = NULL;
@ -596,7 +644,7 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
start_array[my_thread_num+1] = stop;
for (i=start; i < stop; i++)
{
if (CF_marker[i] > 0)
if (CF_marker[i] > 0)
{
cpt_array[my_thread_num]++;
}
@ -752,7 +800,7 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
for (i=startf; i<stopf; i++)
{
theta = (D_theta[i]+D_w[i]);
if (theta)
if (theta)
{
theta = -1.0/theta;
for (j=As_FF_diag_i[i]; j < As_FF_diag_i[i+1]; j++)
@ -762,17 +810,17 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
}
}
} /* end parallel region */
} /* end parallel region */
W = hypre_ParMatmul(As_FF, As_FC);
W_diag = hypre_ParCSRMatrixDiag(W);
W_offd = hypre_ParCSRMatrixOffd(W);
W_diag_i = hypre_CSRMatrixI(W_diag);
W_diag_j = hypre_CSRMatrixJ(W_diag);
W_diag_data = hypre_CSRMatrixData(W_diag);
W_offd_i = hypre_CSRMatrixI(W_offd);
W_offd_j = hypre_CSRMatrixJ(W_offd);
W_offd_data = hypre_CSRMatrixData(W_offd);
W_diag_i = hypre_CSRMatrixI(W_diag);
W_diag_j = hypre_CSRMatrixJ(W_diag);
W_diag_data = hypre_CSRMatrixData(W_diag);
W_offd_i = hypre_CSRMatrixI(W_offd);
W_offd_j = hypre_CSRMatrixJ(W_offd);
W_offd_data = hypre_CSRMatrixData(W_offd);
num_cols_P_offd = hypre_CSRMatrixNumCols(W_offd);
/*-----------------------------------------------------------------------
* Intialize data for P
@ -837,8 +885,8 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
P_offd_i[i+1] = cnt_offd;
}
} /* end parallel region */
} /* end parallel region */
/*-----------------------------------------------------------------------
* Create matrix
*-----------------------------------------------------------------------*/
@ -887,7 +935,7 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_P_offd, HYPRE_MEMORY_HOST);
for (i=0; i < P_offd_size; i++)
P_marker[P_offd_j[i]] = 1;
new_ncols_P_offd = 0;
for (i=0; i < num_cols_P_offd; i++)
if (P_marker[i]) new_ncols_P_offd++;
@ -914,8 +962,8 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
new_ncols_P_offd);
}
hypre_TFree(col_map_offd_P, HYPRE_MEMORY_HOST);
hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
hypre_ParCSRMatrixColMapOffd(P) = new_col_map_offd;
hypre_CSRMatrixNumCols(P_offd) = new_ncols_P_offd;
hypre_TFree(map, HYPRE_MEMORY_HOST);
}
}
@ -942,3 +990,45 @@ hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker,
return hypre_error_flag;
}
/*-----------------------------------------------------------------------*
* Modularized Extended+i Interpolation
*-----------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A,
HYPRE_Int *CF_marker,
hypre_ParCSRMatrix *S,
HYPRE_BigInt *num_cpts_global,
HYPRE_Int debug_flag,
HYPRE_Real trunc_factor,
HYPRE_Int max_elmts,
HYPRE_Int *col_offd_S_to_A,
hypre_ParCSRMatrix **P_ptr)
{
#if defined(HYPRE_USING_CUDA)
hypre_NvtxPushRange("ExtPIInterp");
#endif
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_ParCSRMatrixMemoryLocation(A) );
HYPRE_Int ierr = 0;
if (exec == HYPRE_EXEC_HOST)
{
ierr = hypre_BoomerAMGBuildModExtPIInterpHost(A, CF_marker, S, num_cpts_global,
debug_flag, trunc_factor, max_elmts, col_offd_S_to_A, P_ptr);
}
#if defined(HYPRE_USING_CUDA)
else
{
ierr = hypre_BoomerAMGBuildExtPIInterpDevice(A, CF_marker, S, num_cpts_global, 1, NULL,
debug_flag, trunc_factor, max_elmts, P_ptr);
}
#endif
#if defined(HYPRE_USING_CUDA)
hypre_NvtxPopRange();
#endif
return ierr;
}

View File

@ -1053,6 +1053,10 @@ HYPRE_Int hypre_BoomerAMGBuildFFInterp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_m
HYPRE_Int hypre_BoomerAMGBuildFF1Interp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_marker , hypre_ParCSRMatrix *S , HYPRE_BigInt *num_cpts_global , HYPRE_Int num_functions , HYPRE_Int *dof_func , HYPRE_Int debug_flag , HYPRE_Real trunc_factor , HYPRE_Int max_elmts , HYPRE_Int *col_offd_S_to_A , hypre_ParCSRMatrix **P_ptr );
HYPRE_Int hypre_BoomerAMGBuildExtInterp ( hypre_ParCSRMatrix *A , HYPRE_Int *CF_marker , hypre_ParCSRMatrix *S , HYPRE_BigInt *num_cpts_global , HYPRE_Int num_functions , HYPRE_Int *dof_func , HYPRE_Int debug_flag , HYPRE_Real trunc_factor , HYPRE_Int max_elmts , HYPRE_Int *col_offd_S_to_A , hypre_ParCSRMatrix **P_ptr );
/* par_lr_interp_device.c */
HYPRE_Int hypre_BoomerAMGBuildExtInterpDevice(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
HYPRE_Int hypre_BoomerAMGBuildExtPIInterpDevice( hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, hypre_ParCSRMatrix **P_ptr);
/* par_mod_lr_interp.c */
HYPRE_Int hypre_BoomerAMGBuildModExtInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);
HYPRE_Int hypre_BoomerAMGBuildModExtPIInterp(hypre_ParCSRMatrix *A, HYPRE_Int *CF_marker, hypre_ParCSRMatrix *S, HYPRE_BigInt *num_cpts_global, HYPRE_Int debug_flag, HYPRE_Real trunc_factor, HYPRE_Int max_elmts, HYPRE_Int *col_offd_S_to_A, hypre_ParCSRMatrix **P_ptr);

View File

@ -5671,11 +5671,13 @@ hypre_ParCSRMatrixExtractSubmatrixFC( hypre_ParCSRMatrix *A,
B_nnz_offd);
B_diag = hypre_ParCSRMatrixDiag(B);
hypre_CSRMatrixMemoryLocation(B_diag) = HYPRE_MEMORY_HOST;
hypre_CSRMatrixData(B_diag) = B_diag_a;
hypre_CSRMatrixI(B_diag) = B_diag_i;
hypre_CSRMatrixJ(B_diag) = B_diag_j;
B_offd = hypre_ParCSRMatrixOffd(B);
hypre_CSRMatrixMemoryLocation(B_offd) = HYPRE_MEMORY_HOST;
hypre_CSRMatrixData(B_offd) = B_offd_a;
hypre_CSRMatrixI(B_offd) = B_offd_i;
hypre_CSRMatrixJ(B_offd) = B_offd_j;

View File

@ -8,12 +8,20 @@
# ij: Test BoomerAMG on GPU options
#=============================================================================
mpirun -np 4 ./ij -n 256 256 128 -P 2 2 1 -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -no_cuda_um 0 -mm_cusparse 0 -interptype 3 -solver 1 \
mpirun -np 4 ./ij -n 256 256 128 -P 2 2 1 -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 3 -solver 1 \
> gpu_boomer.out.1
mpirun -np 3 ./ij -n 128 128 384 -P 1 1 3 -pmis -keepT 1 -rlx 7 -w 0.85 -exec_device -rap 1 -mod_rap2 1 -no_cuda_um 1 -mm_cusparse 0 -interptype 3 -solver 1 \
mpirun -np 3 ./ij -n 128 128 384 -P 1 1 3 -pmis -keepT 1 -rlx 7 -w 0.85 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 3 -solver 1 \
>> gpu_boomer.out.2
mpirun -np 4 ./ij -n 4096 4096 1 -P 2 2 1 -9pt -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -no_cuda_um 1 -mm_cusparse 0 -interptype 3 -solver 1 \
mpirun -np 4 ./ij -n 4096 4096 1 -P 2 2 1 -9pt -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 3 -solver 1 \
>> gpu_boomer.out.3
mpirun -np 4 ./ij -n 256 256 128 -P 2 2 1 -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 14 -solver 1 \
>> gpu_boomer.out.4
mpirun -np 4 ./ij -n 256 256 128 -P 2 2 1 -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 6 -solver 1 \
>> gpu_boomer.out.5
mpirun -np 4 ./ij -n 4096 4096 1 -P 2 2 1 -9pt -pmis -keepT 1 -rlx 18 -exec_device -rap 1 -mod_rap2 1 -mm_cusparse 0 -interptype 15 -solver 1 \
>> gpu_boomer.out.6

View File

@ -22,3 +22,26 @@ Final Relative Residual Norm = 9.439905e-09
Iterations = 91
Final Relative Residual Norm = 9.334377e-09
# Output file: gpu_boomer.out.4
Complexity: grid = 1.361991
operator = 2.858223
memory = 3.484106
Iterations = 20
Final Relative Residual Norm = 9.758706e-09
# Output file: gpu_boomer.out.5
Complexity: grid = 1.354135
operator = 2.781538
memory = 3.406330
Iterations = 19
Final Relative Residual Norm = 8.539869e-09
# Output file: gpu_boomer.out.6
Complexity: grid = 1.233945
operator = 1.239473
memory = 1.468494
Iterations = 84
Final Relative Residual Norm = 9.747204e-09

View File

@ -16,6 +16,9 @@ FILES="\
${TNAME}.out.1\
${TNAME}.out.2\
${TNAME}.out.3\
${TNAME}.out.4\
${TNAME}.out.5\
${TNAME}.out.6\
"
for i in $FILES

View File

@ -3166,6 +3166,9 @@ template <typename T1, typename T2, typename T3> HYPRE_Int hypreDevice_ReduceByT
template <typename T>
HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, T *d_row_num, T *d_row_ind);
template <typename T>
HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v);
#ifdef __cplusplus
}
#endif
@ -3190,8 +3193,6 @@ HYPRE_Int hypreDevice_CsrRowIndicesToPtrs_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYP
HYPRE_Int hypreDevice_GenScatterAdd(HYPRE_Real *x, HYPRE_Int ny, HYPRE_Int *map, HYPRE_Real *y, char *work);
HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v);
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex *y);

View File

@ -553,8 +553,9 @@ hypreDevice_GenScatterAdd(HYPRE_Real *x, HYPRE_Int ny, HYPRE_Int *map, HYPRE_Rea
}
/* x[map[i]] = v */
template <typename T>
__global__ void
hypreCUDAKernel_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v)
hypreCUDAKernel_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v)
{
HYPRE_Int global_thread_id = hypre_cuda_get_grid_thread_id<1,1>();
@ -567,8 +568,9 @@ hypreCUDAKernel_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE
/* x[map[i]] = v
* n is length of map
* TODO: thrust? */
template <typename T>
HYPRE_Int
hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v)
hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v)
{
/* trivial case */
if (n <= 0)
@ -584,6 +586,9 @@ hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int
return hypre_error_flag;
}
template HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v);
template HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Complex *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Complex v);
__global__ void
hypreCUDAKernel_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y)
{

View File

@ -282,6 +282,9 @@ template <typename T1, typename T2, typename T3> HYPRE_Int hypreDevice_ReduceByT
template <typename T>
HYPRE_Int hypreDevice_CsrRowPtrsToIndicesWithRowNum(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, T *d_row_num, T *d_row_ind);
template <typename T>
HYPRE_Int hypreDevice_ScatterConstant(T *x, HYPRE_Int n, HYPRE_Int *map, T v);
#ifdef __cplusplus
}
#endif
@ -306,8 +309,6 @@ HYPRE_Int hypreDevice_CsrRowIndicesToPtrs_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYP
HYPRE_Int hypreDevice_GenScatterAdd(HYPRE_Real *x, HYPRE_Int ny, HYPRE_Int *map, HYPRE_Real *y, char *work);
HYPRE_Int hypreDevice_ScatterConstant(HYPRE_Int *x, HYPRE_Int n, HYPRE_Int *map, HYPRE_Int v);
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
HYPRE_Int hypreDevice_DiagScaleVector(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Complex *A_data, HYPRE_Complex *x, HYPRE_Complex *y);