[Multivec 2/5]: Extend multivector support (#693)

* Add new device functions needed by multivectors (`hypreDevice_IntStridedCopy` and `hypreDevice_IVAMXPMY`)
* Extend `hypre_SeqVectorElmdivpy` to work with multivectors.
This commit is contained in:
Victor A. Paludetto Magri 2022-07-29 15:37:24 -07:00 committed by GitHub
parent 26f334002f
commit 662e886881
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 437 additions and 160 deletions

View File

@ -1109,8 +1109,12 @@ hypre_BoomerAMGRelax7Jacobi( hypre_ParCSRMatrix *A,
hypre_Vector l1_norms_vec;
hypre_ParVector l1_norms_parvec;
hypre_VectorNumVectors(&l1_norms_vec) = 1;
hypre_VectorMultiVecStorageMethod(&l1_norms_vec) = 0;
hypre_VectorOwnsData(&l1_norms_vec) = 0;
hypre_VectorData(&l1_norms_vec) = l1_norms;
hypre_VectorSize(&l1_norms_vec) = num_rows;
/* TODO XXX
* The next line is NOT 100% correct, which should be the memory location of l1_norms instead of f
* But how do I know it? As said, don't use raw pointers, don't use raw pointers!

View File

@ -236,6 +236,7 @@ HYPRE_Int hypre_SeqVectorInitialize_v2( hypre_Vector *vector,
HYPRE_MemoryLocation memory_location );
HYPRE_Int hypre_SeqVectorInitialize ( hypre_Vector *vector );
HYPRE_Int hypre_SeqVectorSetDataOwner ( hypre_Vector *vector, HYPRE_Int owns_data );
HYPRE_Int hypre_SeqVectorSetSize ( hypre_Vector *vector, HYPRE_Int size );
hypre_Vector *hypre_SeqVectorRead ( char *file_name );
HYPRE_Int hypre_SeqVectorPrint ( hypre_Vector *vector, char *file_name );
HYPRE_Int hypre_SeqVectorSetConstantValues ( hypre_Vector *v, HYPRE_Complex value );

View File

@ -508,6 +508,7 @@ HYPRE_Int hypre_SeqVectorInitialize_v2( hypre_Vector *vector,
HYPRE_MemoryLocation memory_location );
HYPRE_Int hypre_SeqVectorInitialize ( hypre_Vector *vector );
HYPRE_Int hypre_SeqVectorSetDataOwner ( hypre_Vector *vector, HYPRE_Int owns_data );
HYPRE_Int hypre_SeqVectorSetSize ( hypre_Vector *vector, HYPRE_Int size );
hypre_Vector *hypre_SeqVectorRead ( char *file_name );
HYPRE_Int hypre_SeqVectorPrint ( hypre_Vector *vector, char *file_name );
HYPRE_Int hypre_SeqVectorSetConstantValues ( hypre_Vector *v, HYPRE_Complex value );

View File

@ -60,13 +60,11 @@ hypre_SeqMultiVectorCreate( HYPRE_Int size, HYPRE_Int num_vectors )
HYPRE_Int
hypre_SeqVectorDestroy( hypre_Vector *vector )
{
HYPRE_Int ierr = 0;
if (vector)
{
HYPRE_MemoryLocation memory_location = hypre_VectorMemoryLocation(vector);
if ( hypre_VectorOwnsData(vector) )
if (hypre_VectorOwnsData(vector))
{
hypre_TFree(hypre_VectorData(vector), memory_location);
}
@ -74,7 +72,7 @@ hypre_SeqVectorDestroy( hypre_Vector *vector )
hypre_TFree(vector, HYPRE_MEMORY_HOST);
}
return ierr;
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
@ -87,7 +85,6 @@ HYPRE_Int
hypre_SeqVectorInitialize_v2( hypre_Vector *vector, HYPRE_MemoryLocation memory_location )
{
HYPRE_Int size = hypre_VectorSize(vector);
HYPRE_Int ierr = 0;
HYPRE_Int num_vectors = hypre_VectorNumVectors(vector);
HYPRE_Int multivec_storage_method = hypre_VectorMultiVecStorageMethod(vector);
@ -97,27 +94,28 @@ hypre_SeqVectorInitialize_v2( hypre_Vector *vector, HYPRE_MemoryLocation memory_
* to be consistent with `memory_location'
* Otherwise, mismatches will exist and problems will be encountered
* when being used, and freed */
if ( !hypre_VectorData(vector) )
if (!hypre_VectorData(vector))
{
hypre_VectorData(vector) = hypre_CTAlloc(HYPRE_Complex, num_vectors * size, memory_location);
}
if ( multivec_storage_method == 0 )
if (multivec_storage_method == 0)
{
hypre_VectorVectorStride(vector) = size;
hypre_VectorIndexStride(vector) = 1;
hypre_VectorIndexStride(vector) = 1;
}
else if ( multivec_storage_method == 1 )
else if (multivec_storage_method == 1)
{
hypre_VectorVectorStride(vector) = 1;
hypre_VectorIndexStride(vector) = num_vectors;
hypre_VectorIndexStride(vector) = num_vectors;
}
else
{
++ierr;
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Invalid multivec storage method!\n");
return hypre_error_flag;
}
return ierr;
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
@ -127,11 +125,7 @@ hypre_SeqVectorInitialize_v2( hypre_Vector *vector, HYPRE_MemoryLocation memory_
HYPRE_Int
hypre_SeqVectorInitialize( hypre_Vector *vector )
{
HYPRE_Int ierr;
ierr = hypre_SeqVectorInitialize_v2( vector, hypre_VectorMemoryLocation(vector) );
return ierr;
return hypre_SeqVectorInitialize_v2(vector, hypre_VectorMemoryLocation(vector));
}
/*--------------------------------------------------------------------------
@ -142,11 +136,28 @@ HYPRE_Int
hypre_SeqVectorSetDataOwner( hypre_Vector *vector,
HYPRE_Int owns_data )
{
HYPRE_Int ierr = 0;
hypre_VectorOwnsData(vector) = owns_data;
return ierr;
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
* hypre_SeqVectorSetSize
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SeqVectorSetSize( hypre_Vector *vector,
HYPRE_Int size )
{
HYPRE_Int multivec_storage_method = hypre_VectorMultiVecStorageMethod(vector);
hypre_VectorSize(vector) = size;
if (multivec_storage_method == 0)
{
hypre_VectorVectorStride(vector) = size;
}
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
@ -201,7 +212,7 @@ HYPRE_Int
hypre_SeqVectorPrint( hypre_Vector *vector,
char *file_name )
{
FILE *fp;
FILE *fp;
HYPRE_Complex *data;
HYPRE_Int size, num_vectors, vecstride, idxstride;
@ -209,8 +220,6 @@ hypre_SeqVectorPrint( hypre_Vector *vector,
HYPRE_Int i, j;
HYPRE_Complex value;
HYPRE_Int ierr = 0;
num_vectors = hypre_VectorNumVectors(vector);
vecstride = hypre_VectorVectorStride(vector);
idxstride = hypre_VectorIndexStride(vector);
@ -265,7 +274,7 @@ hypre_SeqVectorPrint( hypre_Vector *vector,
fclose(fp);
return ierr;
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
@ -281,51 +290,65 @@ hypre_SeqVectorSetConstantValues( hypre_Vector *v,
#endif
HYPRE_Complex *vector_data = hypre_VectorData(v);
HYPRE_Int num_vectors = hypre_VectorNumVectors(v);
HYPRE_Int size = hypre_VectorSize(v);
HYPRE_Int ierr = 0;
HYPRE_Int total_size = size * num_vectors;
size *= hypre_VectorNumVectors(v);
//hypre_SeqVectorPrefetch(v, HYPRE_MEMORY_DEVICE);
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
if (size > 0)
/* Trivial case */
if (total_size <= 0)
{
hypreDevice_ComplexFilln( vector_data, size, value );
return hypre_error_flag;
}
#elif defined(HYPRE_USING_SYCL)
if (size > 0)
{
HYPRE_ONEDPL_CALL( std::fill_n, vector_data, size, value );
}
#else
HYPRE_Int i;
#if defined(HYPRE_USING_DEVICE_OPENMP)
#pragma omp target teams distribute parallel for private(i) is_device_ptr(vector_data)
#elif defined(HYPRE_USING_OPENMP)
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < size; i++)
{
vector_data[i] = value;
}
#endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
#if defined(HYPRE_USING_GPU)
hypre_SyncComputeStream(hypre_handle());
HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_VectorMemoryLocation(v));
//hypre_SeqVectorPrefetch(v, HYPRE_MEMORY_DEVICE);
if (exec == HYPRE_EXEC_DEVICE)
{
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypreDevice_ComplexFilln(vector_data, total_size, value);
#elif defined(HYPRE_USING_SYCL)
HYPRE_ONEDPL_CALL(std::fill_n, vector_data, total_size, value);
#elif defined(HYPRE_USING_DEVICE_OPENMP)
HYPRE_Int i;
#pragma omp target teams distribute parallel for private(i) is_device_ptr(vector_data)
for (i = 0; i < total_size; i++)
{
vector_data[i] = value;
}
#endif
hypre_SyncComputeStream(hypre_handle());
}
else
#endif /* defined(HYPRE_USING_GPU) */
{
HYPRE_Int i;
#if defined(HYPRE_USING_OPENMP)
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < total_size; i++)
{
vector_data[i] = value;
}
}
#ifdef HYPRE_PROFILE
hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
#endif
return ierr;
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
* hypre_SeqVectorSetRandomValues
*
* returns vector of values randomly distributed between -1.0 and +1.0
* returns vector of values randomly distributed between -1.0 and +1.0
*--------------------------------------------------------------------------*/
HYPRE_Int
@ -335,9 +358,8 @@ hypre_SeqVectorSetRandomValues( hypre_Vector *v,
HYPRE_Complex *vector_data = hypre_VectorData(v);
HYPRE_Int size = hypre_VectorSize(v);
HYPRE_Int i;
HYPRE_Int ierr = 0;
hypre_SeedRand(seed);
hypre_SeedRand(seed);
size *= hypre_VectorNumVectors(v);
if (hypre_GetActualMemLocation(hypre_VectorMemoryLocation(v)) == hypre_MEMORY_HOST)
@ -360,7 +382,7 @@ hypre_SeqVectorSetRandomValues( hypre_Vector *v,
hypre_TFree(h_data, HYPRE_MEMORY_HOST);
}
return ierr;
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
@ -377,8 +399,6 @@ hypre_SeqVectorCopy( hypre_Vector *x,
hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
#endif
HYPRE_Int ierr = 0;
size_t size = hypre_min( hypre_VectorSize(x), hypre_VectorSize(y) ) * hypre_VectorNumVectors(x);
hypre_TMemcpy( hypre_VectorData(y),
@ -392,7 +412,7 @@ hypre_SeqVectorCopy( hypre_Vector *x,
hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
#endif
return ierr;
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
@ -443,7 +463,7 @@ hypre_SeqVectorCloneShallow( hypre_Vector *x )
hypre_VectorMemoryLocation(y) = hypre_VectorMemoryLocation(x);
hypre_VectorData(y) = hypre_VectorData(x);
hypre_SeqVectorSetDataOwner( y, 0 );
hypre_SeqVectorSetDataOwner(y, 0);
hypre_SeqVectorInitialize(y);
return y;
@ -452,6 +472,7 @@ hypre_SeqVectorCloneShallow( hypre_Vector *x )
/*--------------------------------------------------------------------------
* hypre_SeqVectorScale
*--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SeqVectorScale( HYPRE_Complex alpha,
hypre_Vector *y )
@ -473,7 +494,6 @@ hypre_SeqVectorScale( HYPRE_Complex alpha,
HYPRE_Complex *y_data = hypre_VectorData(y);
HYPRE_Int size = hypre_VectorSize(y);
HYPRE_Int ierr = 0;
size *= hypre_VectorNumVectors(y);
@ -526,7 +546,7 @@ hypre_SeqVectorScale( HYPRE_Complex alpha,
hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
#endif
return ierr;
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
@ -544,7 +564,6 @@ hypre_SeqVectorAxpy( HYPRE_Complex alpha,
HYPRE_Complex *x_data = hypre_VectorData(x);
HYPRE_Complex *y_data = hypre_VectorData(y);
HYPRE_Int size = hypre_VectorSize(x);
HYPRE_Int ierr = 0;
size *= hypre_VectorNumVectors(x);
@ -599,7 +618,7 @@ hypre_SeqVectorAxpy( HYPRE_Complex alpha,
hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
#endif
return ierr;
return hypre_error_flag;
}
/* y = y + x ./ b */
@ -612,10 +631,26 @@ hypre_SeqVectorElmdivpy( hypre_Vector *x,
hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
#endif
HYPRE_Complex *x_data = hypre_VectorData(x);
HYPRE_Complex *b_data = hypre_VectorData(b);
HYPRE_Complex *y_data = hypre_VectorData(y);
HYPRE_Int size = hypre_VectorSize(b);
HYPRE_Complex *x_data = hypre_VectorData(x);
HYPRE_Complex *b_data = hypre_VectorData(b);
HYPRE_Complex *y_data = hypre_VectorData(y);
HYPRE_Int num_vectors_x = hypre_VectorNumVectors(x);
HYPRE_Int num_vectors_y = hypre_VectorNumVectors(y);
HYPRE_Int num_vectors_b = hypre_VectorNumVectors(b);
HYPRE_Int size = hypre_VectorSize(x);
/* Sanity checks */
if (hypre_VectorSize(x) != hypre_VectorSize(y) ||
hypre_VectorSize(y) != hypre_VectorSize(b))
{
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: size of x, y, and b do not match!\n");
return hypre_error_flag;
}
/* row-wise multivec is not supportted */
hypre_assert(hypre_VectorMultiVecStorageMethod(x) == 0);
hypre_assert(hypre_VectorMultiVecStorageMethod(b) == 0);
hypre_assert(hypre_VectorMultiVecStorageMethod(y) == 0);
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
//HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_VectorMemoryLocation(x), hypre_VectorMemoryLocation(b) );
@ -630,18 +665,84 @@ hypre_SeqVectorElmdivpy( hypre_Vector *x,
#pragma omp target teams distribute parallel for private(i) is_device_ptr(u_data,v_data,l1_norms)
#endif
*/
hypreDevice_IVAXPY(size, b_data, x_data, y_data);
if (num_vectors_b == 1)
{
if (num_vectors_x == 1)
{
hypreDevice_IVAXPY(size, b_data, x_data, y_data);
}
else if (num_vectors_x == num_vectors_y)
{
hypreDevice_IVAMXPMY(num_vectors_x, size, b_data, x_data, y_data);
}
else
{
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unsupported combination of num_vectors!\n");
return hypre_error_flag;
}
}
else
{
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unsupported combination of num_vectors!\n");
return hypre_error_flag;
}
}
else
#endif
{
HYPRE_Int i;
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < size; i++)
HYPRE_Int i, j;
if (num_vectors_b == 1)
{
y_data[i] += x_data[i] / b_data[i];
if (num_vectors_x == 1 && num_vectors_y == 1)
{
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < size; i++)
{
y_data[i] += x_data[i] / b_data[i];
}
}
else if (num_vectors_x == 2 && num_vectors_y == 2)
{
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < size; i++)
{
HYPRE_Complex val = 1.0 / b_data[i];
y_data[i] += x_data[i] * val;
y_data[i + size] += x_data[i + size] * val;
}
}
else if (num_vectors_x == num_vectors_y)
{
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i, j) HYPRE_SMP_SCHEDULE
#endif
for (i = 0; i < size; i++)
{
HYPRE_Complex val = 1.0 / b_data[i];
for (j = 0; j < num_vectors_x; j++)
{
y_data[i + size * j] += x_data[i + size * j] * val;
}
}
}
else
{
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unsupported combination of num_vectors!\n");
return hypre_error_flag;
}
}
else
{
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unsupported combination of num_vectors!\n");
return hypre_error_flag;
}
}
@ -813,7 +914,6 @@ HYPRE_Complex hypre_SeqVectorSumElts( hypre_Vector *vector )
HYPRE_Int
hypre_SeqVectorPrefetch( hypre_Vector *x, HYPRE_MemoryLocation memory_location)
{
HYPRE_Int ierr = 0;
#ifdef HYPRE_USING_UNIFIED_MEMORY
if (hypre_VectorMemoryLocation(x) != HYPRE_MEMORY_DEVICE)
{
@ -826,13 +926,13 @@ hypre_SeqVectorPrefetch( hypre_Vector *x, HYPRE_MemoryLocation memory_location)
if (size == 0)
{
return ierr;
return hypre_error_flag;
}
hypre_MemPrefetch(x_data, sizeof(HYPRE_Complex)*size, memory_location);
#endif
return ierr;
return hypre_error_flag;
}
#if 0
@ -850,7 +950,6 @@ hypre_SeqVectorMax( HYPRE_Complex alpha,
HYPRE_Complex *x_data = hypre_VectorData(x);
HYPRE_Complex *y_data = hypre_VectorData(y);
HYPRE_Int size = hypre_VectorSize(x);
HYPRE_Int ierr = 0;
size *= hypre_VectorNumVectors(x);
@ -886,6 +985,6 @@ hypre_SeqVectorMax( HYPRE_Complex alpha,
hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
#endif
return ierr;
return hypre_error_flag;
}
#endif

View File

@ -1785,10 +1785,13 @@ HYPRE_Int hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Comple
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
HYPRE_Int hypreDevice_IVAXPYMarked(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x,
HYPRE_Complex *y, HYPRE_Int *marker, HYPRE_Int marker_val);
HYPRE_Int hypreDevice_IVAMXPMY(HYPRE_Int m, HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
HYPRE_Int hypreDevice_IntFilln(HYPRE_Int *d_x, size_t n, HYPRE_Int v);
HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v);
HYPRE_Int hypreDevice_ComplexFilln(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v);
HYPRE_Int hypreDevice_CharFilln(char *d_x, size_t n, char v);
HYPRE_Int hypreDevice_IntStridedCopy ( HYPRE_Int size, HYPRE_Int stride,
HYPRE_Int *in, HYPRE_Int *out );
HYPRE_Int hypreDevice_IntScalen(HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int v);
HYPRE_Int hypreDevice_ComplexScalen(HYPRE_Complex *d_x, size_t n, HYPRE_Complex *d_y,
HYPRE_Complex v);

View File

@ -571,6 +571,101 @@ hypreDevice_IVAXPYMarked( HYPRE_Int n,
return hypre_error_flag;
}
/*--------------------------------------------------------------------------
* hypreGPUKernel_IVAMXPMY
*
* Device kernel for hypreDevice_IVAMXPMY. The template argument MM tells
* the maximum number of vectors in the unrolled loop
*--------------------------------------------------------------------------*/
template <HYPRE_Int MM>
__global__ void
hypreGPUKernel_IVAMXPMY( hypre_DeviceItem &item,
HYPRE_Int m,
HYPRE_Int n,
HYPRE_Complex *a,
HYPRE_Complex *x,
HYPRE_Complex *y)
{
HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item);
if (i < n)
{
HYPRE_Complex val = 1.0 / a[i];
if (MM > 0)
{
#pragma unroll
for (HYPRE_Int j = 0; j < MM; j++)
{
y[i + j * n] += x[i + j * n] * val;
}
}
else
{
/* Generic case */
for (HYPRE_Int j = 0; j < m; j++)
{
y[i + j * n] += x[i + j * n] * val;
}
}
}
}
/*--------------------------------------------------------------------------
* hypreDevice_IVAMXPMY
*
* Inverse Vector AXPY for m vectors x and y of size n stored column-wise:
*
* y[i + 0] += x[i + 0] / a[i]
* y[i + n] += x[i + n] / a[i]
* ... ...
* y[i + (m-1)*n] += x[i + (m-1)*n] / a[i]
*
* Note: does not work for row-wise multivectors
*--------------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_IVAMXPMY( HYPRE_Int m,
HYPRE_Int n,
HYPRE_Complex *a,
HYPRE_Complex *x,
HYPRE_Complex *y)
{
/* trivial case */
if (n <= 0)
{
return hypre_error_flag;
}
dim3 bDim = hypre_GetDefaultDeviceBlockDimension();
dim3 gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim);
switch (m)
{
case 1:
HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAXPY, gDim, bDim, n, a, x, y );
break;
case 2:
HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAMXPMY<2>, gDim, bDim, m, n, a, x, y );
break;
case 3:
HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAMXPMY<3>, gDim, bDim, m, n, a, x, y );
break;
case 4:
HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAMXPMY<4>, gDim, bDim, m, n, a, x, y );
break;
default:
HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAMXPMY<0>, gDim, bDim, m, n, a, x, y );
break;
}
return hypre_error_flag;
}
/*--------------------------------------------------------------------
* hypreDevice_CsrRowPtrsToIndices
*--------------------------------------------------------------------*/
@ -1618,87 +1713,6 @@ hypre_ResetDeviceRandGenerator( hypre_ulonglongint seed,
#endif /* #if defined(HYPRE_USING_CURAND) || defined(HYPRE_USING_ROCRAND) || defined(HYPRE_USING_ONEMKLRAND) */
/*--------------------------------------------------------------------
* hypreGPUKernel_filln
*--------------------------------------------------------------------*/
template<typename T>
__global__ void
hypreGPUKernel_filln(hypre_DeviceItem &item, T *x, size_t n, T v)
{
HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item);
if (i < n)
{
x[i] = v;
}
}
/*--------------------------------------------------------------------
* hypreDevice_Filln
*--------------------------------------------------------------------*/
template<typename T>
HYPRE_Int
hypreDevice_Filln(T *d_x, size_t n, T v)
{
#if 0
HYPRE_THRUST_CALL( fill_n, d_x, n, v);
#else
if (n <= 0)
{
return hypre_error_flag;
}
dim3 bDim = hypre_GetDefaultDeviceBlockDimension();
dim3 gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim);
HYPRE_GPU_LAUNCH( hypreGPUKernel_filln, gDim, bDim, d_x, n, v );
#endif
return hypre_error_flag;
}
/*--------------------------------------------------------------------
* hypreDevice_ComplexFilln
*--------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_ComplexFilln(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v)
{
return hypreDevice_Filln(d_x, n, v);
}
/*--------------------------------------------------------------------
* hypreDevice_CharFilln
*--------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_CharFilln(char *d_x, size_t n, char v)
{
return hypreDevice_Filln(d_x, n, v);
}
/*--------------------------------------------------------------------
* hypreDevice_IntFilln
*--------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_IntFilln(HYPRE_Int *d_x, size_t n, HYPRE_Int v)
{
return hypreDevice_Filln(d_x, n, v);
}
/*--------------------------------------------------------------------
* hypreDevice_BigIntFilln
*--------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v)
{
return hypreDevice_Filln(d_x, n, v);
}
#endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -1868,6 +1882,158 @@ hypreDevice_ComplexScalen( HYPRE_Complex *d_x,
return hypreDevice_Scalen(d_x, n, d_y, v);
}
/*--------------------------------------------------------------------
* hypreGPUKernel_filln
*--------------------------------------------------------------------*/
template<typename T>
__global__ void
hypreGPUKernel_filln(hypre_DeviceItem &item, T *x, size_t n, T v)
{
HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item);
if (i < n)
{
x[i] = v;
}
}
/*--------------------------------------------------------------------
* hypreDevice_Filln
*--------------------------------------------------------------------*/
template<typename T>
HYPRE_Int
hypreDevice_Filln(T *d_x, size_t n, T v)
{
#if 0
HYPRE_THRUST_CALL( fill_n, d_x, n, v);
#else
if (n <= 0)
{
return hypre_error_flag;
}
dim3 bDim = hypre_GetDefaultDeviceBlockDimension();
dim3 gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim);
HYPRE_GPU_LAUNCH( hypreGPUKernel_filln, gDim, bDim, d_x, n, v );
#endif
return hypre_error_flag;
}
/*--------------------------------------------------------------------
* hypreDevice_ComplexFilln
*--------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_ComplexFilln( HYPRE_Complex *d_x,
size_t n,
HYPRE_Complex v )
{
return hypreDevice_Filln(d_x, n, v);
}
/*--------------------------------------------------------------------
* hypreDevice_CharFilln
*--------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_CharFilln( char *d_x,
size_t n,
char v )
{
return hypreDevice_Filln(d_x, n, v);
}
/*--------------------------------------------------------------------
* hypreDevice_IntFilln
*--------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_IntFilln( HYPRE_Int *d_x,
size_t n,
HYPRE_Int v )
{
return hypreDevice_Filln(d_x, n, v);
}
/*--------------------------------------------------------------------
* hypreDevice_BigIntFilln
*--------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_BigIntFilln( HYPRE_BigInt *d_x,
size_t n,
HYPRE_BigInt v)
{
return hypreDevice_Filln(d_x, n, v);
}
/*--------------------------------------------------------------------
* hypreGPUKernel_StridedCopy
*--------------------------------------------------------------------*/
template<typename T>
__global__ void
hypreGPUKernel_StridedCopy(hypre_DeviceItem &item,
HYPRE_Int size,
HYPRE_Int stride,
T *in,
T *out )
{
HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item);
if (i < size)
{
out[i] = in[i * stride];
}
}
/*--------------------------------------------------------------------
* hypreDevice_StridedCopy
*--------------------------------------------------------------------*/
template<typename T>
HYPRE_Int
hypreDevice_StridedCopy( HYPRE_Int size,
HYPRE_Int stride,
T *in,
T *out )
{
if (size < 1 || stride < 1)
{
return hypre_error_flag;
}
if (in == out)
{
hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Cannot perform in-place strided copy");
return hypre_error_flag;
}
dim3 bDim = hypre_GetDefaultDeviceBlockDimension();
dim3 gDim = hypre_GetDefaultDeviceGridDimension(size, "thread", bDim);
HYPRE_GPU_LAUNCH( hypreGPUKernel_StridedCopy, gDim, bDim, size, stride, in, out );
return hypre_error_flag;
}
/*--------------------------------------------------------------------
* hypreDevice_IntStridedCopy
*--------------------------------------------------------------------*/
HYPRE_Int
hypreDevice_IntStridedCopy( HYPRE_Int size,
HYPRE_Int stride,
HYPRE_Int *in,
HYPRE_Int *out )
{
return hypreDevice_StridedCopy(size, stride, in, out);
}
/*--------------------------------------------------------------------
* hypreDevice_CsrRowPtrsToIndicesWithRowNum
*

View File

@ -282,10 +282,13 @@ HYPRE_Int hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Comple
HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
HYPRE_Int hypreDevice_IVAXPYMarked(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x,
HYPRE_Complex *y, HYPRE_Int *marker, HYPRE_Int marker_val);
HYPRE_Int hypreDevice_IVAMXPMY(HYPRE_Int m, HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y);
HYPRE_Int hypreDevice_IntFilln(HYPRE_Int *d_x, size_t n, HYPRE_Int v);
HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v);
HYPRE_Int hypreDevice_ComplexFilln(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v);
HYPRE_Int hypreDevice_CharFilln(char *d_x, size_t n, char v);
HYPRE_Int hypreDevice_IntStridedCopy ( HYPRE_Int size, HYPRE_Int stride,
HYPRE_Int *in, HYPRE_Int *out );
HYPRE_Int hypreDevice_IntScalen(HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int v);
HYPRE_Int hypreDevice_ComplexScalen(HYPRE_Complex *d_x, size_t n, HYPRE_Complex *d_y,
HYPRE_Complex v);