hypre/src/seq_mv/csr_matvec_device.c

/******************************************************************************
* Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
* HYPRE Project Developers. See the top-level COPYRIGHT file for details.
*
* SPDX-License-Identifier: (Apache-2.0 OR MIT)
******************************************************************************/
/******************************************************************************
*
* Matvec functions for hypre_CSRMatrix class.
*
*****************************************************************************/
#include "seq_mv.h"
#include "_hypre_utilities.hpp"
#include "seq_mv.hpp"
#if defined(HYPRE_USING_GPU)
/* y = alpha * A * x + beta * y
 * NOTE: this function is intended to be called only by the other functions in this file
 */
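/* A hypothetical call sketch (illustration only, not part of the library):
 *   hypre_CSRMatrixMatvecDevice2(0, 1.0, A, x, 0.0, y, 0);  // y = A * x
 *   hypre_CSRMatrixMatvecDevice2(1, 2.0, A, x, 1.0, y, 0);  // y += 2 * A^T * x
 * The actual backend (cuSPARSE, OpenMP offload, rocSPARSE, oneMKL) is chosen
 * by the compile-time guards below. */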
static inline HYPRE_Int
hypre_CSRMatrixMatvecDevice2( HYPRE_Int trans,
HYPRE_Complex alpha,
hypre_CSRMatrix *A,
hypre_Vector *x,
HYPRE_Complex beta,
hypre_Vector *y,
HYPRE_Int offset )
{
if (hypre_VectorData(x) == hypre_VectorData(y))
{
hypre_error_w_msg(HYPRE_ERROR_GENERIC,
"ERROR::x and y are the same pointer in hypre_CSRMatrixMatvecDevice2");
}
#ifdef HYPRE_USING_CUSPARSE
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
/* Luke E: The generic API is technically supported on CUDA 10.1 and 10.2 as a preview,
 * with Dcsrmv being deprecated. However, there are limitations.
 * While the CUDA < 11 documentation specifically mentions using csr2csc for
 * transposed matrix products with dcsrm*, no such mention is present in the
 * SpMV interface.
 */
hypre_CSRMatrixMatvecCusparseNewAPI(trans, alpha, A, x, beta, y, offset);
#else
hypre_CSRMatrixMatvecCusparseOldAPI(trans, alpha, A, x, beta, y, offset);
#endif
#elif defined(HYPRE_USING_DEVICE_OPENMP)
hypre_CSRMatrixMatvecOMPOffload(trans, alpha, A, x, beta, y, offset);
#elif defined(HYPRE_USING_ROCSPARSE)
hypre_CSRMatrixMatvecRocsparse(trans, alpha, A, x, beta, y, offset);
#elif defined(HYPRE_USING_ONEMKLSPARSE)
hypre_CSRMatrixMatvecOnemklsparse(trans, alpha, A, x, beta, y, offset);
#else // #ifdef HYPRE_USING_CUSPARSE
#error HYPRE SPMV TODO
#endif
return hypre_error_flag;
}
/* y = alpha * A * x + beta * b */
HYPRE_Int
hypre_CSRMatrixMatvecDevice( HYPRE_Int trans,
HYPRE_Complex alpha,
hypre_CSRMatrix *A,
hypre_Vector *x,
HYPRE_Complex beta,
hypre_Vector *b,
hypre_Vector *y,
HYPRE_Int offset )
{
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_GpuProfilingPushRange("CSRMatrixMatvec");
#endif
// TODO: RL: do we need offset > 0 at all?
hypre_assert(offset == 0);
HYPRE_Int nx = trans ? hypre_CSRMatrixNumRows(A) : hypre_CSRMatrixNumCols(A);
HYPRE_Int ny = trans ? hypre_CSRMatrixNumCols(A) : hypre_CSRMatrixNumRows(A);
//RL: Note the "<=", since the vectors can sometimes be temporary work spaces
// that are larger than needed (as in par_cheby.c)
hypre_assert(ny <= hypre_VectorSize(y));
hypre_assert(nx <= hypre_VectorSize(x));
hypre_assert(ny <= hypre_VectorSize(b));
//hypre_CSRMatrixPrefetch(A, HYPRE_MEMORY_DEVICE);
//hypre_SeqVectorPrefetch(x, HYPRE_MEMORY_DEVICE);
//hypre_SeqVectorPrefetch(b, HYPRE_MEMORY_DEVICE);
//if (hypre_VectorData(b) != hypre_VectorData(y))
//{
// hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
//}
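/* The backends below compute y = alpha*A*x + beta*y in place, so to get
 * y = alpha*A*x + beta*b the data of b is first staged into y (skipped when
 * b and y alias the same array) */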
if (hypre_VectorData(b) != hypre_VectorData(y))
{
hypre_TMemcpy( hypre_VectorData(y) + offset,
hypre_VectorData(b) + offset,
HYPRE_Complex,
ny - offset,
hypre_VectorMemoryLocation(y),
hypre_VectorMemoryLocation(b) );
}
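/* Degenerate case: an empty matrix or alpha == 0 reduces the product to
 * y = beta * y, which avoids launching the vendor SpMV */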
if (hypre_CSRMatrixNumNonzeros(A) <= 0 || alpha == 0.0)
{
hypre_SeqVectorScale(beta, y);
}
else
{
hypre_CSRMatrixMatvecDevice2(trans, alpha, A, x, beta, y, offset);
}
hypre_SyncComputeStream(hypre_handle());
#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
hypre_GpuProfilingPopRange();
#endif
return hypre_error_flag;
}
#if defined(HYPRE_USING_CUSPARSE)
#if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
HYPRE_Int
hypre_CSRMatrixMatvecCusparseNewAPI( HYPRE_Int trans,
HYPRE_Complex alpha,
hypre_CSRMatrix *A,
hypre_Vector *x,
HYPRE_Complex beta,
hypre_Vector *y,
HYPRE_Int offset )
{
const cudaDataType data_type = hypre_HYPREComplexToCudaDataType();
const cusparseIndexType_t index_type = hypre_HYPREIntToCusparseIndexType();
cusparseSpMatDescr_t matA;
cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
hypre_CSRMatrix *AT;
if (trans)
{
/* We handle the transpose explicitly to ensure the same output in each run,
 * and for a potential performance improvement, at the cost of the memory for AT */
hypre_CSRMatrixTransposeDevice(A, &AT, 1);
matA = hypre_CSRMatrixToCusparseSpMat(AT, offset);
}
else
{
matA = hypre_CSRMatrixToCusparseSpMat(A, offset);
}
/* SpMV */
size_t bufferSize = 0;
char *dBuffer = hypre_CSRMatrixGPUMatSpMVBuffer(A);
HYPRE_Int x_size_override = trans ? hypre_CSRMatrixNumRows(A) : hypre_CSRMatrixNumCols(A);
HYPRE_Int y_size_override = trans ? hypre_CSRMatrixNumCols(A) : hypre_CSRMatrixNumRows(A);
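/* Size the dense-vector descriptors from the matrix dimensions rather than
 * hypre_VectorSize, since the hypre vectors may be oversized scratch arrays
 * (see the "<=" asserts in hypre_CSRMatrixMatvecDevice) */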
cusparseDnVecDescr_t vecX = hypre_VectorToCusparseDnVec(x, 0, x_size_override);
cusparseDnVecDescr_t vecY = hypre_VectorToCusparseDnVec(y, offset, y_size_override - offset);
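/* cusparseSpMV needs an external workspace: query its size on first use and
 * cache the allocation on the matrix so later matvecs reuse it */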
if (!dBuffer)
{
HYPRE_CUSPARSE_CALL( cusparseSpMV_bufferSize(handle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,
matA,
vecX,
&beta,
vecY,
data_type,
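/* the CSR algorithm enum was renamed in newer cuSPARSE releases; this
   version guard (here and in the cusparseSpMV call below) picks the
   spelling the installed cuSPARSE provides */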
#if CUSPARSE_VERSION >= 11400
CUSPARSE_SPMV_CSR_ALG2,
#else
CUSPARSE_CSRMV_ALG2,
#endif
&bufferSize) );
dBuffer = hypre_TAlloc(char, bufferSize, HYPRE_MEMORY_DEVICE);
hypre_CSRMatrixGPUMatSpMVBuffer(A) = dBuffer;
}
HYPRE_CUSPARSE_CALL( cusparseSpMV(handle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha,
matA,
vecX,
&beta,
vecY,
data_type,
#if CUSPARSE_VERSION >= 11400
CUSPARSE_SPMV_CSR_ALG2,
#else
CUSPARSE_CSRMV_ALG2,
#endif
dBuffer) );
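/* wait for the asynchronous SpMV to finish before AT and the descriptors
 * are torn down below */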
hypre_SyncComputeStream(hypre_handle());
if (trans)
{
hypre_CSRMatrixDestroy(AT);
}
/* This function releases the host memory allocated for the sparse matrix descriptor */
HYPRE_CUSPARSE_CALL( cusparseDestroySpMat(matA) );
HYPRE_CUSPARSE_CALL( cusparseDestroyDnVec(vecX) );
HYPRE_CUSPARSE_CALL( cusparseDestroyDnVec(vecY) );
return hypre_error_flag;
}
#else // #if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
HYPRE_Int
hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int trans,
HYPRE_Complex alpha,
hypre_CSRMatrix *A,
hypre_Vector *x,
HYPRE_Complex beta,
hypre_Vector *y,
HYPRE_Int offset )
{
#ifdef HYPRE_BIGINT
#error "ERROR: cusparse old API should not be used when bigint is enabled!"
#endif
cusparseHandle_t handle = hypre_HandleCusparseHandle(hypre_handle());
cusparseMatDescr_t descr = hypre_CSRMatrixGPUMatDescr(A);
hypre_CSRMatrix *B;
if (trans)
{
hypre_CSRMatrixTransposeDevice(A, &B, 1);
}
else
{
B = A;
}
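/* Legacy csrmv path. The 'D' variant is double precision, so this path
 * assumes HYPRE_Complex is double; shifting the row pointer by offset and
 * shrinking the row count restricts the product to rows [offset, num_rows) */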
HYPRE_CUSPARSE_CALL( cusparseDcsrmv(handle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
hypre_CSRMatrixNumRows(B) - offset,
hypre_CSRMatrixNumCols(B),
hypre_CSRMatrixNumNonzeros(B),
&alpha,
descr,
hypre_CSRMatrixData(B),
hypre_CSRMatrixI(B) + offset,
hypre_CSRMatrixJ(B),
hypre_VectorData(x),
&beta,
hypre_VectorData(y) + offset) );
if (trans)
{
hypre_CSRMatrixDestroy(B);
}
return hypre_error_flag;
}
#endif // #if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION
#endif // #if defined(HYPRE_USING_CUSPARSE)
#if defined(HYPRE_USING_ROCSPARSE)
HYPRE_Int
hypre_CSRMatrixMatvecRocsparse( HYPRE_Int trans,
HYPRE_Complex alpha,
hypre_CSRMatrix *A,
hypre_Vector *x,
HYPRE_Complex beta,
hypre_Vector *y,
HYPRE_Int offset )
{
rocsparse_handle handle = hypre_HandleCusparseHandle(hypre_handle());
rocsparse_mat_descr descr = hypre_CSRMatrixGPUMatDescr(A);
rocsparse_mat_info info = hypre_CSRMatrixGPUMatInfo(A);
hypre_CSRMatrix *B;
if (trans)
{
hypre_CSRMatrixTransposeDevice(A, &B, 1);
}
else
{
B = A;
}
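/* Same row-offset scheme as the cuSPARSE path: only rows [offset, num_rows)
 * of B contribute, with results written to y + offset */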
HYPRE_ROCSPARSE_CALL( rocsparse_dcsrmv(handle,
rocsparse_operation_none,
hypre_CSRMatrixNumRows(B) - offset,
hypre_CSRMatrixNumCols(B),
hypre_CSRMatrixNumNonzeros(B),
&alpha,
descr,
hypre_CSRMatrixData(B),
hypre_CSRMatrixI(B) + offset,
hypre_CSRMatrixJ(B),
info,
hypre_VectorData(x),
&beta,
hypre_VectorData(y) + offset) );
if (trans)
{
hypre_CSRMatrixDestroy(B);
}
return hypre_error_flag;
}
#endif // #if defined(HYPRE_USING_ROCSPARSE)
#if defined(HYPRE_USING_ONEMKLSPARSE)
HYPRE_Int
hypre_CSRMatrixMatvecOnemklsparse( HYPRE_Int trans,
HYPRE_Complex alpha,
hypre_CSRMatrix *A,
hypre_Vector *x,
HYPRE_Complex beta,
hypre_Vector *y,
HYPRE_Int offset )
{
sycl::queue *compute_queue = hypre_HandleComputeStream(hypre_handle());
hypre_CSRMatrix *AT;
oneapi::mkl::sparse::matrix_handle_t matA_handle = hypre_CSRMatrixGPUMatHandle(A);
if (trans)
{
hypre_CSRMatrixTransposeDevice(A, &AT, 1);
matA_handle = hypre_CSRMatrixGPUMatHandle(AT);
}
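/* gemv is submitted to the SYCL compute queue and the trailing .wait()
 * makes the call synchronous; note that offset is applied only to y here */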
HYPRE_SYCL_CALL( oneapi::mkl::sparse::gemv(*compute_queue,
oneapi::mkl::transpose::nontrans,
alpha,
matA_handle,
hypre_VectorData(x),
beta,
hypre_VectorData(y) + offset).wait() );
if (trans)
{
hypre_CSRMatrixDestroy(AT);
}
return hypre_error_flag;
}
#endif // #if defined(HYPRE_USING_ONEMKLSPARSE)
#endif // #if defined(HYPRE_USING_GPU)