diff --git a/src/parcsr_mv/HYPRE_parcsr_vector.c b/src/parcsr_mv/HYPRE_parcsr_vector.c index 4309f9ace..6fb9c9497 100644 --- a/src/parcsr_mv/HYPRE_parcsr_vector.c +++ b/src/parcsr_mv/HYPRE_parcsr_vector.c @@ -165,6 +165,7 @@ HYPRE_ParVectorScale( HYPRE_Complex value, /*-------------------------------------------------------------------------- * HYPRE_ParVectorAxpy *--------------------------------------------------------------------------*/ + HYPRE_Int HYPRE_ParVectorAxpy( HYPRE_Complex alpha, HYPRE_ParVector x, diff --git a/src/parcsr_mv/_hypre_parcsr_mv.h b/src/parcsr_mv/_hypre_parcsr_mv.h index a5b75c2c7..c492ec71e 100644 --- a/src/parcsr_mv/_hypre_parcsr_mv.h +++ b/src/parcsr_mv/_hypre_parcsr_mv.h @@ -1211,6 +1211,9 @@ hypre_ParVector *hypre_ParVectorCloneDeep_v2( hypre_ParVector *x, HYPRE_Int hypre_ParVectorMigrate(hypre_ParVector *x, HYPRE_MemoryLocation memory_location); HYPRE_Int hypre_ParVectorScale ( HYPRE_Complex alpha, hypre_ParVector *y ); HYPRE_Int hypre_ParVectorAxpy ( HYPRE_Complex alpha, hypre_ParVector *x, hypre_ParVector *y ); +HYPRE_Int hypre_ParVectorAxpyz ( HYPRE_Complex alpha, hypre_ParVector *x, + HYPRE_Complex beta, hypre_ParVector *y, + hypre_ParVector *z ); HYPRE_Int hypre_ParVectorMassAxpy ( HYPRE_Complex *alpha, hypre_ParVector **x, hypre_ParVector *y, HYPRE_Int k, HYPRE_Int unroll); HYPRE_Real hypre_ParVectorInnerProd ( hypre_ParVector *x, hypre_ParVector *y ); diff --git a/src/parcsr_mv/par_vector.c b/src/parcsr_mv/par_vector.c index 5c4f734ec..f104a9818 100644 --- a/src/parcsr_mv/par_vector.c +++ b/src/parcsr_mv/par_vector.c @@ -348,6 +348,7 @@ hypre_ParVectorCopy( hypre_ParVector *x, { hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); + return hypre_SeqVectorCopy(x_local, y_local); } @@ -442,6 +443,24 @@ hypre_ParVectorAxpy( HYPRE_Complex alpha, return hypre_SeqVectorAxpy(alpha, x_local, y_local); } +/*-------------------------------------------------------------------------- + * hypre_ParVectorAxpyz + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_ParVectorAxpyz( HYPRE_Complex alpha, + hypre_ParVector *x, + HYPRE_Complex beta, + hypre_ParVector *y, + hypre_ParVector *z ) +{ + hypre_Vector *x_local = hypre_ParVectorLocalVector(x); + hypre_Vector *y_local = hypre_ParVectorLocalVector(y); + hypre_Vector *z_local = hypre_ParVectorLocalVector(z); + + return hypre_SeqVectorAxpyz(alpha, x_local, beta, y_local, z_local); +} + /*-------------------------------------------------------------------------- * hypre_ParVectorInnerProd *--------------------------------------------------------------------------*/ diff --git a/src/parcsr_mv/protos.h b/src/parcsr_mv/protos.h index 13ec57517..660028a22 100644 --- a/src/parcsr_mv/protos.h +++ b/src/parcsr_mv/protos.h @@ -558,6 +558,9 @@ hypre_ParVector *hypre_ParVectorCloneDeep_v2( hypre_ParVector *x, HYPRE_Int hypre_ParVectorMigrate(hypre_ParVector *x, HYPRE_MemoryLocation memory_location); HYPRE_Int hypre_ParVectorScale ( HYPRE_Complex alpha, hypre_ParVector *y ); HYPRE_Int hypre_ParVectorAxpy ( HYPRE_Complex alpha, hypre_ParVector *x, hypre_ParVector *y ); +HYPRE_Int hypre_ParVectorAxpyz ( HYPRE_Complex alpha, hypre_ParVector *x, + HYPRE_Complex beta, hypre_ParVector *y, + hypre_ParVector *z ); HYPRE_Int hypre_ParVectorMassAxpy ( HYPRE_Complex *alpha, hypre_ParVector **x, hypre_ParVector *y, HYPRE_Int k, HYPRE_Int unroll); HYPRE_Real hypre_ParVectorInnerProd ( hypre_ParVector *x, hypre_ParVector *y ); diff --git a/src/seq_mv/protos.h b/src/seq_mv/protos.h index 3afe68d07..5879f7de7 100644 --- a/src/seq_mv/protos.h +++ b/src/seq_mv/protos.h @@ -253,6 +253,12 @@ HYPRE_Int hypre_SeqVectorScaleDevice( HYPRE_Complex alpha, hypre_Vector *y ); HYPRE_Int hypre_SeqVectorAxpy ( HYPRE_Complex alpha, hypre_Vector *x, hypre_Vector *y ); HYPRE_Int hypre_SeqVectorAxpyHost ( HYPRE_Complex alpha, hypre_Vector *x, hypre_Vector *y ); HYPRE_Int hypre_SeqVectorAxpyDevice ( HYPRE_Complex alpha, hypre_Vector *x, hypre_Vector *y ); +HYPRE_Int hypre_SeqVectorAxpyz ( HYPRE_Complex alpha, hypre_Vector *x, + HYPRE_Complex beta, hypre_Vector *y, + hypre_Vector *z ); +HYPRE_Int hypre_SeqVectorAxpyzDevice ( HYPRE_Complex alpha, hypre_Vector *x, + HYPRE_Complex beta, hypre_Vector *y, + hypre_Vector *z ); HYPRE_Real hypre_SeqVectorInnerProd ( hypre_Vector *x, hypre_Vector *y ); HYPRE_Real hypre_SeqVectorInnerProdHost ( hypre_Vector *x, hypre_Vector *y ); HYPRE_Real hypre_SeqVectorInnerProdDevice ( hypre_Vector *x, hypre_Vector *y ); diff --git a/src/seq_mv/seq_mv.h b/src/seq_mv/seq_mv.h index f4f947d5d..0e6ea057d 100644 --- a/src/seq_mv/seq_mv.h +++ b/src/seq_mv/seq_mv.h @@ -525,6 +525,12 @@ HYPRE_Int hypre_SeqVectorScaleDevice( HYPRE_Complex alpha, hypre_Vector *y ); HYPRE_Int hypre_SeqVectorAxpy ( HYPRE_Complex alpha, hypre_Vector *x, hypre_Vector *y ); HYPRE_Int hypre_SeqVectorAxpyHost ( HYPRE_Complex alpha, hypre_Vector *x, hypre_Vector *y ); HYPRE_Int hypre_SeqVectorAxpyDevice ( HYPRE_Complex alpha, hypre_Vector *x, hypre_Vector *y ); +HYPRE_Int hypre_SeqVectorAxpyz ( HYPRE_Complex alpha, hypre_Vector *x, + HYPRE_Complex beta, hypre_Vector *y, + hypre_Vector *z ); +HYPRE_Int hypre_SeqVectorAxpyzDevice ( HYPRE_Complex alpha, hypre_Vector *x, + HYPRE_Complex beta, hypre_Vector *y, + hypre_Vector *z ); HYPRE_Real hypre_SeqVectorInnerProd ( hypre_Vector *x, hypre_Vector *y ); HYPRE_Real hypre_SeqVectorInnerProdHost ( hypre_Vector *x, hypre_Vector *y ); HYPRE_Real hypre_SeqVectorInnerProdDevice ( hypre_Vector *x, hypre_Vector *y ); diff --git a/src/seq_mv/vector.c b/src/seq_mv/vector.c index b1e054aef..0d39a86c9 100644 --- a/src/seq_mv/vector.c +++ b/src/seq_mv/vector.c @@ -601,6 +601,66 @@ hypre_SeqVectorAxpy( HYPRE_Complex alpha, return hypre_error_flag; } +/*-------------------------------------------------------------------------- + * hypre_SeqVectorAxpyzHost + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorAxpyzHost( HYPRE_Complex alpha, + hypre_Vector *x, + HYPRE_Complex beta, + hypre_Vector *y, + hypre_Vector *z ) +{ + HYPRE_Complex *x_data = hypre_VectorData(x); + HYPRE_Complex *y_data = hypre_VectorData(y); + HYPRE_Complex *z_data = hypre_VectorData(z); + + HYPRE_Int num_vectors = hypre_VectorNumVectors(x); + HYPRE_Int size = hypre_VectorSize(x); + HYPRE_Int total_size = size * num_vectors; + HYPRE_Int i; + +#if defined(HYPRE_USING_OPENMP) + #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < total_size; i++) + { + z_data[i] = alpha * x_data[i] + beta * y_data[i]; + } + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + * hypre_SeqVectorAxpyz + * + * Computes z = a*x + b*y + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorAxpyz( HYPRE_Complex alpha, + hypre_Vector *x, + HYPRE_Complex beta, + hypre_Vector *y, + hypre_Vector *z ) +{ +#if defined(HYPRE_USING_GPU) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_VectorMemoryLocation(x), + hypre_VectorMemoryLocation(y)); + if (exec == HYPRE_EXEC_DEVICE) + { + hypre_SeqVectorAxpyzDevice(alpha, x, beta, y, z); + } + else +#endif + { + hypre_SeqVectorAxpyzHost(alpha, x, beta, y, z); + } + + return hypre_error_flag; +} + /*-------------------------------------------------------------------------- * hypre_SeqVectorElmdivpyHost * diff --git a/src/seq_mv/vector_device.c b/src/seq_mv/vector_device.c index 613ae1ad0..5de829c41 100644 --- a/src/seq_mv/vector_device.c +++ b/src/seq_mv/vector_device.c @@ -151,6 +151,43 @@ hypre_SeqVectorAxpyDevice( HYPRE_Complex alpha, return hypre_error_flag; } +/*-------------------------------------------------------------------------- + * hypre_SeqVectorAxpyzDevice + *--------------------------------------------------------------------------*/ + +HYPRE_Int +hypre_SeqVectorAxpyzDevice( HYPRE_Complex alpha, + hypre_Vector *x, + HYPRE_Complex beta, + hypre_Vector *y, + hypre_Vector *z ) +{ + HYPRE_Complex *x_data = hypre_VectorData(x); + HYPRE_Complex *y_data = hypre_VectorData(y); + HYPRE_Complex *z_data = hypre_VectorData(z); + + HYPRE_Int num_vectors = hypre_VectorNumVectors(x); + HYPRE_Int size = hypre_VectorSize(x); + HYPRE_Int total_size = size * num_vectors; + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL) + hypreDevice_ComplexAxpyzn(total_size, x_data, y_data, z_data, alpha, beta); + +#elif defined(HYPRE_USING_DEVICE_OPENMP) + HYPRE_Int i; + + #pragma omp target teams distribute parallel for private(i) is_device_ptr(z_data, y_data, x_data) + for (i = 0; i < total_size; i++) + { + z_data[i] = alpha * x_data[i] + beta * z_data[i]; + } +#endif + + hypre_SyncComputeStream(hypre_handle()); + + return hypre_error_flag; +} + /*-------------------------------------------------------------------------- * hypre_SeqVectorElmdivpyDevice *--------------------------------------------------------------------------*/ diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 32a1e1252..a23350a6c 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -737,7 +737,6 @@ typedef void (*GPUMfreeFunc)(void *); #endif #endif - /****************************************************************************** * Copyright (c) 1998 Lawrence Livermore National Security, LLC and other * HYPRE Project Developers. See the top-level COPYRIGHT file for details. @@ -1868,6 +1867,8 @@ HYPRE_Int hypreDevice_IntAxpyn(HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_I HYPRE_Int a); HYPRE_Int hypreDevice_BigIntAxpyn(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt *d_y, HYPRE_BigInt *d_z, HYPRE_BigInt a); +HYPRE_Int hypreDevice_ComplexAxpyzn(HYPRE_Int n, HYPRE_Complex *d_x, HYPRE_Complex *d_y, + HYPRE_Complex *d_z, HYPRE_Complex a, HYPRE_Complex b); HYPRE_Int* hypreDevice_CsrRowPtrsToIndices(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr); HYPRE_Int hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_ind); diff --git a/src/utilities/device_utils.c b/src/utilities/device_utils.c index 4a7411dc7..97f7b74ed 100644 --- a/src/utilities/device_utils.c +++ b/src/utilities/device_utils.c @@ -1413,37 +1413,35 @@ hypreDevice_GenScatterAdd( HYPRE_Real *x, } /*-------------------------------------------------------------------- - * hypreGPUKernel_axpyn + * hypreGPUKernel_Axpyzn *--------------------------------------------------------------------*/ template __global__ void -hypreGPUKernel_axpyn( hypre_DeviceItem &item, - T *x, - size_t n, - T *y, - T *z, - T a ) +hypreGPUKernel_Axpyzn( hypre_DeviceItem &item, + HYPRE_Int n, + T *x, + T *y, + T *z, + T a, + T b ) { HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item); if (i < n) { - z[i] = a * x[i] + y[i]; + z[i] = a * x[i] + b * y[i]; } } /*-------------------------------------------------------------------- - * hypreDevice_Axpyn + * hypreDevice_Axpyzn *--------------------------------------------------------------------*/ template HYPRE_Int -hypreDevice_Axpyn(T *d_x, size_t n, T *d_y, T *d_z, T a) +hypreDevice_Axpyzn(HYPRE_Int n, T *d_x, T *d_y, T *d_z, T a, T b) { -#if 0 - HYPRE_THRUST_CALL( transform, d_x, d_x + n, d_y, d_z, a * _1 + _2 ); -#else if (n <= 0) { return hypre_error_flag; @@ -1452,8 +1450,7 @@ hypreDevice_Axpyn(T *d_x, size_t n, T *d_y, T *d_z, T a) dim3 bDim = hypre_GetDefaultDeviceBlockDimension(); dim3 gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim); - HYPRE_GPU_LAUNCH( hypreGPUKernel_axpyn, gDim, bDim, d_x, n, d_y, d_z, a ); -#endif + HYPRE_GPU_LAUNCH( hypreGPUKernel_Axpyzn, gDim, bDim, n, d_x, d_y, d_z, a, b ); return hypre_error_flag; } @@ -1469,7 +1466,7 @@ hypreDevice_ComplexAxpyn( HYPRE_Complex *d_x, HYPRE_Complex *d_z, HYPRE_Complex a ) { - return hypreDevice_Axpyn(d_x, n, d_y, d_z, a); + return hypreDevice_Axpyzn((HYPRE_Int) n, d_x, d_y, d_z, a, 1.0); } /*-------------------------------------------------------------------- @@ -1483,7 +1480,7 @@ hypreDevice_IntAxpyn( HYPRE_Int *d_x, HYPRE_Int *d_z, HYPRE_Int a ) { - return hypreDevice_Axpyn(d_x, n, d_y, d_z, a); + return hypreDevice_Axpyzn((HYPRE_Int) n, d_x, d_y, d_z, a, 1); } /*-------------------------------------------------------------------- @@ -1497,7 +1494,22 @@ hypreDevice_BigIntAxpyn( HYPRE_BigInt *d_x, HYPRE_BigInt *d_z, HYPRE_BigInt a ) { - return hypreDevice_Axpyn(d_x, n, d_y, d_z, a); + return hypreDevice_Axpyzn((HYPRE_Int) n, d_x, d_y, d_z, a, 1); +} + +/*-------------------------------------------------------------------- + * hypreDevice_ComplexAxpyzn + *--------------------------------------------------------------------*/ + +HYPRE_Int +hypreDevice_ComplexAxpyzn( HYPRE_Int n, + HYPRE_Complex *d_x, + HYPRE_Complex *d_y, + HYPRE_Complex *d_z, + HYPRE_Complex a, + HYPRE_Complex b ) +{ + return hypreDevice_Axpyzn(n, d_x, d_y, d_z, a, b); } #if defined(HYPRE_USING_CURAND) diff --git a/src/utilities/memory.h b/src/utilities/memory.h index 939ef90b2..ce845351d 100644 --- a/src/utilities/memory.h +++ b/src/utilities/memory.h @@ -221,4 +221,3 @@ typedef void (*GPUMfreeFunc)(void *); #endif #endif - diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 0b85efc33..ca0d5a745 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -302,6 +302,8 @@ HYPRE_Int hypreDevice_IntAxpyn(HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_I HYPRE_Int a); HYPRE_Int hypreDevice_BigIntAxpyn(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt *d_y, HYPRE_BigInt *d_z, HYPRE_BigInt a); +HYPRE_Int hypreDevice_ComplexAxpyzn(HYPRE_Int n, HYPRE_Complex *d_x, HYPRE_Complex *d_y, + HYPRE_Complex *d_z, HYPRE_Complex a, HYPRE_Complex b); HYPRE_Int* hypreDevice_CsrRowPtrsToIndices(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr); HYPRE_Int hypreDevice_CsrRowPtrsToIndices_v2(HYPRE_Int nrows, HYPRE_Int nnz, HYPRE_Int *d_row_ptr, HYPRE_Int *d_row_ind);