[Multivec 2/5]: Extend multivector support (#693)
* Add new device functions needed by multivectors (`hypreDevice_IntStridedCopy` and `hypreDevice_IVAMXPMY`) * Extend `hypre_SeqVectorElmdivpy` to work with multivectors.
This commit is contained in:
		
							parent
							
								
									26f334002f
								
							
						
					
					
						commit
						662e886881
					
				| @ -1109,8 +1109,12 @@ hypre_BoomerAMGRelax7Jacobi( hypre_ParCSRMatrix *A, | ||||
|    hypre_Vector    l1_norms_vec; | ||||
|    hypre_ParVector l1_norms_parvec; | ||||
| 
 | ||||
|    hypre_VectorNumVectors(&l1_norms_vec) = 1; | ||||
|    hypre_VectorMultiVecStorageMethod(&l1_norms_vec) = 0; | ||||
|    hypre_VectorOwnsData(&l1_norms_vec) = 0; | ||||
|    hypre_VectorData(&l1_norms_vec) = l1_norms; | ||||
|    hypre_VectorSize(&l1_norms_vec) = num_rows; | ||||
| 
 | ||||
|    /* TODO XXX
 | ||||
|     * The next line is NOT 100% correct, which should be the memory location of l1_norms instead of f | ||||
|     * But how do I know it? As said, don't use raw pointers, don't use raw pointers! | ||||
|  | ||||
| @ -236,6 +236,7 @@ HYPRE_Int hypre_SeqVectorInitialize_v2( hypre_Vector *vector, | ||||
|                                         HYPRE_MemoryLocation memory_location ); | ||||
| HYPRE_Int hypre_SeqVectorInitialize ( hypre_Vector *vector ); | ||||
| HYPRE_Int hypre_SeqVectorSetDataOwner ( hypre_Vector *vector, HYPRE_Int owns_data ); | ||||
| HYPRE_Int hypre_SeqVectorSetSize ( hypre_Vector *vector, HYPRE_Int size ); | ||||
| hypre_Vector *hypre_SeqVectorRead ( char *file_name ); | ||||
| HYPRE_Int hypre_SeqVectorPrint ( hypre_Vector *vector, char *file_name ); | ||||
| HYPRE_Int hypre_SeqVectorSetConstantValues ( hypre_Vector *v, HYPRE_Complex value ); | ||||
|  | ||||
| @ -508,6 +508,7 @@ HYPRE_Int hypre_SeqVectorInitialize_v2( hypre_Vector *vector, | ||||
|                                         HYPRE_MemoryLocation memory_location ); | ||||
| HYPRE_Int hypre_SeqVectorInitialize ( hypre_Vector *vector ); | ||||
| HYPRE_Int hypre_SeqVectorSetDataOwner ( hypre_Vector *vector, HYPRE_Int owns_data ); | ||||
| HYPRE_Int hypre_SeqVectorSetSize ( hypre_Vector *vector, HYPRE_Int size ); | ||||
| hypre_Vector *hypre_SeqVectorRead ( char *file_name ); | ||||
| HYPRE_Int hypre_SeqVectorPrint ( hypre_Vector *vector, char *file_name ); | ||||
| HYPRE_Int hypre_SeqVectorSetConstantValues ( hypre_Vector *v, HYPRE_Complex value ); | ||||
|  | ||||
| @ -60,13 +60,11 @@ hypre_SeqMultiVectorCreate( HYPRE_Int size, HYPRE_Int num_vectors ) | ||||
| HYPRE_Int | ||||
| hypre_SeqVectorDestroy( hypre_Vector *vector ) | ||||
| { | ||||
|    HYPRE_Int ierr = 0; | ||||
| 
 | ||||
|    if (vector) | ||||
|    { | ||||
|       HYPRE_MemoryLocation memory_location = hypre_VectorMemoryLocation(vector); | ||||
| 
 | ||||
|       if ( hypre_VectorOwnsData(vector) ) | ||||
|       if (hypre_VectorOwnsData(vector)) | ||||
|       { | ||||
|          hypre_TFree(hypre_VectorData(vector), memory_location); | ||||
|       } | ||||
| @ -74,7 +72,7 @@ hypre_SeqVectorDestroy( hypre_Vector *vector ) | ||||
|       hypre_TFree(vector, HYPRE_MEMORY_HOST); | ||||
|    } | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
| @ -87,7 +85,6 @@ HYPRE_Int | ||||
| hypre_SeqVectorInitialize_v2( hypre_Vector *vector, HYPRE_MemoryLocation memory_location ) | ||||
| { | ||||
|    HYPRE_Int  size = hypre_VectorSize(vector); | ||||
|    HYPRE_Int  ierr = 0; | ||||
|    HYPRE_Int  num_vectors = hypre_VectorNumVectors(vector); | ||||
|    HYPRE_Int  multivec_storage_method = hypre_VectorMultiVecStorageMethod(vector); | ||||
| 
 | ||||
| @ -97,27 +94,28 @@ hypre_SeqVectorInitialize_v2( hypre_Vector *vector, HYPRE_MemoryLocation memory_ | ||||
|     * to be consistent with `memory_location' | ||||
|     * Otherwise, mismatches will exist and problems will be encountered | ||||
|     * when being used, and freed */ | ||||
|    if ( !hypre_VectorData(vector) ) | ||||
|    if (!hypre_VectorData(vector)) | ||||
|    { | ||||
|       hypre_VectorData(vector) = hypre_CTAlloc(HYPRE_Complex, num_vectors * size, memory_location); | ||||
|    } | ||||
| 
 | ||||
|    if ( multivec_storage_method == 0 ) | ||||
|    if (multivec_storage_method == 0) | ||||
|    { | ||||
|       hypre_VectorVectorStride(vector) = size; | ||||
|       hypre_VectorIndexStride(vector) = 1; | ||||
|       hypre_VectorIndexStride(vector)  = 1; | ||||
|    } | ||||
|    else if ( multivec_storage_method == 1 ) | ||||
|    else if (multivec_storage_method == 1) | ||||
|    { | ||||
|       hypre_VectorVectorStride(vector) = 1; | ||||
|       hypre_VectorIndexStride(vector) = num_vectors; | ||||
|       hypre_VectorIndexStride(vector)  = num_vectors; | ||||
|    } | ||||
|    else | ||||
|    { | ||||
|       ++ierr; | ||||
|       hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Invalid multivec storage method!\n"); | ||||
|       return hypre_error_flag; | ||||
|    } | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
| @ -127,11 +125,7 @@ hypre_SeqVectorInitialize_v2( hypre_Vector *vector, HYPRE_MemoryLocation memory_ | ||||
| HYPRE_Int | ||||
| hypre_SeqVectorInitialize( hypre_Vector *vector ) | ||||
| { | ||||
|    HYPRE_Int ierr; | ||||
| 
 | ||||
|    ierr = hypre_SeqVectorInitialize_v2( vector, hypre_VectorMemoryLocation(vector) ); | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_SeqVectorInitialize_v2(vector, hypre_VectorMemoryLocation(vector)); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
| @ -142,11 +136,28 @@ HYPRE_Int | ||||
| hypre_SeqVectorSetDataOwner( hypre_Vector *vector, | ||||
|                              HYPRE_Int     owns_data   ) | ||||
| { | ||||
|    HYPRE_Int    ierr = 0; | ||||
| 
 | ||||
|    hypre_VectorOwnsData(vector) = owns_data; | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
|  * hypre_SeqVectorSetSize | ||||
|  *--------------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypre_SeqVectorSetSize( hypre_Vector *vector, | ||||
|                         HYPRE_Int     size   ) | ||||
| { | ||||
|    HYPRE_Int  multivec_storage_method = hypre_VectorMultiVecStorageMethod(vector); | ||||
| 
 | ||||
|    hypre_VectorSize(vector) = size; | ||||
|    if (multivec_storage_method == 0) | ||||
|    { | ||||
|       hypre_VectorVectorStride(vector) = size; | ||||
|    } | ||||
| 
 | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
| @ -201,7 +212,7 @@ HYPRE_Int | ||||
| hypre_SeqVectorPrint( hypre_Vector *vector, | ||||
|                       char         *file_name ) | ||||
| { | ||||
|    FILE    *fp; | ||||
|    FILE          *fp; | ||||
| 
 | ||||
|    HYPRE_Complex *data; | ||||
|    HYPRE_Int      size, num_vectors, vecstride, idxstride; | ||||
| @ -209,8 +220,6 @@ hypre_SeqVectorPrint( hypre_Vector *vector, | ||||
|    HYPRE_Int      i, j; | ||||
|    HYPRE_Complex  value; | ||||
| 
 | ||||
|    HYPRE_Int      ierr = 0; | ||||
| 
 | ||||
|    num_vectors = hypre_VectorNumVectors(vector); | ||||
|    vecstride = hypre_VectorVectorStride(vector); | ||||
|    idxstride = hypre_VectorIndexStride(vector); | ||||
| @ -265,7 +274,7 @@ hypre_SeqVectorPrint( hypre_Vector *vector, | ||||
| 
 | ||||
|    fclose(fp); | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
| @ -281,51 +290,65 @@ hypre_SeqVectorSetConstantValues( hypre_Vector *v, | ||||
| #endif | ||||
| 
 | ||||
|    HYPRE_Complex *vector_data = hypre_VectorData(v); | ||||
|    HYPRE_Int      num_vectors = hypre_VectorNumVectors(v); | ||||
|    HYPRE_Int      size        = hypre_VectorSize(v); | ||||
|    HYPRE_Int      ierr  = 0; | ||||
|    HYPRE_Int      total_size  = size * num_vectors; | ||||
| 
 | ||||
|    size *= hypre_VectorNumVectors(v); | ||||
| 
 | ||||
|    //hypre_SeqVectorPrefetch(v, HYPRE_MEMORY_DEVICE);
 | ||||
| 
 | ||||
| #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) | ||||
|    if (size > 0) | ||||
|    /* Trivial case */ | ||||
|    if (total_size <= 0) | ||||
|    { | ||||
|       hypreDevice_ComplexFilln( vector_data, size, value ); | ||||
|       return hypre_error_flag; | ||||
|    } | ||||
| #elif defined(HYPRE_USING_SYCL) | ||||
|    if (size > 0) | ||||
|    { | ||||
|       HYPRE_ONEDPL_CALL( std::fill_n, vector_data, size, value ); | ||||
|    } | ||||
| #else | ||||
|    HYPRE_Int i; | ||||
| #if defined(HYPRE_USING_DEVICE_OPENMP) | ||||
|    #pragma omp target teams distribute parallel for private(i) is_device_ptr(vector_data) | ||||
| #elif defined(HYPRE_USING_OPENMP) | ||||
|    #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE | ||||
| #endif | ||||
|    for (i = 0; i < size; i++) | ||||
|    { | ||||
|       vector_data[i] = value; | ||||
|    } | ||||
| #endif /* defined(HYPRE_USING_CUDA)  || defined(HYPRE_USING_HIP) */ | ||||
| 
 | ||||
| #if defined(HYPRE_USING_GPU) | ||||
|    hypre_SyncComputeStream(hypre_handle()); | ||||
|    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1(hypre_VectorMemoryLocation(v)); | ||||
| 
 | ||||
|    //hypre_SeqVectorPrefetch(v, HYPRE_MEMORY_DEVICE);
 | ||||
|    if (exec == HYPRE_EXEC_DEVICE) | ||||
|    { | ||||
| #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) | ||||
|       hypreDevice_ComplexFilln(vector_data, total_size, value); | ||||
| 
 | ||||
| #elif defined(HYPRE_USING_SYCL) | ||||
|       HYPRE_ONEDPL_CALL(std::fill_n, vector_data, total_size, value); | ||||
| 
 | ||||
| #elif defined(HYPRE_USING_DEVICE_OPENMP) | ||||
|       HYPRE_Int i; | ||||
| 
 | ||||
|       #pragma omp target teams distribute parallel for private(i) is_device_ptr(vector_data) | ||||
|       for (i = 0; i < total_size; i++) | ||||
|       { | ||||
|          vector_data[i] = value; | ||||
|       } | ||||
| #endif | ||||
| 
 | ||||
|       hypre_SyncComputeStream(hypre_handle()); | ||||
|    } | ||||
|    else | ||||
| #endif /* defined(HYPRE_USING_GPU) */ | ||||
|    { | ||||
|       HYPRE_Int i; | ||||
| 
 | ||||
| #if defined(HYPRE_USING_OPENMP) | ||||
|       #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE | ||||
| #endif | ||||
|       for (i = 0; i < total_size; i++) | ||||
|       { | ||||
|          vector_data[i] = value; | ||||
|       } | ||||
|    } | ||||
| 
 | ||||
| #ifdef HYPRE_PROFILE | ||||
|    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime(); | ||||
| #endif | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
|  * hypre_SeqVectorSetRandomValues | ||||
|  * | ||||
|  *     returns vector of values randomly distributed between -1.0 and +1.0 | ||||
|  * returns vector of values randomly distributed between -1.0 and +1.0 | ||||
|  *--------------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| @ -335,9 +358,8 @@ hypre_SeqVectorSetRandomValues( hypre_Vector *v, | ||||
|    HYPRE_Complex *vector_data = hypre_VectorData(v); | ||||
|    HYPRE_Int      size        = hypre_VectorSize(v); | ||||
|    HYPRE_Int      i; | ||||
|    HYPRE_Int      ierr  = 0; | ||||
|    hypre_SeedRand(seed); | ||||
| 
 | ||||
|    hypre_SeedRand(seed); | ||||
|    size *= hypre_VectorNumVectors(v); | ||||
| 
 | ||||
|    if (hypre_GetActualMemLocation(hypre_VectorMemoryLocation(v)) == hypre_MEMORY_HOST) | ||||
| @ -360,7 +382,7 @@ hypre_SeqVectorSetRandomValues( hypre_Vector *v, | ||||
|       hypre_TFree(h_data, HYPRE_MEMORY_HOST); | ||||
|    } | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
| @ -377,8 +399,6 @@ hypre_SeqVectorCopy( hypre_Vector *x, | ||||
|    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime(); | ||||
| #endif | ||||
| 
 | ||||
|    HYPRE_Int ierr = 0; | ||||
| 
 | ||||
|    size_t size = hypre_min( hypre_VectorSize(x), hypre_VectorSize(y) ) * hypre_VectorNumVectors(x); | ||||
| 
 | ||||
|    hypre_TMemcpy( hypre_VectorData(y), | ||||
| @ -392,7 +412,7 @@ hypre_SeqVectorCopy( hypre_Vector *x, | ||||
|    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime(); | ||||
| #endif | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
| @ -443,7 +463,7 @@ hypre_SeqVectorCloneShallow( hypre_Vector *x ) | ||||
|    hypre_VectorMemoryLocation(y) = hypre_VectorMemoryLocation(x); | ||||
| 
 | ||||
|    hypre_VectorData(y) = hypre_VectorData(x); | ||||
|    hypre_SeqVectorSetDataOwner( y, 0 ); | ||||
|    hypre_SeqVectorSetDataOwner(y, 0); | ||||
|    hypre_SeqVectorInitialize(y); | ||||
| 
 | ||||
|    return y; | ||||
| @ -452,6 +472,7 @@ hypre_SeqVectorCloneShallow( hypre_Vector *x ) | ||||
| /*--------------------------------------------------------------------------
 | ||||
|  * hypre_SeqVectorScale | ||||
|  *--------------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypre_SeqVectorScale( HYPRE_Complex alpha, | ||||
|                       hypre_Vector *y ) | ||||
| @ -473,7 +494,6 @@ hypre_SeqVectorScale( HYPRE_Complex alpha, | ||||
| 
 | ||||
|    HYPRE_Complex *y_data = hypre_VectorData(y); | ||||
|    HYPRE_Int      size   = hypre_VectorSize(y); | ||||
|    HYPRE_Int      ierr = 0; | ||||
| 
 | ||||
|    size *= hypre_VectorNumVectors(y); | ||||
| 
 | ||||
| @ -526,7 +546,7 @@ hypre_SeqVectorScale( HYPRE_Complex alpha, | ||||
|    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime(); | ||||
| #endif | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
| @ -544,7 +564,6 @@ hypre_SeqVectorAxpy( HYPRE_Complex alpha, | ||||
|    HYPRE_Complex *x_data = hypre_VectorData(x); | ||||
|    HYPRE_Complex *y_data = hypre_VectorData(y); | ||||
|    HYPRE_Int      size   = hypre_VectorSize(x); | ||||
|    HYPRE_Int      ierr = 0; | ||||
| 
 | ||||
|    size *= hypre_VectorNumVectors(x); | ||||
| 
 | ||||
| @ -599,7 +618,7 @@ hypre_SeqVectorAxpy( HYPRE_Complex alpha, | ||||
|    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime(); | ||||
| #endif | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /* y = y + x ./ b */ | ||||
| @ -612,10 +631,26 @@ hypre_SeqVectorElmdivpy( hypre_Vector *x, | ||||
|    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime(); | ||||
| #endif | ||||
| 
 | ||||
|    HYPRE_Complex *x_data = hypre_VectorData(x); | ||||
|    HYPRE_Complex *b_data = hypre_VectorData(b); | ||||
|    HYPRE_Complex *y_data = hypre_VectorData(y); | ||||
|    HYPRE_Int      size   = hypre_VectorSize(b); | ||||
|    HYPRE_Complex *x_data        = hypre_VectorData(x); | ||||
|    HYPRE_Complex *b_data        = hypre_VectorData(b); | ||||
|    HYPRE_Complex *y_data        = hypre_VectorData(y); | ||||
|    HYPRE_Int      num_vectors_x = hypre_VectorNumVectors(x); | ||||
|    HYPRE_Int      num_vectors_y = hypre_VectorNumVectors(y); | ||||
|    HYPRE_Int      num_vectors_b = hypre_VectorNumVectors(b); | ||||
|    HYPRE_Int      size          = hypre_VectorSize(x); | ||||
| 
 | ||||
|    /* Sanity checks */ | ||||
|    if (hypre_VectorSize(x) != hypre_VectorSize(y) || | ||||
|        hypre_VectorSize(y) != hypre_VectorSize(b)) | ||||
|    { | ||||
|       hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Error: size of x, y, and b do not match!\n"); | ||||
|       return hypre_error_flag; | ||||
|    } | ||||
| 
 | ||||
|    /* row-wise multivec is not supportted */ | ||||
|    hypre_assert(hypre_VectorMultiVecStorageMethod(x) == 0); | ||||
|    hypre_assert(hypre_VectorMultiVecStorageMethod(b) == 0); | ||||
|    hypre_assert(hypre_VectorMultiVecStorageMethod(y) == 0); | ||||
| 
 | ||||
| #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL) | ||||
|    //HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_VectorMemoryLocation(x), hypre_VectorMemoryLocation(b) );
 | ||||
| @ -630,18 +665,84 @@ hypre_SeqVectorElmdivpy( hypre_Vector *x, | ||||
|       #pragma omp target teams distribute parallel for private(i) is_device_ptr(u_data,v_data,l1_norms) | ||||
|       #endif | ||||
|       */ | ||||
|       hypreDevice_IVAXPY(size, b_data, x_data, y_data); | ||||
| 
 | ||||
|       if (num_vectors_b == 1) | ||||
|       { | ||||
|          if (num_vectors_x == 1) | ||||
|          { | ||||
|             hypreDevice_IVAXPY(size, b_data, x_data, y_data); | ||||
|          } | ||||
|          else if (num_vectors_x == num_vectors_y) | ||||
|          { | ||||
|             hypreDevice_IVAMXPMY(num_vectors_x, size, b_data, x_data, y_data); | ||||
|          } | ||||
|          else | ||||
|          { | ||||
|             hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unsupported combination of num_vectors!\n"); | ||||
|             return hypre_error_flag; | ||||
|          } | ||||
|       } | ||||
|       else | ||||
|       { | ||||
|          hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unsupported combination of num_vectors!\n"); | ||||
|          return hypre_error_flag; | ||||
|       } | ||||
|    } | ||||
|    else | ||||
| #endif | ||||
|    { | ||||
|       HYPRE_Int i; | ||||
| #ifdef HYPRE_USING_OPENMP | ||||
|       #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE | ||||
| #endif | ||||
|       for (i = 0; i < size; i++) | ||||
|       HYPRE_Int i, j; | ||||
| 
 | ||||
|       if (num_vectors_b == 1) | ||||
|       { | ||||
|          y_data[i] += x_data[i] / b_data[i]; | ||||
|          if (num_vectors_x == 1 && num_vectors_y == 1) | ||||
|          { | ||||
| #ifdef HYPRE_USING_OPENMP | ||||
|             #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE | ||||
| #endif | ||||
|             for (i = 0; i < size; i++) | ||||
|             { | ||||
|                y_data[i] += x_data[i] / b_data[i]; | ||||
|             } | ||||
|          } | ||||
|          else if (num_vectors_x == 2 && num_vectors_y == 2) | ||||
|          { | ||||
| #ifdef HYPRE_USING_OPENMP | ||||
|             #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE | ||||
| #endif | ||||
|             for (i = 0; i < size; i++) | ||||
|             { | ||||
|                HYPRE_Complex  val = 1.0 / b_data[i]; | ||||
| 
 | ||||
|                y_data[i]        += x_data[i]        * val; | ||||
|                y_data[i + size] += x_data[i + size] * val; | ||||
|             } | ||||
|          } | ||||
|          else if (num_vectors_x == num_vectors_y) | ||||
|          { | ||||
| #ifdef HYPRE_USING_OPENMP | ||||
|             #pragma omp parallel for private(i, j) HYPRE_SMP_SCHEDULE | ||||
| #endif | ||||
|             for (i = 0; i < size; i++) | ||||
|             { | ||||
|                HYPRE_Complex  val = 1.0 / b_data[i]; | ||||
| 
 | ||||
|                for (j = 0; j < num_vectors_x; j++) | ||||
|                { | ||||
|                   y_data[i + size * j] += x_data[i + size * j] * val; | ||||
|                } | ||||
|             } | ||||
|          } | ||||
|          else | ||||
|          { | ||||
|             hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unsupported combination of num_vectors!\n"); | ||||
|             return hypre_error_flag; | ||||
|          } | ||||
|       } | ||||
|       else | ||||
|       { | ||||
|          hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Unsupported combination of num_vectors!\n"); | ||||
|          return hypre_error_flag; | ||||
|       } | ||||
|    } | ||||
| 
 | ||||
| @ -813,7 +914,6 @@ HYPRE_Complex hypre_SeqVectorSumElts( hypre_Vector *vector ) | ||||
| HYPRE_Int | ||||
| hypre_SeqVectorPrefetch( hypre_Vector *x, HYPRE_MemoryLocation memory_location) | ||||
| { | ||||
|    HYPRE_Int      ierr = 0; | ||||
| #ifdef HYPRE_USING_UNIFIED_MEMORY | ||||
|    if (hypre_VectorMemoryLocation(x) != HYPRE_MEMORY_DEVICE) | ||||
|    { | ||||
| @ -826,13 +926,13 @@ hypre_SeqVectorPrefetch( hypre_Vector *x, HYPRE_MemoryLocation memory_location) | ||||
| 
 | ||||
|    if (size == 0) | ||||
|    { | ||||
|       return ierr; | ||||
|       return hypre_error_flag; | ||||
|    } | ||||
| 
 | ||||
|    hypre_MemPrefetch(x_data, sizeof(HYPRE_Complex)*size, memory_location); | ||||
| #endif | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| #if 0 | ||||
| @ -850,7 +950,6 @@ hypre_SeqVectorMax( HYPRE_Complex alpha, | ||||
|    HYPRE_Complex *x_data = hypre_VectorData(x); | ||||
|    HYPRE_Complex *y_data = hypre_VectorData(y); | ||||
|    HYPRE_Int      size   = hypre_VectorSize(x); | ||||
|    HYPRE_Int      ierr = 0; | ||||
| 
 | ||||
|    size *= hypre_VectorNumVectors(x); | ||||
| 
 | ||||
| @ -886,6 +985,6 @@ hypre_SeqVectorMax( HYPRE_Complex alpha, | ||||
|    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime(); | ||||
| #endif | ||||
| 
 | ||||
|    return ierr; | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| #endif | ||||
|  | ||||
| @ -1785,10 +1785,13 @@ HYPRE_Int hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Comple | ||||
| HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y); | ||||
| HYPRE_Int hypreDevice_IVAXPYMarked(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, | ||||
|                                    HYPRE_Complex *y, HYPRE_Int *marker, HYPRE_Int marker_val); | ||||
| HYPRE_Int hypreDevice_IVAMXPMY(HYPRE_Int m, HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y); | ||||
| HYPRE_Int hypreDevice_IntFilln(HYPRE_Int *d_x, size_t n, HYPRE_Int v); | ||||
| HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v); | ||||
| HYPRE_Int hypreDevice_ComplexFilln(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v); | ||||
| HYPRE_Int hypreDevice_CharFilln(char *d_x, size_t n, char v); | ||||
| HYPRE_Int hypreDevice_IntStridedCopy ( HYPRE_Int size, HYPRE_Int stride, | ||||
|                                        HYPRE_Int *in, HYPRE_Int *out ); | ||||
| HYPRE_Int hypreDevice_IntScalen(HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int v); | ||||
| HYPRE_Int hypreDevice_ComplexScalen(HYPRE_Complex *d_x, size_t n, HYPRE_Complex *d_y, | ||||
|                                     HYPRE_Complex v); | ||||
|  | ||||
| @ -571,6 +571,101 @@ hypreDevice_IVAXPYMarked( HYPRE_Int      n, | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
|  * hypreGPUKernel_IVAMXPMY | ||||
|  * | ||||
|  * Device kernel for hypreDevice_IVAMXPMY. The template argument MM tells | ||||
|  * the maximum number of vectors in the unrolled loop | ||||
|  *--------------------------------------------------------------------------*/ | ||||
| 
 | ||||
| template <HYPRE_Int MM> | ||||
| __global__ void | ||||
| hypreGPUKernel_IVAMXPMY( hypre_DeviceItem &item, | ||||
|                          HYPRE_Int         m, | ||||
|                          HYPRE_Int         n, | ||||
|                          HYPRE_Complex    *a, | ||||
|                          HYPRE_Complex    *x, | ||||
|                          HYPRE_Complex    *y) | ||||
| { | ||||
|    HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item); | ||||
| 
 | ||||
|    if (i < n) | ||||
|    { | ||||
|       HYPRE_Complex val = 1.0 / a[i]; | ||||
|       if (MM > 0) | ||||
|       { | ||||
| #pragma unroll | ||||
|          for (HYPRE_Int j = 0; j < MM; j++) | ||||
|          { | ||||
|             y[i + j * n] += x[i + j * n] * val; | ||||
|          } | ||||
|       } | ||||
|       else | ||||
|       { | ||||
|          /* Generic case */ | ||||
|          for (HYPRE_Int j = 0; j < m; j++) | ||||
|          { | ||||
|             y[i + j * n] += x[i + j * n] * val; | ||||
|          } | ||||
|       } | ||||
|    } | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------------
 | ||||
|  * hypreDevice_IVAMXPMY | ||||
|  * | ||||
|  * Inverse Vector AXPY for m vectors x and y of size n stored column-wise: | ||||
|  * | ||||
|  *   y[i +       0] += x[i +       0] / a[i] | ||||
|  *   y[i +       n] += x[i +       n] / a[i] | ||||
|  *     ...           ... | ||||
|  *   y[i + (m-1)*n] += x[i + (m-1)*n] / a[i] | ||||
|  * | ||||
|  * Note: does not work for row-wise multivectors | ||||
|  *--------------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_IVAMXPMY( HYPRE_Int       m, | ||||
|                       HYPRE_Int       n, | ||||
|                       HYPRE_Complex  *a, | ||||
|                       HYPRE_Complex  *x, | ||||
|                       HYPRE_Complex  *y) | ||||
| { | ||||
|    /* trivial case */ | ||||
|    if (n <= 0) | ||||
|    { | ||||
|       return hypre_error_flag; | ||||
|    } | ||||
| 
 | ||||
|    dim3 bDim = hypre_GetDefaultDeviceBlockDimension(); | ||||
|    dim3 gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim); | ||||
| 
 | ||||
|    switch (m) | ||||
|    { | ||||
|       case 1: | ||||
|          HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAXPY, gDim, bDim, n, a, x, y ); | ||||
|          break; | ||||
| 
 | ||||
|       case 2: | ||||
|          HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAMXPMY<2>, gDim, bDim, m, n, a, x, y ); | ||||
|          break; | ||||
| 
 | ||||
|       case 3: | ||||
|          HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAMXPMY<3>, gDim, bDim, m, n, a, x, y ); | ||||
|          break; | ||||
| 
 | ||||
|       case 4: | ||||
|          HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAMXPMY<4>, gDim, bDim, m, n, a, x, y ); | ||||
|          break; | ||||
| 
 | ||||
|       default: | ||||
|          HYPRE_GPU_LAUNCH( hypreGPUKernel_IVAMXPMY<0>, gDim, bDim, m, n, a, x, y ); | ||||
|          break; | ||||
|    } | ||||
| 
 | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_CsrRowPtrsToIndices | ||||
|  *--------------------------------------------------------------------*/ | ||||
| @ -1618,87 +1713,6 @@ hypre_ResetDeviceRandGenerator( hypre_ulonglongint seed, | ||||
| 
 | ||||
| #endif /* #if defined(HYPRE_USING_CURAND) || defined(HYPRE_USING_ROCRAND) || defined(HYPRE_USING_ONEMKLRAND) */ | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreGPUKernel_filln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| template<typename T> | ||||
| __global__ void | ||||
| hypreGPUKernel_filln(hypre_DeviceItem &item, T *x, size_t n, T v) | ||||
| { | ||||
|    HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item); | ||||
| 
 | ||||
|    if (i < n) | ||||
|    { | ||||
|       x[i] = v; | ||||
|    } | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_Filln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| template<typename T> | ||||
| HYPRE_Int | ||||
| hypreDevice_Filln(T *d_x, size_t n, T v) | ||||
| { | ||||
| #if 0 | ||||
|    HYPRE_THRUST_CALL( fill_n, d_x, n, v); | ||||
| #else | ||||
|    if (n <= 0) | ||||
|    { | ||||
|       return hypre_error_flag; | ||||
|    } | ||||
| 
 | ||||
|    dim3 bDim = hypre_GetDefaultDeviceBlockDimension(); | ||||
|    dim3 gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim); | ||||
| 
 | ||||
|    HYPRE_GPU_LAUNCH( hypreGPUKernel_filln, gDim, bDim, d_x, n, v ); | ||||
| #endif | ||||
| 
 | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_ComplexFilln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_ComplexFilln(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v) | ||||
| { | ||||
|    return hypreDevice_Filln(d_x, n, v); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_CharFilln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_CharFilln(char *d_x, size_t n, char v) | ||||
| { | ||||
|    return hypreDevice_Filln(d_x, n, v); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_IntFilln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_IntFilln(HYPRE_Int *d_x, size_t n, HYPRE_Int v) | ||||
| { | ||||
|    return hypreDevice_Filln(d_x, n, v); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_BigIntFilln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v) | ||||
| { | ||||
|    return hypreDevice_Filln(d_x, n, v); | ||||
| } | ||||
| 
 | ||||
| #endif // #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL)
 | ||||
| 
 | ||||
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | ||||
| @ -1868,6 +1882,158 @@ hypreDevice_ComplexScalen( HYPRE_Complex *d_x, | ||||
|    return hypreDevice_Scalen(d_x, n, d_y, v); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreGPUKernel_filln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| template<typename T> | ||||
| __global__ void | ||||
| hypreGPUKernel_filln(hypre_DeviceItem &item, T *x, size_t n, T v) | ||||
| { | ||||
|    HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item); | ||||
| 
 | ||||
|    if (i < n) | ||||
|    { | ||||
|       x[i] = v; | ||||
|    } | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_Filln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| template<typename T> | ||||
| HYPRE_Int | ||||
| hypreDevice_Filln(T *d_x, size_t n, T v) | ||||
| { | ||||
| #if 0 | ||||
|    HYPRE_THRUST_CALL( fill_n, d_x, n, v); | ||||
| #else | ||||
|    if (n <= 0) | ||||
|    { | ||||
|       return hypre_error_flag; | ||||
|    } | ||||
| 
 | ||||
|    dim3 bDim = hypre_GetDefaultDeviceBlockDimension(); | ||||
|    dim3 gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim); | ||||
| 
 | ||||
|    HYPRE_GPU_LAUNCH( hypreGPUKernel_filln, gDim, bDim, d_x, n, v ); | ||||
| #endif | ||||
| 
 | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_ComplexFilln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_ComplexFilln( HYPRE_Complex *d_x, | ||||
|                           size_t         n, | ||||
|                           HYPRE_Complex  v ) | ||||
| { | ||||
|    return hypreDevice_Filln(d_x, n, v); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_CharFilln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_CharFilln( char   *d_x, | ||||
|                        size_t  n, | ||||
|                        char    v ) | ||||
| { | ||||
|    return hypreDevice_Filln(d_x, n, v); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_IntFilln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_IntFilln( HYPRE_Int *d_x, | ||||
|                       size_t     n, | ||||
|                       HYPRE_Int  v ) | ||||
| { | ||||
|    return hypreDevice_Filln(d_x, n, v); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_BigIntFilln | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_BigIntFilln( HYPRE_BigInt *d_x, | ||||
|                          size_t        n, | ||||
|                          HYPRE_BigInt  v) | ||||
| { | ||||
|    return hypreDevice_Filln(d_x, n, v); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreGPUKernel_StridedCopy | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| template<typename T> | ||||
| __global__ void | ||||
| hypreGPUKernel_StridedCopy(hypre_DeviceItem &item, | ||||
|                            HYPRE_Int         size, | ||||
|                            HYPRE_Int         stride, | ||||
|                            T                *in, | ||||
|                            T                *out ) | ||||
| { | ||||
|    HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item); | ||||
| 
 | ||||
|    if (i < size) | ||||
|    { | ||||
|       out[i] = in[i * stride]; | ||||
|    } | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_StridedCopy | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| template<typename T> | ||||
| HYPRE_Int | ||||
| hypreDevice_StridedCopy( HYPRE_Int  size, | ||||
|                          HYPRE_Int  stride, | ||||
|                          T         *in, | ||||
|                          T         *out ) | ||||
| { | ||||
|    if (size < 1 || stride < 1) | ||||
|    { | ||||
|       return hypre_error_flag; | ||||
|    } | ||||
| 
 | ||||
|    if (in == out) | ||||
|    { | ||||
|       hypre_error_w_msg(HYPRE_ERROR_GENERIC, "Cannot perform in-place strided copy"); | ||||
|       return hypre_error_flag; | ||||
|    } | ||||
| 
 | ||||
|    dim3 bDim = hypre_GetDefaultDeviceBlockDimension(); | ||||
|    dim3 gDim = hypre_GetDefaultDeviceGridDimension(size, "thread", bDim); | ||||
| 
 | ||||
|    HYPRE_GPU_LAUNCH( hypreGPUKernel_StridedCopy, gDim, bDim, size, stride, in, out ); | ||||
| 
 | ||||
|    return hypre_error_flag; | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_IntStridedCopy | ||||
|  *--------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypreDevice_IntStridedCopy( HYPRE_Int  size, | ||||
|                             HYPRE_Int  stride, | ||||
|                             HYPRE_Int *in, | ||||
|                             HYPRE_Int *out ) | ||||
| { | ||||
|    return hypreDevice_StridedCopy(size, stride, in, out); | ||||
| } | ||||
| 
 | ||||
| /*--------------------------------------------------------------------
 | ||||
|  * hypreDevice_CsrRowPtrsToIndicesWithRowNum | ||||
|  * | ||||
|  | ||||
| @ -282,10 +282,13 @@ HYPRE_Int hypreDevice_DiagScaleVector2(HYPRE_Int n, HYPRE_Int *A_i, HYPRE_Comple | ||||
| HYPRE_Int hypreDevice_IVAXPY(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y); | ||||
| HYPRE_Int hypreDevice_IVAXPYMarked(HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, | ||||
|                                    HYPRE_Complex *y, HYPRE_Int *marker, HYPRE_Int marker_val); | ||||
| HYPRE_Int hypreDevice_IVAMXPMY(HYPRE_Int m, HYPRE_Int n, HYPRE_Complex *a, HYPRE_Complex *x, HYPRE_Complex *y); | ||||
| HYPRE_Int hypreDevice_IntFilln(HYPRE_Int *d_x, size_t n, HYPRE_Int v); | ||||
| HYPRE_Int hypreDevice_BigIntFilln(HYPRE_BigInt *d_x, size_t n, HYPRE_BigInt v); | ||||
| HYPRE_Int hypreDevice_ComplexFilln(HYPRE_Complex *d_x, size_t n, HYPRE_Complex v); | ||||
| HYPRE_Int hypreDevice_CharFilln(char *d_x, size_t n, char v); | ||||
| HYPRE_Int hypreDevice_IntStridedCopy ( HYPRE_Int size, HYPRE_Int stride, | ||||
|                                        HYPRE_Int *in, HYPRE_Int *out ); | ||||
| HYPRE_Int hypreDevice_IntScalen(HYPRE_Int *d_x, size_t n, HYPRE_Int *d_y, HYPRE_Int v); | ||||
| HYPRE_Int hypreDevice_ComplexScalen(HYPRE_Complex *d_x, size_t n, HYPRE_Complex *d_y, | ||||
|                                     HYPRE_Complex v); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Victor A. Paludetto Magri
						Victor A. Paludetto Magri