GPU support for rownnz using IntArray
This commit is contained in:
		
							parent
							
								
									c09ab567e7
								
							
						
					
					
						commit
						8b02ab88d3
					
				| @ -483,9 +483,20 @@ hypre_ParMatmul( hypre_ParCSRMatrix  *A, | ||||
|    if (num_rownnz_diag_A != num_rows_diag_A && | ||||
|        num_rownnz_offd_A != num_rows_offd_A ) | ||||
|    { | ||||
|       hypre_MergeOrderedArrays(num_rownnz_diag_A, A_diag_ir, | ||||
|                                num_rownnz_offd_A, A_offd_ir, | ||||
|                                &num_rownnz_A, &rownnz_A); | ||||
|       hypre_IntArray arr_diag; | ||||
|       hypre_IntArray arr_offd; | ||||
|       hypre_IntArray arr_rownnz; | ||||
| 
 | ||||
|       hypre_IntArrayData(&arr_diag) = A_diag_ir; | ||||
|       hypre_IntArrayData(&arr_offd) = A_offd_ir; | ||||
|       hypre_IntArraySize(&arr_diag) = num_rownnz_diag_A; | ||||
|       hypre_IntArraySize(&arr_offd) = num_rownnz_offd_A; | ||||
|       hypre_IntArrayMemoryLocation(&arr_rownnz) = memory_location_A; | ||||
| 
 | ||||
|       hypre_MergeOrderedArrays(&arr_diag, &arr_offd, &arr_rownnz); | ||||
| 
 | ||||
|       num_rownnz_A = hypre_IntArraySize(&arr_rownnz); | ||||
|       rownnz_A     = hypre_IntArrayData(&arr_rownnz); | ||||
|    } | ||||
|    else | ||||
|    { | ||||
| @ -5193,9 +5204,20 @@ hypre_ParCSRMatrixAddHost( HYPRE_Complex        alpha, | ||||
|    if ((num_rownnz_diag_A < num_rows_diag_A) && | ||||
|        (num_rownnz_diag_B < num_rows_diag_B)) | ||||
|    { | ||||
|       hypre_MergeOrderedArrays( num_rownnz_diag_A,  rownnz_diag_A, | ||||
|                                 num_rownnz_diag_B,  rownnz_diag_B, | ||||
|                                 &num_rownnz_diag_C, &rownnz_diag_C); | ||||
|       hypre_IntArray arr_diagA; | ||||
|       hypre_IntArray arr_diagB; | ||||
|       hypre_IntArray arr_diagC; | ||||
| 
 | ||||
|       hypre_IntArrayData(&arr_diagA) = rownnz_diag_A; | ||||
|       hypre_IntArrayData(&arr_diagB) = rownnz_diag_B; | ||||
|       hypre_IntArraySize(&arr_diagA) = num_rownnz_diag_A; | ||||
|       hypre_IntArraySize(&arr_diagB) = num_rownnz_diag_B; | ||||
|       hypre_IntArrayMemoryLocation(&arr_diagC) = memory_location_C; | ||||
| 
 | ||||
|       hypre_MergeOrderedArrays(&arr_diagA, &arr_diagB, &arr_diagC); | ||||
| 
 | ||||
|       num_rownnz_diag_C = hypre_IntArraySize(&arr_diagC); | ||||
|       rownnz_diag_C     = hypre_IntArrayData(&arr_diagC); | ||||
|    } | ||||
| 
 | ||||
|    /* Set nonzero rows data of offd_C */ | ||||
| @ -5203,9 +5225,20 @@ hypre_ParCSRMatrixAddHost( HYPRE_Complex        alpha, | ||||
|    if ((num_rownnz_offd_A < num_rows_offd_A) && | ||||
|        (num_rownnz_offd_B < num_rows_offd_B)) | ||||
|    { | ||||
|       hypre_MergeOrderedArrays( num_rownnz_offd_A,  rownnz_offd_A, | ||||
|                                 num_rownnz_offd_B,  rownnz_offd_B, | ||||
|                                 &num_rownnz_offd_C, &rownnz_offd_C); | ||||
|       hypre_IntArray arr_offdA; | ||||
|       hypre_IntArray arr_offdB; | ||||
|       hypre_IntArray arr_offdC; | ||||
| 
 | ||||
|       hypre_IntArrayData(&arr_offdA) = rownnz_offd_A; | ||||
|       hypre_IntArrayData(&arr_offdB) = rownnz_offd_B; | ||||
|       hypre_IntArraySize(&arr_offdA) = num_rownnz_offd_A; | ||||
|       hypre_IntArraySize(&arr_offdB) = num_rownnz_offd_B; | ||||
|       hypre_IntArrayMemoryLocation(&arr_offdC) = memory_location_C; | ||||
| 
 | ||||
|       hypre_MergeOrderedArrays(&arr_offdA, &arr_offdB, &arr_offdC); | ||||
| 
 | ||||
|       num_rownnz_offd_C = hypre_IntArraySize(&arr_offdC); | ||||
|       rownnz_offd_C     = hypre_IntArrayData(&arr_offdC); | ||||
|    } | ||||
| 
 | ||||
|    /* Set diag_C */ | ||||
|  | ||||
| @ -396,9 +396,20 @@ hypre_CSRMatrixAddHost ( HYPRE_Complex    alpha, | ||||
|    nnzrows_C = nrows_A; | ||||
|    if ((nnzrows_A < nrows_A) && (nnzrows_B < nrows_B)) | ||||
|    { | ||||
|       hypre_MergeOrderedArrays(nnzrows_A, rownnz_A, | ||||
|                                nnzrows_B, rownnz_B, | ||||
|                                &nnzrows_C, &rownnz_C); | ||||
|       hypre_IntArray arr_A; | ||||
|       hypre_IntArray arr_B; | ||||
|       hypre_IntArray arr_C; | ||||
| 
 | ||||
|       hypre_IntArrayData(&arr_A) = rownnz_A; | ||||
|       hypre_IntArrayData(&arr_B) = rownnz_B; | ||||
|       hypre_IntArraySize(&arr_A) = nnzrows_A; | ||||
|       hypre_IntArraySize(&arr_B) = nnzrows_B; | ||||
|       hypre_IntArrayMemoryLocation(&arr_C) = memory_location_C; | ||||
| 
 | ||||
|       hypre_MergeOrderedArrays(&arr_A, &arr_B, &arr_C); | ||||
| 
 | ||||
|       nnzrows_C = hypre_IntArraySize(&arr_C); | ||||
|       rownnz_C  = hypre_IntArrayData(&arr_C); | ||||
|    } | ||||
|    else | ||||
|    { | ||||
|  | ||||
| @ -348,7 +348,6 @@ hypre_CSRMatrixSetRownnzHost( hypre_CSRMatrix *matrix ) | ||||
|    HYPRE_Int   num_rows = hypre_CSRMatrixNumRows(matrix); | ||||
|    HYPRE_Int  *A_i = hypre_CSRMatrixI(matrix); | ||||
|    HYPRE_Int  *Arownnz = hypre_CSRMatrixRownnz(matrix); | ||||
| 
 | ||||
|    HYPRE_Int   i; | ||||
|    HYPRE_Int   irownnz = 0; | ||||
| 
 | ||||
| @ -362,8 +361,10 @@ hypre_CSRMatrixSetRownnzHost( hypre_CSRMatrix *matrix ) | ||||
| 
 | ||||
|    hypre_CSRMatrixNumRownnz(matrix) = irownnz; | ||||
| 
 | ||||
|    HYPRE_MemoryLocation memory_location = hypre_CSRMatrixMemoryLocation(matrix); | ||||
| 
 | ||||
|    /* Free old rownnz pointer */ | ||||
|    hypre_TFree(Arownnz, HYPRE_MEMORY_HOST); | ||||
|    hypre_TFree(Arownnz, memory_location); | ||||
| 
 | ||||
|    /* Set new rownnz pointer */ | ||||
|    if (irownnz == 0 || irownnz == num_rows) | ||||
| @ -372,7 +373,7 @@ hypre_CSRMatrixSetRownnzHost( hypre_CSRMatrix *matrix ) | ||||
|    } | ||||
|    else | ||||
|    { | ||||
|       Arownnz = hypre_CTAlloc(HYPRE_Int, irownnz, HYPRE_MEMORY_HOST); | ||||
|       Arownnz = hypre_CTAlloc(HYPRE_Int, irownnz, memory_location); | ||||
|       irownnz = 0; | ||||
|       for (i = 0; i < num_rows; i++) | ||||
|       { | ||||
|  | ||||
| @ -1761,8 +1761,7 @@ typedef struct | ||||
|  * 1) Merge sort can take advantage of eliminating duplicates. | ||||
|  * 2) Merge sort is more efficiently parallelizable than qsort | ||||
|  */ | ||||
| HYPRE_Int hypre_MergeOrderedArrays( HYPRE_Int size1, HYPRE_Int *array1, HYPRE_Int size2, | ||||
|                                     HYPRE_Int *array2, HYPRE_Int *size3_ptr, HYPRE_Int **array3_ptr); | ||||
| HYPRE_Int hypre_MergeOrderedArrays( hypre_IntArray *array1, hypre_IntArray *array2, hypre_IntArray *array3 ); | ||||
| void hypre_union2(HYPRE_Int n1, HYPRE_BigInt *arr1, HYPRE_Int n2, HYPRE_BigInt *arr2, HYPRE_Int *n3, | ||||
|                   HYPRE_BigInt *arr3, HYPRE_Int *map1, HYPRE_Int *map2); | ||||
| void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **sorted); | ||||
|  | ||||
| @ -20,46 +20,51 @@ | ||||
|  *--------------------------------------------------------------------------*/ | ||||
| 
 | ||||
| HYPRE_Int | ||||
| hypre_MergeOrderedArrays( HYPRE_Int  size1,     HYPRE_Int  *array1, | ||||
|                           HYPRE_Int  size2,     HYPRE_Int  *array2, | ||||
|                           HYPRE_Int *size3_ptr, HYPRE_Int **array3_ptr ) | ||||
| hypre_MergeOrderedArrays( hypre_IntArray *array1, | ||||
|                           hypre_IntArray *array2, | ||||
|                           hypre_IntArray *array3 ) | ||||
| { | ||||
|    HYPRE_Int  *array3; | ||||
|    HYPRE_Int   i, j, k; | ||||
|    HYPRE_Int i = 0, j = 0, k = 0; | ||||
|    const HYPRE_Int size1 = hypre_IntArraySize(array1); | ||||
|    const HYPRE_Int size2 = hypre_IntArraySize(array2); | ||||
| 
 | ||||
|    array3 = hypre_CTAlloc(HYPRE_Int, (size1 + size2), HYPRE_MEMORY_HOST); | ||||
|    HYPRE_MemoryLocation memory_location = hypre_IntArrayMemoryLocation(array3); | ||||
| 
 | ||||
|    HYPRE_Int *array1_data = hypre_IntArrayData(array1); | ||||
|    HYPRE_Int *array2_data = hypre_IntArrayData(array2); | ||||
|    HYPRE_Int *array3_data = hypre_TAlloc(HYPRE_Int, size1 + size2, memory_location); | ||||
| 
 | ||||
|    i = j = k = 0; | ||||
|    while (i < size1 && j < size2) | ||||
|    { | ||||
|       if (array1[i] > array2[j]) | ||||
|       if (array1_data[i] > array2_data[j]) | ||||
|       { | ||||
|          array3[k++] = array2[j++]; | ||||
|          array3_data[k++] = array2_data[j++]; | ||||
|       } | ||||
|       else if (array1[i] < array2[j]) | ||||
|       else if (array1_data[i] < array2_data[j]) | ||||
|       { | ||||
|          array3[k++] = array1[i++]; | ||||
|          array3_data[k++] = array1_data[i++]; | ||||
|       } | ||||
|       else | ||||
|       { | ||||
|          array3[k++] = array1[i++]; | ||||
|          array3_data[k++] = array1_data[i++]; | ||||
|          j++; | ||||
|       } | ||||
|    } | ||||
| 
 | ||||
|    while (i < size1) | ||||
|    { | ||||
|       array3[k++] = array1[i++]; | ||||
|       array3_data[k++] = array1_data[i++]; | ||||
|    } | ||||
| 
 | ||||
|    while (j < size2) | ||||
|    { | ||||
|       array3[k++] = array2[j++]; | ||||
|       array3_data[k++] = array2_data[j++]; | ||||
|    } | ||||
| 
 | ||||
|    /* Set pointers */ | ||||
|    *size3_ptr  = k; | ||||
|    *array3_ptr = hypre_TReAlloc(array3, HYPRE_Int, k, HYPRE_MEMORY_HOST); | ||||
|    array3_data = hypre_TReAlloc_v2(array3_data, HYPRE_Int, size1 + size2, HYPRE_Int, k, memory_location); | ||||
| 
 | ||||
|    hypre_IntArraySize(array3) = k; | ||||
|    hypre_IntArrayData(array3) = array3_data; | ||||
| 
 | ||||
|    return hypre_error_flag; | ||||
| } | ||||
|  | ||||
| @ -258,8 +258,7 @@ typedef struct | ||||
|  * 1) Merge sort can take advantage of eliminating duplicates. | ||||
|  * 2) Merge sort is more efficiently parallelizable than qsort | ||||
|  */ | ||||
| HYPRE_Int hypre_MergeOrderedArrays( HYPRE_Int size1, HYPRE_Int *array1, HYPRE_Int size2, | ||||
|                                     HYPRE_Int *array2, HYPRE_Int *size3_ptr, HYPRE_Int **array3_ptr); | ||||
| HYPRE_Int hypre_MergeOrderedArrays( hypre_IntArray *array1, hypre_IntArray *array2, hypre_IntArray *array3 ); | ||||
| void hypre_union2(HYPRE_Int n1, HYPRE_BigInt *arr1, HYPRE_Int n2, HYPRE_BigInt *arr2, HYPRE_Int *n3, | ||||
|                   HYPRE_BigInt *arr3, HYPRE_Int *map1, HYPRE_Int *map2); | ||||
| void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **sorted); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Ruipeng Li
						Ruipeng Li