GPU support for rownnz using IntArray
This commit is contained in:
parent
c09ab567e7
commit
8b02ab88d3
@ -483,9 +483,20 @@ hypre_ParMatmul( hypre_ParCSRMatrix *A,
|
||||
if (num_rownnz_diag_A != num_rows_diag_A &&
|
||||
num_rownnz_offd_A != num_rows_offd_A )
|
||||
{
|
||||
hypre_MergeOrderedArrays(num_rownnz_diag_A, A_diag_ir,
|
||||
num_rownnz_offd_A, A_offd_ir,
|
||||
&num_rownnz_A, &rownnz_A);
|
||||
hypre_IntArray arr_diag;
|
||||
hypre_IntArray arr_offd;
|
||||
hypre_IntArray arr_rownnz;
|
||||
|
||||
hypre_IntArrayData(&arr_diag) = A_diag_ir;
|
||||
hypre_IntArrayData(&arr_offd) = A_offd_ir;
|
||||
hypre_IntArraySize(&arr_diag) = num_rownnz_diag_A;
|
||||
hypre_IntArraySize(&arr_offd) = num_rownnz_offd_A;
|
||||
hypre_IntArrayMemoryLocation(&arr_rownnz) = memory_location_A;
|
||||
|
||||
hypre_MergeOrderedArrays(&arr_diag, &arr_offd, &arr_rownnz);
|
||||
|
||||
num_rownnz_A = hypre_IntArraySize(&arr_rownnz);
|
||||
rownnz_A = hypre_IntArrayData(&arr_rownnz);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -5193,9 +5204,20 @@ hypre_ParCSRMatrixAddHost( HYPRE_Complex alpha,
|
||||
if ((num_rownnz_diag_A < num_rows_diag_A) &&
|
||||
(num_rownnz_diag_B < num_rows_diag_B))
|
||||
{
|
||||
hypre_MergeOrderedArrays( num_rownnz_diag_A, rownnz_diag_A,
|
||||
num_rownnz_diag_B, rownnz_diag_B,
|
||||
&num_rownnz_diag_C, &rownnz_diag_C);
|
||||
hypre_IntArray arr_diagA;
|
||||
hypre_IntArray arr_diagB;
|
||||
hypre_IntArray arr_diagC;
|
||||
|
||||
hypre_IntArrayData(&arr_diagA) = rownnz_diag_A;
|
||||
hypre_IntArrayData(&arr_diagB) = rownnz_diag_B;
|
||||
hypre_IntArraySize(&arr_diagA) = num_rownnz_diag_A;
|
||||
hypre_IntArraySize(&arr_diagB) = num_rownnz_diag_B;
|
||||
hypre_IntArrayMemoryLocation(&arr_diagC) = memory_location_C;
|
||||
|
||||
hypre_MergeOrderedArrays(&arr_diagA, &arr_diagB, &arr_diagC);
|
||||
|
||||
num_rownnz_diag_C = hypre_IntArraySize(&arr_diagC);
|
||||
rownnz_diag_C = hypre_IntArrayData(&arr_diagC);
|
||||
}
|
||||
|
||||
/* Set nonzero rows data of offd_C */
|
||||
@ -5203,9 +5225,20 @@ hypre_ParCSRMatrixAddHost( HYPRE_Complex alpha,
|
||||
if ((num_rownnz_offd_A < num_rows_offd_A) &&
|
||||
(num_rownnz_offd_B < num_rows_offd_B))
|
||||
{
|
||||
hypre_MergeOrderedArrays( num_rownnz_offd_A, rownnz_offd_A,
|
||||
num_rownnz_offd_B, rownnz_offd_B,
|
||||
&num_rownnz_offd_C, &rownnz_offd_C);
|
||||
hypre_IntArray arr_offdA;
|
||||
hypre_IntArray arr_offdB;
|
||||
hypre_IntArray arr_offdC;
|
||||
|
||||
hypre_IntArrayData(&arr_offdA) = rownnz_offd_A;
|
||||
hypre_IntArrayData(&arr_offdB) = rownnz_offd_B;
|
||||
hypre_IntArraySize(&arr_offdA) = num_rownnz_offd_A;
|
||||
hypre_IntArraySize(&arr_offdB) = num_rownnz_offd_B;
|
||||
hypre_IntArrayMemoryLocation(&arr_offdC) = memory_location_C;
|
||||
|
||||
hypre_MergeOrderedArrays(&arr_offdA, &arr_offdB, &arr_offdC);
|
||||
|
||||
num_rownnz_offd_C = hypre_IntArraySize(&arr_offdC);
|
||||
rownnz_offd_C = hypre_IntArrayData(&arr_offdC);
|
||||
}
|
||||
|
||||
/* Set diag_C */
|
||||
|
||||
@ -396,9 +396,20 @@ hypre_CSRMatrixAddHost ( HYPRE_Complex alpha,
|
||||
nnzrows_C = nrows_A;
|
||||
if ((nnzrows_A < nrows_A) && (nnzrows_B < nrows_B))
|
||||
{
|
||||
hypre_MergeOrderedArrays(nnzrows_A, rownnz_A,
|
||||
nnzrows_B, rownnz_B,
|
||||
&nnzrows_C, &rownnz_C);
|
||||
hypre_IntArray arr_A;
|
||||
hypre_IntArray arr_B;
|
||||
hypre_IntArray arr_C;
|
||||
|
||||
hypre_IntArrayData(&arr_A) = rownnz_A;
|
||||
hypre_IntArrayData(&arr_B) = rownnz_B;
|
||||
hypre_IntArraySize(&arr_A) = nnzrows_A;
|
||||
hypre_IntArraySize(&arr_B) = nnzrows_B;
|
||||
hypre_IntArrayMemoryLocation(&arr_C) = memory_location_C;
|
||||
|
||||
hypre_MergeOrderedArrays(&arr_A, &arr_B, &arr_C);
|
||||
|
||||
nnzrows_C = hypre_IntArraySize(&arr_C);
|
||||
rownnz_C = hypre_IntArrayData(&arr_C);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@ -348,7 +348,6 @@ hypre_CSRMatrixSetRownnzHost( hypre_CSRMatrix *matrix )
|
||||
HYPRE_Int num_rows = hypre_CSRMatrixNumRows(matrix);
|
||||
HYPRE_Int *A_i = hypre_CSRMatrixI(matrix);
|
||||
HYPRE_Int *Arownnz = hypre_CSRMatrixRownnz(matrix);
|
||||
|
||||
HYPRE_Int i;
|
||||
HYPRE_Int irownnz = 0;
|
||||
|
||||
@ -362,8 +361,10 @@ hypre_CSRMatrixSetRownnzHost( hypre_CSRMatrix *matrix )
|
||||
|
||||
hypre_CSRMatrixNumRownnz(matrix) = irownnz;
|
||||
|
||||
HYPRE_MemoryLocation memory_location = hypre_CSRMatrixMemoryLocation(matrix);
|
||||
|
||||
/* Free old rownnz pointer */
|
||||
hypre_TFree(Arownnz, HYPRE_MEMORY_HOST);
|
||||
hypre_TFree(Arownnz, memory_location);
|
||||
|
||||
/* Set new rownnz pointer */
|
||||
if (irownnz == 0 || irownnz == num_rows)
|
||||
@ -372,7 +373,7 @@ hypre_CSRMatrixSetRownnzHost( hypre_CSRMatrix *matrix )
|
||||
}
|
||||
else
|
||||
{
|
||||
Arownnz = hypre_CTAlloc(HYPRE_Int, irownnz, HYPRE_MEMORY_HOST);
|
||||
Arownnz = hypre_CTAlloc(HYPRE_Int, irownnz, memory_location);
|
||||
irownnz = 0;
|
||||
for (i = 0; i < num_rows; i++)
|
||||
{
|
||||
|
||||
@ -1761,8 +1761,7 @@ typedef struct
|
||||
* 1) Merge sort can take advantage of eliminating duplicates.
|
||||
* 2) Merge sort is more efficiently parallelizable than qsort
|
||||
*/
|
||||
HYPRE_Int hypre_MergeOrderedArrays( HYPRE_Int size1, HYPRE_Int *array1, HYPRE_Int size2,
|
||||
HYPRE_Int *array2, HYPRE_Int *size3_ptr, HYPRE_Int **array3_ptr);
|
||||
HYPRE_Int hypre_MergeOrderedArrays( hypre_IntArray *array1, hypre_IntArray *array2, hypre_IntArray *array3 );
|
||||
void hypre_union2(HYPRE_Int n1, HYPRE_BigInt *arr1, HYPRE_Int n2, HYPRE_BigInt *arr2, HYPRE_Int *n3,
|
||||
HYPRE_BigInt *arr3, HYPRE_Int *map1, HYPRE_Int *map2);
|
||||
void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **sorted);
|
||||
|
||||
@ -20,46 +20,51 @@
|
||||
*--------------------------------------------------------------------------*/
|
||||
|
||||
HYPRE_Int
|
||||
hypre_MergeOrderedArrays( HYPRE_Int size1, HYPRE_Int *array1,
|
||||
HYPRE_Int size2, HYPRE_Int *array2,
|
||||
HYPRE_Int *size3_ptr, HYPRE_Int **array3_ptr )
|
||||
hypre_MergeOrderedArrays( hypre_IntArray *array1,
|
||||
hypre_IntArray *array2,
|
||||
hypre_IntArray *array3 )
|
||||
{
|
||||
HYPRE_Int *array3;
|
||||
HYPRE_Int i, j, k;
|
||||
HYPRE_Int i = 0, j = 0, k = 0;
|
||||
const HYPRE_Int size1 = hypre_IntArraySize(array1);
|
||||
const HYPRE_Int size2 = hypre_IntArraySize(array2);
|
||||
|
||||
array3 = hypre_CTAlloc(HYPRE_Int, (size1 + size2), HYPRE_MEMORY_HOST);
|
||||
HYPRE_MemoryLocation memory_location = hypre_IntArrayMemoryLocation(array3);
|
||||
|
||||
HYPRE_Int *array1_data = hypre_IntArrayData(array1);
|
||||
HYPRE_Int *array2_data = hypre_IntArrayData(array2);
|
||||
HYPRE_Int *array3_data = hypre_TAlloc(HYPRE_Int, size1 + size2, memory_location);
|
||||
|
||||
i = j = k = 0;
|
||||
while (i < size1 && j < size2)
|
||||
{
|
||||
if (array1[i] > array2[j])
|
||||
if (array1_data[i] > array2_data[j])
|
||||
{
|
||||
array3[k++] = array2[j++];
|
||||
array3_data[k++] = array2_data[j++];
|
||||
}
|
||||
else if (array1[i] < array2[j])
|
||||
else if (array1_data[i] < array2_data[j])
|
||||
{
|
||||
array3[k++] = array1[i++];
|
||||
array3_data[k++] = array1_data[i++];
|
||||
}
|
||||
else
|
||||
{
|
||||
array3[k++] = array1[i++];
|
||||
array3_data[k++] = array1_data[i++];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
while (i < size1)
|
||||
{
|
||||
array3[k++] = array1[i++];
|
||||
array3_data[k++] = array1_data[i++];
|
||||
}
|
||||
|
||||
while (j < size2)
|
||||
{
|
||||
array3[k++] = array2[j++];
|
||||
array3_data[k++] = array2_data[j++];
|
||||
}
|
||||
|
||||
/* Set pointers */
|
||||
*size3_ptr = k;
|
||||
*array3_ptr = hypre_TReAlloc(array3, HYPRE_Int, k, HYPRE_MEMORY_HOST);
|
||||
array3_data = hypre_TReAlloc_v2(array3_data, HYPRE_Int, size1 + size2, HYPRE_Int, k, memory_location);
|
||||
|
||||
hypre_IntArraySize(array3) = k;
|
||||
hypre_IntArrayData(array3) = array3_data;
|
||||
|
||||
return hypre_error_flag;
|
||||
}
|
||||
|
||||
@ -258,8 +258,7 @@ typedef struct
|
||||
* 1) Merge sort can take advantage of eliminating duplicates.
|
||||
* 2) Merge sort is more efficiently parallelizable than qsort
|
||||
*/
|
||||
HYPRE_Int hypre_MergeOrderedArrays( HYPRE_Int size1, HYPRE_Int *array1, HYPRE_Int size2,
|
||||
HYPRE_Int *array2, HYPRE_Int *size3_ptr, HYPRE_Int **array3_ptr);
|
||||
HYPRE_Int hypre_MergeOrderedArrays( hypre_IntArray *array1, hypre_IntArray *array2, hypre_IntArray *array3 );
|
||||
void hypre_union2(HYPRE_Int n1, HYPRE_BigInt *arr1, HYPRE_Int n2, HYPRE_BigInt *arr2, HYPRE_Int *n3,
|
||||
HYPRE_BigInt *arr3, HYPRE_Int *map1, HYPRE_Int *map2);
|
||||
void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **sorted);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user