From 8730346c03468e1d63e8dd16ef82b64917b1d28a Mon Sep 17 00:00:00 2001 From: Wayne Mitchell Date: Thu, 9 Jun 2022 19:39:03 +0000 Subject: [PATCH] More debugging code and astyle. About to sync up with Ruipeng. --- src/parcsr_mv/par_csr_triplemat_device.c | 20 +++++++++++++++++--- src/seq_mv/csr_matrix.c | 10 ++++++---- src/utilities/_hypre_utilities.h | 3 ++- src/utilities/merge_sort.c | 3 ++- src/utilities/protos.h | 3 ++- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/parcsr_mv/par_csr_triplemat_device.c b/src/parcsr_mv/par_csr_triplemat_device.c index 6631ba566..9134fb06a 100644 --- a/src/parcsr_mv/par_csr_triplemat_device.c +++ b/src/parcsr_mv/par_csr_triplemat_device.c @@ -785,7 +785,7 @@ hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R, /* WM: debug */ HYPRE_Int my_id; hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id); - if (my_id == 0) + if (my_id == 0 && hypre_ParCSRMatrixNumRows(A) > 400) { hypre_CSRMatrixPrint(Cbar, "Cbar"); hypre_CSRMatrixPrint(Cext, "Cext"); @@ -805,7 +805,7 @@ hypre_ParCSRMatrixRAPKTDevice( hypre_ParCSRMatrix *R, &num_cols_offd_C, &col_map_offd_C); /* WM: debug */ - if (my_id == 0) + if (my_id == 0 && hypre_ParCSRMatrixNumRows(A) > 400) { hypre_CSRMatrixPrint(C_diag, "C_diag"); hypre_CSRMatrixPrint(C_offd, "C_offd"); @@ -927,6 +927,7 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg, // Convert Cext from BigJ to J // Cext offd #if defined(HYPRE_USING_SYCL) + /* WM: debug - the below is suspicious... */ auto off_end = hypreSycl_copy_if( oneapi::dpl::make_zip_iterator(oneapi::dpl::counting_iterator(0), Cext_bigj), oneapi::dpl::make_zip_iterator(oneapi::dpl::counting_iterator(0), @@ -982,6 +983,7 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg, // Cext diag #if defined(HYPRE_USING_SYCL) + /* WM: debug - the below is suspicious... */ auto dia_end = hypreSycl_copy_if( oneapi::dpl::make_zip_iterator(oneapi::dpl::counting_iterator(0), Cext_bigj), oneapi::dpl::make_zip_iterator(oneapi::dpl::counting_iterator(0), @@ -1106,6 +1108,10 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg, t1 = hypre_MPI_Wtime(); #endif hypreDevice_CSRSpGemm(IE, CC, &Cz); + /* WM: debug */ + hypre_CSRMatrixPrint(IE, "IE"); + hypre_CSRMatrixPrint(CC, "CC"); + hypre_CSRMatrixPrint(Cz, "Cz"); hypre_CSRMatrixDestroy(IE); hypre_CSRMatrixDestroy(CC); @@ -1151,6 +1157,10 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg, HYPRE_Complex *C_diag_a = hypre_CSRMatrixData(C_diag); #if defined(HYPRE_USING_SYCL) + /* WM: debug */ + hypre_printf("WM: debug - zmp_a = "); + for (auto i = 0; i < 100; i++) hypre_printf("%f ", zmp_a[i]); + hypre_printf("\n"); auto new_end = hypreSycl_copy_if( oneapi::dpl::make_zip_iterator(zmp_i, zmp_j, zmp_a), oneapi::dpl::make_zip_iterator(zmp_i, zmp_j, zmp_a) + local_nnz_C, zmp_j, @@ -1164,6 +1174,11 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg, zmp_j, thrust::make_zip_iterator(thrust::make_tuple(C_diag_ii, C_diag_j, C_diag_a)), pred ); + /* WM: debug */ + hypre_printf("WM: debug - C_diag_a = "); + for (auto i = 0; i < 100; i++) hypre_printf("%f ", C_diag_a[i]); + hypre_printf("\n"); + hypre_assert( std::get<0>(new_end.base()) == C_offd_ii + nnz_C_offd ); hypre_assert( thrust::get<0>(new_end.get_iterator_tuple()) == C_diag_ii + nnz_C_diag ); #endif hypreDevice_CsrRowIndicesToPtrs_v2(hypre_CSRMatrixNumRows(C_diag), nnz_C_diag, C_diag_ii, @@ -1182,7 +1197,6 @@ hypre_ParCSRTMatMatPartialAddDevice( hypre_ParCSRCommPkg *comm_pkg, zmp_j, oneapi::dpl::make_zip_iterator(C_offd_ii, C_offd_j, C_offd_a), std::not_fn(pred) ); - hypre_assert( std::get<0>(new_end.base()) == C_offd_ii + nnz_C_offd ); #else new_end = HYPRE_THRUST_CALL( copy_if, thrust::make_zip_iterator(thrust::make_tuple(zmp_i, zmp_j, zmp_a)), diff --git a/src/seq_mv/csr_matrix.c b/src/seq_mv/csr_matrix.c index 5f38c01f9..306bb99d7 100644 --- a/src/seq_mv/csr_matrix.c +++ b/src/seq_mv/csr_matrix.c @@ -588,7 +588,8 @@ hypre_CSRMatrixPrintMM( hypre_CSRMatrix *matrix, HYPRE_Int trans, const char *file_name ) { - hypre_assert(hypre_CSRMatrixI(matrix)[hypre_CSRMatrixNumRows(matrix)] == hypre_CSRMatrixNumNonzeros(matrix)); + hypre_assert(hypre_CSRMatrixI(matrix)[hypre_CSRMatrixNumRows(matrix)] == hypre_CSRMatrixNumNonzeros( + matrix)); FILE *fp = file_name ? fopen(file_name, "w") : stdout; @@ -611,9 +612,10 @@ hypre_CSRMatrixPrintMM( hypre_CSRMatrix *matrix, hypre_fprintf(fp, "%%%%MatrixMarket matrix coordinate pattern general\n"); } - hypre_fprintf(fp, "%d %d %d\n", trans ? hypre_CSRMatrixNumCols(matrix) : hypre_CSRMatrixNumRows(matrix), - trans ? hypre_CSRMatrixNumRows(matrix) : hypre_CSRMatrixNumCols(matrix), - hypre_CSRMatrixNumNonzeros(matrix)); + hypre_fprintf(fp, "%d %d %d\n", + trans ? hypre_CSRMatrixNumCols(matrix) : hypre_CSRMatrixNumRows(matrix), + trans ? hypre_CSRMatrixNumRows(matrix) : hypre_CSRMatrixNumCols(matrix), + hypre_CSRMatrixNumNonzeros(matrix)); HYPRE_Int i, j; diff --git a/src/utilities/_hypre_utilities.h b/src/utilities/_hypre_utilities.h index 6468f6228..e907a9d1d 100644 --- a/src/utilities/_hypre_utilities.h +++ b/src/utilities/_hypre_utilities.h @@ -1761,7 +1761,8 @@ typedef struct * 1) Merge sort can take advantage of eliminating duplicates. * 2) Merge sort is more efficiently parallelizable than qsort */ -HYPRE_Int hypre_MergeOrderedArrays( hypre_IntArray *array1, hypre_IntArray *array2, hypre_IntArray *array3 ); +HYPRE_Int hypre_MergeOrderedArrays( hypre_IntArray *array1, hypre_IntArray *array2, + hypre_IntArray *array3 ); void hypre_union2(HYPRE_Int n1, HYPRE_BigInt *arr1, HYPRE_Int n2, HYPRE_BigInt *arr2, HYPRE_Int *n3, HYPRE_BigInt *arr3, HYPRE_Int *map1, HYPRE_Int *map2); void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **sorted); diff --git a/src/utilities/merge_sort.c b/src/utilities/merge_sort.c index 80c76ed97..65e8217e4 100644 --- a/src/utilities/merge_sort.c +++ b/src/utilities/merge_sort.c @@ -61,7 +61,8 @@ hypre_MergeOrderedArrays( hypre_IntArray *array1, array3_data[k++] = array2_data[j++]; } - array3_data = hypre_TReAlloc_v2(array3_data, HYPRE_Int, size1 + size2, HYPRE_Int, k, memory_location); + array3_data = hypre_TReAlloc_v2(array3_data, HYPRE_Int, size1 + size2, HYPRE_Int, k, + memory_location); hypre_IntArraySize(array3) = k; hypre_IntArrayData(array3) = array3_data; diff --git a/src/utilities/protos.h b/src/utilities/protos.h index 78797212b..e847339a5 100644 --- a/src/utilities/protos.h +++ b/src/utilities/protos.h @@ -258,7 +258,8 @@ typedef struct * 1) Merge sort can take advantage of eliminating duplicates. * 2) Merge sort is more efficiently parallelizable than qsort */ -HYPRE_Int hypre_MergeOrderedArrays( hypre_IntArray *array1, hypre_IntArray *array2, hypre_IntArray *array3 ); +HYPRE_Int hypre_MergeOrderedArrays( hypre_IntArray *array1, hypre_IntArray *array2, + hypre_IntArray *array3 ); void hypre_union2(HYPRE_Int n1, HYPRE_BigInt *arr1, HYPRE_Int n2, HYPRE_BigInt *arr2, HYPRE_Int *n3, HYPRE_BigInt *arr3, HYPRE_Int *map1, HYPRE_Int *map2); void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **sorted);