From cea24d824de72ccfc8b0df3640b9f2a4dfb34a8d Mon Sep 17 00:00:00 2001 From: falgout Date: Tue, 11 Apr 2000 21:05:51 +0000 Subject: [PATCH] First OpenMP threading code. --- parcsr_linear_solvers/par_coarsen.c | 4 ++++ parcsr_linear_solvers/par_interp.c | 16 ++++++++++++++-- parcsr_linear_solvers/par_relax.c | 17 +++++++++++++++++ parcsr_ls/par_coarsen.c | 4 ++++ parcsr_ls/par_interp.c | 16 ++++++++++++++-- parcsr_ls/par_relax.c | 17 +++++++++++++++++ seq_matrix_vector/csr_matvec.c | 19 +++++++++++++++++++ seq_matrix_vector/vector.c | 13 ++++++++++++- seq_mv/csr_matvec.c | 19 +++++++++++++++++++ seq_mv/vector.c | 13 ++++++++++++- 10 files changed, 132 insertions(+), 6 deletions(-) diff --git a/parcsr_linear_solvers/par_coarsen.c b/parcsr_linear_solvers/par_coarsen.c index ebc6e0029..717d1436d 100644 --- a/parcsr_linear_solvers/par_coarsen.c +++ b/parcsr_linear_solvers/par_coarsen.c @@ -258,6 +258,8 @@ hypre_ParAMGCoarsen( hypre_ParCSRMatrix *A, /* give S same nonzero structure as A */ hypre_ParCSRMatrixCopy(A,S,0); +#define HYPRE_SMP_PRIVATE i,diag,row_scale,row_sum,jA +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_variables; i++) { diag = A_diag_data[A_diag_i[i]]; @@ -356,6 +358,7 @@ hypre_ParAMGCoarsen( hypre_ParCSRMatrix *A, * that builds interpolation would have to be modified first. *----------------------------------------------------------------*/ +/* RDF: not sure if able to thread this loop */ jS = 0; for (i = 0; i < num_variables; i++) { @@ -372,6 +375,7 @@ hypre_ParAMGCoarsen( hypre_ParCSRMatrix *A, S_diag_i[num_variables] = jS; hypre_CSRMatrixNumNonzeros(S_diag) = jS; +/* RDF: not sure if able to thread this loop */ jS = 0; for (i = 0; i < num_variables; i++) { diff --git a/parcsr_linear_solvers/par_interp.c b/parcsr_linear_solvers/par_interp.c index 6a9293317..8247b26d7 100644 --- a/parcsr_linear_solvers/par_interp.c +++ b/parcsr_linear_solvers/par_interp.c @@ -196,6 +196,10 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, num_cpts_global = hypre_CTAlloc(int, num_procs+1); num_cpts_local = 0; +#define HYPRE_SMP_PRIVATE i +#define HYPRE_SMP_REDUCTION_OP + +#define HYPRE_SMP_REDUCTION_VARS num_cpts_local +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n_fine; i++) { if (CF_marker[i] >= 0) num_cpts_local++; @@ -239,6 +243,8 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, coarse_counter = 0; fine_to_coarse = hypre_CTAlloc(int, n_fine); +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1; jj_counter = start_indexing; @@ -247,7 +253,8 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, /*----------------------------------------------------------------------- * Loop over fine grid. *-----------------------------------------------------------------------*/ - + +/* RDF: this looks a little tricky, but doable */ for (i = 0; i < n_fine; i++) { @@ -293,7 +300,6 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, } } - /*----------------------------------------------------------------------- * Allocate arrays. *-----------------------------------------------------------------------*/ @@ -322,11 +328,15 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, jj_counter = start_indexing; jj_counter_offd = start_indexing; +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n_fine; i++) { P_marker[i] = -1; } +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols_A_offd; i++) { P_marker_offd[i] = -1; @@ -350,6 +360,8 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, fine_to_coarse_offd = hypre_CTAlloc(int, num_cols_A_offd); +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n_fine; i++) fine_to_coarse[i] += my_first_cpt; index = 0; for (i = 0; i < num_sends; i++) diff --git a/parcsr_linear_solvers/par_relax.c b/parcsr_linear_solvers/par_relax.c index 269621360..040374ac9 100644 --- a/parcsr_linear_solvers/par_relax.c +++ b/parcsr_linear_solvers/par_relax.c @@ -135,6 +135,8 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { Vtemp_data[i] = u_data[i]; @@ -151,6 +153,8 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, if (relax_points == 0) { +#define HYPRE_SMP_PRIVATE i,ii,jj,res +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { @@ -183,6 +187,8 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, else { +#define HYPRE_SMP_PRIVATE i,ii,jj,res +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { @@ -232,6 +238,8 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, *-----------------------------------------------------------------*/ hypre_ParCSRMatrixMatvec(-1.0,A, u, 1.0, Vtemp); +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { @@ -292,6 +300,9 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, if (relax_points == 0) { +/* RDF: This is doable. Add an additional outer loop that manually + * breaks up the i-loop into NumThreads parts. Do the interior + * of each one of these with GS, and the boundary with Jacobi */ for (i = 0; i < n; i++) /* interior points first */ { @@ -323,6 +334,9 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, else { +/* RDF: This is doable. Add an additional outer loop that manually + * breaks up the i-loop into NumThreads parts. Do the interior + * of each one of these with GS, and the boundary with Jacobi */ for (i = 0; i < n; i++) /* relax interior points */ { @@ -536,6 +550,9 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, *-----------------------------------------------------------------*/ if (relax_points == 0) { +/* RDF: This is doable. Add an additional outer loop that manually + * breaks up the i-loop into NumThreads parts. Do the interior + * of each one of these with GS, and the boundary with Jacobi */ for (i = 0; i < n; i++) { diff --git a/parcsr_ls/par_coarsen.c b/parcsr_ls/par_coarsen.c index ebc6e0029..717d1436d 100644 --- a/parcsr_ls/par_coarsen.c +++ b/parcsr_ls/par_coarsen.c @@ -258,6 +258,8 @@ hypre_ParAMGCoarsen( hypre_ParCSRMatrix *A, /* give S same nonzero structure as A */ hypre_ParCSRMatrixCopy(A,S,0); +#define HYPRE_SMP_PRIVATE i,diag,row_scale,row_sum,jA +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_variables; i++) { diag = A_diag_data[A_diag_i[i]]; @@ -356,6 +358,7 @@ hypre_ParAMGCoarsen( hypre_ParCSRMatrix *A, * that builds interpolation would have to be modified first. *----------------------------------------------------------------*/ +/* RDF: not sure if able to thread this loop */ jS = 0; for (i = 0; i < num_variables; i++) { @@ -372,6 +375,7 @@ hypre_ParAMGCoarsen( hypre_ParCSRMatrix *A, S_diag_i[num_variables] = jS; hypre_CSRMatrixNumNonzeros(S_diag) = jS; +/* RDF: not sure if able to thread this loop */ jS = 0; for (i = 0; i < num_variables; i++) { diff --git a/parcsr_ls/par_interp.c b/parcsr_ls/par_interp.c index 6a9293317..8247b26d7 100644 --- a/parcsr_ls/par_interp.c +++ b/parcsr_ls/par_interp.c @@ -196,6 +196,10 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, num_cpts_global = hypre_CTAlloc(int, num_procs+1); num_cpts_local = 0; +#define HYPRE_SMP_PRIVATE i +#define HYPRE_SMP_REDUCTION_OP + +#define HYPRE_SMP_REDUCTION_VARS num_cpts_local +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n_fine; i++) { if (CF_marker[i] >= 0) num_cpts_local++; @@ -239,6 +243,8 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, coarse_counter = 0; fine_to_coarse = hypre_CTAlloc(int, n_fine); +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n_fine; i++) fine_to_coarse[i] = -1; jj_counter = start_indexing; @@ -247,7 +253,8 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, /*----------------------------------------------------------------------- * Loop over fine grid. *-----------------------------------------------------------------------*/ - + +/* RDF: this looks a little tricky, but doable */ for (i = 0; i < n_fine; i++) { @@ -293,7 +300,6 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, } } - /*----------------------------------------------------------------------- * Allocate arrays. *-----------------------------------------------------------------------*/ @@ -322,11 +328,15 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, jj_counter = start_indexing; jj_counter_offd = start_indexing; +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n_fine; i++) { P_marker[i] = -1; } +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols_A_offd; i++) { P_marker_offd[i] = -1; @@ -350,6 +360,8 @@ hypre_ParAMGBuildInterp( hypre_ParCSRMatrix *A, fine_to_coarse_offd = hypre_CTAlloc(int, num_cols_A_offd); +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n_fine; i++) fine_to_coarse[i] += my_first_cpt; index = 0; for (i = 0; i < num_sends; i++) diff --git a/parcsr_ls/par_relax.c b/parcsr_ls/par_relax.c index 269621360..040374ac9 100644 --- a/parcsr_ls/par_relax.c +++ b/parcsr_ls/par_relax.c @@ -135,6 +135,8 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, * Copy current approximation into temporary vector. *-----------------------------------------------------------------*/ +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { Vtemp_data[i] = u_data[i]; @@ -151,6 +153,8 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, if (relax_points == 0) { +#define HYPRE_SMP_PRIVATE i,ii,jj,res +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { @@ -183,6 +187,8 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, else { +#define HYPRE_SMP_PRIVATE i,ii,jj,res +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { @@ -232,6 +238,8 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, *-----------------------------------------------------------------*/ hypre_ParCSRMatrixMatvec(-1.0,A, u, 1.0, Vtemp); +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < n; i++) { @@ -292,6 +300,9 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, if (relax_points == 0) { +/* RDF: This is doable. Add an additional outer loop that manually + * breaks up the i-loop into NumThreads parts. Do the interior + * of each one of these with GS, and the boundary with Jacobi */ for (i = 0; i < n; i++) /* interior points first */ { @@ -323,6 +334,9 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, else { +/* RDF: This is doable. Add an additional outer loop that manually + * breaks up the i-loop into NumThreads parts. Do the interior + * of each one of these with GS, and the boundary with Jacobi */ for (i = 0; i < n; i++) /* relax interior points */ { @@ -536,6 +550,9 @@ int hypre_ParAMGRelax( hypre_ParCSRMatrix *A, *-----------------------------------------------------------------*/ if (relax_points == 0) { +/* RDF: This is doable. Add an additional outer loop that manually + * breaks up the i-loop into NumThreads parts. Do the interior + * of each one of these with GS, and the boundary with Jacobi */ for (i = 0; i < n; i++) { diff --git a/seq_matrix_vector/csr_matvec.c b/seq_matrix_vector/csr_matvec.c index 62e79d961..e187d6ece 100644 --- a/seq_matrix_vector/csr_matvec.c +++ b/seq_matrix_vector/csr_matvec.c @@ -68,6 +68,8 @@ hypre_CSRMatrixMatvec( double alpha, if (alpha == 0.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) y_data[i] *= beta; @@ -84,11 +86,15 @@ hypre_CSRMatrixMatvec( double alpha, { if (temp == 0.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) y_data[i] = 0.0; } else { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) y_data[i] *= temp; } @@ -98,6 +104,8 @@ hypre_CSRMatrixMatvec( double alpha, * y += A*x *-----------------------------------------------------------------*/ +#define HYPRE_SMP_PRIVATE i,jj,j +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) @@ -113,6 +121,8 @@ hypre_CSRMatrixMatvec( double alpha, if (alpha != 1.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) y_data[i] *= alpha; } @@ -177,6 +187,8 @@ hypre_CSRMatrixMatvecT( double alpha, if (alpha == 0.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols; i++) y_data[i] *= beta; @@ -193,11 +205,15 @@ hypre_CSRMatrixMatvecT( double alpha, { if (temp == 0.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols; i++) y_data[i] = 0.0; } else { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols; i++) y_data[i] *= temp; } @@ -207,6 +223,7 @@ hypre_CSRMatrixMatvecT( double alpha, * y += A^T*x *-----------------------------------------------------------------*/ +/* RDF: have to think about how to thread this loop */ for (i = 0; i < num_rows; i++) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) @@ -222,6 +239,8 @@ hypre_CSRMatrixMatvecT( double alpha, if (alpha != 1.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols; i++) y_data[i] *= alpha; } diff --git a/seq_matrix_vector/vector.c b/seq_matrix_vector/vector.c index 8ec543c8b..bc8e60bc8 100644 --- a/seq_matrix_vector/vector.c +++ b/seq_matrix_vector/vector.c @@ -178,6 +178,8 @@ hypre_VectorSetConstantValues( hypre_Vector *v, int ierr = 0; +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < size; i++) vector_data[i] = value; @@ -202,6 +204,7 @@ hypre_VectorSetRandomValues( hypre_Vector *v, int ierr = 0; hypre_SeedRand(seed); +/* RDF: threading this loop may cause problems because of hypre_Rand() */ for (i = 0; i < size; i++) vector_data[i] = 2.0 * hypre_Rand() - 1.0; @@ -245,6 +248,8 @@ hypre_VectorScale( double alpha, int ierr = 0; +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < size; i++) y_data[i] *= alpha; @@ -268,6 +273,8 @@ hypre_VectorAxpy( double alpha, int ierr = 0; +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < size; i++) y_data[i] += alpha * x_data[i]; @@ -289,9 +296,13 @@ double hypre_VectorInnerProd( hypre_Vector *x, double result = 0.0; +#define HYPRE_SMP_PRIVATE i +#define HYPRE_SMP_REDUCTION_OP + +#define HYPRE_SMP_REDUCTION_VARS result +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < size; i++) result += y_data[i] * x_data[i]; - + return result; } diff --git a/seq_mv/csr_matvec.c b/seq_mv/csr_matvec.c index 62e79d961..e187d6ece 100644 --- a/seq_mv/csr_matvec.c +++ b/seq_mv/csr_matvec.c @@ -68,6 +68,8 @@ hypre_CSRMatrixMatvec( double alpha, if (alpha == 0.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) y_data[i] *= beta; @@ -84,11 +86,15 @@ hypre_CSRMatrixMatvec( double alpha, { if (temp == 0.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) y_data[i] = 0.0; } else { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) y_data[i] *= temp; } @@ -98,6 +104,8 @@ hypre_CSRMatrixMatvec( double alpha, * y += A*x *-----------------------------------------------------------------*/ +#define HYPRE_SMP_PRIVATE i,jj,j +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) @@ -113,6 +121,8 @@ hypre_CSRMatrixMatvec( double alpha, if (alpha != 1.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_rows; i++) y_data[i] *= alpha; } @@ -177,6 +187,8 @@ hypre_CSRMatrixMatvecT( double alpha, if (alpha == 0.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols; i++) y_data[i] *= beta; @@ -193,11 +205,15 @@ hypre_CSRMatrixMatvecT( double alpha, { if (temp == 0.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols; i++) y_data[i] = 0.0; } else { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols; i++) y_data[i] *= temp; } @@ -207,6 +223,7 @@ hypre_CSRMatrixMatvecT( double alpha, * y += A^T*x *-----------------------------------------------------------------*/ +/* RDF: have to think about how to thread this loop */ for (i = 0; i < num_rows; i++) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) @@ -222,6 +239,8 @@ hypre_CSRMatrixMatvecT( double alpha, if (alpha != 1.0) { +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < num_cols; i++) y_data[i] *= alpha; } diff --git a/seq_mv/vector.c b/seq_mv/vector.c index 8ec543c8b..bc8e60bc8 100644 --- a/seq_mv/vector.c +++ b/seq_mv/vector.c @@ -178,6 +178,8 @@ hypre_VectorSetConstantValues( hypre_Vector *v, int ierr = 0; +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < size; i++) vector_data[i] = value; @@ -202,6 +204,7 @@ hypre_VectorSetRandomValues( hypre_Vector *v, int ierr = 0; hypre_SeedRand(seed); +/* RDF: threading this loop may cause problems because of hypre_Rand() */ for (i = 0; i < size; i++) vector_data[i] = 2.0 * hypre_Rand() - 1.0; @@ -245,6 +248,8 @@ hypre_VectorScale( double alpha, int ierr = 0; +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < size; i++) y_data[i] *= alpha; @@ -268,6 +273,8 @@ hypre_VectorAxpy( double alpha, int ierr = 0; +#define HYPRE_SMP_PRIVATE i +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < size; i++) y_data[i] += alpha * x_data[i]; @@ -289,9 +296,13 @@ double hypre_VectorInnerProd( hypre_Vector *x, double result = 0.0; +#define HYPRE_SMP_PRIVATE i +#define HYPRE_SMP_REDUCTION_OP + +#define HYPRE_SMP_REDUCTION_VARS result +#include "../utilities/hypre_smp_forloop.h" for (i = 0; i < size; i++) result += y_data[i] * x_data[i]; - + return result; }