Merge pull request #27 from hypre-space/umy-fixes

Umy fixes
This commit is contained in:
ulrikeyang 2019-06-27 07:28:31 -07:00 committed by GitHub
commit 9521f79be6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 132 additions and 49 deletions

View File

@ -344,38 +344,48 @@ hypre_PrefixSumInt(HYPRE_Int nvals,
nthreads = hypre_NumThreads();
bsize = (nvals + nthreads - 1) / nthreads; /* This distributes the remainder */
/* Compute preliminary partial sums (in parallel) within each interval */
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
#endif
for (j = 0; j < nvals; j += bsize)
if (nvals < nthreads || bsize == 1)
{
HYPRE_Int i, n = hypre_min((j+bsize), nvals);
sums[0] = 0;
for (i = j+1; i < n; i++)
{
sums[i] = sums[i-1] + vals[i-1];
}
for (j=1; j < nvals; j++)
sums[j] += sums[j-1] + vals[j-1];
}
/* Compute final partial sums (in serial) for the first entry of every interval */
for (j = bsize; j < nvals; j += bsize)
else
{
sums[j] = sums[j-bsize] + sums[j-1] + vals[j-1];
}
/* Compute final partial sums (in parallel) for the remaining entries */
/* Compute preliminary partial sums (in parallel) within each interval */
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
#endif
for (j = bsize; j < nvals; j += bsize)
{
HYPRE_Int i, n = hypre_min((j+bsize), nvals);
for (i = j+1; i < n; i++)
for (j = 0; j < nvals; j += bsize)
{
sums[i] += sums[j];
HYPRE_Int i, n = hypre_min((j+bsize), nvals);
sums[0] = 0;
for (i = j+1; i < n; i++)
{
sums[i] = sums[i-1] + vals[i-1];
}
}
/* Compute final partial sums (in serial) for the first entry of every interval */
for (j = bsize; j < nvals; j += bsize)
{
sums[j] = sums[j-bsize] + sums[j-1] + vals[j-1];
}
/* Compute final partial sums (in parallel) for the remaining entries */
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
#endif
for (j = bsize; j < nvals; j += bsize)
{
HYPRE_Int i, n = hypre_min((j+bsize), nvals);
for (i = j+1; i < n; i++)
{
sums[i] += sums[j];
}
}
}

View File

@ -969,7 +969,7 @@ hypre_IJMatrixSetValuesParCSR( hypre_IJMatrix *matrix,
offd_data = hypre_CSRMatrixData(offd);
if (!big_offd_j)
{
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_HOST);
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_SHARED);
hypre_CSRMatrixBigJ(offd) = big_offd_j;
}
}
@ -1015,9 +1015,9 @@ hypre_IJMatrixSetValuesParCSR( hypre_IJMatrix *matrix,
}
else /* insert into diag */
{
col_j = (HYPRE_Int)(cols[indx]-col_0);
for (j=diag_i[row_local]; j < diag_indx; j++)
{
col_j = (HYPRE_Int)(cols[indx]-col_0);
if (diag_j[j] == col_j)
{
diag_data[j] = values[indx];
@ -1501,7 +1501,7 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix,
offd_data = hypre_CSRMatrixData(offd);
if (!big_offd_j)
{
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_HOST);
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_SHARED);
hypre_CSRMatrixBigJ(offd) = big_offd_j;
}
}
@ -1547,10 +1547,9 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix,
}
else /* insert into diag */
{
HYPRE_Int col_j;
HYPRE_Int col_j = (HYPRE_Int)( cols[indx] - col_0);
for (j=diag_i[row_local]; j < diag_indx; j++)
{
col_j = (HYPRE_Int)( cols[indx] - col_0);
if (diag_j[j] == col_j)
{
diag_data[j] += values[indx];
@ -2993,6 +2992,8 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix)
hypre_ParCSRMatrixColMapOffd(par_matrix) = col_map_offd;
hypre_CSRMatrixNumCols(offd) = num_cols_offd;
hypre_TFree(tmp_j, HYPRE_MEMORY_HOST);
hypre_TFree(big_offd_j, HYPRE_MEMORY_SHARED);
hypre_CSRMatrixBigJ(offd) = NULL;
}
hypre_IJMatrixAssembleFlag(matrix) = 1;
}
@ -3388,7 +3389,7 @@ hypre_IJMatrixSetValuesOMPParCSR( hypre_IJMatrix *matrix,
big_offd_j = hypre_CSRMatrixBigJ(offd);
if (!big_offd_j)
{
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_HOST);
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_SHARED);
hypre_CSRMatrixBigJ(offd) = big_offd_j;
}
}
@ -4005,7 +4006,7 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix,
offd_data = hypre_CSRMatrixData(offd);
if (!big_offd_j)
{
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_HOST);
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_SHARED);
hypre_CSRMatrixBigJ(offd) = big_offd_j;
}
}

View File

@ -389,6 +389,22 @@ hypre_F90_IFACE(hypre_parcsrhybridsettruncfacto, HYPRE_PARCSRHYBRIDSETTRUNCFACTO
hypre_F90_PassReal (trunc_factor) ));
}
/*--------------------------------------------------------------------------
 * HYPRE_ParCSRHybridSetPMaxElmts
 *
 * Fortran binding. Passes the solver handle and p_max_elmts through the
 * hypre_F90_Pass* macros, forwards them to HYPRE_ParCSRHybridSetPMaxElmts,
 * and stores the status code it returns in *ierr.
 *--------------------------------------------------------------------------*/
void
hypre_F90_IFACE(hypre_parcsrhybridsetpmaxelmts, HYPRE_PARCSRHYBRIDSETPMAXELMTS)
   (hypre_F90_Obj *solver,
    hypre_F90_Int *p_max_elmts,
    hypre_F90_Int *ierr)
{
   hypre_F90_Int status;

   status = (hypre_F90_Int) HYPRE_ParCSRHybridSetPMaxElmts(
               hypre_F90_PassObj(HYPRE_Solver, solver),
               hypre_F90_PassInt(p_max_elmts));

   *ierr = status;
}
/*--------------------------------------------------------------------------
* HYPRE_ParCSRHybridSetMaxLevels
*--------------------------------------------------------------------------*/
@ -437,6 +453,22 @@ hypre_F90_IFACE(hypre_parcsrhybridsetcoarsentyp, HYPRE_PARCSRHYBRIDSETCOARSENTYP
hypre_F90_PassInt (coarsen_type) ));
}
/*--------------------------------------------------------------------------
 * HYPRE_ParCSRHybridSetInterpType
 *
 * Fortran binding. Passes the solver handle and interp_type through the
 * hypre_F90_Pass* macros, forwards them to HYPRE_ParCSRHybridSetInterpType,
 * and stores the status code it returns in *ierr.
 *--------------------------------------------------------------------------*/
void
hypre_F90_IFACE(hypre_parcsrhybridsetinterptyp, HYPRE_PARCSRHYBRIDSETINTERPTYP)
   (hypre_F90_Obj *solver,
    hypre_F90_Int *interp_type,
    hypre_F90_Int *ierr)
{
   /* Must call the InterpType setter: forwarding to the CoarsenType setter
    * would overwrite the coarsening type and leave interpolation unchanged. */
   *ierr = (hypre_F90_Int)
      (HYPRE_ParCSRHybridSetInterpType(
          hypre_F90_PassObj (HYPRE_Solver, solver),
          hypre_F90_PassInt (interp_type) ));
}
/*--------------------------------------------------------------------------
* HYPRE_ParCSRHybridSetCycleType
*--------------------------------------------------------------------------*/
@ -469,7 +501,7 @@ hypre_F90_IFACE(hypre_parcsrhybridsetnumgridswe, HYPRE_PARCSRHYBRIDSETNUMGRIDSWE
hypre_F90_PassIntArray (num_grid_sweeps) ));
}
/*--------------------------------------------------------------------------
/*------------------------------------------------------------------------
* HYPRE_ParCSRHybridSetGridRelaxType
*--------------------------------------------------------------------------*/

View File

@ -1502,9 +1502,10 @@ HYPRE_Int hypre_BoomerAMGCreateScalarCFS ( hypre_ParCSRMatrix *SN , HYPRE_Int *C
HYPRE_Int hypre_BoomerAMGCreateScalarCF ( HYPRE_Int *CFN_marker , HYPRE_Int num_functions , HYPRE_Int num_nodes , HYPRE_Int **dof_func_ptr , HYPRE_Int **CF_marker_ptr );
/* par_nongalerkin.c */
HYPRE_Int hypre_GrabSubArray ( HYPRE_Int *indices , HYPRE_Int start , HYPRE_Int end , HYPRE_BigInt *array , HYPRE_Int *output );
HYPRE_Int hypre_GrabSubArray ( HYPRE_Int *indices , HYPRE_Int start , HYPRE_Int end , HYPRE_BigInt *array , HYPRE_BigInt *output );
void hypre_qsort2_abs ( HYPRE_Int *v , HYPRE_Real *w , HYPRE_Int left , HYPRE_Int right );
HYPRE_Int hypre_IntersectTwoArrays ( HYPRE_Int *x , HYPRE_Real *x_data , HYPRE_Int x_length , HYPRE_Int *y , HYPRE_Int y_length , HYPRE_Int *z , HYPRE_Real *output_x_data , HYPRE_Int *intersect_length );
HYPRE_Int hypre_IntersectTwoBigArrays ( HYPRE_BigInt *x , HYPRE_Real *x_data , HYPRE_Int x_length , HYPRE_BigInt *y , HYPRE_Int y_length , HYPRE_BigInt *z , HYPRE_Real *output_x_data , HYPRE_Int *intersect_length );
HYPRE_Int hypre_SortedCopyParCSRData ( hypre_ParCSRMatrix *A , hypre_ParCSRMatrix *B );
HYPRE_Int hypre_BoomerAMG_MyCreateS ( hypre_ParCSRMatrix *A , HYPRE_Real strength_threshold , HYPRE_Real max_row_sum , HYPRE_Int num_functions , HYPRE_Int *dof_func , hypre_ParCSRMatrix **S_ptr );
HYPRE_Int hypre_BoomerAMGCreateSFromCFMarker(hypre_ParCSRMatrix *A, HYPRE_Real strength_threshold, HYPRE_Real max_row_sum, HYPRE_Int *CF_marker, HYPRE_Int SMRK, hypre_ParCSRMatrix **S_ptr);

View File

@ -28,7 +28,7 @@ hypre_GrabSubArray(HYPRE_Int * indices,
HYPRE_Int start,
HYPRE_Int end,
HYPRE_BigInt * array,
HYPRE_Int * output)
HYPRE_BigInt * output)
{
HYPRE_Int i, length;
length = end - start + 1;
@ -76,12 +76,50 @@ void hypre_qsort2_abs( HYPRE_Int *v,
* in the longer array is faster.
* */
/* hypre_IntersectTwoArrays
 *
 * Walks x and y with one cursor each and collects the values present in
 * both. For every match, the value is appended to z and the x_data entry
 * at the matching x position is appended to output_x_data.
 *
 * x, x_data, x_length : first index array, its companion data, its length
 * y, y_length         : second index array and its length
 * z, output_x_data    : outputs; each match advances both cursors, so at
 *                       most min(x_length, y_length) entries are written
 * intersect_length    : output; reset to 0 on entry, then counts matches
 *
 * Always returns 1.
 *
 * NOTE(review): only the cursor pointing at the smaller value is advanced,
 * so this presumably expects x and y sorted in ascending order -- confirm
 * against callers.
 * NOTE(review): the parameter lines for x, y and z each appear twice below
 * (two revisions differing only in whitespace); left untouched here.
 */
HYPRE_Int
hypre_IntersectTwoArrays(HYPRE_Int *x,
hypre_IntersectTwoArrays(HYPRE_Int *x,
HYPRE_Real *x_data,
HYPRE_Int x_length,
HYPRE_Int *y,
HYPRE_Int *y,
HYPRE_Int y_length,
HYPRE_Int *z,
HYPRE_Int *z,
HYPRE_Real *output_x_data,
HYPRE_Int *intersect_length)
{
HYPRE_Int x_index = 0;
HYPRE_Int y_index = 0;
*intersect_length = 0;
/* Compute Intersection, looping over each array */
while ( (x_index < x_length) && (y_index < y_length) )
{
/* y is behind x: advance y only */
if (x[x_index] > y[y_index])
{
y_index = y_index + 1;
}
/* x is behind y: advance x only */
else if (x[x_index] < y[y_index])
{
x_index = x_index + 1;
}
/* equal entries: record the index and the x-side data, advance both */
else
{
z[*intersect_length] = x[x_index];
output_x_data[*intersect_length] = x_data[x_index];
x_index = x_index + 1;
y_index = y_index + 1;
*intersect_length = *intersect_length + 1;
}
}
/* the return value is the constant 1 regardless of the result */
return 1;
}
HYPRE_Int
hypre_IntersectTwoBigArrays(HYPRE_BigInt *x,
HYPRE_Real *x_data,
HYPRE_Int x_length,
HYPRE_BigInt *y,
HYPRE_Int y_length,
HYPRE_BigInt *z,
HYPRE_Real *output_x_data,
HYPRE_Int *intersect_length)
{
@ -347,7 +385,7 @@ hypre_BoomerAMG_MyCreateS(hypre_ParCSRMatrix *A,
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,diag,row_scale,row_sum,jA) HYPRE_SMP_SCHEDULE
#endif
#endif
for (i = 0; i < num_variables; i++)
{
diag = A_diag_data[A_diag_i[i]];
@ -1083,7 +1121,7 @@ hypre_NonGalerkinSparsityPattern(hypre_ParCSRMatrix *R_IAP,
*/
/*#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,max_entry,max_entry_offd,global_col,global_row) HYPRE_SMP_SCHEDULE
#endif*/
#endif */
for(i = 0; i < num_variables; i++)
{
global_row = i+first_col_diag_RAP;
@ -1221,9 +1259,9 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
/* Lumping related variables */
HYPRE_IJMatrix ijmatrix;
HYPRE_Int * Pattern_offd_indices = NULL;
HYPRE_Int * S_offd_indices = NULL;
HYPRE_Int * offd_intersection = NULL;
HYPRE_BigInt * Pattern_offd_indices = NULL;
HYPRE_BigInt * S_offd_indices = NULL;
HYPRE_BigInt * offd_intersection = NULL;
HYPRE_Real * offd_intersection_data = NULL;
HYPRE_Int * diag_intersection = NULL;
HYPRE_Real * diag_intersection_data = NULL;
@ -1686,7 +1724,7 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
if(Pattern_offd_indices_allocated_len < Pattern_offd_indices_len)
{
hypre_TFree(Pattern_offd_indices, HYPRE_MEMORY_HOST);
Pattern_offd_indices = hypre_CTAlloc(HYPRE_Int, Pattern_offd_indices_len, HYPRE_MEMORY_HOST);
Pattern_offd_indices = hypre_CTAlloc(HYPRE_BigInt, Pattern_offd_indices_len, HYPRE_MEMORY_HOST);
Pattern_offd_indices_allocated_len = Pattern_offd_indices_len;
}
/* Grab sub array from col_map, corresponding to the slice of Pattern_offd_j */
@ -1741,7 +1779,7 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
if(S_offd_indices_allocated_len < S_offd_indices_len)
{
hypre_TFree(S_offd_indices, HYPRE_MEMORY_HOST);
S_offd_indices = hypre_CTAlloc(HYPRE_Int, S_offd_indices_len, HYPRE_MEMORY_HOST);
S_offd_indices = hypre_CTAlloc(HYPRE_BigInt, S_offd_indices_len, HYPRE_MEMORY_HOST);
S_offd_indices_allocated_len = S_offd_indices_len;
}
/* Grab sub array from col_map, corresponding to the slice of S_offd_j */
@ -1757,13 +1795,13 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
{
hypre_TFree(offd_intersection, HYPRE_MEMORY_HOST);
hypre_TFree(offd_intersection_data, HYPRE_MEMORY_HOST);
offd_intersection = hypre_CTAlloc(HYPRE_Int, cnt, HYPRE_MEMORY_HOST);
offd_intersection = hypre_CTAlloc(HYPRE_BigInt, cnt, HYPRE_MEMORY_HOST);
offd_intersection_data = hypre_CTAlloc(HYPRE_Real, cnt, HYPRE_MEMORY_HOST);
offd_intersection_allocated_len = cnt;
}
/* This intersection also tracks S_offd_data and assumes that
* S_offd_indices is the first argument here */
hypre_IntersectTwoArrays(S_offd_indices,
hypre_IntersectTwoBigArrays(S_offd_indices,
&(S_offd_data[ S_offd_i[col_indx_RAP] ]),
S_offd_indices_len,
Pattern_offd_indices,
@ -2029,7 +2067,7 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
if(S_offd_indices_allocated_len < S_offd_indices_len)
{
hypre_TFree(S_offd_indices, HYPRE_MEMORY_HOST);
S_offd_indices = hypre_CTAlloc(HYPRE_Int, S_offd_indices_len, HYPRE_MEMORY_HOST);
S_offd_indices = hypre_CTAlloc(HYPRE_BigInt, S_offd_indices_len, HYPRE_MEMORY_HOST);
S_offd_indices_allocated_len = S_offd_indices_len;
}
/* Grab sub array from col_map, corresponding to the slice of S_ext_offd_j */
@ -2045,11 +2083,11 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
{
hypre_TFree(offd_intersection, HYPRE_MEMORY_HOST);
hypre_TFree(offd_intersection_data, HYPRE_MEMORY_HOST);
offd_intersection = hypre_CTAlloc(HYPRE_Int, cnt, HYPRE_MEMORY_HOST);
offd_intersection = hypre_CTAlloc(HYPRE_BigInt, cnt, HYPRE_MEMORY_HOST);
offd_intersection_data = hypre_CTAlloc(HYPRE_Real, cnt, HYPRE_MEMORY_HOST);
offd_intersection_allocated_len = cnt;
}
hypre_IntersectTwoArrays(S_offd_indices,
hypre_IntersectTwoBigArrays(S_offd_indices,
&(S_ext_offd_data[ S_ext_offd_i[row_indx_Sext] ]),
S_offd_indices_len,
Pattern_offd_indices,

View File

@ -89,6 +89,7 @@ hypre_CSRMatrixDestroy( hypre_CSRMatrix *matrix )
hypre_TFree(hypre_CSRMatrixBigJ(matrix), HYPRE_MEMORY_SHARED);
hypre_CSRMatrixData(matrix) = NULL;
hypre_CSRMatrixJ(matrix) = NULL;
hypre_CSRMatrixBigJ(matrix) = NULL;
}
hypre_TFree(matrix, HYPRE_MEMORY_HOST);
matrix = NULL;