commit
9521f79be6
@ -344,38 +344,48 @@ hypre_PrefixSumInt(HYPRE_Int nvals,
|
||||
nthreads = hypre_NumThreads();
|
||||
bsize = (nvals + nthreads - 1) / nthreads; /* This distributes the remainder */
|
||||
|
||||
/* Compute preliminary partial sums (in parallel) within each interval */
|
||||
#ifdef HYPRE_USING_OPENMP
|
||||
#pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
|
||||
#endif
|
||||
for (j = 0; j < nvals; j += bsize)
|
||||
if (nvals < nthreads || bsize == 1)
|
||||
{
|
||||
HYPRE_Int i, n = hypre_min((j+bsize), nvals);
|
||||
|
||||
sums[0] = 0;
|
||||
for (i = j+1; i < n; i++)
|
||||
{
|
||||
sums[i] = sums[i-1] + vals[i-1];
|
||||
}
|
||||
for (j=1; j < nvals; j++)
|
||||
sums[j] += sums[j-1] + vals[j-1];
|
||||
}
|
||||
|
||||
/* Compute final partial sums (in serial) for the first entry of every interval */
|
||||
for (j = bsize; j < nvals; j += bsize)
|
||||
else
|
||||
{
|
||||
sums[j] = sums[j-bsize] + sums[j-1] + vals[j-1];
|
||||
}
|
||||
|
||||
/* Compute final partial sums (in parallel) for the remaining entries */
|
||||
|
||||
/* Compute preliminary partial sums (in parallel) within each interval */
|
||||
#ifdef HYPRE_USING_OPENMP
|
||||
#pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
|
||||
#endif
|
||||
for (j = bsize; j < nvals; j += bsize)
|
||||
{
|
||||
HYPRE_Int i, n = hypre_min((j+bsize), nvals);
|
||||
|
||||
for (i = j+1; i < n; i++)
|
||||
for (j = 0; j < nvals; j += bsize)
|
||||
{
|
||||
sums[i] += sums[j];
|
||||
HYPRE_Int i, n = hypre_min((j+bsize), nvals);
|
||||
|
||||
sums[0] = 0;
|
||||
for (i = j+1; i < n; i++)
|
||||
{
|
||||
sums[i] = sums[i-1] + vals[i-1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute final partial sums (in serial) for the first entry of every interval */
|
||||
for (j = bsize; j < nvals; j += bsize)
|
||||
{
|
||||
sums[j] = sums[j-bsize] + sums[j-1] + vals[j-1];
|
||||
}
|
||||
|
||||
/* Compute final partial sums (in parallel) for the remaining entries */
|
||||
#ifdef HYPRE_USING_OPENMP
|
||||
#pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE
|
||||
#endif
|
||||
for (j = bsize; j < nvals; j += bsize)
|
||||
{
|
||||
HYPRE_Int i, n = hypre_min((j+bsize), nvals);
|
||||
|
||||
for (i = j+1; i < n; i++)
|
||||
{
|
||||
sums[i] += sums[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -969,7 +969,7 @@ hypre_IJMatrixSetValuesParCSR( hypre_IJMatrix *matrix,
|
||||
offd_data = hypre_CSRMatrixData(offd);
|
||||
if (!big_offd_j)
|
||||
{
|
||||
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_HOST);
|
||||
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_SHARED);
|
||||
hypre_CSRMatrixBigJ(offd) = big_offd_j;
|
||||
}
|
||||
}
|
||||
@ -1015,9 +1015,9 @@ hypre_IJMatrixSetValuesParCSR( hypre_IJMatrix *matrix,
|
||||
}
|
||||
else /* insert into diag */
|
||||
{
|
||||
col_j = (HYPRE_Int)(cols[indx]-col_0);
|
||||
for (j=diag_i[row_local]; j < diag_indx; j++)
|
||||
{
|
||||
col_j = (HYPRE_Int)(cols[indx]-col_0);
|
||||
if (diag_j[j] == col_j)
|
||||
{
|
||||
diag_data[j] = values[indx];
|
||||
@ -1501,7 +1501,7 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix,
|
||||
offd_data = hypre_CSRMatrixData(offd);
|
||||
if (!big_offd_j)
|
||||
{
|
||||
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_HOST);
|
||||
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_SHARED);
|
||||
hypre_CSRMatrixBigJ(offd) = big_offd_j;
|
||||
}
|
||||
}
|
||||
@ -1547,10 +1547,9 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix,
|
||||
}
|
||||
else /* insert into diag */
|
||||
{
|
||||
HYPRE_Int col_j;
|
||||
HYPRE_Int col_j = (HYPRE_Int)( cols[indx] - col_0);
|
||||
for (j=diag_i[row_local]; j < diag_indx; j++)
|
||||
{
|
||||
col_j = (HYPRE_Int)( cols[indx] - col_0);
|
||||
if (diag_j[j] == col_j)
|
||||
{
|
||||
diag_data[j] += values[indx];
|
||||
@ -2993,6 +2992,8 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix)
|
||||
hypre_ParCSRMatrixColMapOffd(par_matrix) = col_map_offd;
|
||||
hypre_CSRMatrixNumCols(offd) = num_cols_offd;
|
||||
hypre_TFree(tmp_j, HYPRE_MEMORY_HOST);
|
||||
hypre_TFree(big_offd_j, HYPRE_MEMORY_SHARED);
|
||||
hypre_CSRMatrixBigJ(offd) = NULL;
|
||||
}
|
||||
hypre_IJMatrixAssembleFlag(matrix) = 1;
|
||||
}
|
||||
@ -3388,7 +3389,7 @@ hypre_IJMatrixSetValuesOMPParCSR( hypre_IJMatrix *matrix,
|
||||
big_offd_j = hypre_CSRMatrixBigJ(offd);
|
||||
if (!big_offd_j)
|
||||
{
|
||||
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_HOST);
|
||||
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_SHARED);
|
||||
hypre_CSRMatrixBigJ(offd) = big_offd_j;
|
||||
}
|
||||
}
|
||||
@ -4005,7 +4006,7 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix,
|
||||
offd_data = hypre_CSRMatrixData(offd);
|
||||
if (!big_offd_j)
|
||||
{
|
||||
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_HOST);
|
||||
big_offd_j = hypre_CTAlloc(HYPRE_BigInt, offd_i[hypre_CSRMatrixNumRows(offd)], HYPRE_MEMORY_SHARED);
|
||||
hypre_CSRMatrixBigJ(offd) = big_offd_j;
|
||||
}
|
||||
}
|
||||
|
||||
@ -389,6 +389,22 @@ hypre_F90_IFACE(hypre_parcsrhybridsettruncfacto, HYPRE_PARCSRHYBRIDSETTRUNCFACTO
|
||||
hypre_F90_PassReal (trunc_factor) ));
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
* HYPRE_ParCSRHybridSetPMaxElmts
|
||||
*--------------------------------------------------------------------------*/
|
||||
|
||||
void
|
||||
hypre_F90_IFACE(hypre_parcsrhybridsetpmaxelmts, HYPRE_PARCSRHYBRIDSETPMAXELMTS)
|
||||
(hypre_F90_Obj *solver,
|
||||
hypre_F90_Int *p_max_elmts,
|
||||
hypre_F90_Int *ierr)
|
||||
{
|
||||
*ierr = (hypre_F90_Int)
|
||||
(HYPRE_ParCSRHybridSetPMaxElmts(
|
||||
hypre_F90_PassObj (HYPRE_Solver, solver),
|
||||
hypre_F90_PassInt (p_max_elmts) ));
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
* HYPRE_ParCSRHybridSetMaxLevels
|
||||
*--------------------------------------------------------------------------*/
|
||||
@ -437,6 +453,22 @@ hypre_F90_IFACE(hypre_parcsrhybridsetcoarsentyp, HYPRE_PARCSRHYBRIDSETCOARSENTYP
|
||||
hypre_F90_PassInt (coarsen_type) ));
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
* HYPRE_ParCSRHybridSetInterpType
|
||||
*--------------------------------------------------------------------------*/
|
||||
|
||||
void
|
||||
hypre_F90_IFACE(hypre_parcsrhybridsetinterptyp, HYPRE_PARCSRHYBRIDSETINTERPTYP)
|
||||
(hypre_F90_Obj *solver,
|
||||
hypre_F90_Int *interp_type,
|
||||
hypre_F90_Int *ierr)
|
||||
{
|
||||
*ierr = (hypre_F90_Int)
|
||||
(HYPRE_ParCSRHybridSetCoarsenType(
|
||||
hypre_F90_PassObj (HYPRE_Solver, solver),
|
||||
hypre_F90_PassInt (interp_type) ));
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
* HYPRE_ParCSRHybridSetCycleType
|
||||
*--------------------------------------------------------------------------*/
|
||||
@ -469,7 +501,7 @@ hypre_F90_IFACE(hypre_parcsrhybridsetnumgridswe, HYPRE_PARCSRHYBRIDSETNUMGRIDSWE
|
||||
hypre_F90_PassIntArray (num_grid_sweeps) ));
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------
|
||||
/*------------------------------------------------------------------------
|
||||
* HYPRE_ParCSRHybridSetGridRelaxType
|
||||
*--------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
@ -1502,9 +1502,10 @@ HYPRE_Int hypre_BoomerAMGCreateScalarCFS ( hypre_ParCSRMatrix *SN , HYPRE_Int *C
|
||||
HYPRE_Int hypre_BoomerAMGCreateScalarCF ( HYPRE_Int *CFN_marker , HYPRE_Int num_functions , HYPRE_Int num_nodes , HYPRE_Int **dof_func_ptr , HYPRE_Int **CF_marker_ptr );
|
||||
|
||||
/* par_nongalerkin.c */
|
||||
HYPRE_Int hypre_GrabSubArray ( HYPRE_Int *indices , HYPRE_Int start , HYPRE_Int end , HYPRE_BigInt *array , HYPRE_Int *output );
|
||||
HYPRE_Int hypre_GrabSubArray ( HYPRE_Int *indices , HYPRE_Int start , HYPRE_Int end , HYPRE_BigInt *array , HYPRE_BigInt *output );
|
||||
void hypre_qsort2_abs ( HYPRE_Int *v , HYPRE_Real *w , HYPRE_Int left , HYPRE_Int right );
|
||||
HYPRE_Int hypre_IntersectTwoArrays ( HYPRE_Int *x , HYPRE_Real *x_data , HYPRE_Int x_length , HYPRE_Int *y , HYPRE_Int y_length , HYPRE_Int *z , HYPRE_Real *output_x_data , HYPRE_Int *intersect_length );
|
||||
HYPRE_Int hypre_IntersectTwoBigArrays ( HYPRE_BigInt *x , HYPRE_Real *x_data , HYPRE_Int x_length , HYPRE_BigInt *y , HYPRE_Int y_length , HYPRE_BigInt *z , HYPRE_Real *output_x_data , HYPRE_Int *intersect_length );
|
||||
HYPRE_Int hypre_SortedCopyParCSRData ( hypre_ParCSRMatrix *A , hypre_ParCSRMatrix *B );
|
||||
HYPRE_Int hypre_BoomerAMG_MyCreateS ( hypre_ParCSRMatrix *A , HYPRE_Real strength_threshold , HYPRE_Real max_row_sum , HYPRE_Int num_functions , HYPRE_Int *dof_func , hypre_ParCSRMatrix **S_ptr );
|
||||
HYPRE_Int hypre_BoomerAMGCreateSFromCFMarker(hypre_ParCSRMatrix *A, HYPRE_Real strength_threshold, HYPRE_Real max_row_sum, HYPRE_Int *CF_marker, HYPRE_Int SMRK, hypre_ParCSRMatrix **S_ptr);
|
||||
|
||||
@ -28,7 +28,7 @@ hypre_GrabSubArray(HYPRE_Int * indices,
|
||||
HYPRE_Int start,
|
||||
HYPRE_Int end,
|
||||
HYPRE_BigInt * array,
|
||||
HYPRE_Int * output)
|
||||
HYPRE_BigInt * output)
|
||||
{
|
||||
HYPRE_Int i, length;
|
||||
length = end - start + 1;
|
||||
@ -76,12 +76,50 @@ void hypre_qsort2_abs( HYPRE_Int *v,
|
||||
* in the longer array is faster.
|
||||
* */
|
||||
HYPRE_Int
|
||||
hypre_IntersectTwoArrays(HYPRE_Int *x,
|
||||
hypre_IntersectTwoArrays(HYPRE_Int *x,
|
||||
HYPRE_Real *x_data,
|
||||
HYPRE_Int x_length,
|
||||
HYPRE_Int *y,
|
||||
HYPRE_Int *y,
|
||||
HYPRE_Int y_length,
|
||||
HYPRE_Int *z,
|
||||
HYPRE_Int *z,
|
||||
HYPRE_Real *output_x_data,
|
||||
HYPRE_Int *intersect_length)
|
||||
{
|
||||
HYPRE_Int x_index = 0;
|
||||
HYPRE_Int y_index = 0;
|
||||
*intersect_length = 0;
|
||||
|
||||
/* Compute Intersection, looping over each array */
|
||||
while ( (x_index < x_length) && (y_index < y_length) )
|
||||
{
|
||||
if (x[x_index] > y[y_index])
|
||||
{
|
||||
y_index = y_index + 1;
|
||||
}
|
||||
else if (x[x_index] < y[y_index])
|
||||
{
|
||||
x_index = x_index + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
z[*intersect_length] = x[x_index];
|
||||
output_x_data[*intersect_length] = x_data[x_index];
|
||||
x_index = x_index + 1;
|
||||
y_index = y_index + 1;
|
||||
*intersect_length = *intersect_length + 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
HYPRE_Int
|
||||
hypre_IntersectTwoBigArrays(HYPRE_BigInt *x,
|
||||
HYPRE_Real *x_data,
|
||||
HYPRE_Int x_length,
|
||||
HYPRE_BigInt *y,
|
||||
HYPRE_Int y_length,
|
||||
HYPRE_BigInt *z,
|
||||
HYPRE_Real *output_x_data,
|
||||
HYPRE_Int *intersect_length)
|
||||
{
|
||||
@ -347,7 +385,7 @@ hypre_BoomerAMG_MyCreateS(hypre_ParCSRMatrix *A,
|
||||
|
||||
#ifdef HYPRE_USING_OPENMP
|
||||
#pragma omp parallel for private(i,diag,row_scale,row_sum,jA) HYPRE_SMP_SCHEDULE
|
||||
#endif
|
||||
#endif
|
||||
for (i = 0; i < num_variables; i++)
|
||||
{
|
||||
diag = A_diag_data[A_diag_i[i]];
|
||||
@ -1083,7 +1121,7 @@ hypre_NonGalerkinSparsityPattern(hypre_ParCSRMatrix *R_IAP,
|
||||
*/
|
||||
/*#ifdef HYPRE_USING_OPENMP
|
||||
#pragma omp parallel for private(i,j,max_entry,max_entry_offd,global_col,global_row) HYPRE_SMP_SCHEDULE
|
||||
#endif*/
|
||||
#endif */
|
||||
for(i = 0; i < num_variables; i++)
|
||||
{
|
||||
global_row = i+first_col_diag_RAP;
|
||||
@ -1221,9 +1259,9 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
|
||||
|
||||
/* Lumping related variables */
|
||||
HYPRE_IJMatrix ijmatrix;
|
||||
HYPRE_Int * Pattern_offd_indices = NULL;
|
||||
HYPRE_Int * S_offd_indices = NULL;
|
||||
HYPRE_Int * offd_intersection = NULL;
|
||||
HYPRE_BigInt * Pattern_offd_indices = NULL;
|
||||
HYPRE_BigInt * S_offd_indices = NULL;
|
||||
HYPRE_BigInt * offd_intersection = NULL;
|
||||
HYPRE_Real * offd_intersection_data = NULL;
|
||||
HYPRE_Int * diag_intersection = NULL;
|
||||
HYPRE_Real * diag_intersection_data = NULL;
|
||||
@ -1686,7 +1724,7 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
|
||||
if(Pattern_offd_indices_allocated_len < Pattern_offd_indices_len)
|
||||
{
|
||||
hypre_TFree(Pattern_offd_indices, HYPRE_MEMORY_HOST);
|
||||
Pattern_offd_indices = hypre_CTAlloc(HYPRE_Int, Pattern_offd_indices_len, HYPRE_MEMORY_HOST);
|
||||
Pattern_offd_indices = hypre_CTAlloc(HYPRE_BigInt, Pattern_offd_indices_len, HYPRE_MEMORY_HOST);
|
||||
Pattern_offd_indices_allocated_len = Pattern_offd_indices_len;
|
||||
}
|
||||
/* Grab sub array from col_map, corresponding to the slice of Pattern_offd_j */
|
||||
@ -1741,7 +1779,7 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
|
||||
if(S_offd_indices_allocated_len < S_offd_indices_len)
|
||||
{
|
||||
hypre_TFree(S_offd_indices, HYPRE_MEMORY_HOST);
|
||||
S_offd_indices = hypre_CTAlloc(HYPRE_Int, S_offd_indices_len, HYPRE_MEMORY_HOST);
|
||||
S_offd_indices = hypre_CTAlloc(HYPRE_BigInt, S_offd_indices_len, HYPRE_MEMORY_HOST);
|
||||
S_offd_indices_allocated_len = S_offd_indices_len;
|
||||
}
|
||||
/* Grab sub array from col_map, corresponding to the slice of S_offd_j */
|
||||
@ -1757,13 +1795,13 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
|
||||
{
|
||||
hypre_TFree(offd_intersection, HYPRE_MEMORY_HOST);
|
||||
hypre_TFree(offd_intersection_data, HYPRE_MEMORY_HOST);
|
||||
offd_intersection = hypre_CTAlloc(HYPRE_Int, cnt, HYPRE_MEMORY_HOST);
|
||||
offd_intersection = hypre_CTAlloc(HYPRE_BigInt, cnt, HYPRE_MEMORY_HOST);
|
||||
offd_intersection_data = hypre_CTAlloc(HYPRE_Real, cnt, HYPRE_MEMORY_HOST);
|
||||
offd_intersection_allocated_len = cnt;
|
||||
}
|
||||
/* This intersection also tracks S_offd_data and assumes that
|
||||
* S_offd_indices is the first argument here */
|
||||
hypre_IntersectTwoArrays(S_offd_indices,
|
||||
hypre_IntersectTwoBigArrays(S_offd_indices,
|
||||
&(S_offd_data[ S_offd_i[col_indx_RAP] ]),
|
||||
S_offd_indices_len,
|
||||
Pattern_offd_indices,
|
||||
@ -2029,7 +2067,7 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
|
||||
if(S_offd_indices_allocated_len < S_offd_indices_len)
|
||||
{
|
||||
hypre_TFree(S_offd_indices, HYPRE_MEMORY_HOST);
|
||||
S_offd_indices = hypre_CTAlloc(HYPRE_Int, S_offd_indices_len, HYPRE_MEMORY_HOST);
|
||||
S_offd_indices = hypre_CTAlloc(HYPRE_BigInt, S_offd_indices_len, HYPRE_MEMORY_HOST);
|
||||
S_offd_indices_allocated_len = S_offd_indices_len;
|
||||
}
|
||||
/* Grab sub array from col_map, corresponding to the slice of S_ext_offd_j */
|
||||
@ -2045,11 +2083,11 @@ hypre_BoomerAMGBuildNonGalerkinCoarseOperator( hypre_ParCSRMatrix **RAP_ptr,
|
||||
{
|
||||
hypre_TFree(offd_intersection, HYPRE_MEMORY_HOST);
|
||||
hypre_TFree(offd_intersection_data, HYPRE_MEMORY_HOST);
|
||||
offd_intersection = hypre_CTAlloc(HYPRE_Int, cnt, HYPRE_MEMORY_HOST);
|
||||
offd_intersection = hypre_CTAlloc(HYPRE_BigInt, cnt, HYPRE_MEMORY_HOST);
|
||||
offd_intersection_data = hypre_CTAlloc(HYPRE_Real, cnt, HYPRE_MEMORY_HOST);
|
||||
offd_intersection_allocated_len = cnt;
|
||||
}
|
||||
hypre_IntersectTwoArrays(S_offd_indices,
|
||||
hypre_IntersectTwoBigArrays(S_offd_indices,
|
||||
&(S_ext_offd_data[ S_ext_offd_i[row_indx_Sext] ]),
|
||||
S_offd_indices_len,
|
||||
Pattern_offd_indices,
|
||||
|
||||
@ -89,6 +89,7 @@ hypre_CSRMatrixDestroy( hypre_CSRMatrix *matrix )
|
||||
hypre_TFree(hypre_CSRMatrixBigJ(matrix), HYPRE_MEMORY_SHARED);
|
||||
hypre_CSRMatrixData(matrix) = NULL;
|
||||
hypre_CSRMatrixJ(matrix) = NULL;
|
||||
hypre_CSRMatrixBigJ(matrix) = NULL;
|
||||
}
|
||||
hypre_TFree(matrix, HYPRE_MEMORY_HOST);
|
||||
matrix = NULL;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user