completely threaded MatvecT.
This commit is contained in:
parent
f3f03bc821
commit
8fb1efd755
@ -153,12 +153,14 @@ hypre_CSRMatrixMatvecT( double alpha,
|
||||
|
||||
double *x_data = hypre_VectorData(x);
|
||||
double *y_data = hypre_VectorData(y);
|
||||
double **y_tmp_array;
|
||||
double *y_tmp_data;
|
||||
int x_size = hypre_VectorSize(x);
|
||||
int y_size = hypre_VectorSize(y);
|
||||
|
||||
double temp;
|
||||
double temp, size, rest;
|
||||
|
||||
int i, j, jj;
|
||||
int i, i1, j, jj, ns, ne;
|
||||
|
||||
int ierr = 0;
|
||||
|
||||
@ -222,17 +224,61 @@ hypre_CSRMatrixMatvecT( double alpha,
|
||||
/*-----------------------------------------------------------------
|
||||
* y += A^T*x
|
||||
*-----------------------------------------------------------------*/
|
||||
|
||||
/* RDF: have to think about how to thread this loop */
|
||||
for (i = 0; i < num_rows; i++)
|
||||
if (hypre_NumThreads > 1)
|
||||
{
|
||||
for (jj = A_i[i]; jj < A_i[i+1]; jj++)
|
||||
y_tmp_array = hypre_CTAlloc(double *,hypre_NumThreads);
|
||||
for (i=0; i < hypre_NumThreads; i++)
|
||||
y_tmp_array[i] = hypre_CTAlloc(double,num_rows);
|
||||
|
||||
#define HYPRE_SMP_PRIVATE i, i1,jj,j,y_tmp_data,ns,ne,size,rest
|
||||
#include "../utilities/hypre_smp_forloop.h"
|
||||
for (i1 = 0; i1 < hypre_NumThreads; i1++)
|
||||
{
|
||||
j = A_j[jj];
|
||||
y_data[j] += A_data[jj] * x_data[i];
|
||||
size = num_rows/hypre_NumThreads;
|
||||
rest = num_rows - size*hypre_NumThreads;
|
||||
if (i1 < rest)
|
||||
{
|
||||
ns = i1*size+i1;
|
||||
ne = (i1+1)*size+i1+1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ns = i1*size+rest;
|
||||
ne = (i1+1)*size+rest;
|
||||
}
|
||||
y_tmp_data = y_tmp_array[i1];
|
||||
for (i = ns; i < ne; i++)
|
||||
{
|
||||
for (jj = A_i[i]; jj < A_i[i+1]; jj++)
|
||||
{
|
||||
j = A_j[jj];
|
||||
y_tmp_data[j] += A_data[jj] * x_data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define HYPRE_SMP_PRIVATE i, j
|
||||
#include "../utilities/hypre_smp_forloop.h"
|
||||
for (i = 0; i < num_cols; i++)
|
||||
for (j = 0; j < hypre_NumThreads; j++)
|
||||
y_data[i] += y_tmp_array[j][i];
|
||||
|
||||
for (i=0; i < hypre_NumThreads; i++)
|
||||
hypre_TFree(y_tmp_array[i]);
|
||||
hypre_TFree(y_tmp_array);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < num_rows; i++)
|
||||
{
|
||||
for (jj = A_i[i]; jj < A_i[i+1]; jj++)
|
||||
{
|
||||
j = A_j[jj];
|
||||
y_data[j] += A_data[jj] * x_data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* y = alpha*y
|
||||
*-----------------------------------------------------------------*/
|
||||
|
||||
@ -153,12 +153,14 @@ hypre_CSRMatrixMatvecT( double alpha,
|
||||
|
||||
double *x_data = hypre_VectorData(x);
|
||||
double *y_data = hypre_VectorData(y);
|
||||
double **y_tmp_array;
|
||||
double *y_tmp_data;
|
||||
int x_size = hypre_VectorSize(x);
|
||||
int y_size = hypre_VectorSize(y);
|
||||
|
||||
double temp;
|
||||
double temp, size, rest;
|
||||
|
||||
int i, j, jj;
|
||||
int i, i1, j, jj, ns, ne;
|
||||
|
||||
int ierr = 0;
|
||||
|
||||
@ -222,17 +224,61 @@ hypre_CSRMatrixMatvecT( double alpha,
|
||||
/*-----------------------------------------------------------------
|
||||
* y += A^T*x
|
||||
*-----------------------------------------------------------------*/
|
||||
|
||||
/* RDF: have to think about how to thread this loop */
|
||||
for (i = 0; i < num_rows; i++)
|
||||
if (hypre_NumThreads > 1)
|
||||
{
|
||||
for (jj = A_i[i]; jj < A_i[i+1]; jj++)
|
||||
y_tmp_array = hypre_CTAlloc(double *,hypre_NumThreads);
|
||||
for (i=0; i < hypre_NumThreads; i++)
|
||||
y_tmp_array[i] = hypre_CTAlloc(double,num_rows);
|
||||
|
||||
#define HYPRE_SMP_PRIVATE i, i1,jj,j,y_tmp_data,ns,ne,size,rest
|
||||
#include "../utilities/hypre_smp_forloop.h"
|
||||
for (i1 = 0; i1 < hypre_NumThreads; i1++)
|
||||
{
|
||||
j = A_j[jj];
|
||||
y_data[j] += A_data[jj] * x_data[i];
|
||||
size = num_rows/hypre_NumThreads;
|
||||
rest = num_rows - size*hypre_NumThreads;
|
||||
if (i1 < rest)
|
||||
{
|
||||
ns = i1*size+i1;
|
||||
ne = (i1+1)*size+i1+1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ns = i1*size+rest;
|
||||
ne = (i1+1)*size+rest;
|
||||
}
|
||||
y_tmp_data = y_tmp_array[i1];
|
||||
for (i = ns; i < ne; i++)
|
||||
{
|
||||
for (jj = A_i[i]; jj < A_i[i+1]; jj++)
|
||||
{
|
||||
j = A_j[jj];
|
||||
y_tmp_data[j] += A_data[jj] * x_data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define HYPRE_SMP_PRIVATE i, j
|
||||
#include "../utilities/hypre_smp_forloop.h"
|
||||
for (i = 0; i < num_cols; i++)
|
||||
for (j = 0; j < hypre_NumThreads; j++)
|
||||
y_data[i] += y_tmp_array[j][i];
|
||||
|
||||
for (i=0; i < hypre_NumThreads; i++)
|
||||
hypre_TFree(y_tmp_array[i]);
|
||||
hypre_TFree(y_tmp_array);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < num_rows; i++)
|
||||
{
|
||||
for (jj = A_i[i]; jj < A_i[i+1]; jj++)
|
||||
{
|
||||
j = A_j[jj];
|
||||
y_data[j] += A_data[jj] * x_data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* y = alpha*y
|
||||
*-----------------------------------------------------------------*/
|
||||
|
||||
Loading…
Reference in New Issue
Block a user