diff --git a/src/parcsr_ls/dsuperlu.c b/src/parcsr_ls/dsuperlu.c index 4f24ea09e..95864c9a4 100644 --- a/src/parcsr_ls/dsuperlu.c +++ b/src/parcsr_ls/dsuperlu.c @@ -40,16 +40,24 @@ hypre_SLUDistSetup(HYPRE_Solver *solver, { /* Par Data Structure variables */ HYPRE_BigInt global_num_rows = hypre_ParCSRMatrixGlobalNumRows(A); - MPI_Comm comm = hypre_ParCSRMatrixComm(A); + MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_local; - HYPRE_Int num_rows; - HYPRE_Int num_procs, my_id; - HYPRE_Int pcols = 1, prows = 1; - HYPRE_BigInt *big_rowptr = NULL; + + HYPRE_Int pcols = 1; + HYPRE_Int prows = 1; hypre_DSLUData *dslu_data = NULL; HYPRE_Int info = 0; HYPRE_Int nrhs = 0; + HYPRE_Int num_rows; + HYPRE_Int num_procs, my_id; + HYPRE_Int i; + + /* SuperLU_Dist variables. Note it uses "int_t" to denote integer types */ + int_t *slu_rowptr; + int_t *slu_colidx; + hypre_double *slu_data; + hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); @@ -61,50 +69,87 @@ hypre_SLUDistSetup(HYPRE_Solver *solver, /* Merge diag and offd into one matrix (global ids) */ A_local = hypre_MergeDiagAndOffd(A); -#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL) +#if defined(HYPRE_USING_GPU) if (hypre_GetActualMemLocation(hypre_CSRMatrixMemoryLocation(A_local)) != hypre_MEMORY_HOST) { hypre_CSRMatrixMigrate(A_local, HYPRE_MEMORY_HOST); } #endif - num_rows = hypre_CSRMatrixNumRows(A_local); - /* Now convert hypre matrix to a SuperMatrix */ -#ifdef HYPRE_MIXEDINT + + /* SuperLU uses int_t to denote its integer type. Hence, the conversion/checks below: */ + if (sizeof(int_t) != sizeof(HYPRE_Int)) { - HYPRE_Int *rowptr = NULL; - HYPRE_Int i; - rowptr = hypre_CSRMatrixI(A_local); - big_rowptr = hypre_CTAlloc(HYPRE_BigInt, (num_rows + 1), HYPRE_MEMORY_HOST); - for (i = 0; i < (num_rows + 1); i++) + slu_rowptr = hypre_CTAlloc(int_t, (num_rows + 1), hypre_CSRMatrixMemoryLocation(A_local)); + for (i = 0; i < num_rows + 1; i++) { - big_rowptr[i] = (HYPRE_BigInt)rowptr[i]; + slu_rowptr[i] = (int_t) hypre_CSRMatrixI(A_local)[i]; } } -#else - big_rowptr = hypre_CSRMatrixI(A_local); -#endif + else + { + slu_rowptr = (int_t*) hypre_CSRMatrixI(A_local); + } + if (sizeof(int_t) != sizeof(HYPRE_BigInt)) + { + slu_colidx = hypre_CTAlloc(int_t, hypre_CSRMatrixNumNonzeros(A_local), + hypre_CSRMatrixMemoryLocation(A_local)); + for (i = 0; i < hypre_CSRMatrixNumNonzeros(A_local); i++) + { + slu_colidx[i] = (int_t) hypre_CSRMatrixBigJ(A_local)[i]; + } + } + else + { + slu_colidx = (int_t*) hypre_CSRMatrixBigJ(A_local); + } + + /* SuperLU uses dbl to denote its floating point type. Hence, the conversion/checks below: */ + if (sizeof(hypre_double) != sizeof(HYPRE_Complex)) + { + slu_data = hypre_CTAlloc(hypre_double, hypre_CSRMatrixNumNonzeros(A_local), + hypre_CSRMatrixMemoryLocation(A_local)); + for (i = 0; i < hypre_CSRMatrixNumNonzeros(A_local); i++) + { + slu_data[i] = (hypre_double) hypre_CSRMatrixData(A_local)[i]; + } + } + else + { + slu_data = (hypre_double*) hypre_CSRMatrixData(A_local); + } + + /* Now convert hypre matrix to a SuperMatrix */ dCreate_CompRowLoc_Matrix_dist( - &(dslu_data->A_dslu), global_num_rows, global_num_rows, - hypre_CSRMatrixNumNonzeros(A_local), - num_rows, - hypre_ParCSRMatrixFirstRowIndex(A), - hypre_CSRMatrixData(A_local), - hypre_CSRMatrixBigJ(A_local), big_rowptr, + &(dslu_data->A_dslu), + (int_t) global_num_rows, + (int_t) global_num_rows, + (int_t) hypre_CSRMatrixNumNonzeros(A_local), + (int_t) num_rows, + (int_t) hypre_ParCSRMatrixFirstRowIndex(A), + slu_data, + slu_colidx, + slu_rowptr, SLU_NR_loc, SLU_D, SLU_GE); /* DOK: SuperLU frees assigned data, so set them to null before - * calling hypre_CSRMatrixdestroy on A_local to avoid memory errors. - */ -#ifndef HYPRE_MIXEDINT - hypre_CSRMatrixI(A_local) = NULL; -#endif - hypre_CSRMatrixData(A_local) = NULL; - hypre_CSRMatrixBigJ(A_local) = NULL; + calling hypre_CSRMatrixdestroy on A_local to avoid memory errors. */ + if ((void*) slu_rowptr == (void*) hypre_CSRMatrixI(A_local)) + { + hypre_CSRMatrixI(A_local) = NULL; + } + if ((void*) slu_colidx == (void*) hypre_CSRMatrixBigJ(A_local)) + { + hypre_CSRMatrixBigJ(A_local) = NULL; + } + if ((void*) slu_data == (void*) hypre_CSRMatrixData(A_local)) + { + hypre_CSRMatrixData(A_local) = NULL; + } hypre_CSRMatrixDestroy(A_local); - /*Create process grid */ + /* Create process grid */ while (prows * pcols <= num_procs) { ++prows; } --prows; pcols = num_procs / prows; @@ -159,10 +204,13 @@ hypre_SLUDistSolve(void *solver, hypre_ParVector *x_host = NULL; HYPRE_Int size = hypre_VectorSize(hypre_ParVectorLocalVector(x)); HYPRE_Int nrhs = 1; + HYPRE_Int i; + + hypre_double *slu_data; hypre_ParVectorCopy(b, x); -#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL) +#if defined(HYPRE_USING_GPU) if (hypre_GetActualMemLocation(hypre_ParVectorMemoryLocation(x)) != hypre_MEMORY_HOST) { x_host = hypre_ParVectorCloneDeep_v2(x, HYPRE_MEMORY_HOST); @@ -174,12 +222,40 @@ hypre_SLUDistSolve(void *solver, x_data = hypre_VectorData(hypre_ParVectorLocalVector(x)); } - pdgssvx(&(dslu_data->dslu_options), &(dslu_data->A_dslu), - &(dslu_data->dslu_ScalePermstruct), x_data, size, nrhs, - &(dslu_data->dslu_data_grid), &(dslu_data->dslu_data_LU), - &(dslu_data->dslu_solve), dslu_data->berr, &(dslu_data->dslu_data_stat), &info); + /* SuperLU uses sbl to denote its floating point type. Hence, the conversion/checks below: */ + if (sizeof(hypre_double) != sizeof(HYPRE_Complex)) + { + slu_data = hypre_CTAlloc(hypre_double, size, HYPRE_MEMORY_HOST); + for (i = 0; i < size; i++) + { + slu_data[i] = (hypre_double) x_data[i]; + } + } + else + { + slu_data = (hypre_double*) x_data; + } -#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) || defined(HYPRE_USING_SYCL) + pdgssvx(&(dslu_data->dslu_options), + &(dslu_data->A_dslu), + &(dslu_data->dslu_ScalePermstruct), + slu_data, + (int_t) size, + (int_t) nrhs, + &(dslu_data->dslu_data_grid), + &(dslu_data->dslu_data_LU), + &(dslu_data->dslu_solve), + dslu_data->berr, + &(dslu_data->dslu_data_stat), + &info); + + /* Free memory */ + if ((void*) slu_data != (void*) x_data) + { + hypre_TFree(slu_data, HYPRE_MEMORY_HOST); + } + +#if defined(HYPRE_USING_GPU) if (x_host) { hypre_ParVectorCopy(x_host, x); @@ -198,7 +274,9 @@ hypre_SLUDistDestroy(void* solver) PStatFree(&(dslu_data->dslu_data_stat)); Destroy_CompRowLoc_Matrix_dist(&(dslu_data->A_dslu)); dScalePermstructFree(&(dslu_data->dslu_ScalePermstruct)); - dDestroy_LU(dslu_data->global_num_rows, &(dslu_data->dslu_data_grid), &(dslu_data->dslu_data_LU)); + dDestroy_LU(dslu_data->global_num_rows, + &(dslu_data->dslu_data_grid), + &(dslu_data->dslu_data_LU)); dLUstructFree(&(dslu_data->dslu_data_LU)); if (dslu_data->dslu_options.SolveInitialized) { diff --git a/src/parcsr_ls/dsuperlu.h b/src/parcsr_ls/dsuperlu.h index add1d2e09..74781b391 100644 --- a/src/parcsr_ls/dsuperlu.h +++ b/src/parcsr_ls/dsuperlu.h @@ -9,20 +9,22 @@ #define hypre_DSLU_DATA_HEADER #include "superlu_ddefs.h" + /*-------------------------------------------------------------------------- * hypre_DSLUData *--------------------------------------------------------------------------*/ + typedef struct { - HYPRE_BigInt global_num_rows; - SuperMatrix A_dslu; - HYPRE_Real *berr; - dLUstruct_t dslu_data_LU; - SuperLUStat_t dslu_data_stat; - superlu_dist_options_t dslu_options; - gridinfo_t dslu_data_grid; - dScalePermstruct_t dslu_ScalePermstruct; - dSOLVEstruct_t dslu_solve; + HYPRE_BigInt global_num_rows; + SuperMatrix A_dslu; + hypre_double *berr; + dLUstruct_t dslu_data_LU; + SuperLUStat_t dslu_data_stat; + superlu_dist_options_t dslu_options; + gridinfo_t dslu_data_grid; + dScalePermstruct_t dslu_ScalePermstruct; + dSOLVEstruct_t dslu_solve; } hypre_DSLUData; diff --git a/src/parcsr_ls/par_mgr_stats.c b/src/parcsr_ls/par_mgr_stats.c index 7026fdbc5..805e52782 100644 --- a/src/parcsr_ls/par_mgr_stats.c +++ b/src/parcsr_ls/par_mgr_stats.c @@ -364,7 +364,7 @@ hypre_MGRSetupStats(void *mgr_vdata) } #ifdef HYPRE_USING_DSUPERLU else if ((HYPRE_PtrToParSolverFcn) hypre_ParMGRDataCoarseGridSolverSetup(mgr_data) == - hypre_MGRDirectSolverSetup) + (HYPRE_PtrToParSolverFcn) hypre_MGRDirectSolverSetup) { /* TODO (VPM): Set SuperLU solver specifics */ num_sublevels_amg[coarsest_mgr_level] = 0;