GPU support with single precision (#572)

This PR fixes the GPU support with single precision.
2022-03-04 12:05:32 -08:00 · 2022-03-04 12:05:32 -08:00 · 95e6433fc7
commit 95e6433fc7
parent ebd6eb88c3
15 changed files with 800 additions and 74 deletions
--- a/AUTOTEST/machine-lassen.sh
+++ b/AUTOTEST/machine-lassen.sh
@ -18,7 +18,7 @@ case $1 in
   where: -h|-help   prints this usage information and exits
          {src_dir}  is the hypre source directory

-   This script runs a number of tests suitable for the syrah cluster.
+   This script runs a number of tests suitable for the lassen cluster.

   Example usage: $0 ../src

@ -67,6 +67,12 @@ co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enabl
 ./test.sh basic.sh $src_dir -co: $co -mo: $mo
 ./renametest.sh basic $output_dir/basic-cuda-um-shared

+# CUDA with UM and single precision
+co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
+ro="-single -rt -mpibind -save ${save}"
+./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
+./renametest.sh basic $output_dir/basic-cuda-um-single
+
 # CUDA with UM without MPI [no run]
 #co="--with-cuda --enable-unified-memory --without-MPI --with-gpu-arch=\\'60 70\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
 #./test.sh basic.sh $src_dir -co: $co -mo: $mo
@ -89,7 +95,7 @@ ro="-bench -struct -rt -mpibind -save ${save}"

 # OMP 4.5 without UM in debug mode [struct]
 co="--with-device-openmp --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
-ro="-struct -rt -mpibind -save ${host}"
+ro="-struct -rt -mpibind -save ${save}"
 ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
 ./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct

@ -116,6 +122,7 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
 ################################
 ## CUDA 11 build (only) tests ##
 ################################
+
 co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
 module -q load cuda/11
 module list cuda/11 |& grep "None found"
--- a/AUTOTEST/machine-ray.sh
+++ b/AUTOTEST/machine-ray.sh
@ -18,7 +18,7 @@ case $1 in
   where: -h|-help   prints this usage information and exits
          {src_dir}  is the hypre source directory

-   This script runs a number of tests suitable for the syrah cluster.
+   This script runs a number of tests suitable for the ray cluster.

   Example usage: $0 ../src

@ -67,6 +67,12 @@ co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enabl
 ./test.sh basic.sh $src_dir -co: $co -mo: $mo
 ./renametest.sh basic $output_dir/basic-cuda-um-shared

+# CUDA with UM and single precision
+co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
+ro="-single -rt -mpibind -save ${save}"
+./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
+./renametest.sh basic $output_dir/basic-cuda-um-single
+
 # CUDA with UM without MPI [no run]
 #co="--with-cuda --enable-unified-memory --without-MPI --with-gpu-arch=\\'60 70\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
 #./test.sh basic.sh $src_dir -co: $co -mo: $mo
@ -89,13 +95,14 @@ ro="-bench -struct -rt -mpibind -save ${save}"

 # OMP 4.5 without UM in debug mode [struct]
 co="--with-device-openmp --enable-debug --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
-ro="-struct -rt -mpibind -save ${host}"
+ro="-struct -rt -mpibind -save ${save}"
 ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
 ./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct

-#################################
-# CUDA + CMake build (only) tests
-#################################
+#####################################
+## CUDA + CMake build (only) tests ##
+#####################################
+
 mo="-j"
 # CUDA with UM + CMake
 co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
@ -112,9 +119,10 @@ co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_
 ./test.sh cmake.sh $src_dir -co: $co -mo: $mo
 ./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct

-############################
-# CUDA 11 build (only) tests
-############################
+################################
+## CUDA 11 build (only) tests ##
+################################
+
 co="--with-cuda --enable-unified-memory --with-gpu-arch=\\'60 70\\' --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CUFLAGS=\\'--Wno-deprecated-declarations\\'"
 module -q load cuda/11
 module list cuda/11 |& grep "None found"
--- a/AUTOTEST/machine-redwood.sh
+++ b/AUTOTEST/machine-redwood.sh
@ -11,14 +11,14 @@ case $1 in
   -h|-help)
      cat <<EOF

-   **** Only run this script on the lassen cluster ****
+   **** Only run this script on the redwood cluster ****

   $0 [-h|-help] {src_dir}

   where: -h|-help   prints this usage information and exits
          {src_dir}  is the hypre source directory

-   This script runs a number of tests suitable for the syrah cluster.
+   This script runs a number of tests suitable for the redwood cluster.

   Example usage: $0 ../src

@ -55,6 +55,11 @@ ro="-bench -struct -rt -save ${save} -D MV2_USE_CUDA=1"
 ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
 ./renametest.sh basic $output_dir/basic-hip-nonum

+# HIP with UM and single precision [no run]
+co="--with-hip --enable-unified-memory --enable-single --enable-debug --with-MPI-include=/opt/cray/pe/cray-mvapich2_nogpu/2.3.5/infiniband/cray/10.0/include --with-MPI-lib-dirs=/opt/cray/pe/cray-mvapich2_nogpu/2.3.5/infiniband/cray/10.0/lib --with-MPI-libs=mpi --with-gpu-arch=\\'gfx906,gfx908\\'"
+./test.sh basic.sh $src_dir -co: $co -mo: $mo
+./renametest.sh basic $output_dir/basic-hip-um-single
+
 # Echo to stderr all nonempty error files in $output_dir
 for errfile in $( find $output_dir ! -size 0 -name "*.err" )
 do
--- a/src/distributed_ls/Euclid/SubdomainGraph_dh.c
+++ b/src/distributed_ls/Euclid/SubdomainGraph_dh.c
@ -164,8 +164,8 @@ hypre_fprintf(stderr, "blocks= %i\n", blocks);
 void SubdomainGraph_dhPrintStatsLong(SubdomainGraph_dh s, FILE *fp)
 {
  START_FUNC_DH
-    HYPRE_Int i, j, k; 
-    HYPRE_Real max = 0, min = INT_MAX;
+    HYPRE_Int i, j, k;
+    HYPRE_Real max = 0, min = (HYPRE_Real) INT_MAX;

    hypre_fprintf(fp, "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
    hypre_fprintf(fp, "colors used     = %i\n", s->colors);
@ -243,7 +243,7 @@ void SubdomainGraph_dhPrintStatsLong(SubdomainGraph_dh s, FILE *fp)

  } else {
    /*-----------------------------------------
-     * local n2o_row permutation 
+     * local n2o_row permutation
     *-----------------------------------------*/
    hypre_fprintf(fp, "\nlocal n2o_row permutation:\n");
    hypre_fprintf(fp, "--------------------------\n");
@ -1427,7 +1427,7 @@ void SubdomainGraph_dhDump(SubdomainGraph_dh s, char *filename)
      hypre_fprintf(fp, "%i ", s->bdry_count[i]);
    }
    hypre_fprintf(fp, "\n");
-     
+
  }

  /* write subdomain graph */
@ -1567,7 +1567,7 @@ void find_bdry_nodes_seq_private(SubdomainGraph_dh s, HYPRE_Int m, void* A)
    tmp = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR;
    for (i=0; i<m; ++i) tmp[i] = 0;

-    /*------------------------------------------ 
+    /*------------------------------------------
     * mark all boundary nodes
     *------------------------------------------ */
    for (i=0; i<blocks; ++i) {
--- a/src/parcsr_ls/par_lr_interp_device.c
+++ b/src/parcsr_ls/par_lr_interp_device.c
@ -1013,7 +1013,7 @@ hypreDevice_extendWtoP( HYPRE_Int      P_nr_of_rows,
                      PC_i,
                      P_diag_j );

-   hypreDevice_ScatterConstant(P_diag_data, W_nr_of_cols, PC_i, 1.0);
+   hypreDevice_ScatterConstant(P_diag_data, W_nr_of_cols, PC_i, (HYPRE_Complex) 1.0);

   hypre_TFree(PC_i, HYPRE_MEMORY_DEVICE);
 }
--- a/src/seq_mv/csr_matop_device.c
+++ b/src/seq_mv/csr_matop_device.c
@ -1725,13 +1725,13 @@ hypre_CSRMatrixSortRow(hypre_CSRMatrix *A)
 * @param[in,out] *d_a_sorted On Start: Unsorted values. On Return: Sorted values corresponding with column indices
 */
 void
-hypre_SortCSRCusparse(       HYPRE_Int      n,
-                             HYPRE_Int      m,
-                             HYPRE_Int      nnzA,
-                             cusparseMatDescr_t descrA,
-                             const HYPRE_Int     *d_ia,
-                             HYPRE_Int     *d_ja_sorted,
-                             HYPRE_Complex *d_a_sorted )
+hypre_SortCSRCusparse( HYPRE_Int           n,
+                       HYPRE_Int           m,
+                       HYPRE_Int           nnzA,
+                       cusparseMatDescr_t  descrA,
+                       const HYPRE_Int     *d_ia,
+                       HYPRE_Int           *d_ja_sorted,
+                       HYPRE_Complex       *d_a_sorted )
 {
   cusparseHandle_t cusparsehandle = hypre_HandleCusparseHandle(hypre_handle());

@ -1741,10 +1741,8 @@ hypre_SortCSRCusparse(       HYPRE_Int      n,
   csru2csrInfo_t sortInfoA;
   HYPRE_CUSPARSE_CALL( cusparseCreateCsru2csrInfo(&sortInfoA) );

-   HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
-   HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
-
-   if (isDoublePrecision)
+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   {
      HYPRE_CUSPARSE_CALL( cusparseDcsru2csr_bufferSizeExt(cusparsehandle,
                                                           n, m, nnzA, d_a_sorted, d_ia, d_ja_sorted,
@ -1756,18 +1754,20 @@ hypre_SortCSRCusparse(       HYPRE_Int      n,
                                             n, m, nnzA, descrA, d_a_sorted, d_ia, d_ja_sorted,
                                             sortInfoA, pBuffer) );
   }
-   else if (isSinglePrecision)
+#elif defined(HYPRE_SINGLE)
   {
      HYPRE_CUSPARSE_CALL( cusparseScsru2csr_bufferSizeExt(cusparsehandle,
-                                                           n, m, nnzA, (float *) d_a_sorted, d_ia, d_ja_sorted,
+                                                           n, m, nnzA, d_a_sorted, d_ia, d_ja_sorted,
                                                           sortInfoA, &pBufferSizeInBytes));

      pBuffer = hypre_TAlloc(char, pBufferSizeInBytes, HYPRE_MEMORY_DEVICE);

      HYPRE_CUSPARSE_CALL( cusparseScsru2csr(cusparsehandle,
-                                             n, m, nnzA, descrA, (float *)d_a_sorted, d_ia, d_ja_sorted,
+                                             n, m, nnzA, descrA, d_a_sorted, d_ia, d_ja_sorted,
                                             sortInfoA, pBuffer) );
   }
+#endif
+#endif

   hypre_TFree(pBuffer, HYPRE_MEMORY_DEVICE);
   HYPRE_CUSPARSE_CALL(cusparseDestroyCsru2csrInfo(sortInfoA));
@ -1861,8 +1861,15 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char             uplo,
      {
         HYPRE_CUSPARSE_CALL( cusparseCreateCsrsv2Info(&hypre_CsrsvDataInfoL(csrsv_data)) );

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
         HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                                         nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
+#elif defined(HYPRE_SINGLE)
+         HYPRE_CUSPARSE_CALL( cusparseScsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                         nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
+#endif
+#endif

         if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
         {
@ -1873,10 +1880,19 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char             uplo,
            hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
         }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
         HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                                       nrow, nnzA, descr, A_sa, A_i, A_sj,
                                                       hypre_CsrsvDataInfoL(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
                                                       hypre_CsrsvDataBuffer(csrsv_data)) );
+#elif defined(HYPRE_SINGLE)
+         HYPRE_CUSPARSE_CALL( cusparseScsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                       nrow, nnzA, descr, A_sa, A_i, A_sj,
+                                                       hypre_CsrsvDataInfoL(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                       hypre_CsrsvDataBuffer(csrsv_data)) );
+#endif
+#endif

         cusparseStatus_t status = cusparseXcsrsv2_zeroPivot(handle, hypre_CsrsvDataInfoL(csrsv_data),
                                                             &structural_zero);
@ -1890,11 +1906,21 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char             uplo,
         }
      }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
      HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                                 nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
                                                 hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
                                                 CUSPARSE_SOLVE_POLICY_USE_LEVEL,
                                                 hypre_CsrsvDataBuffer(csrsv_data)) );
+#elif defined(HYPRE_SINGLE)
+      HYPRE_CUSPARSE_CALL( cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                 nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
+                                                 hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
+                                                 CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                 hypre_CsrsvDataBuffer(csrsv_data)) );
+#endif
+#endif
   }
   else
   {
@ -1904,8 +1930,15 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char             uplo,
      {
         HYPRE_CUSPARSE_CALL( cusparseCreateCsrsv2Info(&hypre_CsrsvDataInfoU(csrsv_data)) );

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
         HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                                         nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
+#elif defined(HYPRE_SINGLE)
+         HYPRE_CUSPARSE_CALL( cusparseScsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                         nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
+#endif
+#endif

         if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
         {
@ -1916,10 +1949,19 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char             uplo,
            hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
         }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
         HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                                       nrow, nnzA, descr, A_sa, A_i, A_sj,
                                                       hypre_CsrsvDataInfoU(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
                                                       hypre_CsrsvDataBuffer(csrsv_data)) );
+#elif defined(HYPRE_SINGLE)
+         HYPRE_CUSPARSE_CALL( cusparseScsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                       nrow, nnzA, descr, A_sa, A_i, A_sj,
+                                                       hypre_CsrsvDataInfoU(csrsv_data), CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                       hypre_CsrsvDataBuffer(csrsv_data)) );
+#endif
+#endif

         cusparseStatus_t status = cusparseXcsrsv2_zeroPivot(handle, hypre_CsrsvDataInfoU(csrsv_data),
                                                             &structural_zero);
@ -1929,15 +1971,24 @@ hypre_CSRMatrixTriLowerUpperSolveCusparse(char             uplo,
            hypre_sprintf(msg, "hypre_CSRMatrixTriLowerUpperSolveCusparse A(%d,%d) is missing\n",
                          structural_zero, structural_zero);
            hypre_error_w_msg(1, msg);
-            //hypre_assert(0);
         }
      }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
      HYPRE_CUSPARSE_CALL( cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                                 nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
                                                 hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
                                                 CUSPARSE_SOLVE_POLICY_USE_LEVEL,
                                                 hypre_CsrsvDataBuffer(csrsv_data)) );
+#elif defined(HYPRE_SINGLE)
+      HYPRE_CUSPARSE_CALL( cusparseScsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                                 nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
+                                                 hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
+                                                 CUSPARSE_SOLVE_POLICY_USE_LEVEL,
+                                                 hypre_CsrsvDataBuffer(csrsv_data)) );
+#endif
+#endif
   }

   return hypre_error_flag;
@ -2035,8 +2086,15 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char              uplo,
      {
         HYPRE_ROCSPARSE_CALL( rocsparse_create_mat_info(&hypre_CsrsvDataInfoL(csrsv_data)) );

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
         HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_buffer_size(handle, rocsparse_operation_none,
                                                            nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
+#elif defined(HYPRE_SINGLE)
+         HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_buffer_size(handle, rocsparse_operation_none,
+                                                            nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoL(csrsv_data), &buffer_size) );
+#endif
+#endif

         if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
         {
@ -2047,10 +2105,19 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char              uplo,
            hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
         }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
         HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_analysis(handle, rocsparse_operation_none,
                                                         nrow, nnzA, descr, A_sa, A_i, A_sj,
                                                         hypre_CsrsvDataInfoL(csrsv_data), rocsparse_analysis_policy_reuse,
                                                         rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
+#elif defined(HYPRE_SINGLE)
+         HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_analysis(handle, rocsparse_operation_none,
+                                                         nrow, nnzA, descr, A_sa, A_i, A_sj,
+                                                         hypre_CsrsvDataInfoL(csrsv_data), rocsparse_analysis_policy_reuse,
+                                                         rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
+#endif
+#endif

         rocsparse_status status = rocsparse_csrsv_zero_pivot(handle, descr,
                                                              hypre_CsrsvDataInfoL(csrsv_data), &structural_zero);
@ -2064,11 +2131,21 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char              uplo,
         }
      }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
      HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_solve(handle, rocsparse_operation_none,
                                                   nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
                                                   hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
                                                   rocsparse_solve_policy_auto,
                                                   hypre_CsrsvDataBuffer(csrsv_data)) );
+#elif defined(HYPRE_SINGLE)
+      HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_solve(handle, rocsparse_operation_none,
+                                                   nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
+                                                   hypre_CsrsvDataInfoL(csrsv_data), f_data, u_data,
+                                                   rocsparse_solve_policy_auto,
+                                                   hypre_CsrsvDataBuffer(csrsv_data)) );
+#endif
+#endif
   }
   else
   {
@ -2078,8 +2155,15 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char              uplo,
      {
         HYPRE_ROCSPARSE_CALL( rocsparse_create_mat_info(&hypre_CsrsvDataInfoU(csrsv_data)) );

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
         HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_buffer_size(handle, rocsparse_operation_none,
                                                            nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
+#elif defined(HYPRE_SINGLE)
+         HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_buffer_size(handle, rocsparse_operation_none,
+                                                            nrow, nnzA, descr, A_sa, A_i, A_sj, hypre_CsrsvDataInfoU(csrsv_data), &buffer_size) );
+#endif
+#endif

         if (hypre_CsrsvDataBufferSize(csrsv_data) < buffer_size)
         {
@ -2090,10 +2174,19 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char              uplo,
            hypre_CsrsvDataBufferSize(csrsv_data) = buffer_size;
         }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
         HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_analysis(handle, rocsparse_operation_none,
                                                         nrow, nnzA, descr, A_sa, A_i, A_sj,
                                                         hypre_CsrsvDataInfoU(csrsv_data), rocsparse_analysis_policy_reuse,
                                                         rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
+#elif defined(HYPRE_SINGLE)
+         HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_analysis(handle, rocsparse_operation_none,
+                                                         nrow, nnzA, descr, A_sa, A_i, A_sj,
+                                                         hypre_CsrsvDataInfoU(csrsv_data), rocsparse_analysis_policy_reuse,
+                                                         rocsparse_solve_policy_auto, hypre_CsrsvDataBuffer(csrsv_data)) );
+#endif
+#endif

         rocsparse_status status = rocsparse_csrsv_zero_pivot(handle, descr,
                                                              hypre_CsrsvDataInfoU(csrsv_data), &structural_zero);
@ -2107,11 +2200,21 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char              uplo,
         }
      }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
      HYPRE_ROCSPARSE_CALL( rocsparse_dcsrsv_solve(handle, rocsparse_operation_none,
                                                   nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
                                                   hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
                                                   rocsparse_solve_policy_auto,
                                                   hypre_CsrsvDataBuffer(csrsv_data)) );
+#elif defined(HYPRE_SINGLE)
+      HYPRE_ROCSPARSE_CALL( rocsparse_scsrsv_solve(handle, rocsparse_operation_none,
+                                                   nrow, nnzA, &alpha, descr, A_sa, A_i, A_sj,
+                                                   hypre_CsrsvDataInfoU(csrsv_data), f_data, u_data,
+                                                   rocsparse_solve_policy_auto,
+                                                   hypre_CsrsvDataBuffer(csrsv_data)) );
+#endif
+#endif
   }

   return hypre_error_flag;
@ -2126,13 +2229,13 @@ hypre_CSRMatrixTriLowerUpperSolveRocsparse(char              uplo,
 * @param[in,out] *d_a_sorted On Start: Unsorted values. On Return: Sorted values corresponding with column indices
 */
 void
-hypre_SortCSRRocsparse(       HYPRE_Int      n,
-                              HYPRE_Int      m,
-                              HYPRE_Int      nnzA,
-                              rocsparse_mat_descr descrA,
-                              const HYPRE_Int     *d_ia,
-                              HYPRE_Int     *d_ja_sorted,
-                              HYPRE_Complex *d_a_sorted )
+hypre_SortCSRRocsparse( HYPRE_Int            n,
+                        HYPRE_Int            m,
+                        HYPRE_Int            nnzA,
+                        rocsparse_mat_descr  descrA,
+                        const HYPRE_Int     *d_ia,
+                        HYPRE_Int           *d_ja_sorted,
+                        HYPRE_Complex       *d_a_sorted )
 {
   rocsparse_handle handle = hypre_HandleCusparseHandle(hypre_handle());

@ -2140,9 +2243,6 @@ hypre_SortCSRRocsparse(       HYPRE_Int      n,
   void *pBuffer = NULL;
   HYPRE_Int *P = NULL;

-   HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
-   HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
-
   // FIXME: There is no in-place version of csr sort in rocSPARSE currently, so we make
   //        a temporary copy of the data for gthr, sort that, and then copy the sorted values
   //        back to the array being returned. Where there is an in-place version available,
@ -2160,16 +2260,19 @@ hypre_SortCSRRocsparse(       HYPRE_Int      n,
   HYPRE_ROCSPARSE_CALL( rocsparse_csrsort(handle, n, m, nnzA, descrA, d_ia, d_ja_sorted, P,
                                           pBuffer) );

-   if (isDoublePrecision)
+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   {
      HYPRE_ROCSPARSE_CALL( rocsparse_dgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
                                            rocsparse_index_base_zero) );
   }
-   else if (isSinglePrecision)
+#elif defined(HYPRE_SINGLE)
   {
-      HYPRE_ROCSPARSE_CALL( rocsparse_sgthr(handle, nnzA, (float *) d_a_sorted, (float *) d_a_tmp, P,
+      HYPRE_ROCSPARSE_CALL( rocsparse_sgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
                                            rocsparse_index_base_zero) );
   }
+#endif
+#endif

   hypre_TFree(pBuffer, HYPRE_MEMORY_DEVICE);
   hypre_TFree(P, HYPRE_MEMORY_DEVICE);
@ -2183,7 +2286,8 @@ hypre_SortCSRRocsparse(       HYPRE_Int      n,
 void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix)
 {
 #if defined(HYPRE_USING_ROCSPARSE)
-
+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   HYPRE_ROCSPARSE_CALL( rocsparse_dcsrmv_analysis(hypre_HandleCusparseHandle(hypre_handle()),
                                                   rocsparse_operation_none,
                                                   hypre_CSRMatrixNumRows(matrix),
@ -2194,6 +2298,19 @@ void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix)
                                                   hypre_CSRMatrixI(matrix),
                                                   hypre_CSRMatrixJ(matrix),
                                                   hypre_CSRMatrixGPUMatInfo(matrix)) );
-
+#elif defined(HYPRE_SINGLE)
+   HYPRE_ROCSPARSE_CALL( rocsparse_scsrmv_analysis(hypre_HandleCusparseHandle(hypre_handle()),
+                                                   rocsparse_operation_none,
+                                                   hypre_CSRMatrixNumRows(matrix),
+                                                   hypre_CSRMatrixNumCols(matrix),
+                                                   hypre_CSRMatrixNumNonzeros(matrix),
+                                                   hypre_CSRMatrixGPUMatDescr(matrix),
+                                                   hypre_CSRMatrixData(matrix),
+                                                   hypre_CSRMatrixI(matrix),
+                                                   hypre_CSRMatrixJ(matrix),
+                                                   hypre_CSRMatrixGPUMatInfo(matrix)) );
+#endif
+#endif
 #endif // #if defined(HYPRE_USING_ROCSPARSE)
 }
+
--- a/src/seq_mv/csr_matvec_device.c
+++ b/src/seq_mv/csr_matvec_device.c
@ -241,6 +241,8 @@ hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int        trans,
      B = A;
   }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   HYPRE_CUSPARSE_CALL( cusparseDcsrmv(handle,
                                       CUSPARSE_OPERATION_NON_TRANSPOSE,
                                       hypre_CSRMatrixNumRows(B) - offset,
@ -254,7 +256,22 @@ hypre_CSRMatrixMatvecCusparseOldAPI( HYPRE_Int        trans,
                                       hypre_VectorData(x),
                                       &beta,
                                       hypre_VectorData(y) + offset) );
-
+#elif defined(HYPRE_SINGLE)
+   HYPRE_CUSPARSE_CALL( cusparseScsrmv(handle,
+                                       CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                       hypre_CSRMatrixNumRows(B) - offset,
+                                       hypre_CSRMatrixNumCols(B),
+                                       hypre_CSRMatrixNumNonzeros(B),
+                                       &alpha,
+                                       descr,
+                                       hypre_CSRMatrixData(B),
+                                       hypre_CSRMatrixI(B) + offset,
+                                       hypre_CSRMatrixJ(B),
+                                       hypre_VectorData(x),
+                                       &beta,
+                                       hypre_VectorData(y) + offset) );
+#endif
+#endif

   if (trans)
   {
@ -292,6 +309,8 @@ hypre_CSRMatrixMatvecRocsparse( HYPRE_Int        trans,
      B = A;
   }

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   HYPRE_ROCSPARSE_CALL( rocsparse_dcsrmv(handle,
                                          rocsparse_operation_none,
                                          hypre_CSRMatrixNumRows(B) - offset,
@ -306,6 +325,23 @@ hypre_CSRMatrixMatvecRocsparse( HYPRE_Int        trans,
                                          hypre_VectorData(x),
                                          &beta,
                                          hypre_VectorData(y) + offset) );
+#elif defined(HYPRE_SINGLE)
+   HYPRE_ROCSPARSE_CALL( rocsparse_scsrmv(handle,
+                                          rocsparse_operation_none,
+                                          hypre_CSRMatrixNumRows(B) - offset,
+                                          hypre_CSRMatrixNumCols(B),
+                                          hypre_CSRMatrixNumNonzeros(B),
+                                          &alpha,
+                                          descr,
+                                          hypre_CSRMatrixData(B),
+                                          hypre_CSRMatrixI(B) + offset,
+                                          hypre_CSRMatrixJ(B),
+                                          info,
+                                          hypre_VectorData(x),
+                                          &beta,
+                                          hypre_VectorData(y) + offset) );
+#endif
+#endif

   if (trans)
   {
--- a/src/seq_mv/csr_spgemm_device_cusparse.c
+++ b/src/seq_mv/csr_spgemm_device_cusparse.c
@ -208,11 +208,6 @@ hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int          m,
   cusparseOperation_t transA = CUSPARSE_OPERATION_NON_TRANSPOSE;
   cusparseOperation_t transB = CUSPARSE_OPERATION_NON_TRANSPOSE;

-   HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
-   HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
-
-   hypre_assert(isDoublePrecision || isSinglePrecision);
-
   /* Copy the unsorted over as the initial "sorted" */
   hypre_TMemcpy(d_ja_sorted, d_ja, HYPRE_Int,     nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
   hypre_TMemcpy(d_a_sorted,  d_a,  HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
@ -250,20 +245,23 @@ hypreDevice_CSRSpGemmCusparseOldAPI(HYPRE_Int          m,
   d_jc = hypre_TAlloc(HYPRE_Int,     nnzC, HYPRE_MEMORY_DEVICE);
   d_c  = hypre_TAlloc(HYPRE_Complex, nnzC, HYPRE_MEMORY_DEVICE);

-   if (isDoublePrecision)
+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   {
      HYPRE_CUSPARSE_CALL( cusparseDcsrgemm(cusparsehandle, transA, transB, m, n, k,
                                            descr_A, nnzA, d_a_sorted, d_ia, d_ja_sorted,
                                            descr_B, nnzB, d_b_sorted, d_ib, d_jb_sorted,
                                            descr_C,       d_c, d_ic, d_jc) );
   }
-   else if (isSinglePrecision)
+#elif defined(HYPRE_SINGLE)
   {
      HYPRE_CUSPARSE_CALL( cusparseScsrgemm(cusparsehandle, transA, transB, m, n, k,
-                                            descr_A, nnzA, (float *) d_a_sorted, d_ia, d_ja_sorted,
-                                            descr_B, nnzB, (float *) d_b_sorted, d_ib, d_jb_sorted,
-                                            descr_C,       (float *) d_c, d_ic, d_jc) );
+                                            descr_A, nnzA, d_a_sorted, d_ia, d_ja_sorted,
+                                            descr_B, nnzB, d_b_sorted, d_ib, d_jb_sorted,
+                                            descr_C, d_c, d_ic, d_jc) );
   }
+#endif
+#endif

   *d_ic_out = d_ic;
   *d_jc_out = d_jc;
--- a/src/seq_mv/csr_spgemm_device_rocsparse.c
+++ b/src/seq_mv/csr_spgemm_device_rocsparse.c
@ -46,11 +46,6 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int           m,
   rocsparse_operation transA = rocsparse_operation_none;
   rocsparse_operation transB = rocsparse_operation_none;

-   HYPRE_Int isDoublePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double);
-   HYPRE_Int isSinglePrecision = sizeof(HYPRE_Complex) == sizeof(hypre_double) / 2;
-
-   hypre_assert(isDoublePrecision || isSinglePrecision);
-
   /* Copy the unsorted over as the initial "sorted" */
   hypre_TMemcpy(d_ja_sorted, d_ja, HYPRE_Int,     nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
   hypre_TMemcpy(d_a_sorted,  d_a,  HYPRE_Complex, nnzA, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE);
@ -82,7 +77,8 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int           m,
   size_t rs_buffer_size = 0;
   void *rs_buffer;

-   if (isDoublePrecision)
+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   {
      HYPRE_ROCSPARSE_CALL( rocsparse_dcsrgemm_buffer_size(handle,
                                                           transA, transB,
@ -94,17 +90,19 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int           m,
                                                           NULL,   0,    NULL, NULL, // D is nothing
                                                           infoC, &rs_buffer_size) );
   }
-   else if (isSinglePrecision)
+#elif defined(HYPRE_SINGLE)
   {
      HYPRE_ROCSPARSE_CALL( rocsparse_scsrgemm_buffer_size(handle, transA, transB,
                                                           m, n, k,
-                                                           (float *) &alpha, // \alpha = 1
+                                                           &alpha, // \alpha = 1
                                                           descrA, nnzA, d_ia, d_ja_sorted,
                                                           descrB, nnzB, d_ib, d_jb_sorted,
                                                           NULL, // \beta = 0
                                                           NULL,   0,    NULL, NULL,
                                                           infoC, &rs_buffer_size) );
   }
+#endif
+#endif

   rs_buffer = hypre_TAlloc(char, rs_buffer_size, HYPRE_MEMORY_DEVICE);

@ -133,7 +131,8 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int           m,
   d_jc = hypre_TAlloc(HYPRE_Int,     nnzC, HYPRE_MEMORY_DEVICE);
   d_c  = hypre_TAlloc(HYPRE_Complex, nnzC, HYPRE_MEMORY_DEVICE);

-   if (isDoublePrecision)
+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   {
      HYPRE_ROCSPARSE_CALL( rocsparse_dcsrgemm(handle, transA, transB,
                                               m, n, k,
@ -145,18 +144,20 @@ hypreDevice_CSRSpGemmRocsparse(HYPRE_Int           m,
                                               descrC,       d_c, d_ic, d_jc,
                                               infoC, rs_buffer) );
   }
-   else if (isSinglePrecision)
+#elif defined(HYPRE_SINGLE)
   {
      HYPRE_ROCSPARSE_CALL( rocsparse_scsrgemm(handle, transA, transB,
                                               m, n, k,
-                                               (float *) &alpha, // alpha = 1
-                                               descrA, nnzA, (float *) d_a_sorted, d_ia, d_ja_sorted,
-                                               descrB, nnzB, (float *) d_b_sorted, d_ib, d_jb_sorted,
+                                               &alpha, // alpha = 1
+                                               descrA, nnzA, d_a_sorted, d_ia, d_ja_sorted,
+                                               descrB, nnzB, d_b_sorted, d_ib, d_jb_sorted,
                                               NULL, // beta = 0
                                               NULL,   0,    NULL,       NULL, NULL, // D is nothing
-                                               descrC,       (float *) d_c, d_ic, d_jc,
+                                               descrC,       d_c, d_ic, d_jc,
                                               infoC, rs_buffer) );
   }
+#endif
+#endif

   // Free up the memory needed by rocsparse
   hypre_TFree(rs_buffer, HYPRE_MEMORY_DEVICE);
--- a/src/seq_mv/csr_sptrans_device.c
+++ b/src/seq_mv/csr_sptrans_device.c
@ -64,13 +64,24 @@ hypreDevice_CSRSpTransCusparse(HYPRE_Int   m,        HYPRE_Int   n,        HYPRE

   hypre_TFree(dBuffer, HYPRE_MEMORY_DEVICE);
 #else
+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   HYPRE_CUSPARSE_CALL( cusparseDcsr2csc(handle,
                                         m, n, nnzA,
                                         d_aa, d_ia, d_ja,
                                         csc_a, csc_j, csc_i,
                                         action,
                                         CUSPARSE_INDEX_BASE_ZERO) );
+#elif defined(HYPRE_SINGLE)
+   HYPRE_CUSPARSE_CALL( cusparseScsr2csc(handle,
+                                         m, n, nnzA,
+                                         d_aa, d_ia, d_ja,
+                                         csc_a, csc_j, csc_i,
+                                         action,
+                                         CUSPARSE_INDEX_BASE_ZERO) );
 #endif
+#endif /* #if !defined(HYPRE_COMPLEX) */
+#endif /* #if CUSPARSE_VERSION >= CUSPARSE_NEWAPI_VERSION */

   *d_ic_out = csc_i;
   *d_jc_out = csc_j;
@ -124,6 +135,8 @@ hypreDevice_CSRSpTransRocsparse(HYPRE_Int   m,        HYPRE_Int   n,        HYPR
   void * buffer;
   buffer = hypre_TAlloc(char, buffer_size, HYPRE_MEMORY_DEVICE);

+#if !defined(HYPRE_COMPLEX)
+#if !defined(HYPRE_SINGLE) && !defined(HYPRE_LONG_DOUBLE)
   HYPRE_ROCSPARSE_CALL( rocsparse_dcsr2csc(handle,
                                            m, n, nnzA,
                                            d_aa, d_ia, d_ja,
@ -131,6 +144,17 @@ hypreDevice_CSRSpTransRocsparse(HYPRE_Int   m,        HYPRE_Int   n,        HYPR
                                            action,
                                            rocsparse_index_base_zero,
                                            buffer) );
+#elif defined(HYPRE_SINGLE)
+   HYPRE_ROCSPARSE_CALL( rocsparse_scsr2csc(handle,
+                                            m, n, nnzA,
+                                            d_aa, d_ia, d_ja,
+                                            csc_a, csc_j, csc_i,
+                                            action,
+                                            rocsparse_index_base_zero,
+                                            buffer) );
+#endif
+#endif /* #if !defined(HYPRE_COMPLEX) */
+
   hypre_TFree(buffer, HYPRE_MEMORY_DEVICE);

   *d_ic_out = csc_i;
--- a/src/test/TEST_single/solvers_ij.saved.lassen
+++ b/src/test/TEST_single/solvers_ij.saved.lassen
@ -0,0 +1,145 @@
+# Output file: solvers_ij.out.0
+Iterations = 6
+Final Relative Residual Norm = 3.800163e-05
+
+# Output file: solvers_ij.out.1
+Iterations = 26
+Final Relative Residual Norm = 7.193501e-05
+
+# Output file: solvers_ij.out.2
+GMRES Iterations = 6
+Final GMRES Relative Residual Norm = 4.962838e-05
+
+# Output file: solvers_ij.out.3
+GMRES Iterations = 39
+Final GMRES Relative Residual Norm = 9.043231e-05
+
+# Output file: solvers_ij.out.4
+Iterations = 5
+Final Relative Residual Norm = 1.785553e-05
+
+# Output file: solvers_ij.out.5
+Iterations = 103
+Final Relative Residual Norm = 8.784425e-05
+
+# Output file: solvers_ij.out.6
+GMRES Iterations = 15
+Final GMRES Relative Residual Norm = 7.131146e-05
+
+# Output file: solvers_ij.out.7
+Iterations = 13
+Final Relative Residual Norm = 7.750613e-05
+
+# Output file: solvers_ij.out.8
+Iterations = 26
+PCG_Iterations = 0
+DSCG_Iterations = 26
+Final Relative Residual Norm = 7.193501e-05
+
+# Output file: solvers_ij.out.9
+Iterations = 7
+PCG_Iterations = 3
+DSCG_Iterations = 4
+Final Relative Residual Norm = 8.301256e-05
+
+# Output file: solvers_ij.out.10
+Iterations = 6
+PCG_Iterations = 4
+DSCG_Iterations = 2
+Final Relative Residual Norm = 2.785249e-05
+
+# Output file: solvers_ij.out.11
+Iterations = 5
+PCG_Iterations = 2
+DSCG_Iterations = 3
+Final Relative Residual Norm = 1.361495e-05
+
+# Output file: solvers_ij.out.sysh
+ Average Convergence Factor = 0.196477
+
+     Complexity:    grid = 1.392875
+                operator = 2.633675
+                   cycle = 5.267332
+
+# Output file: solvers_ij.out.sysn
+ Average Convergence Factor = 0.533116
+
+     Complexity:    grid = 1.390750
+                operator = 2.080112
+                   cycle = 10.160150
+
+# Output file: solvers_ij.out.sysu
+ Average Convergence Factor = 0.737621
+
+     Complexity:    grid = 1.390813
+                operator = 2.718671
+                   cycle = 5.437173
+
+# Output file: solvers_ij.out.101
+LGMRES Iterations = 39
+Final LGMRES Relative Residual Norm = 7.229704e-05
+
+# Output file: solvers_ij.out.102
+LGMRES Iterations = 6
+Final LGMRES Relative Residual Norm = 4.962838e-05
+
+# Output file: solvers_ij.out.103
+FlexGMRES Iterations = 39
+Final FlexGMRES Relative Residual Norm = 9.043153e-05
+
+# Output file: solvers_ij.out.104
+FlexGMRES Iterations = 6
+Final FlexGMRES Relative Residual Norm = 4.961434e-05
+
+# Output file: solvers_ij.out.105
+Iterations = 11
+Final Relative Residual Norm = 4.022052e-05
+
+# Output file: solvers_ij.out.106
+Iterations = 11
+Final Relative Residual Norm = 4.022052e-05
+
+# Output file: solvers_ij.out.107
+Iterations = 16
+Final Relative Residual Norm = 6.623256e-05
+
+# Output file: solvers_ij.out.108
+Iterations = 16
+Final Relative Residual Norm = 6.623272e-05
+
+# Output file: solvers_ij.out.109
+Iterations = 11
+Final Relative Residual Norm = 7.168805e-05
+
+# Output file: solvers_ij.out.110
+Iterations = 11
+Final Relative Residual Norm = 7.168810e-05
+
+# Output file: solvers_ij.out.111
+Iterations = 17
+Final Relative Residual Norm = 7.756719e-05
+
+# Output file: solvers_ij.out.112
+GMRES Iterations = 21
+Final GMRES Relative Residual Norm = 7.660792e-05
+
+# Output file: solvers_ij.out.113
+GMRES Iterations = 14
+Final GMRES Relative Residual Norm = 9.868194e-05
+
+# Output file: solvers_ij.out.114
+BoomerAMG Iterations = 17
+Final Relative Residual Norm = 9.048652e-05
+
+# Output file: solvers_ij.out.115
+BoomerAMG Iterations = 17
+Final Relative Residual Norm = 9.091324e-05
+
+# Output file: solvers_ij.out.116
+GMRES Iterations = 8
+Final GMRES Relative Residual Norm = 7.735370e-05
+
+# Output file: solvers_ij.out.117
+GMRES Iterations = 8
+Final GMRES Relative Residual Norm = 7.728816e-05
+
--- a/src/test/TEST_single/solvers_ij.saved.ray
+++ b/src/test/TEST_single/solvers_ij.saved.ray
@ -0,0 +1,145 @@
+# Output file: solvers_ij.out.0
+Iterations = 6
+Final Relative Residual Norm = 3.800161e-05
+
+# Output file: solvers_ij.out.1
+Iterations = 26
+Final Relative Residual Norm = 7.193501e-05
+
+# Output file: solvers_ij.out.2
+GMRES Iterations = 6
+Final GMRES Relative Residual Norm = 4.962170e-05
+
+# Output file: solvers_ij.out.3
+GMRES Iterations = 39
+Final GMRES Relative Residual Norm = 9.043231e-05
+
+# Output file: solvers_ij.out.4
+Iterations = 5
+Final Relative Residual Norm = 1.785550e-05
+
+# Output file: solvers_ij.out.5
+Iterations = 103
+Final Relative Residual Norm = 8.784425e-05
+
+# Output file: solvers_ij.out.6
+GMRES Iterations = 15
+Final GMRES Relative Residual Norm = 7.131146e-05
+
+# Output file: solvers_ij.out.7
+Iterations = 13
+Final Relative Residual Norm = 7.750613e-05
+
+# Output file: solvers_ij.out.8
+Iterations = 26
+PCG_Iterations = 0
+DSCG_Iterations = 26
+Final Relative Residual Norm = 7.193501e-05
+
+# Output file: solvers_ij.out.9
+Iterations = 7
+PCG_Iterations = 3
+DSCG_Iterations = 4
+Final Relative Residual Norm = 8.301259e-05
+
+# Output file: solvers_ij.out.10
+Iterations = 6
+PCG_Iterations = 4
+DSCG_Iterations = 2
+Final Relative Residual Norm = 2.785446e-05
+
+# Output file: solvers_ij.out.11
+Iterations = 5
+PCG_Iterations = 2
+DSCG_Iterations = 3
+Final Relative Residual Norm = 1.361500e-05
+
+# Output file: solvers_ij.out.sysh
+ Average Convergence Factor = 0.193839
+
+     Complexity:    grid = 1.392875
+                operator = 2.632649
+                   cycle = 5.265280
+
+# Output file: solvers_ij.out.sysn
+ Average Convergence Factor = 0.533116
+
+     Complexity:    grid = 1.390750
+                operator = 2.080112
+                   cycle = 10.160150
+
+# Output file: solvers_ij.out.sysu
+ Average Convergence Factor = 0.739514
+
+     Complexity:    grid = 1.390563
+                operator = 2.717318
+                   cycle = 5.434468
+
+# Output file: solvers_ij.out.101
+LGMRES Iterations = 39
+Final LGMRES Relative Residual Norm = 7.229704e-05
+
+# Output file: solvers_ij.out.102
+LGMRES Iterations = 6
+Final LGMRES Relative Residual Norm = 4.962170e-05
+
+# Output file: solvers_ij.out.103
+FlexGMRES Iterations = 39
+Final FlexGMRES Relative Residual Norm = 9.043153e-05
+
+# Output file: solvers_ij.out.104
+FlexGMRES Iterations = 6
+Final FlexGMRES Relative Residual Norm = 4.961419e-05
+
+# Output file: solvers_ij.out.105
+Iterations = 11
+Final Relative Residual Norm = 4.434195e-05
+
+# Output file: solvers_ij.out.106
+Iterations = 11
+Final Relative Residual Norm = 4.434195e-05
+
+# Output file: solvers_ij.out.107
+Iterations = 16
+Final Relative Residual Norm = 6.671497e-05
+
+# Output file: solvers_ij.out.108
+Iterations = 16
+Final Relative Residual Norm = 6.671443e-05
+
+# Output file: solvers_ij.out.109
+Iterations = 11
+Final Relative Residual Norm = 7.168811e-05
+
+# Output file: solvers_ij.out.110
+Iterations = 11
+Final Relative Residual Norm = 7.168806e-05
+
+# Output file: solvers_ij.out.111
+Iterations = 17
+Final Relative Residual Norm = 7.756725e-05
+
+# Output file: solvers_ij.out.112
+GMRES Iterations = 21
+Final GMRES Relative Residual Norm = 7.643850e-05
+
+# Output file: solvers_ij.out.113
+GMRES Iterations = 14
+Final GMRES Relative Residual Norm = 9.851967e-05
+
+# Output file: solvers_ij.out.114
+BoomerAMG Iterations = 18
+Final Relative Residual Norm = 6.353526e-05
+
+# Output file: solvers_ij.out.115
+BoomerAMG Iterations = 18
+Final Relative Residual Norm = 6.077210e-05
+
+# Output file: solvers_ij.out.116
+GMRES Iterations = 8
+Final GMRES Relative Residual Norm = 7.078722e-05
+
+# Output file: solvers_ij.out.117
+GMRES Iterations = 8
+Final GMRES Relative Residual Norm = 7.086178e-05
+
--- a/src/test/TEST_single/solvers_struct.saved.lassen
+++ b/src/test/TEST_single/solvers_struct.saved.lassen
@ -0,0 +1,120 @@
+# Output file: solvers_struct.out.0
+Iterations = 3
+Final Relative Residual Norm = 3.246689e-05
+
+# Output file: solvers_struct.out.1
+Iterations = 6
+Final Relative Residual Norm = 2.055851e-05
+
+# Output file: solvers_struct.out.2
+Iterations = 16
+Final Relative Residual Norm = 5.377654e-05
+
+# Output file: solvers_struct.out.3
+Iterations = 16
+Final Relative Residual Norm = 3.718371e-05
+
+# Output file: solvers_struct.out.4
+Iterations = 16
+Final Relative Residual Norm = 3.718370e-05
+
+# Output file: solvers_struct.out.10.lobpcg
+Iterations = 3
+Final Relative Residual Norm = 6.275833e-06
+
+# Output file: solvers_struct.out.10.lobpcg.1
+Eigenvalue lambda   1.84366509318352e-01
+Residual   2.48082087637158e-05
+
+# Output file: solvers_struct.out.10.lobpcg.3
+Iteration 10 	bsize 2 	maxres   4.33511973824352e-04
+Iteration 11 	bsize 1 	maxres   2.04666575882584e-04
+Iteration 12 	bsize 1 	maxres   8.50733777042478e-05
+
+Eigenvalue lambda   1.84366509318352e-01
+Eigenvalue lambda   2.50882238149643e-01
+Eigenvalue lambda   3.60091090202332e-01
+Residual   7.41478434065357e-05
+Residual   4.07401748816483e-05
+Residual   8.50733777042478e-05
+
+# Output file: solvers_struct.out.11.lobpcg
+Iterations = 6
+Final Relative Residual Norm = 2.112819e-05
+
+# Output file: solvers_struct.out.11.lobpcg.1
+Eigenvalue lambda   1.84366583824158e-01
+Residual   3.15948745992500e-05
+
+# Output file: solvers_struct.out.11.lobpcg.3
+Iteration 11 	bsize 2 	maxres   6.90118235070258e-04
+Iteration 12 	bsize 2 	maxres   2.52081663347781e-04
+Iteration 13 	bsize 1 	maxres   7.02887409715913e-05
+
+Eigenvalue lambda   1.84366479516029e-01
+Eigenvalue lambda   2.50883370637894e-01
+Eigenvalue lambda   3.60090911388397e-01
+Residual   5.58231040486135e-05
+Residual   2.60377983067883e-05
+Residual   7.02887409715913e-05
+
+# Output file: solvers_struct.out.17.lobpcg
+Iterations = 17
+Final Relative Residual Norm = 8.241194e-07
+
+# Output file: solvers_struct.out.17.lobpcg.1
+Eigenvalue lambda   1.84366509318352e-01
+Residual   1.95705306396121e-05
+
+# Output file: solvers_struct.out.17.lobpcg.3
+Iteration 10 	bsize 2 	maxres   3.62064485670999e-04
+Iteration 11 	bsize 1 	maxres   1.69921870110556e-04
+Iteration 12 	bsize 1 	maxres   7.11168977431953e-05
+
+Eigenvalue lambda   1.84366405010223e-01
+Eigenvalue lambda   2.50881940126419e-01
+Eigenvalue lambda   3.60090613365173e-01
+Residual   5.52630408492405e-05
+Residual   3.09487622871529e-05
+Residual   7.11168977431953e-05
+
+# Output file: solvers_struct.out.18.lobpcg
+Iterations = 32
+Final Relative Residual Norm = 8.266953e-07
+
+# Output file: solvers_struct.out.18.lobpcg.1
+Eigenvalue lambda   1.84366241097450e-01
+Residual   4.44491524831392e-05
+
+# Output file: solvers_struct.out.18.lobpcg.3
+Iteration 10 	bsize 2 	maxres   5.81342901568860e-04
+Iteration 11 	bsize 1 	maxres   1.98838606593199e-04
+Iteration 12 	bsize 1 	maxres   9.27079236134887e-05
+
+Eigenvalue lambda   1.84366494417191e-01
+Eigenvalue lambda   2.50879585742950e-01
+Eigenvalue lambda   3.60090494155884e-01
+Residual   9.27079236134887e-05
+Residual   8.72101882123388e-05
+Residual   5.49681753909681e-05
+
+# Output file: solvers_struct.out.19.lobpcg
+Iterations = 25
+Final Relative Residual Norm = 7.712439e-05
+
+# Output file: solvers_struct.out.19.lobpcg.1
+Eigenvalue lambda   1.84366539120674e-01
+Residual   4.44510842498858e-05
+
+# Output file: solvers_struct.out.19.lobpcg.3
+Iteration 10 	bsize 2 	maxres   5.81450236495584e-04
+Iteration 11 	bsize 1 	maxres   1.98705645743757e-04
+Iteration 12 	bsize 1 	maxres   9.26581269595772e-05
+
+Eigenvalue lambda   1.84366509318352e-01
+Eigenvalue lambda   2.50874906778336e-01
+Eigenvalue lambda   3.60090017318726e-01
+Residual   9.26581269595772e-05
+Residual   8.80578954820521e-05
+Residual   5.49828182556666e-05
+
--- a/src/test/TEST_single/solvers_struct.saved.ray
+++ b/src/test/TEST_single/solvers_struct.saved.ray
@ -0,0 +1,120 @@
+# Output file: solvers_struct.out.0
+Iterations = 3
+Final Relative Residual Norm = 3.246689e-05
+
+# Output file: solvers_struct.out.1
+Iterations = 6
+Final Relative Residual Norm = 2.055851e-05
+
+# Output file: solvers_struct.out.2
+Iterations = 16
+Final Relative Residual Norm = 5.377654e-05
+
+# Output file: solvers_struct.out.3
+Iterations = 16
+Final Relative Residual Norm = 3.718371e-05
+
+# Output file: solvers_struct.out.4
+Iterations = 16
+Final Relative Residual Norm = 3.718370e-05
+
+# Output file: solvers_struct.out.10.lobpcg
+Iterations = 3
+Final Relative Residual Norm = 6.275833e-06
+
+# Output file: solvers_struct.out.10.lobpcg.1
+Eigenvalue lambda   1.84366509318352e-01
+Residual   2.48082087637158e-05
+
+# Output file: solvers_struct.out.10.lobpcg.3
+Iteration 10 	bsize 2 	maxres   4.33511973824352e-04
+Iteration 11 	bsize 1 	maxres   2.04666575882584e-04
+Iteration 12 	bsize 1 	maxres   8.50733777042478e-05
+
+Eigenvalue lambda   1.84366509318352e-01
+Eigenvalue lambda   2.50882238149643e-01
+Eigenvalue lambda   3.60091090202332e-01
+Residual   7.41478434065357e-05
+Residual   4.07401748816483e-05
+Residual   8.50733777042478e-05
+
+# Output file: solvers_struct.out.11.lobpcg
+Iterations = 6
+Final Relative Residual Norm = 2.112819e-05
+
+# Output file: solvers_struct.out.11.lobpcg.1
+Eigenvalue lambda   1.84366583824158e-01
+Residual   3.15948745992500e-05
+
+# Output file: solvers_struct.out.11.lobpcg.3
+Iteration 11 	bsize 2 	maxres   6.90118235070258e-04
+Iteration 12 	bsize 2 	maxres   2.52081663347781e-04
+Iteration 13 	bsize 1 	maxres   7.02887409715913e-05
+
+Eigenvalue lambda   1.84366479516029e-01
+Eigenvalue lambda   2.50883370637894e-01
+Eigenvalue lambda   3.60090911388397e-01
+Residual   5.58231040486135e-05
+Residual   2.60377983067883e-05
+Residual   7.02887409715913e-05
+
+# Output file: solvers_struct.out.17.lobpcg
+Iterations = 17
+Final Relative Residual Norm = 8.241194e-07
+
+# Output file: solvers_struct.out.17.lobpcg.1
+Eigenvalue lambda   1.84366509318352e-01
+Residual   1.95705306396121e-05
+
+# Output file: solvers_struct.out.17.lobpcg.3
+Iteration 10 	bsize 2 	maxres   3.62064485670999e-04
+Iteration 11 	bsize 1 	maxres   1.69921870110556e-04
+Iteration 12 	bsize 1 	maxres   7.11168977431953e-05
+
+Eigenvalue lambda   1.84366405010223e-01
+Eigenvalue lambda   2.50881940126419e-01
+Eigenvalue lambda   3.60090613365173e-01
+Residual   5.52630408492405e-05
+Residual   3.09487622871529e-05
+Residual   7.11168977431953e-05
+
+# Output file: solvers_struct.out.18.lobpcg
+Iterations = 32
+Final Relative Residual Norm = 8.266953e-07
+
+# Output file: solvers_struct.out.18.lobpcg.1
+Eigenvalue lambda   1.84366241097450e-01
+Residual   4.44491524831392e-05
+
+# Output file: solvers_struct.out.18.lobpcg.3
+Iteration 10 	bsize 2 	maxres   5.81342901568860e-04
+Iteration 11 	bsize 1 	maxres   1.98838606593199e-04
+Iteration 12 	bsize 1 	maxres   9.27079236134887e-05
+
+Eigenvalue lambda   1.84366494417191e-01
+Eigenvalue lambda   2.50879585742950e-01
+Eigenvalue lambda   3.60090494155884e-01
+Residual   9.27079236134887e-05
+Residual   8.72101882123388e-05
+Residual   5.49681753909681e-05
+
+# Output file: solvers_struct.out.19.lobpcg
+Iterations = 25
+Final Relative Residual Norm = 7.712439e-05
+
+# Output file: solvers_struct.out.19.lobpcg.1
+Eigenvalue lambda   1.84366539120674e-01
+Residual   4.44510842498858e-05
+
+# Output file: solvers_struct.out.19.lobpcg.3
+Iteration 10 	bsize 2 	maxres   5.81450236495584e-04
+Iteration 11 	bsize 1 	maxres   1.98705645743757e-04
+Iteration 12 	bsize 1 	maxres   9.26581269595772e-05
+
+Eigenvalue lambda   1.84366509318352e-01
+Eigenvalue lambda   2.50874906778336e-01
+Eigenvalue lambda   3.60090017318726e-01
+Residual   9.26581269595772e-05
+Residual   8.80578954820521e-05
+Residual   5.49828182556666e-05
+
--- a/src/utilities/random.c
+++ b/src/utilities/random.c
@ -98,6 +98,6 @@ HYPRE_Int hypre_RandI()
 *--------------------------------------------------------------------------*/
 HYPRE_Real hypre_Rand()
 {
-   return ((HYPRE_Real)(hypre_RandI()) / m);
+   return ((HYPRE_Real)(hypre_RandI()) / (HYPRE_Real)m);
 }