hypre/AUTOTEST/machine-ray.sh
Ruipeng Li b49727f16b
Cuda triangular smoothers (#240)
* This commit has CUDA based smoothers for AMG based on the triangular parts of sparse matrices. This includes an Gauss-Seidel (relax_type==3), which uses CUSPARSE triangular solvers to invert L. Symmetric Gauss Seidel is implemented in relax_type==6 also via CUSPARSE. Finally, 2 new smoothers are added. THe first is a 2 stage approximation to Gauss Seidel using a parallel MatVec and L (relax_type==11). The second (relax_type==12) is a less effective version of 11. It uses A_diag instead of L for the smoothing. CPU implementations of these new smoothers are also provided. For the two stage algorithms, L and U are NOT explicitly created. This seems faster and saves memory. In the two stage preconditioner, multiply by invdiag rather than divide by diagonal reduces register pressure and yields full occupancy.
Co-authored-by: Paul Mullowney <pmullown@nrel.gov>
Co-authored-by: PaulMullowney <60452402+PaulMullowney@users.noreply.github.com>
2020-12-17 19:37:59 -08:00

108 lines
4.6 KiB
Bash
Executable File

#!/bin/sh
# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
# HYPRE Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
testname=`basename $0 .sh`
# Echo usage information
case $1 in
-h|-help)
cat <<EOF
**** Only run this script on the ray cluster ****
$0 [-h|-help] {src_dir}
where: -h|-help prints this usage information and exits
{src_dir} is the hypre source directory
This script runs a number of tests suitable for the syrah cluster.
Example usage: $0 ../src
EOF
exit
;;
esac
# Setup
test_dir=`pwd`
output_dir=`pwd`/$testname.dir
rm -fr $output_dir
mkdir -p $output_dir
src_dir=`cd $1; pwd`
shift
# Basic build and run tests
mo="-j test"
eo=""
roij="-ij -ams -rt -mpibind -rtol 1e-3 -atol 8e-3"
ross="-struct -sstruct -rt -mpibind -rtol 1e-6 -atol 1e-6"
rost="-struct -rt -mpibind -rtol 1e-8 -atol 1e-8"
rocuda="-cuda_ray -rt -mpibind"
# CUDA with UM
co="--with-cuda --enable-unified-memory --enable-persistent --enable-cub --enable-debug --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $roij
./renametest.sh basic $output_dir/basic-cuda-um-ij
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ross
./renametest.sh basic $output_dir/basic-cuda-um-struct-sstruct
# CUDA with UM [shared library, no run]
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-um-shared
#./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $roij
#./renametest.sh basic $output_dir/basic-cuda-um-shared-ij
#./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ross
#./renametest.sh basic $output_dir/basic-cuda-um-shared-struct-sstruct
# OMP 4.5 with UM
co="--with-device-openmp --enable-unified-memory --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $roij
#./renametest.sh basic $output_dir/basic-deviceomp-um-ij
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ross
./renametest.sh basic $output_dir/basic-deviceomp-um-struct-sstruct
# OMP 4.5 with UM [shared library, no run]
#co="--with-device-openmp --enable-unified-memory --enable-shared --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029:1500-030:1501-308\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029:1500-030:1501-308\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
#./renametest.sh basic $output_dir/basic-deviceomp-um-shared
#./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $roij
#./renametest.sh basic $output_dir/basic-deviceomp-um-shared-ij
#./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ross
#./renametest.sh basic $output_dir/basic-deviceomp-um-shared-struct-sstruct
# OMP 4.5 with UM [in debug mode]
#co="--with-device-openmp --enable-unified-memory --enable-debug --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $roij
#./renametest.sh basic $output_dir/basic-deviceomp-um-debug-ij
#./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ross
#./renametest.sh basic $output_dir/basic-deviceomp-um-debug-struct-sstruct
# CUDA w.o UM
co="--with-cuda --enable-debug --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $rost
./renametest.sh basic $output_dir/basic-cuda-nonum-struct
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $rocuda
./renametest.sh basic $output_dir/basic-cuda-nonum-cuda
# OMP 4.5 w.o UM, only struct [in debug mode]
co="--with-device-openmp --enable-debug --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $rost
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct
# CUDA with UM without MPI
#co="--with-cuda --enable-unified-memory --without-MPI --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
#./renametest.sh basic $output_dir/basic-cuda-um-without-MPI
# Echo to stderr all nonempty error files in $output_dir
for errfile in $( find $output_dir ! -size 0 -name "*.err" )
do
echo $errfile >&2
done