From 5eb84ec1dbcfb83235e78192a2f8001b5349eec1 Mon Sep 17 00:00:00 2001
From: Ruipeng Li
Date: Fri, 15 Jul 2022 11:20:41 -0700
Subject: [PATCH] Fix GPU memory leak (#677)

This PR fixes a memory leak on GPUs.
---
Note (review): this copy was recovered from a whitespace-collapsed dump.
Line breaks were restored from the diff markers and checked against every
hunk's line counts. Leading indentation inside hunks is a best-effort guess,
and text that stood inside angle brackets appears to have been lost (author
e-mail, usage placeholders), so context lines may not match byte-for-byte.
Try `git apply --ignore-whitespace` and review the result before committing.

 AUTOTEST/machine-lassen.sh | 5 +--
 AUTOTEST/runtests-gpumemcheck | 2 ++
 src/parcsr_mv/par_csr_fffc_device.c | 2 ++
 src/test/TEST_gpumemcheck/gpu.jobs | 13 +++++++
 src/test/TEST_gpumemcheck/gpu.saved.lassen | 40 ++++++++++++++++++++++
 src/test/TEST_gpumemcheck/gpu.sh | 34 ++++++++++++++++++
 src/test/runtest.sh | 8 ++++-
 7 files changed, 101 insertions(+), 3 deletions(-)
 create mode 100644 AUTOTEST/runtests-gpumemcheck
 create mode 100644 src/test/TEST_gpumemcheck/gpu.jobs
 create mode 100644 src/test/TEST_gpumemcheck/gpu.saved.lassen
 create mode 100755 src/test/TEST_gpumemcheck/gpu.sh

diff --git a/AUTOTEST/machine-lassen.sh b/AUTOTEST/machine-lassen.sh
index 7ff68f20b..e47d270ad 100755
--- a/AUTOTEST/machine-lassen.sh
+++ b/AUTOTEST/machine-lassen.sh
@@ -62,9 +62,10 @@ ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save} -rtol ${rto
 ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
 ./renametest.sh basic $output_dir/basic-cuda-um-mixedint
 
-# CUDA with UM with shared library [no run]
+# CUDA with UM with shared library
 co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
-./test.sh basic.sh $src_dir -co: $co -mo: $mo
+ro="-gpumemcheck -rt -mpibind -cudamemcheck -save ${save}"
+./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
 ./renametest.sh basic $output_dir/basic-cuda-um-shared
 
 #CUDA with UM and single precision
diff --git a/AUTOTEST/runtests-gpumemcheck b/AUTOTEST/runtests-gpumemcheck
new file mode 100644
index 000000000..8915428bc
--- /dev/null
+++ b/AUTOTEST/runtests-gpumemcheck
@@ -0,0 +1,2 @@
+TEST_gpumemcheck/*.sh
+
diff --git a/src/parcsr_mv/par_csr_fffc_device.c b/src/parcsr_mv/par_csr_fffc_device.c
index 65fef346f..606929887 100644
--- a/src/parcsr_mv/par_csr_fffc_device.c
+++ b/src/parcsr_mv/par_csr_fffc_device.c
@@ -368,6 +368,8 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix *A,
                       map2FC ); /* FC combined */
 #endif
 
+   hypre_TFree(itmp, HYPRE_MEMORY_DEVICE);
+
    if (option == 2)
    {
       map2F2 = hypre_TAlloc(HYPRE_Int, n_local, HYPRE_MEMORY_DEVICE);
diff --git a/src/test/TEST_gpumemcheck/gpu.jobs b/src/test/TEST_gpumemcheck/gpu.jobs
new file mode 100644
index 000000000..a9ac61084
--- /dev/null
+++ b/src/test/TEST_gpumemcheck/gpu.jobs
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
+# HYPRE Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+mpirun -np 4 ./ij -n 20 20 10 -P 2 2 1 -solver 1 > gpu.out.1
+
+mpirun -np 4 ./ij -n 20 20 10 -P 2 2 1 -solver 3 > gpu.out.2
+
+mpirun -np 4 ./struct -P 2 2 1 -solver 0 > gpu.out.3
+
+mpirun -np 4 ./struct -P 2 2 1 -solver 1 > gpu.out.4
diff --git a/src/test/TEST_gpumemcheck/gpu.saved.lassen b/src/test/TEST_gpumemcheck/gpu.saved.lassen
new file mode 100644
index 000000000..2279d14ed
--- /dev/null
+++ b/src/test/TEST_gpumemcheck/gpu.saved.lassen
@@ -0,0 +1,40 @@
+# Output file: gpu.out.1
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+
+# Output file: gpu.out.2
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+
+# Output file: gpu.out.3
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+
+# Output file: gpu.out.4
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+
diff --git a/src/test/TEST_gpumemcheck/gpu.sh b/src/test/TEST_gpumemcheck/gpu.sh
new file mode 100755
index 000000000..f6c8c907e
--- /dev/null
+++ b/src/test/TEST_gpumemcheck/gpu.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
+# HYPRE Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+TNAME=`basename $0 .sh`
+RTOL=$1
+ATOL=$2
+
+#=============================================================================
+# compare with baseline case
+#=============================================================================
+
+FILES="\
+ ${TNAME}.out.1\
+ ${TNAME}.out.2\
+ ${TNAME}.out.3\
+ ${TNAME}.out.4\
+"
+
+for i in $FILES
+do
+  echo "# Output file: $i"
+  grep "LEAK SUMMARY" $i
+  grep "ERROR SUMMARY" $i
+  echo
+done > ${TNAME}.out
+
+#=============================================================================
+# remove temporary files
+#=============================================================================
+
+rm -f ${TNAME}.testdata*
diff --git a/src/test/runtest.sh b/src/test/runtest.sh
index fea8e2069..3dee4882a 100755
--- a/src/test/runtest.sh
+++ b/src/test/runtest.sh
@@ -19,6 +19,7 @@ TestDirNames="" # string of names of TEST_* directories used
 HOST=`hostname`
 NumThreads=0 # number of OpenMP threads to use if > 0
 Valgrind="" # string to add to MpirunString when using valgrind
+cudamemcheck="" # string to add to MpirunString when using cudamemcheck
 mpibind="" # string to add to MpirunString when using mpibind
 script="" # string to add to MpirunString when using script
 SaveExt="saved" # saved file extension
@@ -42,6 +43,7 @@ function usage
    printf " -atol use absolute tolerance 'tol' to compare numeric test values\n"
    printf " -save use '.saved. for the saved-file extension\n"
    printf " -valgrind use valgrind memory checker\n"
+   printf " -cudamemcheck use CUDA memory checker\n"
    printf " -mpibind use mpibind\n"
    printf " -script use a script before the command\n"
    printf " -n|-norun turn off execute mode, echo what would be run\n"
@@ -147,7 +149,7 @@ function MpirunString
    NumArgs2=$(($#+1))
    if [ "$NumArgs1" -eq "$NumArgs2" ] ; then
       shift
-      RunString="$RunString $script $mpibind $Valgrind $*"
+      RunString="$RunString $script $mpibind $cudamemcheck $Valgrind $*"
       #echo $RunString
    fi
 }
@@ -525,6 +527,10 @@ do
          shift
          Valgrind="valgrind -q --suppressions=`pwd`/runtest.valgrind --leak-check=yes --track-origins=yes"
          ;;
+      -cudamemcheck)
+         shift
+         cudamemcheck="cuda-memcheck --leak-check full"
+         ;;
       -mpibind)
          shift
          mpibind="mpibind"