From 5eb84ec1dbcfb83235e78192a2f8001b5349eec1 Mon Sep 17 00:00:00 2001
From: Ruipeng Li <li50@llnl.gov>
Date: Fri, 15 Jul 2022 11:20:41 -0700
Subject: [PATCH] Fix GPU memory leak (#677)

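This PR fixes a memory leak on GPUs: the temporary device array `itmp` in
hypre_ParCSRMatrixGenerateFFFCDevice_core is now released with hypre_TFree.

It also adds a `-cudamemcheck` option to src/test/runtest.sh and a new
TEST_gpumemcheck regression test that records the "LEAK SUMMARY" and
"ERROR SUMMARY" lines reported for the ij and struct drivers, and enables
that test in the CUDA/UM shared-library configuration on lassen.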
---
 AUTOTEST/machine-lassen.sh                 |  5 +--
 AUTOTEST/runtests-gpumemcheck              |  2 ++
 src/parcsr_mv/par_csr_fffc_device.c        |  2 ++
 src/test/TEST_gpumemcheck/gpu.jobs         | 13 +++++++
 src/test/TEST_gpumemcheck/gpu.saved.lassen | 40 ++++++++++++++++++++++
 src/test/TEST_gpumemcheck/gpu.sh           | 34 ++++++++++++++++++
 src/test/runtest.sh                        |  8 ++++-
 7 files changed, 101 insertions(+), 3 deletions(-)
 create mode 100644 AUTOTEST/runtests-gpumemcheck
 create mode 100644 src/test/TEST_gpumemcheck/gpu.jobs
 create mode 100644 src/test/TEST_gpumemcheck/gpu.saved.lassen
 create mode 100755 src/test/TEST_gpumemcheck/gpu.sh

diff --git a/AUTOTEST/machine-lassen.sh b/AUTOTEST/machine-lassen.sh
index 7ff68f20b..e47d270ad 100755
--- a/AUTOTEST/machine-lassen.sh
+++ b/AUTOTEST/machine-lassen.sh
@@ -62,9 +62,10 @@ ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save} -rtol ${rto
 ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
 ./renametest.sh basic $output_dir/basic-cuda-um-mixedint
 
-# CUDA with UM with shared library [no run]
+# CUDA with UM with shared library
 co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
-./test.sh basic.sh $src_dir -co: $co -mo: $mo
+ro="-gpumemcheck -rt -mpibind -cudamemcheck -save ${save}"
+./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
 ./renametest.sh basic $output_dir/basic-cuda-um-shared
 
 #CUDA with UM and single precision
diff --git a/AUTOTEST/runtests-gpumemcheck b/AUTOTEST/runtests-gpumemcheck
new file mode 100644
index 000000000..8915428bc
--- /dev/null
+++ b/AUTOTEST/runtests-gpumemcheck
@@ -0,0 +1,2 @@
+TEST_gpumemcheck/*.sh
+
diff --git a/src/parcsr_mv/par_csr_fffc_device.c b/src/parcsr_mv/par_csr_fffc_device.c
index 65fef346f..606929887 100644
--- a/src/parcsr_mv/par_csr_fffc_device.c
+++ b/src/parcsr_mv/par_csr_fffc_device.c
@@ -368,6 +368,8 @@ hypre_ParCSRMatrixGenerateFFFCDevice_core( hypre_ParCSRMatrix  *A,
                       map2FC ); /* FC combined */
 #endif
 
+   hypre_TFree(itmp, HYPRE_MEMORY_DEVICE);
+
    if (option == 2)
    {
       map2F2 = hypre_TAlloc(HYPRE_Int, n_local, HYPRE_MEMORY_DEVICE);
diff --git a/src/test/TEST_gpumemcheck/gpu.jobs b/src/test/TEST_gpumemcheck/gpu.jobs
new file mode 100644
index 000000000..a9ac61084
--- /dev/null
+++ b/src/test/TEST_gpumemcheck/gpu.jobs
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
+# HYPRE Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+# Jobs for gpu.sh: four 4-process runs (ij x2, struct x2), one gpu.out.N each.
+mpirun -np 4 ./ij -n 20 20 10 -P 2 2 1 -solver 1 > gpu.out.1
+
+mpirun -np 4 ./ij -n 20 20 10 -P 2 2 1 -solver 3 > gpu.out.2
+
+mpirun -np 4 ./struct -P 2 2 1 -solver 0 > gpu.out.3
+
+mpirun -np 4 ./struct -P 2 2 1 -solver 1 > gpu.out.4
diff --git a/src/test/TEST_gpumemcheck/gpu.saved.lassen b/src/test/TEST_gpumemcheck/gpu.saved.lassen
new file mode 100644
index 000000000..2279d14ed
--- /dev/null
+++ b/src/test/TEST_gpumemcheck/gpu.saved.lassen
@@ -0,0 +1,40 @@
+# Output file: gpu.out.1
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+
+# Output file: gpu.out.2
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+
+# Output file: gpu.out.3
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+
+# Output file: gpu.out.4
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+========= ERROR SUMMARY: 0 errors
+
diff --git a/src/test/TEST_gpumemcheck/gpu.sh b/src/test/TEST_gpumemcheck/gpu.sh
new file mode 100755
index 000000000..f6c8c907e
--- /dev/null
+++ b/src/test/TEST_gpumemcheck/gpu.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
+# HYPRE Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+TNAME=$(basename "$0" .sh)   # test name = this script's file name without .sh
+RTOL=$1                      # not used below: no numeric comparison is done here
+ATOL=$2                      # not used below: no numeric comparison is done here
+
+#=============================================================================
+# gather each run's LEAK SUMMARY / ERROR SUMMARY lines into ${TNAME}.out
+#=============================================================================
+
+FILES="\
+ ${TNAME}.out.1\
+ ${TNAME}.out.2\
+ ${TNAME}.out.3\
+ ${TNAME}.out.4\
+"
+
+for i in $FILES   # intentionally unquoted: FILES is a whitespace-separated list
+do
+  echo "# Output file: $i"
+  grep "LEAK SUMMARY" "$i"
+  grep "ERROR SUMMARY" "$i"
+  echo
+done > "${TNAME}.out"
+
+#=============================================================================
+# remove temporary files
+#=============================================================================
+
+rm -f "${TNAME}".testdata*
diff --git a/src/test/runtest.sh b/src/test/runtest.sh
index fea8e2069..3dee4882a 100755
--- a/src/test/runtest.sh
+++ b/src/test/runtest.sh
@@ -19,6 +19,7 @@ TestDirNames=""            # string of names of TEST_* directories used
 HOST=`hostname`
 NumThreads=0               # number of OpenMP threads to use if > 0
 Valgrind=""                # string to add to MpirunString when using valgrind
+cudamemcheck=""            # string to add to MpirunString when using cudamemcheck
 mpibind=""                 # string to add to MpirunString when using mpibind
 script=""                  # string to add to MpirunString when using script
 SaveExt="saved"            # saved file extension
@@ -42,6 +43,7 @@ function usage
    printf "    -atol <tol>    use absolute tolerance 'tol' to compare numeric test values\n"
    printf "    -save <ext>    use '<test>.saved.<ext> for the saved-file extension\n"
    printf "    -valgrind      use valgrind memory checker\n"
+   printf "    -cudamemcheck  use CUDA memory checker\n"
    printf "    -mpibind       use mpibind\n"
    printf "    -script <sh>   use a script before the command\n"
    printf "    -n|-norun      turn off execute mode, echo what would be run\n"
@@ -147,7 +149,7 @@ function MpirunString
    NumArgs2=$(($#+1))
    if [ "$NumArgs1" -eq "$NumArgs2" ] ; then
       shift
-      RunString="$RunString $script $mpibind $Valgrind $*"
+      RunString="$RunString $script $mpibind $cudamemcheck $Valgrind $*"
       #echo $RunString
    fi
 }
@@ -525,6 +527,10 @@ do
          shift
          Valgrind="valgrind -q --suppressions=`pwd`/runtest.valgrind --leak-check=yes --track-origins=yes"
          ;;
+      -cudamemcheck)
+         shift
+         cudamemcheck="cuda-memcheck --leak-check full"
+         ;;
       -mpibind)
          shift
          mpibind="mpibind"