Browse Source

Merge topic 'cuda-memcheck'

cee92a9fb0 Help: add release notes for CTest cuda-memcheck support
f38e4a1871 Tests: Add cases for CTest cuda-memcheck parser
fe062800f0 CTest: add cuda-memcheck support

Acked-by: Kitware Robot <[email protected]>
Acked-by: tcojean <[email protected]>
Merge-request: !4952
Brad King 5 years ago
parent
commit
8d268f57b4

+ 2 - 0
.gitlab/artifacts.yml

@@ -34,10 +34,12 @@
             - build/Tests/CMake*/PseudoMemcheck/purify
             - build/Tests/CMake*/PseudoMemcheck/memcheck_fail
             - build/Tests/CMake*/PseudoMemcheck/BC
+            - build/Tests/CMake*/PseudoMemcheck/cuda-memcheck
             - build/Tests/CMake*/PseudoMemcheck/valgrind.exe
             - build/Tests/CMake*/PseudoMemcheck/purify.exe
             - build/Tests/CMake*/PseudoMemcheck/memcheck_fail.exe
             - build/Tests/CMake*/PseudoMemcheck/BC.exe
+            - build/Tests/CMake*/PseudoMemcheck/cuda-memcheck.exe
             - build/Tests/CMake*/PseudoMemcheck/NoLog
             - build/Tests/CMake*Lib/*LibTests
             - build/Tests/CMake*Lib/*LibTests.exe

+ 8 - 0
Help/release/dev/ctest-cuda-memcheck.rst

@@ -0,0 +1,8 @@
+CTest
+-----
+
+* :manual:`ctest(1)` gained support for cuda-memcheck as ``CTEST_MEMORYCHECK_COMMAND``.
+  The different tools (memcheck, racecheck, synccheck, initcheck) supplied by
+  cuda-memcheck can be selected by setting the appropriate flags using the
+  ``CTEST_MEMORYCHECK_COMMAND_OPTIONS`` variable.
+  The default flags are `--tool memcheck --leak-check full`.

+ 142 - 0
Source/CTest/cmCTestMemCheckHandler.cxx

@@ -326,6 +326,9 @@ void cmCTestMemCheckHandler::GenerateDartOutput(cmXMLWriter& xml)
     case cmCTestMemCheckHandler::BOUNDS_CHECKER:
       xml.Attribute("Checker", "BoundsChecker");
       break;
+    case cmCTestMemCheckHandler::CUDA_MEMCHECK:
+      xml.Attribute("Checker", "CudaMemcheck");
+      break;
     case cmCTestMemCheckHandler::ADDRESS_SANITIZER:
       xml.Attribute("Checker", "AddressSanitizer");
       break;
@@ -465,6 +468,8 @@ bool cmCTestMemCheckHandler::InitializeMemoryChecking()
       this->MemoryTesterStyle = cmCTestMemCheckHandler::PURIFY;
     } else if (testerName.find("BC") != std::string::npos) {
       this->MemoryTesterStyle = cmCTestMemCheckHandler::BOUNDS_CHECKER;
+    } else if (testerName.find("cuda-memcheck") != std::string::npos) {
+      this->MemoryTesterStyle = cmCTestMemCheckHandler::CUDA_MEMCHECK;
     } else {
       this->MemoryTesterStyle = cmCTestMemCheckHandler::UNKNOWN;
     }
@@ -485,6 +490,11 @@ bool cmCTestMemCheckHandler::InitializeMemoryChecking()
     this->MemoryTester =
       this->CTest->GetCTestConfiguration("BoundsCheckerCommand");
     this->MemoryTesterStyle = cmCTestMemCheckHandler::BOUNDS_CHECKER;
+  } else if (cmSystemTools::FileExists(
+               this->CTest->GetCTestConfiguration("CudaMemcheckCommand"))) {
+    this->MemoryTester =
+      this->CTest->GetCTestConfiguration("CudaMemcheckCommand");
+    this->MemoryTesterStyle = cmCTestMemCheckHandler::CUDA_MEMCHECK;
   }
   if (this->CTest->GetCTestConfiguration("MemoryCheckType") ==
       "AddressSanitizer") {
@@ -528,6 +538,8 @@ bool cmCTestMemCheckHandler::InitializeMemoryChecking()
       this->MemoryTesterStyle = cmCTestMemCheckHandler::VALGRIND;
     } else if (checkType == "DrMemory") {
       this->MemoryTesterStyle = cmCTestMemCheckHandler::DRMEMORY;
+    } else if (checkType == "CudaMemcheck") {
+      this->MemoryTesterStyle = cmCTestMemCheckHandler::CUDA_MEMCHECK;
     }
   }
   if (this->MemoryTester.empty()) {
@@ -553,6 +565,10 @@ bool cmCTestMemCheckHandler::InitializeMemoryChecking()
                 .empty()) {
     memoryTesterOptions =
       this->CTest->GetCTestConfiguration("DrMemoryCommandOptions");
+  } else if (!this->CTest->GetCTestConfiguration("CudaMemcheckCommandOptions")
+                .empty()) {
+    memoryTesterOptions =
+      this->CTest->GetCTestConfiguration("CudaMemcheckCommandOptions");
   }
   this->MemoryTesterOptions =
     cmSystemTools::ParseArguments(memoryTesterOptions);
@@ -686,6 +702,18 @@ bool cmCTestMemCheckHandler::InitializeMemoryChecking()
       this->MemoryTesterOptions.emplace_back("/M");
       break;
     }
+    case cmCTestMemCheckHandler::CUDA_MEMCHECK: {
+      // cuda-memcheck separates flags from arguments by spaces
+      if (this->MemoryTesterOptions.empty()) {
+        this->MemoryTesterOptions.emplace_back("--tool");
+        this->MemoryTesterOptions.emplace_back("memcheck");
+        this->MemoryTesterOptions.emplace_back("--leak-check");
+        this->MemoryTesterOptions.emplace_back("full");
+      }
+      this->MemoryTesterDynamicOptions.emplace_back("--log-file");
+      this->MemoryTesterDynamicOptions.push_back(this->MemoryTesterOutputFile);
+      break;
+    }
     // these are almost the same but the env var used is different
     case cmCTestMemCheckHandler::ADDRESS_SANITIZER:
     case cmCTestMemCheckHandler::LEAK_SANITIZER:
@@ -771,6 +799,8 @@ bool cmCTestMemCheckHandler::ProcessMemCheckOutput(const std::string& str,
       return this->ProcessMemCheckSanitizerOutput(str, log, results);
     case cmCTestMemCheckHandler::BOUNDS_CHECKER:
       return this->ProcessMemCheckBoundsCheckerOutput(str, log, results);
+    case cmCTestMemCheckHandler::CUDA_MEMCHECK:
+      return this->ProcessMemCheckCudaOutput(str, log, results);
     default:
       log.append("\nMemory checking style used was: ");
       log.append("None that I know");
@@ -1103,6 +1133,118 @@ bool cmCTestMemCheckHandler::ProcessMemCheckBoundsCheckerOutput(
   return defects == 0;
 }
 
+bool cmCTestMemCheckHandler::ProcessMemCheckCudaOutput(
+  const std::string& str, std::string& log, std::vector<int>& results)
+{
+  std::vector<std::string> lines;
+  cmsys::SystemTools::Split(str, lines);
+  bool unlimitedOutput = false;
+  if (str.find("CTEST_FULL_OUTPUT") != std::string::npos ||
+      this->CustomMaximumFailedTestOutputSize == 0) {
+    unlimitedOutput = true;
+  }
+
+  std::string::size_type cc;
+
+  std::ostringstream ostr;
+  log.clear();
+
+  int defects = 0;
+
+  cmsys::RegularExpression memcheckLine("^========");
+
+  cmsys::RegularExpression leakExpr("== Leaked [0-9,]+ bytes at");
+
+  // list of matchers for output messages that contain variable content
+  // (addresses, sizes, ...) or can be shortened in general. the first match is
+  // used as a error name.
+  std::vector<cmsys::RegularExpression> matchers{
+    // API errors
+    "== Malloc/Free error encountered: (.*)",
+    "== Program hit error ([^ ]*).* on CUDA API call to",
+    "== Program hit ([^ ]*).* on CUDA API call to",
+    // memcheck
+    "== (Invalid .*) of size [0-9,]+",
+    // racecheck
+    "== .* (Potential .* hazard detected)", "== .* (Race reported)",
+    // synccheck
+    "== (Barrier error)",
+    // initcheck
+    "== (Uninitialized .* memory read)", "== (Unused memory)",
+    // generic error: ignore ERROR SUMMARY, CUDA-MEMCHECK and others
+    "== ([A-Z][a-z].*)"
+  };
+
+  std::vector<std::string::size_type> nonMemcheckOutput;
+  auto sttime = std::chrono::steady_clock::now();
+  cmCTestOptionalLog(this->CTest, DEBUG,
+                     "Start test: " << lines.size() << std::endl, this->Quiet);
+  std::string::size_type totalOutputSize = 0;
+  for (cc = 0; cc < lines.size(); cc++) {
+    cmCTestOptionalLog(this->CTest, DEBUG,
+                       "test line " << lines[cc] << std::endl, this->Quiet);
+
+    if (memcheckLine.find(lines[cc])) {
+      cmCTestOptionalLog(this->CTest, DEBUG,
+                         "cuda-memcheck line " << lines[cc] << std::endl,
+                         this->Quiet);
+      int failure = -1;
+      auto& line = lines[cc];
+      if (leakExpr.find(line)) {
+        failure = static_cast<int>(this->FindOrAddWarning("Memory leak"));
+      } else {
+        for (auto& matcher : matchers) {
+          if (matcher.find(line)) {
+            failure =
+              static_cast<int>(this->FindOrAddWarning(matcher.match(1)));
+            break;
+          }
+        }
+      }
+
+      if (failure >= 0) {
+        ostr << "<b>" << this->ResultStrings[failure] << "</b> ";
+        if (results.empty() || unsigned(failure) > results.size() - 1) {
+          results.push_back(1);
+        } else {
+          results[failure]++;
+        }
+        defects++;
+      }
+      totalOutputSize += lines[cc].size();
+      ostr << lines[cc] << std::endl;
+    } else {
+      nonMemcheckOutput.push_back(cc);
+    }
+  }
+  // Now put all all the non cuda-memcheck output into the test output
+  // This should be last in case it gets truncated by the output
+  // limiting code
+  for (std::string::size_type i : nonMemcheckOutput) {
+    totalOutputSize += lines[i].size();
+    ostr << lines[i] << std::endl;
+    if (!unlimitedOutput &&
+        totalOutputSize >
+          static_cast<size_t>(this->CustomMaximumFailedTestOutputSize)) {
+      ostr << "....\n";
+      ostr << "Test Output for this test has been truncated see testing"
+              " machine logs for full output,\n";
+      ostr << "or put CTEST_FULL_OUTPUT in the output of "
+              "this test program.\n";
+      break; // stop the copy of output if we are full
+    }
+  }
+  cmCTestOptionalLog(this->CTest, DEBUG,
+                     "End test (elapsed: "
+                       << cmDurationTo<unsigned int>(
+                            std::chrono::steady_clock::now() - sttime)
+                       << "s)" << std::endl,
+                     this->Quiet);
+  log = ostr.str();
+  this->DefectCount += defects;
+  return defects == 0;
+}
+
 // PostProcessTest memcheck results
 void cmCTestMemCheckHandler::PostProcessTest(cmCTestTestResult& res, int test)
 {

+ 3 - 0
Source/CTest/cmCTestMemCheckHandler.h

@@ -46,6 +46,7 @@ private:
     DRMEMORY,
     BOUNDS_CHECKER,
     // checkers after here do not use the standard error list
+    CUDA_MEMCHECK,
     ADDRESS_SANITIZER,
     LEAK_SANITIZER,
     THREAD_SANITIZER,
@@ -137,6 +138,8 @@ private:
                                      std::vector<int>& results);
   bool ProcessMemCheckPurifyOutput(const std::string& str, std::string& log,
                                    std::vector<int>& results);
+  bool ProcessMemCheckCudaOutput(const std::string& str, std::string& log,
+                                 std::vector<int>& results);
   bool ProcessMemCheckSanitizerOutput(const std::string& str, std::string& log,
                                       std::vector<int>& results);
   bool ProcessMemCheckBoundsCheckerOutput(const std::string& str,

+ 3 - 0
Tests/CMakeLib/PseudoMemcheck/CMakeLists.txt

@@ -15,6 +15,9 @@ target_link_libraries(pseudo_purify CMakeLib)
 add_executable(pseudo_BC "${CMAKE_CURRENT_BINARY_DIR}/ret0.cxx")
 set_target_properties(pseudo_BC PROPERTIES OUTPUT_NAME BC)
 target_link_libraries(pseudo_BC CMakeLib)
+add_executable(pseudo_cuda-memcheck "${CMAKE_CURRENT_BINARY_DIR}/ret0.cxx")
+set_target_properties(pseudo_cuda-memcheck PROPERTIES OUTPUT_NAME cuda-memcheck)
+target_link_libraries(pseudo_cuda-memcheck CMakeLib)
 
 # binary to be used as pre- and post-memcheck command that fails
 add_executable(memcheck_fail "${CMAKE_CURRENT_BINARY_DIR}/ret1.cxx")

+ 32 - 2
Tests/CMakeLib/PseudoMemcheck/memtester.cxx.in

@@ -1,8 +1,14 @@
-#include <cmSystemTools.h>
-#include "cmsys/Encoding.hxx"
 #include <string>
+#include <vector>
+
+#include "cmsys/Encoding.hxx"
+
+#include <cmSystemTools.h>
 
+// clang-format off
 #define RETVAL @_retval@
+#define CMAKE_COMMAND "@CMAKE_COMMAND@"
+// clang-format on
 
 int main(int ac, char** av)
 {
@@ -14,6 +20,9 @@ int main(int ac, char** av)
   std::string exename = argv[0];
   std::string logarg;
   bool nextarg = false;
+  // execute the part after the last argument?
+  // the logfile path gets passed as environment variable PSEUDO_LOGFILE
+  bool exec = false;
 
   if (exename.find("valgrind") != std::string::npos) {
     logarg = "--log-file=";
@@ -26,6 +35,10 @@ int main(int ac, char** av)
   } else if (exename.find("BC") != std::string::npos) {
     nextarg = true;
     logarg = "/X";
+  } else if (exename.find("cuda-memcheck") != std::string::npos) {
+    nextarg = true;
+    exec = true;
+    logarg = "--log-file";
   }
 
   if (!logarg.empty()) {
@@ -45,8 +58,25 @@ int main(int ac, char** av)
       }
     }
 
+    // find the last argument position
+    int lastarg_pos = 1;
+    for (int i = 1; i < argc; ++i) {
+      std::string arg = argv[i];
+      if (arg.find("--") == 0) {
+        lastarg_pos = i;
+      }
+    }
+
     if (!logfile.empty()) {
       cmSystemTools::Touch(logfile, true);
+      // execute everything after the last argument with additional environment
+      int callarg_pos = lastarg_pos + (nextarg ? 2 : 1);
+      if (exec && callarg_pos < argc) {
+        std::vector<std::string> callargs{ CMAKE_COMMAND, "-E", "env",
+                                           "PSEUDO_LOGFILE=" + logfile };
+        callargs.insert(callargs.end(), &argv[callarg_pos], &argv[argc]);
+        cmSystemTools::RunSingleCommand(callargs);
+      }
     }
   }
 

+ 1 - 0
Tests/RunCMake/CMakeLists.txt

@@ -163,6 +163,7 @@ if(NOT CMake_TEST_EXTERNAL_CMAKE)
     -DPSEUDO_BC=$<TARGET_FILE:pseudo_BC>
     -DPSEUDO_PURIFY=$<TARGET_FILE:pseudo_purify>
     -DPSEUDO_VALGRIND=$<TARGET_FILE:pseudo_valgrind>
+    -DPSEUDO_CUDA_MEMCHECK=$<TARGET_FILE:pseudo_cuda-memcheck>
     -DPSEUDO_BC_NOLOG=$<TARGET_FILE:pseudonl_BC>
     -DPSEUDO_PURIFY_NOLOG=$<TARGET_FILE:pseudonl_purify>
     -DPSEUDO_VALGRIND_NOLOG=$<TARGET_FILE:pseudonl_valgrind>

+ 1 - 0
Tests/RunCMake/ctest_memcheck/DummyCudaMemcheck-result.txt

@@ -0,0 +1 @@
+0

+ 1 - 0
Tests/RunCMake/ctest_memcheck/DummyCudaMemcheck-stderr.txt

@@ -0,0 +1 @@
+Defect count: 20

+ 10 - 0
Tests/RunCMake/ctest_memcheck/DummyCudaMemcheck-stdout.txt

@@ -0,0 +1,10 @@
+Memory checking results:
+Uninitialized __global__ memory read - 1
+Unused memory - 1
+Barrier error - 2
+Invalid __global__ read - 1
+cudaErrorLaunchFailure - 2
+Memory leak - 1
+Potential WAR hazard detected - 4
+Potential RAW hazard detected - 4
+Race reported - 4

+ 12 - 0
Tests/RunCMake/ctest_memcheck/RunCMakeTest.cmake

@@ -175,3 +175,15 @@ unset(CMAKELISTS_EXTRA_CODE)
 unset(CTEST_EXTRA_CODE)
 unset(CTEST_MEMCHECK_ARGS)
 unset(CTEST_SUFFIX_CODE)
+
+#-----------------------------------------------------------------------------
+set(CMAKELISTS_EXTRA_CODE
+"add_test(NAME TestSan COMMAND \"${CMAKE_COMMAND}\"
+-P \"${RunCMake_SOURCE_DIR}/testCudaMemcheck.cmake\")
+")
+set(CTEST_SUFFIX_CODE "message(\"Defect count: \${defect_count}\")")
+set(CTEST_MEMCHECK_ARGS "DEFECT_COUNT defect_count")
+run_mc_test(DummyCudaMemcheck "${PSEUDO_CUDA_MEMCHECK}")
+unset(CTEST_MEMCHECK_ARGS)
+unset(CTEST_SUFFIX_CODE)
+unset(CTEST_EXTRA_CODE)

+ 264 - 0
Tests/RunCMake/ctest_memcheck/testCudaMemcheck.cmake

@@ -0,0 +1,264 @@
+# this file simulates an execution of cuda-memcheck
+
+set(LOG_FILE "$ENV{PSEUDO_LOGFILE}")
+message("LOG_FILE=[${LOG_FILE}]")
+
+# clear the log file
+file(REMOVE "${LOG_FILE}")
+
+# create an error of each type of sanitizer tool and failure
+
+# initcheck
+file(APPEND "${LOG_FILE}"
+"========= CUDA-MEMCHECK
+========= Uninitialized __global__ memory read of size 4
+=========     at 0x00000020 in test(int*, int*)
+=========     by thread (0,0,0) in block (0,0,0)
+=========     Address 0x1303d80000
+=========     Saved host backtrace up to driver entry point
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./uninit-read [0x101d9]
+=========     Host Frame:./uninit-read [0x10267]
+=========     Host Frame:./uninit-read [0x465b5]
+=========     Host Frame:./uninit-read [0x3342]
+=========     Host Frame:./uninit-read [0x3143]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./uninit-read [0x31e2]
+=========
+========= Unused memory in allocation 0x1303d80000 of size 16 bytes
+=========     Not written any memory.
+=========     100.00% of allocation were unused.
+=========     Saved host backtrace up to driver entry point
+=========     Host Frame:/lib64/libcuda.so.1 (cuMemAlloc_v2 + 0x1b7) [0x26ec97]
+=========     Host Frame:./uninit-read [0x2bbd3]
+=========     Host Frame:./uninit-read [0x71ab]
+=========     Host Frame:./uninit-read [0x3c84f]
+=========     Host Frame:./uninit-read [0x3111]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./uninit-read [0x31e2]
+=========
+========= ERROR SUMMARY: 2 errors
+")
+
+
+# synccheck
+file(APPEND "${LOG_FILE}"
+"========= CUDA-MEMCHECK
+========= Barrier error detected. Divergent thread(s) in warp
+=========     at 0x00000058 in test(int*, int*)
+=========     by thread (1,0,0) in block (0,0,0)
+=========     Device Frame:test(int*, int*) (test(int*, int*) : 0x60)
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./sync [0x101d9]
+=========     Host Frame:./sync [0x10267]
+=========     Host Frame:./sync [0x465b5]
+=========     Host Frame:./sync [0x3342]
+=========     Host Frame:./sync [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./sync [0x31e2]
+=========
+========= Barrier error detected. Divergent thread(s) in warp
+=========     at 0x00000058 in test(int*, int*)
+=========     by thread (0,0,0) in block (0,0,0)
+=========     Device Frame:test(int*, int*) (test(int*, int*) : 0x60)
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./sync [0x101d9]
+=========     Host Frame:./sync [0x10267]
+=========     Host Frame:./sync [0x465b5]
+=========     Host Frame:./sync [0x3342]
+=========     Host Frame:./sync [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./sync [0x31e2]
+=========
+========= ERROR SUMMARY: 2 errors
+")
+
+# memcheck
+file(APPEND "${LOG_FILE}"
+"========= CUDA-MEMCHECK
+========= Invalid __global__ read of size 4
+=========     at 0x00000020 in test(int*, int*)
+=========     by thread (0,0,0) in block (0,0,0)
+=========     Address 0x00000000 is out of bounds
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./invalid-read [0x101d9]
+=========     Host Frame:./invalid-read [0x10267]
+=========     Host Frame:./invalid-read [0x465b5]
+=========     Host Frame:./invalid-read [0x3342]
+=========     Host Frame:./invalid-read [0x3142]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./invalid-read [0x31e2]
+=========
+========= Program hit cudaErrorLaunchFailure (error 719) due to \"unspecified launch failure\" on CUDA API call to cudaDeviceSynchronize.
+=========     Saved host backtrace up to driver entry point at error
+=========     Host Frame:/lib64/libcuda.so.1 [0x3ac5a3]
+=========     Host Frame:./invalid-read [0x2e576]
+=========     Host Frame:./invalid-read [0x3147]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./invalid-read [0x31e2]
+=========
+========= Program hit cudaErrorLaunchFailure (error 719) due to \"unspecified launch failure\" on CUDA API call to cudaFree.
+=========     Saved host backtrace up to driver entry point at error
+=========     Host Frame:/lib64/libcuda.so.1 [0x3ac5a3]
+=========     Host Frame:./invalid-read [0x3c106]
+=========     Host Frame:./invalid-read [0x3150]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./invalid-read [0x31e2]
+=========
+========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
+========= ERROR SUMMARY: 3 errors
+")
+
+# memcheck with leak-check full
+file(APPEND "${LOG_FILE}"
+"========= CUDA-MEMCHECK
+========= Leaked 10 bytes at 0x1303d80000
+=========     Saved host backtrace up to driver entry point at cudaMalloc time
+=========     Host Frame:/lib64/libcuda.so.1 (cuMemAlloc_v2 + 0x1b7) [0x26ec97]
+=========     Host Frame:./leak [0x2bab3]
+=========     Host Frame:./leak [0x708b]
+=========     Host Frame:./leak [0x3c72f]
+=========     Host Frame:./leak [0x3113]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./leak [0x3174]
+=========
+========= LEAK SUMMARY: 10 bytes leaked in 1 allocations
+========= ERROR SUMMARY: 1 error
+")
+
+# racecheck with racecheck-report all
+file(APPEND "${LOG_FILE}"
+"========= CUDA-MEMCHECK
+========= WARN:(Warp Level Programming) Potential WAR hazard detected at __shared__ 0x3 in block (0, 0, 0) :
+=========     Read Thread (31, 0, 0) at 0x00000170 in ./race.cu:4:test(int*, int*)
+=========     Write Thread (0, 0, 0) at 0x000001a8 in ./race.cu:4:test(int*, int*)
+=========     Current Value : 0, Incoming Value : 0
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./race [0x101d9]
+=========     Host Frame:./race [0x10267]
+=========     Host Frame:./race [0x465b5]
+=========     Host Frame:./race [0x3342]
+=========     Host Frame:./race [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./race [0x31e2]
+=========
+========= WARN:(Warp Level Programming) Potential WAR hazard detected at __shared__ 0x2 in block (0, 0, 0) :
+=========     Read Thread (31, 0, 0) at 0x00000170 in ./race.cu:4:test(int*, int*)
+=========     Write Thread (0, 0, 0) at 0x000001a8 in ./race.cu:4:test(int*, int*)
+=========     Current Value : 0, Incoming Value : 0
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./race [0x101d9]
+=========     Host Frame:./race [0x10267]
+=========     Host Frame:./race [0x465b5]
+=========     Host Frame:./race [0x3342]
+=========     Host Frame:./race [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./race [0x31e2]
+=========
+========= WARN:(Warp Level Programming) Potential WAR hazard detected at __shared__ 0x1 in block (0, 0, 0) :
+=========     Read Thread (31, 0, 0) at 0x00000170 in ./race.cu:4:test(int*, int*)
+=========     Write Thread (0, 0, 0) at 0x000001a8 in ./race.cu:4:test(int*, int*)
+=========     Current Value : 0, Incoming Value : 0
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./race [0x101d9]
+=========     Host Frame:./race [0x10267]
+=========     Host Frame:./race [0x465b5]
+=========     Host Frame:./race [0x3342]
+=========     Host Frame:./race [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./race [0x31e2]
+=========
+========= WARN:(Warp Level Programming) Potential WAR hazard detected at __shared__ 0x0 in block (0, 0, 0) :
+=========     Read Thread (31, 0, 0) at 0x00000170 in ./race.cu:4:test(int*, int*)
+=========     Write Thread (0, 0, 0) at 0x000001a8 in ./race.cu:4:test(int*, int*)
+=========     Current Value : 0, Incoming Value : 1
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./race [0x101d9]
+=========     Host Frame:./race [0x10267]
+=========     Host Frame:./race [0x465b5]
+=========     Host Frame:./race [0x3342]
+=========     Host Frame:./race [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./race [0x31e2]
+=========
+========= WARN:(Warp Level Programming) Potential RAW hazard detected at __shared__ 0x3 in block (0, 0, 0) :
+=========     Write Thread (31, 0, 0) at 0x00000148 in ./race.cu:3:test(int*, int*)
+=========     Read Thread (0, 0, 0) at 0x00000170 in ./race.cu:4:test(int*, int*)
+=========     Current Value : 0
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./race [0x101d9]
+=========     Host Frame:./race [0x10267]
+=========     Host Frame:./race [0x465b5]
+=========     Host Frame:./race [0x3342]
+=========     Host Frame:./race [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./race [0x31e2]
+=========
+========= WARN:(Warp Level Programming) Potential RAW hazard detected at __shared__ 0x2 in block (0, 0, 0) :
+=========     Write Thread (31, 0, 0) at 0x00000148 in ./race.cu:3:test(int*, int*)
+=========     Read Thread (0, 0, 0) at 0x00000170 in ./race.cu:4:test(int*, int*)
+=========     Current Value : 0
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./race [0x101d9]
+=========     Host Frame:./race [0x10267]
+=========     Host Frame:./race [0x465b5]
+=========     Host Frame:./race [0x3342]
+=========     Host Frame:./race [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./race [0x31e2]
+=========
+========= WARN:(Warp Level Programming) Potential RAW hazard detected at __shared__ 0x1 in block (0, 0, 0) :
+=========     Write Thread (31, 0, 0) at 0x00000148 in ./race.cu:3:test(int*, int*)
+=========     Read Thread (0, 0, 0) at 0x00000170 in ./race.cu:4:test(int*, int*)
+=========     Current Value : 0
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./race [0x101d9]
+=========     Host Frame:./race [0x10267]
+=========     Host Frame:./race [0x465b5]
+=========     Host Frame:./race [0x3342]
+=========     Host Frame:./race [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./race [0x31e2]
+=========
+========= WARN:(Warp Level Programming) Potential RAW hazard detected at __shared__ 0x0 in block (0, 0, 0) :
+=========     Write Thread (31, 0, 0) at 0x00000148 in ./race.cu:3:test(int*, int*)
+=========     Read Thread (0, 0, 0) at 0x00000170 in ./race.cu:4:test(int*, int*)
+=========     Current Value : 0
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame:/lib64/libcuda.so.1 (cuLaunchKernel + 0x346) [0x297db6]
+=========     Host Frame:./race [0x101d9]
+=========     Host Frame:./race [0x10267]
+=========     Host Frame:./race [0x465b5]
+=========     Host Frame:./race [0x3342]
+=========     Host Frame:./race [0x314a]
+=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22505]
+=========     Host Frame:./race [0x31e2]
+=========
+========= WARN: Race reported between Read access at 0x00000170 in ./race.cu:4:test(int*, int*)
+=========     and Write access at 0x00000148 in ./race.cu:3:test(int*, int*) [4 hazards]
+=========     and Write access at 0x000001a8 in ./race.cu:4:test(int*, int*) [4 hazards]
+=========
+========= WARN: Race reported between Write access at 0x00000148 in ./race.cu:3:test(int*, int*)
+=========     and Write access at 0x00000148 in ./race.cu:3:test(int*, int*) [124 hazards]
+=========     and Read access at 0x00000170 in ./race.cu:4:test(int*, int*) [4 hazards]
+=========
+========= WARN: Race reported between Write access at 0x000001a8 in ./race.cu:4:test(int*, int*)
+=========     and Write access at 0x000001a8 in ./race.cu:4:test(int*, int*) [124 hazards]
+=========     and Read access at 0x00000170 in ./race.cu:4:test(int*, int*) [4 hazards]
+=========
+========= WARN: Race reported between Write access at 0x00000148 in ./race.cu:3:test(int*, int*)
+=========     and Write access at 0x00000148 in ./race.cu:3:test(int*, int*) [124 hazards]
+=========     and Read access at 0x00000170 in ./race.cu:4:test(int*, int*) [4 hazards]
+=========
+========= RACECHECK SUMMARY: 12 hazards displayed (0 errors, 12 warnings)
+")