Răsfoiți Sursa

Merge topic 'cuda_tests_improve_error_reporting'

166b3393 Tests/Cuda: Select a CUDA device supporting compute 3.0
cbe4d595 Tests/Cuda: Return a non-zero code if errors occurred
8731701c Tests/Cuda: Use memory allocated on the GPU in the kernels
0ae5386a Tests/Cuda: Add missing separable compilation property
ce19607f Tests/Cuda: Fix missing CUDA static library at runtime on macOS
008ed80d Tests/Cuda: Output error messages to std::cerr instead of std::cout
c0d7bb83 Tests/Cuda: Print asynchronous error messages, if any
21a125cd Tests/Cuda: Print error message if mixed_kernel failed
eebb2be8 Tests/Cuda: Add identifiers to error messages
84f3c87b Tests/Cuda: Print error message if an error occurred
Brad King 9 ani în urmă
părinte
comite
7da1cde3ae

+ 8 - 0
Tests/Cuda/Complex/CMakeLists.txt

@@ -32,9 +32,17 @@ add_library(CudaComplexSharedLib SHARED dynamic.cu)
 target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase)
 
 add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu)
+set_target_properties(CudaComplexMixedLib
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 target_link_libraries(CudaComplexMixedLib
                       PUBLIC CudaComplexSharedLib
                       PRIVATE CudaComplexSeperableLib)
 
 add_executable(CudaComplex main.cpp)
 target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib)
+
+if(APPLE)
+  # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+  # the static cuda runtime can find it at runtime.
+  target_link_libraries(CudaComplex PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()

+ 49 - 2
Tests/Cuda/Complex/dynamic.cu

@@ -22,12 +22,59 @@ void DetermineIfValidCudaDevice()
 {
 }
 
+EXPORT int choose_cuda_device()
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess)
+    {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+              << std::endl;
+    return 1;
+    }
+  for (int i = 0; i < nDevices; ++i)
+    {
+    cudaDeviceProp prop;
+    cudaError_t err = cudaGetDeviceProperties(&prop, i);
+    if (err != cudaSuccess)
+      {
+      std::cerr << "Could not retrieve properties from CUDA device " << i
+                << std::endl;
+      return 1;
+      }
+    if (prop.major >= 4)
+      {
+        err = cudaSetDevice(i);
+        if (err != cudaSuccess)
+          {
+          std::cout << "Could not select CUDA device " << i << std::endl;
+          }
+        else
+          {
+            return 0;
+          }
+      }
+    }
+
+  std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+            << std::endl;
+
+  return 1;
+}
+
 EXPORT void cuda_dynamic_lib_func()
 {
   DetermineIfValidCudaDevice <<<1,1>>> ();
   cudaError_t err = cudaGetLastError();
-  if(err == cudaSuccess)
+  if(err != cudaSuccess)
+    {
+    std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: "
+              << cudaGetErrorString(err) << std::endl;
+    }
+  err = cudaDeviceSynchronize();
+  if(err != cudaSuccess)
     {
-    std::cerr << cudaGetErrorString(err) << std::endl;
+    std::cerr << "DetermineIfValidCudaDevice [ASYNC] failed: "
+              << cudaGetErrorString(cudaGetLastError()) << std::endl;
     }
 }

+ 32 - 7
Tests/Cuda/Complex/file3.cu

@@ -9,21 +9,46 @@ result_type_dynamic __device__ file2_func(int x);
 
 static
 __global__
-void file3_kernel(result_type& r, int x)
+void file3_kernel(result_type* r, int x)
 {
-  r = file1_func(x);
+  *r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
 
 int file3_launch_kernel(int x)
 {
-  result_type r;
+  result_type* r;
+  cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
+  if(err != cudaSuccess)
+    {
+    std::cerr << "file3_launch_kernel: cudaMallocManaged failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+    }
+
   file3_kernel <<<1,1>>> (r,x);
-  cudaError_t err = cudaGetLastError();
-  if(err == cudaSuccess)
+  err = cudaGetLastError();
+  if(err != cudaSuccess)
     {
-    std::cerr << cudaGetErrorString(err) << std::endl;
+    std::cerr << "file3_kernel [SYNC] failed: "
+              << cudaGetErrorString(err) << std::endl;
     return x;
     }
-  return r.sum;
+  err = cudaDeviceSynchronize();
+  if(err != cudaSuccess)
+    {
+    std::cerr << "file3_kernel [ASYNC] failed: "
+              << cudaGetErrorString(cudaGetLastError()) << std::endl;
+    return x;
+    }
+  int result = r->sum;
+  err = cudaFree(r);
+  if(err != cudaSuccess)
+    {
+    std::cerr << "file3_launch_kernel: cudaFree failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+    }
+
+  return result;
 }

+ 9 - 3
Tests/Cuda/Complex/main.cpp

@@ -9,12 +9,18 @@
 #define IMPORT
 #endif
 
+IMPORT int choose_cuda_device();
 IMPORT int call_cuda_seperable_code(int x);
 IMPORT int mixed_launch_kernel(int x);
 
 int main(int argc, char** argv)
 {
-  call_cuda_seperable_code(42);
-  mixed_launch_kernel(42);
-  return 0;
+  int ret = choose_cuda_device();
+  if (ret) {
+    return 0;
+  }
+
+  int r1 = call_cuda_seperable_code(42);
+  int r2 = mixed_launch_kernel(42);
+  return (r1 == 42 || r2 == 42) ? 1 : 0;
 }

+ 36 - 4
Tests/Cuda/Complex/mixed.cu

@@ -19,9 +19,9 @@ IMPORT void __host__ cuda_dynamic_lib_func();
 
 static
 __global__
-void mixed_kernel(result_type& r, int x)
+void mixed_kernel(result_type* r, int x)
 {
-  r = file1_func(x);
+  *r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
 
@@ -29,7 +29,39 @@ EXPORT int mixed_launch_kernel(int x)
 {
   cuda_dynamic_lib_func();
 
-  result_type r;
+  result_type* r;
+  cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
+  if(err != cudaSuccess)
+    {
+    std::cerr << "mixed_launch_kernel: cudaMallocManaged failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+    }
+
   mixed_kernel <<<1,1>>> (r,x);
-  return r.sum;
+  err = cudaGetLastError();
+  if(err != cudaSuccess)
+    {
+    std::cerr << "mixed_kernel [SYNC] failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+    }
+  err = cudaDeviceSynchronize();
+  if(err != cudaSuccess)
+    {
+    std::cerr << "mixed_kernel [ASYNC] failed: "
+              << cudaGetErrorString(cudaGetLastError()) << std::endl;
+    return x;
+    }
+
+  int result = r->sum;
+  err = cudaFree(r);
+  if(err != cudaSuccess)
+    {
+    std::cerr << "mixed_launch_kernel: cudaFree failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+    }
+
+  return result;
 }

+ 5 - 3
Tests/Cuda/ObjectLibrary/main.cpp

@@ -4,14 +4,16 @@
 int static_func(int);
 int file1_sq_func(int);
 
-void test_functions()
+int test_functions()
 {
-  file1_sq_func(static_func(42));
+  return file1_sq_func(static_func(42));
 }
 
 int main(int argc, char** argv)
 {
-  test_functions();
+  if (test_functions() == 1) {
+    return 1;
+  }
   std::cout
     << "this executable doesn't use cuda code, just call methods defined"
     << std::endl;

+ 2 - 2
Tests/Cuda/ObjectLibrary/static.cu

@@ -10,8 +10,8 @@ int __host__ file1_sq_func(int x)
   err = cudaGetDeviceCount(&nDevices);
   if(err != cudaSuccess)
   {
-    std::cout << "nDevices: " << nDevices << std::endl;
-    std::cout << "err: " << err << std::endl;
+    std::cerr << "nDevices: " << nDevices << std::endl;
+    std::cerr << "err: " << err << std::endl;
     return 1;
   }
   std::cout << "this library uses cuda code" << std::endl;