فهرست منبع

Merge topic 'test-cuda-fix-result-pointers' into release-4.2

c238f174a9 Tests: Remove invalid CUDA code from tests

Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !11607
Brad King 2 هفته پیش
والد
کامیت
cf19e65504

+ 11 - 6
Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu

@@ -1,18 +1,23 @@
 
 #include "file2.h"
 
-static __global__ void file2_kernel(result_type_dynamic& r, int x)
+static __global__ void file2_kernel(result_type_dynamic* r, int x)
 {
   // call static_func which is a method that is defined in the
   // static library that is always out of date
-  r = file2_func(x);
+  *r = file2_func(x);
 }
 
-static __global__ void file2_kernel(result_type_dynamic& r, int x);
-
 int file2_launch_kernel(int x)
 {
-  result_type_dynamic r;
+  result_type_dynamic* r;
+  cudaMallocManaged(&r, sizeof(result_type_dynamic));
+
   file2_kernel<<<1, 1>>>(r, x);
-  return r.sum;
+  cudaDeviceSynchronize();
+
+  auto sum = r->sum;
+  cudaFree(r);
+
+  return sum;
 }

+ 11 - 4
Tests/CudaOnly/RuntimeControls/file1.cu

@@ -5,14 +5,21 @@
 #  define EXPORT
 #endif
 
-void __global__ file1_kernel(int x, int& r)
+void __global__ file1_kernel(int x, int* r)
 {
-  r = -x;
+  *r = -x;
 }
 
 EXPORT int file1_launch_kernel(int x)
 {
-  int r = 0;
+  int* r;
+  cudaMallocManaged(&r, sizeof(int));
+
   file1_kernel<<<1, 1>>>(x, r);
-  return r;
+  cudaDeviceSynchronize();
+
+  auto result = *r;
+  cudaFree(r);
+
+  return result;
 }

+ 11 - 4
Tests/CudaOnly/RuntimeControls/file2.cu

@@ -5,14 +5,21 @@
 #  define EXPORT
 #endif
 
-void __global__ file2_kernel(int x, int& r)
+void __global__ file2_kernel(int x, int* r)
 {
-  r = -x;
+  *r = -x;
 }
 
 EXPORT int file2_launch_kernel(int x)
 {
-  int r = 0;
+  int* r;
+  cudaMallocManaged(&r, sizeof(int));
+
   file2_kernel<<<1, 1>>>(x, r);
-  return r;
+  cudaDeviceSynchronize();
+
+  auto result = *r;
+  cudaFree(r);
+
+  return result;
 }

+ 11 - 4
Tests/CudaOnly/SeparateCompilation/file3.cu

@@ -6,17 +6,24 @@
 result_type __device__ file1_func(int x);
 result_type_dynamic __device__ file2_func(int x);
 
-static __global__ void file3_kernel(result_type& r, int x)
+static __global__ void file3_kernel(result_type* r, int x)
 {
   // call static_func which is a method that is defined in the
   // static library that is always out of date
-  r = file1_func(x);
+  *r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
 
 result_type file3_launch_kernel(int x)
 {
-  result_type r;
+  result_type* r;
+  cudaMallocManaged(&r, sizeof(result_type));
+
   file3_kernel<<<1, 1>>>(r, x);
-  return r;
+  cudaDeviceSynchronize();
+
+  auto result = *r;
+  cudaFree(r);
+
+  return result;
 }

+ 11 - 4
Tests/CudaOnly/SeparateCompilation/file4.cu

@@ -4,17 +4,24 @@
 result_type __device__ file1_func(int x);
 result_type_dynamic __device__ file2_func(int x);
 
-static __global__ void file4_kernel(result_type& r, int x)
+static __global__ void file4_kernel(result_type* r, int x)
 {
   // call static_func which is a method that is defined in the
   // static library that is always out of date
-  r = file1_func(x);
+  *r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
 
 EXPORT int file4_launch_kernel(int x)
 {
-  result_type r;
+  result_type* r;
+  cudaMallocManaged(&r, sizeof(result_type));
+
   file4_kernel<<<1, 1>>>(r, x);
-  return r.sum;
+  cudaDeviceSynchronize();
+
+  auto sum = r->sum;
+  cudaFree(r);
+
+  return sum;
 }

+ 11 - 4
Tests/CudaOnly/SeparateCompilation/file5.cu

@@ -4,17 +4,24 @@
 result_type __device__ file1_func(int x);
 result_type_dynamic __device__ file2_func(int x);
 
-static __global__ void file5_kernel(result_type& r, int x)
+static __global__ void file5_kernel(result_type* r, int x)
 {
   // call static_func which is a method that is defined in the
   // static library that is always out of date
-  r = file1_func(x);
+  *r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
 
 EXPORT int file5_launch_kernel(int x)
 {
-  result_type r;
+  result_type* r;
+  cudaMallocManaged(&r, sizeof(result_type));
+
   file5_kernel<<<1, 1>>>(r, x);
-  return r.sum;
+  cudaDeviceSynchronize();
+
+  auto sum = r->sum;
+  cudaFree(r);
+
+  return sum;
 }