Browse Source

CUDA: Add tests to verify CUDA compiler works properly.

Robert Maynard 9 years ago
parent
commit
7b9131da64
37 changed files with 624 additions and 0 deletions
  1. 7 0
      Tests/CMakeLists.txt
  2. 4 0
      Tests/Cuda/CMakeLists.txt
  3. 40 0
      Tests/Cuda/Complex/CMakeLists.txt
  4. 5 0
      Tests/Cuda/Complex/dynamic.cpp
  5. 29 0
      Tests/Cuda/Complex/dynamic.cu
  6. 10 0
      Tests/Cuda/Complex/file1.cu
  7. 7 0
      Tests/Cuda/Complex/file1.h
  8. 20 0
      Tests/Cuda/Complex/file2.cu
  9. 10 0
      Tests/Cuda/Complex/file2.h
  10. 25 0
      Tests/Cuda/Complex/file3.cu
  11. 14 0
      Tests/Cuda/Complex/main.cpp
  12. 14 0
      Tests/Cuda/Complex/mixed.cpp
  13. 25 0
      Tests/Cuda/Complex/mixed.cu
  14. 17 0
      Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt
  15. 18 0
      Tests/Cuda/ConsumeCompileFeatures/main.cu
  16. 10 0
      Tests/Cuda/ConsumeCompileFeatures/static.cpp
  17. 9 0
      Tests/Cuda/ConsumeCompileFeatures/static.cu
  18. 12 0
      Tests/Cuda/ObjectLibrary/CMakeLists.txt
  19. 20 0
      Tests/Cuda/ObjectLibrary/main.cpp
  20. 6 0
      Tests/Cuda/ObjectLibrary/static.cpp
  21. 21 0
      Tests/Cuda/ObjectLibrary/static.cu
  22. 4 0
      Tests/CudaOnly/CMakeLists.txt
  23. 15 0
      Tests/CudaOnly/EnableStandard/CMakeLists.txt
  24. 17 0
      Tests/CudaOnly/EnableStandard/main.cu
  25. 9 0
      Tests/CudaOnly/EnableStandard/shared.cu
  26. 9 0
      Tests/CudaOnly/EnableStandard/static.cu
  27. 33 0
      Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
  28. 10 0
      Tests/CudaOnly/SeparateCompilation/file1.cu
  29. 7 0
      Tests/CudaOnly/SeparateCompilation/file1.h
  30. 20 0
      Tests/CudaOnly/SeparateCompilation/file2.cu
  31. 10 0
      Tests/CudaOnly/SeparateCompilation/file2.h
  32. 25 0
      Tests/CudaOnly/SeparateCompilation/file3.cu
  33. 25 0
      Tests/CudaOnly/SeparateCompilation/file4.cu
  34. 25 0
      Tests/CudaOnly/SeparateCompilation/file5.cu
  35. 15 0
      Tests/CudaOnly/SeparateCompilation/main.cu
  36. 31 0
      Tests/CudaOnly/WithDefs/CMakeLists.txt
  37. 46 0
      Tests/CudaOnly/WithDefs/main.notcu

+ 7 - 0
Tests/CMakeLists.txt

@@ -324,6 +324,8 @@ if(BUILD_TESTING)
       ADD_TEST_MACRO(VSGNUFortran ${CMAKE_COMMAND} -P runtest.cmake)
     endif()
   endif()
+
+
   ADD_TEST_MACRO(COnly COnly)
   ADD_TEST_MACRO(CxxOnly CxxOnly)
   ADD_TEST_MACRO(CxxSubdirC CxxSubdirC)
@@ -1353,6 +1355,11 @@ ${CMake_BINARY_DIR}/bin/cmake -DDIR=dev -P ${CMake_SOURCE_DIR}/Utilities/Release
     endif()
   endif()
 
+  if(CMake_TEST_CUDA)
+    add_subdirectory(Cuda)
+    add_subdirectory(CudaOnly)
+  endif()
+
   if(CMake_TEST_FindBoost)
     add_subdirectory(FindBoost)
   endif()

+ 4 - 0
Tests/Cuda/CMakeLists.txt

@@ -0,0 +1,4 @@
+
+ADD_TEST_MACRO(Cuda.Complex CudaComplex)
+ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
+ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)

+ 40 - 0
Tests/Cuda/Complex/CMakeLists.txt

@@ -0,0 +1,40 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaComplex CXX CUDA)
+#Goal for this example:
+
+#build a cpp dynamic library base
+#build a cuda static library base that uses separable compilation
+
+#build a cuda dynamic library that uses the first dynamic library
+#build a mixed cpp & cuda dynamic library uses all 3 previous libraries
+
+#lastly build a cpp executable that uses this last cuda dynamic library
+
+#this tests that we can properly handle linking cuda and cpp together
+#and also bulding cpp targets that need cuda implicit libraries
+
+#verify that we can pass explicit cuda arch flags
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+set(CMAKE_CUDA_STANDARD 11)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
+set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
+
+add_library(CudaComplexCppBase SHARED dynamic.cpp)
+add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu)
+set_target_properties(CudaComplexSeperableLib
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+set_target_properties( CudaComplexSeperableLib
+                       PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+add_library(CudaComplexSharedLib SHARED dynamic.cu)
+target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase)
+
+add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu)
+target_link_libraries(CudaComplexMixedLib
+                      PUBLIC CudaComplexSharedLib
+                      PRIVATE CudaComplexSeperableLib)
+
+add_executable(CudaComplex main.cpp)
+target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib)

+ 5 - 0
Tests/Cuda/Complex/dynamic.cpp

@@ -0,0 +1,5 @@
+
+int dynamic_base_func(int x)
+{
+  return x * x;
+}

+ 29 - 0
Tests/Cuda/Complex/dynamic.cu

@@ -0,0 +1,29 @@
+
+#include <string>
+#include <cuda.h>
+
+int dynamic_base_func(int);
+
+int __host__ cuda_dynamic_host_func(int x)
+{
+  return dynamic_base_func(x);
+}
+
+static
+__global__
+void DetermineIfValidCudaDevice()
+{
+}
+
+void cuda_dynamic_lib_func(std::string& contents )
+{
+  DetermineIfValidCudaDevice <<<1,1>>> ();
+  if(cudaSuccess == cudaGetLastError())
+    {
+    contents = "ran a cuda kernel";
+    }
+  else
+    {
+    contents = "cant run a cuda kernel";
+    }
+}

+ 10 - 0
Tests/Cuda/Complex/file1.cu

@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  result_type r;
+  r.input = x;
+  r.sum = x*x;
+  return r;
+}

+ 7 - 0
Tests/Cuda/Complex/file1.h

@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};

+ 20 - 0
Tests/Cuda/Complex/file2.cu

@@ -0,0 +1,20 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+  if(x!=42)
+    {
+    const result_type r = file1_func(x);
+    const result_type_dynamic rd { r.input, r.sum, true };
+    return rd;
+    }
+  else
+    {
+    const result_type_dynamic rd { x, x*x*x, false };
+    return rd;
+    }
+
+}

+ 10 - 0
Tests/Cuda/Complex/file2.h

@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+  int input;
+  int sum;
+  bool from_static;
+};

+ 25 - 0
Tests/Cuda/Complex/file3.cu

@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void file3_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file3_launch_kernel(int x)
+{
+  result_type r;
+  file3_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}

+ 14 - 0
Tests/Cuda/Complex/main.cpp

@@ -0,0 +1,14 @@
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type call_cuda_seperable_code(int x);
+result_type mixed_launch_kernel(int x);
+
+int main(int argc, char** argv)
+{
+  call_cuda_seperable_code(42);
+  mixed_launch_kernel(42);
+  return 0;
+}

+ 14 - 0
Tests/Cuda/Complex/mixed.cpp

@@ -0,0 +1,14 @@
+
+int dynamic_base_func(int);
+int cuda_dynamic_host_func(int);
+int file3_launch_kernel(int);
+
+int dynamic_final_func(int x)
+{
+  return cuda_dynamic_host_func(dynamic_base_func(x));
+}
+
+int call_cuda_seperable_code(int x)
+{
+  return file3_launch_kernel(x);
+}

+ 25 - 0
Tests/Cuda/Complex/mixed.cu

@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void mixed_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int mixed_launch_kernel(int x)
+{
+  result_type r;
+  mixed_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}

+ 17 - 0
Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt

@@ -0,0 +1,17 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaConsumeCompileFeatures CXX CUDA)
+#Goal for this example:
+
+#build a c++11 library that express a c++11 public compile feature
+#link a cuda library and verify it builds with c++11 enabled
+
+#build a standalone c++/cuda mixed executable where we express a c++11
+#compile feature.
+
+
+add_library(CudaConsumeLib STATIC static.cpp static.cu)
+target_compile_features(CudaConsumeLib PUBLIC cxx_constexpr)
+
+add_executable(CudaConsumeCompileFeatures main.cu)
+target_link_libraries(CudaConsumeCompileFeatures PRIVATE CudaConsumeLib)

+ 18 - 0
Tests/Cuda/ConsumeCompileFeatures/main.cu

@@ -0,0 +1,18 @@
+
+#include <iostream>
+
+int static_cxx11_func(int);
+
+void test_functions()
+{
+  auto x = static_cxx11_func( int(42) );
+  std::cout << x << std::endl;
+}
+
+int main(int argc, char **argv)
+{
+  test_functions();
+  std::cout << "this executable doesn't use cuda code, just call methods defined" << std::endl;
+  std::cout << "in libraries that have cuda code" << std::endl;
+  return 0;
+}

+ 10 - 0
Tests/Cuda/ConsumeCompileFeatures/static.cpp

@@ -0,0 +1,10 @@
+
+
+#include <type_traits>
+
+int static_cuda11_func(int);
+
+int static_cxx11_func(int x)
+{
+  return static_cuda11_func(x) + std::integral_constant<int, 32>::value;
+}

+ 9 - 0
Tests/Cuda/ConsumeCompileFeatures/static.cu

@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ static_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}

+ 12 - 0
Tests/Cuda/ObjectLibrary/CMakeLists.txt

@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.7)
+project (CudaObjectLibrary CUDA CXX)
+#Goal for this example:
+
+#build a object files some with cuda and some without than
+#embed these into an executable
+
+add_library(CudaMixedObjectLib OBJECT static.cu static.cpp)
+
+add_executable(CudaObjectLibrary
+               main.cpp
+               $<TARGET_OBJECTS:CudaMixedObjectLib>)

+ 20 - 0
Tests/Cuda/ObjectLibrary/main.cpp

@@ -0,0 +1,20 @@
+
+#include <iostream>
+
+int static_func(int);
+int file1_sq_func(int);
+
+void test_functions()
+{
+  file1_sq_func(static_func(42));
+}
+
+int main(int argc, char** argv)
+{
+  test_functions();
+  std::cout
+    << "this executable doesn't use cuda code, just call methods defined"
+    << std::endl;
+  std::cout << "in object files that have cuda code" << std::endl;
+  return 0;
+}

+ 6 - 0
Tests/Cuda/ObjectLibrary/static.cpp

@@ -0,0 +1,6 @@
+int file1_sq_func(int);
+
+int static_func(int x)
+{
+  return file1_sq_func(x);
+}

+ 21 - 0
Tests/Cuda/ObjectLibrary/static.cu

@@ -0,0 +1,21 @@
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+int __host__ file1_sq_func(int x)
+{
+  cudaError_t err;
+  int nDevices = 0;
+  err = cudaGetDeviceCount(&nDevices);
+  if(err != cudaSuccess)
+  {
+    std::cout << "nDevices: " << nDevices << std::endl;
+    std::cout << "err: " << err << std::endl;
+    return 1;
+  }
+  std::cout << "this library uses cuda code" << std::endl;
+  std::cout << "you have " << nDevices << " devices that support cuda" << std::endl;
+
+  return x * x;
+}

+ 4 - 0
Tests/CudaOnly/CMakeLists.txt

@@ -0,0 +1,4 @@
+
+ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
+ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)

+ 15 - 0
Tests/CudaOnly/EnableStandard/CMakeLists.txt

@@ -0,0 +1,15 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyEnableStandard CUDA)
+
+#Goal for this example:
+#build cuda sources that require C++11 to be enabled.
+
+add_library(CUDAStatic11 STATIC static.cu)
+add_library(CUDADynamic11 SHARED shared.cu)
+
+add_executable(CudaOnlyEnableStandard main.cu)
+target_link_libraries(CudaOnlyEnableStandard PRIVATE CUDAStatic11 CUDADynamic11)
+
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD 11)
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD_REQUIRED TRUE)

+ 17 - 0
Tests/CudaOnly/EnableStandard/main.cu

@@ -0,0 +1,17 @@
+
+#include <iostream>
+
+int static_cuda11_func(int);
+int shared_cuda11_func(int);
+
+void test_functions()
+{
+  static_cuda11_func( int(42) );
+  shared_cuda11_func( int(42) );
+}
+
+int main(int argc, char **argv)
+{
+  test_functions();
+  return 0;
+}

+ 9 - 0
Tests/CudaOnly/EnableStandard/shared.cu

@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ shared_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}

+ 9 - 0
Tests/CudaOnly/EnableStandard/static.cu

@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ static_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}

+ 33 - 0
Tests/CudaOnly/SeparateCompilation/CMakeLists.txt

@@ -0,0 +1,33 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlySeparateCompilation CUDA)
+
+#Goal for this example:
+#Build a static library that defines multiple methods and kernels that
+#use each other.
+#After that confirm that we can call those methods from dynamic libraries
+#and executables.
+#We complicate the matter by also testing that multiple static libraries
+#all containing cuda separable compilation code links properly
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+add_library(CUDASerarateLibA STATIC file1.cu file2.cu file3.cu)
+
+#Having file4/file5 in a shared library causes serious problems
+#with the nvcc linker and it will generate bad entries that will
+#cause a segv when trying to run the executable
+#
+add_library(CUDASerarateLibB STATIC file4.cu file5.cu)
+target_link_libraries(CUDASerarateLibB PRIVATE CUDASerarateLibA)
+
+add_executable(CudaOnlySeparateCompilation main.cu)
+target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASerarateLibB)
+
+set_target_properties( CUDASerarateLibA
+                       CUDASerarateLibB
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+set_target_properties( CUDASerarateLibA
+                       CUDASerarateLibB
+                       PROPERTIES POSITION_INDEPENDENT_CODE ON)

+ 10 - 0
Tests/CudaOnly/SeparateCompilation/file1.cu

@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  result_type r;
+  r.input = x;
+  r.sum = x*x;
+  return r;
+}

+ 7 - 0
Tests/CudaOnly/SeparateCompilation/file1.h

@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};

+ 20 - 0
Tests/CudaOnly/SeparateCompilation/file2.cu

@@ -0,0 +1,20 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+  if(x!=42)
+    {
+    const result_type r = file1_func(x);
+    const result_type_dynamic rd { r.input, r.sum, true };
+    return rd;
+    }
+  else
+    {
+    const result_type_dynamic rd { x, x*x*x, false };
+    return rd;
+    }
+
+}

+ 10 - 0
Tests/CudaOnly/SeparateCompilation/file2.h

@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+  int input;
+  int sum;
+  bool from_static;
+};

+ 25 - 0
Tests/CudaOnly/SeparateCompilation/file3.cu

@@ -0,0 +1,25 @@
+
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+
+static
+__global__
+void file3_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+result_type file3_launch_kernel(int x)
+{
+  result_type r;
+  file3_kernel <<<1,1>>> (r,x);
+  return r;
+}

+ 25 - 0
Tests/CudaOnly/SeparateCompilation/file4.cu

@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void file4_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file4_launch_kernel(int x)
+{
+  result_type r;
+  file4_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}

+ 25 - 0
Tests/CudaOnly/SeparateCompilation/file5.cu

@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void file5_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file5_launch_kernel(int x)
+{
+  result_type r;
+  file5_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}

+ 15 - 0
Tests/CudaOnly/SeparateCompilation/main.cu

@@ -0,0 +1,15 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+// result_type file4_launch_kernel(int x);
+// result_type file5_launch_kernel(int x);
+
+int main(int argc, char **argv)
+{
+  // file4_launch_kernel(42);
+  // file5_launch_kernel(42);
+  return 0;
+}

+ 31 - 0
Tests/CudaOnly/WithDefs/CMakeLists.txt

@@ -0,0 +1,31 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyWithDefs CUDA)
+
+#verify that we can pass explicit cuda arch flags
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+set(debug_compile_flags --generate-code arch=compute_20,code=sm_20 -Xcompiler=-Werror)
+set(release_compile_defs DEFREL)
+
+#Goal for this example:
+#build a executable that needs to be passed a complex define through add_defintions
+#this verifies we can pass things such as '_','(' to nvcc
+add_definitions("-DPACKED_DEFINE=__attribute__((packed))")
+set_source_files_properties(main.notcu PROPERTIES LANGUAGE CUDA)
+add_executable(CudaOnlyWithDefs main.notcu)
+
+target_compile_options(CudaOnlyWithDefs
+  PRIVATE
+    $<$<CONFIG:DEBUG>:$<BUILD_INTERFACE:${debug_compile_flags}>>
+  )
+
+target_compile_definitions(CudaOnlyWithDefs
+  PRIVATE
+    $<$<CONFIG:RELEASE>:$<BUILD_INTERFACE:${release_compile_defs}>>
+  )
+
+#we need to add an rpath for the cuda library so that everything
+#loads properly on the mac
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  set_target_properties(CudaOnlyWithDefs PROPERTIES LINK_FLAGS "-Wl,-rpath,${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}")
+endif()

+ 46 - 0
Tests/CudaOnly/WithDefs/main.notcu

@@ -0,0 +1,46 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+static
+__global__
+void DetermineIfValidCudaDevice()
+{
+}
+
+struct PACKED_DEFINE result_type
+{
+  bool valid;
+  int value;
+#if defined(NDEBUG) && !defined(DEFREL)
+#error missing DEFREL flag
+#endif
+};
+
+result_type can_launch_kernel()
+{
+  result_type r;
+  DetermineIfValidCudaDevice <<<1,1>>> ();
+  r.valid = (cudaSuccess == cudaGetLastError());
+  if(r.valid)
+    {
+    r.value = 1;
+    }
+  else
+    {
+    r.value = -1;
+    }
+  return r;
+}
+
+int main(int argc, char **argv)
+{
+  cudaError_t err;
+  int nDevices = 0;
+  err = cudaGetDeviceCount(&nDevices);
+  if(err != cudaSuccess)
+    {
+      return 1;
+    }
+  return 0;
+}