Browse Source

Merge topic 'initial_cuda_language_support'

4cc601f2 Help: Add release note for CUDA support
7b9131da CUDA: Add tests to verify CUDA compiler works properly.
9cf5b98d CUDA: Prefer environment variables CUDACXX and CUDAHOSTCXX.
a5e806b3 CUDA: Add support for CMAKE_CUDA_COMPILE_OPTIONS_VISIBILITY
d038559e CUDA: Add separable compilation support to the makefile generator.
43ce4414 CUDA: Add separable compilation support to the ninja generator.
4b316097 CUDA: Add support for the CUDA_SEPARABLE_COMPILATION target property
ae05fcc6 CUDA: Add LinkLineComputer that computes cuda dlink lines.
115269a8 CUDA: Refactor cmLinkLineComputer to allow for better derived children.
5dec4031 CUDA: Refactor CMakeCUDAInformation to prepare for separable compilation.
5b20d0ab CUDA: C++ compile features now enable cuda c++11 support.
489c52ce CUDA: Use the host compiler for linking CUDA executables and shared libs.
bbaf2434 CUDA: add support for specifying an explicit host compiler.
a92f8d96 CUDA: Enable header dependency scanning.
ec6ce623 CUDA: State that cuda has preprocessor output and can generate assembly.
4f5155f6 CUDA: We now properly perform CUDA compiler identification.
...
Brad King 9 years ago
parent
commit
4838ca14df
67 changed files with 2131 additions and 19 deletions
  1. 1 0
      Help/manual/cmake-properties.7.rst
  2. 13 0
      Help/prop_tgt/CUDA_SEPARABLE_COMPILATION.rst
  3. 6 0
      Help/release/dev/CUDA-language-support.rst
  4. 20 0
      Modules/CMakeCUDACompiler.cmake.in
  5. 16 0
      Modules/CMakeCUDACompilerABI.cu
  6. 39 0
      Modules/CMakeCUDACompilerId.cu.in
  7. 193 0
      Modules/CMakeCUDAInformation.cmake
  8. 5 0
      Modules/CMakeCompilerIdDetection.cmake
  9. 115 0
      Modules/CMakeDetermineCUDACompiler.cmake
  10. 5 1
      Modules/CMakeDetermineCompilerId.cmake
  11. 71 0
      Modules/CMakeTestCUDACompiler.cmake
  12. 24 0
      Modules/Compiler/NVIDIA-CUDA.cmake
  13. 7 0
      Modules/Compiler/NVIDIA-DetermineCompiler.cmake
  14. 2 0
      Source/CMakeLists.txt
  15. 1 0
      Source/cmComputeLinkInformation.h
  16. 3 3
      Source/cmLinkLineComputer.h
  17. 74 0
      Source/cmLinkLineDeviceComputer.cxx
  18. 36 0
      Source/cmLinkLineDeviceComputer.h
  19. 5 0
      Source/cmLocalGenerator.cxx
  20. 4 3
      Source/cmLocalUnixMakefileGenerator3.cxx
  21. 3 0
      Source/cmMakefile.cxx
  22. 227 0
      Source/cmMakefileExecutableTargetGenerator.cxx
  23. 4 0
      Source/cmMakefileExecutableTargetGenerator.h
  24. 222 0
      Source/cmMakefileLibraryTargetGenerator.cxx
  25. 6 0
      Source/cmMakefileLibraryTargetGenerator.h
  26. 18 9
      Source/cmMakefileTargetGenerator.cxx
  27. 358 0
      Source/cmNinjaNormalTargetGenerator.cxx
  28. 10 0
      Source/cmNinjaNormalTargetGenerator.h
  29. 15 3
      Source/cmNinjaTargetGenerator.cxx
  30. 4 0
      Source/cmTarget.cxx
  31. 7 0
      Tests/CMakeLists.txt
  32. 4 0
      Tests/Cuda/CMakeLists.txt
  33. 40 0
      Tests/Cuda/Complex/CMakeLists.txt
  34. 5 0
      Tests/Cuda/Complex/dynamic.cpp
  35. 29 0
      Tests/Cuda/Complex/dynamic.cu
  36. 10 0
      Tests/Cuda/Complex/file1.cu
  37. 7 0
      Tests/Cuda/Complex/file1.h
  38. 20 0
      Tests/Cuda/Complex/file2.cu
  39. 10 0
      Tests/Cuda/Complex/file2.h
  40. 25 0
      Tests/Cuda/Complex/file3.cu
  41. 14 0
      Tests/Cuda/Complex/main.cpp
  42. 14 0
      Tests/Cuda/Complex/mixed.cpp
  43. 25 0
      Tests/Cuda/Complex/mixed.cu
  44. 17 0
      Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt
  45. 18 0
      Tests/Cuda/ConsumeCompileFeatures/main.cu
  46. 10 0
      Tests/Cuda/ConsumeCompileFeatures/static.cpp
  47. 9 0
      Tests/Cuda/ConsumeCompileFeatures/static.cu
  48. 12 0
      Tests/Cuda/ObjectLibrary/CMakeLists.txt
  49. 20 0
      Tests/Cuda/ObjectLibrary/main.cpp
  50. 6 0
      Tests/Cuda/ObjectLibrary/static.cpp
  51. 21 0
      Tests/Cuda/ObjectLibrary/static.cu
  52. 4 0
      Tests/CudaOnly/CMakeLists.txt
  53. 15 0
      Tests/CudaOnly/EnableStandard/CMakeLists.txt
  54. 17 0
      Tests/CudaOnly/EnableStandard/main.cu
  55. 9 0
      Tests/CudaOnly/EnableStandard/shared.cu
  56. 9 0
      Tests/CudaOnly/EnableStandard/static.cu
  57. 33 0
      Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
  58. 10 0
      Tests/CudaOnly/SeparateCompilation/file1.cu
  59. 7 0
      Tests/CudaOnly/SeparateCompilation/file1.h
  60. 20 0
      Tests/CudaOnly/SeparateCompilation/file2.cu
  61. 10 0
      Tests/CudaOnly/SeparateCompilation/file2.h
  62. 25 0
      Tests/CudaOnly/SeparateCompilation/file3.cu
  63. 25 0
      Tests/CudaOnly/SeparateCompilation/file4.cu
  64. 25 0
      Tests/CudaOnly/SeparateCompilation/file5.cu
  65. 15 0
      Tests/CudaOnly/SeparateCompilation/main.cu
  66. 31 0
      Tests/CudaOnly/WithDefs/CMakeLists.txt
  67. 46 0
      Tests/CudaOnly/WithDefs/main.notcu

+ 1 - 0
Help/manual/cmake-properties.7.rst

@@ -143,6 +143,7 @@ Properties on Targets
    /prop_tgt/CONFIG_OUTPUT_NAME
    /prop_tgt/CONFIG_POSTFIX
    /prop_tgt/CROSSCOMPILING_EMULATOR
+   /prop_tgt/CUDA_SEPARABLE_COMPILATION
    /prop_tgt/CXX_EXTENSIONS
    /prop_tgt/CXX_STANDARD
    /prop_tgt/CXX_STANDARD_REQUIRED

+ 13 - 0
Help/prop_tgt/CUDA_SEPARABLE_COMPILATION.rst

@@ -0,0 +1,13 @@
+CUDA_SEPARABLE_COMPILATION
+--------------------------
+
+CUDA only: Enables separate compilation of device code
+
+If set this will enable separable compilation for all CUDA files for
+the given target.
+
+For instance:
+
+.. code-block:: cmake
+
+  set_property(TARGET myexe PROPERTY CUDA_SEPARABLE_COMPILATION ON)

+ 6 - 0
Help/release/dev/CUDA-language-support.rst

@@ -0,0 +1,6 @@
+CUDA-language-support
+---------------------
+
+* CMake learned to support CUDA as a first-class language.
+  It is supported by the :ref:`Makefile Generators` and the
+  :generator:`Ninja` generator.

+ 20 - 0
Modules/CMakeCUDACompiler.cmake.in

@@ -0,0 +1,20 @@
+set(CMAKE_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@")
+set(CMAKE_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@")
+set(CMAKE_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@")
+set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@")
+set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@")
+set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@")
+
+set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
+set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")
+
+set(CMAKE_CUDA_COMPILER_ID_RUN 1)
+set(CMAKE_CUDA_SOURCE_FILE_EXTENSIONS cu)
+
+set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES@")
+set(CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES@")
+set(CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES@")
+
+set(CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES@")
+set(CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "@CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES@")
+set(CMAKE_CUDA_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "@CMAKE_CUDA_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES@")

+ 16 - 0
Modules/CMakeCUDACompilerABI.cu

@@ -0,0 +1,16 @@
+#ifndef __CUDACC__
+# error "A C or C++ compiler has been selected for CUDA"
+#endif
+
+#include "CMakeCompilerABI.h"
+
+int main(int argc, char* argv[])
+{
+  int require = 0;
+  require += info_sizeof_dptr[argc];
+#if defined(ABI_ID)
+  require += info_abi[argc];
+#endif
+  (void)argv;
+  return require;
+}

+ 39 - 0
Modules/CMakeCUDACompilerId.cu.in

@@ -0,0 +1,39 @@
+#ifndef __CUDACC__
+# error "A C or C++ compiler has been selected for CUDA"
+#endif
+
+@CMAKE_CUDA_COMPILER_ID_CONTENT@
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
+
+@CMAKE_CUDA_COMPILER_ID_PLATFORM_CONTENT@
+@CMAKE_CUDA_COMPILER_ID_ERROR_FOR_TEST@
+
+const char* info_language_dialect_default = "INFO" ":" "dialect_default["
+#if __cplusplus >= 201402L
+  "14"
+#elif __cplusplus >= 201103L
+  "11"
+#else
+  "98"
+#endif
+"]";
+
+/*--------------------------------------------------------------------------*/
+
+int main(int argc, char* argv[])
+{
+  int require = 0;
+  require += info_compiler[argc];
+  require += info_platform[argc];
+#ifdef COMPILER_VERSION_MAJOR
+  require += info_version[argc];
+#endif
+  require += info_language_dialect_default[argc];
+  (void)argv;
+  return require;
+}

+ 193 - 0
Modules/CMakeCUDAInformation.cmake

@@ -0,0 +1,193 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+set(CMAKE_CUDA_OUTPUT_EXTENSION .o)
+set(CMAKE_INCLUDE_FLAG_CUDA "-I")
+
+# Load compiler-specific information.
+if(CMAKE_CUDA_COMPILER_ID)
+  include(Compiler/${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL)
+endif()
+
+# load the system- and compiler specific files
+if(CMAKE_CUDA_COMPILER_ID)
+  # load a hardware specific file, mostly useful for embedded compilers
+  if(CMAKE_SYSTEM_PROCESSOR)
+    include(Platform/${CMAKE_SYSTEM_NAME}-${CMAKE_CUDA_COMPILER_ID}-CUDA-${CMAKE_SYSTEM_PROCESSOR} OPTIONAL)
+  endif()
+  include(Platform/${CMAKE_SYSTEM_NAME}-${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL)
+endif()
+
+
+if(NOT CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG)
+  set(CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG})
+endif()
+
+if(NOT CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG_SEP)
+  set(CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG_SEP})
+endif()
+
+if(NOT CMAKE_SHARED_LIBRARY_RPATH_LINK_CUDA_FLAG)
+  set(CMAKE_SHARED_LIBRARY_RPATH_LINK_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_RPATH_LINK_C_FLAG})
+endif()
+
+if(NOT DEFINED CMAKE_EXE_EXPORTS_CUDA_FLAG)
+  set(CMAKE_EXE_EXPORTS_CUDA_FLAG ${CMAKE_EXE_EXPORTS_C_FLAG})
+endif()
+
+if(NOT DEFINED CMAKE_SHARED_LIBRARY_SONAME_CUDA_FLAG)
+  set(CMAKE_SHARED_LIBRARY_SONAME_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_SONAME_C_FLAG})
+endif()
+
+if(NOT CMAKE_EXECUTABLE_RUNTIME_CUDA_FLAG)
+  set(CMAKE_EXECUTABLE_RUNTIME_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG})
+endif()
+
+if(NOT CMAKE_EXECUTABLE_RUNTIME_CUDA_FLAG_SEP)
+  set(CMAKE_EXECUTABLE_RUNTIME_CUDA_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG_SEP})
+endif()
+
+if(NOT CMAKE_EXECUTABLE_RPATH_LINK_CUDA_FLAG)
+  set(CMAKE_EXECUTABLE_RPATH_LINK_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_RPATH_LINK_CUDA_FLAG})
+endif()
+
+if(NOT DEFINED CMAKE_SHARED_LIBRARY_LINK_CUDA_WITH_RUNTIME_PATH)
+  set(CMAKE_SHARED_LIBRARY_LINK_CUDA_WITH_RUNTIME_PATH ${CMAKE_SHARED_LIBRARY_LINK_C_WITH_RUNTIME_PATH})
+endif()
+
+
+# for most systems a module is the same as a shared library
+# so unless the variable CMAKE_MODULE_EXISTS is set just
+# copy the values from the LIBRARY variables
+if(NOT CMAKE_MODULE_EXISTS)
+  set(CMAKE_SHARED_MODULE_CUDA_FLAGS ${CMAKE_SHARED_LIBRARY_CUDA_FLAGS})
+  set(CMAKE_SHARED_MODULE_CREATE_CUDA_FLAGS ${CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS})
+endif()
+
+# add the flags to the cache based
+# on the initial values computed in the platform/*.cmake files
+# use _INIT variables so that this only happens the first time
+# and you can set these flags in the cmake cache
+set(CMAKE_CUDA_FLAGS_INIT "$ENV{CUDAFLAGS} ${CMAKE_CUDA_FLAGS_INIT}")
+
+foreach(c "" _DEBUG _RELEASE _MINSIZEREL _RELWITHDEBINFO)
+  string(STRIP "${CMAKE_CUDA_FLAGS${c}_INIT}" CMAKE_CUDA_FLAGS${c}_INIT)
+endforeach()
+
+set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS_INIT}" CACHE STRING
+     "Flags used by the compiler during all build types.")
+
+if(NOT CMAKE_NOT_USING_CONFIG_FLAGS)
+  set (CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG_INIT}" CACHE STRING
+     "Flags used by the compiler during debug builds.")
+  set (CMAKE_CUDA_FLAGS_MINSIZEREL "${CMAKE_CUDA_FLAGS_MINSIZEREL_INIT}" CACHE STRING
+     "Flags used by the compiler during release builds for minimum size.")
+  set (CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE_INIT}" CACHE STRING
+     "Flags used by the compiler during release builds.")
+  set (CMAKE_CUDA_FLAGS_RELWITHDEBINFO "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT}" CACHE STRING
+     "Flags used by the compiler during release builds with debug info.")
+
+endif()
+
+include(CMakeCommonLanguageInclude)
+
+# now define the following rules:
+# CMAKE_CUDA_CREATE_SHARED_LIBRARY
+# CMAKE_CUDA_CREATE_SHARED_MODULE
+# CMAKE_CUDA_COMPILE_OBJECT
+# CMAKE_CUDA_LINK_EXECUTABLE
+
+if(CMAKE_CUDA_HOST_COMPILER)
+  set(CMAKE_CUDA_HOST_FLAGS "-ccbin=<CMAKE_CUDA_HOST_COMPILER>")
+else()
+  set(CMAKE_CUDA_HOST_FLAGS "")
+endif()
+
+set(__IMPLICT_LINKS )
+foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
+  string(APPEND __IMPLICT_LINKS " -L\"${dir}\"")
+endforeach()
+foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES})
+  if(${lib} MATCHES "/")
+    string(APPEND __IMPLICT_LINKS " \"${lib}\"")
+  else()
+    string(APPEND __IMPLICT_LINKS " -l${lib}")
+  endif()
+endforeach()
+
+# create a shared library
+if(NOT CMAKE_CUDA_CREATE_SHARED_LIBRARY)
+  set(CMAKE_CUDA_CREATE_SHARED_LIBRARY
+      "<CMAKE_CUDA_HOST_LINK_LAUNCHER> <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
+endif()
+
+# create a shared module copy the shared library rule by default
+if(NOT CMAKE_CUDA_CREATE_SHARED_MODULE)
+  set(CMAKE_CUDA_CREATE_SHARED_MODULE ${CMAKE_CUDA_CREATE_SHARED_LIBRARY})
+endif()
+
+# Create a static archive incrementally for large object file counts.
+if(NOT DEFINED CMAKE_CUDA_ARCHIVE_CREATE)
+  set(CMAKE_CUDA_ARCHIVE_CREATE "<CMAKE_AR> qc <TARGET> <LINK_FLAGS> <OBJECTS>")
+endif()
+if(NOT DEFINED CMAKE_CUDA_ARCHIVE_APPEND)
+  set(CMAKE_CUDA_ARCHIVE_APPEND "<CMAKE_AR> q  <TARGET> <LINK_FLAGS> <OBJECTS>")
+endif()
+if(NOT DEFINED CMAKE_CUDA_ARCHIVE_FINISH)
+  set(CMAKE_CUDA_ARCHIVE_FINISH "<CMAKE_RANLIB> <TARGET>")
+endif()
+
+#Specify how to compile when ptx has been requested
+if(NOT CMAKE_CUDA_COMPILE_PTX_COMPILATION)
+  set(CMAKE_CUDA_COMPILE_PTX_COMPILATION
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -ptx <SOURCE> -o <OBJECT>")
+endif()
+
+#Specify how to compile when separable compilation has been requested
+if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION)
+  set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -dc <SOURCE> -o <OBJECT>")
+endif()
+
+#Specify how to compile when whole compilation has been requested
+if(NOT CMAKE_CUDA_COMPILE_WHOLE_COMPILATION)
+  set(CMAKE_CUDA_COMPILE_WHOLE_COMPILATION
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -c <SOURCE> -o <OBJECT>")
+endif()
+
+if(CMAKE_GENERATOR STREQUAL "Ninja")
+  set(CMAKE_CUDA_COMPILE_DEPENDENCY_DETECTION
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -M <SOURCE> -MT <OBJECT> -o $DEP_FILE")
+  #The Ninja generator uses the make file dependency files to determine what
+  #files need to be recompiled. Unfortunately, nvcc doesn't support building
+  #a source file and generating the dependencies of said file in a single
+  #invocation. Instead we have to state that you need to chain two commands.
+  #
+  #The makefile generators uses the custom CMake dependency scanner, and thus
+  #it is exempt from this logic.
+  list(APPEND CMAKE_CUDA_COMPILE_PTX_COMPILATION "${CMAKE_CUDA_COMPILE_DEPENDENCY_DETECTION}")
+  list(APPEND CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION "${CMAKE_CUDA_COMPILE_DEPENDENCY_DETECTION}")
+  list(APPEND CMAKE_CUDA_COMPILE_WHOLE_COMPILATION "${CMAKE_CUDA_COMPILE_DEPENDENCY_DETECTION}")
+endif()
+
+# compile a cu file into an executable
+if(NOT CMAKE_CUDA_LINK_EXECUTABLE)
+  set(CMAKE_CUDA_LINK_EXECUTABLE
+    "<CMAKE_CUDA_HOST_LINK_LAUNCHER> <CMAKE_CUDA_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
+endif()
+
+#These are used when linking relocatable (dc) cuda code
+set(CMAKE_CUDA_DEVICE_LINK_LIBRARY
+      "<CMAKE_CUDA_COMPILER> <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+set(CMAKE_CUDA_DEVICE_LINK_EXECUTABLE
+      "<CMAKE_CUDA_COMPILER> <CMAKE_CUDA_LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+
+
+mark_as_advanced(
+CMAKE_CUDA_FLAGS
+CMAKE_CUDA_FLAGS_RELEASE
+CMAKE_CUDA_FLAGS_RELWITHDEBINFO
+CMAKE_CUDA_FLAGS_MINSIZEREL
+CMAKE_CUDA_FLAGS_DEBUG)
+
+set(CMAKE_CUDA_INFORMATION_LOADED 1)

+ 5 - 0
Modules/CMakeCompilerIdDetection.cmake

@@ -90,6 +90,11 @@ function(compiler_id_detection outvar lang)
     list(APPEND ordered_compilers
       MIPSpro)
 
+    #Currently the only CUDA compilers are NVIDIA
+    if(lang STREQUAL CUDA)
+      set(ordered_compilers NVIDIA)
+    endif()
+
     if(CID_ID_DEFINE)
       foreach(Id ${ordered_compilers})
         set(CMAKE_${lang}_COMPILER_ID_CONTENT "${CMAKE_${lang}_COMPILER_ID_CONTENT}# define ${CID_PREFIX}COMPILER_IS_${Id} 0\n")

+ 115 - 0
Modules/CMakeDetermineCUDACompiler.cmake

@@ -0,0 +1,115 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake)
+include(${CMAKE_ROOT}/Modules//CMakeParseImplicitLinkInfo.cmake)
+
+if( NOT ( ("${CMAKE_GENERATOR}" MATCHES "Make") OR
+          ("${CMAKE_GENERATOR}" MATCHES "Ninja") ) )
+  message(FATAL_ERROR "CUDA language not currently supported by \"${CMAKE_GENERATOR}\" generator")
+endif()
+
+if(NOT CMAKE_CUDA_COMPILER)
+  set(CMAKE_CUDA_COMPILER_INIT NOTFOUND)
+
+    # prefer the environment variable CUDACXX
+    if(NOT $ENV{CUDACXX} STREQUAL "")
+      get_filename_component(CMAKE_CUDA_COMPILER_INIT $ENV{CUDACXX} PROGRAM PROGRAM_ARGS CMAKE_CUDA_FLAGS_ENV_INIT)
+      if(CMAKE_CUDA_FLAGS_ENV_INIT)
+        set(CMAKE_CUDA_COMPILER_ARG1 "${CMAKE_CUDA_FLAGS_ENV_INIT}" CACHE STRING "First argument to CXX compiler")
+      endif()
+      if(NOT EXISTS ${CMAKE_CUDA_COMPILER_INIT})
+        message(FATAL_ERROR "Could not find compiler set in environment variable CUDACXX:\n$ENV{CUDACXX}.\n${CMAKE_CUDA_COMPILER_INIT}")
+      endif()
+    endif()
+
+  # finally list compilers to try
+  if(NOT CMAKE_CUDA_COMPILER_INIT)
+    set(CMAKE_CUDA_COMPILER_LIST nvcc)
+  endif()
+
+  _cmake_find_compiler(CUDA)
+else()
+  _cmake_find_compiler_path(CUDA)
+endif()
+
+mark_as_advanced(CMAKE_CUDA_COMPILER)
+
+#Allow the user to specify a host compiler
+set(CMAKE_CUDA_HOST_COMPILER "" CACHE FILEPATH "Host compiler to be used by nvcc")
+if(NOT $ENV{CUDAHOSTCXX} STREQUAL "")
+  get_filename_component(CMAKE_CUDA_HOST_COMPILER $ENV{CUDAHOSTCXX} PROGRAM)
+  if(NOT EXISTS ${CMAKE_CUDA_HOST_COMPILER})
+    message(FATAL_ERROR "Could not find compiler set in environment variable CUDAHOSTCXX:\n$ENV{CUDAHOSTCXX}.\n${CMAKE_CUDA_HOST_COMPILER}")
+  endif()
+endif()
+
+# Build a small source file to identify the compiler.
+if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
+  set(CMAKE_CUDA_COMPILER_ID_RUN 1)
+
+  # Try to identify the compiler.
+  set(CMAKE_CUDA_COMPILER_ID)
+  set(CMAKE_CUDA_PLATFORM_ID)
+  file(READ ${CMAKE_ROOT}/Modules/CMakePlatformId.h.in
+    CMAKE_CUDA_COMPILER_ID_PLATFORM_CONTENT)
+
+  list(APPEND CMAKE_CUDA_COMPILER_ID_MATCH_VENDORS NVIDIA)
+  set(CMAKE_CUDA_COMPILER_ID_MATCH_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \(R\) Cuda compiler driver")
+
+  set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCUDA/(\\./)?(CompilerIdCUDA.xctest/)?CompilerIdCUDA[ \t\n\\\"]")
+  set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_INDEX 2)
+
+  set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
+  if(CMAKE_CUDA_HOST_COMPILER)
+      list(APPEND CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-ccbin=${CMAKE_CUDA_HOST_COMPILER}")
+  endif()
+
+  include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake)
+  CMAKE_DETERMINE_COMPILER_ID(CUDA CUDAFLAGS CMakeCUDACompilerId.cu)
+endif()
+
+include(CMakeFindBinUtils)
+
+#if this compiler vendor is matches NVIDIA we can determine
+#what the host compiler is. This only needs to be done if the CMAKE_CUDA_HOST_COMPILER
+#has NOT been explicitly set
+#
+#Find the line from compiler ID that contains a.out ( or last line )
+#We also need to find the implicit link lines. Which can be done by replacing
+#the compiler with cuda-fake-ld  and pass too CMAKE_PARSE_IMPLICIT_LINK_INFO
+if(CMAKE_CUDA_COMPILER_ID STREQUAL NVIDIA)
+  #grab the last line of the output which holds the link line
+  string(REPLACE "#\$ " ";" nvcc_output "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+  list(GET nvcc_output -1 nvcc_output)
+
+  #extract the compiler that is being used for linking
+  string(REPLACE " " ";" nvcc_output_to_find_launcher "${nvcc_output}")
+  list(GET nvcc_output_to_find_launcher 0 CMAKE_CUDA_HOST_LINK_LAUNCHER)
+  #we need to remove the quotes that nvcc adds around the directory section
+  #of the path
+  string(REPLACE "\"" "" CMAKE_CUDA_HOST_LINK_LAUNCHER "${CMAKE_CUDA_HOST_LINK_LAUNCHER}")
+
+  #prefix the line with cuda-fake-ld so that implicit link info believes it is
+  #a link line
+  set(nvcc_output "cuda-fake-ld ${nvcc_output}")
+  CMAKE_PARSE_IMPLICIT_LINK_INFO("${nvcc_output}"
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES
+                                 log
+                                 "${CMAKE_CUDA_IMPLICIT_OBJECT_REGEX}")
+
+  file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+          "Parsed CUDA nvcc implicit link information from above output:\n${log}\n\n")
+
+endif()
+
+# configure all variables set in this file
+configure_file(${CMAKE_ROOT}/Modules/CMakeCUDACompiler.cmake.in
+  ${CMAKE_PLATFORM_INFO_DIR}/CMakeCUDACompiler.cmake
+  @ONLY
+  )
+
+set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
+set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")

+ 5 - 1
Modules/CMakeDetermineCompilerId.cmake

@@ -101,6 +101,8 @@ function(CMAKE_DETERMINE_COMPILER_ID lang flagvar src)
   set(CMAKE_${lang}_SIMULATE_ID "${CMAKE_${lang}_SIMULATE_ID}" PARENT_SCOPE)
   set(CMAKE_${lang}_SIMULATE_VERSION "${CMAKE_${lang}_SIMULATE_VERSION}" PARENT_SCOPE)
   set(CMAKE_${lang}_STANDARD_COMPUTED_DEFAULT "${CMAKE_${lang}_STANDARD_COMPUTED_DEFAULT}" PARENT_SCOPE)
+  set(CMAKE_${lang}_COMPILER_PRODUCED_OUTPUT "${COMPILER_${lang}_PRODUCED_OUTPUT}" PARENT_SCOPE)
+  set(CMAKE_${lang}_COMPILER_PRODUCED_FILES "${COMPILER_${lang}_PRODUCED_FILES}" PARENT_SCOPE)
 endfunction()
 
 include(CMakeCompilerIdDetection)
@@ -135,7 +137,7 @@ function(CMAKE_DETERMINE_COMPILER_ID_BUILD lang testflags src)
   set(COMPILER_DESCRIPTION
     "Compiler: ${CMAKE_${lang}_COMPILER} ${CMAKE_${lang}_COMPILER_ID_ARG1}
 Build flags: ${CMAKE_${lang}_COMPILER_ID_FLAGS_LIST}
-Id flags: ${testflags}
+Id flags: ${testflags} ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS}
 ")
 
   # Compile the compiler identification source.
@@ -322,6 +324,7 @@ Id flags: ${testflags}
               ${CMAKE_${lang}_COMPILER_ID_ARG1}
               ${CMAKE_${lang}_COMPILER_ID_FLAGS_LIST}
               ${testflags}
+              ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS}
               "${src}"
       WORKING_DIRECTORY ${CMAKE_${lang}_COMPILER_ID_DIR}
       OUTPUT_VARIABLE CMAKE_${lang}_COMPILER_ID_OUTPUT
@@ -400,6 +403,7 @@ ${CMAKE_${lang}_COMPILER_ID_OUTPUT}
   # Return the files produced by the compilation.
   set(COMPILER_${lang}_PRODUCED_FILES "${COMPILER_${lang}_PRODUCED_FILES}" PARENT_SCOPE)
   set(COMPILER_${lang}_PRODUCED_OUTPUT "${COMPILER_${lang}_PRODUCED_OUTPUT}" PARENT_SCOPE)
+
 endfunction()
 
 #-----------------------------------------------------------------------------

+ 71 - 0
Modules/CMakeTestCUDACompiler.cmake

@@ -0,0 +1,71 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+if(CMAKE_CUDA_COMPILER_FORCED)
+  # The compiler configuration was forced by the user.
+  # Assume the user has configured all compiler information.
+  set(CMAKE_CUDA_COMPILER_WORKS TRUE)
+  return()
+endif()
+
+include(CMakeTestCompilerCommon)
+
+# Remove any cached result from an older CMake version.
+# We now store this in CMakeCUDACompiler.cmake.
+unset(CMAKE_CUDA_COMPILER_WORKS CACHE)
+
+# This file is used by EnableLanguage in cmGlobalGenerator to
+# determine that that selected cuda compiler can actually compile
+# and link the most basic of programs.   If not, a fatal error
+# is set and cmake stops processing commands and will not generate
+# any makefiles or projects.
+if(NOT CMAKE_CUDA_COMPILER_WORKS)
+  PrintTestCompilerStatus("CUDA" "")
+  file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/main.cu
+    "#ifndef __CUDACC__\n"
+    "# error \"The CMAKE_CUDA_COMPILER is set to an invalid CUDA compiler\"\n"
+    "#endif\n"
+    "int main(){return 0;}\n")
+
+  try_compile(CMAKE_CUDA_COMPILER_WORKS ${CMAKE_BINARY_DIR}
+    ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/main.cu
+    OUTPUT_VARIABLE __CMAKE_CUDA_COMPILER_OUTPUT)
+
+  # Move result from cache to normal variable.
+  set(CMAKE_CUDA_COMPILER_WORKS ${CMAKE_CUDA_COMPILER_WORKS})
+  unset(CMAKE_CUDA_COMPILER_WORKS CACHE)
+  set(CUDA_TEST_WAS_RUN 1)
+endif()
+
+if(NOT CMAKE_CUDA_COMPILER_WORKS)
+  PrintTestCompilerStatus("CUDA" " -- broken")
+  file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
+    "Determining if the CUDA compiler works failed with "
+    "the following output:\n${__CMAKE_CUDA_COMPILER_OUTPUT}\n\n")
+  message(FATAL_ERROR "The CUDA compiler \"${CMAKE_CUDA_COMPILER}\" "
+    "is not able to compile a simple test program.\nIt fails "
+    "with the following output:\n ${__CMAKE_CUDA_COMPILER_OUTPUT}\n\n"
+    "CMake will not be able to correctly generate this project.")
+else()
+  if(CUDA_TEST_WAS_RUN)
+    PrintTestCompilerStatus("CUDA" " -- works")
+    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+      "Determining if the CUDA compiler works passed with "
+      "the following output:\n${__CMAKE_CUDA_COMPILER_OUTPUT}\n\n")
+  endif()
+
+  # Try to identify the ABI and configure it into CMakeCUDACompiler.cmake
+  include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerABI.cmake)
+  CMAKE_DETERMINE_COMPILER_ABI(CUDA ${CMAKE_ROOT}/Modules/CMakeCUDACompilerABI.cu)
+
+  # Re-configure to save learned information.
+  configure_file(
+    ${CMAKE_ROOT}/Modules/CMakeCUDACompiler.cmake.in
+    ${CMAKE_PLATFORM_INFO_DIR}/CMakeCUDACompiler.cmake
+    @ONLY
+    )
+  include(${CMAKE_PLATFORM_INFO_DIR}/CMakeCUDACompiler.cmake)
+endif()
+
+
+unset(__CMAKE_CUDA_COMPILER_OUTPUT)

+ 24 - 0
Modules/Compiler/NVIDIA-CUDA.cmake

@@ -0,0 +1,24 @@
+set(CMAKE_CUDA_VERBOSE_FLAG "-v")
+
+
+set(CMAKE_CUDA_COMPILE_OPTIONS_PIE -Xcompiler=-fPIE)
+set(CMAKE_CUDA_COMPILE_OPTIONS_PIC -Xcompiler=-fPIC)
+#CMAKE_SHARED_LIBRARY_CUDA_FLAGS is sent to the host linker so we don' need
+#to forward it through nvcc
+set(CMAKE_SHARED_LIBRARY_CUDA_FLAGS -fPIC)
+set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
+set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
+set(CMAKE_CUDA_COMPILE_OPTIONS_VISIBILITY -Xcompiler=-fvisibility=)
+
+set(CMAKE_CUDA_FLAGS_INIT " ")
+set(CMAKE_CUDA_FLAGS_DEBUG_INIT " -g")
+set(CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -Os -DNDEBUG")
+set(CMAKE_CUDA_FLAGS_RELEASE_INIT " -O3 -DNDEBUG")
+set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -O2 -g -DNDEBUG")
+
+set(CMAKE_CUDA98_STANDARD_COMPILE_OPTION "")
+set(CMAKE_CUDA98_EXTENSION_COMPILE_OPTION "")
+set(CMAKE_CUDA11_STANDARD_COMPILE_OPTION "-std=c++11")
+set(CMAKE_CUDA11_EXTENSION_COMPILE_OPTION "-std=c++11")
+
+set(CMAKE_CUDA_STANDARD_DEFAULT 98)

+ 7 - 0
Modules/Compiler/NVIDIA-DetermineCompiler.cmake

@@ -0,0 +1,7 @@
+
+set(_compiler_id_pp_test "defined(__NVCC__)")
+
+set(_compiler_id_version_compute "
+# define @PREFIX@COMPILER_VERSION_MAJOR @MACRO_DEC@(__CUDACC_VER_MAJOR__)
+# define @PREFIX@COMPILER_VERSION_MINOR @MACRO_DEC@(__CUDACC_VER_MINOR__)
+# define @PREFIX@COMPILER_VERSION_PATCH @MACRO_DEC@(__CUDACC_VER_BUILD__)")

+ 2 - 0
Source/CMakeLists.txt

@@ -298,6 +298,8 @@ set(SRCS
   cmLinkItem.h
   cmLinkLineComputer.cxx
   cmLinkLineComputer.h
+  cmLinkLineDeviceComputer.cxx
+  cmLinkLineDeviceComputer.h
   cmListFileCache.cxx
   cmListFileCache.h
   cmListFileLexer.c

+ 1 - 0
Source/cmComputeLinkInformation.h

@@ -70,6 +70,7 @@ public:
   std::string const& GetRPathLinkFlag() const { return this->RPathLinkFlag; }
   std::string GetRPathLinkString();
 
+  std::string GetConfig() const { return this->Config; }
 private:
   void AddItem(std::string const& item, const cmGeneratorTarget* tgt);
   void AddSharedDepItem(std::string const& item, cmGeneratorTarget const* tgt);

+ 3 - 3
Source/cmLinkLineComputer.h

@@ -33,10 +33,10 @@ public:
   std::string ComputeFrameworkPath(cmComputeLinkInformation& cli,
                                    std::string const& fwSearchFlag);
 
-  std::string ComputeLinkLibraries(cmComputeLinkInformation& cli,
-                                   std::string const& stdLibString);
+  virtual std::string ComputeLinkLibraries(cmComputeLinkInformation& cli,
+                                           std::string const& stdLibString);
 
-private:
+protected:
   std::string ComputeLinkLibs(cmComputeLinkInformation& cli);
   std::string ComputeRPath(cmComputeLinkInformation& cli);
 

+ 74 - 0
Source/cmLinkLineDeviceComputer.cxx

@@ -0,0 +1,74 @@
+/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+   file Copyright.txt or https://cmake.org/licensing for details.  */
+
+#include "cmLinkLineDeviceComputer.h"
+#include "cmComputeLinkInformation.h"
+#include "cmGeneratorTarget.h"
+#include "cmGlobalNinjaGenerator.h"
+#include "cmOutputConverter.h"
+
+cmLinkLineDeviceComputer::cmLinkLineDeviceComputer(
+  cmOutputConverter* outputConverter, cmStateDirectory stateDir)
+  : cmLinkLineComputer(outputConverter, stateDir)
+{
+}
+
+cmLinkLineDeviceComputer::~cmLinkLineDeviceComputer()
+{
+}
+
+std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
+  cmComputeLinkInformation& cli, std::string const& stdLibString)
+{
+  // Write the library flags to the build rule.
+  std::ostringstream fout;
+  typedef cmComputeLinkInformation::ItemVector ItemVector;
+  ItemVector const& items = cli.GetItems();
+  std::string config = cli.GetConfig();
+  for (ItemVector::const_iterator li = items.begin(); li != items.end();
+       ++li) {
+    if (!li->Target) {
+      continue;
+    }
+
+    if (li->Target->GetType() == cmStateEnums::INTERFACE_LIBRARY ||
+        li->Target->GetType() == cmStateEnums::SHARED_LIBRARY ||
+        li->Target->GetType() == cmStateEnums::MODULE_LIBRARY) {
+      continue;
+    }
+
+    std::set<std::string> langs;
+    li->Target->GetLanguages(langs, config);
+    if (langs.count("CUDA") == 0) {
+      continue;
+    }
+
+    if (li->IsPath) {
+      fout << this->ConvertToOutputFormat(
+        this->ConvertToLinkReference(li->Value));
+    } else {
+      fout << li->Value;
+    }
+    fout << " ";
+  }
+
+  if (!stdLibString.empty()) {
+    fout << stdLibString << " ";
+  }
+
+  return fout.str();
+}
+
+cmNinjaLinkLineDeviceComputer::cmNinjaLinkLineDeviceComputer(
+  cmOutputConverter* outputConverter, cmStateDirectory stateDir,
+  cmGlobalNinjaGenerator const* gg)
+  : cmLinkLineDeviceComputer(outputConverter, stateDir)
+  , GG(gg)
+{
+}
+
+std::string cmNinjaLinkLineDeviceComputer::ConvertToLinkReference(
+  std::string const& lib) const
+{
+  return GG->ConvertToNinjaPath(lib);
+}

+ 36 - 0
Source/cmLinkLineDeviceComputer.h

@@ -0,0 +1,36 @@
+/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+   file Copyright.txt or https://cmake.org/licensing for details.  */
+
+#ifndef cmLinkLineDeviceComputer_h
+#define cmLinkLineDeviceComputer_h
+
+#include "cmLinkLineComputer.h"
+class cmGlobalNinjaGenerator;
+
+class cmLinkLineDeviceComputer : public cmLinkLineComputer
+{
+public:
+  cmLinkLineDeviceComputer(cmOutputConverter* outputConverter,
+                           cmStateDirectory stateDir);
+  ~cmLinkLineDeviceComputer() CM_OVERRIDE;
+
+  std::string ComputeLinkLibraries(cmComputeLinkInformation& cli,
+                                   std::string const& stdLibString)
+    CM_OVERRIDE;
+};
+
+class cmNinjaLinkLineDeviceComputer : public cmLinkLineDeviceComputer
+{
+public:
+  cmNinjaLinkLineDeviceComputer(cmOutputConverter* outputConverter,
+                                cmStateDirectory stateDir,
+                                cmGlobalNinjaGenerator const* gg);
+
+  std::string ConvertToLinkReference(std::string const& input) const
+    CM_OVERRIDE;
+
+private:
+  cmGlobalNinjaGenerator const* GG;
+};
+
+#endif

+ 5 - 0
Source/cmLocalGenerator.cxx

@@ -63,6 +63,8 @@ static const char* ruleReplaceVars[] = {
   "CMAKE_CURRENT_BINARY_DIR",
   "CMAKE_RANLIB",
   "CMAKE_LINKER",
+  "CMAKE_CUDA_HOST_COMPILER",
+  "CMAKE_CUDA_HOST_LINK_LAUNCHER",
   "CMAKE_CL_SHOWINCLUDES_PREFIX"
 };
 
@@ -1475,6 +1477,9 @@ void cmLocalGenerator::AddCompilerRequirementFlag(
     langStdMap["C"].push_back("11");
     langStdMap["C"].push_back("99");
     langStdMap["C"].push_back("90");
+
+    langStdMap["CUDA"].push_back("11");
+    langStdMap["CUDA"].push_back("98");
   }
 
   std::string standard(standardProp);

+ 4 - 3
Source/cmLocalUnixMakefileGenerator3.cxx

@@ -285,8 +285,8 @@ void cmLocalUnixMakefileGenerator3::WriteLocalMakefile()
     for (std::vector<LocalObjectEntry>::const_iterator ei = lo->second.begin();
          ei != lo->second.end(); ++ei) {
       if (ei->Language == "C" || ei->Language == "CXX" ||
-          ei->Language == "Fortran") {
-        // Right now, C, C++ and Fortran have both a preprocessor and the
+          ei->Language == "CUDA" || ei->Language == "Fortran") {
+        // Right now, C, C++, Fortran and CUDA have both a preprocessor and the
         // ability to generate assembly code
         lang_has_preprocessor = true;
         lang_has_assembly = true;
@@ -1458,7 +1458,8 @@ bool cmLocalUnixMakefileGenerator3::ScanDependencies(
 
     // Create the scanner for this language
     cmDepends* scanner = CM_NULLPTR;
-    if (lang == "C" || lang == "CXX" || lang == "RC" || lang == "ASM") {
+    if (lang == "C" || lang == "CXX" || lang == "RC" || lang == "ASM" ||
+        lang == "CUDA") {
       // TODO: Handle RC (resource files) dependencies correctly.
       scanner = new cmDependsC(this, targetDir, lang, &validDeps);
     }

+ 3 - 0
Source/cmMakefile.cxx

@@ -4413,10 +4413,13 @@ bool cmMakefile::AddRequiredTargetCxxFeature(cmTarget* target,
 
   if (setCxx14) {
     target->SetProperty("CXX_STANDARD", "14");
+    target->SetProperty("CUDA_STANDARD", "14");
   } else if (setCxx11) {
     target->SetProperty("CXX_STANDARD", "11");
+    target->SetProperty("CUDA_STANDARD", "11");
   } else if (setCxx98) {
     target->SetProperty("CXX_STANDARD", "98");
+    target->SetProperty("CUDA_STANDARD", "98");
   }
   return true;
 }

+ 227 - 0
Source/cmMakefileExecutableTargetGenerator.cxx

@@ -10,6 +10,7 @@
 #include "cmGeneratorTarget.h"
 #include "cmGlobalUnixMakefileGenerator3.h"
 #include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
 #include "cmLocalGenerator.h"
 #include "cmLocalUnixMakefileGenerator3.h"
 #include "cmMakefile.h"
@@ -56,6 +57,9 @@ void cmMakefileExecutableTargetGenerator::WriteRuleFiles()
   // write in rules for object files and custom commands
   this->WriteTargetBuildRules();
 
+  // write the device link rules
+  this->WriteDeviceExecutableRule(false);
+
   // write the link rules
   this->WriteExecutableRule(false);
   if (this->GeneratorTarget->NeedRelinkBeforeInstall(this->ConfigName)) {
@@ -77,6 +81,218 @@ void cmMakefileExecutableTargetGenerator::WriteRuleFiles()
   this->CloseFileStreams();
 }
 
+void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
+  bool relink)
+{
+#ifdef CMAKE_BUILD_WITH_CMAKE
+  const std::string cuda_lang("CUDA");
+  cmGeneratorTarget::LinkClosure const* closure =
+    this->GeneratorTarget->GetLinkClosure(this->ConfigName);
+
+  const bool hasCUDA =
+    (std::find(closure->Languages.begin(), closure->Languages.end(),
+               cuda_lang) != closure->Languages.end());
+  if (!hasCUDA) {
+    return;
+  }
+
+  std::vector<std::string> commands;
+
+  // Build list of dependencies.
+  std::vector<std::string> depends;
+  this->AppendLinkDepends(depends);
+
+  // Get the language to use for linking this library.
+  std::string linkLanguage = "CUDA";
+
+  // Get the name of the device object to generate.
+  std::string const targetOutputReal =
+    this->GeneratorTarget->ObjectDirectory + "cmake_device_link.o";
+  this->DeviceLinkObject = targetOutputReal;
+
+  this->NumberOfProgressActions++;
+  if (!this->NoRuleMessages) {
+    cmLocalUnixMakefileGenerator3::EchoProgress progress;
+    this->MakeEchoProgress(progress);
+    // Add the link message.
+    std::string buildEcho = "Linking ";
+    buildEcho += linkLanguage;
+    buildEcho += " device code ";
+    buildEcho += targetOutputReal;
+    this->LocalGenerator->AppendEcho(
+      commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
+  }
+
+  // Build a list of compiler flags and linker flags.
+  std::string flags;
+  std::string linkFlags;
+
+  // Add flags to create an executable.
+  // Add symbol export flags if necessary.
+  if (this->GeneratorTarget->IsExecutableWithExports()) {
+    std::string export_flag_var = "CMAKE_EXE_EXPORTS_";
+    export_flag_var += linkLanguage;
+    export_flag_var += "_FLAG";
+    this->LocalGenerator->AppendFlags(
+      linkFlags, this->Makefile->GetDefinition(export_flag_var));
+  }
+
+  this->LocalGenerator->AppendFlags(linkFlags,
+                                    this->LocalGenerator->GetLinkLibsCMP0065(
+                                      linkLanguage, *this->GeneratorTarget));
+
+  // Add language feature flags.
+  this->AddFeatureFlags(flags, linkLanguage);
+
+  this->LocalGenerator->AddArchitectureFlags(flags, this->GeneratorTarget,
+                                             linkLanguage, this->ConfigName);
+
+  // Add target-specific linker flags.
+  this->LocalGenerator->AppendFlags(
+    linkFlags, this->GeneratorTarget->GetProperty("LINK_FLAGS"));
+  std::string linkFlagsConfig = "LINK_FLAGS_";
+  linkFlagsConfig += cmSystemTools::UpperCase(this->ConfigName);
+  this->LocalGenerator->AppendFlags(
+    linkFlags, this->GeneratorTarget->GetProperty(linkFlagsConfig));
+
+  {
+    CM_AUTO_PTR<cmLinkLineComputer> linkLineComputer(
+      this->CreateLinkLineComputer(
+        this->LocalGenerator,
+        this->LocalGenerator->GetStateSnapshot().GetDirectory()));
+
+    this->AddModuleDefinitionFlag(linkLineComputer.get(), linkFlags);
+  }
+
+  // Construct a list of files associated with this executable that
+  // may need to be cleaned.
+  std::vector<std::string> exeCleanFiles;
+  exeCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
+    this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
+
+  // Determine whether a link script will be used.
+  bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
+
+  // Construct the main link rule.
+  std::vector<std::string> real_link_commands;
+  const std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_EXECUTABLE";
+  const std::string linkRule = this->GetLinkRule(linkRuleVar);
+  std::vector<std::string> commands1;
+  cmSystemTools::ExpandListArgument(linkRule, real_link_commands);
+
+  bool useResponseFileForObjects =
+    this->CheckUseResponseFileForObjects(linkLanguage);
+  bool const useResponseFileForLibs =
+    this->CheckUseResponseFileForLibraries(linkLanguage);
+
+  // Expand the rule variables.
+  {
+    bool useWatcomQuote =
+      this->Makefile->IsOn(linkRuleVar + "_USE_WATCOM_QUOTE");
+
+    // Set path conversion for link script shells.
+    this->LocalGenerator->SetLinkScriptShell(useLinkScript);
+
+    CM_AUTO_PTR<cmLinkLineComputer> linkLineComputer(
+      new cmLinkLineDeviceComputer(
+        this->LocalGenerator,
+        this->LocalGenerator->GetStateSnapshot().GetDirectory()));
+    linkLineComputer->SetForResponse(useResponseFileForLibs);
+    linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
+    linkLineComputer->SetRelink(relink);
+
+    // Collect up flags to link in needed libraries.
+    std::string linkLibs;
+    this->CreateLinkLibs(linkLineComputer.get(), linkLibs,
+                         useResponseFileForLibs, depends);
+
+    // Construct object file lists that may be needed to expand the
+    // rule.
+    std::string buildObjs;
+    this->CreateObjectLists(useLinkScript, false, useResponseFileForObjects,
+                            buildObjs, depends, useWatcomQuote);
+
+    cmRulePlaceholderExpander::RuleVariables vars;
+    std::string objectDir = this->GeneratorTarget->GetSupportDirectory();
+
+    objectDir = this->LocalGenerator->ConvertToOutputFormat(
+      this->LocalGenerator->MaybeConvertToRelativePath(
+        this->LocalGenerator->GetCurrentBinaryDirectory(), objectDir),
+      cmOutputConverter::SHELL);
+
+    cmOutputConverter::OutputFormat output = (useWatcomQuote)
+      ? cmOutputConverter::WATCOMQUOTE
+      : cmOutputConverter::SHELL;
+    std::string target = this->LocalGenerator->ConvertToOutputFormat(
+      this->LocalGenerator->MaybeConvertToRelativePath(
+        this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
+      output);
+
+    vars.Language = linkLanguage.c_str();
+    vars.Objects = buildObjs.c_str();
+    vars.ObjectDir = objectDir.c_str();
+    vars.Target = target.c_str();
+    vars.LinkLibraries = linkLibs.c_str();
+    vars.Flags = flags.c_str();
+    vars.LinkFlags = linkFlags.c_str();
+
+    std::string launcher;
+
+    const char* val = this->LocalGenerator->GetRuleLauncher(
+      this->GeneratorTarget, "RULE_LAUNCH_LINK");
+    if (val && *val) {
+      launcher = val;
+      launcher += " ";
+    }
+
+    CM_AUTO_PTR<cmRulePlaceholderExpander> rulePlaceholderExpander(
+      this->LocalGenerator->CreateRulePlaceholderExpander());
+
+    // Expand placeholders in the commands.
+    rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
+    for (std::vector<std::string>::iterator i = real_link_commands.begin();
+         i != real_link_commands.end(); ++i) {
+      *i = launcher + *i;
+      rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator, *i,
+                                                   vars);
+    }
+
+    // Restore path conversion to normal shells.
+    this->LocalGenerator->SetLinkScriptShell(false);
+  }
+
+  // Optionally convert the build rule to use a script to avoid long
+  // command lines in the make shell.
+  if (useLinkScript) {
+    // Use a link script.
+    const char* name = (relink ? "drelink.txt" : "dlink.txt");
+    this->CreateLinkScript(name, real_link_commands, commands1, depends);
+  } else {
+    // No link script.  Just use the link rule directly.
+    commands1 = real_link_commands;
+  }
+  this->LocalGenerator->CreateCDCommand(
+    commands1, this->Makefile->GetCurrentBinaryDirectory(),
+    this->LocalGenerator->GetBinaryDirectory());
+  commands.insert(commands.end(), commands1.begin(), commands1.end());
+  commands1.clear();
+
+  // Write the build rule.
+  this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, CM_NULLPTR,
+                                      targetOutputReal, depends, commands,
+                                      false);
+
+  // Write the main driver rule to build everything in this target.
+  this->WriteTargetDriverRule(targetOutputReal, relink);
+
+  // Clean all the possible executable names and symlinks.
+  this->CleanFiles.insert(this->CleanFiles.end(), exeCleanFiles.begin(),
+                          exeCleanFiles.end());
+#else
+  static_cast<void>(relink);
+#endif
+}
+
 void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)
 {
   std::vector<std::string> commands;
@@ -84,6 +300,9 @@ void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)
   // Build list of dependencies.
   std::vector<std::string> depends;
   this->AppendLinkDepends(depends);
+  if (!this->DeviceLinkObject.empty()) {
+    depends.push_back(this->DeviceLinkObject);
+  }
 
   // Get the name of the executable to generate.
   std::string targetName;
@@ -327,6 +546,14 @@ void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)
     std::string buildObjs;
     this->CreateObjectLists(useLinkScript, false, useResponseFileForObjects,
                             buildObjs, depends, useWatcomQuote);
+    if (!this->DeviceLinkObject.empty()) {
+      buildObjs += " " +
+        this->LocalGenerator->ConvertToOutputFormat(
+          this->LocalGenerator->MaybeConvertToRelativePath(
+            this->LocalGenerator->GetCurrentBinaryDirectory(),
+            this->DeviceLinkObject),
+          cmOutputConverter::SHELL);
+    }
 
     // maybe create .def file from list of objects
     if (this->GeneratorTarget->IsExecutableWithExports() &&

+ 4 - 0
Source/cmMakefileExecutableTargetGenerator.h

@@ -21,6 +21,10 @@ public:
 
 protected:
   virtual void WriteExecutableRule(bool relink);
+  virtual void WriteDeviceExecutableRule(bool relink);
+
+private:
+  std::string DeviceLinkObject;
 };
 
 #endif

+ 222 - 0
Source/cmMakefileLibraryTargetGenerator.cxx

@@ -11,6 +11,7 @@
 #include "cmGeneratorTarget.h"
 #include "cmGlobalUnixMakefileGenerator3.h"
 #include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
 #include "cmLocalGenerator.h"
 #include "cmLocalUnixMakefileGenerator3.h"
 #include "cmMakefile.h"
@@ -151,6 +152,24 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
     this->WriteFrameworkRules(relink);
     return;
   }
+
+  if (!relink) {
+    const std::string cuda_lang("CUDA");
+    cmGeneratorTarget::LinkClosure const* closure =
+      this->GeneratorTarget->GetLinkClosure(this->ConfigName);
+
+    const bool hasCUDA =
+      (std::find(closure->Languages.begin(), closure->Languages.end(),
+                 cuda_lang) != closure->Languages.end());
+    if (hasCUDA) {
+      std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
+      std::string extraFlags;
+      this->LocalGenerator->AppendFlags(
+        extraFlags, this->GeneratorTarget->GetProperty("LINK_FLAGS"));
+      this->WriteDeviceLibraryRules(linkRuleVar, extraFlags, relink);
+    }
+  }
+
   std::string linkLanguage =
     this->GeneratorTarget->GetLinkerLanguage(this->ConfigName);
   std::string linkRuleVar = "CMAKE_";
@@ -183,6 +202,24 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
 
 void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink)
 {
+
+  if (!relink) {
+    const std::string cuda_lang("CUDA");
+    cmGeneratorTarget::LinkClosure const* closure =
+      this->GeneratorTarget->GetLinkClosure(this->ConfigName);
+
+    const bool hasCUDA =
+      (std::find(closure->Languages.begin(), closure->Languages.end(),
+                 cuda_lang) != closure->Languages.end());
+    if (hasCUDA) {
+      std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
+      std::string extraFlags;
+      this->LocalGenerator->AppendFlags(
+        extraFlags, this->GeneratorTarget->GetProperty("LINK_FLAGS"));
+      this->WriteDeviceLibraryRules(linkRuleVar, extraFlags, relink);
+    }
+  }
+
   std::string linkLanguage =
     this->GeneratorTarget->GetLinkerLanguage(this->ConfigName);
   std::string linkRuleVar = "CMAKE_";
@@ -230,6 +267,180 @@ void cmMakefileLibraryTargetGenerator::WriteFrameworkRules(bool relink)
   this->WriteLibraryRules(linkRuleVar, extraFlags, relink);
 }
 
+void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
+  const std::string& linkRuleVar, const std::string& extraFlags, bool relink)
+{
+#ifdef CMAKE_BUILD_WITH_CMAKE
+  // TODO: Merge the methods that call this method to avoid
+  // code duplication.
+  std::vector<std::string> commands;
+
+  // Build list of dependencies.
+  std::vector<std::string> depends;
+  this->AppendLinkDepends(depends);
+
+  // Get the language to use for linking this library.
+  std::string linkLanguage = "CUDA";
+
+  // Create set of linking flags.
+  std::string linkFlags;
+  this->LocalGenerator->AppendFlags(linkFlags, extraFlags);
+
+  // Get the name of the device object to generate.
+  std::string const targetOutputReal =
+    this->GeneratorTarget->ObjectDirectory + "cmake_device_link.o";
+  this->DeviceLinkObject = targetOutputReal;
+
+  this->NumberOfProgressActions++;
+  if (!this->NoRuleMessages) {
+    cmLocalUnixMakefileGenerator3::EchoProgress progress;
+    this->MakeEchoProgress(progress);
+    // Add the link message.
+    std::string buildEcho = "Linking " + linkLanguage + " device code";
+    buildEcho += targetOutputReal;
+    this->LocalGenerator->AppendEcho(
+      commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
+  }
+  // Clean files associated with this library.
+  std::vector<std::string> libCleanFiles;
+  libCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
+    this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
+
+  // Determine whether a link script will be used.
+  bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
+
+  bool useResponseFileForObjects =
+    this->CheckUseResponseFileForObjects(linkLanguage);
+  bool const useResponseFileForLibs =
+    this->CheckUseResponseFileForLibraries(linkLanguage);
+
+  cmRulePlaceholderExpander::RuleVariables vars;
+  vars.Language = linkLanguage.c_str();
+
+  // Expand the rule variables.
+  std::vector<std::string> real_link_commands;
+  {
+    bool useWatcomQuote =
+      this->Makefile->IsOn(linkRuleVar + "_USE_WATCOM_QUOTE");
+
+    // Set path conversion for link script shells.
+    this->LocalGenerator->SetLinkScriptShell(useLinkScript);
+
+    // Collect up flags to link in needed libraries.
+    std::string linkLibs;
+    if (this->GeneratorTarget->GetType() != cmStateEnums::STATIC_LIBRARY) {
+
+      CM_AUTO_PTR<cmLinkLineComputer> linkLineComputer(
+        new cmLinkLineDeviceComputer(
+          this->LocalGenerator,
+          this->LocalGenerator->GetStateSnapshot().GetDirectory()));
+      linkLineComputer->SetForResponse(useResponseFileForLibs);
+      linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
+      linkLineComputer->SetRelink(relink);
+
+      this->CreateLinkLibs(linkLineComputer.get(), linkLibs,
+                           useResponseFileForLibs, depends);
+    }
+
+    // Construct object file lists that may be needed to expand the
+    // rule.
+    std::string buildObjs;
+    this->CreateObjectLists(useLinkScript, false, // useArchiveRules
+                            useResponseFileForObjects, buildObjs, depends,
+                            useWatcomQuote);
+
+    cmOutputConverter::OutputFormat output = (useWatcomQuote)
+      ? cmOutputConverter::WATCOMQUOTE
+      : cmOutputConverter::SHELL;
+
+    std::string objectDir = this->GeneratorTarget->GetSupportDirectory();
+    objectDir = this->LocalGenerator->ConvertToOutputFormat(
+      this->LocalGenerator->MaybeConvertToRelativePath(
+        this->LocalGenerator->GetCurrentBinaryDirectory(), objectDir),
+      cmOutputConverter::SHELL);
+
+    std::string target = this->LocalGenerator->ConvertToOutputFormat(
+      this->LocalGenerator->MaybeConvertToRelativePath(
+        this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
+      output);
+
+    vars.Objects = buildObjs.c_str();
+    vars.ObjectDir = objectDir.c_str();
+    vars.Target = target.c_str();
+    vars.LinkLibraries = linkLibs.c_str();
+    vars.ObjectsQuoted = buildObjs.c_str();
+    vars.LinkFlags = linkFlags.c_str();
+
+    // Add language feature flags.
+    std::string langFlags;
+    this->AddFeatureFlags(langFlags, linkLanguage);
+
+    vars.LanguageCompileFlags = langFlags.c_str();
+
+    std::string launcher;
+    const char* val = this->LocalGenerator->GetRuleLauncher(
+      this->GeneratorTarget, "RULE_LAUNCH_LINK");
+    if (val && *val) {
+      launcher = val;
+      launcher += " ";
+    }
+
+    CM_AUTO_PTR<cmRulePlaceholderExpander> rulePlaceholderExpander(
+      this->LocalGenerator->CreateRulePlaceholderExpander());
+
+    // Construct the main link rule and expand placeholders.
+    rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
+    std::string linkRule = this->GetLinkRule(linkRuleVar);
+    cmSystemTools::ExpandListArgument(linkRule, real_link_commands);
+
+    // Expand placeholders.
+    for (std::vector<std::string>::iterator i = real_link_commands.begin();
+         i != real_link_commands.end(); ++i) {
+      *i = launcher + *i;
+      rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator, *i,
+                                                   vars);
+    }
+    // Restore path conversion to normal shells.
+    this->LocalGenerator->SetLinkScriptShell(false);
+
+    // Clean all the possible library names and symlinks.
+    this->CleanFiles.insert(this->CleanFiles.end(), libCleanFiles.begin(),
+                            libCleanFiles.end());
+  }
+
+  std::vector<std::string> commands1;
+  // Optionally convert the build rule to use a script to avoid long
+  // command lines in the make shell.
+  if (useLinkScript) {
+    // Use a link script.
+    const char* name = (relink ? "drelink.txt" : "dlink.txt");
+    this->CreateLinkScript(name, real_link_commands, commands1, depends);
+  } else {
+    // No link script.  Just use the link rule directly.
+    commands1 = real_link_commands;
+  }
+  this->LocalGenerator->CreateCDCommand(
+    commands1, this->Makefile->GetCurrentBinaryDirectory(),
+    this->LocalGenerator->GetBinaryDirectory());
+  commands.insert(commands.end(), commands1.begin(), commands1.end());
+  commands1.clear();
+
+  // Compute the list of outputs.
+  std::vector<std::string> outputs(1, targetOutputReal);
+
+  // Write the build rule.
+  this->WriteMakeRule(*this->BuildFileStream, CM_NULLPTR, outputs, depends,
+                      commands, false);
+
+  // Write the main driver rule to build everything in this target.
+  this->WriteTargetDriverRule(targetOutputReal, relink);
+#else
+  static_cast<void>(linkRuleVar);
+  static_cast<void>(extraFlags);
+  static_cast<void>(relink);
+#endif
+}
+
 void cmMakefileLibraryTargetGenerator::WriteLibraryRules(
   const std::string& linkRuleVar, const std::string& extraFlags, bool relink)
 {
@@ -240,6 +451,9 @@ void cmMakefileLibraryTargetGenerator::WriteLibraryRules(
   // Build list of dependencies.
   std::vector<std::string> depends;
   this->AppendLinkDepends(depends);
+  if (!this->DeviceLinkObject.empty()) {
+    depends.push_back(this->DeviceLinkObject);
+  }
 
   // Get the language to use for linking this library.
   std::string linkLanguage =
@@ -518,6 +732,14 @@ void cmMakefileLibraryTargetGenerator::WriteLibraryRules(
     this->CreateObjectLists(useLinkScript, useArchiveRules,
                             useResponseFileForObjects, buildObjs, depends,
                             useWatcomQuote);
+    if (!this->DeviceLinkObject.empty()) {
+      buildObjs += " " +
+        this->LocalGenerator->ConvertToOutputFormat(
+          this->LocalGenerator->MaybeConvertToRelativePath(
+            this->LocalGenerator->GetCurrentBinaryDirectory(),
+            this->DeviceLinkObject),
+          cmOutputConverter::SHELL);
+    }
 
     // maybe create .def file from list of objects
     if (this->GeneratorTarget->GetType() == cmStateEnums::SHARED_LIBRARY &&

+ 6 - 0
Source/cmMakefileLibraryTargetGenerator.h

@@ -26,6 +26,9 @@ protected:
   void WriteStaticLibraryRules();
   void WriteSharedLibraryRules(bool relink);
   void WriteModuleLibraryRules(bool relink);
+
+  void WriteDeviceLibraryRules(const std::string& linkRule,
+                               const std::string& extraFlags, bool relink);
   void WriteLibraryRules(const std::string& linkRule,
                          const std::string& extraFlags, bool relink);
   // MacOSX Framework support methods
@@ -33,6 +36,9 @@ protected:
 
   // Store the computd framework version for OS X Frameworks.
   std::string FrameworkVersion;
+
+private:
+  std::string DeviceLinkObject;
 };
 
 #endif

+ 18 - 9
Source/cmMakefileTargetGenerator.cxx

@@ -583,11 +583,11 @@ void cmMakefileTargetGenerator::WriteObjectBuildFile(
   std::string const includesString = "$(" + lang + "_INCLUDES)";
   vars.Includes = includesString.c_str();
 
-  // At the moment, it is assumed that C, C++, and Fortran have both
+  // At the moment, it is assumed that C, C++, Fortran, and CUDA have both
   // assembly and preprocessor capabilities. The same is true for the
   // ability to export compile commands
-  bool lang_has_preprocessor =
-    ((lang == "C") || (lang == "CXX") || (lang == "Fortran"));
+  bool lang_has_preprocessor = ((lang == "C") || (lang == "CXX") ||
+                                (lang == "Fortran") || (lang == "CUDA"));
   bool const lang_has_assembly = lang_has_preprocessor;
   bool const lang_can_export_cmds = lang_has_preprocessor;
 
@@ -596,13 +596,22 @@ void cmMakefileTargetGenerator::WriteObjectBuildFile(
 
   // Construct the compile rules.
   {
-    std::string compileRuleVar = "CMAKE_";
-    compileRuleVar += lang;
-    compileRuleVar += "_COMPILE_OBJECT";
-    std::string compileRule =
-      this->Makefile->GetRequiredDefinition(compileRuleVar);
     std::vector<std::string> compileCommands;
-    cmSystemTools::ExpandListArgument(compileRule, compileCommands);
+    if (lang == "CUDA") {
+      std::string cmdVar;
+      if (this->GeneratorTarget->GetProperty("CUDA_SEPARABLE_COMPILATION")) {
+        cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION");
+      } else {
+        cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION");
+      }
+      std::string compileRule = this->Makefile->GetRequiredDefinition(cmdVar);
+      cmSystemTools::ExpandListArgument(compileRule, compileCommands);
+    } else {
+      const std::string cmdVar =
+        std::string("CMAKE_") + lang + "_COMPILE_OBJECT";
+      std::string compileRule = this->Makefile->GetRequiredDefinition(cmdVar);
+      cmSystemTools::ExpandListArgument(compileRule, compileCommands);
+    }
 
     if (this->Makefile->IsOn("CMAKE_EXPORT_COMPILE_COMMANDS") &&
         lang_can_export_cmds && compileCommands.size() == 1) {

+ 358 - 0
Source/cmNinjaNormalTargetGenerator.cxx

@@ -18,6 +18,7 @@
 #include "cmGeneratorTarget.h"
 #include "cmGlobalNinjaGenerator.h"
 #include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
 #include "cmLocalGenerator.h"
 #include "cmLocalNinjaGenerator.h"
 #include "cmMakefile.h"
@@ -47,6 +48,7 @@ cmNinjaNormalTargetGenerator::cmNinjaNormalTargetGenerator(
   , TargetNameImport()
   , TargetNamePDB()
   , TargetLinkLanguage("")
+  , DeviceLinkObject()
 {
   this->TargetLinkLanguage = target->GetLinkerLanguage(this->GetConfigName());
   if (target->GetType() == cmStateEnums::EXECUTABLE) {
@@ -94,6 +96,9 @@ void cmNinjaNormalTargetGenerator::Generate()
   if (this->GetGeneratorTarget()->GetType() == cmStateEnums::OBJECT_LIBRARY) {
     this->WriteObjectLibStatement();
   } else {
+    // If this target has cuda language link inputs, and we need to do
+    // device linking
+    this->WriteDeviceLinkStatement();
     this->WriteLinkStatement();
   }
 }
@@ -155,6 +160,14 @@ std::string cmNinjaNormalTargetGenerator::LanguageLinkerRule() const
                     this->GetGeneratorTarget()->GetName());
 }
 
+std::string cmNinjaNormalTargetGenerator::LanguageLinkerDeviceRule() const
+{
+  return this->TargetLinkLanguage + "_" +
+    cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()) +
+    "_DEVICE_LINKER__" + cmGlobalNinjaGenerator::EncodeRuleName(
+                           this->GetGeneratorTarget()->GetName());
+}
+
 struct cmNinjaRemoveNoOpCommands
 {
   bool operator()(std::string const& cmd)
@@ -163,6 +176,115 @@ struct cmNinjaRemoveNoOpCommands
   }
 };
 
+void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(bool useResponseFile)
+{
+  cmStateEnums::TargetType targetType = this->GetGeneratorTarget()->GetType();
+  std::string ruleName = this->LanguageLinkerDeviceRule();
+  // Select whether to use a response file for objects.
+  std::string rspfile;
+  std::string rspcontent;
+
+  if (!this->GetGlobalGenerator()->HasRule(ruleName)) {
+    cmRulePlaceholderExpander::RuleVariables vars;
+    vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str();
+    vars.CMTargetType =
+      cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType());
+
+    vars.Language = "CUDA";
+
+    std::string responseFlag;
+    if (!useResponseFile) {
+      vars.Objects = "$in";
+      vars.LinkLibraries = "$LINK_LIBRARIES";
+    } else {
+      std::string cmakeVarLang = "CMAKE_";
+      cmakeVarLang += this->TargetLinkLanguage;
+
+      // build response file name
+      std::string cmakeLinkVar = cmakeVarLang + "_RESPONSE_FILE_LINK_FLAG";
+      const char* flag = GetMakefile()->GetDefinition(cmakeLinkVar);
+      if (flag) {
+        responseFlag = flag;
+      } else {
+        responseFlag = "@";
+      }
+      rspfile = "$RSP_FILE";
+      responseFlag += rspfile;
+
+      // build response file content
+      if (this->GetGlobalGenerator()->IsGCCOnWindows()) {
+        rspcontent = "$in";
+      } else {
+        rspcontent = "$in_newline";
+      }
+      rspcontent += " $LINK_LIBRARIES";
+      vars.Objects = responseFlag.c_str();
+      vars.LinkLibraries = "";
+    }
+
+    vars.ObjectDir = "$OBJECT_DIR";
+
+    vars.Target = "$TARGET_FILE";
+
+    vars.SONameFlag = "$SONAME_FLAG";
+    vars.TargetSOName = "$SONAME";
+    vars.TargetPDB = "$TARGET_PDB";
+
+    vars.Flags = "$FLAGS";
+    vars.LinkFlags = "$LINK_FLAGS";
+    vars.Manifests = "$MANIFESTS";
+
+    std::string langFlags;
+    if (targetType != cmStateEnums::EXECUTABLE) {
+      langFlags += "$LANGUAGE_COMPILE_FLAGS $ARCH_FLAGS";
+      vars.LanguageCompileFlags = langFlags.c_str();
+    }
+
+    std::string launcher;
+    const char* val = this->GetLocalGenerator()->GetRuleLauncher(
+      this->GetGeneratorTarget(), "RULE_LAUNCH_LINK");
+    if (val && *val) {
+      launcher = val;
+      launcher += " ";
+    }
+
+    CM_AUTO_PTR<cmRulePlaceholderExpander> rulePlaceholderExpander(
+      this->GetLocalGenerator()->CreateRulePlaceholderExpander());
+
+    // Rule for linking library/executable.
+    std::vector<std::string> linkCmds = this->ComputeDeviceLinkCmd();
+    for (std::vector<std::string>::iterator i = linkCmds.begin();
+         i != linkCmds.end(); ++i) {
+      *i = launcher + *i;
+      rulePlaceholderExpander->ExpandRuleVariables(this->GetLocalGenerator(),
+                                                   *i, vars);
+    }
+    {
+      // If there is no ranlib the command will be ":".  Skip it.
+      std::vector<std::string>::iterator newEnd = std::remove_if(
+        linkCmds.begin(), linkCmds.end(), cmNinjaRemoveNoOpCommands());
+      linkCmds.erase(newEnd, linkCmds.end());
+    }
+
+    std::string linkCmd =
+      this->GetLocalGenerator()->BuildCommandLine(linkCmds);
+
+    // Write the linker rule with response file if needed.
+    std::ostringstream comment;
+    comment << "Rule for linking " << this->TargetLinkLanguage << " "
+            << this->GetVisibleTypeName() << ".";
+    std::ostringstream description;
+    description << "Linking " << this->TargetLinkLanguage << " "
+                << this->GetVisibleTypeName() << " $TARGET_FILE";
+    this->GetGlobalGenerator()->AddRule(ruleName, linkCmd, description.str(),
+                                        comment.str(),
+                                        /*depfile*/ "",
+                                        /*deptype*/ "", rspfile, rspcontent,
+                                        /*restat*/ "$RESTAT",
+                                        /*generator*/ false);
+  }
+}
+
 void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile)
 {
   cmStateEnums::TargetType targetType = this->GetGeneratorTarget()->GetType();
@@ -327,6 +449,32 @@ void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile)
   }
 }
 
+std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeDeviceLinkCmd()
+{
+  std::vector<std::string> linkCmds;
+
+  // this target requires separable cuda compilation
+  // now build the correct command depending on if the target is
+  // an executable or a dynamic library.
+  std::string linkCmd;
+  switch (this->GetGeneratorTarget()->GetType()) {
+    case cmStateEnums::SHARED_LIBRARY:
+    case cmStateEnums::MODULE_LIBRARY: {
+      const std::string cudaLinkCmd(
+        this->GetMakefile()->GetDefinition("CMAKE_CUDA_DEVICE_LINK_LIBRARY"));
+      cmSystemTools::ExpandListArgument(cudaLinkCmd, linkCmds);
+    } break;
+    case cmStateEnums::EXECUTABLE: {
+      const std::string cudaLinkCmd(this->GetMakefile()->GetDefinition(
+        "CMAKE_CUDA_DEVICE_LINK_EXECUTABLE"));
+      cmSystemTools::ExpandListArgument(cudaLinkCmd, linkCmds);
+    } break;
+    default:
+      break;
+  }
+  return linkCmds;
+}
+
 std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeLinkCmd()
 {
   std::vector<std::string> linkCmds;
@@ -421,6 +569,211 @@ static int calculateCommandLineLengthLimit(int linkRuleLength)
   return sz - linkRuleLength;
 }
 
+void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement()
+{
+  cmGeneratorTarget& genTarget = *this->GetGeneratorTarget();
+
+  // determine if we need to do any device linking for this target
+  const std::string cuda_lang("CUDA");
+  cmGeneratorTarget::LinkClosure const* closure =
+    genTarget.GetLinkClosure(this->GetConfigName());
+
+  const bool hasCUDA =
+    (std::find(closure->Languages.begin(), closure->Languages.end(),
+               cuda_lang) != closure->Languages.end());
+
+  bool shouldHaveDeviceLinking = false;
+  switch (genTarget.GetType()) {
+    case cmStateEnums::SHARED_LIBRARY:
+    case cmStateEnums::MODULE_LIBRARY:
+    case cmStateEnums::EXECUTABLE:
+      shouldHaveDeviceLinking = true;
+      break;
+    default:
+      break;
+  }
+
+  if (!shouldHaveDeviceLinking || !hasCUDA) {
+    return;
+  }
+
+  // Now we can do device linking
+
+  // First and very important step is to make sure while inside this
+  // step our link language is set to CUDA
+  std::string cudaLinkLanguage = "CUDA";
+
+  std::string const cfgName = this->GetConfigName();
+  std::string const targetOutputReal =
+    ConvertToNinjaPath(genTarget.ObjectDirectory + "cmake_device_link.o");
+
+  std::string const targetOutputImplib =
+    ConvertToNinjaPath(genTarget.GetFullPath(cfgName,
+                                             /*implib=*/true));
+
+  this->DeviceLinkObject = targetOutputReal;
+
+  // Write comments.
+  cmGlobalNinjaGenerator::WriteDivider(this->GetBuildFileStream());
+  const cmStateEnums::TargetType targetType = genTarget.GetType();
+  this->GetBuildFileStream() << "# Device Link build statements for "
+                             << cmState::GetTargetTypeName(targetType)
+                             << " target " << this->GetTargetName() << "\n\n";
+
+  // Compute the comment.
+  std::ostringstream comment;
+  comment << "Link the " << this->GetVisibleTypeName() << " "
+          << targetOutputReal;
+
+  cmNinjaDeps emptyDeps;
+  cmNinjaVars vars;
+
+  // Compute outputs.
+  cmNinjaDeps outputs;
+  outputs.push_back(targetOutputReal);
+  // Compute specific libraries to link with.
+  cmNinjaDeps explicitDeps = this->GetObjects();
+  cmNinjaDeps implicitDeps = this->ComputeLinkDeps();
+
+  std::string frameworkPath;
+  std::string linkPath;
+
+  std::string createRule = genTarget.GetCreateRuleVariable(
+    this->TargetLinkLanguage, this->GetConfigName());
+  const bool useWatcomQuote =
+    this->GetMakefile()->IsOn(createRule + "_USE_WATCOM_QUOTE");
+  cmLocalNinjaGenerator& localGen = *this->GetLocalGenerator();
+
+  vars["TARGET_FILE"] =
+    localGen.ConvertToOutputFormat(targetOutputReal, cmOutputConverter::SHELL);
+
+  CM_AUTO_PTR<cmLinkLineComputer> linkLineComputer(
+    new cmNinjaLinkLineDeviceComputer(
+      this->GetLocalGenerator(),
+      this->GetLocalGenerator()->GetStateSnapshot().GetDirectory(),
+      this->GetGlobalGenerator()));
+  linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
+
+  localGen.GetTargetFlags(
+    linkLineComputer.get(), this->GetConfigName(), vars["LINK_LIBRARIES"],
+    vars["FLAGS"], vars["LINK_FLAGS"], frameworkPath, linkPath, &genTarget);
+
+  this->addPoolNinjaVariable("JOB_POOL_LINK", &genTarget, vars);
+
+  this->AddModuleDefinitionFlag(linkLineComputer.get(), vars["LINK_FLAGS"]);
+  vars["LINK_FLAGS"] =
+    cmGlobalNinjaGenerator::EncodeLiteral(vars["LINK_FLAGS"]);
+
+  vars["MANIFESTS"] = this->GetManifests();
+
+  vars["LINK_PATH"] = frameworkPath + linkPath;
+
+  // Compute architecture specific link flags.  Yes, these go into a different
+  // variable for executables, probably due to a mistake made when duplicating
+  // code between the Makefile executable and library generators.
+  if (targetType == cmStateEnums::EXECUTABLE) {
+    std::string t = vars["FLAGS"];
+    localGen.AddArchitectureFlags(t, &genTarget, cudaLinkLanguage, cfgName);
+    vars["FLAGS"] = t;
+  } else {
+    std::string t = vars["ARCH_FLAGS"];
+    localGen.AddArchitectureFlags(t, &genTarget, cudaLinkLanguage, cfgName);
+    vars["ARCH_FLAGS"] = t;
+    t = "";
+    localGen.AddLanguageFlags(t, cudaLinkLanguage, cfgName);
+    vars["LANGUAGE_COMPILE_FLAGS"] = t;
+  }
+  if (this->GetGeneratorTarget()->HasSOName(cfgName)) {
+    vars["SONAME_FLAG"] =
+      this->GetMakefile()->GetSONameFlag(this->TargetLinkLanguage);
+    vars["SONAME"] = this->TargetNameSO;
+    if (targetType == cmStateEnums::SHARED_LIBRARY) {
+      std::string install_dir =
+        this->GetGeneratorTarget()->GetInstallNameDirForBuildTree(cfgName);
+      if (!install_dir.empty()) {
+        vars["INSTALLNAME_DIR"] = localGen.ConvertToOutputFormat(
+          install_dir, cmOutputConverter::SHELL);
+      }
+    }
+  }
+
+  cmNinjaDeps byproducts;
+
+  if (!this->TargetNameImport.empty()) {
+    const std::string impLibPath = localGen.ConvertToOutputFormat(
+      targetOutputImplib, cmOutputConverter::SHELL);
+    vars["TARGET_IMPLIB"] = impLibPath;
+    EnsureParentDirectoryExists(impLibPath);
+    if (genTarget.HasImportLibrary()) {
+      byproducts.push_back(targetOutputImplib);
+    }
+  }
+
+  const std::string objPath = GetGeneratorTarget()->GetSupportDirectory();
+  vars["OBJECT_DIR"] = this->GetLocalGenerator()->ConvertToOutputFormat(
+    this->ConvertToNinjaPath(objPath), cmOutputConverter::SHELL);
+  EnsureDirectoryExists(objPath);
+
+  if (this->GetGlobalGenerator()->IsGCCOnWindows()) {
+    // ar.exe can't handle backslashes in rsp files (implicitly used by gcc)
+    std::string& linkLibraries = vars["LINK_LIBRARIES"];
+    std::replace(linkLibraries.begin(), linkLibraries.end(), '\\', '/');
+    std::string& link_path = vars["LINK_PATH"];
+    std::replace(link_path.begin(), link_path.end(), '\\', '/');
+  }
+
+  const std::vector<cmCustomCommand>* cmdLists[3] = {
+    &genTarget.GetPreBuildCommands(), &genTarget.GetPreLinkCommands(),
+    &genTarget.GetPostBuildCommands()
+  };
+
+  std::vector<std::string> preLinkCmdLines, postBuildCmdLines;
+  vars["PRE_LINK"] = localGen.BuildCommandLine(preLinkCmdLines);
+  vars["POST_BUILD"] = localGen.BuildCommandLine(postBuildCmdLines);
+
+  std::vector<std::string>* cmdLineLists[3] = { &preLinkCmdLines,
+                                                &preLinkCmdLines,
+                                                &postBuildCmdLines };
+
+  for (unsigned i = 0; i != 3; ++i) {
+    for (std::vector<cmCustomCommand>::const_iterator ci =
+           cmdLists[i]->begin();
+         ci != cmdLists[i]->end(); ++ci) {
+      cmCustomCommandGenerator ccg(*ci, cfgName, this->GetLocalGenerator());
+      localGen.AppendCustomCommandLines(ccg, *cmdLineLists[i]);
+      std::vector<std::string> const& ccByproducts = ccg.GetByproducts();
+      std::transform(ccByproducts.begin(), ccByproducts.end(),
+                     std::back_inserter(byproducts), MapToNinjaPath());
+    }
+  }
+
+  cmGlobalNinjaGenerator& globalGen = *this->GetGlobalGenerator();
+
+  int commandLineLengthLimit = -1;
+  if (!this->ForceResponseFile()) {
+    commandLineLengthLimit = calculateCommandLineLengthLimit(
+      globalGen.GetRuleCmdLength(this->LanguageLinkerDeviceRule()));
+  }
+
+  const std::string rspfile =
+    std::string(cmake::GetCMakeFilesDirectoryPostSlash()) +
+    genTarget.GetName() + ".rsp";
+
+  // Gather order-only dependencies.
+  cmNinjaDeps orderOnlyDeps;
+  this->GetLocalGenerator()->AppendTargetDepends(this->GetGeneratorTarget(),
+                                                 orderOnlyDeps);
+
+  // Write the build statement for this target.
+  bool usedResponseFile = false;
+  globalGen.WriteBuild(this->GetBuildFileStream(), comment.str(),
+                       this->LanguageLinkerDeviceRule(), outputs,
+                       /*implicitOuts=*/cmNinjaDeps(), explicitDeps,
+                       implicitDeps, orderOnlyDeps, vars, rspfile,
+                       commandLineLengthLimit, &usedResponseFile);
+  this->WriteDeviceLinkRule(usedResponseFile);
+}
+
 void cmNinjaNormalTargetGenerator::WriteLinkStatement()
 {
   cmGeneratorTarget& gt = *this->GetGeneratorTarget();
@@ -481,6 +834,10 @@ void cmNinjaNormalTargetGenerator::WriteLinkStatement()
   cmNinjaDeps explicitDeps = this->GetObjects();
   cmNinjaDeps implicitDeps = this->ComputeLinkDeps();
 
+  if (!this->DeviceLinkObject.empty()) {
+    explicitDeps.push_back(this->DeviceLinkObject);
+  }
+
   cmMakefile* mf = this->GetMakefile();
 
   std::string frameworkPath;
@@ -504,6 +861,7 @@ void cmNinjaNormalTargetGenerator::WriteLinkStatement()
   localGen.GetTargetFlags(
     linkLineComputer.get(), this->GetConfigName(), vars["LINK_LIBRARIES"],
     vars["FLAGS"], vars["LINK_FLAGS"], frameworkPath, linkPath, &genTarget);
+
   if (this->GetMakefile()->IsOn("CMAKE_SUPPORT_WINDOWS_EXPORT_ALL_SYMBOLS") &&
       (gt.GetType() == cmStateEnums::SHARED_LIBRARY ||
        gt.IsExecutableWithExports())) {

+ 10 - 0
Source/cmNinjaNormalTargetGenerator.h

@@ -22,12 +22,21 @@ public:
 
 private:
   std::string LanguageLinkerRule() const;
+  std::string LanguageLinkerDeviceRule() const;
+
   const char* GetVisibleTypeName() const;
   void WriteLanguagesRules();
+
   void WriteLinkRule(bool useResponseFile);
+  void WriteDeviceLinkRule(bool useResponseFile);
+
   void WriteLinkStatement();
+  void WriteDeviceLinkStatement();
+
   void WriteObjectLibStatement();
+
   std::vector<std::string> ComputeLinkCmd();
+  std::vector<std::string> ComputeDeviceLinkCmd();
 
 private:
   // Target name info.
@@ -37,6 +46,7 @@ private:
   std::string TargetNameImport;
   std::string TargetNamePDB;
   std::string TargetLinkLanguage;
+  std::string DeviceLinkObject;
 };
 
 #endif // ! cmNinjaNormalTargetGenerator_h

+ 15 - 3
Source/cmNinjaTargetGenerator.cxx

@@ -583,10 +583,22 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang)
   }
 
   // Rule for compiling object file.
-  const std::string cmdVar = std::string("CMAKE_") + lang + "_COMPILE_OBJECT";
-  std::string compileCmd = mf->GetRequiredDefinition(cmdVar);
   std::vector<std::string> compileCmds;
-  cmSystemTools::ExpandListArgument(compileCmd, compileCmds);
+  if (lang == "CUDA") {
+    std::string cmdVar;
+    if (this->GeneratorTarget->GetProperty("CUDA_SEPARABLE_COMPILATION")) {
+      cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION");
+    } else {
+      cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION");
+    }
+    std::string compileCmd = mf->GetRequiredDefinition(cmdVar);
+    cmSystemTools::ExpandListArgument(compileCmd, compileCmds);
+  } else {
+    const std::string cmdVar =
+      std::string("CMAKE_") + lang + "_COMPILE_OBJECT";
+    std::string compileCmd = mf->GetRequiredDefinition(cmdVar);
+    cmSystemTools::ExpandListArgument(compileCmd, compileCmds);
+  }
 
   // Maybe insert an include-what-you-use runner.
   if (!compileCmds.empty() && (lang == "C" || lang == "CXX")) {

+ 4 - 0
Source/cmTarget.cxx

@@ -266,6 +266,9 @@ cmTarget::cmTarget(std::string const& name, cmStateEnums::TargetType type,
     this->SetPropertyDefault("CXX_STANDARD", CM_NULLPTR);
     this->SetPropertyDefault("CXX_STANDARD_REQUIRED", CM_NULLPTR);
     this->SetPropertyDefault("CXX_EXTENSIONS", CM_NULLPTR);
+    this->SetPropertyDefault("CUDA_STANDARD", CM_NULLPTR);
+    this->SetPropertyDefault("CUDA_STANDARD_REQUIRED", CM_NULLPTR);
+    this->SetPropertyDefault("CUDA_EXTENSIONS", CM_NULLPTR);
     this->SetPropertyDefault("LINK_SEARCH_START_STATIC", CM_NULLPTR);
     this->SetPropertyDefault("LINK_SEARCH_END_STATIC", CM_NULLPTR);
   }
@@ -360,6 +363,7 @@ cmTarget::cmTarget(std::string const& name, cmStateEnums::TargetType type,
       this->GetType() != cmStateEnums::UTILITY) {
     this->SetPropertyDefault("C_VISIBILITY_PRESET", CM_NULLPTR);
     this->SetPropertyDefault("CXX_VISIBILITY_PRESET", CM_NULLPTR);
+    this->SetPropertyDefault("CUDA_VISIBILITY_PRESET", CM_NULLPTR);
     this->SetPropertyDefault("VISIBILITY_INLINES_HIDDEN", CM_NULLPTR);
   }
 

+ 7 - 0
Tests/CMakeLists.txt

@@ -324,6 +324,8 @@ if(BUILD_TESTING)
       ADD_TEST_MACRO(VSGNUFortran ${CMAKE_COMMAND} -P runtest.cmake)
     endif()
   endif()
+
+
   ADD_TEST_MACRO(COnly COnly)
   ADD_TEST_MACRO(CxxOnly CxxOnly)
   ADD_TEST_MACRO(CxxSubdirC CxxSubdirC)
@@ -1353,6 +1355,11 @@ ${CMake_BINARY_DIR}/bin/cmake -DDIR=dev -P ${CMake_SOURCE_DIR}/Utilities/Release
     endif()
   endif()
 
+  if(CMake_TEST_CUDA)
+    add_subdirectory(Cuda)
+    add_subdirectory(CudaOnly)
+  endif()
+
   if(CMake_TEST_FindBoost)
     add_subdirectory(FindBoost)
   endif()

+ 4 - 0
Tests/Cuda/CMakeLists.txt

@@ -0,0 +1,4 @@
+
+ADD_TEST_MACRO(Cuda.Complex CudaComplex)
+ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
+ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)

+ 40 - 0
Tests/Cuda/Complex/CMakeLists.txt

@@ -0,0 +1,40 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaComplex CXX CUDA)
+#Goal for this example:
+
+#build a cpp dynamic library base
+#build a cuda static library base that uses separable compilation
+
+#build a cuda dynamic library that uses the first dynamic library
+#build a mixed cpp & cuda dynamic library uses all 3 previous libraries
+
+#lastly build a cpp executable that uses this last cuda dynamic library
+
+#this tests that we can properly handle linking cuda and cpp together
+#and also bulding cpp targets that need cuda implicit libraries
+
+#verify that we can pass explicit cuda arch flags
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+set(CMAKE_CUDA_STANDARD 11)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
+set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
+
+add_library(CudaComplexCppBase SHARED dynamic.cpp)
+add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu)
+set_target_properties(CudaComplexSeperableLib
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+set_target_properties( CudaComplexSeperableLib
+                       PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+add_library(CudaComplexSharedLib SHARED dynamic.cu)
+target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase)
+
+add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu)
+target_link_libraries(CudaComplexMixedLib
+                      PUBLIC CudaComplexSharedLib
+                      PRIVATE CudaComplexSeperableLib)
+
+add_executable(CudaComplex main.cpp)
+target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib)

+ 5 - 0
Tests/Cuda/Complex/dynamic.cpp

@@ -0,0 +1,5 @@
+
+int dynamic_base_func(int x)
+{
+  return x * x;
+}

+ 29 - 0
Tests/Cuda/Complex/dynamic.cu

@@ -0,0 +1,29 @@
+
+#include <string>
+#include <cuda.h>
+
+int dynamic_base_func(int);
+
+int __host__ cuda_dynamic_host_func(int x)
+{
+  return dynamic_base_func(x);
+}
+
+static
+__global__
+void DetermineIfValidCudaDevice()
+{
+}
+
+void cuda_dynamic_lib_func(std::string& contents )
+{
+  DetermineIfValidCudaDevice <<<1,1>>> ();
+  if(cudaSuccess == cudaGetLastError())
+    {
+    contents = "ran a cuda kernel";
+    }
+  else
+    {
+    contents = "cant run a cuda kernel";
+    }
+}

+ 10 - 0
Tests/Cuda/Complex/file1.cu

@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  result_type r;
+  r.input = x;
+  r.sum = x*x;
+  return r;
+}

+ 7 - 0
Tests/Cuda/Complex/file1.h

@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};

+ 20 - 0
Tests/Cuda/Complex/file2.cu

@@ -0,0 +1,20 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+  if(x!=42)
+    {
+    const result_type r = file1_func(x);
+    const result_type_dynamic rd { r.input, r.sum, true };
+    return rd;
+    }
+  else
+    {
+    const result_type_dynamic rd { x, x*x*x, false };
+    return rd;
+    }
+
+}

+ 10 - 0
Tests/Cuda/Complex/file2.h

@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+  int input;
+  int sum;
+  bool from_static;
+};

+ 25 - 0
Tests/Cuda/Complex/file3.cu

@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void file3_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file3_launch_kernel(int x)
+{
+  result_type r;
+  file3_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}

+ 14 - 0
Tests/Cuda/Complex/main.cpp

@@ -0,0 +1,14 @@
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type call_cuda_seperable_code(int x);
+result_type mixed_launch_kernel(int x);
+
+int main(int argc, char** argv)
+{
+  call_cuda_seperable_code(42);
+  mixed_launch_kernel(42);
+  return 0;
+}

+ 14 - 0
Tests/Cuda/Complex/mixed.cpp

@@ -0,0 +1,14 @@
+
+int dynamic_base_func(int);
+int cuda_dynamic_host_func(int);
+int file3_launch_kernel(int);
+
+int dynamic_final_func(int x)
+{
+  return cuda_dynamic_host_func(dynamic_base_func(x));
+}
+
+int call_cuda_seperable_code(int x)
+{
+  return file3_launch_kernel(x);
+}

+ 25 - 0
Tests/Cuda/Complex/mixed.cu

@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void mixed_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int mixed_launch_kernel(int x)
+{
+  result_type r;
+  mixed_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}

+ 17 - 0
Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt

@@ -0,0 +1,17 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaConsumeCompileFeatures CXX CUDA)
+#Goal for this example:
+
+#build a c++11 library that express a c++11 public compile feature
+#link a cuda library and verify it builds with c++11 enabled
+
+#build a standalone c++/cuda mixed executable where we express a c++11
+#compile feature.
+
+
+add_library(CudaConsumeLib STATIC static.cpp static.cu)
+target_compile_features(CudaConsumeLib PUBLIC cxx_constexpr)
+
+add_executable(CudaConsumeCompileFeatures main.cu)
+target_link_libraries(CudaConsumeCompileFeatures PRIVATE CudaConsumeLib)

+ 18 - 0
Tests/Cuda/ConsumeCompileFeatures/main.cu

@@ -0,0 +1,18 @@
+
+#include <iostream>
+
+int static_cxx11_func(int);
+
+void test_functions()
+{
+  auto x = static_cxx11_func( int(42) );
+  std::cout << x << std::endl;
+}
+
+int main(int argc, char **argv)
+{
+  test_functions();
+  std::cout << "this executable doesn't use cuda code, just call methods defined" << std::endl;
+  std::cout << "in libraries that have cuda code" << std::endl;
+  return 0;
+}

+ 10 - 0
Tests/Cuda/ConsumeCompileFeatures/static.cpp

@@ -0,0 +1,10 @@
+
+
+#include <type_traits>
+
+int static_cuda11_func(int);
+
+int static_cxx11_func(int x)
+{
+  return static_cuda11_func(x) + std::integral_constant<int, 32>::value;
+}

+ 9 - 0
Tests/Cuda/ConsumeCompileFeatures/static.cu

@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ static_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}

+ 12 - 0
Tests/Cuda/ObjectLibrary/CMakeLists.txt

@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.7)
+project (CudaObjectLibrary CUDA CXX)
+#Goal for this example:
+
+#build a object files some with cuda and some without than
+#embed these into an executable
+
+add_library(CudaMixedObjectLib OBJECT static.cu static.cpp)
+
+add_executable(CudaObjectLibrary
+               main.cpp
+               $<TARGET_OBJECTS:CudaMixedObjectLib>)

+ 20 - 0
Tests/Cuda/ObjectLibrary/main.cpp

@@ -0,0 +1,20 @@
+
+#include <iostream>
+
+int static_func(int);
+int file1_sq_func(int);
+
+void test_functions()
+{
+  file1_sq_func(static_func(42));
+}
+
+int main(int argc, char** argv)
+{
+  test_functions();
+  std::cout
+    << "this executable doesn't use cuda code, just call methods defined"
+    << std::endl;
+  std::cout << "in object files that have cuda code" << std::endl;
+  return 0;
+}

+ 6 - 0
Tests/Cuda/ObjectLibrary/static.cpp

@@ -0,0 +1,6 @@
+int file1_sq_func(int);
+
+int static_func(int x)
+{
+  return file1_sq_func(x);
+}

+ 21 - 0
Tests/Cuda/ObjectLibrary/static.cu

@@ -0,0 +1,21 @@
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+int __host__ file1_sq_func(int x)
+{
+  cudaError_t err;
+  int nDevices = 0;
+  err = cudaGetDeviceCount(&nDevices);
+  if(err != cudaSuccess)
+  {
+    std::cout << "nDevices: " << nDevices << std::endl;
+    std::cout << "err: " << err << std::endl;
+    return 1;
+  }
+  std::cout << "this library uses cuda code" << std::endl;
+  std::cout << "you have " << nDevices << " devices that support cuda" << std::endl;
+
+  return x * x;
+}

+ 4 - 0
Tests/CudaOnly/CMakeLists.txt

@@ -0,0 +1,4 @@
+
+ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
+ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)

+ 15 - 0
Tests/CudaOnly/EnableStandard/CMakeLists.txt

@@ -0,0 +1,15 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyEnableStandard CUDA)
+
+#Goal for this example:
+#build cuda sources that require C++11 to be enabled.
+
+add_library(CUDAStatic11 STATIC static.cu)
+add_library(CUDADynamic11 SHARED shared.cu)
+
+add_executable(CudaOnlyEnableStandard main.cu)
+target_link_libraries(CudaOnlyEnableStandard PRIVATE CUDAStatic11 CUDADynamic11)
+
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD 11)
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD_REQUIRED TRUE)

+ 17 - 0
Tests/CudaOnly/EnableStandard/main.cu

@@ -0,0 +1,17 @@
+
+#include <iostream>
+
+int static_cuda11_func(int);
+int shared_cuda11_func(int);
+
+void test_functions()
+{
+  static_cuda11_func( int(42) );
+  shared_cuda11_func( int(42) );
+}
+
+int main(int argc, char **argv)
+{
+  test_functions();
+  return 0;
+}

+ 9 - 0
Tests/CudaOnly/EnableStandard/shared.cu

@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ shared_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}

+ 9 - 0
Tests/CudaOnly/EnableStandard/static.cu

@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ static_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}

+ 33 - 0
Tests/CudaOnly/SeparateCompilation/CMakeLists.txt

@@ -0,0 +1,33 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlySeparateCompilation CUDA)
+
+#Goal for this example:
+#Build a static library that defines multiple methods and kernels that
+#use each other.
+#After that confirm that we can call those methods from dynamic libraries
+#and executables.
+#We complicate the matter by also testing that multiple static libraries
+#all containing cuda separable compilation code links properly
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+add_library(CUDASerarateLibA STATIC file1.cu file2.cu file3.cu)
+
+#Having file4/file5 in a shared library causes serious problems
+#with the nvcc linker and it will generate bad entries that will
+#cause a segv when trying to run the executable
+#
+add_library(CUDASerarateLibB STATIC file4.cu file5.cu)
+target_link_libraries(CUDASerarateLibB PRIVATE CUDASerarateLibA)
+
+add_executable(CudaOnlySeparateCompilation main.cu)
+target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASerarateLibB)
+
+set_target_properties( CUDASerarateLibA
+                       CUDASerarateLibB
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+set_target_properties( CUDASerarateLibA
+                       CUDASerarateLibB
+                       PROPERTIES POSITION_INDEPENDENT_CODE ON)

+ 10 - 0
Tests/CudaOnly/SeparateCompilation/file1.cu

@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  result_type r;
+  r.input = x;
+  r.sum = x*x;
+  return r;
+}

+ 7 - 0
Tests/CudaOnly/SeparateCompilation/file1.h

@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};

+ 20 - 0
Tests/CudaOnly/SeparateCompilation/file2.cu

@@ -0,0 +1,20 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+  if(x!=42)
+    {
+    const result_type r = file1_func(x);
+    const result_type_dynamic rd { r.input, r.sum, true };
+    return rd;
+    }
+  else
+    {
+    const result_type_dynamic rd { x, x*x*x, false };
+    return rd;
+    }
+
+}

+ 10 - 0
Tests/CudaOnly/SeparateCompilation/file2.h

@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+  int input;
+  int sum;
+  bool from_static;
+};

+ 25 - 0
Tests/CudaOnly/SeparateCompilation/file3.cu

@@ -0,0 +1,25 @@
+
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+
+static
+__global__
+void file3_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+result_type file3_launch_kernel(int x)
+{
+  result_type r;
+  file3_kernel <<<1,1>>> (r,x);
+  return r;
+}

+ 25 - 0
Tests/CudaOnly/SeparateCompilation/file4.cu

@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void file4_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file4_launch_kernel(int x)
+{
+  result_type r;
+  file4_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}

+ 25 - 0
Tests/CudaOnly/SeparateCompilation/file5.cu

@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void file5_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file5_launch_kernel(int x)
+{
+  result_type r;
+  file5_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}

+ 15 - 0
Tests/CudaOnly/SeparateCompilation/main.cu

@@ -0,0 +1,15 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+// result_type file4_launch_kernel(int x);
+// result_type file5_launch_kernel(int x);
+
+int main(int argc, char **argv)
+{
+  // file4_launch_kernel(42);
+  // file5_launch_kernel(42);
+  return 0;
+}

+ 31 - 0
Tests/CudaOnly/WithDefs/CMakeLists.txt

@@ -0,0 +1,31 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyWithDefs CUDA)
+
+#verify that we can pass explicit cuda arch flags
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+set(debug_compile_flags --generate-code arch=compute_20,code=sm_20 -Xcompiler=-Werror)
+set(release_compile_defs DEFREL)
+
+#Goal for this example:
+#build a executable that needs to be passed a complex define through add_defintions
+#this verifies we can pass things such as '_','(' to nvcc
+add_definitions("-DPACKED_DEFINE=__attribute__((packed))")
+set_source_files_properties(main.notcu PROPERTIES LANGUAGE CUDA)
+add_executable(CudaOnlyWithDefs main.notcu)
+
+target_compile_options(CudaOnlyWithDefs
+  PRIVATE
+    $<$<CONFIG:DEBUG>:$<BUILD_INTERFACE:${debug_compile_flags}>>
+  )
+
+target_compile_definitions(CudaOnlyWithDefs
+  PRIVATE
+    $<$<CONFIG:RELEASE>:$<BUILD_INTERFACE:${release_compile_defs}>>
+  )
+
+#we need to add an rpath for the cuda library so that everything
+#loads properly on the mac
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  set_target_properties(CudaOnlyWithDefs PROPERTIES LINK_FLAGS "-Wl,-rpath,${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}")
+endif()

+ 46 - 0
Tests/CudaOnly/WithDefs/main.notcu

@@ -0,0 +1,46 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+static
+__global__
+void DetermineIfValidCudaDevice()
+{
+}
+
+struct PACKED_DEFINE result_type
+{
+  bool valid;
+  int value;
+#if defined(NDEBUG) && !defined(DEFREL)
+#error missing DEFREL flag
+#endif
+};
+
+result_type can_launch_kernel()
+{
+  result_type r;
+  DetermineIfValidCudaDevice <<<1,1>>> ();
+  r.valid = (cudaSuccess == cudaGetLastError());
+  if(r.valid)
+    {
+    r.value = 1;
+    }
+  else
+    {
+    r.value = -1;
+    }
+  return r;
+}
+
+int main(int argc, char **argv)
+{
+  cudaError_t err;
+  int nDevices = 0;
+  err = cudaGetDeviceCount(&nDevices);
+  if(err != cudaSuccess)
+    {
+      return 1;
+    }
+  return 0;
+}