5 ani în urmă · c63fe01835
--- a/Help/release/dev/cuda-clang-separable-compilation.rst
+++ b/Help/release/dev/cuda-clang-separable-compilation.rst
@@ -0,0 +1,4 @@
 
				+cuda-clang-separable-compilation
			
 
				+--------------------------------
			
 
				+
			
 
				+* :prop_tgt:`CUDA_SEPARABLE_COMPILATION` is now supported when using Clang.
			
--- a/Modules/CMakeCUDACompiler.cmake.in
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -3,6 +3,8 @@ set(CMAKE_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@")
 
				 set(CMAKE_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@")
			
 
				 set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@")
			
 
				 set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@")
			
 
				+set(CMAKE_CUDA_DEVICE_LINKER "@CMAKE_CUDA_DEVICE_LINKER@")
			
 
				+set(CMAKE_CUDA_FATBINARY "@CMAKE_CUDA_FATBINARY@")
			
 
				 set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@")
			
 
				 set(CMAKE_CUDA_COMPILE_FEATURES "@CMAKE_CUDA_COMPILE_FEATURES@")
			
 
				 set(CMAKE_CUDA03_COMPILE_FEATURES "@CMAKE_CUDA03_COMPILE_FEATURES@")
			
@@ -44,6 +46,7 @@ if(CMAKE_CUDA_LIBRARY_ARCHITECTURE)
 
				 endif()
			
 
				 
			
 
				 set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@")
			
 
				+set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@")
			
 
				 set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@")
			
 
				 
			
 
				 set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")
			
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -145,7 +145,7 @@ endif()
 
				 #Specify how to compile when separable compilation has been requested
			
 
				 if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION)
			
 
				   set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
			
 
				-    "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} -dc <SOURCE> -o <OBJECT>")
			
 
				+    "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} ${_CMAKE_CUDA_DEVICE_CODE} <SOURCE> -o <OBJECT>")
			
 
				 endif()
			
 
				 
			
 
				 #Specify how to compile when whole compilation has been requested
			
@@ -200,6 +200,11 @@ if(NOT CMAKE_CUDA_DEVICE_LINK_EXECUTABLE)
 
				     "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}")
			
 
				 endif()
			
 
				 
			
 
				+# Used when device linking is handled by CMake: compiles the link.stub found under CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT into <OBJECT>, handing it the <REGISTER_FILE> and <FATBINARY> paths through the REGISTERLINKBINARYFILE and FATBINFILE definitions. A value set before this point is kept.
			
 
				+if(NOT CMAKE_CUDA_DEVICE_LINK_COMPILE)
			
 
				+  set(CMAKE_CUDA_DEVICE_LINK_COMPILE "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <FLAGS> -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ -D__NV_EXTRA_INITIALIZATION=\"\" -D__NV_EXTRA_FINALIZATION=\"\" -DREGISTERLINKBINARYFILE=\\\"<REGISTER_FILE>\\\" -DFATBINFILE=\\\"<FATBINARY>\\\" ${_CMAKE_COMPILE_AS_CUDA_FLAG} -c \"${CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT}/bin/crt/link.stub\" -o <OBJECT>")
			
 
				+endif()
			
 
				+
			
 
				 unset(__IMPLICT_DLINK_FLAGS)
			
 
				 
			
 
				 set(CMAKE_CUDA_INFORMATION_LOADED 1)
			
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -169,11 +169,14 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
 
				     endif()
			
 
				 
			
 
				     get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${_CUDA_NVCC_EXECUTABLE}" DIRECTORY)
			
 
				+    set(CMAKE_CUDA_DEVICE_LINKER "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/nvlink${CMAKE_EXECUTABLE_SUFFIX}")
			
 
				+    set(CMAKE_CUDA_FATBINARY "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/fatbinary${CMAKE_EXECUTABLE_SUFFIX}")
			
 
				     get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY)
			
 
				 
			
 
				-    # CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file.
			
 
				-    # In a non-scattered installation this is equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT.
			
 
				+    # In a non-scattered installation the following are equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT.
			
 
				     # We first check for a non-scattered installation to prefer it over a scattered installation.
			
 
				+
			
 
				+    # CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file.
			
 
				     if(EXISTS "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/version.txt")
			
 
				       set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
			
 
				     elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/version.txt")
			
@@ -181,6 +184,15 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
 
				     elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/cuda/version.txt")
			
 
				       set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/cuda")
			
 
				     endif()
			
 
				+
			
 
				+    # CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT contains the linking stubs
				+    # necessary for device linking and other low-level library files.
				+    # As for CMAKE_CUDA_COMPILER_LIBRARY_ROOT, a non-scattered installation is
				+    # checked first so that the stub comes from the same toolkit as the nvcc
				+    # located above; the scattered system locations are only consulted when
				+    # the toolkit root does not ship bin/crt/link.stub itself.
				+    if(EXISTS "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/bin/crt/link.stub")
				+      set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
				+    elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub")
				+      set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit")
				+    elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub")
				+      set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit")
				+    else()
				+      # No stub was found in any known location; fall back to the toolkit
				+      # root so the variable is always defined.
				+      set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
				+    endif()
			
 
				   endif()
			
 
				 
			
 
				   set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
			
--- a/Modules/Compiler/Clang-CUDA.cmake
+++ b/Modules/Compiler/Clang-CUDA.cmake
@@ -13,6 +13,7 @@ __compiler_clang_cxx_standards(CUDA)
 
				 set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE)
			
 
				 set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda")
			
 
				 set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S")
			
 
				+set(_CMAKE_CUDA_DEVICE_CODE "-fgpu-rdc -c")
			
 
				 
			
 
				 # RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default.
			
 
				 set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
			
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -6,6 +6,7 @@ set(CMAKE_CUDA_VERBOSE_COMPILE_FLAG "-Xcompiler=-v")
 
				 
			
 
				 set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu")
			
 
				 set(_CMAKE_CUDA_PTX_FLAG "-ptx")
			
 
				+set(_CMAKE_CUDA_DEVICE_CODE "-dc")
			
 
				 
			
 
				 if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89)
			
 
				   # The -forward-unknown-to-host-compiler flag was only
			
--- a/Source/cmLocalGenerator.cxx
+++ b/Source/cmLocalGenerator.cxx
@@ -1955,17 +1955,6 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
 
				   } else if (lang == "CUDA") {
			
 
				     target->AddCUDAArchitectureFlags(flags);
			
 
				     target->AddCUDAToolkitFlags(flags);
			
 
				-
			
 
				-    if (compiler == "Clang") {
			
 
				-      bool separable = target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION");
			
 
				-
			
 
				-      if (separable) {
			
 
				-        this->Makefile->IssueMessage(
			
 
				-          MessageType::FATAL_ERROR,
			
 
				-          "CUDA_SEPARABLE_COMPILATION isn't supported on Clang. "
			
 
				-          "See CMake issue #20726.");
			
 
				-      }
			
 
				-    }
			
 
				   } else if (lang == "ISPC") {
			
 
				     target->AddISPCTargetFlags(flags);
			
 
				   }
			
--- a/Source/cmLocalGenerator.h
+++ b/Source/cmLocalGenerator.h
@@ -446,7 +446,7 @@ public:
 
				   void GetTargetCompileFlags(cmGeneratorTarget* target,
			
 
				                              std::string const& config,
			
 
				                              std::string const& lang, std::string& flags,
			
 
				-                             std::string const& arch = std::string());
			
 
				+                             std::string const& arch);
			
 
				   std::vector<BT<std::string>> GetTargetCompileFlags(
			
 
				     cmGeneratorTarget* target, std::string const& config,
			
 
				     std::string const& lang, std::string const& arch = std::string());
			
--- a/Source/cmMakefileExecutableTargetGenerator.cxx
+++ b/Source/cmMakefileExecutableTargetGenerator.cxx
@@ -91,19 +91,12 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
 
				 
			
 
				   std::vector<std::string> commands;
			
 
				 
			
 
				-  // Get the language to use for linking this library.
			
 
				-  std::string linkLanguage = "CUDA";
			
 
				+  // Get the name of the device object to generate.
			
 
				   std::string const& objExt =
			
 
				     this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
			
 
				-
			
 
				-  // Build list of dependencies.
			
 
				-  std::vector<std::string> depends;
			
 
				-  this->AppendLinkDepends(depends, linkLanguage);
			
 
				-
			
 
				-  // Get the name of the device object to generate.
			
 
				-  std::string const targetOutputReal =
			
 
				+  std::string const targetOutput =
			
 
				     this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
			
 
				-  this->DeviceLinkObject = targetOutputReal;
			
 
				+  this->DeviceLinkObject = targetOutput;
			
 
				 
			
 
				   this->NumberOfProgressActions++;
			
 
				   if (!this->NoRuleMessages) {
			
@@ -111,7 +104,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
 
				     this->MakeEchoProgress(progress);
			
 
				     // Add the link message.
			
 
				     std::string buildEcho =
			
 
				-      cmStrCat("Linking ", linkLanguage, " device code ",
			
 
				+      cmStrCat("Linking CUDA device code ",
			
 
				                this->LocalGenerator->ConvertToOutputFormat(
			
 
				                  this->LocalGenerator->MaybeConvertToRelativePath(
			
 
				                    this->LocalGenerator->GetCurrentBinaryDirectory(),
			
@@ -121,6 +114,29 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
 
				       commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
			
 
				   }
			
 
				 
			
 
				+  if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
			
 
				+    this->WriteDeviceLinkRule(commands, targetOutput);
			
 
				+  } else {
			
 
				+    this->WriteNvidiaDeviceExecutableRule(relink, commands, targetOutput);
			
 
				+  }
			
 
				+
			
 
				+  // Write the main driver rule to build everything in this target.
			
 
				+  this->WriteTargetDriverRule(targetOutput, relink);
			
 
				+#else
			
 
				+  static_cast<void>(relink);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
			
 
				+  bool relink, std::vector<std::string>& commands,
			
 
				+  const std::string& targetOutput)
			
 
				+{
			
 
				+  const std::string linkLanguage = "CUDA";
			
 
				+
			
 
				+  // Build list of dependencies.
			
 
				+  std::vector<std::string> depends;
			
 
				+  this->AppendLinkDepends(depends, linkLanguage);
			
 
				+
			
 
				   // Build a list of compiler flags and linker flags.
			
 
				   std::string langFlags;
			
 
				   std::string linkFlags;
			
@@ -136,7 +152,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
 
				   // may need to be cleaned.
			
 
				   std::vector<std::string> exeCleanFiles;
			
 
				   exeCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
			
 
				-    this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
			
 
				+    this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput));
			
 
				 
			
 
				   // Determine whether a link script will be used.
			
 
				   bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
			
@@ -195,7 +211,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
 
				       : cmOutputConverter::SHELL;
			
 
				     std::string target = this->LocalGenerator->ConvertToOutputFormat(
			
 
				       this->LocalGenerator->MaybeConvertToRelativePath(
			
 
				-        this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
			
 
				+        this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput),
			
 
				       output);
			
 
				 
			
 
				     std::string targetFullPathCompilePDB =
			
@@ -226,7 +242,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
 
				       this->LocalGenerator->CreateRulePlaceholderExpander());
			
 
				 
			
 
				     // Expand placeholders in the commands.
			
 
				-    rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
			
 
				+    rulePlaceholderExpander->SetTargetImpLib(targetOutput);
			
 
				     for (std::string& real_link_command : real_link_commands) {
			
 
				       real_link_command = cmStrCat(launcher, real_link_command);
			
 
				       rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator,
			
@@ -255,17 +271,10 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
 
				 
			
 
				   // Write the build rule.
			
 
				   this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
			
 
				-                                      targetOutputReal, depends, commands,
			
 
				-                                      false);
			
 
				-
			
 
				-  // Write the main driver rule to build everything in this target.
			
 
				-  this->WriteTargetDriverRule(targetOutputReal, relink);
			
 
				+                                      targetOutput, depends, commands, false);
			
 
				 
			
 
				   // Clean all the possible executable names and symlinks.
			
 
				   this->CleanFiles.insert(exeCleanFiles.begin(), exeCleanFiles.end());
			
 
				-#else
			
 
				-  static_cast<void>(relink);
			
 
				-#endif
			
 
				 }
			
 
				 
			
 
				 void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)
			
--- a/Source/cmMakefileExecutableTargetGenerator.h
+++ b/Source/cmMakefileExecutableTargetGenerator.h
@@ -5,6 +5,7 @@
 
				 #include "cmConfigure.h" // IWYU pragma: keep
			
 
				 
			
 
				 #include <string>
			
 
				+#include <vector>
			
 
				 
			
 
				 #include "cmMakefileTargetGenerator.h"
			
 
				 
			
@@ -23,6 +24,9 @@ public:
 
				 protected:
			
 
				   virtual void WriteExecutableRule(bool relink);
			
 
				   virtual void WriteDeviceExecutableRule(bool relink);
			
 
				+  virtual void WriteNvidiaDeviceExecutableRule(
			
 
				+    bool relink, std::vector<std::string>& commands,
			
 
				+    const std::string& targetOutput);
			
 
				 
			
 
				 private:
			
 
				   std::string DeviceLinkObject;
			
--- a/Source/cmMakefileLibraryTargetGenerator.cxx
+++ b/Source/cmMakefileLibraryTargetGenerator.cxx
@@ -129,8 +129,7 @@ void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules()
 
				   const bool requiresDeviceLinking = requireDeviceLinking(
			
 
				     *this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
			
 
				   if (requiresDeviceLinking) {
			
 
				-    std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
			
 
				-    this->WriteDeviceLibraryRules(linkRuleVar, false);
			
 
				+    this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", false);
			
 
				   }
			
 
				 
			
 
				   std::string linkLanguage =
			
@@ -156,8 +155,7 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
 
				     const bool requiresDeviceLinking = requireDeviceLinking(
			
 
				       *this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
			
 
				     if (requiresDeviceLinking) {
			
 
				-      std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
			
 
				-      this->WriteDeviceLibraryRules(linkRuleVar, relink);
			
 
				+      this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink);
			
 
				     }
			
 
				   }
			
 
				 
			
@@ -191,8 +189,7 @@ void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink)
 
				     const bool requiresDeviceLinking = requireDeviceLinking(
			
 
				       *this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
			
 
				     if (requiresDeviceLinking) {
			
 
				-      std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
			
 
				-      this->WriteDeviceLibraryRules(linkRuleVar, relink);
			
 
				+      this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink);
			
 
				     }
			
 
				   }
			
 
				 
			
@@ -239,29 +236,13 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
 
				   // TODO: Merge the methods that call this method to avoid
			
 
				   // code duplication.
			
 
				   std::vector<std::string> commands;
			
 
				-
			
 
				-  // Get the language to use for linking this library.
			
 
				-  std::string linkLanguage = "CUDA";
			
 
				   std::string const objExt =
			
 
				     this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
			
 
				 
			
 
				-  // Build list of dependencies.
			
 
				-  std::vector<std::string> depends;
			
 
				-  this->AppendLinkDepends(depends, linkLanguage);
			
 
				-
			
 
				-  // Add language-specific flags.
			
 
				-  std::string langFlags;
			
 
				-  this->LocalGenerator->AddLanguageFlagsForLinking(
			
 
				-    langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
			
 
				-
			
 
				-  // Create set of linking flags.
			
 
				-  std::string linkFlags;
			
 
				-  this->GetDeviceLinkFlags(linkFlags, linkLanguage);
			
 
				-
			
 
				   // Get the name of the device object to generate.
			
 
				-  std::string const targetOutputReal =
			
 
				+  std::string const targetOutput =
			
 
				     this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
			
 
				-  this->DeviceLinkObject = targetOutputReal;
			
 
				+  this->DeviceLinkObject = targetOutput;
			
 
				 
			
 
				   this->NumberOfProgressActions++;
			
 
				   if (!this->NoRuleMessages) {
			
@@ -269,7 +250,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
 
				     this->MakeEchoProgress(progress);
			
 
				     // Add the link message.
			
 
				     std::string buildEcho =
			
 
				-      cmStrCat("Linking ", linkLanguage, " device code ",
			
 
				+      cmStrCat("Linking CUDA device code ",
			
 
				                this->LocalGenerator->ConvertToOutputFormat(
			
 
				                  this->LocalGenerator->MaybeConvertToRelativePath(
			
 
				                    this->LocalGenerator->GetCurrentBinaryDirectory(),
			
@@ -278,10 +259,41 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
 
				     this->LocalGenerator->AppendEcho(
			
 
				       commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
			
 
				   }
			
 
				+
			
 
				+  if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
			
 
				+    this->WriteDeviceLinkRule(commands, targetOutput);
			
 
				+  } else {
			
 
				+    this->WriteNvidiaDeviceLibraryRules(linkRuleVar, relink, commands,
			
 
				+                                        targetOutput);
			
 
				+  }
			
 
				+
			
 
				+  // Write the main driver rule to build everything in this target.
			
 
				+  this->WriteTargetDriverRule(targetOutput, relink);
			
 
				+}
			
 
				+
			
 
				+void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
			
 
				+  const std::string& linkRuleVar, bool relink,
			
 
				+  std::vector<std::string>& commands, const std::string& targetOutput)
			
 
				+{
			
 
				+  std::string linkLanguage = "CUDA";
			
 
				+
			
 
				+  // Build list of dependencies.
			
 
				+  std::vector<std::string> depends;
			
 
				+  this->AppendLinkDepends(depends, linkLanguage);
			
 
				+
			
 
				+  // Add language-specific flags.
			
 
				+  std::string langFlags;
			
 
				+  this->LocalGenerator->AddLanguageFlagsForLinking(
			
 
				+    langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
			
 
				+
			
 
				+  // Create set of linking flags.
			
 
				+  std::string linkFlags;
			
 
				+  this->GetDeviceLinkFlags(linkFlags, linkLanguage);
			
 
				+
			
 
				   // Clean files associated with this library.
			
 
				   std::set<std::string> libCleanFiles;
			
 
				   libCleanFiles.insert(this->LocalGenerator->MaybeConvertToRelativePath(
			
 
				-    this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
			
 
				+    this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput));
			
 
				 
			
 
				   // Determine whether a link script will be used.
			
 
				   bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
			
@@ -335,7 +347,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
 
				 
			
 
				     std::string target = this->LocalGenerator->ConvertToOutputFormat(
			
 
				       this->LocalGenerator->MaybeConvertToRelativePath(
			
 
				-        this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
			
 
				+        this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput),
			
 
				       output);
			
 
				 
			
 
				     std::string targetFullPathCompilePDB =
			
@@ -364,7 +376,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
 
				       this->LocalGenerator->CreateRulePlaceholderExpander());
			
 
				 
			
 
				     // Construct the main link rule and expand placeholders.
			
 
				-    rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
			
 
				+    rulePlaceholderExpander->SetTargetImpLib(targetOutput);
			
 
				     std::string linkRule = this->GetLinkRule(linkRuleVar);
			
 
				     cmExpandList(linkRule, real_link_commands);
			
 
				 
			
@@ -399,14 +411,11 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
 
				   commands1.clear();
			
 
				 
			
 
				   // Compute the list of outputs.
			
 
				-  std::vector<std::string> outputs(1, targetOutputReal);
			
 
				+  std::vector<std::string> outputs(1, targetOutput);
			
 
				 
			
 
				   // Write the build rule.
			
 
				   this->WriteMakeRule(*this->BuildFileStream, nullptr, outputs, depends,
			
 
				                       commands, false);
			
 
				-
			
 
				-  // Write the main driver rule to build everything in this target.
			
 
				-  this->WriteTargetDriverRule(targetOutputReal, relink);
			
 
				 #else
			
 
				   static_cast<void>(linkRuleVar);
			
 
				   static_cast<void>(relink);
			
--- a/Source/cmMakefileLibraryTargetGenerator.h
+++ b/Source/cmMakefileLibraryTargetGenerator.h
@@ -5,6 +5,7 @@
 
				 #include "cmConfigure.h" // IWYU pragma: keep
			
 
				 
			
 
				 #include <string>
			
 
				+#include <vector>
			
 
				 
			
 
				 #include "cmMakefileTargetGenerator.h"
			
 
				 
			
@@ -27,6 +28,10 @@ protected:
 
				   void WriteModuleLibraryRules(bool relink);
			
 
				 
			
 
				   void WriteDeviceLibraryRules(const std::string& linkRule, bool relink);
			
 
				+  void WriteNvidiaDeviceLibraryRules(const std::string& linkRuleVar,
			
 
				+                                     bool relink,
			
 
				+                                     std::vector<std::string>& commands,
			
 
				+                                     const std::string& targetOutput);
			
 
				   void WriteLibraryRules(const std::string& linkRule,
			
 
				                          const std::string& extraFlags, bool relink);
			
 
				   // MacOSX Framework support methods
			
--- a/Source/cmMakefileTargetGenerator.cxx
+++ b/Source/cmMakefileTargetGenerator.cxx
@@ -2,10 +2,13 @@
 
				    file Copyright.txt or https://cmake.org/licensing for details.  */
			
 
				 #include "cmMakefileTargetGenerator.h"
			
 
				 
			
 
				+#include <algorithm>
			
 
				 #include <cassert>
			
 
				 #include <cstdio>
			
 
				+#include <iterator>
			
 
				 #include <sstream>
			
 
				 #include <unordered_map>
			
 
				+#include <unordered_set>
			
 
				 #include <utility>
			
 
				 
			
 
				 #include <cm/memory>
			
@@ -25,6 +28,7 @@
 
				 #include "cmMakefileExecutableTargetGenerator.h"
			
 
				 #include "cmMakefileLibraryTargetGenerator.h"
			
 
				 #include "cmMakefileUtilityTargetGenerator.h"
			
 
				+#include "cmMessageType.h"
			
 
				 #include "cmOutputConverter.h"
			
 
				 #include "cmPolicies.h"
			
 
				 #include "cmProperty.h"
			
@@ -1323,6 +1327,130 @@ void cmMakefileTargetGenerator::WriteObjectDependRules(
 
				   }
			
 
				 }
			
 
				 
			
 
				+// Write the Makefile rules that device-link this target when CMake drives
				+// the device link itself instead of delegating it to the compiler driver
				+// (the generators call this when CMAKE_CUDA_COMPILER_ID is "Clang").
				+//
				+// Three stages are emitted:
				+//   1. per architecture, CMAKE_CUDA_DEVICE_LINKER links the inputs into
				+//      <object-dir>/sm_<arch>.cubin;
				+//   2. CMAKE_CUDA_FATBINARY merges the cubins into cmake_cuda_fatbin.h;
				+//   3. the CMAKE_CUDA_DEVICE_LINK_COMPILE rule compiles the stub into
				+//      `output`.
				+//
				+// `commands` carries the commands the caller already queued for `output`;
				+// the stub compile command is appended to it and the combined list becomes
				+// the recipe of the `output` rule.  A fatal error is issued, and no rule is
				+// written, when CUDA_ARCHITECTURES is unset or off.
				+void cmMakefileTargetGenerator::WriteDeviceLinkRule(
				+  std::vector<std::string>& commands, const std::string& output)
				+{
				+  std::string architecturesStr =
				+    this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES");
				+
				+  if (cmIsOff(architecturesStr)) {
				+    this->Makefile->IssueMessage(MessageType::FATAL_ERROR,
				+                                 "CUDA_SEPARABLE_COMPILATION on Clang "
				+                                 "requires CUDA_ARCHITECTURES to be set.");
				+    return;
				+  }
				+
				+  std::vector<std::string> architectures = cmExpandedList(architecturesStr);
				+
				+  // Collect the device-link inputs without duplicates.  The first occurrence
				+  // of every entry keeps its position so that the generated rules do not
				+  // depend on the iteration order of a hash container.
				+  const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
				+    std::vector<std::string> deps;
				+    this->AppendTargetDepends(deps, true);
				+    this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
				+    std::copy(this->Objects.begin(), this->Objects.end(),
				+              std::back_inserter(deps));
				+
				+    std::unordered_set<std::string> seen;
				+    std::vector<std::string> uniqueDeps;
				+    uniqueDeps.reserve(deps.size());
				+    for (std::string& dep : deps) {
				+      if (seen.insert(dep).second) {
				+        uniqueDeps.emplace_back(std::move(dep));
				+      }
				+    }
				+    return uniqueDeps;
				+  }();
				+
				+  // The input list is identical for every architecture; join it once.
				+  const std::string linkInputs = cmJoin(linkDeps, " ");
				+
				+  const std::string objectDir = this->GeneratorTarget->ObjectDirectory;
				+  const std::string relObjectDir =
				+    this->LocalGenerator->MaybeConvertToRelativePath(
				+      this->LocalGenerator->GetCurrentBinaryDirectory(), objectDir);
				+
				+  // Files produced by the rules below that the clean step must remove.
				+  std::vector<std::string> cleanFiles;
				+  cleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
				+    this->LocalGenerator->GetCurrentBinaryDirectory(), output));
				+
				+  std::string profiles;
				+  std::vector<std::string> fatbinaryDepends;
				+  std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
				+
				+  // Link device code for each architecture.
				+  std::unordered_set<std::string> seenArchitectures;
				+  for (const std::string& architectureKind : architectures) {
				+    // Clang always generates real code, so strip the specifier.
				+    const std::string architecture =
				+      architectureKind.substr(0, architectureKind.find('-'));
				+
				+    // Entries such as "70-real;70-virtual" name the same cubin once the
				+    // specifier is stripped; emit its rule and fatbinary profile only once.
				+    if (!seenArchitectures.insert(architecture).second) {
				+      continue;
				+    }
				+
				+    const std::string cubin =
				+      cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
				+
				+    profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
				+    fatbinaryDepends.emplace_back(cubin);
				+    cleanFiles.push_back(cubin);
				+
				+    std::string registerFileCmd;
				+
				+    // The generated register file contains macros that when expanded register
				+    // the device routines. Because the routines are the same for all
				+    // architectures the register file will be the same too. Thus generate it
				+    // only on the first invocation to reduce overhead.
				+    if (fatbinaryDepends.size() == 1) {
				+      std::string registerFileRel =
				+        this->LocalGenerator->MaybeConvertToRelativePath(
				+          this->LocalGenerator->GetCurrentBinaryDirectory(), registerFile);
				+      registerFileCmd =
				+        cmStrCat(" --register-link-binaries=", registerFileRel);
				+      cleanFiles.push_back(registerFileRel);
				+    }
				+
				+    std::string command = cmStrCat(
				+      this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
				+      " -arch=sm_", architecture, registerFileCmd, " -o=$@ ", linkInputs);
				+
				+    this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, cubin,
				+                                        linkDeps, { command }, false);
				+  }
				+
				+  // Combine all architectures into a single fatbinary.
				+  const std::string fatbinaryCommand =
				+    cmStrCat(this->Makefile->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"),
				+             " -64 -cmdline=--compile-only -compress-all -link "
				+             "--embedded-fatbin=$@",
				+             profiles);
				+  const std::string fatbinaryOutput =
				+    cmStrCat(objectDir, "cmake_cuda_fatbin.h");
				+  const std::string fatbinaryOutputRel =
				+    this->LocalGenerator->MaybeConvertToRelativePath(
				+      this->LocalGenerator->GetCurrentBinaryDirectory(), fatbinaryOutput);
				+  cleanFiles.push_back(fatbinaryOutputRel);
				+
				+  this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
				+                                      fatbinaryOutputRel, fatbinaryDepends,
				+                                      { fatbinaryCommand }, false);
				+
				+  // Compile the stub that registers the kernels and contains the fatbinaries.
				+  cmRulePlaceholderExpander::RuleVariables vars;
				+  vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str();
				+  vars.CMTargetType =
				+    cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()).c_str();
				+
				+  vars.Language = "CUDA";
				+  vars.Object = output.c_str();
				+  vars.Fatbinary = fatbinaryOutput.c_str();
				+  vars.RegisterFile = registerFile.c_str();
				+
				+  std::string flags = this->GetFlags("CUDA", this->GetConfigName());
				+  vars.Flags = flags.c_str();
				+
				+  std::string compileCmd = this->GetLinkRule("CMAKE_CUDA_DEVICE_LINK_COMPILE");
				+  std::unique_ptr<cmRulePlaceholderExpander> rulePlaceholderExpander(
				+    this->LocalGenerator->CreateRulePlaceholderExpander());
				+  rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator,
				+                                               compileCmd, vars);
				+
				+  // The stub object depends on the fatbinary header; name it exactly as the
				+  // rule written above does so the two rules are guaranteed to chain.
				+  commands.emplace_back(compileCmd);
				+  this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, output,
				+                                      { fatbinaryOutputRel }, commands,
				+                                      false);
				+
				+  // Register every generated device-link artifact for cleaning.
				+  this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());
				+}
			
 
				+
			
 
				 void cmMakefileTargetGenerator::GenerateCustomRuleFile(
			
 
				   cmCustomCommandGenerator const& ccg)
			
 
				 {
			
@@ -1579,10 +1707,11 @@ void cmMakefileTargetGenerator::WriteTargetDriverRule(
 
				 }
			
 
				 
			
 
				 void cmMakefileTargetGenerator::AppendTargetDepends(
			
 
				-  std::vector<std::string>& depends)
			
 
				+  std::vector<std::string>& depends, bool ignoreType)
			
 
				 {
			
 
				   // Static libraries never depend on anything for linking.
			
 
				-  if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY) {
			
 
				+  if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY &&
			
 
				+      !ignoreType) {
			
 
				     return;
			
 
				   }
			
 
				 
			
--- a/Source/cmMakefileTargetGenerator.h
+++ b/Source/cmMakefileTargetGenerator.h
@@ -104,6 +104,10 @@ protected:
 
				   void WriteObjectDependRules(cmSourceFile const& source,
			
 
				                               std::vector<std::string>& depends);
			
 
				 
			
 
				+  // CUDA device linking.
			
 
				+  void WriteDeviceLinkRule(std::vector<std::string>& commands,
			
 
				+                           const std::string& output);
			
 
				+
			
 
				   // write the build rule for a custom command
			
 
				   void GenerateCustomRuleFile(cmCustomCommandGenerator const& ccg);
			
 
				 
			
@@ -127,7 +131,8 @@ protected:
 
				   void DriveCustomCommands(std::vector<std::string>& depends);
			
 
				 
			
 
				   // append intertarget dependencies
			
 
				-  void AppendTargetDepends(std::vector<std::string>& depends);
			
 
				+  void AppendTargetDepends(std::vector<std::string>& depends,
			
 
				+                           bool ignoreType = false);
			
 
				 
			
 
				   // Append object file dependencies.
			
 
				   void AppendObjectDepends(std::vector<std::string>& depends);
			
--- a/Source/cmNinjaNormalTargetGenerator.cxx
+++ b/Source/cmNinjaNormalTargetGenerator.cxx
@@ -8,6 +8,7 @@
 
				 #include <map>
			
 
				 #include <set>
			
 
				 #include <sstream>
			
 
				+#include <unordered_set>
			
 
				 #include <utility>
			
 
				 
			
 
				 #include <cm/memory>
			
@@ -25,6 +26,7 @@
 
				 #include "cmLocalGenerator.h"
			
 
				 #include "cmLocalNinjaGenerator.h"
			
 
				 #include "cmMakefile.h"
			
 
				+#include "cmMessageType.h"
			
 
				 #include "cmNinjaLinkLineDeviceComputer.h"
			
 
				 #include "cmNinjaTypes.h"
			
 
				 #include "cmOSXBundleGenerator.h"
			
@@ -178,6 +180,33 @@ std::string cmNinjaNormalTargetGenerator::LanguageLinkerDeviceRule(
 
				     "_", config);
			
 
				 }
			
 
				 
			
 
				+// Name of the Ninja rule used for the CUDA device link step of this target.
				+// The name has the form
				+// "<link language>_DEVICE_LINK__<encoded target name>_<config>".
				+std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceRule(
				+  const std::string& config) const
				+{
				+  const std::string encodedTarget =
				+    cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName());
				+  return cmStrCat(this->TargetLinkLanguage(config), "_DEVICE_LINK__",
				+                  encodedTarget, '_', config);
				+}
			
 
				+
			
 
				+// Name of the Ninja rule used to compile the CUDA device link stub of this
				+// target. The name has the form
				+// "<link language>_DEVICE_LINK_COMPILE__<encoded target name>_<config>".
				+std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceCompileRule(
				+  const std::string& config) const
				+{
				+  const std::string encodedTarget =
				+    cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName());
				+  return cmStrCat(this->TargetLinkLanguage(config), "_DEVICE_LINK_COMPILE__",
				+                  encodedTarget, '_', config);
				+}
			
 
				+
			
 
				+// Name of the Ninja rule used to create the CUDA fatbinary of this target.
				+// The name has the form
				+// "<link language>_FATBINARY__<encoded target name>_<config>".
				+std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaFatbinaryRule(
				+  const std::string& config) const
				+{
				+  const std::string encodedTarget =
				+    cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName());
				+  return cmStrCat(this->TargetLinkLanguage(config), "_FATBINARY__",
				+                  encodedTarget, '_', config);
				+}
			
 
				+
			
 
				 struct cmNinjaRemoveNoOpCommands
			
 
				 {
			
 
				   bool operator()(std::string const& cmd)
			
@@ -186,7 +215,7 @@ struct cmNinjaRemoveNoOpCommands
 
				   }
			
 
				 };
			
 
				 
			
 
				-void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(
			
 
				+void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkRule(
			
 
				   bool useResponseFile, const std::string& config)
			
 
				 {
			
 
				   cmNinjaRule rule(this->LanguageLinkerDeviceRule(config));
			
@@ -272,6 +301,55 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(
 
				   }
			
 
				 }
			
 
				 
			
 
				+// Register the three Ninja rules used for CUDA device linking of this target
				+// in the given configuration, in this order:
				+//   1. the device link rule (runs CMAKE_CUDA_DEVICE_LINKER),
				+//   2. the device stub compile rule (expands CMAKE_CUDA_DEVICE_LINK_COMPILE),
				+//   3. the fatbinary rule (runs CMAKE_CUDA_FATBINARY).
				+// The rules refer to the Ninja variables $ARCH, $REGISTER, $FATBIN and
				+// $PROFILES, which the build statements using them are expected to define.
				+void cmNinjaNormalTargetGenerator::WriteDeviceLinkRules(
				+  const std::string& config)
				+{
				+  const cmMakefile* makefile = this->GetMakefile();
				+  cmLocalNinjaGenerator* localGen = this->GetLocalGenerator();
				+  cmGlobalNinjaGenerator* globalGen = this->GetGlobalGenerator();
				+  cmGeneratorTarget* genTarget = this->GetGeneratorTarget();
				+
				+  // Device link rule.
				+  {
				+    cmNinjaRule linkRule(this->LanguageLinkerCudaDeviceRule(config));
				+    linkRule.Command = localGen->BuildCommandLine(
				+      { cmStrCat(makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
				+                 " -arch=$ARCH $REGISTER -o=$out $in") });
				+    linkRule.Comment = "Rule for CUDA device linking.";
				+    linkRule.Description = "Linking CUDA $out";
				+    globalGen->AddRule(linkRule);
				+  }
				+
				+  // Device stub compile rule: the placeholders of the configured command
				+  // template are replaced by Ninja variables resolved per build statement.
				+  {
				+    cmRulePlaceholderExpander::RuleVariables vars;
				+    vars.CMTargetName = genTarget->GetName().c_str();
				+    vars.CMTargetType =
				+      cmState::GetTargetTypeName(genTarget->GetType()).c_str();
				+    vars.Language = "CUDA";
				+    vars.Object = "$out";
				+    vars.Fatbinary = "$FATBIN";
				+    vars.RegisterFile = "$REGISTER";
				+
				+    // Keep the flags string alive while vars points into it.
				+    const std::string flags = this->GetFlags("CUDA", config);
				+    vars.Flags = flags.c_str();
				+
				+    std::string compileCmd =
				+      makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINK_COMPILE");
				+    std::unique_ptr<cmRulePlaceholderExpander> expander(
				+      localGen->CreateRulePlaceholderExpander());
				+    expander->ExpandRuleVariables(localGen, compileCmd, vars);
				+
				+    cmNinjaRule compileRule(this->LanguageLinkerCudaDeviceCompileRule(config));
				+    compileRule.Command = localGen->BuildCommandLine({ compileCmd });
				+    compileRule.Comment = "Rule for compiling CUDA device stubs.";
				+    compileRule.Description = "Compiling CUDA device stub $out";
				+    globalGen->AddRule(compileRule);
				+  }
				+
				+  // Fatbinary rule.
				+  {
				+    cmNinjaRule fatbinaryRule(this->LanguageLinkerCudaFatbinaryRule(config));
				+    fatbinaryRule.Command = localGen->BuildCommandLine(
				+      { cmStrCat(makefile->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"),
				+                 " -64 -cmdline=--compile-only -compress-all -link "
				+                 "--embedded-fatbin=$out $PROFILES") });
				+    fatbinaryRule.Comment = "Rule for CUDA fatbinaries.";
				+    fatbinaryRule.Description = "Creating fatbinary $out";
				+    globalGen->AddRule(fatbinaryRule);
				+  }
				+}
			
 
				+
			
 
				 void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile,
			
 
				                                                  const std::string& config)
			
 
				 {
			
@@ -586,7 +664,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
 
				 
			
 
				   // First and very important step is to make sure while inside this
			
 
				   // step our link language is set to CUDA
			
 
				-  std::string cudaLinkLanguage = "CUDA";
			
 
				   std::string const& objExt =
			
 
				     this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
			
 
				 
			
@@ -598,6 +675,118 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
 
				   std::string targetOutputReal =
			
 
				     ConvertToNinjaPath(targetOutputDir + "cmake_device_link" + objExt);
			
 
				 
			
 
				+  if (firstForConfig) {
			
 
				+    globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal);
			
 
				+  }
			
 
				+  this->DeviceLinkObject = targetOutputReal;
			
 
				+
			
 
				+  // Write comments.
			
 
				+  cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream());
			
 
				+  this->GetCommonFileStream()
			
 
				+    << "# Device Link build statements for "
			
 
				+    << cmState::GetTargetTypeName(genTarget->GetType()) << " target "
			
 
				+    << this->GetTargetName() << "\n\n";
			
 
				+
			
 
				+  if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
			
 
				+    std::string architecturesStr =
			
 
				+      this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES");
			
 
				+
			
 
				+    if (cmIsOff(architecturesStr)) {
			
 
				+      this->Makefile->IssueMessage(MessageType::FATAL_ERROR,
			
 
				+                                   "CUDA_SEPARABLE_COMPILATION on Clang "
			
 
				+                                   "requires CUDA_ARCHITECTURES to be set.");
			
 
				+      return;
			
 
				+    }
			
 
				+
			
 
				+    this->WriteDeviceLinkRules(config);
			
 
				+    this->WriteDeviceLinkStatements(config, cmExpandedList(architecturesStr),
			
 
				+                                    targetOutputReal);
			
 
				+  } else {
			
 
				+    this->WriteNvidiaDeviceLinkStatement(config, fileConfig, targetOutputDir,
			
 
				+                                         targetOutputReal);
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// Write the Ninja build statements that device-link this target's CUDA code
				+// without relying on the compiler driver:
				+//   - one device link statement per architecture, producing sm_<arch>.cubin,
				+//   - one statement combining all cubins into cmake_cuda_fatbin.h,
				+//   - one statement compiling the registration stub into `output`.
				+//
				+// config        - configuration the statements are written for.
				+// architectures - entries of the target's CUDA_ARCHITECTURES; anything after
				+//                 the first '-' in an entry (e.g. "-real") is ignored.
				+// output        - path of the device link object to produce.
				+void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
				+  const std::string& config, const std::vector<std::string>& architectures,
				+  const std::string& output)
				+{
				+  cmGlobalNinjaGenerator* globalGen = this->GetGlobalGenerator();
				+
				+  // Collect the inputs of the device link steps without duplicates. The
				+  // first-seen order is kept (objects first, then link dependencies) so that
				+  // the generated command line is deterministic instead of depending on the
				+  // iteration order of a hash container.
				+  const cmNinjaDeps explicitDeps = [&]() -> std::vector<std::string> {
				+    const cmNinjaDeps linkDeps =
				+      this->ComputeLinkDeps(this->TargetLinkLanguage(config), config, true);
				+    const cmNinjaDeps objects = this->GetObjects(config);
				+
				+    std::unordered_set<std::string> seen;
				+    std::vector<std::string> deps;
				+    deps.reserve(objects.size() + linkDeps.size());
				+    const auto appendUnique = [&seen, &deps](const cmNinjaDeps& items) {
				+      for (const std::string& item : items) {
				+        if (seen.insert(item).second) {
				+          deps.push_back(item);
				+        }
				+      }
				+    };
				+    appendUnique(objects);
				+    appendUnique(linkDeps);
				+    return deps;
				+  }();
				+
				+  const std::string objectDir =
				+    cmStrCat(this->GeneratorTarget->GetSupportDirectory(),
				+             globalGen->ConfigDirectory(config));
				+  const std::string ninjaOutputDir = this->ConvertToNinjaPath(objectDir);
				+
				+  cmNinjaBuild fatbinary(this->LanguageLinkerCudaFatbinaryRule(config));
				+
				+  // Link device code for each architecture.
				+  std::unordered_set<std::string> handledArchitectures;
				+  for (const std::string& architectureKind : architectures) {
				+    // Clang always generates real code, so strip the specifier.
				+    const std::string architecture =
				+      architectureKind.substr(0, architectureKind.find('-'));
				+
				+    // Entries such as "70-real" and "70-virtual" map to the same cubin.
				+    // Emit each cubin only once: two build statements with the same output
				+    // would make the generated build file invalid.
				+    if (!handledArchitectures.insert(architecture).second) {
				+      continue;
				+    }
				+
				+    const std::string cubin =
				+      cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin");
				+
				+    fatbinary.Variables["PROFILES"] +=
				+      cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
				+    fatbinary.ExplicitDeps.emplace_back(cubin);
				+
				+    cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config));
				+    dlink.ExplicitDeps = explicitDeps;
				+    dlink.Outputs = { cubin };
				+    dlink.Variables["ARCH"] = cmStrCat("sm_", architecture);
				+
				+    // The generated register file contains macros that when expanded register
				+    // the device routines. Because the routines are the same for all
				+    // architectures the register file will be the same too. Thus generate it
				+    // only on the first invocation to reduce overhead.
				+    if (fatbinary.ExplicitDeps.size() == 1) {
				+      dlink.Variables["REGISTER"] = cmStrCat(
				+        "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h");
				+    }
				+
				+    globalGen->WriteBuild(this->GetCommonFileStream(), dlink);
				+  }
				+
				+  // Combine all architectures into a single fatbinary.
				+  const std::string fatbinaryHeader =
				+    cmStrCat(ninjaOutputDir, "/cmake_cuda_fatbin.h");
				+  fatbinary.Outputs = { fatbinaryHeader };
				+  globalGen->WriteBuild(this->GetCommonFileStream(), fatbinary);
				+
				+  // Compile the stub that registers the kernels and contains the fatbinaries.
				+  cmNinjaBuild dcompile(this->LanguageLinkerCudaDeviceCompileRule(config));
				+  dcompile.Outputs = { output };
				+  dcompile.ExplicitDeps = { fatbinaryHeader };
				+  dcompile.Variables["FATBIN"] =
				+    this->GetLocalGenerator()->ConvertToOutputFormat(
				+      cmStrCat(objectDir, "/cmake_cuda_fatbin.h"), cmOutputConverter::SHELL);
				+  dcompile.Variables["REGISTER"] =
				+    this->GetLocalGenerator()->ConvertToOutputFormat(
				+      cmStrCat(objectDir, "/cmake_cuda_register.h"), cmOutputConverter::SHELL);
				+  globalGen->WriteBuild(this->GetCommonFileStream(), dcompile);
				+}
			
 
				+
			
 
				+void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkStatement(
			
 
				+  const std::string& config, const std::string& fileConfig,
			
 
				+  const std::string& outputDir, const std::string& output)
			
 
				+{
			
 
				+  cmGeneratorTarget* genTarget = this->GetGeneratorTarget();
			
 
				+  cmGlobalNinjaGenerator* globalGen = this->GetGlobalGenerator();
			
 
				+
			
 
				   std::string targetOutputImplib = ConvertToNinjaPath(
			
 
				     genTarget->GetFullPath(config, cmStateEnums::ImportLibraryArtifact));
			
 
				 
			
@@ -606,8 +795,8 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
 
				       cmStrCat(this->GetLocalGenerator()->GetTargetDirectory(genTarget),
			
 
				                globalGen->ConfigDirectory(fileConfig), "/");
			
 
				     targetOutputFileConfigDir =
			
 
				-      globalGen->ExpandCFGIntDir(targetOutputDir, fileConfig);
			
 
				-    if (targetOutputDir == targetOutputFileConfigDir) {
			
 
				+      globalGen->ExpandCFGIntDir(outputDir, fileConfig);
			
 
				+    if (outputDir == targetOutputFileConfigDir) {
			
 
				       return;
			
 
				     }
			
 
				 
			
@@ -623,27 +812,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
 
				     }
			
 
				   }
			
 
				 
			
 
				-  if (firstForConfig) {
			
 
				-    globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal);
			
 
				-  }
			
 
				-  this->DeviceLinkObject = targetOutputReal;
			
 
				-
			
 
				-  // Write comments.
			
 
				-  cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream());
			
 
				-  const cmStateEnums::TargetType targetType = genTarget->GetType();
			
 
				-  this->GetCommonFileStream() << "# Device Link build statements for "
			
 
				-                              << cmState::GetTargetTypeName(targetType)
			
 
				-                              << " target " << this->GetTargetName() << "\n\n";
			
 
				-
			
 
				   // Compute the comment.
			
 
				   cmNinjaBuild build(this->LanguageLinkerDeviceRule(config));
			
 
				   build.Comment =
			
 
				-    cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', targetOutputReal);
			
 
				+    cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', output);
			
 
				 
			
 
				   cmNinjaVars& vars = build.Variables;
			
 
				 
			
 
				   // Compute outputs.
			
 
				-  build.Outputs.push_back(targetOutputReal);
			
 
				+  build.Outputs.push_back(output);
			
 
				   // Compute specific libraries to link with.
			
 
				   build.ExplicitDeps = this->GetObjects(config);
			
 
				   build.ImplicitDeps =
			
@@ -659,7 +836,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
 
				   cmLocalNinjaGenerator& localGen = *this->GetLocalGenerator();
			
 
				 
			
 
				   vars["TARGET_FILE"] =
			
 
				-    localGen.ConvertToOutputFormat(targetOutputReal, cmOutputConverter::SHELL);
			
 
				+    localGen.ConvertToOutputFormat(output, cmOutputConverter::SHELL);
			
 
				 
			
 
				   std::unique_ptr<cmLinkLineComputer> linkLineComputer(
			
 
				     new cmNinjaLinkLineDeviceComputer(
			
@@ -683,8 +860,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
 
				 
			
 
				   // Compute language specific link flags.
			
 
				   std::string langFlags;
			
 
				-  localGen.AddLanguageFlagsForLinking(langFlags, genTarget, cudaLinkLanguage,
			
 
				-                                      config);
			
 
				+  localGen.AddLanguageFlagsForLinking(langFlags, genTarget, "CUDA", config);
			
 
				   vars["LANGUAGE_COMPILE_FLAGS"] = langFlags;
			
 
				 
			
 
				   auto const tgtNames = this->TargetNames(config);
			
@@ -692,7 +868,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
 
				     vars["SONAME_FLAG"] =
			
 
				       this->GetMakefile()->GetSONameFlag(this->TargetLinkLanguage(config));
			
 
				     vars["SONAME"] = tgtNames.SharedObject;
			
 
				-    if (targetType == cmStateEnums::SHARED_LIBRARY) {
			
 
				+    if (genTarget->GetType() == cmStateEnums::SHARED_LIBRARY) {
			
 
				       std::string install_dir =
			
 
				         this->GetGeneratorTarget()->GetInstallNameDirForBuildTree(config);
			
 
				       if (!install_dir.empty()) {
			
@@ -731,7 +907,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
 
				   // do not check if the user has explicitly forced a response file.
			
 
				   int const commandLineLengthLimit =
			
 
				     static_cast<int>(cmSystemTools::CalculateCommandLineLengthLimit()) -
			
 
				-    globalGen->GetRuleCmdLength(this->LanguageLinkerDeviceRule(config));
			
 
				+    globalGen->GetRuleCmdLength(build.Rule);
			
 
				 
			
 
				   build.RspFile = this->ConvertToNinjaPath(
			
 
				     cmStrCat("CMakeFiles/", genTarget->GetName(),
			
@@ -746,7 +922,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
 
				   bool usedResponseFile = false;
			
 
				   globalGen->WriteBuild(this->GetCommonFileStream(), build,
			
 
				                         commandLineLengthLimit, &usedResponseFile);
			
 
				-  this->WriteDeviceLinkRule(usedResponseFile, config);
			
 
				+  this->WriteNvidiaDeviceLinkRule(usedResponseFile, config);
			
 
				 }
			
 
				 
			
 
				 void cmNinjaNormalTargetGenerator::WriteLinkStatement(
			
--- a/Source/cmNinjaNormalTargetGenerator.h
+++ b/Source/cmNinjaNormalTargetGenerator.h
@@ -21,18 +21,31 @@ public:
 
				 private:
			
 
				   std::string LanguageLinkerRule(const std::string& config) const;
			
 
				   std::string LanguageLinkerDeviceRule(const std::string& config) const;
			
 
				+  std::string LanguageLinkerCudaDeviceRule(const std::string& config) const;
			
 
				+  std::string LanguageLinkerCudaDeviceCompileRule(
			
 
				+    const std::string& config) const;
			
 
				+  std::string LanguageLinkerCudaFatbinaryRule(const std::string& config) const;
			
 
				 
			
 
				   const char* GetVisibleTypeName() const;
			
 
				   void WriteLanguagesRules(const std::string& config);
			
 
				 
			
 
				   void WriteLinkRule(bool useResponseFile, const std::string& config);
			
 
				-  void WriteDeviceLinkRule(bool useResponseFile, const std::string& config);
			
 
				+  void WriteDeviceLinkRules(const std::string& config);
			
 
				+  void WriteNvidiaDeviceLinkRule(bool useResponseFile,
			
 
				+                                 const std::string& config);
			
 
				 
			
 
				   void WriteLinkStatement(const std::string& config,
			
 
				                           const std::string& fileConfig, bool firstForConfig);
			
 
				   void WriteDeviceLinkStatement(const std::string& config,
			
 
				                                 const std::string& fileConfig,
			
 
				                                 bool firstForConfig);
			
 
				+  void WriteDeviceLinkStatements(const std::string& config,
			
 
				+                                 const std::vector<std::string>& architectures,
			
 
				+                                 const std::string& output);
			
 
				+  void WriteNvidiaDeviceLinkStatement(const std::string& config,
			
 
				+                                      const std::string& fileConfig,
			
 
				+                                      const std::string& outputDir,
			
 
				+                                      const std::string& output);
			
 
				 
			
 
				   void WriteObjectLibStatement(const std::string& config);
			
 
				 
			
--- a/Source/cmNinjaTargetGenerator.cxx
+++ b/Source/cmNinjaTargetGenerator.cxx
@@ -346,11 +346,13 @@ std::string cmNinjaTargetGenerator::ComputeIncludes(
 
				 }
			
 
				 
			
 
				 cmNinjaDeps cmNinjaTargetGenerator::ComputeLinkDeps(
			
 
				-  const std::string& linkLanguage, const std::string& config) const
			
 
				+  const std::string& linkLanguage, const std::string& config,
			
 
				+  bool ignoreType) const
			
 
				 {
			
 
				   // Static libraries never depend on other targets for linking.
			
 
				-  if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY ||
			
 
				-      this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY) {
			
 
				+  if (!ignoreType &&
			
 
				+      (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY ||
			
 
				+       this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY)) {
			
 
				     return cmNinjaDeps();
			
 
				   }
			
 
				 
			
@@ -1009,6 +1011,7 @@ void cmNinjaTargetGenerator::WriteObjectBuildStatements(
 
				   {
			
 
				     std::vector<cmSourceFile const*> objectSources;
			
 
				     this->GeneratorTarget->GetObjectSources(objectSources, config);
			
 
				+
			
 
				     for (cmSourceFile const* sf : objectSources) {
			
 
				       this->WriteObjectBuildStatement(sf, config, fileConfig, firstForConfig);
			
 
				     }
			
--- a/Source/cmNinjaTargetGenerator.h
+++ b/Source/cmNinjaTargetGenerator.h
@@ -113,7 +113,8 @@ protected:
 
				 
			
 
				   /// @return the list of link dependency for the given target @a target.
			
 
				   cmNinjaDeps ComputeLinkDeps(const std::string& linkLanguage,
			
 
				-                              const std::string& config) const;
			
 
				+                              const std::string& config,
			
 
				+                              bool ignoreType = false) const;
			
 
				 
			
 
				   /// @return the source file path for the given @a source.
			
 
				   std::string GetSourceFilePath(cmSourceFile const* source) const;
			
--- a/Source/cmRulePlaceholderExpander.cxx
+++ b/Source/cmRulePlaceholderExpander.cxx
@@ -141,6 +141,16 @@ std::string cmRulePlaceholderExpander::ExpandRuleVariable(
 
				       return replaceValues.DependencyFile;
			
 
				     }
			
 
				   }
			
 
				+  if (replaceValues.Fatbinary) {
			
 
				+    if (variable == "FATBINARY") {
			
 
				+      return replaceValues.Fatbinary;
			
 
				+    }
			
 
				+  }
			
 
				+  if (replaceValues.RegisterFile) {
			
 
				+    if (variable == "REGISTER_FILE") {
			
 
				+      return replaceValues.RegisterFile;
			
 
				+    }
			
 
				+  }
			
 
				 
			
 
				   if (replaceValues.Target) {
			
 
				     if (variable == "TARGET_QUOTED") {
			
--- a/Source/cmRulePlaceholderExpander.h
+++ b/Source/cmRulePlaceholderExpander.h
@@ -64,6 +64,8 @@ public:
 
				     const char* SwiftOutputFileMap;
			
 
				     const char* SwiftSources;
			
 
				     const char* ISPCHeader;
			
 
				+    const char* Fatbinary;
			
 
				+    const char* RegisterFile;
			
 
				   };
			
 
				 
			
 
				   // Expand rule variables in CMake of the type found in language rules
			
--- a/Tests/Cuda/CMakeLists.txt
+++ b/Tests/Cuda/CMakeLists.txt
@@ -17,13 +17,12 @@ add_cuda_test_macro(Cuda.SeparableCompCXXOnly SeparableCompCXXOnly)
 
				 add_cuda_test_macro(Cuda.Toolkit Toolkit)
			
 
				 add_cuda_test_macro(Cuda.IncludePathNoToolkit IncludePathNoToolkit)
			
 
				 add_cuda_test_macro(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit)
			
 
				+add_cuda_test_macro(Cuda.Complex CudaComplex)
			
 
				+add_cuda_test_macro(Cuda.ProperLinkFlags ProperLinkFlags)
			
 
				 
			
 
				-# Separable compilation is currently only supported on NVCC. Disable tests
			
 
				-# using it for other compilers.
			
 
				 if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
			
 
				-  add_cuda_test_macro(Cuda.Complex CudaComplex)
			
 
				+  # Clang lacks __CUDACC_VER*__ defines.
			
 
				   add_cuda_test_macro(Cuda.ProperDeviceLibraries ProperDeviceLibraries)
			
 
				-  add_cuda_test_macro(Cuda.ProperLinkFlags ProperLinkFlags)
			
 
				 endif()
			
 
				 
			
 
				 # The CUDA only ships the shared version of the toolkit libraries
			
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -12,33 +12,31 @@ add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusT
 
				 add_cuda_test_macro(CudaOnly.Standard98 CudaOnlyStandard98)
			
 
				 add_cuda_test_macro(CudaOnly.Toolkit CudaOnlyToolkit)
			
 
				 add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
			
 
				+add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
			
 
				+add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
			
 
				+add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
			
 
				 
			
 
				 if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
			
 
				+  # Clang doesn't have flags for selecting the runtime.
			
 
				   add_cuda_test_macro(CudaOnly.SharedRuntimeViaCUDAFlags CudaOnlySharedRuntimeViaCUDAFlags)
			
 
				 
			
 
				-  # Separable compilation is currently only supported on NVCC. Disable tests
			
 
				-  # using it for other compilers.
			
 
				-  add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
			
 
				-  add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
			
 
				-  add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
			
 
				-
			
 
				-  add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
			
 
				-    ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
			
 
				-    --build-and-test
			
 
				-    "${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
			
 
				-    "${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
			
 
				-    ${build_generator_args}
			
 
				-    --build-project DontResolveDeviceSymbols
			
 
				-    --build-options ${build_options}
			
 
				-    --test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
			
 
				-  )
			
 
				-  set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
			
 
				-    PROPERTY LABELS "CUDA")
			
 
				-
			
 
				   # Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode.
			
 
				   add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
			
 
				 endif()
			
 
				 
			
 
				+add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
			
 
				+  ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
			
 
				+  --build-and-test
			
 
				+  "${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
			
 
				+  "${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
			
 
				+  ${build_generator_args}
			
 
				+  --build-project DontResolveDeviceSymbols
			
 
				+  --build-options ${build_options}
			
 
				+  --test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
			
 
				+)
			
 
				+set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
			
 
				+  PROPERTY LABELS "CUDA")
			
 
				+
			
 
				 # The CUDA only ships the shared version of the toolkit libraries
			
 
				 # on windows
			
 
				 if(NOT WIN32)