3 years ago · 96bc59b1ca
--- a/Help/release/dev/cuda-device-lto.rst
+++ b/Help/release/dev/cuda-device-lto.rst
@@ -0,0 +1,7 @@
 
				+cuda-device-lto
			
 
				+---------------
			
 
				+
			
 
				+* ``CUDA`` language now supports device link time optimization when using
			
 
				+  ``nvcc``. The :variable:`CMAKE_INTERPROCEDURAL_OPTIMIZATION` variable and
			
 
				+  the associated :prop_tgt:`INTERPROCEDURAL_OPTIMIZATION` target property will
			
 
				+  activate device LTO.
			
--- a/Modules/CheckIPOSupported.cmake
+++ b/Modules/CheckIPOSupported.cmake
@@ -76,6 +76,23 @@ endmacro()
 
				 
			
 
				 # Run IPO/LTO test
			
 
				 macro(_ipo_run_language_check language)
			
 
				+  set(_C_ext "c")
			
 
				+  set(_CXX_ext "cpp")
			
 
				+  set(_Fortran_ext "f")
			
 
				+  string(COMPARE EQUAL "${language}" "CUDA" is_cuda)
			
 
				+
			
 
				+  set(ext ${_${language}_ext})
			
 
				+  if(NOT "${ext}" STREQUAL "")
			
 
				+    set(copy_sources foo.${ext} main.${ext})
			
 
				+  elseif(is_cuda)
			
 
				+    if(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE)
			
 
				+      set("${X_RESULT}" YES PARENT_SCOPE)
			
 
				+    endif()
			
 
				+    return()
			
 
				+  else()
			
 
				+    message(FATAL_ERROR "Language not supported")
			
 
				+  endif()
			
 
				+
			
 
				   set(testdir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/_CMakeLTOTest-${language}")
			
 
				 
			
 
				   file(REMOVE_RECURSE "${testdir}")
			
@@ -91,17 +108,6 @@ macro(_ipo_run_language_check language)
 
				 
			
 
				   set(try_compile_src "${CMAKE_ROOT}/Modules/CheckIPOSupported")
			
 
				 
			
 
				-  set(_C_ext "c")
			
 
				-  set(_CXX_ext "cpp")
			
 
				-  set(_Fortran_ext "f")
			
 
				-
			
 
				-  set(ext ${_${language}_ext})
			
 
				-  if(NOT "${ext}" STREQUAL "")
			
 
				-    set(copy_sources foo.${ext} main.${ext})
			
 
				-  else()
			
 
				-    message(FATAL_ERROR "Language not supported")
			
 
				-  endif()
			
 
				-
			
 
				   # Use:
			
 
				   # * TRY_COMPILE_PROJECT_NAME
			
 
				   # * CMAKE_VERSION
			
@@ -211,6 +217,11 @@ function(check_ipo_supported)
 
				       list(APPEND languages "C")
			
 
				     endif()
			
 
				 
			
 
				+    list(FIND enabled_languages "CUDA" result)
			
 
				+    if(NOT result EQUAL -1)
			
 
				+      list(APPEND languages "CUDA")
			
 
				+    endif()
			
 
				+
			
 
				     list(FIND enabled_languages "Fortran" result)
			
 
				     if(NOT result EQUAL -1)
			
 
				       list(APPEND languages "Fortran")
			
@@ -219,7 +230,7 @@ function(check_ipo_supported)
 
				     string(COMPARE EQUAL "${languages}" "" no_languages)
			
 
				     if(no_languages)
			
 
				       _ipo_not_supported(
			
 
				-          "no C/CXX/Fortran languages found in ENABLED_LANGUAGES global property"
			
 
				+          "no C/CXX/CUDA/Fortran languages found in ENABLED_LANGUAGES global property"
			
 
				       )
			
 
				       return()
			
 
				     endif()
			
@@ -227,7 +238,7 @@ function(check_ipo_supported)
 
				     set(languages "${X_LANGUAGES}")
			
 
				 
			
 
				     set(unsupported_languages "${languages}")
			
 
				-    list(REMOVE_ITEM unsupported_languages "C" "CXX" "Fortran")
			
 
				+    list(REMOVE_ITEM unsupported_languages "C" "CXX" "CUDA" "Fortran")
			
 
				     string(COMPARE NOTEQUAL "${unsupported_languages}" "" has_unsupported)
			
 
				     if(has_unsupported)
			
 
				       _ipo_not_supported(
			
--- a/Modules/Compiler/Clang-CUDA.cmake
+++ b/Modules/Compiler/Clang-CUDA.cmake
@@ -35,6 +35,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
 
				 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
			
 
				 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE   "")
			
 
				 
			
 
				+# Clang doesn't support CUDA device LTO
			
 
				+set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE NO)
			
 
				+set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER NO)
			
 
				+
			
 
				 if(UNIX)
			
 
				   list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
			
 
				 endif()
			
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -48,6 +48,13 @@ if((NOT DEFINED CMAKE_DEPENDS_USE_COMPILER OR CMAKE_DEPENDS_USE_COMPILER)
 
				   set(CMAKE_CUDA_DEPENDS_USE_COMPILER TRUE)
			
 
				 endif()
			
 
				 
			
 
				+if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
			
 
				+  set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE YES)
			
 
				+  set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER YES)
			
 
				+
			
 
				+  set(CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO " -dlto")
			
 
				+endif()
			
 
				+
			
 
				 if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
			
 
				   set(CMAKE_CUDA_COMPILE_OPTIONS_PIE -Xcompiler=-fPIE)
			
 
				   set(CMAKE_CUDA_COMPILE_OPTIONS_PIC -Xcompiler=-fPIC)
			
@@ -61,6 +68,7 @@ if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
 
				   string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -O1 -DNDEBUG")
			
 
				   string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -O2 -g -DNDEBUG")
			
 
				 endif()
			
 
				+
			
 
				 set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
			
 
				 set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
			
 
				 
			
--- a/Source/cmGeneratorTarget.cxx
+++ b/Source/cmGeneratorTarget.cxx
@@ -916,11 +916,19 @@ bool cmGeneratorTarget::IsIPOEnabled(std::string const& lang,
 
				     return false;
			
 
				   }
			
 
				 
			
 
				-  if (lang != "C" && lang != "CXX" && lang != "Fortran") {
			
 
				+  if (lang != "C" && lang != "CXX" && lang != "CUDA" && lang != "Fortran") {
			
 
				     // We do not define IPO behavior for other languages.
			
 
				     return false;
			
 
				   }
			
 
				 
			
 
				+  if (lang == "CUDA") {
			
 
				+    // CUDA IPO requires both CUDA_ARCHITECTURES and CUDA_SEPARABLE_COMPILATION
			
 
				+    if (cmIsOff(this->GetSafeProperty("CUDA_ARCHITECTURES")) ||
			
 
				+        cmIsOff(this->GetSafeProperty("CUDA_SEPARABLE_COMPILATION"))) {
			
 
				+      return false;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				   cmPolicies::PolicyStatus cmp0069 = this->GetPolicyStatusCMP0069();
			
 
				 
			
 
				   if (cmp0069 == cmPolicies::OLD || cmp0069 == cmPolicies::WARN) {
			
@@ -3428,7 +3436,9 @@ void cmGeneratorTarget::AddExplicitLanguageFlags(std::string& flags,
 
				                                              "EXPLICIT_LANGUAGE");
			
 
				 }
			
 
				 
			
 
				-void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
			
 
				+void cmGeneratorTarget::AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
			
 
				+                                                 const std::string& config,
			
 
				+                                                 std::string& flags) const
			
 
				 {
			
 
				   std::string property = this->GetSafeProperty("CUDA_ARCHITECTURES");
			
 
				 
			
@@ -3460,6 +3470,7 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
 
				 
			
 
				   std::string const& compiler =
			
 
				     this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
			
 
				+  const bool ipoEnabled = this->IsIPOEnabled("CUDA", config);
			
 
				 
			
 
				   // Check for special modes: `all`, `all-major`.
			
 
				   if (property == "all" || property == "all-major") {
			
@@ -3539,6 +3550,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
 
				   }
			
 
				 
			
 
				   if (compiler == "NVIDIA") {
			
 
				+    if (ipoEnabled && compileOrLink == cmBuildStep::Link) {
			
 
				+      if (cmValue cudaIPOFlags =
			
 
				+            this->Makefile->GetDefinition("CMAKE_CUDA_LINK_OPTIONS_IPO")) {
			
 
				+        flags += cudaIPOFlags;
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				     for (CudaArchitecture& architecture : architectures) {
			
 
				       flags +=
			
 
				         " --generate-code=arch=compute_" + architecture.name + ",code=[";
			
@@ -3551,7 +3569,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
 
				         }
			
 
				       }
			
 
				 
			
 
				-      if (architecture.real) {
			
 
				+      if (ipoEnabled) {
			
 
				+        if (compileOrLink == cmBuildStep::Compile) {
			
 
				+          flags += "lto_" + architecture.name;
			
 
				+        } else if (compileOrLink == cmBuildStep::Link) {
			
 
				+          flags += "sm_" + architecture.name;
			
 
				+        }
			
 
				+      } else if (architecture.real) {
			
 
				         flags += "sm_" + architecture.name;
			
 
				       }
			
 
				 
			
--- a/Source/cmGeneratorTarget.h
+++ b/Source/cmGeneratorTarget.h
@@ -23,6 +23,7 @@
 
				 #include "cmStateTypes.h"
			
 
				 #include "cmValue.h"
			
 
				 
			
 
				+enum class cmBuildStep;
			
 
				 class cmComputeLinkInformation;
			
 
				 class cmCustomCommand;
			
 
				 class cmGlobalGenerator;
			
@@ -471,7 +472,9 @@ public:
 
				   void AddExplicitLanguageFlags(std::string& flags,
			
 
				                                 cmSourceFile const& sf) const;
			
 
				 
			
 
				-  void AddCUDAArchitectureFlags(std::string& flags) const;
			
 
				+  void AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
			
 
				+                                const std::string& config,
			
 
				+                                std::string& flags) const;
			
 
				   void AddCUDAToolkitFlags(std::string& flags) const;
			
 
				 
			
 
				   void AddHIPArchitectureFlags(std::string& flags) const;
			
--- a/Source/cmGhsMultiTargetGenerator.cxx
+++ b/Source/cmGhsMultiTargetGenerator.cxx
@@ -183,8 +183,8 @@ void cmGhsMultiTargetGenerator::SetCompilerFlags(std::string const& config,
 
				   auto i = this->FlagsByLanguage.find(language);
			
 
				   if (i == this->FlagsByLanguage.end()) {
			
 
				     std::string flags;
			
 
				-    this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
			
 
				-                                           language, config);
			
 
				+    this->LocalGenerator->AddLanguageFlags(
			
 
				+      flags, this->GeneratorTarget, cmBuildStep::Compile, language, config);
			
 
				     this->LocalGenerator->AddCMP0018Flags(flags, this->GeneratorTarget,
			
 
				                                           language, config);
			
 
				     this->LocalGenerator->AddVisibilityPresetFlags(
			
--- a/Source/cmGlobalXCodeGenerator.cxx
+++ b/Source/cmGlobalXCodeGenerator.cxx
@@ -2368,8 +2368,8 @@ void cmGlobalXCodeGenerator::CreateBuildSettings(cmGeneratorTarget* gtgt,
 
				     std::string& flags = cflags[lang];
			
 
				 
			
 
				     // Add language-specific flags.
			
 
				-    this->CurrentLocalGenerator->AddLanguageFlags(flags, gtgt, lang,
			
 
				-                                                  configName);
			
 
				+    this->CurrentLocalGenerator->AddLanguageFlags(
			
 
				+      flags, gtgt, cmBuildStep::Compile, lang, configName);
			
 
				 
			
 
				     if (gtgt->IsIPOEnabled(lang, configName)) {
			
 
				       this->CurrentLocalGenerator->AppendFeatureOptions(flags, lang, "IPO");
			
--- a/Source/cmLinkLineDeviceComputer.cxx
+++ b/Source/cmLinkLineDeviceComputer.cxx
@@ -68,6 +68,26 @@ bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinking(
 
				     });
			
 
				 }
			
 
				 
			
 
				+bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinkingIPOFlag(
			
 
				+  cmComputeLinkInformation& cli)
			
 
				+{
			
 
				+  // Determine if this item might requires device linking.
			
 
				+  // For this we only consider targets
			
 
				+  using ItemVector = cmComputeLinkInformation::ItemVector;
			
 
				+  ItemVector const& items = cli.GetItems();
			
 
				+  std::string config = cli.GetConfig();
			
 
				+  return std::any_of(
			
 
				+    items.begin(), items.end(),
			
 
				+    [config](cmComputeLinkInformation::Item const& item) -> bool {
			
 
				+      return item.Target &&
			
 
				+        item.Target->GetType() == cmStateEnums::STATIC_LIBRARY &&
			
 
				+        // this dependency requires us to device link it
			
 
				+        !item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS") &&
			
 
				+        item.Target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION") &&
			
 
				+        item.Target->IsIPOEnabled("CUDA", config);
			
 
				+    });
			
 
				+}
			
 
				+
			
 
				 void cmLinkLineDeviceComputer::ComputeLinkLibraries(
			
 
				   cmComputeLinkInformation& cli, std::string const& stdLibString,
			
 
				   std::vector<BT<std::string>>& linkLibraries)
			
--- a/Source/cmLinkLineDeviceComputer.h
+++ b/Source/cmLinkLineDeviceComputer.h
@@ -30,6 +30,7 @@ public:
 
				     delete;
			
 
				 
			
 
				   bool ComputeRequiresDeviceLinking(cmComputeLinkInformation& cli);
			
 
				+  bool ComputeRequiresDeviceLinkingIPOFlag(cmComputeLinkInformation& cli);
			
 
				 
			
 
				   void ComputeLinkLibraries(
			
 
				     cmComputeLinkInformation& cli, std::string const& stdLibString,
			
--- a/Source/cmLocalGenerator.cxx
+++ b/Source/cmLocalGenerator.cxx
@@ -36,6 +36,7 @@
 
				 #include "cmInstallScriptGenerator.h"
			
 
				 #include "cmInstallTargetGenerator.h"
			
 
				 #include "cmLinkLineComputer.h"
			
 
				+#include "cmLinkLineDeviceComputer.h"
			
 
				 #include "cmMakefile.h"
			
 
				 #include "cmRange.h"
			
 
				 #include "cmRulePlaceholderExpander.h"
			
@@ -1381,7 +1382,7 @@ std::vector<BT<std::string>> cmLocalGenerator::GetStaticLibraryFlags(
 
				 }
			
 
				 
			
 
				 void cmLocalGenerator::GetDeviceLinkFlags(
			
 
				-  cmLinkLineComputer& linkLineComputer, const std::string& config,
			
 
				+  cmLinkLineDeviceComputer& linkLineComputer, const std::string& config,
			
 
				   std::string& linkLibs, std::string& linkFlags, std::string& frameworkPath,
			
 
				   std::string& linkPath, cmGeneratorTarget* target)
			
 
				 {
			
@@ -1389,6 +1390,18 @@ void cmLocalGenerator::GetDeviceLinkFlags(
 
				 
			
 
				   cmComputeLinkInformation* pcli = target->GetLinkInformation(config);
			
 
				 
			
 
				+  auto linklang = linkLineComputer.GetLinkerLanguage(target, config);
			
 
				+  auto ipoEnabled = target->IsIPOEnabled(linklang, config);
			
 
				+  if (!ipoEnabled) {
			
 
				+    ipoEnabled = linkLineComputer.ComputeRequiresDeviceLinkingIPOFlag(*pcli);
			
 
				+  }
			
 
				+  if (ipoEnabled) {
			
 
				+    if (cmValue cudaIPOFlags = this->Makefile->GetDefinition(
			
 
				+          "CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO")) {
			
 
				+      linkFlags += cudaIPOFlags;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				   if (pcli) {
			
 
				     // Compute the required device link libraries when
			
 
				     // resolving gpu lang device symbols
			
@@ -1396,6 +1409,8 @@ void cmLocalGenerator::GetDeviceLinkFlags(
 
				                               linkPath);
			
 
				   }
			
 
				 
			
 
				+  // iterate link deps and see if any of them need IPO
			
 
				+
			
 
				   std::vector<std::string> linkOpts;
			
 
				   target->GetLinkOptions(linkOpts, config, "CUDA");
			
 
				   // LINK_OPTIONS are escaped.
			
@@ -1590,7 +1605,8 @@ std::vector<BT<std::string>> cmLocalGenerator::GetTargetCompileFlags(
 
				   cmMakefile* mf = this->GetMakefile();
			
 
				 
			
 
				   // Add language-specific flags.
			
 
				-  this->AddLanguageFlags(compileFlags, target, lang, config);
			
 
				+  this->AddLanguageFlags(compileFlags, target, cmBuildStep::Compile, lang,
			
 
				+                         config);
			
 
				 
			
 
				   if (target->IsIPOEnabled(lang, config)) {
			
 
				     this->AppendFeatureOptions(compileFlags, lang, "IPO");
			
@@ -1903,6 +1919,7 @@ void cmLocalGenerator::AddArchitectureFlags(std::string& flags,
 
				 
			
 
				 void cmLocalGenerator::AddLanguageFlags(std::string& flags,
			
 
				                                         cmGeneratorTarget const* target,
			
 
				+                                        cmBuildStep compileOrLink,
			
 
				                                         const std::string& lang,
			
 
				                                         const std::string& config)
			
 
				 {
			
@@ -1926,7 +1943,7 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
 
				       }
			
 
				     }
			
 
				   } else if (lang == "CUDA") {
			
 
				-    target->AddCUDAArchitectureFlags(flags);
			
 
				+    target->AddCUDAArchitectureFlags(compileOrLink, config, flags);
			
 
				     target->AddCUDAToolkitFlags(flags);
			
 
				   } else if (lang == "ISPC") {
			
 
				     target->AddISPCTargetFlags(flags);
			
@@ -2038,7 +2055,7 @@ void cmLocalGenerator::AddLanguageFlagsForLinking(
 
				     this->AddCompilerRequirementFlag(flags, target, lang, config);
			
 
				   }
			
 
				 
			
 
				-  this->AddLanguageFlags(flags, target, lang, config);
			
 
				+  this->AddLanguageFlags(flags, target, cmBuildStep::Link, lang, config);
			
 
				 
			
 
				   if (target->IsIPOEnabled(lang, config)) {
			
 
				     this->AppendFeatureOptions(flags, lang, "IPO");
			
--- a/Source/cmLocalGenerator.h
+++ b/Source/cmLocalGenerator.h
@@ -35,6 +35,7 @@ class cmGeneratorTarget;
 
				 class cmGlobalGenerator;
			
 
				 class cmImplicitDependsList;
			
 
				 class cmLinkLineComputer;
			
 
				+class cmLinkLineDeviceComputer;
			
 
				 class cmMakefile;
			
 
				 class cmRulePlaceholderExpander;
			
 
				 class cmSourceFile;
			
@@ -59,6 +60,13 @@ enum class cmDependencyScannerKind
 
				   Compiler
			
 
				 };
			
 
				 
			
 
				+/** What to compute language flags for */
			
 
				+enum class cmBuildStep
			
 
				+{
			
 
				+  Compile,
			
 
				+  Link
			
 
				+};
			
 
				+
			
 
				 /** Target and source file which have a specific output.  */
			
 
				 struct cmSourcesWithOutput
			
 
				 {
			
@@ -143,7 +151,8 @@ public:
 
				                             const std::string& filterArch = std::string());
			
 
				 
			
 
				   void AddLanguageFlags(std::string& flags, cmGeneratorTarget const* target,
			
 
				-                        const std::string& lang, const std::string& config);
			
 
				+                        cmBuildStep compileOrLink, const std::string& lang,
			
 
				+                        const std::string& config);
			
 
				   void AddLanguageFlagsForLinking(std::string& flags,
			
 
				                                   cmGeneratorTarget const* target,
			
 
				                                   const std::string& lang,
			
@@ -476,7 +485,7 @@ public:
 
				 
			
 
				   /** Fill out these strings for the given target.  Libraries to link,
			
 
				    *  flags, and linkflags. */
			
 
				-  void GetDeviceLinkFlags(cmLinkLineComputer& linkLineComputer,
			
 
				+  void GetDeviceLinkFlags(cmLinkLineDeviceComputer& linkLineComputer,
			
 
				                           const std::string& config, std::string& linkLibs,
			
 
				                           std::string& linkFlags, std::string& frameworkPath,
			
 
				                           std::string& linkPath, cmGeneratorTarget* target);
			
--- a/Source/cmLocalVisualStudio7Generator.cxx
+++ b/Source/cmLocalVisualStudio7Generator.cxx
@@ -680,7 +680,8 @@ void cmLocalVisualStudio7Generator::WriteConfiguration(
 
				     langForClCompile = linkLanguage;
			
 
				     if (langForClCompile == "C" || langForClCompile == "CXX" ||
			
 
				         langForClCompile == "Fortran") {
			
 
				-      this->AddLanguageFlags(flags, target, langForClCompile, configName);
			
 
				+      this->AddLanguageFlags(flags, target, cmBuildStep::Compile,
			
 
				+                             langForClCompile, configName);
			
 
				     }
			
 
				     // set the correct language
			
 
				     if (linkLanguage == "C") {
			
--- a/Source/cmMakefileExecutableTargetGenerator.cxx
+++ b/Source/cmMakefileExecutableTargetGenerator.cxx
@@ -136,17 +136,11 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
 
				   std::vector<std::string> depends;
			
 
				   this->AppendLinkDepends(depends, linkLanguage);
			
 
				 
			
 
				-  // Build a list of compiler flags and linker flags.
			
 
				-  std::string langFlags;
			
 
				-  std::string linkFlags;
			
 
				-
			
 
				   // Add language feature flags.
			
 
				+  std::string langFlags;
			
 
				   this->LocalGenerator->AddLanguageFlagsForLinking(
			
 
				     langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
			
 
				 
			
 
				-  // Add device-specific linker flags.
			
 
				-  this->GetDeviceLinkFlags(linkFlags, linkLanguage);
			
 
				-
			
 
				   // Construct a list of files associated with this executable that
			
 
				   // may need to be cleaned.
			
 
				   std::vector<std::string> exeCleanFiles;
			
@@ -173,13 +167,20 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
 
				     // Set path conversion for link script shells.
			
 
				     this->LocalGenerator->SetLinkScriptShell(useLinkScript);
			
 
				 
			
 
				-    std::unique_ptr<cmLinkLineComputer> linkLineComputer(
			
 
				+    std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
			
 
				       new cmLinkLineDeviceComputer(
			
 
				         this->LocalGenerator,
			
 
				         this->LocalGenerator->GetStateSnapshot().GetDirectory()));
			
 
				     linkLineComputer->SetForResponse(useResponseFileForLibs);
			
 
				     linkLineComputer->SetRelink(relink);
			
 
				 
			
 
				+    // Create set of linking flags.
			
 
				+    std::string linkFlags;
			
 
				+    std::string ignored_;
			
 
				+    this->LocalGenerator->GetDeviceLinkFlags(
			
 
				+      *linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
			
 
				+      ignored_, this->GeneratorTarget);
			
 
				+
			
 
				     // Collect up flags to link in needed libraries.
			
 
				     std::string linkLibs;
			
 
				     this->CreateLinkLibs(
			
--- a/Source/cmMakefileLibraryTargetGenerator.cxx
+++ b/Source/cmMakefileLibraryTargetGenerator.cxx
@@ -287,10 +287,6 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
 
				   this->LocalGenerator->AddLanguageFlagsForLinking(
			
 
				     langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
			
 
				 
			
 
				-  // Create set of linking flags.
			
 
				-  std::string linkFlags;
			
 
				-  this->GetDeviceLinkFlags(linkFlags, linkLanguage);
			
 
				-
			
 
				   // Clean files associated with this library.
			
 
				   std::set<std::string> libCleanFiles;
			
 
				   libCleanFiles.insert(
			
@@ -315,13 +311,20 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
 
				 
			
 
				     // Collect up flags to link in needed libraries.
			
 
				     std::string linkLibs;
			
 
				-    std::unique_ptr<cmLinkLineComputer> linkLineComputer(
			
 
				+    std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
			
 
				       new cmLinkLineDeviceComputer(
			
 
				         this->LocalGenerator,
			
 
				         this->LocalGenerator->GetStateSnapshot().GetDirectory()));
			
 
				     linkLineComputer->SetForResponse(useResponseFileForLibs);
			
 
				     linkLineComputer->SetRelink(relink);
			
 
				 
			
 
				+    // Create set of linking flags.
			
 
				+    std::string linkFlags;
			
 
				+    std::string ignored_;
			
 
				+    this->LocalGenerator->GetDeviceLinkFlags(
			
 
				+      *linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
			
 
				+      ignored_, this->GeneratorTarget);
			
 
				+
			
 
				     this->CreateLinkLibs(
			
 
				       linkLineComputer.get(), linkLibs, useResponseFileForLibs, depends,
			
 
				       cmMakefileTargetGenerator::ResponseFlagFor::DeviceLink);
			
--- a/Source/cmVisualStudio10TargetGenerator.cxx
+++ b/Source/cmVisualStudio10TargetGenerator.cxx
@@ -3300,6 +3300,7 @@ bool cmVisualStudio10TargetGenerator::ComputeClOptions(
 
				   this->LangForClCompile = langForClCompile;
			
 
				   if (!langForClCompile.empty()) {
			
 
				     this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
			
 
				+                                           cmBuildStep::Compile,
			
 
				                                            langForClCompile, configName);
			
 
				     this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget,
			
 
				                                             langForClCompile, configName);
			
@@ -3675,8 +3676,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
 
				 
			
 
				   // Get compile flags for CUDA in this directory.
			
 
				   std::string flags;
			
 
				-  this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA",
			
 
				-                                         configName);
			
 
				+  this->LocalGenerator->AddLanguageFlags(
			
 
				+    flags, this->GeneratorTarget, cmBuildStep::Compile, "CUDA", configName);
			
 
				   this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget, "CUDA",
			
 
				                                           configName);
			
 
				 
			
@@ -3947,7 +3948,8 @@ bool cmVisualStudio10TargetGenerator::ComputeMasmOptions(
 
				 
			
 
				   std::string flags;
			
 
				   this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
			
 
				-                                         "ASM_MASM", configName);
			
 
				+                                         cmBuildStep::Compile, "ASM_MASM",
			
 
				+                                         configName);
			
 
				 
			
 
				   masmOptions.Parse(flags);
			
 
				 
			
@@ -3999,7 +4001,8 @@ bool cmVisualStudio10TargetGenerator::ComputeNasmOptions(
 
				 
			
 
				   std::string flags;
			
 
				   this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
			
 
				-                                         "ASM_NASM", configName);
			
 
				+                                         cmBuildStep::Compile, "ASM_NASM",
			
 
				+                                         configName);
			
 
				   flags += " -f";
			
 
				   flags += this->Makefile->GetSafeDefinition("CMAKE_ASM_NASM_OBJECT_FORMAT");
			
 
				   nasmOptions.Parse(flags);
			
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -618,6 +618,11 @@ if(BUILD_TESTING)
 
				   set(Module.CheckIPOSupported-CXX_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_CXX=${CMake_TEST_IPO_WORKS_CXX})
			
 
				   ADD_TEST_MACRO(Module.CheckIPOSupported-CXX CheckIPOSupported-CXX)
			
 
				 
			
 
				+  if(CMake_TEST_CUDA)
			
 
				+    ADD_TEST_MACRO(Module.CheckIPOSupported-CUDA CheckIPOSupported-CUDA)
			
 
				+    set_property(TEST Module.CheckIPOSupported-CUDA APPEND PROPERTY LABELS "CUDA")
			
 
				+  endif()
			
 
				+
			
 
				   if(CMAKE_Fortran_COMPILER)
			
 
				     set(Module.CheckIPOSupported-Fortran_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_Fortran=${CMake_TEST_IPO_WORKS_Fortran})
			
 
				     ADD_TEST_MACRO(Module.CheckIPOSupported-Fortran CheckIPOSupported-Fortran)
			
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -7,7 +7,6 @@ endmacro ()
 
				 add_cuda_test_macro(CudaOnly.Architecture Architecture)
			
 
				 add_cuda_test_macro(CudaOnly.ArchSpecial CudaOnlyArchSpecial)
			
 
				 add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags)
			
 
				-
			
 
				 add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard)
			
 
				 add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX)
			
 
				 add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)
			
@@ -28,6 +27,19 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
 
				   add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
			
 
				 endif()
			
 
				 
			
 
				+# The CUDA only ships the shared version of the toolkit libraries
			
 
				+# on windows
			
 
				+if(NOT WIN32)
			
 
				+  add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
			
 
				+endif()
			
 
				+
			
 
				+add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO)
			
 
				+
			
 
				+if(MSVC)
			
 
				+  # Tests for features that only work with MSVC
			
 
				+  add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
			
 
				+endif()
			
 
				+
			
 
				 add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
			
 
				   ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
			
 
				   --build-and-test
			
@@ -41,16 +53,6 @@ add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
 
				 set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
			
 
				   PROPERTY LABELS "CUDA")
			
 
				 
			
 
				-# The CUDA only ships the shared version of the toolkit libraries
			
 
				-# on windows
			
 
				-if(NOT WIN32)
			
 
				-  add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
			
 
				-endif()
			
 
				-
			
 
				-if(MSVC)
			
 
				-  add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
			
 
				-endif()
			
 
				-
			
 
				 add_test(NAME CudaOnly.RuntimeControls COMMAND
			
 
				   ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
			
 
				   --build-and-test
			
--- a/Tests/CudaOnly/DeviceLTO/CMakeLists.txt
+++ b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt
@@ -0,0 +1,37 @@
 
				+cmake_minimum_required(VERSION 3.18)
			
 
				+project(DeviceLTO CUDA)
			
 
				+
			
 
				+# Goal:
			
 
				+# Verify that we correctly compile with device LTO
			
 
				+# Verify that device LTO requirements are propagated to
			
 
				+# the final device link line
			
 
				+
			
 
				+add_library(CUDA_dlto STATIC file1.cu file2.cu file3.cu)
			
 
				+add_executable(CudaOnlyDeviceLTO main.cu)
			
 
				+
			
 
				+set_target_properties(CUDA_dlto
			
 
				+                      PROPERTIES
			
 
				+                      CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
			
 
				+                      CUDA_SEPARABLE_COMPILATION ON
			
 
				+                      POSITION_INDEPENDENT_CODE ON)
			
 
				+
			
 
				+set_target_properties(CudaOnlyDeviceLTO
			
 
				+                      PROPERTIES
			
 
				+                      CUDA_SEPARABLE_COMPILATION ON
			
 
				+                      CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
			
 
				+                      )
			
 
				+
			
 
				+target_link_libraries(CudaOnlyDeviceLTO PRIVATE CUDA_dlto)
			
 
				+
			
 
				+include(CheckIPOSupported)
			
 
				+check_ipo_supported(LANGUAGES CUDA RESULT ipo_supported)
			
 
				+if(ipo_supported)
			
 
				+  set_target_properties(CUDA_dlto
			
 
				+                        PROPERTIES
			
 
				+                        INTERPROCEDURAL_OPTIMIZATION ON)
			
 
				+
			
 
				+  # When non-LTO variants (i.e. virtual) are built together with LTO ones the
			
 
				+  # linker warns about missing device LTO for the virtual architectures.
			
 
				+  # Ignore these warnings.
			
 
				+  target_link_options(CudaOnlyDeviceLTO PRIVATE "$<DEVICE_LINK:-w>")
			
 
				+endif()
			
--- a/Tests/CudaOnly/DeviceLTO/file1.cu
+++ b/Tests/CudaOnly/DeviceLTO/file1.cu
@@ -0,0 +1,17 @@
 
				+#ifdef _WIN32
			
 
				+#  define EXPORT __declspec(dllexport)
			
 
				+#else
			
 
				+#  define EXPORT
			
 
				+#endif
			
 
				+
			
 
				+extern __device__ int file2_func(int);
			
 
				+void __global__ kernel(int x)
			
 
				+{
			
 
				+  file2_func(x);
			
 
				+}
			
 
				+
			
 
				+EXPORT int launch_kernel(int x)
			
 
				+{
			
 
				+  kernel<<<1, 1>>>(x);
			
 
				+  return x;
			
 
				+}
			
--- a/Tests/CudaOnly/DeviceLTO/file2.cu
+++ b/Tests/CudaOnly/DeviceLTO/file2.cu
@@ -0,0 +1,5 @@
 
				+extern __device__ int file3_func(int);
			
 
				+int __device__ file2_func(int x)
			
 
				+{
			
 
				+  return x + file3_func(x);
			
 
				+}
			
--- a/Tests/CudaOnly/DeviceLTO/file3.cu
+++ b/Tests/CudaOnly/DeviceLTO/file3.cu
@@ -0,0 +1,4 @@
 
				+int __device__ file3_func(int x)
			
 
				+{
			
 
				+  return x * x * x;
			
 
				+}
			
--- a/Tests/CudaOnly/DeviceLTO/main.cu
+++ b/Tests/CudaOnly/DeviceLTO/main.cu
@@ -0,0 +1,62 @@
 
				+#include <iostream>
			
 
				+
			
 
				+#include "cuda.h"
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+#  define IMPORT __declspec(dllimport)
			
 
				+#else
			
 
				+#  define IMPORT
			
 
				+#endif
			
 
				+
			
 
				+IMPORT int launch_kernel(int x);
			
 
				+
			
 
				+int choose_cuda_device()
			
 
				+{
			
 
				+  int nDevices = 0;
			
 
				+  cudaError_t err = cudaGetDeviceCount(&nDevices);
			
 
				+  if (err != cudaSuccess) {
			
 
				+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
			
 
				+              << std::endl;
			
 
				+    return 1;
			
 
				+  }
			
 
				+  for (int i = 0; i < nDevices; ++i) {
			
 
				+    cudaDeviceProp prop;
			
 
				+    cudaError_t err = cudaGetDeviceProperties(&prop, i);
			
 
				+    if (err != cudaSuccess) {
			
 
				+      std::cerr << "Could not retrieve properties from CUDA device " << i
			
 
				+                << std::endl;
			
 
				+      return 1;
			
 
				+    }
			
 
				+    std::cout << "prop.major: " << prop.major << std::endl;
			
 
				+    err = cudaSetDevice(i);
			
 
				+    if (err != cudaSuccess) {
			
 
				+      std::cout << "Could not select CUDA device " << i << std::endl;
			
 
				+    } else {
			
 
				+      return 0;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  std::cout << "Could not find a CUDA enabled card" << std::endl;
			
 
				+
			
 
				+  return 1;
			
 
				+}
			
 
				+
			
 
				+int main()
			
 
				+{
			
 
				+  int ret = choose_cuda_device();
			
 
				+  if (ret) {
			
 
				+    return 0;
			
 
				+  }
			
 
				+
			
 
				+  cudaError_t err;
			
 
				+  launch_kernel(1);
			
 
				+  err = cudaGetLastError();
			
 
				+  if (err != cudaSuccess) {
			
 
				+    std::cerr << "launch_kernel: kernel launch should have passed.\n "
			
 
				+                 "Error message: "
			
 
				+              << cudaGetErrorString(err) << std::endl;
			
 
				+    return 1;
			
 
				+  }
			
 
				+
			
 
				+  return 0;
			
 
				+}
			
--- a/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
+++ b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
@@ -0,0 +1,32 @@
 
				+cmake_minimum_required(VERSION 3.8)
			
 
				+project(CheckIPOSupported-CUDA LANGUAGES CUDA)
			
 
				+
			
 
				+cmake_policy(SET CMP0069 NEW)
			
 
				+
			
 
				+include(CheckIPOSupported)
			
 
				+check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output)
			
 
				+if(ipo_supported)
			
 
				+  set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
			
 
				+endif()
			
 
				+
			
 
				+if(NOT ipo_supported AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
			
 
				+   AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
			
 
				+  message(FATAL_ERROR "CheckIPOSupported failed to correctly identify NVIDIA CUDA IPO support")
			
 
				+endif()
			
 
				+
			
 
				+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
			
 
				+
			
 
				+add_library(foo STATIC foo.cu)
			
 
				+set_target_properties(foo PROPERTIES
			
 
				+      WINDOWS_EXPORT_ALL_SYMBOLS ON
			
 
				+      POSITION_INDEPENDENT_CODE ON)
			
 
				+
			
 
				+add_library(bar SHARED bar.cu)
			
 
				+set_target_properties(bar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
			
 
				+target_link_libraries(bar PRIVATE foo)
			
 
				+
			
 
				+add_executable(CheckIPOSupported-CUDA main.cu)
			
 
				+target_link_libraries(CheckIPOSupported-CUDA PUBLIC bar)
			
 
				+
			
 
				+enable_testing()
			
 
				+add_test(NAME CheckIPOSupported-CUDA COMMAND CheckIPOSupported-CUDA)
			
--- a/Tests/Module/CheckIPOSupported-CUDA/bar.cu
+++ b/Tests/Module/CheckIPOSupported-CUDA/bar.cu
@@ -0,0 +1,12 @@
 
				+__device__ int foo_func(int);
			
 
				+
			
 
				+void __global__ bar_kernel(int x)
			
 
				+{
			
 
				+  foo_func(x);
			
 
				+}
			
 
				+
			
 
				+int launch_kernel(int x)
			
 
				+{
			
 
				+  bar_kernel<<<1, 1>>>(x);
			
 
				+  return x;
			
 
				+}
			
--- a/Tests/Module/CheckIPOSupported-CUDA/foo.cu
+++ b/Tests/Module/CheckIPOSupported-CUDA/foo.cu
@@ -0,0 +1,4 @@
 
				+extern __device__ int foo_func(int a)
			
 
				+{
			
 
				+  return a * 42 + 9;
			
 
				+}
			
--- a/Tests/Module/CheckIPOSupported-CUDA/main.cu
+++ b/Tests/Module/CheckIPOSupported-CUDA/main.cu
@@ -0,0 +1,62 @@
 
				+#include <iostream>
			
 
				+
			
 
				+#include "cuda.h"
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+#  define IMPORT __declspec(dllimport)
			
 
				+#else
			
 
				+#  define IMPORT
			
 
				+#endif
			
 
				+
			
 
				+IMPORT int launch_kernel(int x);
			
 
				+
			
 
				+int choose_cuda_device()
			
 
				+{
			
 
				+  int nDevices = 0;
			
 
				+  cudaError_t err = cudaGetDeviceCount(&nDevices);
			
 
				+  if (err != cudaSuccess) {
			
 
				+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
			
 
				+              << std::endl;
			
 
				+    return 1;
			
 
				+  }
			
 
				+  for (int i = 0; i < nDevices; ++i) {
			
 
				+    cudaDeviceProp prop;
			
 
				+    cudaError_t err = cudaGetDeviceProperties(&prop, i);
			
 
				+    if (err != cudaSuccess) {
			
 
				+      std::cerr << "Could not retrieve properties from CUDA device " << i
			
 
				+                << std::endl;
			
 
				+      return 1;
			
 
				+    }
			
 
				+    std::cout << "prop.major: " << prop.major << std::endl;
			
 
				+    err = cudaSetDevice(i);
			
 
				+    if (err != cudaSuccess) {
			
 
				+      std::cout << "Could not select CUDA device " << i << std::endl;
			
 
				+    } else {
			
 
				+      return 0;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  std::cout << "Could not find a CUDA enabled card" << std::endl;
			
 
				+
			
 
				+  return 1;
			
 
				+}
			
 
				+
			
 
				+int main()
			
 
				+{
			
 
				+  int ret = choose_cuda_device();
			
 
				+  if (ret) {
			
 
				+    return 0;
			
 
				+  }
			
 
				+
			
 
				+  cudaError_t err;
			
 
				+  launch_kernel(1);
			
 
				+  err = cudaGetLastError();
			
 
				+  if (err != cudaSuccess) {
			
 
				+    std::cerr << "launch_kernel: kernel launch should have passed.\n "
			
 
				+                 "Error message: "
			
 
				+              << cudaGetErrorString(err) << std::endl;
			
 
				+    return 1;
			
 
				+  }
			
 
				+
			
 
				+  return 0;
			
 
				+}
			
--- a/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt
+++ b/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt
@@ -1,6 +1,6 @@
 
				 ^CMake Error at .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(message\):
			
 
				-  IPO is not supported \(no C/CXX/Fortran languages found in ENABLED_LANGUAGES
			
 
				-  global property\)\.
			
 
				+  IPO is not supported \(no C/CXX/CUDA/Fortran languages found in
			
 
				+  ENABLED_LANGUAGES global property\)\.
			
 
				 Call Stack \(most recent call first\):
			
 
				   .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(_ipo_not_supported\)
			
 
				   default-lang-none\.cmake:[0-9]+ \(check_ipo_supported\)