Переглянути джерело

Merge topic 'cuda_separable_clang_make' into release-3.21

3975678fcc CUDA/Clang: Simplify --register-link-binaries logic
0b1cea66cd CUDA/Clang: Fix separable compilation in non-root directories with Makefiles

Acked-by: Kitware Robot <[email protected]>
Merge-request: !6400
Brad King 4 роки тому
батько
коміт
0a959bb271

+ 6 - 0
Help/release/3.21.rst

@@ -304,3 +304,9 @@ Changes made since CMake 3.21.0 include the following.
 
 * The :generator:`Visual Studio 17 2022` generator is now based on
   "Visual Studio 2022 Preview 2".  Previously it was based on "Preview 1.1".
+
+3.21.2
+------
+
+* ``CUDA`` targets with :prop_tgt:`CUDA_SEPARABLE_COMPILATION` enabled are now
+  correctly generated in non-root directories.

+ 23 - 19
Source/cmMakefileTargetGenerator.cxx

@@ -1484,14 +1484,18 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
   }
 
   std::vector<std::string> architectures = cmExpandedList(architecturesStr);
+  std::string const& relPath =
+    this->LocalGenerator->GetHomeRelativeOutputPath();
 
   // Ensure there are no duplicates.
   const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
     std::vector<std::string> deps;
     this->AppendTargetDepends(deps, true);
     this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
-    std::copy(this->Objects.begin(), this->Objects.end(),
-              std::back_inserter(deps));
+
+    for (std::string const& obj : this->Objects) {
+      deps.emplace_back(cmStrCat(relPath, obj));
+    }
 
     std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
     deps.clear();
@@ -1510,33 +1514,34 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
 
   std::string profiles;
   std::vector<std::string> fatbinaryDepends;
-  std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
+  std::string const registerFile =
+    cmStrCat(objectDir, "cmake_cuda_register.h");
 
   // Link device code for each architecture.
   for (const std::string& architectureKind : architectures) {
-    // Clang always generates real code, so strip the specifier.
-    const std::string architecture =
-      architectureKind.substr(0, architectureKind.find('-'));
-    const std::string cubin =
-      cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
-
-    profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
-    fatbinaryDepends.emplace_back(cubin);
-
     std::string registerFileCmd;
 
     // The generated register file contains macros that when expanded
     // register the device routines. Because the routines are the same for
     // all architectures the register file will be the same too. Thus
     // generate it only on the first invocation to reduce overhead.
-    if (fatbinaryDepends.size() == 1) {
-      std::string registerFileRel =
-        this->LocalGenerator->MaybeRelativeToCurBinDir(registerFile);
+    if (fatbinaryDepends.empty()) {
+      std::string const registerFileRel =
+        cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
       registerFileCmd =
         cmStrCat(" --register-link-binaries=", registerFileRel);
       cleanFiles.push_back(registerFileRel);
     }
 
+    // Clang always generates real code, so strip the specifier.
+    const std::string architecture =
+      architectureKind.substr(0, architectureKind.find('-'));
+    const std::string cubin =
+      cmStrCat(objectDir, "sm_", architecture, ".cubin");
+
+    profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
+    fatbinaryDepends.emplace_back(cubin);
+
     std::string command = cmStrCat(
       this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
       " -arch=sm_", architecture, registerFileCmd, " -o=$@ ",
@@ -1555,7 +1560,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
   const std::string fatbinaryOutput =
     cmStrCat(objectDir, "cmake_cuda_fatbin.h");
   const std::string fatbinaryOutputRel =
-    this->LocalGenerator->MaybeRelativeToCurBinDir(fatbinaryOutput);
+    cmStrCat(relPath, relObjectDir, "cmake_cuda_fatbin.h");
 
   this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
                                       fatbinaryOutputRel, fatbinaryDepends,
@@ -1583,9 +1588,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
                                                compileCmd, vars);
 
   commands.emplace_back(compileCmd);
-  this->LocalGenerator->WriteMakeRule(
-    *this->BuildFileStream, nullptr, output,
-    { cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false);
+  this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, output,
+                                      { fatbinaryOutputRel }, commands, false);
 
   // Clean all the possible executable names and symlinks.
   this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());

+ 5 - 5
Source/cmNinjaNormalTargetGenerator.cxx

@@ -753,10 +753,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
     const std::string cubin =
       cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin");
 
-    fatbinary.Variables["PROFILES"] +=
-      cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
-    fatbinary.ExplicitDeps.emplace_back(cubin);
-
     cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config));
     dlink.ExplicitDeps = explicitDeps;
     dlink.Outputs = { cubin };
@@ -766,11 +762,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
     // the device routines. Because the routines are the same for all
     // architectures the register file will be the same too. Thus generate it
     // only on the first invocation to reduce overhead.
-    if (fatbinary.ExplicitDeps.size() == 1) {
+    if (fatbinary.ExplicitDeps.empty()) {
       dlink.Variables["REGISTER"] = cmStrCat(
         "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h");
     }
 
+    fatbinary.Variables["PROFILES"] +=
+      cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
+    fatbinary.ExplicitDeps.emplace_back(cubin);
+
     this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink);
   }
 

+ 1 - 1
Tests/CudaOnly/CMakeLists.txt

@@ -15,7 +15,7 @@ add_cuda_test_macro(CudaOnly.ToolkitBeforeLang CudaOnlyToolkitBeforeLang)
 add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
 add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
 add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
-add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation)
 
 if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
   # Clang doesn't have flags for selecting the runtime.

+ 1 - 18
Tests/CudaOnly/SeparateCompilation/CMakeLists.txt

@@ -34,26 +34,9 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
 target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11)
 target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
 
-add_executable(CudaOnlySeparateCompilation main.cu)
-target_link_libraries(CudaOnlySeparateCompilation
-                      PRIVATE CUDASeparateLibB)
-set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD 11)
-set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
-
 set_target_properties(CUDASeparateLibA
                       CUDASeparateLibB
                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON
                       POSITION_INDEPENDENT_CODE ON)
 
-if (CMAKE_GENERATOR MATCHES "^Visual Studio")
-  #Visual Studio CUDA integration will not perform device linking
-  #on a target that itself does not have GenerateRelocatableDeviceCode
-  #enabled.
-  set_target_properties(CudaOnlySeparateCompilation
-                        PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-endif()
-
-if(APPLE)
-  # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
-  set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
-endif()
+add_subdirectory(main)

+ 18 - 0
Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt

@@ -0,0 +1,18 @@
+add_executable(CudaOnlySeparateCompilation main.cu)
+target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
+set_target_properties(CudaOnlySeparateCompilation PROPERTIES
+  CUDA_STANDARD 11
+  CUDA_STANDARD_REQUIRED TRUE
+)
+
+if(CMAKE_GENERATOR MATCHES "^Visual Studio")
+  # Visual Studio CUDA integration will not perform device linking
+  # on a target that itself does not have GenerateRelocatableDeviceCode
+  # enabled.
+  set_property(TARGET CudaOnlySeparateCompilation PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+endif()
+
+if(APPLE)
+  # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+  set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()

+ 2 - 2
Tests/CudaOnly/SeparateCompilation/main.cu → Tests/CudaOnly/SeparateCompilation/main/main.cu

@@ -1,8 +1,8 @@
 
 #include <iostream>
 
-#include "file1.h"
-#include "file2.h"
+#include "../file1.h"
+#include "../file2.h"
 
 int file4_launch_kernel(int x);
 int file5_launch_kernel(int x);