Browse Source

CUDA/Clang: Fix separable compilation in non-root directories with Makefiles

Seems the relative paths were wrong basically all around such that only
compiling files in the top-level directory would work. I've modified
CudaOnly.SeparateCompilation to cover this.

Fixes #22482.
root 4 years ago
parent
commit
0b1cea66cd

+ 6 - 0
Help/release/3.21.rst

@@ -304,3 +304,9 @@ Changes made since CMake 3.21.0 include the following.
 
 
 * The :generator:`Visual Studio 17 2022` generator is now based on
 * The :generator:`Visual Studio 17 2022` generator is now based on
   "Visual Studio 2022 Preview 2".  Previously it was based on "Preview 1.1".
   "Visual Studio 2022 Preview 2".  Previously it was based on "Preview 1.1".
+
+3.21.2
+------
+
+* ``CUDA`` targets with :prop_tgt:`CUDA_SEPARABLE_COMPILATION` enabled are now
+  correctly generated in non-root directories.

+ 14 - 10
Source/cmMakefileTargetGenerator.cxx

@@ -1484,14 +1484,18 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
   }
   }
 
 
   std::vector<std::string> architectures = cmExpandedList(architecturesStr);
   std::vector<std::string> architectures = cmExpandedList(architecturesStr);
+  std::string const& relPath =
+    this->LocalGenerator->GetHomeRelativeOutputPath();
 
 
   // Ensure there are no duplicates.
   // Ensure there are no duplicates.
   const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
   const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
     std::vector<std::string> deps;
     std::vector<std::string> deps;
     this->AppendTargetDepends(deps, true);
     this->AppendTargetDepends(deps, true);
     this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
     this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
-    std::copy(this->Objects.begin(), this->Objects.end(),
-              std::back_inserter(deps));
+
+    for (std::string const& obj : this->Objects) {
+      deps.emplace_back(cmStrCat(relPath, obj));
+    }
 
 
     std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
     std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
     deps.clear();
     deps.clear();
@@ -1510,7 +1514,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
 
 
   std::string profiles;
   std::string profiles;
   std::vector<std::string> fatbinaryDepends;
   std::vector<std::string> fatbinaryDepends;
-  std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
+  std::string const registerFile =
+    cmStrCat(objectDir, "cmake_cuda_register.h");
 
 
   // Link device code for each architecture.
   // Link device code for each architecture.
   for (const std::string& architectureKind : architectures) {
   for (const std::string& architectureKind : architectures) {
@@ -1518,7 +1523,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
     const std::string architecture =
     const std::string architecture =
       architectureKind.substr(0, architectureKind.find('-'));
       architectureKind.substr(0, architectureKind.find('-'));
     const std::string cubin =
     const std::string cubin =
-      cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
+      cmStrCat(objectDir, "sm_", architecture, ".cubin");
 
 
     profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
     profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
     fatbinaryDepends.emplace_back(cubin);
     fatbinaryDepends.emplace_back(cubin);
@@ -1530,8 +1535,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
     // all architectures the register file will be the same too. Thus
     // all architectures the register file will be the same too. Thus
     // generate it only on the first invocation to reduce overhead.
     // generate it only on the first invocation to reduce overhead.
     if (fatbinaryDepends.size() == 1) {
     if (fatbinaryDepends.size() == 1) {
-      std::string registerFileRel =
-        this->LocalGenerator->MaybeRelativeToCurBinDir(registerFile);
+      std::string const registerFileRel =
+        cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
       registerFileCmd =
       registerFileCmd =
         cmStrCat(" --register-link-binaries=", registerFileRel);
         cmStrCat(" --register-link-binaries=", registerFileRel);
       cleanFiles.push_back(registerFileRel);
       cleanFiles.push_back(registerFileRel);
@@ -1555,7 +1560,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
   const std::string fatbinaryOutput =
   const std::string fatbinaryOutput =
     cmStrCat(objectDir, "cmake_cuda_fatbin.h");
     cmStrCat(objectDir, "cmake_cuda_fatbin.h");
   const std::string fatbinaryOutputRel =
   const std::string fatbinaryOutputRel =
-    this->LocalGenerator->MaybeRelativeToCurBinDir(fatbinaryOutput);
+    cmStrCat(relPath, relObjectDir, "cmake_cuda_fatbin.h");
 
 
   this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
   this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
                                       fatbinaryOutputRel, fatbinaryDepends,
                                       fatbinaryOutputRel, fatbinaryDepends,
@@ -1583,9 +1588,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
                                                compileCmd, vars);
                                                compileCmd, vars);
 
 
   commands.emplace_back(compileCmd);
   commands.emplace_back(compileCmd);
-  this->LocalGenerator->WriteMakeRule(
-    *this->BuildFileStream, nullptr, output,
-    { cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false);
+  this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, output,
+                                      { fatbinaryOutputRel }, commands, false);
 
 
   // Clean all the possible executable names and symlinks.
   // Clean all the possible executable names and symlinks.
   this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());
   this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());

+ 1 - 1
Tests/CudaOnly/CMakeLists.txt

@@ -15,7 +15,7 @@ add_cuda_test_macro(CudaOnly.ToolkitBeforeLang CudaOnlyToolkitBeforeLang)
 add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
 add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
 add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
 add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
 add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
 add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
-add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation)
 
 
 if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
 if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
   # Clang doesn't have flags for selecting the runtime.
   # Clang doesn't have flags for selecting the runtime.

+ 1 - 18
Tests/CudaOnly/SeparateCompilation/CMakeLists.txt

@@ -34,26 +34,9 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
 target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11)
 target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11)
 target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
 target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
 
 
-add_executable(CudaOnlySeparateCompilation main.cu)
-target_link_libraries(CudaOnlySeparateCompilation
-                      PRIVATE CUDASeparateLibB)
-set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD 11)
-set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
-
 set_target_properties(CUDASeparateLibA
 set_target_properties(CUDASeparateLibA
                       CUDASeparateLibB
                       CUDASeparateLibB
                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON
                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON
                       POSITION_INDEPENDENT_CODE ON)
                       POSITION_INDEPENDENT_CODE ON)
 
 
-if (CMAKE_GENERATOR MATCHES "^Visual Studio")
-  #Visual Studio CUDA integration will not perform device linking
-  #on a target that itself does not have GenerateRelocatableDeviceCode
-  #enabled.
-  set_target_properties(CudaOnlySeparateCompilation
-                        PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-endif()
-
-if(APPLE)
-  # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
-  set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
-endif()
+add_subdirectory(main)

+ 18 - 0
Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt

@@ -0,0 +1,18 @@
+add_executable(CudaOnlySeparateCompilation main.cu)
+target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
+set_target_properties(CudaOnlySeparateCompilation PROPERTIES
+  CUDA_STANDARD 11
+  CUDA_STANDARD_REQUIRED TRUE
+)
+
+if(CMAKE_GENERATOR MATCHES "^Visual Studio")
+  # Visual Studio CUDA integration will not perform device linking
+  # on a target that itself does not have GenerateRelocatableDeviceCode
+  # enabled.
+  set_property(TARGET CudaOnlySeparateCompilation PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+endif()
+
+if(APPLE)
+  # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+  set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()

+ 2 - 2
Tests/CudaOnly/SeparateCompilation/main.cu → Tests/CudaOnly/SeparateCompilation/main/main.cu

@@ -1,8 +1,8 @@
 
 
 #include <iostream>
 #include <iostream>
 
 
-#include "file1.h"
-#include "file2.h"
+#include "../file1.h"
+#include "../file2.h"
 
 
 int file4_launch_kernel(int x);
 int file4_launch_kernel(int x);
 int file5_launch_kernel(int x);
 int file5_launch_kernel(int x);