Browse Source

Merge topic 'cuda_handle_target_objects_in_device_link'

cd984261e1 CUDA: Device linking now uses TARGET_OBJECTS content
aa8facefe8 CUDA: Visual Studio propagate objects to device linking

Acked-by: Kitware Robot <[email protected]>
Merge-request: !8829
Brad King 2 years ago
parent
commit
5a79ea2799

+ 8 - 8
Source/cmLinkLineDeviceComputer.cxx

@@ -101,9 +101,7 @@ void cmLinkLineDeviceComputer::ComputeLinkLibraries(
   ItemVector const& items = cli.GetItems();
   std::string config = cli.GetConfig();
   bool skipItemAfterFramework = false;
-  // Note:
-  // Any modification of this algorithm should be reflected also in
-  // cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions
+
   for (auto const& item : items) {
     if (skipItemAfterFramework) {
       skipItemAfterFramework = false;
@@ -132,11 +130,13 @@ void cmLinkLineDeviceComputer::ComputeLinkLibraries(
 
     BT<std::string> linkLib;
     if (item.IsPath == cmComputeLinkInformation::ItemIsPath::Yes) {
-      // nvcc understands absolute paths to libraries ending in '.a' or '.lib'.
-      // These should be passed to nvlink.  Other extensions need to be left
-      // out because nvlink may not understand or need them.  Even though it
-      // can tolerate '.so' or '.dylib' it cannot tolerate '.so.1'.
-      if (cmHasLiteralSuffix(item.Value.Value, ".a") ||
+      // nvcc understands absolute paths to libraries ending in '.o', .a', or
+      // '.lib'. These should be passed to nvlink.  Other extensions need to be
+      // left out because nvlink may not understand or need them.  Even though
+      // it can tolerate '.so' or '.dylib' it cannot tolerate '.so.1'.
+      if (cmHasLiteralSuffix(item.Value.Value, ".o") ||
+          cmHasLiteralSuffix(item.Value.Value, ".obj") ||
+          cmHasLiteralSuffix(item.Value.Value, ".a") ||
           cmHasLiteralSuffix(item.Value.Value, ".lib")) {
         linkLib.Value = item
                           .GetFormattedItem(this->ConvertToOutputFormat(

+ 33 - 14
Source/cmVisualStudio10TargetGenerator.cxx

@@ -3864,22 +3864,41 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
   }
   cudaLinkOptions.AppendFlagString("AdditionalOptions", linkFlags);
 
-  // For static libraries that have device linking enabled compute
-  // the  libraries
-  if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY &&
-      doDeviceLinking) {
-    cmComputeLinkInformation& cli = *pcli;
-    cmLinkLineDeviceComputer computer(
-      this->LocalGenerator,
-      this->LocalGenerator->GetStateSnapshot().GetDirectory());
-    std::vector<BT<std::string>> btLibVec;
-    computer.ComputeLinkLibraries(cli, std::string{}, btLibVec);
+  if (doDeviceLinking) {
     std::vector<std::string> libVec;
-    for (auto const& item : btLibVec) {
-      libVec.emplace_back(item.Value);
+    auto const& kinded = this->GeneratorTarget->GetKindedSources(configName);
+    // CMake conversion uses full paths when possible to allow deeper trees.
+    // However, CUDA 8.0 msbuild rules fail on absolute paths so for CUDA
+    // we must use relative paths.
+    const bool forceRelative = true;
+    for (cmGeneratorTarget::SourceAndKind const& si : kinded.Sources) {
+      switch (si.Kind) {
+        case cmGeneratorTarget::SourceKindExternalObject: {
+          std::string path =
+            this->ConvertPath(si.Source.Value->GetFullPath(), forceRelative);
+          ConvertToWindowsSlash(path);
+          libVec.emplace_back(std::move(path));
+        } break;
+        default:
+          break;
+      }
+    }
+    // For static libraries that have device linking enabled compute
+    // the  libraries
+    if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY) {
+      cmComputeLinkInformation& cli = *pcli;
+      cmLinkLineDeviceComputer computer(
+        this->LocalGenerator,
+        this->LocalGenerator->GetStateSnapshot().GetDirectory());
+      std::vector<BT<std::string>> btLibVec;
+      computer.ComputeLinkLibraries(cli, std::string{}, btLibVec);
+      for (auto const& item : btLibVec) {
+        libVec.emplace_back(item.Value);
+      }
+    }
+    if (!libVec.empty()) {
+      cudaLinkOptions.AddFlag("AdditionalDependencies", libVec);
     }
-
-    cudaLinkOptions.AddFlag("AdditionalDependencies", libVec);
   }
 
   this->CudaLinkOptions[configName] = std::move(pOptions);

+ 1 - 0
Tests/CudaOnly/CMakeLists.txt

@@ -20,6 +20,7 @@ add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
 add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
 add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation)
 add_cuda_test_macro(CudaOnly.SeparateCompilationPTX CudaOnlySeparateCompilationPTX)
+add_cuda_test_macro(CudaOnly.SeparateCompilationTargetObjects CudaOnlySeparateCompilationTargetObjects)
 
 if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
   # Clang doesn't have flags for selecting the runtime.

+ 18 - 0
Tests/CudaOnly/SeparateCompilationTargetObjects/CMakeLists.txt

@@ -0,0 +1,18 @@
+cmake_minimum_required(VERSION 3.25.5)
+
+project(SeparateCompilationObjects LANGUAGES CUDA)
+
+add_library(foo OBJECT foo.cu)
+set_target_properties(foo PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+add_library(bar OBJECT bar.cu)
+set_target_properties(bar PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+add_executable(CudaOnlySeparateCompilationTargetObjects main.cu)
+set_target_properties(CudaOnlySeparateCompilationTargetObjects PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+target_link_libraries(CudaOnlySeparateCompilationTargetObjects PRIVATE $<TARGET_OBJECTS:foo> bar)
+
+if(APPLE)
+  # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+  set_property(TARGET CudaOnlySeparateCompilationTargetObjects PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()

+ 18 - 0
Tests/CudaOnly/SeparateCompilationTargetObjects/bar.cu

@@ -0,0 +1,18 @@
+
+#include <iostream>
+
+#ifdef _WIN32
+#  define EXPORT __declspec(dllexport)
+#else
+#  define EXPORT __attribute__((__visibility__("default")))
+#endif
+
+__global__ void b1()
+{
+}
+
+EXPORT int bar()
+{
+  b1<<<1, 1>>>();
+  return 0;
+}

+ 18 - 0
Tests/CudaOnly/SeparateCompilationTargetObjects/foo.cu

@@ -0,0 +1,18 @@
+
+#include <iostream>
+
+#ifdef _WIN32
+#  define EXPORT __declspec(dllexport)
+#else
+#  define EXPORT __attribute__((__visibility__("default")))
+#endif
+
+__global__ void k1()
+{
+}
+
+EXPORT int foo()
+{
+  k1<<<1, 1>>>();
+  return 0;
+}

+ 16 - 0
Tests/CudaOnly/SeparateCompilationTargetObjects/main.cu

@@ -0,0 +1,16 @@
+// main.cu
+#include <iostream>
+
+#ifdef _WIN32
+#  define IMPORT __declspec(dllimport)
+#else
+#  define IMPORT
+#endif
+
+IMPORT int foo();
+IMPORT int bar();
+
+int main(int argc, char**)
+{
+  return foo() && bar();
+}