Pārlūkot izejas kodu

Merge topic 'cuda_support_system_libraries_with_device_symbols'

41eab150a8 CUDA: Pass more link libraries to device linking
88c7abb740 CUDA: Pass host linker directories to device linker

Acked-by: Kitware Robot <[email protected]>
Merge-request: !1634
Brad King 7 gadi atpakaļ
vecāks
revīzija
640bc9def4

+ 16 - 2
Modules/CMakeCUDAInformation.cmake

@@ -177,17 +177,31 @@ else()
   set(_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS "")
 endif()
 
+# Add implicit host link directories that contain device libraries
+# to the device link line.
+set(__IMPLICT_DLINK_DIRS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+if(__IMPLICT_DLINK_DIRS)
+  list(REMOVE_ITEM __IMPLICT_DLINK_DIRS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
+endif()
+set(__IMPLICT_DLINK_FLAGS )
+foreach(dir ${__IMPLICT_DLINK_DIRS})
+  if(EXISTS "${dir}/libcublas_device.a")
+    string(APPEND __IMPLICT_DLINK_FLAGS " -L\"${dir}\"")
+  endif()
+endforeach()
+unset(__IMPLICT_DLINK_DIRS)
 
 #These are used when linking relocatable (dc) cuda code
 if(NOT CMAKE_CUDA_DEVICE_LINK_LIBRARY)
   set(CMAKE_CUDA_DEVICE_LINK_LIBRARY
-    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}")
 endif()
 if(NOT CMAKE_CUDA_DEVICE_LINK_EXECUTABLE)
   set(CMAKE_CUDA_DEVICE_LINK_EXECUTABLE
-    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <FLAGS> <CMAKE_CUDA_LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <FLAGS> <CMAKE_CUDA_LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}")
 endif()
 
 unset(_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS)
+unset(__IMPLICT_DLINK_FLAGS)
 
 set(CMAKE_CUDA_INFORMATION_LOADED 1)

+ 17 - 2
Modules/Platform/Windows-NVIDIA-CUDA.cmake

@@ -36,12 +36,27 @@ else()
   set(_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS "")
 endif()
 
+# Add implicit host link directories that contain device libraries
+# to the device link line.
+set(__IMPLICT_DLINK_DIRS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+if(__IMPLICT_DLINK_DIRS)
+  list(REMOVE_ITEM __IMPLICT_DLINK_DIRS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
+endif()
+set(__IMPLICT_DLINK_FLAGS )
+foreach(dir ${__IMPLICT_DLINK_DIRS})
+  if(EXISTS "${dir}/cublas_device.lib")
+    string(APPEND __IMPLICT_DLINK_FLAGS " -L\"${dir}\"")
+  endif()
+endforeach()
+unset(__IMPLICT_DLINK_DIRS)
+
 set(CMAKE_CUDA_DEVICE_LINK_LIBRARY
-  "<CMAKE_CUDA_COMPILER> <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS")
+  "<CMAKE_CUDA_COMPILER> <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS${__IMPLICT_DLINK_FLAGS}")
 set(CMAKE_CUDA_DEVICE_LINK_EXECUTABLE
-  "<CMAKE_CUDA_COMPILER> <FLAGS> <CMAKE_CUDA_LINK_FLAGS> ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS")
+  "<CMAKE_CUDA_COMPILER> <FLAGS> <CMAKE_CUDA_LINK_FLAGS> ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS${__IMPLICT_DLINK_FLAGS}")
 
 unset(_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS)
+unset(__IMPLICT_DLINK_FLAGS)
 
 string(REPLACE "/D" "-D" _PLATFORM_DEFINES_CUDA "${_PLATFORM_DEFINES}${_PLATFORM_DEFINES_CXX}")
 

+ 24 - 30
Source/cmLinkLineDeviceComputer.cxx

@@ -3,9 +3,9 @@
 
 #include "cmLinkLineDeviceComputer.h"
 
-#include <set>
 #include <sstream>
 
+#include "cmAlgorithms.h"
 #include "cmComputeLinkInformation.h"
 #include "cmGeneratorTarget.h"
 #include "cmGlobalNinjaGenerator.h"
@@ -32,38 +32,32 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
   ItemVector const& items = cli.GetItems();
   std::string config = cli.GetConfig();
   for (auto const& item : items) {
-    if (!item.Target) {
-      continue;
-    }
-
-    bool skippable = false;
-    switch (item.Target->GetType()) {
-      case cmStateEnums::SHARED_LIBRARY:
-      case cmStateEnums::MODULE_LIBRARY:
-      case cmStateEnums::INTERFACE_LIBRARY:
-        skippable = true;
-        break;
-      case cmStateEnums::STATIC_LIBRARY:
-        // If a static library is resolving its device linking, it should
-        // be removed for other device linking
-        skippable =
-          item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
-        break;
-      default:
-        break;
-    }
-
-    if (skippable) {
-      continue;
-    }
-
-    std::set<std::string> langs;
-    item.Target->GetLanguages(langs, config);
-    if (langs.count("CUDA") == 0) {
-      continue;
+    if (item.Target) {
+      bool skip = false;
+      switch (item.Target->GetType()) {
+        case cmStateEnums::MODULE_LIBRARY:
+        case cmStateEnums::INTERFACE_LIBRARY:
+          skip = true;
+          break;
+        case cmStateEnums::STATIC_LIBRARY:
+          skip = item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
+          break;
+        default:
+          break;
+      }
+      if (skip) {
+        continue;
+      }
     }
 
     if (item.IsPath) {
+      // nvcc understands absolute paths to libraries ending in '.a' should
+      // be passed to nvlink.  Other extensions like '.so' or '.dylib' are
+      // rejected by the nvcc front-end even though nvlink knows to ignore
+      // them.  Bypass the front-end via '-Xnvlink'.
+      if (!cmHasLiteralSuffix(item.Value, ".a")) {
+        fout << "-Xnvlink ";
+      }
       fout << this->ConvertToOutputFormat(
         this->ConvertToLinkReference(item.Value));
     } else {

+ 1 - 1
Source/cmNinjaNormalTargetGenerator.cxx

@@ -187,7 +187,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(bool useResponseFile)
     std::string responseFlag;
     if (!useResponseFile) {
       vars.Objects = "$in";
-      vars.LinkLibraries = "$LINK_LIBRARIES";
+      vars.LinkLibraries = "$LINK_PATH $LINK_LIBRARIES";
     } else {
       std::string cmakeVarLang = "CMAKE_";
       cmakeVarLang += this->TargetLinkLanguage;

+ 1 - 0
Tests/CudaOnly/CMakeLists.txt

@@ -2,6 +2,7 @@
 ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
 ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
 ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
+ADD_TEST_MACRO(CudaOnly.LinkSystemDeviceLibraries CudaOnlyLinkSystemDeviceLibraries)
 ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
 ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
 ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)

+ 15 - 0
Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt

@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.8)
+project(CudaOnlyLinkSystemDeviceLibraries CUDA)
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_35,code=compute_35 -gencode arch=compute_35,code=sm_35")
+set(CMAKE_CUDA_STANDARD 11)
+
+add_executable(CudaOnlyLinkSystemDeviceLibraries main.cu)
+set_target_properties( CudaOnlyLinkSystemDeviceLibraries
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+target_link_libraries( CudaOnlyLinkSystemDeviceLibraries PRIVATE cublas_device)
+
+if(APPLE)
+  # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+  set_property(TARGET CudaOnlyLinkSystemDeviceLibraries PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()

+ 77 - 0
Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu

@@ -0,0 +1,77 @@
+
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+__global__ void deviceCublasSgemm(int n, float alpha, float beta,
+                                  const float* d_A, const float* d_B,
+                                  float* d_C)
+{
+  cublasHandle_t cnpHandle;
+  cublasStatus_t status = cublasCreate(&cnpHandle);
+
+  if (status != CUBLAS_STATUS_SUCCESS) {
+    return;
+  }
+
+  // Call function defined in the cublas_device system static library.
+  // This way we can verify that we properly pass system libraries to the
+  // device link line
+  status = cublasSgemm(cnpHandle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha,
+                       d_A, n, d_B, n, &beta, d_C, n);
+
+  cublasDestroy(cnpHandle);
+}
+
+int choose_cuda_device()
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+              << std::endl;
+    return 1;
+  }
+  for (int i = 0; i < nDevices; ++i) {
+    cudaDeviceProp prop;
+    cudaError_t err = cudaGetDeviceProperties(&prop, i);
+    if (err != cudaSuccess) {
+      std::cerr << "Could not retrieve properties from CUDA device " << i
+                << std::endl;
+      return 1;
+    }
+
+    if (prop.major > 3 || (prop.major == 3 && prop.minor >= 5)) {
+      err = cudaSetDevice(i);
+      if (err != cudaSuccess) {
+        std::cout << "Could not select CUDA device " << i << std::endl;
+      } else {
+        return 0;
+      }
+    }
+  }
+
+  std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
+            << std::endl;
+  return 1;
+}
+
+int main(int argc, char** argv)
+{
+  int ret = choose_cuda_device();
+  if (ret) {
+    return 0;
+  }
+
+  // initial values that will make sure that the cublasSgemm won't actually
+  // do any work
+  int n = 0;
+  float alpha = 1;
+  float beta = 1;
+  float* d_A = nullptr;
+  float* d_B = nullptr;
+  float* d_C = nullptr;
+  deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);
+
+  return 0;
+}