瀏覽代碼

Merge topic 'nvcc_dlink_dedup_static_libs'

fd0523a215 CUDA: Properly de-duplicate libs when doing device linking
b07c71831c CUDA: Add a test to verify device linking can handle circular deps

Acked-by: Kitware Robot <[email protected]>
Merge-request: !2214
Brad King 7 年之前
父節點
當前提交
0ab23b9a1e

+ 17 - 5
Source/cmLinkLineDeviceComputer.cxx

@@ -3,7 +3,9 @@
 
 
 #include "cmLinkLineDeviceComputer.h"
 #include "cmLinkLineDeviceComputer.h"
 
 
+#include <set>
 #include <sstream>
 #include <sstream>
+#include <utility>
 
 
 #include "cmAlgorithms.h"
 #include "cmAlgorithms.h"
 #include "cmComputeLinkInformation.h"
 #include "cmComputeLinkInformation.h"
@@ -28,6 +30,12 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
 {
 {
   // Write the library flags to the build rule.
   // Write the library flags to the build rule.
   std::ostringstream fout;
   std::ostringstream fout;
+
+  // Generate the unique set of link items when device linking.
+  // The nvcc device linker is designed so that each static library
+  // with device symbols only needs to be listed once as it doesn't
+  // care about link order.
+  std::set<std::string> emitted;
   typedef cmComputeLinkInformation::ItemVector ItemVector;
   typedef cmComputeLinkInformation::ItemVector ItemVector;
   ItemVector const& items = cli.GetItems();
   ItemVector const& items = cli.GetItems();
   std::string config = cli.GetConfig();
   std::string config = cli.GetConfig();
@@ -50,20 +58,24 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
       }
       }
     }
     }
 
 
+    std::string out;
     if (item.IsPath) {
     if (item.IsPath) {
       // nvcc understands that absolute paths to libraries ending in '.a'
       // nvcc understands that absolute paths to libraries ending in '.a'
       // should be passed to nvlink.  Other extensions like '.so' or '.dylib'
       // should be passed to nvlink.  Other extensions like '.so' or '.dylib'
       // are rejected by the nvcc front-end even though nvlink knows to ignore
       // are rejected by the nvcc front-end even though nvlink knows to ignore
       // them.  Bypass the front-end via '-Xnvlink'.
       // them.  Bypass the front-end via '-Xnvlink'.
       if (!cmHasLiteralSuffix(item.Value, ".a")) {
       if (!cmHasLiteralSuffix(item.Value, ".a")) {
-        fout << "-Xnvlink ";
+        out += "-Xnvlink ";
       }
       }
-      fout << this->ConvertToOutputFormat(
-        this->ConvertToLinkReference(item.Value));
+      out +=
+        this->ConvertToOutputFormat(this->ConvertToLinkReference(item.Value));
     } else {
     } else {
-      fout << item.Value;
+      out += item.Value;
+    }
+
+    if (emitted.insert(out).second) {
+      fout << out << " ";
     }
     }
-    fout << " ";
   }
   }
 
 
   if (!stdLibString.empty()) {
   if (!stdLibString.empty()) {

+ 1 - 0
Tests/CudaOnly/CMakeLists.txt

@@ -1,4 +1,5 @@
 
 
+ADD_TEST_MACRO(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
 ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
 ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
 ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
 ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
 ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
 ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)

+ 34 - 0
Tests/CudaOnly/CircularLinkLine/CMakeLists.txt

@@ -0,0 +1,34 @@
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyCircularLinkLine CUDA)
+
+# Goals for this example:
+# Verify that we de-duplicate the device link line
+# Verify that a de-duplicated link line still works with circular static libraries
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30]")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+
+add_library(CUDACircularDeviceLinking1 STATIC file1.cu)
+add_library(CUDACircularDeviceLinking2 STATIC file2.cu)
+add_library(CUDACircularDeviceLinking3 STATIC file3.cu)
+add_executable(CudaOnlyCircularLinkLine main.cu)
+
+target_link_libraries(CUDACircularDeviceLinking1 PUBLIC CUDACircularDeviceLinking2)
+target_link_libraries(CUDACircularDeviceLinking2 PUBLIC CUDACircularDeviceLinking3)
+target_link_libraries(CUDACircularDeviceLinking3 PUBLIC CUDACircularDeviceLinking1)
+
+target_link_libraries(CudaOnlyCircularLinkLine PRIVATE CUDACircularDeviceLinking3)
+
+
+set_target_properties(CUDACircularDeviceLinking1
+                      PROPERTIES
+                      CUDA_SEPARABLE_COMPILATION ON)
+
+set_target_properties(CUDACircularDeviceLinking2
+                      PROPERTIES
+                      CUDA_SEPARABLE_COMPILATION ON)
+
+set_target_properties(CUDACircularDeviceLinking3
+                      PROPERTIES
+                      CUDA_SEPARABLE_COMPILATION ON)

+ 6 - 0
Tests/CudaOnly/CircularLinkLine/file1.cu

@@ -0,0 +1,6 @@
+
+extern __device__ int file2_func(int);
+int __device__ file1_func(int x)
+{
+  return file2_func(x);
+}

+ 6 - 0
Tests/CudaOnly/CircularLinkLine/file2.cu

@@ -0,0 +1,6 @@
+
+extern __device__ int file3_func(int);
+int __device__ file2_func(int x)
+{
+  return x + file3_func(x);
+}

+ 8 - 0
Tests/CudaOnly/CircularLinkLine/file3.cu

@@ -0,0 +1,8 @@
+
+extern __device__ int file1_func(int);
+int __device__ file3_func(int x)
+{
+  if (x > 0)
+    return file1_func(-x);
+  return x;
+}

+ 5 - 0
Tests/CudaOnly/CircularLinkLine/main.cu

@@ -0,0 +1,5 @@
+
+int main(int argc, char** argv)
+{
+  return 0;
+}