1
0
Эх сурвалжийг харах

CUDA: Visual Studio now properly delays device linking

Robert Maynard 8 жил өмнө
parent
commit
a36fb229ba

+ 68 - 0
Source/cmVisualStudio10TargetGenerator.cxx

@@ -116,6 +116,10 @@ cmVisualStudio10TargetGenerator::~cmVisualStudio10TargetGenerator()
        i != this->CudaOptions.end(); ++i) {
     delete i->second;
   }
+  for (OptionsMap::iterator i = this->CudaLinkOptions.begin();
+       i != this->CudaLinkOptions.end(); ++i) {
+    delete i->second;
+  }
   if (!this->BuildFileStream) {
     return;
   }
@@ -213,6 +217,9 @@ void cmVisualStudio10TargetGenerator::Generate()
     if (!this->ComputeCudaOptions()) {
       return;
     }
+    if (!this->ComputeCudaLinkOptions()) {
+      return;
+    }
     if (!this->ComputeMasmOptions()) {
       return;
     }
@@ -2524,6 +2531,66 @@ void cmVisualStudio10TargetGenerator::WriteCudaOptions(
   this->WriteString("</CudaCompile>\n", 2);
 }
 
+// Compute the CUDA link options for every configuration of this target.
+// Returns true without doing any work when CUDA is not enabled; otherwise
+// returns false as soon as one per-configuration computation fails.
+bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions()
+{
+  if (this->GlobalGenerator->IsCudaEnabled()) {
+    typedef std::vector<std::string>::const_iterator ConfigIterator;
+    for (ConfigIterator config = this->Configurations.begin();
+         config != this->Configurations.end(); ++config) {
+      bool const computed = this->ComputeCudaLinkOptions(*config);
+      if (!computed) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Build the CUDA link option set for a single configuration and store it
+// in CudaLinkOptions under that configuration's name.  Always returns true.
+bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
+  std::string const& configName)
+{
+  cmGlobalVisualStudio10Generator* vs10Generator =
+    static_cast<cmGlobalVisualStudio10Generator*>(this->GlobalGenerator);
+  CM_AUTO_PTR<Options> linkOptions(
+    new Options(this->LocalGenerator, Options::CudaCompiler,
+                vs10Generator->GetCudaFlagTable()));
+
+  // Device linking is requested only for executables, shared libraries
+  // and module libraries; every other target type gets "false".
+  const char* performDeviceLink = "false";
+  switch (this->GeneratorTarget->GetType()) {
+    case cmStateEnums::EXECUTABLE:
+    case cmStateEnums::SHARED_LIBRARY:
+    case cmStateEnums::MODULE_LIBRARY:
+      performDeviceLink = "true";
+      break;
+    default:
+      break;
+  }
+  linkOptions->AddFlag("PerformDeviceLink", performDeviceLink);
+
+  // The map takes over the raw pointer; the destructor deletes the
+  // entries stored in CudaLinkOptions.
+  this->CudaLinkOptions[configName] = linkOptions.release();
+  return true;
+}
+
+// Write the <CudaLink> element containing the stored CUDA link flags for
+// the given configuration.  Nothing is written for target types ordered
+// after MODULE_LIBRARY, when MSTools is not set, or when CUDA is disabled.
+void cmVisualStudio10TargetGenerator::WriteCudaLinkOptions(
+  std::string const& configName)
+{
+  bool const nothingToWrite =
+    this->GeneratorTarget->GetType() > cmStateEnums::MODULE_LIBRARY ||
+    !this->MSTools || !this->GlobalGenerator->IsCudaEnabled();
+  if (nothingToWrite) {
+    return;
+  }
+
+  this->WriteString("<CudaLink>\n", 2);
+  Options* storedOptions = this->CudaLinkOptions[configName];
+  storedOptions->OutputFlagMap(*this->BuildFileStream, "      ");
+  this->WriteString("</CudaLink>\n", 2);
+}
+
 bool cmVisualStudio10TargetGenerator::ComputeMasmOptions()
 {
   if (!this->GlobalGenerator->IsMasmEnabled()) {
@@ -3283,6 +3350,7 @@ void cmVisualStudio10TargetGenerator::WriteItemDefinitionGroups()
     }
     //    output link flags       <Link></Link>
     this->WriteLinkOptions(*i);
+    this->WriteCudaLinkOptions(*i);
     //    output lib flags       <Lib></Lib>
     this->WriteLibOptions(*i);
     //    output manifest flags  <Manifest></Manifest>

+ 6 - 0
Source/cmVisualStudio10TargetGenerator.h

@@ -101,6 +101,11 @@ private:
   bool ComputeCudaOptions(std::string const& config);
   void WriteCudaOptions(std::string const& config,
                         std::vector<std::string> const& includes);
+
+  bool ComputeCudaLinkOptions();
+  bool ComputeCudaLinkOptions(std::string const& config);
+  void WriteCudaLinkOptions(std::string const& config);
+
   bool ComputeMasmOptions();
   bool ComputeMasmOptions(std::string const& config);
   void WriteMasmOptions(std::string const& config,
@@ -154,6 +159,7 @@ private:
   OptionsMap ClOptions;
   OptionsMap RcOptions;
   OptionsMap CudaOptions;
+  OptionsMap CudaLinkOptions;
   OptionsMap MasmOptions;
   OptionsMap NasmOptions;
   OptionsMap LinkOptions;

+ 1 - 1
Tests/Cuda/Complex/dynamic.cu

@@ -37,7 +37,7 @@ EXPORT int choose_cuda_device()
                 << std::endl;
       return 1;
     }
-    if (prop.major >= 4) {
+    if (prop.major >= 3) {
       err = cudaSetDevice(i);
       if (err != cudaSuccess) {
         std::cout << "Could not select CUDA device " << i << std::endl;

+ 20 - 7
Tests/CudaOnly/SeparateCompilation/CMakeLists.txt

@@ -12,6 +12,7 @@ project (CudaOnlySeparateCompilation CUDA)
 string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CUDA_STANDARD 11)
+
 add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu)
 
 #Having file4/file5 in a shared library causes serious problems
@@ -22,12 +23,24 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
 target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
 
 add_executable(CudaOnlySeparateCompilation main.cu)
-target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
+target_link_libraries(CudaOnlySeparateCompilation
+                      PRIVATE CUDASeparateLibB)
+
+set_target_properties(CUDASeparateLibA
+                      CUDASeparateLibB
+                      PROPERTIES CUDA_SEPARABLE_COMPILATION ON
+                      POSITION_INDEPENDENT_CODE ON)
 
-set_target_properties( CUDASeparateLibA
-                       CUDASeparateLibB
-                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+if (CMAKE_GENERATOR MATCHES "^Visual Studio")
+  #Visual Studio CUDA integration will not perform device linking
+  #on a target that itself does not have GenerateRelocatableDeviceCode
+  #enabled.
+  set_target_properties(CudaOnlySeparateCompilation
+                        PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+endif()
 
-set_target_properties( CUDASeparateLibA
-                       CUDASeparateLibB
-                       PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (APPLE)
+  # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+  # the static cuda runtime can find it at runtime.
+  target_link_libraries(CudaOnlySeparateCompilation PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()

+ 53 - 0
Tests/CudaOnly/SeparateCompilation/main.cu

@@ -7,9 +7,62 @@
 int file4_launch_kernel(int x);
 int file5_launch_kernel(int x);
 
+// Select the first CUDA device whose major compute capability is at
+// least 3.  Returns 0 once a device has been selected.  Returns 1 when the
+// device count or a device's properties cannot be queried, or when no
+// suitable device could be selected.
+int choose_cuda_device()
+{
+  int deviceCount = 0;
+  cudaError_t status = cudaGetDeviceCount(&deviceCount);
+  if (status != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+              << std::endl;
+    return 1;
+  }
+
+  for (int device = 0; device < deviceCount; ++device) {
+    cudaDeviceProp properties;
+    status = cudaGetDeviceProperties(&properties, device);
+    if (status != cudaSuccess) {
+      std::cerr << "Could not retrieve properties from CUDA device " << device
+                << std::endl;
+      return 1;
+    }
+    if (properties.major < 3) {
+      continue;
+    }
+    // A device that cannot be selected is reported and the search goes on.
+    if (cudaSetDevice(device) == cudaSuccess) {
+      return 0;
+    }
+    std::cout << "Could not select CUDA device " << device << std::endl;
+  }
+
+  std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+            << std::endl;
+  return 1;
+}
+
 int main(int argc, char** argv)
 {
+  // When no device could be selected there is nothing to run, so the
+  // program exits with success instead of reporting a failure.
+  if (choose_cuda_device() != 0) {
+    return 0;
+  }
+
+  // Each launch helper is followed by a cudaGetLastError() check; the
+  // first reported error ends the program with exit code 1.
   file4_launch_kernel(42);
+  cudaError_t const file4Status = cudaGetLastError();
+  if (file4Status != cudaSuccess) {
+    std::cerr << "file4_launch_kernel: kernel launch failed: "
+              << cudaGetErrorString(file4Status) << std::endl;
+    return 1;
+  }
+
   file5_launch_kernel(42);
+  cudaError_t const file5Status = cudaGetLastError();
+  if (file5Status != cudaSuccess) {
+    std::cerr << "file5_launch_kernel: kernel launch failed: "
+              << cudaGetErrorString(file5Status) << std::endl;
+    return 1;
+  }
+
   return 0;
 }