Bläddra i källkod

CUDA: Add support for CUDA_ARCHITECTURES=native

CUDA 11.6 added the `nvcc -arch=native` flag to automatically compile
for the host GPUs' architectures.  Add support for specifying this
special `native` value in `CMAKE_CUDA_ARCHITECTURES` and
`CUDA_ARCHITECTURES`.  During the compiler ABI detection step,
detect the native architectures so we can pass them explicitly
when using Clang or older versions of nvcc.

Fixes: #22375
Brad King 3 år sedan
förälder
incheckning
d1b48bfabd

+ 5 - 0
Help/prop_tgt/CUDA_ARCHITECTURES.rst

@@ -34,6 +34,11 @@ The ``CUDA_ARCHITECTURES`` may be set to one of the following special values:
   Compile for all supported major real architectures, and the highest
   major virtual architecture.
 
+``native``
+  .. versionadded:: 3.24
+
+  Compile for the architecture(s) of the host's GPU(s).
+
 Examples
 ^^^^^^^^
 

+ 7 - 0
Help/release/dev/cuda-arch-native.rst

@@ -0,0 +1,7 @@
+cuda-arch-native
+----------------
+
+* The :variable:`CMAKE_CUDA_ARCHITECTURES` variable and associated
+  :prop_tgt:`CUDA_ARCHITECTURES` target property now support the
+  special ``native`` value to compile for the architecture(s)
+  of the host's GPU(s).

+ 1 - 0
Modules/CMakeCUDACompiler.cmake.in

@@ -55,6 +55,7 @@ set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@")
 
 set(CMAKE_CUDA_ARCHITECTURES_ALL "@CMAKE_CUDA_ARCHITECTURES_ALL@")
 set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR "@CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR@")
+set(CMAKE_CUDA_ARCHITECTURES_NATIVE "@CMAKE_CUDA_ARCHITECTURES_NATIVE@")
 
 set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")
 

+ 31 - 2
Modules/CMakeCUDACompilerABI.cu

@@ -2,6 +2,10 @@
 #  error "A C or C++ compiler has been selected for CUDA"
 #endif
 
+#include <cstdio>
+
+#include <cuda_runtime.h>
+
 #include "CMakeCompilerABI.h"
 
 int main(int argc, char* argv[])
@@ -13,6 +17,31 @@ int main(int argc, char* argv[])
 #if defined(ABI_ID)
   require += info_abi[argc];
 #endif
-  (void)argv;
-  return require;
+  static_cast<void>(argv);
+
+  int count = 0;
+  if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0) {
+    std::fprintf(stderr, "No CUDA devices found.\n");
+    return -1;
+  }
+
+  int found = 0;
+  const char* sep = "";
+  for (int device = 0; device < count; ++device) {
+    cudaDeviceProp prop;
+    if (cudaGetDeviceProperties(&prop, device) == cudaSuccess) {
+      std::printf("%s%d%d", sep, prop.major, prop.minor);
+      sep = ";";
+      found = 1;
+    }
+  }
+
+  if (!found) {
+    std::fprintf(stderr, "No CUDA architecture detected from any devices.\n");
+    // Convince the compiler that the non-zero return value depends
+    // on the info strings so they are not optimized out.
+    return require ? -1 : 1;
+  }
+
+  return 0;
 }

+ 13 - 2
Modules/CMakeDetermineCUDACompiler.cmake

@@ -249,7 +249,7 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
       set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}")
     endif()
 
-    # Make the all and all-major architecture information available.
+    # Make the all, all-major, and native architecture information available.
     # FIXME(#23161): Defer architecture detection until compiler testing.
     include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
   endif()
@@ -291,6 +291,17 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
           set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR})
         endif()
       endif()
+    elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
+      # For sufficiently new NVCC we can just use the 'native' value directly.
+      # For VS we don't test since we can't find nvcc this early (see #23161).
+      if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.6)
+        string(APPEND nvcc_test_flags " -arch=${CMAKE_CUDA_ARCHITECTURES}")
+        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
+      elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
+        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
+      else()
+        set(architectures_test ${_CUDA_ARCHITECTURES_NATIVE})
+      endif()
     elseif(CMAKE_CUDA_ARCHITECTURES OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
       # Explicit architectures.  Test them during detection.
       set(architectures_explicit TRUE)
@@ -636,7 +647,7 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
       message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
     endif()
   endif()
-elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major)$")
+elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major|native)$")
   # Sort since order mustn't matter.
   list(SORT architectures_detected)
   list(SORT architectures_tested)

+ 8 - 0
Modules/CMakeDetermineCompilerABI.cmake

@@ -26,6 +26,14 @@ function(CMAKE_DETERMINE_COMPILER_ABI lang src)
     if(DEFINED CMAKE_${lang}_VERBOSE_COMPILE_FLAG)
       set(COMPILE_DEFINITIONS "${CMAKE_${lang}_VERBOSE_COMPILE_FLAG}")
     endif()
+    # CUDA-specific adjustments applied before the ABI detection project
+    # is configured.
+    if(lang STREQUAL "CUDA")
+      if(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
+        # We are about to detect the native architectures, so we do
+        # not yet know them.  Use all architectures during detection.
+        set(CMAKE_CUDA_ARCHITECTURES "all")
+      endif()
+      # NOTE(review): presumably the CUDA runtime is linked statically so
+      # that the resulting test binary can be executed afterwards to report
+      # the host GPU architectures -- confirm against the compiler test step.
+      set(CMAKE_CUDA_RUNTIME_LIBRARY "Static")
+    endif()
     if(NOT "x${CMAKE_${lang}_COMPILER_ID}" STREQUAL "xMSVC")
       # Avoid adding our own platform standard libraries for compilers
       # from which we might detect implicit link libraries.

+ 1 - 1
Modules/CMakeDetermineCompilerId.cmake

@@ -495,7 +495,7 @@ Id flags: ${testflags} ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS}
       if(CMAKE_VS_PLATFORM_NAME STREQUAL x64)
         set(cuda_target "<TargetMachinePlatform>64</TargetMachinePlatform>")
       endif()
-      if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major)$")
+      if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major|native)$")
         foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
           string(REGEX MATCH "[0-9]+" arch_name "${arch}")
           string(APPEND cuda_codegen "compute_${arch_name},sm_${arch_name};")

+ 25 - 0
Modules/CMakeTestCUDACompiler.cmake

@@ -21,6 +21,31 @@ if(CMAKE_CUDA_ABI_COMPILED)
   # The compiler worked so skip dedicated test below.
   set(CMAKE_CUDA_COMPILER_WORKS TRUE)
   message(STATUS "Check for working CUDA compiler: ${CMAKE_CUDA_COMPILER} - skipped")
+
+  # Run the test binary to detect the native architectures.
+  # On success the program's output, a ";"-separated list of
+  # <major><minor> values, is stored without duplicates in
+  # CMAKE_CUDA_ARCHITECTURES_NATIVE.  On failure the variable is left
+  # untouched and the captured output is appended to CMakeError.log.
+  execute_process(COMMAND "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCompilerABI_CUDA.bin"
+    RESULT_VARIABLE _CUDA_ARCHS_RESULT
+    OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
+    ERROR_VARIABLE  _CUDA_ARCHS_OUTPUT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+  if(_CUDA_ARCHS_RESULT EQUAL 0)
+    set(CMAKE_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}")
+    list(REMOVE_DUPLICATES CMAKE_CUDA_ARCHITECTURES_NATIVE)
+  else()
+    # RESULT_VARIABLE holds either an integer exit code or a string
+    # describing why the process could not be run.  Only the string form
+    # adds information beyond the captured output, so include it in the log.
+    if(NOT "${_CUDA_ARCHS_RESULT}" MATCHES "^-?[0-9]+$")
+      set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})")
+    else()
+      set(_CUDA_ARCHS_STATUS "")
+    endif()
+    # Indent every line of the captured output for readability in the log.
+    string(REPLACE "\n" "\n  " _CUDA_ARCHS_OUTPUT "  ${_CUDA_ARCHS_OUTPUT}")
+    file(APPEND "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log"
+      "Detecting the CUDA native architecture(s) failed${_CUDA_ARCHS_STATUS} with "
+      "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n")
+  endif()
+  # Do not leak the temporaries into the rest of the compiler test.
+  unset(_CUDA_ARCHS_EXE)
+  unset(_CUDA_ARCHS_RESULT)
+  unset(_CUDA_ARCHS_OUTPUT)
+  unset(_CUDA_ARCHS_STATUS)
 endif()
 
 # This file is used by EnableLanguage in cmGlobalGenerator to

+ 40 - 0
Modules/CUDA/architectures.cmake

@@ -44,3 +44,43 @@ if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4
    AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang"))
   list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
 endif()
+
+# FIXME(#23161): Detect architectures early since we test them during
+# compiler detection.  We already have code to detect them later during
+# compiler testing, so we should not need to do this here.
+#
+# Compile CMakeCUDACompilerABI.cu with nvcc and run the result.  On success
+# its output, a ";"-separated list of <major><minor> values, is stored
+# without duplicates in _CUDA_ARCHITECTURES_NATIVE.  On failure that
+# variable is not set and the captured output is appended to CMakeError.log.
+if(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
+  set(_CUDA_ARCHS_EXE "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCUDACompilerArchs.bin")
+  # Step 1: build the detection program.
+  execute_process(
+    COMMAND "${_CUDA_NVCC_EXECUTABLE}" -o "${_CUDA_ARCHS_EXE}" --cudart=static "${CMAKE_ROOT}/Modules/CMakeCUDACompilerABI.cu"
+    RESULT_VARIABLE _CUDA_ARCHS_RESULT
+    OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
+    ERROR_VARIABLE  _CUDA_ARCHS_OUTPUT
+    )
+  # Step 2: run it, but only if the build succeeded.  The result and output
+  # variables are reused so the failure branch below reports whichever
+  # step failed.
+  if(_CUDA_ARCHS_RESULT EQUAL 0)
+    execute_process(
+      COMMAND "${_CUDA_ARCHS_EXE}"
+      RESULT_VARIABLE _CUDA_ARCHS_RESULT
+      OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
+      ERROR_VARIABLE  _CUDA_ARCHS_OUTPUT
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+      )
+  endif()
+  if(_CUDA_ARCHS_RESULT EQUAL 0)
+    set(_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}")
+    list(REMOVE_DUPLICATES _CUDA_ARCHITECTURES_NATIVE)
+  else()
+    # RESULT_VARIABLE holds either an integer exit code or a string
+    # describing why the process could not be run.  Only the string form
+    # adds information beyond the captured output, so include it in the log.
+    if(NOT "${_CUDA_ARCHS_RESULT}" MATCHES "^-?[0-9]+$")
+      set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})")
+    else()
+      set(_CUDA_ARCHS_STATUS "")
+    endif()
+    # Indent every line of the captured output for readability in the log.
+    string(REPLACE "\n" "\n  " _CUDA_ARCHS_OUTPUT "  ${_CUDA_ARCHS_OUTPUT}")
+    file(APPEND "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log"
+      "Detecting the CUDA native architecture(s) failed${_CUDA_ARCHS_STATUS} with "
+      "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n")
+  endif()
+  # Do not leak the temporaries into the including module.
+  unset(_CUDA_ARCHS_EXE)
+  unset(_CUDA_ARCHS_RESULT)
+  unset(_CUDA_ARCHS_OUTPUT)
+  unset(_CUDA_ARCHS_STATUS)
+endif()

+ 17 - 0
Source/cmGeneratorTarget.cxx

@@ -3467,6 +3467,23 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
       property =
         *this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR");
     }
+  } else if (property == "native") {
+    cmValue native =
+      this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_NATIVE");
+    if (native.IsEmpty()) {
+      this->Makefile->IssueMessage(
+        MessageType::FATAL_ERROR,
+        "CUDA_ARCHITECTURES is set to \"native\", but no GPU was detected.");
+    }
+    if (compiler == "NVIDIA" &&
+        cmSystemTools::VersionCompare(
+          cmSystemTools::OP_GREATER_EQUAL,
+          this->Makefile->GetDefinition("CMAKE_CUDA_COMPILER_VERSION"),
+          "11.6")) {
+      flags = cmStrCat(flags, " -arch=", property);
+      return;
+    }
+    property = *native;
   }
 
   struct CudaArchitecture

+ 2 - 1
Source/cmVisualStudioGeneratorOptions.cxx

@@ -182,7 +182,8 @@ void cmVisualStudioGeneratorOptions::FixCudaCodeGeneration()
   // First entries for the -arch=<arch> [-code=<code>,...] pair.
   if (!arch.empty()) {
     std::string arch_name = arch[0];
-    if (arch_name == "all" || arch_name == "all-major") {
+    if (arch_name == "all" || arch_name == "all-major" ||
+        arch_name == "native") {
       AppendFlagString("AdditionalOptions", "-arch=" + arch_name);
       return;
     }

+ 12 - 1
Tests/CudaOnly/ArchSpecial/CMakeLists.txt

@@ -25,6 +25,7 @@ function(verify_output flag)
   endforeach()
 
   list(SORT command_archs)
+  list(REMOVE_DUPLICATES command_archs)
   if(NOT "${command_archs}" STREQUAL "${architectures}")
     message(FATAL_ERROR "Architectures used for \"${flag}\" don't match the reference (\"${command_archs}\" != \"${architectures}\").")
   endif()
@@ -50,7 +51,17 @@ try_compile(all_major_archs_compiles
   )
 verify_output(all-major)
 
-if(all_archs_compiles AND all_major_archs_compiles)
+set(CMAKE_CUDA_ARCHITECTURES native)
+try_compile(native_archs_compiles
+  ${CMAKE_CURRENT_BINARY_DIR}/try_compile/native_archs_compiles
+  ${CMAKE_CURRENT_SOURCE_DIR}/main.cu
+  COMPILE_DEFINITIONS ${try_compile_flags}
+  OUTPUT_VARIABLE output
+  )
+verify_output(native)
+
+if(all_archs_compiles AND all_major_archs_compiles AND native_archs_compiles)
+  set(CMAKE_CUDA_ARCHITECTURES all)
   add_executable(CudaOnlyArchSpecial main.cu)
   target_compile_options(CudaOnlyArchSpecial PRIVATE ${compile_options})
 endif()

+ 1 - 0
Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake

@@ -2,6 +2,7 @@ include(RunCMake)
 
 run_cmake(architectures-all)
 run_cmake(architectures-all-major)
+run_cmake(architectures-native)
 run_cmake(architectures-empty)
 run_cmake(architectures-invalid)
 

+ 1 - 0
Tests/RunCMake/CUDA_architectures/architectures-all-major-stdout.txt

@@ -1,3 +1,4 @@
 -- CMAKE_CUDA_ARCHITECTURES='all-major'
 -- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
 -- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
+-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'

+ 1 - 0
Tests/RunCMake/CUDA_architectures/architectures-all-major.cmake

@@ -3,3 +3,4 @@ enable_language(CUDA)
 message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
 message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'")
 message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'")
+message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'")

+ 1 - 0
Tests/RunCMake/CUDA_architectures/architectures-all-stdout.txt

@@ -1,3 +1,4 @@
 -- CMAKE_CUDA_ARCHITECTURES='all'
 -- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
 -- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
+-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'

+ 1 - 0
Tests/RunCMake/CUDA_architectures/architectures-all.cmake

@@ -3,3 +3,4 @@ enable_language(CUDA)
 message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
 message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'")
 message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'")
+message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'")

+ 4 - 0
Tests/RunCMake/CUDA_architectures/architectures-native-stdout.txt

@@ -0,0 +1,4 @@
+-- CMAKE_CUDA_ARCHITECTURES='native'
+-- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
+-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
+-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'

+ 6 - 0
Tests/RunCMake/CUDA_architectures/architectures-native.cmake

@@ -0,0 +1,6 @@
+# Request the special "native" value before enabling CUDA, then print the
+# architecture variables so the expected-stdout patterns can match them.
+set(CMAKE_CUDA_ARCHITECTURES "native")
+enable_language(CUDA)
+message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
+message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'")
+message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'")
+message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'")