소스 검색

Merge topic 'cuda_vs_arch_all'

daf372c4d6 CUDA: Fix issuing error if default architecture detection fails
7a0d098352 CUDA: Error on empty/invalid CMAKE_CUDA_ARCHITECTURES set by user
d19273bc7b CUDA: Support all and all-major on Visual Studio
5f667d783a CUDA: Actually use reverse architecture deprecation order for Clang

Acked-by: Kitware Robot <[email protected]>
Acked-by: Brad King <[email protected]>
Merge-request: !6912
Brad King 3 년 전
부모
커밋
ba069c10dd

+ 11 - 0
Help/release/dev/cuda-compiler-detection-robustness.rst

@@ -0,0 +1,11 @@
+cuda-compiler-detection-robustness
+----------------------------------
+
+* CUDA compiler detection now issues an error whenever the default
+  architecture(s) are required (see :policy:`CMP0104`) but cannot be detected.
+
+* CUDA compiler detection now correctly handles ``OFF`` for
+  :variable:`CMAKE_CUDA_ARCHITECTURES` on Clang.
+
+* CUDA compiler detection now supports the theoretical case of multiple default
+  architectures.

+ 5 - 0
Help/release/dev/cuda-invalid-architectures.rst

@@ -0,0 +1,5 @@
+cuda-invalid-architectures
+--------------------------
+
+* CUDA compiler detection now tries to detect invalid architectures in
+  :variable:`CMAKE_CUDA_ARCHITECTURES` and issues an error if any are found.

+ 39 - 34
Modules/CMakeDetermineCUDACompiler.cmake

@@ -257,7 +257,7 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
   endif()
 
   # Append user-specified architectures.
-  if(CMAKE_CUDA_ARCHITECTURES)
+  if(DEFINED CMAKE_CUDA_ARCHITECTURES)
     if("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall")
       string(APPEND nvcc_test_flags " -arch=all")
       set(architectures_mode all)
@@ -279,11 +279,18 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     set(CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS ON)
   endif()
 
+  # The rest of the code treats an empty value as equivalent to "use the defaults".
+  # Error out early so that an empty value does not cause confusing errors later.
+  # Note that this also catches invalid non-numerical values such as "a".
+  if(architectures_mode STREQUAL "explicit" AND "${tested_architectures}" STREQUAL "")
+    message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be valid if set.")
+  endif()
+
   if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
     if(NOT CMAKE_CUDA_ARCHITECTURES)
       # Clang doesn't automatically select an architecture supported by the SDK.
       # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups).
-      foreach(arch "20" "30" "52")
+      foreach(arch "52" "30" "20")
         list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch}")
       endforeach()
     endif()
@@ -346,18 +353,12 @@ if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
   set(_SET_CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT
     "set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT \"${CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT}\")")
 elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
-  if(NOT CMAKE_CUDA_ARCHITECTURES)
-    # Find the architecture that we successfully compiled using and set it as the default.
-    string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
-    set(detected_architecture "${CMAKE_MATCH_1}")
-  else()
-    string(REGEX MATCHALL "-target-cpu sm_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+  string(REGEX MATCHALL "-target-cpu sm_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
 
-    foreach(cpu ${target_cpus})
-      string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${cpu}")
-      list(APPEND architectures "${CMAKE_MATCH_1}")
-    endforeach()
-  endif()
+  foreach(cpu ${target_cpus})
+    string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${cpu}")
+    list(APPEND architectures_detected "${CMAKE_MATCH_1}")
+  endforeach()
 
   # Find target directory when crosscompiling.
   if(CMAKE_CROSSCOMPILING)
@@ -583,28 +584,25 @@ if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
       "Failed to detect CUDA nvcc include information:\n${_nvcc_log}\n\n")
   endif()
 
-  # Parse default CUDA architecture.
-  cmake_policy(GET CMP0104 _CUDA_CMP0104)
-  if(NOT CMAKE_CUDA_ARCHITECTURES AND _CUDA_CMP0104 STREQUAL "NEW")
-    string(REGEX MATCH "arch[ =]compute_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
-    set(detected_architecture "${CMAKE_MATCH_1}")
-  elseif(CMAKE_CUDA_ARCHITECTURES)
-    string(REGEX MATCHALL "-arch compute_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
-
-    foreach(cpu ${target_cpus})
-      string(REGEX MATCH "-arch compute_([0-9]+)" dont_care "${cpu}")
-      list(APPEND architectures "${CMAKE_MATCH_1}")
-    endforeach()
-  endif()
+  string(REGEX MATCHALL "-arch compute_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+
+  foreach(cpu ${target_cpus})
+    string(REGEX MATCH "-arch compute_([0-9]+)" dont_care "${cpu}")
+    list(APPEND architectures_detected "${CMAKE_MATCH_1}")
+  endforeach()
 endif()
 
 # If the user didn't set the architectures, then set them to a default.
 # If the user did, then make sure those architectures worked.
-if(DEFINED detected_architecture AND "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
-  set(CMAKE_CUDA_ARCHITECTURES "${detected_architecture}" CACHE STRING "CUDA architectures")
+if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
+  cmake_policy(GET CMP0104 _CUDA_CMP0104)
+
+  if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" OR _CUDA_CMP0104 STREQUAL "NEW")
+    set(CMAKE_CUDA_ARCHITECTURES "${architectures_detected}" CACHE STRING "CUDA architectures")
 
-  if(NOT CMAKE_CUDA_ARCHITECTURES)
-    message(FATAL_ERROR "Failed to find a working CUDA architecture.")
+    if(NOT CMAKE_CUDA_ARCHITECTURES)
+      message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+    endif()
   endif()
 elseif(architectures AND (architectures_mode STREQUAL "xall" OR
                           architectures_mode STREQUAL "xall-major"))
@@ -617,9 +615,9 @@ elseif(architectures AND (architectures_mode STREQUAL "xall" OR
       "instead.")
   endif()
 
-elseif(architectures AND architectures_mode STREQUAL "xexplicit")
+elseif(architectures_mode STREQUAL "xexplicit")
   # Sort since order mustn't matter.
-  list(SORT architectures)
+  list(SORT architectures_detected)
   list(SORT tested_architectures)
 
   # We don't distinguish real/virtual architectures during testing.
@@ -627,12 +625,19 @@ elseif(architectures AND architectures_mode STREQUAL "xexplicit")
   # Thus we need to remove duplicates before checking if they're equal.
   list(REMOVE_DUPLICATES tested_architectures)
 
-  if(NOT "${architectures}" STREQUAL "${tested_architectures}")
+  # Print the actual architectures for generic values (all and all-major).
+  if(NOT DEFINED architectures_explicit)
+    set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${tested_architectures})")
+  else()
+    set(architectures_error "${tested_architectures}")
+  endif()
+
+  if(NOT "${architectures_detected}" STREQUAL "${tested_architectures}")
     message(FATAL_ERROR
       "The CMAKE_CUDA_ARCHITECTURES:\n"
       "  ${CMAKE_CUDA_ARCHITECTURES}\n"
       "do not all work with this compiler.  Try:\n"
-      "  ${architectures}\n"
+      "  ${architectures_detected}\n"
       "instead.")
   endif()
 endif()

+ 4 - 0
Source/cmVisualStudioGeneratorOptions.cxx

@@ -182,6 +182,10 @@ void cmVisualStudioGeneratorOptions::FixCudaCodeGeneration()
   // First entries for the -arch=<arch> [-code=<code>,...] pair.
   if (!arch.empty()) {
     std::string arch_name = arch[0];
+    if (arch_name == "all" || arch_name == "all-major") {
+      AppendFlagString("AdditionalOptions", "-arch=" + arch_name);
+      return;
+    }
     std::vector<std::string> codes;
     if (!code.empty()) {
       codes = cmTokenize(code[0], ",");

+ 4 - 0
Tests/RunCMake/CMakeLists.txt

@@ -534,6 +534,10 @@ add_RunCMake_test(no_install_prefix)
 add_RunCMake_test(configure_file)
 add_RunCMake_test(CTestTimeout -DTIMEOUT=${CTestTestTimeout_TIME})
 add_RunCMake_test(CTestTimeoutAfterMatch)
+if(CMake_TEST_CUDA)
+  add_RunCMake_test(CUDA_architectures)
+  set_property(TEST RunCMake.CUDA_architectures APPEND PROPERTY LABELS "CUDA")
+endif()
 add_RunCMake_test(DependencyGraph -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER})
 
 # ctresalloc links against CMakeLib and CTestLib, which means it can't be built

+ 3 - 0
Tests/RunCMake/CUDA_architectures/CMakeLists.txt

@@ -0,0 +1,3 @@
+cmake_minimum_required(VERSION 3.22)
+project(${RunCMake_TEST} NONE)
+include(${RunCMake_TEST}.cmake)

+ 4 - 0
Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake

@@ -0,0 +1,4 @@
+include(RunCMake)
+
+run_cmake(architectures-empty)
+run_cmake(architectures-invalid)

+ 1 - 0
Tests/RunCMake/CUDA_architectures/architectures-empty-result.txt

@@ -0,0 +1 @@
+1

+ 5 - 0
Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt

@@ -0,0 +1,5 @@
+^CMake Error at .*/Modules/CMakeDetermineCUDACompiler\.cmake:[0-9]+ \(message\):
+  CMAKE_CUDA_ARCHITECTURES must be valid if set\.
+Call Stack \(most recent call first\):
+  architectures-empty\.cmake:2 \(enable_language\)
+  CMakeLists\.txt:3 \(include\)

+ 2 - 0
Tests/RunCMake/CUDA_architectures/architectures-empty.cmake

@@ -0,0 +1,2 @@
+set(CMAKE_CUDA_ARCHITECTURES "")
+enable_language(CUDA)

+ 1 - 0
Tests/RunCMake/CUDA_architectures/architectures-invalid-result.txt

@@ -0,0 +1 @@
+1

+ 5 - 0
Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt

@@ -0,0 +1,5 @@
+^CMake Error at .*/Modules/CMakeDetermineCUDACompiler\.cmake:[0-9]+ \(message\):
+  CMAKE_CUDA_ARCHITECTURES must be valid if set\.
+Call Stack \(most recent call first\):
+  architectures-invalid\.cmake:2 \(enable_language\)
+  CMakeLists\.txt:3 \(include\)$

+ 2 - 0
Tests/RunCMake/CUDA_architectures/architectures-invalid.cmake

@@ -0,0 +1,2 @@
+set(CMAKE_CUDA_ARCHITECTURES "invalid")
+enable_language(CUDA)