Browse Source

Merge topic 'cuda-arch-verify'

c267ed205a CUDA: Defer architecture testing to the compiler testing step

Acked-by: Kitware Robot <[email protected]>
Acked-by: Robert Maynard <[email protected]>
Acked-by: Raul Tambre <[email protected]>
Merge-request: !7202
Brad King 3 years ago
parent
commit
d35f1c9bee

+ 20 - 93
Modules/CMakeDetermineCUDACompiler.cmake

@@ -248,10 +248,6 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     if(CMAKE_CUDA_COMPILER_ID_OUTPUT MATCHES [=[V([0-9]+\.[0-9]+\.[0-9]+)]=])
       set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}")
     endif()
-
-    # Make the all, all-major, and native architecture information available.
-    # FIXME(#23161): Defer architecture detection until compiler testing.
-    include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
   endif()
 
   set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
@@ -273,76 +269,34 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     endif()
   endif()
 
-  # FIXME(#23161): Defer architecture testing until compiler testing.
-  if(DEFINED CMAKE_CUDA_ARCHITECTURES)
-    if(CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major)$")
-      # For sufficiently new NVCC we can just use the all and all-major flags.
-      # For VS we don't test since we can't figure out the version this early (see #23161).
-      # For others select based on version.
-      if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.5)
-        string(APPEND nvcc_test_flags " -arch=${CMAKE_CUDA_ARCHITECTURES}")
-        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
-      elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
-        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
-      else()
-        if(CMAKE_CUDA_ARCHITECTURES STREQUAL "all")
-          set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL})
-        elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "all-major")
-          set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR})
-        endif()
-      endif()
-    elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
-      # For sufficiently new NVCC we can just use the 'native' value directly.
-      # For VS we don't test since we can't find nvcc this early (see #23161).
-      if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.6)
-        string(APPEND nvcc_test_flags " -arch=${CMAKE_CUDA_ARCHITECTURES}")
-        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
-      elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
-        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
-      else()
-        set(architectures_test ${_CUDA_ARCHITECTURES_NATIVE})
-      endif()
-    elseif(CMAKE_CUDA_ARCHITECTURES OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
-      # Explicit architectures.  Test them during detection.
-      set(architectures_explicit TRUE)
-      set(architectures_test ${CMAKE_CUDA_ARCHITECTURES})
-    endif()
-  endif()
-
-  foreach(arch ${architectures_test})
-    # Strip specifiers as PTX vs binary doesn't matter.
-    string(REGEX MATCH "[0-9]+" arch_name "${arch}")
-    string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
-    string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
-    list(APPEND architectures_tested "${arch_name}")
-  endforeach()
-
   # Rest of the code treats an empty value as equivalent to "use the defaults".
   # Error out early to prevent confusing errors as a result of this.
   # Note that this also catches invalid non-numerical values such as "a".
-  if(DEFINED architectures_explicit AND "${architectures_tested}" STREQUAL "")
-    message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be valid if set.")
+  # Validate a user-provided CMAKE_CUDA_ARCHITECTURES before compiler
+  # identification runs.  A false value (e.g. OFF) skips the format check.
+  if(DEFINED CMAKE_CUDA_ARCHITECTURES)
+    if(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
+      message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be non-empty if set.")
+    # Each list entry is an integer optionally suffixed by "-real" or
+    # "-virtual" (e.g. "70-real;70-virtual").  Empty list elements remain
+    # tolerated, as they were by the previous, stricter pattern.
+    elseif(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^([0-9]+(-real|-virtual)?(;[0-9]+(-real|-virtual)?|;)*|all|all-major|native)$")
+      # The wording below is matched by the architectures-invalid test's
+      # expected stderr, so it is intentionally left unchanged.
+      message(FATAL_ERROR
+        "CMAKE_CUDA_ARCHITECTURES:\n"
+        "  ${CMAKE_CUDA_ARCHITECTURES}\n"
+        "is not one of the following:\n"
+        "* a semicolon-separated list of integers\n"
+        "* a special value: all, all-major, native\n"
+        )
+    endif()
   endif()
 
   if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
-    if(NOT CMAKE_CUDA_ARCHITECTURES)
-      # Clang doesn't automatically select an architecture supported by the SDK.
-      # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups).
-      foreach(arch "52" "30" "20")
-        list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch}")
-      endforeach()
-    endif()
-
-    # If the user specified CMAKE_CUDA_ARCHITECTURES this will include all the architecture flags.
-    # Otherwise this won't include any architecture flags and we'll fallback to Clang's defaults.
-    list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags}")
+    # Clang doesn't automatically select an architecture supported by the SDK.
+    # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups).
+    foreach(arch "52" "30" "20")
+      list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch}")
+    endforeach()
   elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
     list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${nvcc_test_flags}")
   endif()
 
   # We perform compiler identification for a second time to extract implicit linking info and host compiler for NVCC.
-  # We also use it to verify that CMAKE_CUDA_ARCHITECTURES and additionally on Clang that CUDA toolkit path works.
-  # The latter could be done during compiler testing in the future to avoid doing this for Clang.
   # We need to unset the compiler ID otherwise CMAKE_DETERMINE_COMPILER_ID() doesn't work.
   set(CMAKE_CUDA_COMPILER_ID)
   set(CMAKE_CUDA_PLATFORM_ID)
@@ -357,12 +311,12 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY)
     set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
 
-    # We now know the version, so make the architecture variables available.
+    # The compiler comes with the toolkit, so the versions are the same.
     set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION ${CMAKE_CUDA_COMPILER_VERSION})
-    # FIXME(#23161): Defer architecture detection until compiler testing.
-    include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
   endif()
 
+  include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
+
   _cmake_find_compiler_sysroot(CUDA)
 endif()
 
@@ -647,31 +601,6 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
       message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
     endif()
   endif()
-elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major|native)$")
-  # Sort since order mustn't matter.
-  list(SORT architectures_detected)
-  list(SORT architectures_tested)
-
-  # We don't distinguish real/virtual architectures during testing.
-  # For "70-real;70-virtual" we detect "70" as working and architectures_tested is "70;70".
-  # Thus we need to remove duplicates before checking if they're equal.
-  list(REMOVE_DUPLICATES architectures_tested)
-
-  # Print the actual architectures for generic values (all and all-major).
-  if(NOT DEFINED architectures_explicit)
-    set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${architectures_tested})")
-  else()
-    set(architectures_error "${architectures_tested}")
-  endif()
-
-  if(NOT "${architectures_detected}" STREQUAL "${architectures_tested}")
-    message(FATAL_ERROR
-      "The CMAKE_CUDA_ARCHITECTURES:\n"
-      "  ${architectures_error}\n"
-      "do not all work with this compiler.  Try:\n"
-      "  ${architectures_detected}\n"
-      "instead.")
-  endif()
 endif()
 
 # configure all variables set in this file
@@ -687,9 +616,7 @@ unset(_CUDA_LIBRARY_DIR)
 unset(_CUDA_TARGET_DIR)
 unset(_CUDA_TARGET_NAME)
 
-unset(architectures_explicit)
 unset(architectures_detected)
-unset(architectures_tested)
 
 set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
 set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")

+ 1 - 7
Modules/CMakeDetermineCompilerId.cmake

@@ -495,13 +495,7 @@ Id flags: ${testflags} ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS}
       if(CMAKE_VS_PLATFORM_NAME STREQUAL x64)
         set(cuda_target "<TargetMachinePlatform>64</TargetMachinePlatform>")
       endif()
-      if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major|native)$")
-        foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
-          string(REGEX MATCH "[0-9]+" arch_name "${arch}")
-          string(APPEND cuda_codegen "compute_${arch_name},sm_${arch_name};")
-        endforeach()
-      endif()
-      set(id_ItemDefinitionGroup_entry "<CudaCompile>${cuda_target}<AdditionalOptions>%(AdditionalOptions)-v</AdditionalOptions><CodeGeneration>${cuda_codegen}</CodeGeneration></CudaCompile>")
+      set(id_ItemDefinitionGroup_entry "<CudaCompile>${cuda_target}<AdditionalOptions>%(AdditionalOptions)-v</AdditionalOptions></CudaCompile>")
       set(id_PostBuildEvent_Command [[echo CMAKE_CUDA_COMPILER=$(CudaToolkitBinDir)\nvcc.exe]])
       if(CMAKE_VS_PLATFORM_TOOLSET_CUDA_CUSTOM_DIR)
         # check for legacy cuda custom toolkit folder structure

+ 0 - 60
Modules/CUDA/architectures.cmake

@@ -44,63 +44,3 @@ if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4
    AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang"))
   list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
 endif()
-
-# FIXME(#23161): Detect architectures early since we test them during
-# compiler detection.  We already have code to detect them later during
-# compiler testing, so we should not need to do this here.
-if(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
-  set(_CUDA_ARCHS_EXE "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCUDACompilerArchs.bin")
-  execute_process(
-    COMMAND "${_CUDA_NVCC_EXECUTABLE}" -o "${_CUDA_ARCHS_EXE}" --cudart=static "${CMAKE_ROOT}/Modules/CMakeCUDACompilerABI.cu"
-    RESULT_VARIABLE _CUDA_ARCHS_RESULT
-    OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
-    ERROR_VARIABLE  _CUDA_ARCHS_OUTPUT
-    )
-  if(_CUDA_ARCHS_RESULT EQUAL 0)
-    execute_process(
-      COMMAND "${_CUDA_ARCHS_EXE}"
-      RESULT_VARIABLE _CUDA_ARCHS_RESULT
-      OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
-      ERROR_VARIABLE  _CUDA_ARCHS_OUTPUT
-      OUTPUT_STRIP_TRAILING_WHITESPACE
-      )
-  endif()
-  if(_CUDA_ARCHS_RESULT EQUAL 0)
-    if("$ENV{CMAKE_CUDA_ARCHITECTURES_NATIVE_CLAMP}")
-      # Undocumented hook used by CMake's CI.
-      # Clamp native architecture to version range supported by this CUDA.
-      list(GET CMAKE_CUDA_ARCHITECTURES_ALL 0  _CUDA_ARCH_MIN)
-      list(GET CMAKE_CUDA_ARCHITECTURES_ALL -1 _CUDA_ARCH_MAX)
-      set(_CUDA_ARCHITECTURES_NATIVE "")
-      foreach(_CUDA_ARCH IN LISTS _CUDA_ARCHS_OUTPUT)
-        if(_CUDA_ARCH LESS _CUDA_ARCH_MIN)
-          set(_CUDA_ARCH "${_CUDA_ARCH_MIN}")
-        endif()
-        if(_CUDA_ARCH GREATER _CUDA_ARCH_MAX)
-          set(_CUDA_ARCH "${_CUDA_ARCH_MAX}")
-        endif()
-        list(APPEND _CUDA_ARCHITECTURES_NATIVE ${_CUDA_ARCH})
-      endforeach()
-      unset(_CUDA_ARCH)
-      unset(_CUDA_ARCH_MIN)
-      unset(_CUDA_ARCH_MAX)
-    else()
-      set(_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}")
-    endif()
-    list(REMOVE_DUPLICATES _CUDA_ARCHITECTURES_NATIVE)
-  else()
-    if (NOT _CUDA_ARCHS_RESULT MATCHES "[0-9]+")
-      set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})")
-    else()
-      set(_CUDA_ARCHS_STATUS "")
-    endif()
-    string(REPLACE "\n" "\n  " _CUDA_ARCHS_OUTPUT "  ${_CUDA_ARCHS_OUTPUT}")
-    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
-      "Detecting the CUDA native architecture(s) failed with "
-      "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n")
-    set(_CUDA_ARCHS_OUTPUT "")
-  endif()
-  unset(_CUDA_ARCHS_EXE)
-  unset(_CUDA_ARCHS_RESULT)
-  unset(_CUDA_ARCHS_OUTPUT)
-endif()

+ 1 - 1
Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt

@@ -1,5 +1,5 @@
 ^CMake Error at .*/Modules/CMakeDetermineCUDACompiler\.cmake:[0-9]+ \(message\):
-  CMAKE_CUDA_ARCHITECTURES must be valid if set\.
+  CMAKE_CUDA_ARCHITECTURES must be non-empty if set\.
 Call Stack \(most recent call first\):
   architectures-empty\.cmake:2 \(enable_language\)
   CMakeLists\.txt:3 \(include\)

+ 10 - 1
Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt

@@ -1,5 +1,14 @@
 ^CMake Error at .*/Modules/CMakeDetermineCUDACompiler\.cmake:[0-9]+ \(message\):
-  CMAKE_CUDA_ARCHITECTURES must be valid if set\.
+  CMAKE_CUDA_ARCHITECTURES:
+
+    invalid
+
+  is not one of the following:
+
+  \* a semicolon-separated list of integers
+
+  \* a special value: all, all-major, native
+
 Call Stack \(most recent call first\):
   architectures-invalid\.cmake:2 \(enable_language\)
   CMakeLists\.txt:3 \(include\)$