Просмотр исходного кода

Merge topic 'less_ptx_for_all_all-major_native'

e8591f1516 CUDA: native/all/all-major generates minimal set of ptx code

Acked-by: Kitware Robot <[email protected]>
Reviewed-by: Raul Tambre <[email protected]>
Merge-request: !7216
Brad King 3 года назад
Родитель
Коммит
2045e1eee0

+ 1 - 0
Modules/CMakeTestCUDACompiler.cmake

@@ -52,6 +52,7 @@ if(CMAKE_CUDA_ABI_COMPILED)
       set(CMAKE_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}")
     endif()
     list(REMOVE_DUPLICATES CMAKE_CUDA_ARCHITECTURES_NATIVE)
+    list(TRANSFORM CMAKE_CUDA_ARCHITECTURES_NATIVE APPEND "-real")
   else()
     if(NOT _CUDA_ARCHS_RESULT MATCHES "[0-9]+")
       set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})")

+ 11 - 0
Modules/CUDA/architectures.cmake

@@ -44,3 +44,14 @@ if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4
    AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang"))
   list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
 endif()
+
+# only generate jit code for the newest arch for all/all-major
+list(POP_BACK CMAKE_CUDA_ARCHITECTURES_ALL _latest_arch)
+list(TRANSFORM CMAKE_CUDA_ARCHITECTURES_ALL APPEND "-real")
+list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL ${_latest_arch})
+
+list(POP_BACK CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR _latest_arch)
+list(TRANSFORM CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR APPEND "-real")
+list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR ${_latest_arch})
+
+unset(_latest_arch)

+ 1 - 0
Tests/CudaOnly/ArchSpecial/CMakeLists.txt

@@ -10,6 +10,7 @@ function(verify_output flag)
   string(REPLACE "-" "_" architectures "${flag}")
   string(TOUPPER "${architectures}" architectures)
   set(architectures "${CMAKE_CUDA_ARCHITECTURES_${architectures}}")
+  list(TRANSFORM architectures REPLACE "-real" "")
 
   if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
     set(match_regex "-target-cpu sm_([0-9]+)")

+ 3 - 3
Tests/RunCMake/CUDA_architectures/architectures-all-major-stdout.txt

@@ -1,4 +1,4 @@
 -- CMAKE_CUDA_ARCHITECTURES='all-major'
--- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
--- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
--- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'
+-- CMAKE_CUDA_ARCHITECTURES_ALL='([0-9]+-real;)+[0-9]+'
+-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='([0-9]+-real;)+[0-9]+'
+-- CMAKE_CUDA_ARCHITECTURES_NATIVE='([0-9]+-real;)*[0-9]+-real'

+ 3 - 3
Tests/RunCMake/CUDA_architectures/architectures-all-stdout.txt

@@ -1,4 +1,4 @@
 -- CMAKE_CUDA_ARCHITECTURES='all'
--- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
--- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
--- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'
+-- CMAKE_CUDA_ARCHITECTURES_ALL='([0-9]+-real;)+[0-9]+'
+-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='([0-9]+-real;)+[0-9]+'
+-- CMAKE_CUDA_ARCHITECTURES_NATIVE='([0-9]+-real;)*[0-9]+-real'

+ 3 - 3
Tests/RunCMake/CUDA_architectures/architectures-native-stdout.txt

@@ -1,4 +1,4 @@
 -- CMAKE_CUDA_ARCHITECTURES='native'
--- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
--- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
--- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'
+-- CMAKE_CUDA_ARCHITECTURES_ALL='([0-9]+-real;)+[0-9]+'
+-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='([0-9]+-real;)+[0-9]+'
+-- CMAKE_CUDA_ARCHITECTURES_NATIVE='([0-9]+-real;)*[0-9]+-real'

+ 3 - 3
Tests/RunCMake/CUDA_architectures/architectures-suffix-stdout.txt

@@ -1,4 +1,4 @@
 -- CMAKE_CUDA_ARCHITECTURES='[0-9]+-real;[0-9]+-virtual;'
--- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
--- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
--- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'
+-- CMAKE_CUDA_ARCHITECTURES_ALL='([0-9]+-real;)+[0-9]+'
+-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='([0-9]+-real;)+[0-9]+'
+-- CMAKE_CUDA_ARCHITECTURES_NATIVE='([0-9]+-real;)*[0-9]+-real'