Browse Source

CUDA: Add CUDA_ARCHITECTURES target property

Simplifies CUDA target architecture handling.

Required for Clang support as Clang doesn't automatically select a supported architecture.
We detect a supported architecture during compiler identification and set CMAKE_CUDA_ARCHITECTURES to it.

Introduces CMP0104 for backwards compatibility with manually setting code generation flags with NVCC.

Implements #17963.
Raul Tambre 5 years ago
parent
commit
e98588aaba

+ 1 - 0
Help/manual/cmake-policies.7.rst

@@ -57,6 +57,7 @@ Policies Introduced by CMake 3.18
 .. toctree::
    :maxdepth: 1
 
+   CMP0104: CMAKE_CUDA_ARCHITECTURES now detected for NVCC, empty CUDA_ARCHITECTURES not allowed. </policy/CMP0104>
    CMP0103: Multiple export() with same FILE without APPEND is not allowed. </policy/CMP0103>
 
 Policies Introduced by CMake 3.17

+ 1 - 0
Help/manual/cmake-properties.7.rst

@@ -172,6 +172,7 @@ Properties on Targets
    /prop_tgt/CONFIG_OUTPUT_NAME
    /prop_tgt/CONFIG_POSTFIX
    /prop_tgt/CROSSCOMPILING_EMULATOR
+   /prop_tgt/CUDA_ARCHITECTURES
    /prop_tgt/CUDA_PTX_COMPILATION
    /prop_tgt/CUDA_SEPARABLE_COMPILATION
    /prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS

+ 1 - 0
Help/manual/cmake-variables.7.rst

@@ -488,6 +488,7 @@ Variables for Languages
    /variable/CMAKE_COMPILER_IS_GNUCC
    /variable/CMAKE_COMPILER_IS_GNUCXX
    /variable/CMAKE_COMPILER_IS_GNUG77
+   /variable/CMAKE_CUDA_ARCHITECTURES
    /variable/CMAKE_CUDA_COMPILE_FEATURES
    /variable/CMAKE_CUDA_HOST_COMPILER
    /variable/CMAKE_CUDA_EXTENSIONS

+ 31 - 0
Help/policy/CMP0104.rst

@@ -0,0 +1,31 @@
+CMP0104
+-------
+
+Initialize :variable:`CMAKE_CUDA_ARCHITECTURES` when
+:variable:`CMAKE_CUDA_COMPILER_ID <CMAKE_<LANG>_COMPILER_ID>` is ``NVIDIA``.
+Raise an error if :prop_tgt:`CUDA_ARCHITECTURES` is empty.
+
+:variable:`CMAKE_CUDA_ARCHITECTURES` introduced in CMake 3.18 is used to
+initialize :prop_tgt:`CUDA_ARCHITECTURES`, which passes correct code generation
+flags to the CUDA compiler.
+
+Previously, users had to manually specify the code generation flags. This
+policy is for backwards compatibility with manually specifying code generation
+flags.
+
+The ``OLD`` behavior for this policy is to not initialize
+:variable:`CMAKE_CUDA_ARCHITECTURES` when
+:variable:`CMAKE_CUDA_COMPILER_ID <CMAKE_<LANG>_COMPILER_ID>` is ``NVIDIA``.
+Empty :prop_tgt:`CUDA_ARCHITECTURES` is allowed.
+
+The ``NEW`` behavior of this policy is to initialize
+:variable:`CMAKE_CUDA_ARCHITECTURES` when
+:variable:`CMAKE_CUDA_COMPILER_ID <CMAKE_<LANG>_COMPILER_ID>` is ``NVIDIA``
+and raise an error if :prop_tgt:`CUDA_ARCHITECTURES` is empty during generation.
+
+This policy was introduced in CMake version 3.18.  CMake version
+|release| warns when the policy is not set and uses ``OLD`` behavior.
+Use the :command:`cmake_policy` command to set it to ``OLD`` or ``NEW``
+explicitly.
+
+.. include:: DEPRECATED.txt

+ 30 - 0
Help/prop_tgt/CUDA_ARCHITECTURES.rst

@@ -0,0 +1,30 @@
+CUDA_ARCHITECTURES
+------------------
+
+List of architectures to generate device code for.
+
+An architecture can be suffixed by either ``-real`` or ``-virtual`` to specify
+the kind of architecture to generate code for.
+If no suffix is given then code is generated for both real and virtual
+architectures.
+
+This property is initialized by the value of the :variable:`CMAKE_CUDA_ARCHITECTURES`
+variable if it is set when a target is created.
+
+The ``CUDA_ARCHITECTURES`` target property must be set to a non-empty value on targets
+that compile CUDA sources, or it is an error.  See policy :policy:`CMP0104`.
+
+Examples
+^^^^^^^^
+
+.. code-block:: cmake
+
+  set_property(TARGET tgt PROPERTY CUDA_ARCHITECTURES 35 50 72)
+
+Generates code for real and virtual architectures ``35``, ``50`` and ``72``.
+
+.. code-block:: cmake
+
+  set_property(TARGET tgt PROPERTY CUDA_ARCHITECTURES 70-real 72-virtual)
+
+Generates code for real architecture ``70`` and virtual architecture ``72``.

+ 7 - 0
Help/release/dev/cuda-architectures-empty.rst

@@ -0,0 +1,7 @@
+cuda-architectures-empty
+------------------------
+
+* :variable:`CMAKE_CUDA_ARCHITECTURES` is now initialized when
+  :variable:`CMAKE_CUDA_COMPILER_ID <CMAKE_<LANG>_COMPILER_ID>` is ``NVIDIA``.
+  Empty :prop_tgt:`CUDA_ARCHITECTURES` raises an error. See policy
+  :policy:`CMP0104`.

+ 6 - 0
Help/release/dev/cuda-architectures.rst

@@ -0,0 +1,6 @@
+cuda-architectures
+------------------
+
+* Added :prop_tgt:`CUDA_ARCHITECTURES` target property for specifying CUDA
+  output architectures. Users are encouraged to use this instead of specifying
+  options manually, as this approach is compiler-agnostic.

+ 17 - 0
Help/variable/CMAKE_CUDA_ARCHITECTURES.rst

@@ -0,0 +1,17 @@
+CMAKE_CUDA_ARCHITECTURES
+------------------------
+
+Default value for :prop_tgt:`CUDA_ARCHITECTURES` property of targets.
+
+This is initialized as follows depending on :variable:`CMAKE_CUDA_COMPILER_ID <CMAKE_<LANG>_COMPILER_ID>`:
+
+- For ``Clang``: the oldest architecture that works.
+
+- For ``NVIDIA``: the default architecture chosen by the compiler.
+  See policy :policy:`CMP0104`.
+
+Users are encouraged to override this, as the default varies across compilers
+and compiler versions.
+
+This variable is used to initialize the :prop_tgt:`CUDA_ARCHITECTURES` property
+on all targets. See the target property for additional information.

+ 11 - 0
Modules/CMakeDetermineCUDACompiler.cmake

@@ -216,6 +216,17 @@ if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
     file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
       "Failed to detect CUDA nvcc include information:\n${_nvcc_log}\n\n")
   endif()
+
+  # Parse the default CUDA architecture (compute_NN) from the compiler output; CMP0104 NEW only.
+  cmake_policy(GET CMP0104 _CUDA_CMP0104)
+  if(NOT CMAKE_CUDA_ARCHITECTURES AND _CUDA_CMP0104 STREQUAL "NEW")
+    string(REGEX MATCH "arch[ =]compute_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+    set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_MATCH_1}" CACHE STRING "CUDA architectures")
+
+    if(NOT CMAKE_CUDA_ARCHITECTURES)
+      message(FATAL_ERROR "Failed to find default CUDA architecture.")
+    endif()
+  endif()
 endif()
 
 # configure all variables set in this file

+ 3 - 0
Source/cmCoreTryCompile.cxx

@@ -40,6 +40,8 @@ static std::string const kCMAKE_CXX_LINK_NO_PIE_SUPPORTED =
   "CMAKE_CXX_LINK_NO_PIE_SUPPORTED";
 static std::string const kCMAKE_CXX_LINK_PIE_SUPPORTED =
   "CMAKE_CXX_LINK_PIE_SUPPORTED";
+static std::string const kCMAKE_CUDA_ARCHITECTURES =
+  "CMAKE_CUDA_ARCHITECTURES";
 static std::string const kCMAKE_CUDA_COMPILER_TARGET =
   "CMAKE_CUDA_COMPILER_TARGET";
 static std::string const kCMAKE_ENABLE_EXPORTS = "CMAKE_ENABLE_EXPORTS";
@@ -713,6 +715,7 @@ int cmCoreTryCompile::TryCompileCode(std::vector<std::string> const& argv,
       vars.insert(kCMAKE_C_COMPILER_TARGET);
       vars.insert(kCMAKE_CXX_COMPILER_EXTERNAL_TOOLCHAIN);
       vars.insert(kCMAKE_CXX_COMPILER_TARGET);
+      vars.insert(kCMAKE_CUDA_ARCHITECTURES);
       vars.insert(kCMAKE_CUDA_COMPILER_TARGET);
       vars.insert(kCMAKE_ENABLE_EXPORTS);
       vars.insert(kCMAKE_LINK_SEARCH_END_STATIC);

+ 89 - 0
Source/cmGeneratorTarget.cxx

@@ -3085,6 +3085,95 @@ void cmGeneratorTarget::GetAppleArchs(const std::string& config,
   }
 }
 
+void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
+{
+  struct CudaArchitecture
+  {
+    std::string name;
+    bool real{ true };
+    bool virtual_{ true };
+  };
+  std::vector<CudaArchitecture> architectures;
+  // Parse CUDA_ARCHITECTURES entries of the form NN, NN-real or NN-virtual.
+  {
+    std::vector<std::string> options;
+    cmExpandList(this->GetSafeProperty("CUDA_ARCHITECTURES"), options);
+
+    if (options.empty()) {
+      switch (this->GetPolicyStatusCMP0104()) {
+        case cmPolicies::WARN:
+          if (!this->LocalGenerator->GetCMakeInstance()->GetIsInTryCompile()) {
+            this->Makefile->IssueMessage(
+              MessageType::AUTHOR_WARNING,
+              cmPolicies::GetPolicyWarning(cmPolicies::CMP0104) +
+                "\nCUDA_ARCHITECTURES is empty for target \"" +
+                this->GetName() + "\".");
+          }
+          CM_FALLTHROUGH;
+        case cmPolicies::OLD:
+          break;
+        default:
+          this->Makefile->IssueMessage(
+            MessageType::FATAL_ERROR,
+            "CUDA_ARCHITECTURES is empty for target \"" + this->GetName() +
+              "\".");
+      }
+    }
+
+    for (std::string& option : options) {
+      CudaArchitecture architecture;
+
+      // Architecture name is up to the first specifier.
+      std::size_t pos = option.find_first_of('-');
+      architecture.name = option.substr(0, pos);
+
+      if (pos != std::string::npos) {
+        cm::string_view specifier{ option.c_str() + pos + 1,
+                                   option.length() - pos - 1 };
+
+        if (specifier == "real") {
+          architecture.real = true;
+          architecture.virtual_ = false;
+        } else if (specifier == "virtual") {
+          architecture.real = false;
+          architecture.virtual_ = true;
+        } else {
+          this->Makefile->IssueMessage(
+            MessageType::FATAL_ERROR,
+            "Unknown CUDA architecture specifier \"" + std::string(specifier) +
+              "\".");
+        }
+      }
+
+      architectures.emplace_back(architecture);
+    }
+  }
+
+  std::string const& compiler =
+    this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
+  // Only the NVIDIA compiler ID gets flags here; other IDs append nothing.
+  if (compiler == "NVIDIA") {
+    for (CudaArchitecture& architecture : architectures) {
+      flags +=
+        " --generate-code=arch=compute_" + architecture.name + ",code=[";
+
+      if (architecture.virtual_) {
+        flags += "compute_" + architecture.name;
+
+        if (architecture.real) {
+          flags += ",";
+        }
+      }
+
+      if (architecture.real) {
+        flags += "sm_" + architecture.name;
+      }
+
+      flags += "]";
+    }
+  }
+}
+
 //----------------------------------------------------------------------------
 std::string cmGeneratorTarget::GetFeatureSpecificLinkRuleVariable(
   std::string const& var, std::string const& lang,

+ 2 - 0
Source/cmGeneratorTarget.h

@@ -421,6 +421,8 @@ public:
   void GetAppleArchs(const std::string& config,
                      std::vector<std::string>& archVec) const;
 
+  void AddCUDAArchitectureFlags(std::string& flags) const;
+
   std::string GetFeatureSpecificLinkRuleVariable(
     std::string const& var, std::string const& lang,
     std::string const& config) const;

+ 2 - 0
Source/cmLocalGenerator.cxx

@@ -1944,6 +1944,8 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
         this->AppendFlags(flags, "-swift-version " + std::string(v));
       }
     }
+  } else if (lang == "CUDA") {
+    target->AddCUDAArchitectureFlags(flags);
   }
 
   // Add MSVC runtime library flags.  This is activated by the presence

+ 6 - 1
Source/cmPolicies.h

@@ -308,6 +308,10 @@ class cmMakefile;
          3, 17, 0, cmPolicies::WARN)                                          \
   SELECT(POLICY, CMP0103,                                                     \
          "multiple export() with same FILE without APPEND is not allowed.",   \
+         3, 18, 0, cmPolicies::WARN)                                          \
+  SELECT(POLICY, CMP0104,                                                     \
+         "CMAKE_CUDA_ARCHITECTURES now detected for NVCC, empty "             \
+         "CUDA_ARCHITECTURES not allowed.",                                   \
          3, 18, 0, cmPolicies::WARN)
 
 #define CM_SELECT_ID(F, A1, A2, A3, A4, A5, A6) F(A1)
@@ -338,7 +342,8 @@ class cmMakefile;
   F(CMP0081)                                                                  \
   F(CMP0083)                                                                  \
   F(CMP0095)                                                                  \
-  F(CMP0099)
+  F(CMP0099)                                                                  \
+  F(CMP0104)
 
 /** \class cmPolicies
  * \brief Handles changes in CMake behavior and policies

+ 1 - 0
Source/cmTarget.cxx

@@ -360,6 +360,7 @@ cmTarget::cmTarget(std::string const& name, cmStateEnums::TargetType type,
     initProp("CUDA_SEPARABLE_COMPILATION");
     initProp("CUDA_RESOLVE_DEVICE_SYMBOLS");
     initProp("CUDA_RUNTIME_LIBRARY");
+    initProp("CUDA_ARCHITECTURES");
     initProp("LINK_SEARCH_START_STATIC");
     initProp("LINK_SEARCH_END_STATIC");
     initProp("Swift_LANGUAGE_VERSION");

+ 5 - 0
Tests/CudaOnly/Architecture/CMakeLists.txt

@@ -0,0 +1,5 @@
+cmake_minimum_required(VERSION 3.17)
+project(Architecture CUDA)
+
+set(CMAKE_CUDA_ARCHITECTURES 52)
+add_executable(Architecture main.cu)

+ 9 - 0
Tests/CudaOnly/Architecture/main.cu

@@ -0,0 +1,9 @@
+#ifdef __CUDA_ARCH__
+#  if __CUDA_ARCH__ != 520
+#    error "Passed architecture 52, but got something else."
+#  endif
+#endif
+
+int main()
+{
+}

+ 1 - 0
Tests/CudaOnly/CMakeLists.txt

@@ -1,4 +1,5 @@
 
+ADD_TEST_MACRO(CudaOnly.Architecture Architecture)
 ADD_TEST_MACRO(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
 ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
 ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)

+ 2 - 0
Tests/RunCMake/CMP0104/CMP0104-Common.cmake

@@ -0,0 +1,2 @@
+enable_language(CUDA)
+add_library(cuda main.cu)

+ 6 - 0
Tests/RunCMake/CMP0104/CMP0104-NEW.cmake

@@ -0,0 +1,6 @@
+cmake_policy(SET CMP0104 NEW)
+include(CMP0104-Common.cmake)
+
+if(NOT CMAKE_CUDA_ARCHITECTURES)
+  message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES is empty with CMP0104 enabled.")
+endif()

+ 12 - 0
Tests/RunCMake/CMP0104/CMP0104-OLD.cmake

@@ -0,0 +1,12 @@
+cmake_policy(SET CMP0104 OLD)
+include(CMP0104-Common.cmake)
+# OLD: NVIDIA must leave CMAKE_CUDA_ARCHITECTURES empty; other identified compilers must set it.
+if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
+  if(CMAKE_CUDA_ARCHITECTURES)
+    message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES isn't empty for NVIDIA with CMP0104 OLD.")
+  endif()
+elseif(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Unknown")
+  if(NOT CMAKE_CUDA_ARCHITECTURES)
+    message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES isn't non-empty for non-NVIDIA with CMP0104 OLD.")
+  endif()
+endif()

+ 8 - 0
Tests/RunCMake/CMP0104/CMP0104-WARN-stderr.txt

@@ -0,0 +1,8 @@
+CMake Warning \(dev\) in CMakeLists.txt:
+  Policy CMP0104 is not set: CMAKE_CUDA_ARCHITECTURES now detected for NVCC,
+  empty CUDA_ARCHITECTURES not allowed.  Run "cmake --help-policy CMP0104"
+  for policy details.  Use the cmake_policy command to set the policy and
+  suppress this warning.
+
+  CUDA_ARCHITECTURES is empty for target "cuda".
+This warning is for project developers.  Use -Wno-dev to suppress it.

+ 1 - 0
Tests/RunCMake/CMP0104/CMP0104-WARN.cmake

@@ -0,0 +1 @@
+include(CMP0104-Common.cmake)

+ 3 - 0
Tests/RunCMake/CMP0104/CMakeLists.txt

@@ -0,0 +1,3 @@
+cmake_minimum_required(VERSION 3.16)
+project(${RunCMake_TEST} NONE)
+include(${RunCMake_TEST}.cmake)

+ 5 - 0
Tests/RunCMake/CMP0104/RunCMakeTest.cmake

@@ -0,0 +1,5 @@
+include(RunCMake)
+
+run_cmake(CMP0104-OLD)
+run_cmake(CMP0104-NEW)
+run_cmake(CMP0104-WARN)

+ 3 - 0
Tests/RunCMake/CMP0104/main.cu

@@ -0,0 +1,3 @@
+int main()
+{
+}

+ 3 - 0
Tests/RunCMake/CMakeLists.txt

@@ -116,6 +116,9 @@ endif()
 add_RunCMake_test(CMP0069)
 add_RunCMake_test(CMP0081)
 add_RunCMake_test(CMP0102)
+if(CMake_TEST_CUDA)
+  add_RunCMake_test(CMP0104)
+endif()
 
 # The test for Policy 65 requires the use of the
 # CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS variable, which both the VS and Xcode

+ 1 - 0
Tests/RunCMake/CompilerLauncher/CUDA-common.cmake

@@ -1,3 +1,4 @@
+cmake_policy(SET CMP0104 NEW)
 enable_language(CUDA)
 set(CMAKE_VERBOSE_MAKEFILE TRUE)
 add_executable(main main.cu)

+ 1 - 0
Tests/RunCMake/NinjaMultiConfig/CudaSimple.cmake

@@ -1,3 +1,4 @@
+cmake_policy(SET CMP0104 NEW)
 enable_language(CUDA)
 file(TOUCH ${CMAKE_BINARY_DIR}/empty.cmake)
 

+ 1 - 0
Tests/RunCMake/TargetPolicies/PolicyList-stderr.txt

@@ -29,6 +29,7 @@
    \* CMP0083
    \* CMP0095
    \* CMP0099
+   \* CMP0104
 
 Call Stack \(most recent call first\):
   CMakeLists.txt:3 \(include\)