Browse Source

FASTBuild: support CUDA language

Fixes: #27318
Eduard Voronkin 1 month ago
parent
commit
4135175337

+ 8 - 2
Modules/Platform/Windows-NVIDIA-CUDA.cmake

@@ -11,8 +11,14 @@ foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES})
   string(APPEND __IMPLICIT_LINKS " \"${lib}\"")
 endforeach()
 
-set(_CMAKE_VS_LINK_DLL "<CMAKE_COMMAND> -E vs_link_dll --intdir=<OBJECT_DIR> --rc=<CMAKE_RC_COMPILER> --mt=<CMAKE_MT> --manifests <MANIFESTS> -- ")
-set(_CMAKE_VS_LINK_EXE "<CMAKE_COMMAND> -E vs_link_exe --intdir=<OBJECT_DIR> --rc=<CMAKE_RC_COMPILER> --mt=<CMAKE_MT> --manifests <MANIFESTS> -- ")
+# FASTBuild invokes the linker directly; every other generator goes through
+# the "cmake -E vs_link_*" wrappers.
+if(CMAKE_GENERATOR MATCHES "FASTBuild")
+  set(_CMAKE_VS_LINK_DLL "")
+  set(_CMAKE_VS_LINK_EXE "")
+else()
+  set(_CMAKE_VS_LINK_DLL "<CMAKE_COMMAND> -E vs_link_dll --intdir=<OBJECT_DIR> --rc=<CMAKE_RC_COMPILER> --mt=<CMAKE_MT> --manifests <MANIFESTS> -- ")
+  set(_CMAKE_VS_LINK_EXE "<CMAKE_COMMAND> -E vs_link_exe --intdir=<OBJECT_DIR> --rc=<CMAKE_RC_COMPILER> --mt=<CMAKE_MT> --manifests <MANIFESTS> -- ")
+endif()
+
 set(CMAKE_CUDA_CREATE_SHARED_LIBRARY
   "${_CMAKE_VS_LINK_DLL}<CMAKE_LINKER> ${CMAKE_CL_NOLOGO} <OBJECTS> ${CMAKE_START_TEMP_FILE} /out:<TARGET> /implib:<TARGET_IMPLIB> /pdb:<TARGET_PDB> /dll /version:<TARGET_VERSION_MAJOR>.<TARGET_VERSION_MINOR>${_PLATFORM_LINK_FLAGS} <LINK_FLAGS> <LINK_LIBRARIES>${__IMPLICIT_LINKS} ${CMAKE_END_TEMP_FILE}")
 set(CMAKE_CUDA_SHARED_LIBRARY_COMPILE_DEFINITIONS "_WINDLL")

+ 177 - 31
Source/cmFastbuildNormalTargetGenerator.cxx

@@ -4,6 +4,7 @@
 #include "cmFastbuildNormalTargetGenerator.h"
 
 #include <algorithm>
+#include <array>
 #include <cstddef>
 #include <iterator>
 #include <map>
@@ -15,6 +16,8 @@
 
 #include <cm/memory>
 #include <cm/optional>
+#include <cm/string_view>
+#include <cmext/string_view>
 
 #include "cmsys/FStream.hxx"
 
@@ -28,6 +31,7 @@
 #include "cmGlobalCommonGenerator.h"
 #include "cmGlobalFastbuildGenerator.h"
 #include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
 #include "cmList.h"
 #include "cmListFileCache.h"
 #include "cmLocalCommonGenerator.h"
@@ -75,6 +79,7 @@ cmFastbuildNormalTargetGenerator::cmFastbuildNormalTargetGenerator(
       this->GeneratorTarget->GetObjectDirectory(Config)))
   , Languages(GetLanguages())
   , CompileObjectCmakeRules(GetCompileObjectCommand())
+  , CudaCompileMode(this->GetCudaCompileMode())
 {
 
   LogMessage(cmStrCat("objectOutDir: ", ObjectOutDir));
@@ -143,6 +148,25 @@ std::string cmFastbuildNormalTargetGenerator::DetectCompilerFlags(
   return compileFlags;
 }
 
+// Splits a complete link command line into the program to run
+// (outLinkerExecutable) and the rest of the line (outLinkerArgs).
+void cmFastbuildNormalTargetGenerator::SplitLinkerFromArgs(
+  std::string const& command, std::string& outLinkerExecutable,
+  std::string& outLinkerArgs) const
+{
+#ifdef _WIN32
+  std::vector<std::string> args;
+  std::string program;
+  cmSystemTools::SplitProgramFromArgs(command, program, outLinkerArgs);
+  // cmLocalGenerator::GetStaticLibraryFlags seems to add empty quotes when
+  // appending "STATIC_LIBRARY_FLAGS_DEBUG"...
+  cmSystemTools::ReplaceString(outLinkerArgs, "\"\"", "");
+  // On Windows the executable is taken from the command-line parse rather
+  // than from SplitProgramFromArgs.
+  cmSystemTools::ParseWindowsCommandLine(command.c_str(), args);
+  if (args.empty()) {
+    // The parse produced no tokens: indexing args[0] would be undefined
+    // behavior, so fall back to the program found by SplitProgramFromArgs.
+    outLinkerExecutable = std::move(program);
+  } else {
+    outLinkerExecutable = std::move(args.front());
+  }
+#else
+  cmSystemTools::SplitProgramFromArgs(command, outLinkerExecutable,
+                                      outLinkerArgs);
+#endif
+}
+
 void cmFastbuildNormalTargetGenerator::GetLinkerExecutableAndArgs(
   std::string const& command, std::string& outLinkerExecutable,
   std::string& outLinkerArgs)
@@ -163,19 +187,7 @@ void cmFastbuildNormalTargetGenerator::GetLinkerExecutableAndArgs(
     outLinkerExecutable = iter->second.Executable;
     outLinkerArgs = cmStrCat(iter->second.Args, " ", command);
   } else {
-#ifdef _WIN32
-    std::vector<std::string> args;
-    std::string tmp;
-    cmSystemTools::SplitProgramFromArgs(command, tmp, outLinkerArgs);
-    // cmLocalGenerator::GetStaticLibraryFlags seems to add empty quotes when
-    // appending "STATIC_LIBRARY_FLAGS_DEBUG"...
-    cmSystemTools::ReplaceString(outLinkerArgs, "\"\"", "");
-    cmSystemTools::ParseWindowsCommandLine(command.c_str(), args);
-    outLinkerExecutable = std::move(args[0]);
-#else
-    cmSystemTools::SplitProgramFromArgs(command, outLinkerExecutable,
-                                        outLinkerArgs);
-#endif
+    SplitLinkerFromArgs(command, outLinkerExecutable, outLinkerArgs);
   }
   LogMessage("Linker Exe: " + outLinkerExecutable);
   LogMessage("Linker args: " + outLinkerArgs);
@@ -656,21 +668,10 @@ cmFastbuildNormalTargetGenerator::GetCompileObjectCommand() const
   for (std::string const& lang : Languages) {
     std::vector<std::string> commands;
     std::string cmakeVar;
-    if (lang == "CUDA") {
-      if (this->GeneratorTarget->GetPropertyAsBool(
-            "CUDA_SEPARABLE_COMPILATION")) {
-        cmakeVar = "CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION";
-      } else if (this->GeneratorTarget->GetPropertyAsBool(
-                   "CUDA_PTX_COMPILATION")) {
-        cmakeVar = "CMAKE_CUDA_COMPILE_PTX_COMPILATION";
-      } else {
-        cmakeVar = "CMAKE_CUDA_COMPILE_WHOLE_COMPILATION";
-      }
-    } else {
-      cmakeVar = "CMAKE_";
-      cmakeVar += lang;
-      cmakeVar += "_COMPILE_OBJECT";
-    }
+    cmakeVar = "CMAKE_";
+    cmakeVar += lang;
+    cmakeVar += "_COMPILE_OBJECT";
+
     std::string cmakeValue =
       LocalCommonGenerator->GetMakefile()->GetSafeDefinition(cmakeVar);
 
@@ -680,6 +681,39 @@ cmFastbuildNormalTargetGenerator::GetCompileObjectCommand() const
   }
   return result;
 }
+// Builds the CUDA compile-mode flags for this target: the RDC flag when
+// separable compilation is enabled, followed by the flag of the first enabled
+// special mode (PTX, CUBIN, FATBIN, OPTIX) or the whole-compilation flag.
+// Returns an empty string when the target does not use CUDA.
+std::string cmFastbuildNormalTargetGenerator::GetCudaCompileMode() const
+{
+  if (Languages.count("CUDA") == 0) {
+    return {};
+  }
+  // TODO: unify it with makefile / ninja generators.
+  std::string modeFlags;
+  if (this->GeneratorTarget->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION")) {
+    modeFlags +=
+      this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG");
+    modeFlags += ' ';
+  }
+  static std::array<cm::string_view, 4> const specialModes{
+    { "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s }
+  };
+  for (cm::string_view specialMode : specialModes) {
+    if (!this->GeneratorTarget->GetPropertyAsBool(
+          cmStrCat("CUDA_", specialMode, "_COMPILATION"))) {
+      continue;
+    }
+    // Only the first enabled special mode is honored.
+    modeFlags += this->Makefile->GetRequiredDefinition(
+      cmStrCat("_CMAKE_CUDA_", specialMode, "_FLAG"));
+    return modeFlags;
+  }
+  // No special mode requested: regular whole-program compilation.
+  modeFlags +=
+    this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG");
+  return modeFlags;
+}
 
 std::string cmFastbuildNormalTargetGenerator::GetLinkCommand() const
 {
@@ -851,6 +885,8 @@ void cmFastbuildNormalTargetGenerator::Generate()
   std::vector<std::string> objectDepends;
   AddObjectDependencies(fastbuildTarget, objectDepends);
 
+  GenerateCudaDeviceLink(fastbuildTarget);
+
   GenerateLink(fastbuildTarget, objectDepends);
 
   if (fastbuildTarget.LinkerNode.size() > 1) {
@@ -1134,7 +1170,7 @@ cmFastbuildNormalTargetGenerator::ComputeRuleVariables() const
   compileObjectVars.Source = FASTBUILD_1_INPUT_PLACEHOLDER;
   compileObjectVars.Object = FASTBUILD_2_INPUT_PLACEHOLDER;
   compileObjectVars.ObjectDir =
-    FASTBUILD_DOLLAR_TAG "TargetOutputDir" FASTBUILD_DOLLAR_TAG;
+    FASTBUILD_DOLLAR_TAG "TargetOutDir" FASTBUILD_DOLLAR_TAG;
   compileObjectVars.ObjectFileDir = "";
   compileObjectVars.Flags = "";
   compileObjectVars.Includes = "";
@@ -1196,6 +1232,9 @@ std::string cmFastbuildNormalTargetGenerator::GetCompileOptions(
   compileObjectVars.Flags = compilerFlags.c_str();
   compileObjectVars.Defines = compilerDefines.c_str();
   compileObjectVars.Language = language.c_str();
+  if (language == "CUDA") {
+    compileObjectVars.CudaCompileMode = this->CudaCompileMode.c_str();
+  }
 
   std::string rule = CompileObjectCmakeRules.at(language);
   RulePlaceholderExpander->ExpandRuleVariables(LocalCommonGenerator, rule,
@@ -1227,6 +1266,80 @@ std::vector<std::string> cmFastbuildNormalTargetGenerator::GetArches() const
   return arches;
 }
 
+// Expands the CMAKE_CUDA_DEVICE_LINK_LIBRARY rule for this target and splits
+// the expanded command into the linker executable and its arguments.
+void cmFastbuildNormalTargetGenerator::GetCudaDeviceLinkLinkerAndArgs(
+  std::string& linker, std::string& args) const
+{
+  std::string linkCmd =
+    this->GetMakefile()->GetDefinition("CMAKE_CUDA_DEVICE_LINK_"
+                                       "LIBRARY");
+  auto vars = ComputeRuleVariables();
+  vars.Language = "CUDA";
+  // Inputs and output are left as FASTBuild placeholders.
+  vars.Objects = FASTBUILD_1_INPUT_PLACEHOLDER;
+  vars.Target = FASTBUILD_2_INPUT_PLACEHOLDER;
+  auto linkLineComputer = cm::make_unique<cmLinkLineDeviceComputer>(
+    this->LocalGenerator,
+    this->LocalGenerator->GetStateSnapshot().GetDirectory());
+  std::string linkLibs;
+  std::string targetFlags;
+  std::string linkFlags;
+  std::string frameworkPath;
+  std::string linkPath;
+  // So that the call to "GetTargetFlags" does not pollute "LinkLibs" and
+  // "LinkFlags" with unneeded values.
+  std::string dummyLinkLibs;
+  std::string dummyLinkFlags;
+  this->LocalCommonGenerator->GetDeviceLinkFlags(
+    *linkLineComputer, Config, linkLibs, linkFlags, frameworkPath, linkPath,
+    this->GeneratorTarget);
+  this->LocalCommonGenerator->GetTargetFlags(
+    linkLineComputer.get(), Config, dummyLinkLibs, targetFlags, dummyLinkFlags,
+    frameworkPath, linkPath, this->GeneratorTarget);
+  // The rule variables only hold pointers; the strings above stay alive
+  // until the expansion below has finished.
+  vars.LinkFlags = linkFlags.c_str();
+  vars.LinkLibraries = linkLibs.c_str();
+  vars.LanguageCompileFlags = targetFlags.c_str();
+  this->RulePlaceholderExpander->ExpandRuleVariables(this->GetLocalGenerator(),
+                                                     linkCmd, vars);
+  SplitLinkerFromArgs(linkCmd, linker, args);
+}
+
+// Appends one CUDA device-link linker node per architecture to
+// target.CudaDeviceLinkNode. Does nothing when the target does not require
+// device linking.
+void cmFastbuildNormalTargetGenerator::GenerateCudaDeviceLink(
+  FastbuildTarget& target) const
+{
+  if (!requireDeviceLinking(*this->GeneratorTarget, *this->GetLocalGenerator(),
+                            Config)) {
+    return;
+  }
+  // Only needed once device linking is known to be required.
+  auto const arches = this->GetArches();
+  LogMessage("GenerateCudaDeviceLink(...)");
+  for (auto const& arch : arches) {
+    std::string linker;
+    std::string args;
+    GetCudaDeviceLinkLinkerAndArgs(linker, args);
+
+    FastbuildLinkerNode deviceLinkNode;
+    // NOTE(review): the node name does not include the arch, so several
+    // arches would yield nodes with the same name - confirm this cannot
+    // happen for CUDA targets.
+    deviceLinkNode.Name = cmStrCat(target.Name, "_cuda_device_link");
+    deviceLinkNode.Type = FastbuildLinkerNode::SHARED_LIBRARY;
+    deviceLinkNode.Linker = std::move(linker);
+    deviceLinkNode.LinkerOptions = std::move(args);
+    // Output. The per-arch suffix is keyed on "arch": the previous code
+    // tested "args" here, after it had already been moved from above.
+    std::string const archSuffix =
+      arch.empty() ? std::string() : "_" + arch;
+    deviceLinkNode.LinkerOutput = this->ConvertToFastbuildPath(cmStrCat(
+      FASTBUILD_DOLLAR_TAG "TargetOutDi"
+                           "r" FASTBUILD_DOLLAR_TAG "/cmake_device_link",
+      archSuffix,
+      this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_"
+                                        "EXTENSION")));
+
+    // Input: every object list of this target.
+    for (auto const& objList : target.ObjectListNodes) {
+      deviceLinkNode.LibrarianAdditionalInputs.push_back(objList.Name);
+    }
+    target.CudaDeviceLinkNode.emplace_back(std::move(deviceLinkNode));
+  }
+  LogMessage("GenerateCudaDeviceLink end");
+}
+
 void cmFastbuildNormalTargetGenerator::GenerateObjects(FastbuildTarget& target)
 {
   this->GetGlobalGenerator()->AllFoldersToClean.insert(ObjectOutDir);
@@ -1360,9 +1473,16 @@ void cmFastbuildNormalTargetGenerator::GenerateObjects(FastbuildTarget& target)
           objectListNode.CompilerOutputExtension = cmStrCat('.', arch);
           objectListNode.arch = arch;
         }
+        char const* customExt =
+          this->GeneratorTarget->GetCustomObjectExtension();
+
         objectListNode.CompilerOutputExtension +=
           this->GetMakefile()->GetSafeDefinition(
             cmStrCat("CMAKE_", language, "_OUTPUT_EXTENSION"));
+        // Tested in "CudaOnly.ExportPTX" test.
+        if (customExt) {
+          objectListNode.CompilerOutputExtension += customExt;
+        }
       }
     }
   }
@@ -1907,7 +2027,8 @@ void cmFastbuildNormalTargetGenerator::AppendDirectObjectLibs(
 }
 
 void cmFastbuildNormalTargetGenerator::AppendLinkDeps(
-  std::set<FastbuildTargetDep>& preBuildDeps, FastbuildLinkerNode& linkerNode)
+  std::set<FastbuildTargetDep>& preBuildDeps, FastbuildLinkerNode& linkerNode,
+  FastbuildLinkerNode& cudaDeviceLinkLinkerNode)
 {
   std::set<std::string> linkedObjects;
   cmComputeLinkInformation const* linkInfo =
@@ -1921,6 +2042,8 @@ void cmFastbuildNormalTargetGenerator::AppendLinkDeps(
   // Object libs that are linked directly to target (e.g.
   // add_executable(test_exe archiveObjs)
   AppendDirectObjectLibs(linkerNode, linkedObjects);
+  std::size_t numberOfDirectlyLinkedObjects =
+    linkerNode.LibrarianAdditionalInputs.size();
   // target_link_libraries.
   cmComputeLinkInformation::ItemVector const items = linkInfo->GetItems();
 
@@ -1955,12 +2078,30 @@ void cmFastbuildNormalTargetGenerator::AppendLinkDeps(
     else if (item.Target) {
       AppendTargetDep(linkerNode, linkedObjects, item);
       AppendPrebuildDeps(linkerNode, item);
+      if (!item.Target->IsImported() &&
+          item.Target->GetType() == cmStateEnums::OBJECT_LIBRARY) {
+        ++numberOfDirectlyLinkedObjects;
+        cudaDeviceLinkLinkerNode.LibrarianAdditionalInputs.emplace_back(
+          cmStrCat(item.Target->GetName(), FASTBUILD_OBJECTS_ALIAS_POSTFIX));
+      }
+
     } else {
       AppendCommandLineDep(linkerNode, item);
       UsingCommandLine = true;
     }
   }
   AppendExternalObject(linkerNode, linkedObjects);
+
+  if (!cudaDeviceLinkLinkerNode.Name.empty()) {
+    linkerNode.LibrarianAdditionalInputs.push_back(
+      cudaDeviceLinkLinkerNode.Name);
+    // CUDA device-link stub needs to go AFTER direct object dependencies, but
+    // BEFORE all other dependencies. Needed for the correct left-to-right
+    // symbols resolution on Linux.
+    std::swap(
+      linkerNode.LibrarianAdditionalInputs[numberOfDirectlyLinkedObjects],
+      linkerNode.LibrarianAdditionalInputs.back());
+  }
 }
 
 void cmFastbuildNormalTargetGenerator::AddLipoCommand(FastbuildTarget& target)
@@ -2109,7 +2250,12 @@ void cmFastbuildNormalTargetGenerator::GenerateLink(
     linkerNode.LinkerType = linkerType;
     linkerNode.LinkerOptions += linkerOptions;
 
-    AppendLinkDeps(target.PreBuildDependencies, linkerNode);
+    // Check if we have CUDA device link stub for this target.
+    FastbuildLinkerNode dummyCudaDeviceLinkNode;
+    AppendLinkDeps(target.PreBuildDependencies, linkerNode,
+                   target.CudaDeviceLinkNode.size() > i
+                     ? target.CudaDeviceLinkNode[i]
+                     : dummyCudaDeviceLinkNode);
     ApplyLWYUToLinkerCommand(linkerNode);
 
     // On macOS, only the last LinkerNode performs lipo in POST_BUILD.

+ 10 - 1
Source/cmFastbuildNormalTargetGenerator.h

@@ -23,6 +23,7 @@ class cmFastbuildNormalTargetGenerator : public cmFastbuildTargetGenerator
   std::string const ObjectOutDir;
   std::set<std::string> const Languages;
   std::unordered_map<std::string, std::string> const CompileObjectCmakeRules;
+  std::string const CudaCompileMode;
 
   // Now we're adding our link deps to command line and using .Libraries2 for
   // tracking deps.
@@ -50,6 +51,7 @@ private:
   // Example return value: {"CXX" : "<CMAKE_CXX_COMPILER> <DEFINES> <INCLUDES>
   // <FLAGS> -o <OBJECT> -c <SOURCE>" }
   std::unordered_map<std::string, std::string> GetCompileObjectCommand() const;
+  std::string GetCudaCompileMode() const;
   std::string GetLinkCommand() const;
 
   void AddCompilerLaunchersForLanguages();
@@ -70,6 +72,9 @@ private:
 
   std::vector<std::string> GetArches() const;
 
+  void GetCudaDeviceLinkLinkerAndArgs(std::string& linker,
+                                      std::string& args) const;
+  void GenerateCudaDeviceLink(FastbuildTarget& target) const;
   void GenerateObjects(FastbuildTarget& target);
   FastbuildUnityNode GetOneUnity(std::set<std::string> const& isolatedFiles,
                                  std::vector<std::string>& files,
@@ -117,7 +122,8 @@ private:
                               std::set<std::string>& linkedObjects);
 
   void AppendLinkDeps(std::set<FastbuildTargetDep>& preBuildDeps,
-                      FastbuildLinkerNode& linkerNode);
+                      FastbuildLinkerNode& linkerNode,
+                      FastbuildLinkerNode& cudaDeviceLinkLinkerNode);
   void AddLipoCommand(FastbuildTarget& target);
   void GenerateModuleDefinitionInfo(FastbuildTarget& target) const;
   std::vector<FastbuildExecNode> GetSymlinkExecs() const;
@@ -133,6 +139,9 @@ private:
   std::string DetectCompilerFlags(cmSourceFile const& srcFile,
                                   std::string const& arch);
 
+  void SplitLinkerFromArgs(std::string const& command,
+                           std::string& outLinkerExecutable,
+                           std::string& outLinkerArgs) const;
   void GetLinkerExecutableAndArgs(std::string const& command,
                                   std::string& outLinkerExecutable,
                                   std::string& outLinkerArgs);

+ 3 - 12
Source/cmGlobalFastbuildGenerator.cxx

@@ -297,18 +297,6 @@ cmGlobalFastbuildGenerator::NewFactory()
     new cmGlobalGeneratorSimpleFactory<cmGlobalFastbuildGenerator>());
 }
 
-bool cmGlobalFastbuildGenerator::CheckLanguages(
-  std::vector<std::string> const& languages, cmMakefile* mf) const
-{
-  if (std::find(languages.begin(), languages.end(), "CUDA") !=
-      languages.end()) {
-    mf->IssueMessage(MessageType::FATAL_ERROR,
-                     "The FASTBuild generator does not support CUDA yet.");
-    return false;
-  }
-  return true;
-}
-
 void cmGlobalFastbuildGenerator::EnableLanguage(
   std::vector<std::string> const& lang, cmMakefile* mf, bool optional)
 {
@@ -1418,6 +1406,9 @@ void cmGlobalFastbuildGenerator::WriteTarget(FastbuildTarget const& target)
 
   // Libraries / executables.
   if (!target.LinkerNode.empty()) {
+    for (auto const& cudaDeviceLinkNode : target.CudaDeviceLinkNode) {
+      this->WriteLinker(cudaDeviceLinkNode, target.AllowDistribution);
+    }
     for (auto const& linkerNode : target.LinkerNode) {
       this->WriteLinker(linkerNode, target.AllowDistribution);
     }

+ 2 - 3
Source/cmGlobalFastbuildGenerator.h

@@ -320,7 +320,8 @@ struct FastbuildTarget : public FastbuildTargetBase
   std::map<std::string, std::string> Variables;
   std::vector<FastbuildObjectListNode> ObjectListNodes;
   std::vector<FastbuildUnityNode> UnityNodes;
-  // Potentially multiple libs for different archs (apple only);
+  // Potentially multiple libs for different archs (apple only). The CUDA
+  // device-link node at index i, when present, is passed to LinkerNode[i].
+  std::vector<FastbuildLinkerNode> CudaDeviceLinkNode;
   std::vector<FastbuildLinkerNode> LinkerNode;
   std::string RealOutput;
   FastbuildAliasNode PreBuildExecNodes, ExecNodes;
@@ -355,8 +356,6 @@ public:
 
   bool FindMakeProgram(cmMakefile* mf) override;
 
-  bool CheckLanguages(std::vector<std::string> const& languages,
-                      cmMakefile* mf) const override;
   void EnableLanguage(std::vector<std::string> const& lang, cmMakefile* mf,
                       bool optional) override;
 

+ 15 - 2
Source/cmLocalFastbuildGenerator.cxx

@@ -12,8 +12,10 @@
 #include "cmGeneratorTarget.h"
 #include "cmGlobalFastbuildGenerator.h"
 #include "cmList.h"
+#include "cmLocalCommonGenerator.h"
 #include "cmMakefile.h"
 #include "cmObjectLocation.h"
+#include "cmStringAlgorithms.h"
 #include "cmSystemTools.h"
 #include "cmValue.h"
 #include "cmake.h"
@@ -63,12 +65,23 @@ void cmLocalFastbuildGenerator::ComputeObjectFilenames(
   std::map<cmSourceFile const*, cmObjectLocations>& mapping,
   std::string const& config, cmGeneratorTarget const* gt)
 {
+  char const* customExt = gt->GetCustomObjectExtension();
   for (auto& si : mapping) {
     cmSourceFile const* sf = si.first;
-    si.second.LongLoc =
-      this->GetObjectFileNameWithoutTarget(*sf, gt->ObjectDirectory);
+    si.second.LongLoc = this->GetObjectFileNameWithoutTarget(
+      *sf, gt->ObjectDirectory, nullptr, nullptr);
     this->FillCustomInstallObjectLocations(*sf, config, nullptr,
                                            si.second.InstallLongLoc);
+    // FASTBuild always appends output extension to the source file name.
+    // So if custom ext is ".ptx", then
+    // "kernelA.cu" will be outputted as "kernelA.cu.ptx",
+    // that's why we can't just replace ".cu" with ".ptx".
+    // This is needed to resolve $<TARGET_OBJECTS> genex correctly.
+    // Tested in "CudaOnly.ExportPTX" test.
+    if (customExt) {
+      si.second.LongLoc.Update(
+        cmStrCat(si.second.LongLoc.GetPath(), customExt));
+    }
   }
 }
 

+ 13 - 1
Tests/CudaOnly/ExportPTX/CMakeLists.txt

@@ -51,6 +51,18 @@ if(NOT bin_to_c)
     )
 endif()
 
+# FASTBuild has a bug where Exec nodes can't depend
+# on files produced by ObjectListNode... So, depend on the
+# target directly.
+# https://github.com/fastbuild/fastbuild/issues/1115
+
+if(NOT CMAKE_GENERATOR MATCHES "FASTBuild")
+  set(DEP "$<TARGET_OBJECTS:CudaPTX>")
+else()
+  # Depend on the target itself instead of its object files.
+  set(DEP "CudaPTX")
+endif()
+
 add_custom_command(
   OUTPUT "${output_file}"
   COMMAND ${CMAKE_COMMAND}
@@ -59,7 +71,7 @@ add_custom_command(
     "-DOUTPUT=${output_file}"
     -P ${CMAKE_CURRENT_SOURCE_DIR}/../utils/bin2c_wrapper.cmake
   VERBATIM
-  DEPENDS $<TARGET_OBJECTS:CudaPTX>
+  DEPENDS ${DEP}
   COMMENT "Converting Object files to a C header"
   )
 

+ 13 - 1
Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt

@@ -28,6 +28,18 @@ if(NOT bin_to_c)
     )
 endif()
 
+# FASTBuild has a bug where Exec nodes can't depend
+# on files produced by ObjectListNode... So, depend on the
+# target directly.
+# https://github.com/fastbuild/fastbuild/issues/1115
+
+if(NOT CMAKE_GENERATOR MATCHES "FASTBuild")
+  set(DEP "$<TARGET_OBJECTS:CudaPTX>")
+else()
+  # Depend on the target itself instead of its object files.
+  set(DEP "CudaPTX")
+endif()
+
 add_custom_command(
   OUTPUT "${output_file}"
   COMMAND ${CMAKE_COMMAND}
@@ -36,7 +48,7 @@ add_custom_command(
     "-DOUTPUT=${output_file}"
     -P ${CMAKE_CURRENT_SOURCE_DIR}/../utils/bin2c_wrapper.cmake
   VERBATIM
-  DEPENDS $<TARGET_OBJECTS:CudaPTX>
+  DEPENDS ${DEP}
   COMMENT "Converting Object files to a C header"
   )
 

+ 0 - 1
Tests/RunCMake/FASTBuild/CUDA-result.txt

@@ -1 +0,0 @@
-1

+ 0 - 4
Tests/RunCMake/FASTBuild/CUDA-stderr.txt

@@ -1,4 +0,0 @@
-^CMake Error at CUDA\.cmake:1 \(enable_language\):
-  The FASTBuild generator does not support CUDA yet\.
-Call Stack \(most recent call first\):
-  CMakeLists\.txt:[0-9]+ \(include\)

+ 0 - 1
Tests/RunCMake/FASTBuild/CUDA.cmake

@@ -1 +0,0 @@
-enable_language(CUDA)

+ 0 - 2
Tests/RunCMake/FASTBuild/RunCMakeTest.cmake

@@ -1,7 +1,5 @@
 include(RunCMake)
 
-run_cmake(CUDA)
-
 # Unity of size 1 doesn't make sense and shouldn't be created.
 run_cmake(Unity1)
 run_cmake(Unity2)