ソースを参照

CUDA/Clang: Simplify --register-link-binaries logic

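Append each cubin to the dependency list after the register-file check, so
that the "first invocation" check can simply be empty() instead of
size() == 1.
With the Makefile generator, the --register-link-binaries option is now at
the front instead of being intermixed with the actual binaries.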
root 4 年前
コミット
3975678fcc
2 ファイル変更、15 行追加、15 行削除
  1. 10 10
      Source/cmMakefileTargetGenerator.cxx
  2. 5 5
      Source/cmNinjaNormalTargetGenerator.cxx

+ 10 - 10
Source/cmMakefileTargetGenerator.cxx

@@ -1519,22 +1519,13 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
 
 
   // Link device code for each architecture.
   // Link device code for each architecture.
   for (const std::string& architectureKind : architectures) {
   for (const std::string& architectureKind : architectures) {
-    // Clang always generates real code, so strip the specifier.
-    const std::string architecture =
-      architectureKind.substr(0, architectureKind.find('-'));
-    const std::string cubin =
-      cmStrCat(objectDir, "sm_", architecture, ".cubin");
-
-    profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
-    fatbinaryDepends.emplace_back(cubin);
-
     std::string registerFileCmd;
     std::string registerFileCmd;
 
 
     // The generated register file contains macros that when expanded
     // The generated register file contains macros that when expanded
     // register the device routines. Because the routines are the same for
     // register the device routines. Because the routines are the same for
     // all architectures the register file will be the same too. Thus
     // all architectures the register file will be the same too. Thus
     // generate it only on the first invocation to reduce overhead.
     // generate it only on the first invocation to reduce overhead.
-    if (fatbinaryDepends.size() == 1) {
+    if (fatbinaryDepends.empty()) {
       std::string const registerFileRel =
       std::string const registerFileRel =
         cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
         cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
       registerFileCmd =
       registerFileCmd =
@@ -1542,6 +1533,15 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
       cleanFiles.push_back(registerFileRel);
       cleanFiles.push_back(registerFileRel);
     }
     }
 
 
+    // Clang always generates real code, so strip the specifier.
+    const std::string architecture =
+      architectureKind.substr(0, architectureKind.find('-'));
+    const std::string cubin =
+      cmStrCat(objectDir, "sm_", architecture, ".cubin");
+
+    profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
+    fatbinaryDepends.emplace_back(cubin);
+
     std::string command = cmStrCat(
     std::string command = cmStrCat(
       this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
       this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
       " -arch=sm_", architecture, registerFileCmd, " -o=$@ ",
       " -arch=sm_", architecture, registerFileCmd, " -o=$@ ",

+ 5 - 5
Source/cmNinjaNormalTargetGenerator.cxx

@@ -753,10 +753,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
     const std::string cubin =
     const std::string cubin =
       cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin");
       cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin");
 
 
-    fatbinary.Variables["PROFILES"] +=
-      cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
-    fatbinary.ExplicitDeps.emplace_back(cubin);
-
     cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config));
     cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config));
     dlink.ExplicitDeps = explicitDeps;
     dlink.ExplicitDeps = explicitDeps;
     dlink.Outputs = { cubin };
     dlink.Outputs = { cubin };
@@ -766,11 +762,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
     // the device routines. Because the routines are the same for all
     // the device routines. Because the routines are the same for all
     // architectures the register file will be the same too. Thus generate it
     // architectures the register file will be the same too. Thus generate it
     // only on the first invocation to reduce overhead.
     // only on the first invocation to reduce overhead.
-    if (fatbinary.ExplicitDeps.size() == 1) {
+    if (fatbinary.ExplicitDeps.empty()) {
       dlink.Variables["REGISTER"] = cmStrCat(
       dlink.Variables["REGISTER"] = cmStrCat(
         "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h");
         "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h");
     }
     }
 
 
+    fatbinary.Variables["PROFILES"] +=
+      cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
+    fatbinary.ExplicitDeps.emplace_back(cubin);
+
     this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink);
     this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink);
   }
   }