Pārlūkot izejas kodu

Merge topic 'enable_ptx_compilation'

23691d78 CUDA: Allow sources to be compiled to .ptx files

Acked-by: Kitware Robot <[email protected]>
Merge-request: !725
Brad King 8 gadi atpakaļ
vecāks
revīzija
1d8f0f9181

+ 1 - 0
Help/manual/cmake-properties.7.rst

@@ -152,6 +152,7 @@ Properties on Targets
    /prop_tgt/CONFIG_OUTPUT_NAME
    /prop_tgt/CONFIG_POSTFIX
    /prop_tgt/CROSSCOMPILING_EMULATOR
+   /prop_tgt/CUDA_PTX_COMPILATION
    /prop_tgt/CUDA_SEPARABLE_COMPILATION
    /prop_tgt/CUDA_EXTENSIONS
    /prop_tgt/CUDA_STANDARD

+ 12 - 0
Help/prop_tgt/CUDA_PTX_COMPILATION.rst

@@ -0,0 +1,12 @@
+CUDA_PTX_COMPILATION
+--------------------
+
+Compile CUDA sources to ``.ptx`` files instead of ``.obj`` files
+within :ref:`Object Libraries`.
+
+For example:
+
+.. code-block:: cmake
+
+  add_library(myptx OBJECT a.cu b.cu)
+  set_property(TARGET myptx PROPERTY CUDA_PTX_COMPILATION ON)

+ 6 - 0
Help/release/dev/enable_ptx_compilation.rst

@@ -0,0 +1,6 @@
+enable_ptx_compilation
+----------------------
+
+* The :prop_tgt:`CUDA_PTX_COMPILATION` target property was added to
+  :ref:`Object Libraries` to support compiling to ``.ptx`` files
+  instead of host object files.

+ 12 - 0
Source/cmGeneratorTarget.cxx

@@ -536,6 +536,18 @@ const std::string& cmGeneratorTarget::GetObjectName(cmSourceFile const* file)
   return this->Objects[file];
 }
 
+const char* cmGeneratorTarget::GetCustomObjectExtension() const
+{
+  static std::string extension;
+  const bool has_ptx_extension =
+    this->GetPropertyAsBool("CUDA_PTX_COMPILATION");
+  if (has_ptx_extension) {
+    extension = ".ptx";
+    return extension.c_str();
+  }
+  return CM_NULLPTR;
+}
+
 void cmGeneratorTarget::AddExplicitObjectName(cmSourceFile const* sf)
 {
   this->ExplicitObjectName.insert(sf);

+ 1 - 0
Source/cmGeneratorTarget.h

@@ -124,6 +124,7 @@ public:
   void GetObjectSources(std::vector<cmSourceFile const*>&,
                         const std::string& config) const;
   const std::string& GetObjectName(cmSourceFile const* file);
+  const char* GetCustomObjectExtension() const;
 
   bool HasExplicitObjectName(cmSourceFile const* file) const;
   void AddExplicitObjectName(cmSourceFile const* sf);

+ 7 - 3
Source/cmLocalGenerator.cxx

@@ -2162,7 +2162,7 @@ bool cmLocalGenerator::IsNMake() const
 
 std::string cmLocalGenerator::GetObjectFileNameWithoutTarget(
   const cmSourceFile& source, std::string const& dir_max,
-  bool* hasSourceExtension)
+  bool* hasSourceExtension, char const* customOutputExtension)
 {
   // Construct the object file name using the full path to the source
   // file which is its only unique identification.
@@ -2223,7 +2223,7 @@ std::string cmLocalGenerator::GetObjectFileNameWithoutTarget(
     }
 
     // Remove the source extension if it is to be replaced.
-    if (replaceExt) {
+    if (replaceExt || customOutputExtension) {
       keptSourceExtension = false;
       std::string::size_type dot_pos = objectName.rfind('.');
       if (dot_pos != std::string::npos) {
@@ -2232,7 +2232,11 @@ std::string cmLocalGenerator::GetObjectFileNameWithoutTarget(
     }
 
     // Store the new extension.
-    objectName += this->GlobalGenerator->GetLanguageOutputExtension(source);
+    if (customOutputExtension) {
+      objectName += customOutputExtension;
+    } else {
+      objectName += this->GlobalGenerator->GetLanguageOutputExtension(source);
+    }
   }
   if (hasSourceExtension) {
     *hasSourceExtension = keptSourceExtension;

+ 2 - 1
Source/cmLocalGenerator.h

@@ -273,7 +273,8 @@ public:
   // Compute object file names.
   std::string GetObjectFileNameWithoutTarget(
     const cmSourceFile& source, std::string const& dir_max,
-    bool* hasSourceExtension = CM_NULLPTR);
+    bool* hasSourceExtension = CM_NULLPTR,
+    char const* customOutputExtension = CM_NULLPTR);
 
   /** Fill out the static linker flags for the given target.  */
   void GetStaticLibraryFlags(std::string& flags, std::string const& config,

+ 5 - 2
Source/cmLocalNinjaGenerator.cxx

@@ -249,12 +249,15 @@ void cmLocalNinjaGenerator::ComputeObjectFilenames(
   std::map<cmSourceFile const*, std::string>& mapping,
   cmGeneratorTarget const* gt)
 {
+  // Determine if these object files should use a custom extension
+  char const* custom_ext = gt->GetCustomObjectExtension();
   for (std::map<cmSourceFile const*, std::string>::iterator si =
          mapping.begin();
        si != mapping.end(); ++si) {
     cmSourceFile const* sf = si->first;
-    si->second =
-      this->GetObjectFileNameWithoutTarget(*sf, gt->ObjectDirectory);
+    bool keptSourceExtension;
+    si->second = this->GetObjectFileNameWithoutTarget(
+      *sf, gt->ObjectDirectory, &keptSourceExtension, custom_ext);
   }
 }
 

+ 5 - 2
Source/cmLocalUnixMakefileGenerator3.cxx

@@ -159,12 +159,15 @@ void cmLocalUnixMakefileGenerator3::ComputeObjectFilenames(
   std::map<cmSourceFile const*, std::string>& mapping,
   cmGeneratorTarget const* gt)
 {
+  // Determine if these object files should use a custom extension
+  char const* custom_ext = gt->GetCustomObjectExtension();
   for (std::map<cmSourceFile const*, std::string>::iterator si =
          mapping.begin();
        si != mapping.end(); ++si) {
     cmSourceFile const* sf = si->first;
-    si->second =
-      this->GetObjectFileNameWithoutTarget(*sf, gt->ObjectDirectory);
+    bool keptSourceExtension;
+    si->second = this->GetObjectFileNameWithoutTarget(
+      *sf, gt->ObjectDirectory, &keptSourceExtension, custom_ext);
   }
 }
 

+ 15 - 3
Source/cmLocalVisualStudioGenerator.cxx

@@ -32,6 +32,7 @@ void cmLocalVisualStudioGenerator::ComputeObjectFilenames(
   std::map<cmSourceFile const*, std::string>& mapping,
   cmGeneratorTarget const* gt)
 {
+  char const* custom_ext = gt->GetCustomObjectExtension();
   std::string dir_max = this->ComputeLongestObjectDirectory(gt);
 
   // Count the number of object files with each name.  Note that
@@ -44,7 +45,12 @@ void cmLocalVisualStudioGenerator::ComputeObjectFilenames(
     cmSourceFile const* sf = si->first;
     std::string objectNameLower = cmSystemTools::LowerCase(
       cmSystemTools::GetFilenameWithoutLastExtension(sf->GetFullPath()));
-    objectNameLower += this->GlobalGenerator->GetLanguageOutputExtension(*sf);
+    if (custom_ext) {
+      objectNameLower += custom_ext;
+    } else {
+      objectNameLower +=
+        this->GlobalGenerator->GetLanguageOutputExtension(*sf);
+    }
     counts[objectNameLower] += 1;
   }
 
@@ -57,10 +63,16 @@ void cmLocalVisualStudioGenerator::ComputeObjectFilenames(
     cmSourceFile const* sf = si->first;
     std::string objectName =
       cmSystemTools::GetFilenameWithoutLastExtension(sf->GetFullPath());
-    objectName += this->GlobalGenerator->GetLanguageOutputExtension(*sf);
+    if (custom_ext) {
+      objectName += custom_ext;
+    } else {
+      objectName += this->GlobalGenerator->GetLanguageOutputExtension(*sf);
+    }
     if (counts[cmSystemTools::LowerCase(objectName)] > 1) {
       const_cast<cmGeneratorTarget*>(gt)->AddExplicitObjectName(sf);
-      objectName = this->GetObjectFileNameWithoutTarget(*sf, dir_max);
+      bool keptSourceExtension;
+      objectName = this->GetObjectFileNameWithoutTarget(
+        *sf, dir_max, &keptSourceExtension, custom_ext);
     }
     si->second = objectName;
   }

+ 3 - 0
Source/cmMakefileTargetGenerator.cxx

@@ -589,6 +589,9 @@ void cmMakefileTargetGenerator::WriteObjectBuildFile(
       if (this->GeneratorTarget->GetPropertyAsBool(
             "CUDA_SEPARABLE_COMPILATION")) {
         cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION");
+      } else if (this->GeneratorTarget->GetPropertyAsBool(
+                   "CUDA_PTX_COMPILATION")) {
+        cmdVar = std::string("CMAKE_CUDA_COMPILE_PTX_COMPILATION");
       } else {
         cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION");
       }

+ 3 - 0
Source/cmNinjaTargetGenerator.cxx

@@ -589,6 +589,9 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang)
     if (this->GeneratorTarget->GetPropertyAsBool(
           "CUDA_SEPARABLE_COMPILATION")) {
       cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION");
+    } else if (this->GeneratorTarget->GetPropertyAsBool(
+                 "CUDA_PTX_COMPILATION")) {
+      cmdVar = std::string("CMAKE_CUDA_COMPILE_PTX_COMPILATION");
     } else {
       cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION");
     }

+ 8 - 0
Source/cmTarget.cxx

@@ -941,6 +941,14 @@ void cmTarget::SetProperty(const std::string& prop, const char* value)
   } else if (cmHasLiteralPrefix(prop, "IMPORTED_LIBNAME") &&
              !this->CheckImportedLibName(prop, value ? value : "")) {
     /* error was reported by check method */
+  } else if (prop == "CUDA_PTX_COMPILATION" &&
+             this->GetType() != cmStateEnums::OBJECT_LIBRARY) {
+    std::ostringstream e;
+    e << "CUDA_PTX_COMPILATION property can only be applied to OBJECT "
+         "targets (\""
+      << this->Name << "\")\n";
+    this->Makefile->IssueMessage(cmake::FATAL_ERROR, e.str());
+    return;
   } else {
     this->Properties.SetProperty(prop, value);
   }

+ 6 - 0
Source/cmVisualStudio10TargetGenerator.cxx

@@ -2467,6 +2467,12 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
 
   if (this->GeneratorTarget->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION")) {
     cudaOptions.AddFlag("GenerateRelocatableDeviceCode", "true");
+  } else if (this->GeneratorTarget->GetPropertyAsBool(
+               "CUDA_PTX_COMPILATION")) {
+    cudaOptions.AddFlag("NvccCompilation", "ptx");
+    // We drop the %(Extension) component as CMake expects all PTX files
+    // to not have the source file extension at all
+    cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx");
   }
 
   // Convert the host compiler options to the toolset's abstractions

+ 1 - 0
Tests/CudaOnly/CMakeLists.txt

@@ -1,4 +1,5 @@
 
 ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
+ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
 ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
 ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)

+ 82 - 0
Tests/CudaOnly/ExportPTX/CMakeLists.txt

@@ -0,0 +1,82 @@
+cmake_minimum_required(VERSION 3.8)
+project (CudaOnlyExportPTX CUDA)
+
+#Goal for this example:
+# How to generate PTX files instead of OBJECT files
+# How to reference PTX files for custom commands
+# How to install PTX files
+
+add_library(CudaPTX OBJECT kernelA.cu kernelB.cu)
+set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON)
+
+#Test ObjectFiles with file(GENERATE)
+file(GENERATE
+     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/gen_$<LOWER_CASE:$<CONFIG>/>path_to_objs.h
+     CONTENT [[
+
+#include <vector>
+#include <string>
+
+#ifndef path_to_objs
+#define path_to_objs
+
+static std::string ptx_paths = "$<TARGET_OBJECTS:CudaPTX>";
+
+#endif
+
+]]
+)
+#We are going to need a wrapper around bin2c for multiple reasons
+# 1. bin2c only converts a single file at a time
+# 2. bin2c has only standard out support, so we have to manually
+# redirect to a cmake buffer
+# 3. We want to pack everything into a single output file, so we
+# need to also pass the --name option
+set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h)
+
+get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY)
+find_program(bin_to_c
+  NAMES bin2c
+  PATHS ${cuda_compiler_bin}
+  )
+if(NOT bin_to_c)
+  message(FATAL_ERROR
+    "bin2c not found:\n"
+    "  CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n"
+    "  cuda_compiler_bin='${cuda_compiler_bin}'\n"
+    )
+endif()
+
+add_custom_command(
+  OUTPUT "${output_file}"
+  COMMAND ${CMAKE_COMMAND}
+    "-DBIN_TO_C_COMMAND=${bin_to_c}"
+    "-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>"
+    "-DOUTPUT=${output_file}"
+    -P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake
+  VERBATIM
+  DEPENDS $<TARGET_OBJECTS:CudaPTX>
+  COMMENT "Converting Object files to a C header"
+  )
+
+add_executable(CudaOnlyExportPTX main.cu ${output_file})
+add_dependencies(CudaOnlyExportPTX CudaPTX)
+target_include_directories(CudaOnlyExportPTX PRIVATE
+                           ${CMAKE_CURRENT_BINARY_DIR} )
+target_compile_definitions(CudaOnlyExportPTX PRIVATE
+                           "CONFIG_TYPE=gen_$<LOWER_CASE:$<CONFIG>>")
+
+if(APPLE)
+  # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+  # the static cuda runtime can find it at runtime.
+  target_link_libraries(CudaOnlyExportPTX PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
+
+#Verify that we can install object targets properly
+install(TARGETS CudaPTX CudaOnlyExportPTX
+   EXPORT cudaPTX
+   RUNTIME DESTINATION bin
+   LIBRARY DESTINATION lib
+   OBJECTS DESTINATION objs
+   )
+install(EXPORT cudaPTX DESTINATION lib/cudaPTX)

+ 19 - 0
Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake

@@ -0,0 +1,19 @@
+
+set(file_contents)
+foreach(obj ${OBJECTS})
+  get_filename_component(obj_ext ${obj} EXT)
+  get_filename_component(obj_name ${obj} NAME_WE)
+  get_filename_component(obj_dir ${obj} DIRECTORY)
+
+  if(obj_ext MATCHES ".ptx")
+    set(args --name ${obj_name} ${obj})
+    execute_process(COMMAND "${BIN_TO_C_COMMAND}" ${args}
+                    WORKING_DIRECTORY ${obj_dir}
+                    RESULT_VARIABLE result
+                    OUTPUT_VARIABLE output
+                    ERROR_VARIABLE error_var
+                    )
+    set(file_contents "${file_contents} \n${output}")
+  endif()
+endforeach()
+file(WRITE "${OUTPUT}" "${file_contents}")

+ 7 - 0
Tests/CudaOnly/ExportPTX/kernelA.cu

@@ -0,0 +1,7 @@
+
+__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
+{
+  for (int i = threadIdx.x; i < size; i += blockDim.x) {
+    r[i] = x[i] * y[i] + z[i];
+  }
+}

+ 8 - 0
Tests/CudaOnly/ExportPTX/kernelB.cu

@@ -0,0 +1,8 @@
+
+
+__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
+{
+  for (int i = threadIdx.x; i < size; i += blockDim.x) {
+    r[i] = x[i] * y[i] + z[i];
+  }
+}

+ 28 - 0
Tests/CudaOnly/ExportPTX/main.cu

@@ -0,0 +1,28 @@
+
+#include <iostream>
+
+/*
+  Define GENERATED_HEADER macro to allow c++ files to include headers
+  generated based on different configuration types.
+*/
+
+/* clang-format off */
+#define GENERATED_HEADER(x) GENERATED_HEADER0(CONFIG_TYPE/x)
+/* clang-format on */
+#define GENERATED_HEADER0(x) GENERATED_HEADER1(x)
+#define GENERATED_HEADER1(x) <x>
+
+#include GENERATED_HEADER(path_to_objs.h)
+
+#include "embedded_objs.h"
+
+int main(int argc, char** argv)
+{
+  (void)argc;
+  (void)argv;
+
+  unsigned char* ka = kernelA;
+  unsigned char* kb = kernelB;
+
+  return (ka != NULL && kb != NULL) ? 0 : 1;
+}