Browse Source

Merge topic 'cpack-archive-dedup'

770efceccb CPack: Avoid adding duplicate files to archive when combining components

Acked-by: Kitware Robot <[email protected]>
Tested-by: buildbot <[email protected]>
Merge-request: !8839
Brad King 2 years ago
parent
commit
f884a94a0b

+ 146 - 7
Source/CPack/cmCPackArchiveGenerator.cxx

@@ -5,6 +5,8 @@
 #include <cstring>
 #include <map>
 #include <ostream>
+#include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -17,6 +19,121 @@
 #include "cmValue.h"
 #include "cmWorkingDirectory.h"
 
+enum class DeduplicateStatus
+{
+  Skip,
+  Add,
+  Error
+};
+
+/**
+ * @class cmCPackArchiveGenerator::Deduplicator
+ * @brief A utility class for deduplicating files, folders, and symlinks.
+ *
+ * This class is responsible for identifying duplicate files, folders, and
+ * symlinks when generating an archive. It keeps track of the paths that have
+ * been processed and helps in deciding whether a new path should be added,
+ * skipped, or flagged as an error.
+ */
+class cmCPackArchiveGenerator::Deduplicator
+{
+private:
+  /**
+   * @brief Compares a file with already processed files.
+   *
+   * @param path The path of the file to compare.
+   * @param localTopLevel The top-level directory for the file.
+   * @return DeduplicateStatus indicating whether to add, skip, or flag an
+   * error for the file.
+   */
+  DeduplicateStatus CompareFile(const std::string& path,
+                                const std::string& localTopLevel)
+  {
+    auto fileItr = this->Files.find(path);
+    if (fileItr != this->Files.end()) {
+      return cmSystemTools::FilesDiffer(path, fileItr->second)
+        ? DeduplicateStatus::Error
+        : DeduplicateStatus::Skip;
+    }
+
+    this->Files[path] = cmStrCat(localTopLevel, "/", path);
+    return DeduplicateStatus::Add;
+  }
+
+  /**
+   * @brief Compares a folder with already processed folders.
+   *
+   * @param path The path of the folder to compare.
+   * @return DeduplicateStatus indicating whether to add or skip the folder.
+   */
+  DeduplicateStatus CompareFolder(const std::string& path)
+  {
+    if (this->Folders.find(path) != this->Folders.end()) {
+      return DeduplicateStatus::Skip;
+    }
+
+    this->Folders.emplace(path);
+    return DeduplicateStatus::Add;
+  }
+
+  /**
+   * @brief Compares a symlink with already processed symlinks.
+   *
+   * @param path The path of the symlink to compare.
+   * @return DeduplicateStatus indicating whether to add, skip, or flag an
+   * error for the symlink.
+   */
+  DeduplicateStatus CompareSymlink(const std::string& path)
+  {
+    auto symlinkItr = this->Symlink.find(path);
+    std::string symlinkValue;
+    auto status = cmSystemTools::ReadSymlink(path, symlinkValue);
+    if (!status.IsSuccess()) {
+      return DeduplicateStatus::Error;
+    }
+
+    if (symlinkItr != this->Symlink.end()) {
+      return symlinkValue == symlinkItr->second ? DeduplicateStatus::Skip
+                                                : DeduplicateStatus::Error;
+    }
+
+    this->Symlink[path] = symlinkValue;
+    return DeduplicateStatus::Add;
+  }
+
+public:
+  /**
+   * @brief Determines the deduplication status of a given path.
+   *
+   * This method identifies whether the given path is a file, folder, or
+   * symlink and then delegates to the appropriate comparison method.
+   *
+   * @param path The path to check for deduplication.
+   * @param localTopLevel The top-level directory for the path.
+   * @return DeduplicateStatus indicating the action to take for the given
+   * path.
+   */
+  DeduplicateStatus IsDeduplicate(const std::string& path,
+                                  const std::string& localTopLevel)
+  {
+    DeduplicateStatus status;
+    if (cmSystemTools::FileIsDirectory(path)) {
+      status = this->CompareFolder(path);
+    } else if (cmSystemTools::FileIsSymlink(path)) {
+      status = this->CompareSymlink(path);
+    } else {
+      status = this->CompareFile(path, localTopLevel);
+    }
+
+    return status;
+  }
+
+private:
+  std::unordered_map<std::string, std::string> Symlink;
+  std::unordered_set<std::string> Folders;
+  std::unordered_map<std::string, std::string> Files;
+};
+
 cmCPackGenerator* cmCPackArchiveGenerator::Create7ZGenerator()
 {
   return new cmCPackArchiveGenerator(cmArchiveWrite::CompressNone, "7zip",
@@ -110,7 +227,8 @@ int cmCPackArchiveGenerator::InitializeInternal()
 }
 
 int cmCPackArchiveGenerator::addOneComponentToArchive(
-  cmArchiveWrite& archive, cmCPackComponent* component)
+  cmArchiveWrite& archive, cmCPackComponent* component,
+  Deduplicator* deduplicator)
 {
   cmCPackLogger(cmCPackLog::LOG_VERBOSE,
                 "   - packaging component: " << component->Name << std::endl);
@@ -139,8 +257,25 @@ int cmCPackArchiveGenerator::addOneComponentToArchive(
   }
   for (std::string const& file : component->Files) {
     std::string rp = filePrefix + file;
-    cmCPackLogger(cmCPackLog::LOG_DEBUG, "Adding file: " << rp << std::endl);
-    archive.Add(rp, 0, nullptr, false);
+
+    DeduplicateStatus status = DeduplicateStatus::Add;
+    if (deduplicator != nullptr) {
+      status = deduplicator->IsDeduplicate(rp, localToplevel);
+    }
+
+    if (deduplicator == nullptr || status == DeduplicateStatus::Add) {
+      cmCPackLogger(cmCPackLog::LOG_DEBUG, "Adding file: " << rp << std::endl);
+      archive.Add(rp, 0, nullptr, false);
+    } else if (status == DeduplicateStatus::Error) {
+      cmCPackLogger(cmCPackLog::LOG_ERROR,
+                    "ERROR The data in files with the "
+                    "same filename is different.");
+      return 0;
+    } else {
+      cmCPackLogger(cmCPackLog::LOG_DEBUG,
+                    "Passing file: " << rp << std::endl);
+    }
+
     if (!archive) {
       cmCPackLogger(cmCPackLog::LOG_ERROR,
                     "ERROR while packaging files: " << archive.GetError()
@@ -197,6 +332,8 @@ int cmCPackArchiveGenerator::PackageComponents(bool ignoreGroup)
       std::string packageFileName = std::string(this->toplevel) + "/" +
         this->GetArchiveComponentFileName(compG.first, true);
 
+      Deduplicator deduplicator;
+
       // open a block in order to automatically close archive
       // at the end of the block
       {
@@ -204,7 +341,7 @@ int cmCPackArchiveGenerator::PackageComponents(bool ignoreGroup)
         // now iterate over the component of this group
         for (cmCPackComponent* comp : (compG.second).Components) {
           // Add the files of this component to the archive
-          this->addOneComponentToArchive(archive, comp);
+          this->addOneComponentToArchive(archive, comp, &deduplicator);
         }
       }
       // add the generated package to package file names list
@@ -231,7 +368,7 @@ int cmCPackArchiveGenerator::PackageComponents(bool ignoreGroup)
         {
           DECLARE_AND_OPEN_ARCHIVE(packageFileName, archive);
           // Add the files of this component to the archive
-          this->addOneComponentToArchive(archive, &(comp.second));
+          this->addOneComponentToArchive(archive, &(comp.second), nullptr);
         }
         // add the generated package to package file names list
         this->packageFileNames.push_back(std::move(packageFileName));
@@ -252,7 +389,7 @@ int cmCPackArchiveGenerator::PackageComponents(bool ignoreGroup)
       {
         DECLARE_AND_OPEN_ARCHIVE(packageFileName, archive);
         // Add the files of this component to the archive
-        this->addOneComponentToArchive(archive, &(comp.second));
+        this->addOneComponentToArchive(archive, &(comp.second), nullptr);
       }
       // add the generated package to package file names list
       this->packageFileNames.push_back(std::move(packageFileName));
@@ -282,10 +419,12 @@ int cmCPackArchiveGenerator::PackageComponentsAllInOne()
                   << std::endl);
   DECLARE_AND_OPEN_ARCHIVE(packageFileNames[0], archive);
 
+  Deduplicator deduplicator;
+
   // The ALL COMPONENTS in ONE package case
   for (auto& comp : this->Components) {
     // Add the files of this component to the archive
-    this->addOneComponentToArchive(archive, &(comp.second));
+    this->addOneComponentToArchive(archive, &(comp.second), &deduplicator);
   }
 
   // archive goes out of scope so it will finalized and closed.

+ 5 - 1
Source/CPack/cmCPackArchiveGenerator.h

@@ -47,6 +47,8 @@ private:
   std::string GetArchiveComponentFileName(const std::string& component,
                                           bool isGroupName);
 
+  class Deduplicator;
+
 protected:
   int InitializeInternal() override;
   /**
@@ -54,9 +56,11 @@ protected:
    * to the provided (already opened) archive.
    * @param[in,out] archive the archive object
    * @param[in] component the component whose file will be added to archive
+   * @param[in] deduplicator file deduplicator utility.
    */
   int addOneComponentToArchive(cmArchiveWrite& archive,
-                               cmCPackComponent* component);
+                               cmCPackComponent* component,
+                               Deduplicator* deduplicator);
 
   /**
    * The main package file method.

+ 1 - 0
Tests/RunCMake/CPack/RunCMakeTest.cmake

@@ -69,3 +69,4 @@ run_cpack_test_subtests(
 )
 run_cpack_test(PROJECT_META "RPM.PROJECT_META;DEB.PROJECT_META" false "MONOLITHIC;COMPONENT")
 run_cpack_test_package_target(PRE_POST_SCRIPTS "ZIP" false "MONOLITHIC;COMPONENT")
+run_cpack_test_subtests(DUPLICATE_FILE "success;conflict_file;conflict_symlink" "TGZ" false "COMPONENT;GROUP")

+ 16 - 0
Tests/RunCMake/CPack/tests/DUPLICATE_FILE/ExpectedFiles.cmake

@@ -0,0 +1,16 @@
+if(RunCMake_SUBTEST_SUFFIX STREQUAL "success")
+    if(PACKAGING_TYPE STREQUAL "COMPONENT")
+        set(EXPECTED_FILES_COUNT "1")
+        set(EXPECTED_FILE_CONTENT_1_LIST "/files;/files/1.txt;/files/2.txt;/files/3.txt;/files/4.txt;/files/5.txt;/files/6.txt;/files/7.txt;/files/8.txt;/files/symlink2")
+    elseif(PACKAGING_TYPE STREQUAL "GROUP")
+        set(EXPECTED_FILES_COUNT "3")
+        set(EXPECTED_FILE_1 "duplicate_file-0.1.1-*-g1.${cpack_archive_extension_}")
+        set(EXPECTED_FILE_CONTENT_1_LIST "/files;/files/1.txt;/files/2.txt;/files/3.txt;/files/4.txt;/files/5.txt;/files/6.txt;/files/symlink2")
+        set(EXPECTED_FILE_2 "duplicate_file-0.1.1-*-g2.${cpack_archive_extension_}")
+        set(EXPECTED_FILE_CONTENT_2_LIST "/files;/files/3.txt;/files/4.txt;/files/5.txt;/files/6.txt;/files/7.txt;/files/8.txt")
+        set(EXPECTED_FILE_3 "duplicate_file-0.1.1-*-c5.${cpack_archive_extension_}")
+        set(EXPECTED_FILE_CONTENT_3_LIST "/files;/files/5.txt;/files/6.txt;/files/7.txt;/files/8.txt;/files/9.txt")
+    endif ()
+else()
+    set(EXPECTED_FILES_COUNT "0")
+endif ()

+ 1 - 0
Tests/RunCMake/CPack/tests/DUPLICATE_FILE/conflict_file-stderr.txt

@@ -0,0 +1 @@
+CPack Error: ERROR The data in files with the same filename is different.*

+ 1 - 0
Tests/RunCMake/CPack/tests/DUPLICATE_FILE/conflict_symlink-stderr.txt

@@ -0,0 +1 @@
+CPack Error: ERROR The data in files with the same filename is different.*

+ 74 - 0
Tests/RunCMake/CPack/tests/DUPLICATE_FILE/test.cmake

@@ -0,0 +1,74 @@
+# Create files named 1 to 9
+foreach(i RANGE 1 9)
+    file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/${i}.txt" "This is file ${i}")
+endforeach()
+
+set(COMPONENT_NAMES c1 c2 c3 c4 c5)
+foreach(j RANGE 1 5)
+    # Select 4 file and install to the component
+    math(EXPR COMPONENT_IDX "${j} - 1")
+    list(GET COMPONENT_NAMES "${COMPONENT_IDX}" SELECTED_COMPONENT)
+    math(EXPR END_FILE "${j} + 4")
+    foreach(k RANGE ${j} ${END_FILE})
+        install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${k}.txt" DESTINATION "files" COMPONENT ${SELECTED_COMPONENT})
+    endforeach()
+endforeach()
+
+if(RunCMake_SUBTEST_SUFFIX STREQUAL "conflict_file")
+    file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/conflict/1.txt" "This should create a conflict.")
+    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/conflict/1.txt" DESTINATION "files" COMPONENT c2)
+endif ()
+
+# You cannot create symlink in Windows test environment. Instead mock the symlink.
+if(NOT CMAKE_HOST_WIN32)
+    file(CREATE_LINK "${CMAKE_CURRENT_BINARY_DIR}/2.txt" "${CMAKE_CURRENT_BINARY_DIR}/symlink2" SYMBOLIC)
+else()
+    file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/symlink2" "This is file 2")
+endif()
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/symlink2" DESTINATION "files" COMPONENT c1)
+
+if(RunCMake_SUBTEST_SUFFIX STREQUAL "conflict_symlink" AND NOT CMAKE_HOST_WIN32)
+    file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/conflict)
+    file(CREATE_LINK "${CMAKE_CURRENT_BINARY_DIR}/1.txt" "${CMAKE_CURRENT_BINARY_DIR}/conflict/symlink2" SYMBOLIC)
+    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/conflict/symlink2" DESTINATION "files" COMPONENT c2)
+elseif(RunCMake_SUBTEST_SUFFIX STREQUAL "conflict_symlink" AND CMAKE_HOST_WIN32)
+    file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/conflict/symlink2" "This should create a conflict.")
+    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/conflict/symlink2" DESTINATION "files" COMPONENT c2)
+else()
+    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/symlink2" DESTINATION "files" COMPONENT c2)
+endif ()
+
+
+if(PACKAGING_TYPE STREQUAL "COMPONENT")
+    set(CPACK_COMPONENTS_ALL_IN_ONE_PACKAGE ON)
+    set(CPACK_COMPONENTS_ALL "c1;c2;c3;c4")
+elseif(PACKAGING_TYPE STREQUAL "GROUP")
+    set(CPACK_COMPONENTS_ONE_PACKAGE_PER_GROUP ON)
+    set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
+    include(CPackComponent)
+
+    cpack_add_component_group(g1 DISPLAY_NAME "Group 1")
+    cpack_add_component_group(g2 DISPLAY_NAME "Group 2")
+    cpack_add_component(c1
+            DISPLAY_NAME "Group 1"
+            DESCRIPTION "Component for Group 1"
+            GROUP g1
+    )
+    cpack_add_component(c2
+            DISPLAY_NAME "Group 1"
+            DESCRIPTION "Component for Group 1"
+            GROUP g1
+    )
+    cpack_add_component(c3
+            DISPLAY_NAME "Group 2"
+            DESCRIPTION "Component for Group 2"
+            GROUP g2
+    )
+    cpack_add_component(c4
+            DISPLAY_NAME "Group 2"
+            DESCRIPTION "Component for Group 2"
+            GROUP g2
+    )
+
+    set(CPACK_${GENERATOR_TYPE}_PACKAGE_GROUP g1 g2)
+endif ()