Browse Source

CTest: Allocate hardware to tests

Kyle Edwards 6 years ago
parent
commit
e34de0691b

+ 136 - 4
Source/CTest/cmCTestMultiProcessHandler.cxx

@@ -3,8 +3,10 @@
 #include "cmCTestMultiProcessHandler.h"
 
 #include <algorithm>
+#include <cassert>
 #include <chrono>
 #include <cmath>
+#include <cstddef>
 #include <cstdlib>
 #include <cstring>
 #include <iomanip>
@@ -27,6 +29,7 @@
 #include "cmAffinity.h"
 #include "cmAlgorithms.h"
 #include "cmCTest.h"
+#include "cmCTestBinPacker.h"
 #include "cmCTestRunTest.h"
 #include "cmCTestTestHandler.h"
 #include "cmDuration.h"
@@ -133,6 +136,12 @@ void cmCTestMultiProcessHandler::RunTests()
   uv_run(&this->Loop, UV_RUN_DEFAULT);
   uv_loop_close(&this->Loop);
 
+  if (!this->StopTimePassed) {
+    assert(this->Completed == this->Total);
+    assert(this->Tests.empty());
+  }
+  assert(this->AllHardwareAvailable());
+
   this->MarkFinished();
   this->UpdateCostData();
 }
@@ -168,6 +177,10 @@ bool cmCTestMultiProcessHandler::StartTestProcess(int test)
   }
   testRun->SetIndex(test);
   testRun->SetTestProperties(this->Properties[test]);
+  if (this->TestHandler->UseHardwareSpec) {
+    testRun->SetUseAllocatedHardware(true);
+    testRun->SetAllocatedHardware(this->AllocatedHardware[test]);
+  }
 
   // Find any failed dependencies for this test. We assume the more common
   // scenario has no failed tests, so make it the outer loop.
@@ -179,7 +192,13 @@ bool cmCTestMultiProcessHandler::StartTestProcess(int test)
 
   // Always lock the resources we'll be using, even if we fail to set the
   // working directory because FinishTestProcess() will try to unlock them
-  this->LockResources(test);
+  this->AllocateResources(test);
+
+  if (!this->TestsHaveSufficientHardware[test]) {
+    testRun->StartFailure("Insufficient hardware");
+    this->FinishTestProcess(testRun, false);
+    return false;
+  }
 
   cmWorkingDirectory workdir(this->Properties[test]->Directory);
   if (workdir.Failed()) {
@@ -199,6 +218,110 @@ bool cmCTestMultiProcessHandler::StartTestProcess(int test)
   return false;
 }
 
+/**
+ * Reserve the hardware that test \a index needs and record what was
+ * reserved in AllocatedHardware[index].
+ *
+ * Returns true when no hardware spec is in use, or when every request
+ * planned by TryAllocateHardware() has been reserved.  Returns false,
+ * before this function reserves anything, when TryAllocateHardware()
+ * reports that the test cannot be satisfied.
+ */
+bool cmCTestMultiProcessHandler::AllocateHardware(int index)
+{
+  // Without a hardware spec there is nothing to reserve.
+  if (!this->TestHandler->UseHardwareSpec) {
+    return true;
+  }
+
+  std::map<std::string, std::vector<cmCTestBinPackerAllocation>> plan;
+  bool const planned = this->TryAllocateHardware(index, plan);
+  if (!planned) {
+    return false;
+  }
+
+  // One entry per process of the test; the entries are filled in below.
+  auto& perProcess = this->AllocatedHardware[index];
+  perProcess.resize(this->Properties[index]->Processes.size());
+
+  for (auto const& typeEntry : plan) {
+    std::string const& resourceType = typeEntry.first;
+    for (auto const& request : typeEntry.second) {
+      // The plan was computed from the allocator's current resources, so
+      // committing it is asserted to succeed.
+      bool reserved = this->HardwareAllocator.AllocateResource(
+        resourceType, request.Id, request.SlotsNeeded);
+      (void)reserved;
+      assert(reserved);
+      perProcess[request.ProcessIndex][resourceType].push_back(
+        { request.Id, static_cast<unsigned int>(request.SlotsNeeded) });
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Compute a hardware assignment for test \a index without reserving
+ * anything in the allocator.
+ *
+ * \a allocations is cleared and then filled with one request per needed
+ * unit, grouped by resource type.  cmAllocateCTestHardwareRoundRobin() is
+ * then run on each group against the allocator's current resources.
+ *
+ * Returns false if a requested resource type is not known to the
+ * allocator, or if the allocation routine fails for any type.  In that
+ * case \a allocations may have been processed only partially.
+ */
+bool cmCTestMultiProcessHandler::TryAllocateHardware(
+  int index,
+  std::map<std::string, std::vector<cmCTestBinPackerAllocation>>& allocations)
+{
+  allocations.clear();
+
+  // Expand each requirement into one request per unit.  The last field is
+  // left empty here; AllocateHardware() reads alloc.Id after this call, so
+  // it is presumably filled in by the allocation routine below.
+  std::size_t processIndex = 0;
+  for (auto const& process : this->Properties[index]->Processes) {
+    for (auto const& requirement : process) {
+      for (int i = 0; i < requirement.UnitsNeeded; ++i) {
+        allocations[requirement.ResourceType].push_back(
+          { processIndex, requirement.SlotsNeeded, "" });
+      }
+    }
+    ++processIndex;
+  }
+
+  auto const& availableHardware = this->HardwareAllocator.GetResources();
+  for (auto& it : allocations) {
+    // Look the resource type up once instead of count() followed by at().
+    auto const available = availableHardware.find(it.first);
+    if (available == availableHardware.end()) {
+      return false;
+    }
+    if (!cmAllocateCTestHardwareRoundRobin(available->second, it.second)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Return to the allocator every resource recorded for test \a index and
+ * drop the record from AllocatedHardware.
+ *
+ * Does nothing when no hardware spec is in use or when nothing is
+ * recorded for the test.
+ */
+void cmCTestMultiProcessHandler::DeallocateHardware(int index)
+{
+  if (!this->TestHandler->UseHardwareSpec) {
+    return;
+  }
+
+  // Use find() rather than operator[] so that a test with no recorded
+  // allocation does not get an empty entry inserted just to be erased.
+  auto const entry = this->AllocatedHardware.find(index);
+  if (entry == this->AllocatedHardware.end()) {
+    return;
+  }
+
+  for (auto const& processAlloc : entry->second) {
+    for (auto const& it : processAlloc) {
+      // Bind by reference: the type name is only read, never modified.
+      std::string const& resourceType = it.first;
+      for (auto const& it2 : it.second) {
+        bool success = this->HardwareAllocator.DeallocateResource(
+          resourceType, it2.Id, it2.Slots);
+        (void)success;
+        assert(success);
+      }
+    }
+  }
+  this->AllocatedHardware.erase(entry);
+}
+
+/**
+ * Report whether every resource known to the hardware allocator has a
+ * Locked value of zero.
+ */
+bool cmCTestMultiProcessHandler::AllHardwareAvailable()
+{
+  auto const& resourcesByType = this->HardwareAllocator.GetResources();
+  for (auto const& typeEntry : resourcesByType) {
+    for (auto const& resource : typeEntry.second) {
+      bool const idle = (resource.second.Locked == 0);
+      if (idle) {
+        continue;
+      }
+      // Found a resource that is still locked.
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * For every test in SortedTests, record in TestsHaveSufficientHardware
+ * whether its hardware requirements can be met.
+ *
+ * When no hardware spec is in use every test is recorded as sufficient.
+ * Otherwise the answer is the result of a trial TryAllocateHardware()
+ * call; the trial result itself is discarded.
+ */
+void cmCTestMultiProcessHandler::CheckHardwareAvailable()
+{
+  for (auto test : this->SortedTests) {
+    bool sufficient = true;
+    if (this->TestHandler->UseHardwareSpec) {
+      std::map<std::string, std::vector<cmCTestBinPackerAllocation>> scratch;
+      sufficient = this->TryAllocateHardware(test, scratch);
+    }
+    this->TestsHaveSufficientHardware[test] = sufficient;
+  }
+}
+
 bool cmCTestMultiProcessHandler::CheckStopTimePassed()
 {
   if (!this->StopTimePassed) {
@@ -223,7 +346,7 @@ void cmCTestMultiProcessHandler::SetStopTimePassed()
   }
 }
 
-void cmCTestMultiProcessHandler::LockResources(int index)
+void cmCTestMultiProcessHandler::AllocateResources(int index)
 {
   this->LockedResources.insert(
     this->Properties[index]->LockedResources.begin(),
@@ -234,7 +357,7 @@ void cmCTestMultiProcessHandler::LockResources(int index)
   }
 }
 
-void cmCTestMultiProcessHandler::UnlockResources(int index)
+void cmCTestMultiProcessHandler::DeallocateResources(int index)
 {
   for (std::string const& i : this->Properties[index]->LockedResources) {
     this->LockedResources.erase(i);
@@ -281,12 +404,20 @@ bool cmCTestMultiProcessHandler::StartTest(int test)
     }
   }
 
+  // Allocate hardware
+  if (this->TestsHaveSufficientHardware[test] &&
+      !this->AllocateHardware(test)) {
+    this->DeallocateHardware(test);
+    return false;
+  }
+
   // if there are no depends left then run this test
   if (this->Tests[test].empty()) {
     return this->StartTestProcess(test);
   }
   // This test was not able to start because it is waiting
   // on depends to run
+  this->DeallocateHardware(test);
   return false;
 }
 
@@ -471,7 +602,8 @@ void cmCTestMultiProcessHandler::FinishTestProcess(cmCTestRunTest* runner,
   this->TestFinishMap[test] = true;
   this->TestRunningMap[test] = false;
   this->WriteCheckpoint(test);
-  this->UnlockResources(test);
+  this->DeallocateHardware(test);
+  this->DeallocateResources(test);
   this->RunningCount -= GetProcessorsUsed(test);
 
   for (auto p : properties->Affinity) {

+ 31 - 2
Source/CTest/cmCTestMultiProcessHandler.h

@@ -14,10 +14,13 @@
 
 #include "cm_uv.h"
 
+#include "cmCTestHardwareAllocator.h"
 #include "cmCTestTestHandler.h"
 #include "cmUVHandlePtr.h"
 
 class cmCTest;
+struct cmCTestBinPackerAllocation;
+class cmCTestHardwareSpec;
 class cmCTestRunTest;
 
 /** \class cmCTestMultiProcessHandler
@@ -44,6 +47,11 @@ public:
     : public std::map<int, cmCTestTestHandler::cmCTestTestProperties*>
   {
   };
+  struct HardwareAllocation // one resource reserved for a test process
+  {
+    std::string Id; // resource identifier, as passed to the allocator
+    unsigned int Slots; // number of slots reserved on that resource
+  };
 
   cmCTestMultiProcessHandler();
   virtual ~cmCTestMultiProcessHandler();
@@ -79,6 +87,13 @@ public:
 
   void SetQuiet(bool b) { this->Quiet = b; }
 
+  /** Initialize the hardware allocator from the hardware spec \a spec.  */
+  void InitHardwareAllocator(cmCTestHardwareSpec const& spec)
+  {
+    this->HardwareAllocator.InitializeFromHardwareSpec(spec);
+  }
+
+  void CheckHardwareAvailable();
+
 protected:
   // Start the next test or tests as many as are allowed by
   // ParallelLevel
@@ -119,8 +134,17 @@ protected:
   bool CheckStopTimePassed();
   void SetStopTimePassed();
 
-  void LockResources(int index);
-  void UnlockResources(int index);
+  void AllocateResources(int index);
+  void DeallocateResources(int index);
+
+  bool AllocateHardware(int index);
+  bool TryAllocateHardware(
+    int index,
+    std::map<std::string, std::vector<cmCTestBinPackerAllocation>>&
+      allocations);
+  void DeallocateHardware(int index);
+  bool AllHardwareAvailable();
+
   // map from test number to set of depend tests
   TestMap Tests;
   TestList SortedTests;
@@ -141,6 +165,11 @@ protected:
   std::vector<std::string>* Failed;
   std::vector<std::string> LastTestsFailed;
   std::set<std::string> LockedResources;
+  std::map<int,
+           std::vector<std::map<std::string, std::vector<HardwareAllocation>>>>
+    AllocatedHardware;
+  std::map<int, bool> TestsHaveSufficientHardware;
+  cmCTestHardwareAllocator HardwareAllocator;
   std::vector<cmCTestTestHandler::cmCTestTestResult>* TestResults;
   size_t ParallelLevel; // max number of process that can be run at once
   unsigned long TestLoad;

+ 43 - 0
Source/CTest/cmCTestRunTest.cxx

@@ -3,6 +3,7 @@
 #include "cmCTestRunTest.h"
 
 #include <chrono>
+#include <cstddef>
 #include <cstdint>
 #include <cstdio>
 #include <cstring>
@@ -689,10 +690,52 @@ bool cmCTestRunTest::ForkProcess(cmDuration testTimeOut, bool explicitTimeout,
     cmSystemTools::AppendEnv(*environment);
   }
 
+  if (this->UseAllocatedHardware) {
+    this->SetupHardwareEnvironment();
+  } else {
+    cmSystemTools::UnsetEnv("CTEST_PROCESS_COUNT");
+  }
+
   return this->TestProcess->StartProcess(this->MultiTestHandler.Loop,
                                          affinity);
 }
 
+/**
+ * Publish the hardware allocated to this test through environment
+ * variables set with cmSystemTools::PutEnv():
+ *
+ *   CTEST_PROCESS_COUNT=<number of processes>
+ *   CTEST_PROCESS_<i>=<type>[,<type>...]
+ *   CTEST_PROCESS_<i>_<TYPE>=id:<id>,slots:<n>[;id:<id>,slots:<n>...]
+ *
+ * <i> is the zero-based process index and <TYPE> is the resource type
+ * converted to upper case.
+ */
+void cmCTestRunTest::SetupHardwareEnvironment()
+{
+  cmSystemTools::PutEnv("CTEST_PROCESS_COUNT=" +
+                        std::to_string(this->AllocatedHardware.size()));
+
+  std::size_t processIndex = 0;
+  for (auto const& process : this->AllocatedHardware) {
+    std::string const processVar =
+      "CTEST_PROCESS_" + std::to_string(processIndex);
+    std::string const typePrefix = processVar + '_';
+
+    // Comma-separated list of the resource types this process received.
+    std::string typeList = processVar + '=';
+    char const* typeSeparator = "";
+    for (auto const& typeEntry : process) {
+      std::string const& resourceType = typeEntry.first;
+      typeList += typeSeparator;
+      typeList += resourceType;
+      typeSeparator = ",";
+
+      // Semicolon-separated list of the allocations of this type.
+      std::string typeVar =
+        typePrefix + cmSystemTools::UpperCase(resourceType) + '=';
+      char const* allocSeparator = "";
+      for (auto const& alloc : typeEntry.second) {
+        typeVar += allocSeparator;
+        typeVar += "id:";
+        typeVar += alloc.Id;
+        typeVar += ",slots:";
+        typeVar += std::to_string(alloc.Slots);
+        allocSeparator = ";";
+      }
+      cmSystemTools::PutEnv(typeVar);
+    }
+    cmSystemTools::PutEnv(typeList);
+    ++processIndex;
+  }
+}
+
 void cmCTestRunTest::WriteLogOutputTop(size_t completed, size_t total)
 {
   std::ostringstream outputStream;

+ 18 - 1
Source/CTest/cmCTestRunTest.h

@@ -5,6 +5,7 @@
 
 #include "cmConfigure.h" // IWYU pragma: keep
 
+#include <map>
 #include <memory>
 #include <set>
 #include <string>
@@ -12,12 +13,12 @@
 
 #include <stddef.h>
 
+#include "cmCTestMultiProcessHandler.h"
 #include "cmCTestTestHandler.h"
 #include "cmDuration.h"
 #include "cmProcess.h"
 
 class cmCTest;
-class cmCTestMultiProcessHandler;
 
 /** \class cmRunTest
  * \brief represents a single test to be run
@@ -83,6 +84,16 @@ public:
 
   bool TimedOutForStopTime() const { return this->TimeoutIsForStopTime; }
 
+  /** Set the UseAllocatedHardware flag of this test run to \a use.  */
+  void SetUseAllocatedHardware(bool use)
+  {
+    this->UseAllocatedHardware = use;
+  }
+  /**
+   * Store a copy of the hardware allocated to this test: a vector of
+   * maps, each from a string key to a list of HardwareAllocation entries.
+   */
+  void SetAllocatedHardware(
+    std::vector<
+      std::map<std::string,
+               std::vector<cmCTestMultiProcessHandler::HardwareAllocation>>>
+      const& hardware)
+  {
+    this->AllocatedHardware = hardware;
+  }
+
 private:
   bool NeedsToRerun();
   void DartProcessing();
@@ -94,6 +105,8 @@ private:
   // Run post processing of the process output for MemCheck
   void MemCheckPostProcess();
 
+  void SetupHardwareEnvironment();
+
   // Returns "completed/total Test #Index: "
   std::string GetTestPrefix(size_t completed, size_t total) const;
 
@@ -112,6 +125,10 @@ private:
   std::string StartTime;
   std::string ActualCommand;
   std::vector<std::string> Arguments;
+  bool UseAllocatedHardware = false;
+  std::vector<std::map<
+    std::string, std::vector<cmCTestMultiProcessHandler::HardwareAllocation>>>
+    AllocatedHardware;
   bool RunUntilFail;
   int NumberOfRunsLeft;
   bool RunAgain;

+ 4 - 0
Source/CTest/cmCTestTestCommand.cxx

@@ -32,6 +32,7 @@ void cmCTestTestCommand::BindArguments()
   this->Bind("SCHEDULE_RANDOM"_s, this->ScheduleRandom);
   this->Bind("STOP_TIME"_s, this->StopTime);
   this->Bind("TEST_LOAD"_s, this->TestLoad);
+  this->Bind("HARDWARE_SPEC_FILE"_s, this->HardwareSpecFile);
 }
 
 cmCTestGenericHandler* cmCTestTestCommand::InitializeHandler()
@@ -87,6 +88,9 @@ cmCTestGenericHandler* cmCTestTestCommand::InitializeHandler()
   if (!this->ScheduleRandom.empty()) {
     handler->SetOption("ScheduleRandom", this->ScheduleRandom.c_str());
   }
+  if (!this->HardwareSpecFile.empty()) {
+    handler->SetOption("HardwareSpecFile", this->HardwareSpecFile.c_str());
+  }
   if (!this->StopTime.empty()) {
     this->CTest->SetStopTime(this->StopTime);
   }

+ 1 - 0
Source/CTest/cmCTestTestCommand.h

@@ -58,6 +58,7 @@ protected:
   std::string ScheduleRandom;
   std::string StopTime;
   std::string TestLoad;
+  std::string HardwareSpecFile;
 };
 
 #endif

+ 15 - 0
Source/CTest/cmCTestTestHandler.cxx

@@ -289,6 +289,7 @@ cmCTestTestHandler::cmCTestTestHandler()
   this->UseIncludeRegExpFlag = false;
   this->UseExcludeRegExpFlag = false;
   this->UseExcludeRegExpFirst = false;
+  this->UseHardwareSpec = false;
 
   this->CustomMaximumPassedTestOutputSize = 1 * 1024;
   this->CustomMaximumFailedTestOutputSize = 300 * 1024;
@@ -509,6 +510,16 @@ bool cmCTestTestHandler::ProcessOptions()
   }
   this->SetRerunFailed(cmIsOn(this->GetOption("RerunFailed")));
 
+  val = this->GetOption("HardwareSpecFile");
+  if (val) {
+    this->UseHardwareSpec = true;
+    if (!this->HardwareSpec.ReadFromJSONFile(val)) {
+      cmCTestLog(this->CTest, ERROR_MESSAGE,
+                 "Could not read hardware spec file: " << val << std::endl);
+      return false;
+    }
+  }
+
   return true;
 }
 
@@ -1226,6 +1237,9 @@ void cmCTestTestHandler::ProcessDirectory(std::vector<std::string>& passed,
   } else {
     parallel->SetTestLoad(this->CTest->GetTestLoad());
   }
+  if (this->UseHardwareSpec) {
+    parallel->InitHardwareAllocator(this->HardwareSpec);
+  }
 
   *this->LogFile
     << "Start testing: " << this->CTest->CurrentTime() << std::endl
@@ -1269,6 +1283,7 @@ void cmCTestTestHandler::ProcessDirectory(std::vector<std::string>& passed,
   parallel->SetPassFailVectors(&passed, &failed);
   this->TestResults.clear();
   parallel->SetTestResults(&this->TestResults);
+  parallel->CheckHardwareAvailable();
 
   if (this->CTest->ShouldPrintLabels()) {
     parallel->PrintLabels();

+ 4 - 0
Source/CTest/cmCTestTestHandler.h

@@ -19,6 +19,7 @@
 #include "cmsys/RegularExpression.hxx"
 
 #include "cmCTestGenericHandler.h"
+#include "cmCTestHardwareSpec.h"
 #include "cmDuration.h"
 #include "cmListFileCache.h"
 
@@ -335,6 +336,9 @@ private:
   cmsys::RegularExpression IncludeTestsRegularExpression;
   cmsys::RegularExpression ExcludeTestsRegularExpression;
 
+  bool UseHardwareSpec;
+  cmCTestHardwareSpec HardwareSpec;
+
   void GenerateRegressionImages(cmXMLWriter& xml, const std::string& dart);
   cmsys::RegularExpression DartStuff1;
   void CheckLabelFilter(cmCTestTestProperties& it);

+ 9 - 0
Source/cmCTest.cxx

@@ -2090,6 +2090,15 @@ bool cmCTest::HandleCommandLineArguments(size_t& i,
       "ExcludeFixtureCleanupRegularExpression", args[i].c_str());
   }
 
+  if (this->CheckArgument(arg, "--hardware-spec-file") &&
+      i < args.size() - 1) {
+    i++;
+    this->GetTestHandler()->SetPersistentOption("HardwareSpecFile",
+                                                args[i].c_str());
+    this->GetMemCheckHandler()->SetPersistentOption("HardwareSpecFile",
+                                                    args[i].c_str());
+  }
+
   if (this->CheckArgument(arg, "--rerun-failed")) {
     this->GetTestHandler()->SetPersistentOption("RerunFailed", "true");
     this->GetMemCheckHandler()->SetPersistentOption("RerunFailed", "true");

+ 1 - 0
Source/ctest.cxx

@@ -103,6 +103,7 @@ static const char* cmDocumentationOptions[][2] = {
     "times without failing in order to pass" },
   { "--max-width <width>", "Set the max width for a test name to output" },
   { "--interactive-debug-mode [0|1]", "Set the interactive mode to 0 or 1." },
+  { "--hardware-spec-file <file>", "Set the hardware spec file to use." },
   { "--no-label-summary", "Disable timing summary information for labels." },
   { "--no-subproject-summary",
     "Disable timing summary information for "