Browse Source

Some more functional design

Signed-off-by: Nicolas De Loof <[email protected]>
Nicolas De Loof 5 năm trước cách đây
mục cha
commit
101e1555b8
2 tập tin đã thay đổi với 121 bổ sung và 51 xóa
  1. 109 39
      ecs/gpu.go
  2. 12 12
      ecs/gpu_test.go

+ 109 - 39
ecs/gpu.go

@@ -18,6 +18,7 @@ package ecs
 
 import (
 	"fmt"
+	"math"
 	"strconv"
 
 	"github.com/compose-spec/compose-go/types"
@@ -74,69 +75,138 @@ func (f family) firstOrError(msg string, args ...interface{}) (machine, error) {
 }
 
+// guessMachineType selects an instance type from p3family for the project.
+//
+// It computes the aggregated requirements with getResourceRequirements, keeps
+// only the p3family machines whose memory, cpus and gpus are each at least the
+// required amount, and returns the id of the machine handed back by
+// firstOrError. An error is returned when the requirements cannot be computed
+// or when firstOrError reports one (its message states that no p3 machine
+// matches the requirements).
 func guessMachineType(project *types.Project) (string, error) {
-	// we select a machine type to match all gpu-bound services requirements
+	// we select a single machine type that satisfies the requirements of all GPU-bound services
 	// once https://github.com/aws/containers-roadmap/issues/631 is implemented we can define dedicated CapacityProviders per service.
-	minMemory, minCPU, minGPU, err := getResourceRequirements(project)
+	requirements, err := getResourceRequirements(project)
 	if err != nil {
 		return "", err
 	}
 
 	instanceType, err := p3family.
 		filter(func(m machine) bool {
-			return m.memory >= minMemory
+			return m.memory >= requirements.memory
 		}).
 		filter(func(m machine) bool {
-			return m.cpus >= minCPU
+			return m.cpus >= requirements.cpus
 		}).
 		filter(func(m machine) bool {
-			return m.gpus >= minGPU
+			return m.gpus >= requirements.gpus
 		}).
-		firstOrError("none of the AWS p3 machines match requirement for memory:%d cpu:%f gpu:%d", minMemory, minCPU, minGPU)
+		firstOrError("none of the AWS p3 machines match requirement for memory:%d cpu:%f gpus:%d", requirements.memory, requirements.cpus, requirements.gpus)
 	if err != nil {
 		return "", err
 	}
 	return instanceType.id, nil
 }
 
-func getResourceRequirements(project *types.Project) (types.UnitBytes, float64, int64, error) {
-	var minMemory types.UnitBytes
-	var minCPU float64
-	var minGPU int64
+// resourceRequirements holds the resources reserved by one service, or the
+// field-wise maximum over several services once combined.
+type resourceRequirements struct {
+	memory types.UnitBytes // taken from the reservations' MemoryBytes
+	cpus   float64         // parsed from the reservations' NanoCPUs string
+	gpus   int64           // value of the generic resource of kind "gpus"
+}
+
+// getResourceRequirements aggregates the reservations of the project's
+// GPU-bound services into a single set of requirements.
+//
+// Every service is converted with toResourceRequirementsSlice, services that
+// reserve no GPU are dropped, and the remaining entries are folded with max so
+// that each field holds the largest value requested by any of them. If a
+// service cannot be converted, that error is returned and the result is nil.
+func getResourceRequirements(project *types.Project) (*resourceRequirements, error) {
+	return toResourceRequirementsSlice(project).
+		filter(func(requirements *resourceRequirements) bool {
+			// toResourceRequirements yields a nil entry for services without a
+			// deploy section or reservations; skip those rather than
+			// dereferencing a nil pointer.
+			return requirements != nil && requirements.gpus != 0
+		}).
+		max()
+}
+
+// eitherRequirementsOrError carries either the per-service requirements or the
+// error that prevented building them. Its filter and max methods pass an
+// existing error through, so calls can be chained and the error checked once
+// at the end of the chain.
+type eitherRequirementsOrError struct {
+	requirements []*resourceRequirements // per-service entries; meaningful only when err is nil
+	err          error                   // first error hit while building requirements
+}
+
+// toResourceRequirementsSlice converts every service of the project with
+// toResourceRequirements and collects the results.
+//
+// It stops at the first conversion error and returns it with a nil slice.
+// Results are appended as-is, so the slice contains a nil entry for each
+// service for which toResourceRequirements returned nil.
+func toResourceRequirementsSlice(project *types.Project) eitherRequirementsOrError {
+	var requirements []*resourceRequirements
 	for _, service := range project.Services {
-		if service.Deploy == nil {
-			continue
-		}
-		reservations := service.Deploy.Resources.Reservations
-		if reservations == nil {
-			continue
+		r, err := toResourceRequirements(service)
+		if err != nil {
+			return eitherRequirementsOrError{nil, err}
 		}
+		requirements = append(requirements, r)
+	}
+	return eitherRequirementsOrError{requirements, nil}
+}
 
-		var requiredGPUs int64
-		for _, r := range reservations.GenericResources {
-			if r.DiscreteResourceSpec.Kind == "gpu" {
-				requiredGPUs = r.DiscreteResourceSpec.Value
-				break
-			}
-		}
-		if requiredGPUs == 0 {
-			continue
-		}
-		if requiredGPUs > minGPU {
-			minGPU = requiredGPUs
+// filter returns a new eitherRequirementsOrError holding only the entries for
+// which fn returns true, in their original order.
+//
+// When r already carries an error it is returned unchanged and fn is never
+// called. fn receives each entry exactly as stored, including nil entries.
+func (r eitherRequirementsOrError) filter(fn func(*resourceRequirements) bool) eitherRequirementsOrError {
+	if r.err != nil {
+		return r
+	}
+	var requirements []*resourceRequirements
+	for _, req := range r.requirements {
+		if fn(req) {
+			requirements = append(requirements, req)
 		}
+	}
+	return eitherRequirementsOrError{requirements, nil}
+}
 
-		if reservations.MemoryBytes > minMemory {
-			minMemory = reservations.MemoryBytes
+// toResourceRequirements extracts the memory, CPU and GPU reservations of a
+// single service.
+//
+// It returns nil with a nil error when the service has no Deploy section or no
+// reservations. Otherwise gpus is the value of the first generic resource of
+// kind "gpus" (0 when there is none), cpus is NanoCPUs parsed as a float (0
+// when the string is empty) and memory is MemoryBytes. The strconv.ParseFloat
+// error is returned as-is when NanoCPUs cannot be parsed.
+func toResourceRequirements(service types.ServiceConfig) (*resourceRequirements, error) {
+	if service.Deploy == nil {
+		return nil, nil
+	}
+	reservations := service.Deploy.Resources.Reservations
+	if reservations == nil {
+		return nil, nil
+	}
+
+	var requiredGPUs int64
+	// NOTE(review): assumes every generic resource carries a DiscreteResourceSpec — confirm.
+	for _, r := range reservations.GenericResources {
+		if r.DiscreteResourceSpec.Kind == "gpus" {
+			requiredGPUs = r.DiscreteResourceSpec.Value
+			break
 		}
-		if reservations.NanoCPUs != "" {
-			nanocpu, err := strconv.ParseFloat(reservations.NanoCPUs, 64)
-			if err != nil {
-				return 0, 0, 0, err
-			}
-			if nanocpu > minCPU {
-				minCPU = nanocpu
-			}
+	}
+
+	var nanocpu float64
+	if reservations.NanoCPUs != "" {
+		v, err := strconv.ParseFloat(reservations.NanoCPUs, 64)
+		if err != nil {
+			return nil, err
 		}
+		nanocpu = v
+	}
+	return &resourceRequirements{
+		memory: reservations.MemoryBytes,
+		cpus:   nanocpu,
+		gpus:   requiredGPUs,
+	}, nil
+}
+
+// combine returns the field-wise maximum of r and o: the larger memory, the
+// larger cpus and the larger gpus of the two. A nil o leaves r unchanged.
+// Neither r nor o is modified; the result is a new value.
+func (r resourceRequirements) combine(o *resourceRequirements) resourceRequirements {
+	if o == nil {
+		return r
+	}
+	return resourceRequirements{
+		memory: maxUnitBytes(r.memory, o.memory),
+		cpus:   math.Max(r.cpus, o.cpus),
+		gpus:   maxInt64(r.gpus, o.gpus),
+	}
+}
+
+// max folds all entries into a single resourceRequirements whose fields are
+// the largest values found across the entries.
+//
+// It returns the carried error (and a nil result) when r holds one. With no
+// entries the result is a pointer to the zero value.
+func (r eitherRequirementsOrError) max() (*resourceRequirements, error) {
+	if r.err != nil {
+		return nil, r.err
+	}
+	// Despite its name, min is the running accumulator: it starts at the zero
+	// value and holds the field-wise maximum of the entries seen so far.
+	min := resourceRequirements{}
+	for _, req := range r.requirements {
+		min = min.combine(req)
+	}
+	return &min, nil
+}
+
+// maxInt64 returns the larger of a and b.
+func maxInt64(a, b int64) int64 {
+	if a > b {
+		return a
+	}
+	return b
+}
+
+// maxUnitBytes returns the larger of a and b.
+func maxUnitBytes(a, b types.UnitBytes) types.UnitBytes {
+	if a > b {
+		return a
 	}
-	return minMemory, minCPU, minGPU, nil
+	return b
 }

+ 12 - 12
ecs/gpu_test.go

@@ -28,63 +28,63 @@ func TestGuessMachineType(t *testing.T) {
 		wantErr bool
 	}{
 		{
-			name: "1-gpu",
+			name: "1-gpus",
 			yaml: `
 services:
     learning:
-        image: tensorflow/tensorflow:latest-gpu
+        image: tensorflow/tensorflow:latest-gpus
         deploy:
             resources:
                 reservations:
                    generic_resources:
                      - discrete_resource_spec:
-                         kind: gpu
+                         kind: gpus
                          value: 1
 `,
 			want:    "p3.2xlarge",
 			wantErr: false,
 		},
 		{
-			name: "4-gpu",
+			name: "4-gpus",
 			yaml: `
 services:
     learning:
-        image: tensorflow/tensorflow:latest-gpu
+        image: tensorflow/tensorflow:latest-gpus
         deploy:
             resources:
                 reservations:
                    generic_resources: 
                      - discrete_resource_spec:
-                         kind: gpu
+                         kind: gpus
                          value: 4
 `,
 			want:    "p3.8xlarge",
 			wantErr: false,
 		},
 		{
-			name: "1-gpu, high-memory",
+			name: "1-gpus, high-memory",
 			yaml: `
 services:
     learning:
-        image: tensorflow/tensorflow:latest-gpu
+        image: tensorflow/tensorflow:latest-gpus
         deploy:
             resources:
                 reservations: 
                    memory: 300Gb
                    generic_resources: 
                      - discrete_resource_spec:
-                         kind: gpu
+                         kind: gpus
                          value: 2
 `,
 			want:    "p3.16xlarge",
 			wantErr: false,
 		},
 		{
-			name: "1-gpu, high-cpu",
+			name: "1-gpus, high-cpu",
 			yaml: `
 services:
     learning:
-        image: tensorflow/tensorflow:latest-gpu
+        image: tensorflow/tensorflow:latest-gpus
         deploy:
             resources:
                 reservations: 
@@ -92,7 +92,7 @@ services:
                    cpus: "32"
                    generic_resources: 
                      - discrete_resource_spec:
-                         kind: gpu
+                         kind: gpus
                          value: 2
 `,
 			want:    "p3.8xlarge",