gpu.go 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. /*
  2. Copyright 2020 Docker, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package ecs
  14. import (
  15. "fmt"
  16. "strconv"
  17. "github.com/compose-spec/compose-go/types"
  18. "github.com/docker/go-units"
  19. )
  20. type machine struct {
  21. id string
  22. cpus float64
  23. memory types.UnitBytes
  24. gpus int64
  25. }
  26. type family []machine
  27. var p3family = family{
  28. {
  29. id: "p3.2xlarge",
  30. cpus: 8,
  31. memory: 64 * units.GiB,
  32. gpus: 2,
  33. },
  34. {
  35. id: "p3.8xlarge",
  36. cpus: 32,
  37. memory: 244 * units.GiB,
  38. gpus: 4,
  39. },
  40. {
  41. id: "p3.16xlarge",
  42. cpus: 64,
  43. memory: 488 * units.GiB,
  44. gpus: 8,
  45. },
  46. }
  47. type filterFn func(machine) bool
  48. func (f family) filter(fn filterFn) family {
  49. var filtered family
  50. for _, machine := range f {
  51. if fn(machine) {
  52. filtered = append(filtered, machine)
  53. }
  54. }
  55. return filtered
  56. }
  57. func (f family) firstOrError(msg string, args ...interface{}) (machine, error) {
  58. if len(f) == 0 {
  59. return machine{}, fmt.Errorf(msg, args...)
  60. }
  61. return f[0], nil
  62. }
  63. func guessMachineType(project *types.Project) (string, error) {
  64. // we select a machine type to match all gpu-bound services requirements
  65. // once https://github.com/aws/containers-roadmap/issues/631 is implemented we can define dedicated CapacityProviders per service.
  66. minMemory, minCPU, minGPU, err := getResourceRequirements(project)
  67. if err != nil {
  68. return "", err
  69. }
  70. instanceType, err := p3family.
  71. filter(func(m machine) bool {
  72. return m.memory >= minMemory
  73. }).
  74. filter(func(m machine) bool {
  75. return m.cpus >= minCPU
  76. }).
  77. filter(func(m machine) bool {
  78. return m.gpus >= minGPU
  79. }).
  80. firstOrError("none of the AWS p3 machines match requirement for memory:%d cpu:%f gpu:%d", minMemory, minCPU, minGPU)
  81. if err != nil {
  82. return "", err
  83. }
  84. return instanceType.id, nil
  85. }
  86. func getResourceRequirements(project *types.Project) (types.UnitBytes, float64, int64, error) {
  87. var minMemory types.UnitBytes
  88. var minCPU float64
  89. var minGPU int64
  90. for _, service := range project.Services {
  91. if service.Deploy == nil {
  92. continue
  93. }
  94. reservations := service.Deploy.Resources.Reservations
  95. if reservations == nil {
  96. continue
  97. }
  98. var requiredGPUs int64
  99. for _, r := range reservations.GenericResources {
  100. if r.DiscreteResourceSpec.Kind == "gpu" {
  101. requiredGPUs = r.DiscreteResourceSpec.Value
  102. break
  103. }
  104. }
  105. if requiredGPUs == 0 {
  106. continue
  107. }
  108. if requiredGPUs > minGPU {
  109. minGPU = requiredGPUs
  110. }
  111. if reservations.MemoryBytes > minMemory {
  112. minMemory = reservations.MemoryBytes
  113. }
  114. if reservations.NanoCPUs != "" {
  115. nanocpu, err := strconv.ParseFloat(reservations.NanoCPUs, 64)
  116. if err != nil {
  117. return 0, 0, 0, err
  118. }
  119. if nanocpu > minCPU {
  120. minCPU = nanocpu
  121. }
  122. }
  123. }
  124. return minMemory, minCPU, minGPU, nil
  125. }