瀏覽代碼

Detect task failures

Signed-off-by: aiordache <[email protected]>
aiordache 5 年之前
父節點
當前提交
d3effd2ead
共有 4 個文件被更改,包括 126 次插入17 次删除
  1. 1 0
      api/compose/api.go
  2. 3 1
      cli/cmd/compose/list.go
  3. 98 14
      ecs/sdk.go
  4. 24 2
      ecs/wait.go

+ 1 - 0
api/compose/api.go

@@ -77,4 +77,5 @@ type Stack struct {
 	ID     string
 	Name   string
 	Status string
+	Reason string
 }

+ 3 - 1
cli/cmd/compose/list.go

@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"strings"
 
 	"github.com/spf13/cobra"
 	"github.com/spf13/pflag"
@@ -60,7 +61,8 @@ func runList(ctx context.Context, opts composeOptions) error {
 	view := viewFromStackList(stackList)
 	return formatter.Print(view, opts.Format, os.Stdout, func(w io.Writer) {
 		for _, stack := range view {
-			_, _ = fmt.Fprintf(w, "%s\t%s\n", stack.Name, stack.Status)
+			_, _ = fmt.Fprintf(w, "%s\t%s\n", stack.Name, strings.TrimSpace(
+				fmt.Sprintf("%s %s", stack.Status, stack.Reason))
 		}
 	}, "NAME", "STATUS")
 }

+ 98 - 14
ecs/sdk.go

@@ -304,29 +304,113 @@ func (s sdk) ListStacks(ctx context.Context, name string) ([]compose.Stack, erro
 	}
 	stacks := []compose.Stack{}
 	for _, stack := range cfStacks.Stacks {
+		skip := true
 		for _, t := range stack.Tags {
 			if *t.Key == compose.ProjectTag {
-				status := compose.RUNNING
-				switch aws.StringValue(stack.StackStatus) {
-				case "CREATE_IN_PROGRESS":
-					status = compose.STARTING
-				case "DELETE_IN_PROGRESS":
-					status = compose.REMOVING
-				case "UPDATE_IN_PROGRESS":
-					status = compose.UPDATING
-				}
-				stacks = append(stacks, compose.Stack{
-					ID:     aws.StringValue(stack.StackId),
-					Name:   aws.StringValue(stack.StackName),
-					Status: status,
-				})
+				skip = false
 				break
 			}
 		}
+		if skip {
+			continue
+		}
+		status := compose.RUNNING
+		reason := ""
+		switch aws.StringValue(stack.StackStatus) {
+		case "CREATE_IN_PROGRESS":
+			status = compose.STARTING
+		case "DELETE_IN_PROGRESS":
+			status = compose.REMOVING
+		case "UPDATE_IN_PROGRESS":
+			status = compose.UPDATING
+		}
+		if status == compose.STARTING {
+			if err := s.CheckStackState(ctx, aws.StringValue(stack.StackName)); err != nil {
+				status = compose.FAILED
+				reason = err.Error()
+			}
+		}
+		stacks = append(stacks, compose.Stack{
+			ID:     aws.StringValue(stack.StackId),
+			Name:   aws.StringValue(stack.StackName),
+			Status: status,
+			Reason: reason,
+		})
+
 	}
 	return stacks, nil
 }
 
+// CheckStackState inspects the ECS services that belong to the CloudFormation
+// stack called name and reports the first one whose tasks failed to start.
+//
+// It lists the stack's resources, remembers the physical ID of the
+// AWS::ECS::Cluster resource, collects every AWS::ECS::Service resource that
+// already has a physical ID, and then runs CheckTaskState on each collected
+// service in listing order.
+//
+// It returns nil when no failure is detected, the listing error unchanged if
+// the resources cannot be listed, or the first CheckTaskState error wrapped
+// as "<logical service name> error: <cause>".
+func (s sdk) CheckStackState(ctx context.Context, name string) error {
+	resources, err := s.CF.ListStackResourcesWithContext(ctx, &cloudformation.ListStackResourcesInput{
+		StackName: aws.String(name),
+	})
+	if err != nil {
+		return err
+	}
+	// NOTE(review): a single call is made and any NextToken in the response is
+	// ignored; confirm whether large stacks need the paginated variant.
+
+	// ecsService pairs the ID used to query ECS with the logical name used in
+	// the error message, so the two cannot drift apart.
+	type ecsService struct {
+		id   string
+		name string
+	}
+	var (
+		cluster  *string
+		services []ecsService
+	)
+	for _, r := range resources.StackResourceSummaries {
+		switch aws.StringValue(r.ResourceType) {
+		case "AWS::ECS::Cluster":
+			cluster = r.PhysicalResourceId
+		case "AWS::ECS::Service":
+			// Without a physical ID there is nothing to query yet
+			// (presumably the service has not been created so far).
+			if r.PhysicalResourceId == nil {
+				continue
+			}
+			services = append(services, ecsService{
+				id: aws.StringValue(r.PhysicalResourceId),
+				// aws.StringValue guards against a nil logical ID, which a
+				// bare dereference would turn into a panic.
+				name: aws.StringValue(r.LogicalResourceId),
+			})
+		}
+	}
+	// NOTE(review): if the stack holds no AWS::ECS::Cluster resource, cluster
+	// stays nil and an empty cluster name is passed on; confirm that is the
+	// intended fallback.
+	for _, svc := range services {
+		if err := s.CheckTaskState(ctx, aws.StringValue(cluster), svc.id); err != nil {
+			// %w keeps the message identical while preserving the cause for
+			// errors.Is / errors.As.
+			return fmt.Errorf("%s error: %w", svc.name, err)
+		}
+	}
+	return nil
+}
+
+// CheckTaskState reports whether the tasks of the ECS service serviceName in
+// cluster failed to start.
+//
+// It first lists the service's tasks without a DesiredStatus filter (the ECS
+// API defaults that filter to RUNNING); if any are returned the service is
+// considered healthy. Otherwise it lists the tasks whose desired status is
+// STOPPED, describes them, and looks at the first task returned: when its
+// stop code is "TaskFailedToStart", the task's stopped reason is returned as
+// the error.
+//
+// It returns nil when no failure is detected, and returns any error from the
+// ECS calls unchanged.
+func (s sdk) CheckTaskState(ctx context.Context, cluster string, serviceName string) error {
+	running, err := s.ECS.ListTasksWithContext(ctx, &ecs.ListTasksInput{
+		Cluster:     aws.String(cluster),
+		ServiceName: aws.String(serviceName),
+	})
+	if err != nil {
+		return err
+	}
+	if len(running.TaskArns) > 0 {
+		return nil
+	}
+
+	stopped, err := s.ECS.ListTasksWithContext(ctx, &ecs.ListTasksInput{
+		Cluster:       aws.String(cluster),
+		ServiceName:   aws.String(serviceName),
+		DesiredStatus: aws.String("STOPPED"),
+	})
+	if err != nil {
+		return err
+	}
+	if len(stopped.TaskArns) == 0 {
+		return nil
+	}
+
+	described, err := s.ECS.DescribeTasksWithContext(ctx, &ecs.DescribeTasksInput{
+		Cluster: aws.String(cluster),
+		Tasks:   stopped.TaskArns,
+	})
+	if err != nil {
+		return err
+	}
+	if len(described.Tasks) == 0 {
+		return nil
+	}
+
+	// NOTE(review): the first described task is treated as the most recent
+	// one; nothing here establishes the ordering of the response - confirm.
+	task := described.Tasks[0]
+	if aws.StringValue(task.StopCode) == "TaskFailedToStart" {
+		// The reason is runtime text from AWS, so it must be passed as an
+		// argument and never as the format string: a '%' in it would
+		// otherwise be interpreted as a formatting verb.
+		return fmt.Errorf("%s", aws.StringValue(task.StoppedReason))
+	}
+	return nil
+}
+
 func (s sdk) DescribeStackEvents(ctx context.Context, stackID string) ([]*cloudformation.StackEvent, error) {
 	// Fixme implement Paginator on Events and return as a chan(events)
 	events := []*cloudformation.StackEvent{}

+ 24 - 2
ecs/wait.go

@@ -52,6 +52,7 @@ func (b *ecsAPIService) WaitStackCompletion(ctx context.Context, name string, op
 
 	var completed bool
 	var stackErr error
+
 	for !completed {
 		select {
 		case <-done:
@@ -77,8 +78,8 @@ func (b *ecsAPIService) WaitStackCompletion(ctx context.Context, name string, op
 			reason := aws.StringValue(event.ResourceStatusReason)
 			status := aws.StringValue(event.ResourceStatus)
 			progressStatus := progress.Working
-
 			switch status {
+
 			case "CREATE_COMPLETE":
 				if operation == stackCreate {
 					progressStatus = progress.Done
@@ -100,12 +101,33 @@ func (b *ecsAPIService) WaitStackCompletion(ctx context.Context, name string, op
 					}
 				}
 			}
+
 			w.Event(progress.Event{
 				ID:         resource,
 				Status:     progressStatus,
-				StatusText: status,
+				StatusText: reason,
 			})
 		}
+		if operation != stackCreate || stackErr != nil {
+			continue
+		}
+		if err := b.SDK.CheckStackState(ctx, name); err != nil {
+			stackErr = err
+			b.SDK.DeleteStack(ctx, name)
+			operation = stackDelete
+
+			reason := err.Error()
+			if len(reason) > 30 {
+				reason = reason[:30] + "..."
+			}
+			w.Event(progress.Event{
+				ID:         name,
+				Status:     progress.Error,
+				StatusText: reason,
+			})
+
+		}
+
 	}
 
 	return stackErr