Skip to content

Commit

Permalink
[observer/ecs] Don't report EC2 tasks with unassigned container insta…
Browse files Browse the repository at this point in the history
…nces (#23279)

When ECS task is in state Provisioning/Pending, it can contain
container(s) which don't have EC2 instance yet. Such containers have
`nil` instance arn.

This change fixes service discovery error:
```error ecsobserver@v0.78.0/error.go:77 attachContainerInstance failed:
describe container instanced failed offset=0: ecs.DescribeContainerInstance
failed: InvalidParameterException: Container instance can not be blank.
{"kind": "extension", "name": "ecs_observer", "ErrScope": "Unknown"}
```


**Testing:** Related unit test is added.
  • Loading branch information
shchuko authored Jul 12, 2023
1 parent aeb0bc9 commit da15e5f
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 20 deletions.
20 changes: 20 additions & 0 deletions .chloggen/ecsobserver-nil-container-arn-fix.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Use this changelog template to create an entry for release notes.
# If your change doesn't affect end users, such as a test fix or a tooling change,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: ecsobserver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Don't fail with error when finding a task of EC2 launch type and missing container instance, just ignore them. This fixes behavior when task is provisioning and its containers are not assigned to instances yet.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [23279]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
20 changes: 15 additions & 5 deletions extension/observer/ecsobserver/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ func newTaskFetcher(opts taskFetcherOptions) (*taskFetcher, error) {

func (f *taskFetcher) fetchAndDecorate(ctx context.Context) ([]*taskAnnotated, error) {
// taskAnnotated
rawTasks, err := f.getAllTasks(ctx)
rawTasks, err := f.getDiscoverableTasks(ctx)
if err != nil {
return nil, fmt.Errorf("getAllTasks failed: %w", err)
return nil, fmt.Errorf("getDiscoverableTasks failed: %w", err)
}
tasks, err := f.attachTaskDefinition(ctx, rawTasks)
if err != nil {
Expand All @@ -151,9 +151,10 @@ func (f *taskFetcher) fetchAndDecorate(ctx context.Context) ([]*taskAnnotated, e
return tasks, nil
}

// getAllTasks get arns of all running tasks and describe those tasks.
// getDiscoverableTasks get arns of all running tasks and describe those tasks
// and filter only fargate tasks or EC2 task which container instance is known.
// There is no API to list task detail without arn so we need to call two APIs.
func (f *taskFetcher) getAllTasks(ctx context.Context) ([]*ecs.Task, error) {
func (f *taskFetcher) getDiscoverableTasks(ctx context.Context) ([]*ecs.Task, error) {
svc := f.ecs
cluster := aws.String(f.cluster)
req := ecs.ListTasksInput{Cluster: cluster}
Expand All @@ -171,7 +172,16 @@ func (f *taskFetcher) getAllTasks(ctx context.Context) ([]*ecs.Task, error) {
if err != nil {
return nil, fmt.Errorf("ecs.DescribeTasks failed: %w", err)
}
tasks = append(tasks, descRes.Tasks...)

for _, task := range descRes.Tasks {
// Preserve only fargate tasks or EC2 tasks with non-nil ContainerInstanceArn.
// When ECS task of EC2 launch type is in state Provisioning/Pending, it may
// not have EC2 instance. Such tasks have `nil` instance arn and the
// attachContainerInstance call will fail
if task.ContainerInstanceArn != nil || aws.StringValue(task.LaunchType) != ecs.LaunchTypeEc2 {
tasks = append(tasks, task)
}
}
if listRes.NextToken == nil {
break
}
Expand Down
62 changes: 47 additions & 15 deletions extension/observer/ecsobserver/fetcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,47 @@ func TestFetcher_FetchAndDecorate(t *testing.T) {
assert.Equal(t, "s0", aws.StringValue(tasks[0].Service.ServiceArn))
}

func TestFetcher_GetAllTasks(t *testing.T) {
c := ecsmock.NewCluster()
f := newTestTaskFetcher(t, c)
const nTasks = 203
c.SetTasks(ecsmock.GenTasks("p", nTasks, nil))
ctx := context.Background()
tasks, err := f.getAllTasks(ctx)
require.NoError(t, err)
assert.Equal(t, nTasks, len(tasks))
func TestFetcher_GetDiscoverableTasks(t *testing.T) {
t.Run("without non discoverable tasks", func(t *testing.T) {
c := ecsmock.NewCluster()
f := newTestTaskFetcher(t, c)
const nTasks = 203
c.SetTasks(ecsmock.GenTasks("p", nTasks, nil))
ctx := context.Background()
tasks, err := f.getDiscoverableTasks(ctx)
require.NoError(t, err)
assert.Equal(t, nTasks, len(tasks))
})

t.Run("with non discoverable tasks", func(t *testing.T) {
c := ecsmock.NewCluster()
f := newTestTaskFetcher(t, c)
nTasks := 3

c.SetTaskDefinitions(ecsmock.GenTaskDefinitions("d", 1, 1, nil))
c.SetTasks(ecsmock.GenTasks("t", nTasks, func(i int, task *ecs.Task) {
task.TaskDefinitionArn = aws.String("d0:1")
switch i {
case 0:
task.LaunchType = aws.String(ecs.LaunchTypeEc2)
task.ContainerInstanceArn = nil
case 1:
task.LaunchType = aws.String(ecs.LaunchTypeFargate)
case 2:
task.LaunchType = aws.String(ecs.LaunchTypeEc2)
task.ContainerInstanceArn = aws.String("ci0")
}
}))

ctx := context.Background()
tasks, err := f.getDiscoverableTasks(ctx)
require.NoError(t, err)

// Expect 2 tasks, with LaunchType Fargate and EC2 with non-nil ContainerInstanceArn
assert.Equal(t, 2, len(tasks))
assert.Equal(t, ecs.LaunchTypeFargate, aws.StringValue(tasks[0].LaunchType))
assert.Equal(t, ecs.LaunchTypeEc2, aws.StringValue(tasks[1].LaunchType))
})
}

func TestFetcher_AttachTaskDefinitions(t *testing.T) {
Expand All @@ -93,7 +125,7 @@ func TestFetcher_AttachTaskDefinitions(t *testing.T) {
c.SetTaskDefinitions(ecsmock.GenTaskDefinitions("pdef", nTasks, 1, nil))

// no cache
tasks, err := f.getAllTasks(ctx)
tasks, err := f.getDiscoverableTasks(ctx)
require.NoError(t, err)
attached, err := f.attachTaskDefinition(ctx, tasks)
stats := c.Stats()
Expand All @@ -102,7 +134,7 @@ func TestFetcher_AttachTaskDefinitions(t *testing.T) {
assert.Equal(t, nTasks, stats.DescribeTaskDefinition.Called)

// all cached
tasks, err = f.getAllTasks(ctx)
tasks, err = f.getDiscoverableTasks(ctx)
require.NoError(t, err)
// do it again to trigger cache logic
attached, err = f.attachTaskDefinition(ctx, tasks)
Expand All @@ -116,7 +148,7 @@ func TestFetcher_AttachTaskDefinitions(t *testing.T) {
task.TaskDefinitionArn = aws.String(fmt.Sprintf("pdef%d:1", i))
}))
c.SetTaskDefinitions(ecsmock.GenTaskDefinitions("pdef", nTasks+1, 1, nil))
tasks, err = f.getAllTasks(ctx)
tasks, err = f.getDiscoverableTasks(ctx)
require.NoError(t, err)
_, err = f.attachTaskDefinition(ctx, tasks)
stats = c.Stats()
Expand Down Expand Up @@ -144,7 +176,7 @@ func TestFetcher_AttachContainerInstance(t *testing.T) {
c.SetEc2Instances(ecsmock.GenEc2Instances("i-", nInstances, nil))

ctx := context.Background()
rawTasks, err := f.getAllTasks(ctx)
rawTasks, err := f.getDiscoverableTasks(ctx)
require.NoError(t, err)
assert.Equal(t, nTasks, len(rawTasks))

Expand Down Expand Up @@ -182,7 +214,7 @@ func TestFetcher_AttachContainerInstance(t *testing.T) {
c.SetEc2Instances(ecsmock.GenEc2Instances("i-", nInstances, nil))

ctx := context.Background()
rawTasks, err := f.getAllTasks(ctx)
rawTasks, err := f.getDiscoverableTasks(ctx)
require.NoError(t, err)
assert.Equal(t, nTasks, len(rawTasks))

Expand Down Expand Up @@ -235,7 +267,7 @@ func TestFetcher_AttachService(t *testing.T) {
}))

ctx := context.Background()
rawTasks, err := f.getAllTasks(ctx)
rawTasks, err := f.getDiscoverableTasks(ctx)
require.NoError(t, err)
tasks, err := f.attachTaskDefinition(ctx, rawTasks)
require.NoError(t, err)
Expand Down

0 comments on commit da15e5f

Please sign in to comment.