Skip to content

Commit

Permalink
[Standalone Agent] Disallow upgrade if upgrade is already in progress (
Browse files Browse the repository at this point in the history
…#3473)

* [Standalone] Disallow upgrade if upgrade is already in progress

* Adding CHANGELOG entry

* Add TODOs

* WIP: integration test

* Check for Watcher; move function to upgrade package

* Check Upgrade Watcher PIDs first

* Fix syntax error

* Adjust test to start from older version

* Fixing more errors

* Fix test

* Mock client

* Fix typo
  • Loading branch information
ycombinator authored Sep 29, 2023
1 parent 99bc6d7 commit bc1982a
Show file tree
Hide file tree
Showing 8 changed files with 937 additions and 83 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: bug-fix

# Change summary; a 80ish characters long description of the change.
summary: Prevent a standalone Elastic Agent from being upgraded if an upgrade is already in progress.

# Long description; in case the summary is not enough to describe the change
# this field accommodate a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/3473

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present is automatically filled by the tooling with the issue linked to the PR number.
issue: https://github.com/elastic/elastic-agent/issues/2706
28 changes: 28 additions & 0 deletions internal/pkg/agent/application/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import (
"path/filepath"
"strings"

"github.com/elastic/elastic-agent/pkg/control/v2/client"
"github.com/elastic/elastic-agent/pkg/control/v2/cproto"

"github.com/otiai10/copy"
"go.elastic.co/apm"

Expand Down Expand Up @@ -366,3 +369,28 @@ func copyDir(l *logger.Logger, from, to string, ignoreErrs bool) error {
OnError: onErr,
})
}

// IsInProgress checks if an Elastic Agent upgrade is already in progress. It
// returns true if so and false if not.
// `c client.Client` is expected to be a connected client.
func IsInProgress(c client.Client, watcherPIDsFetcher func() ([]int, error)) (bool, error) {
// First we check if any Upgrade Watcher processes are running. If they are,
// it means an upgrade is in progress. We check this before checking the Elastic
// Agent's status because the Elastic Agent GRPC server may briefly be
// unavailable during an upgrade and so the client connection might fail.
watcherPIDs, err := watcherPIDsFetcher()
if err != nil {
return false, fmt.Errorf("failed to determine if upgrade watcher is running: %w", err)
}
if len(watcherPIDs) > 0 {
return true, nil
}

// Next we check the Elastic Agent's status using the GRPC client.
state, err := c.State(context.Background())
if err != nil {
return false, fmt.Errorf("failed to get agent state: %w", err)
}

return state.State == cproto.State_UPGRADING, nil
}
75 changes: 75 additions & 0 deletions internal/pkg/agent/application/upgrade/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package upgrade

import (
"context"
"fmt"
"io/ioutil"
"os"
Expand All @@ -13,9 +14,14 @@ import (
"strings"
"testing"

"github.com/elastic/elastic-agent/pkg/control/v2/client"
"github.com/elastic/elastic-agent/pkg/control/v2/client/mocks"
"github.com/elastic/elastic-agent/pkg/control/v2/cproto"

"github.com/gofrs/flock"
"github.com/stretchr/testify/require"

"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/release"
"github.com/elastic/elastic-agent/pkg/core/logger"
)
Expand Down Expand Up @@ -134,3 +140,72 @@ func TestShutdownCallback(t *testing.T) {
require.NoError(t, err, "reading file failed")
require.Equal(t, content, newContent, "contents are not equal")
}

func TestIsInProgress(t *testing.T) {
tests := map[string]struct {
state cproto.State
stateErr string
watcherPIDsFetcher func() ([]int, error)

expected bool
expectedErr string
}{
"state_error": {
state: cproto.State_STARTING,
stateErr: "some error",
watcherPIDsFetcher: func() ([]int, error) { return nil, nil },

expected: false,
expectedErr: "failed to get agent state: some error",
},
"state_upgrading": {
state: cproto.State_UPGRADING,
stateErr: "",
watcherPIDsFetcher: func() ([]int, error) { return nil, nil },

expected: true,
expectedErr: "",
},
"state_healthy_no_watcher": {
state: cproto.State_HEALTHY,
stateErr: "",
watcherPIDsFetcher: func() ([]int, error) { return []int{}, nil },

expected: false,
expectedErr: "",
},
"state_healthy_with_watcher": {
state: cproto.State_HEALTHY,
stateErr: "",
watcherPIDsFetcher: func() ([]int, error) { return []int{9999}, nil },

expected: true,
expectedErr: "",
},
}

for name, test := range tests {
t.Run(name, func(t *testing.T) {
// Expect client.State() call to be made only if no Upgrade Watcher PIDs
// are returned (i.e. no Upgrade Watcher is found to be running).
mc := mocks.NewClient(t)
if test.watcherPIDsFetcher != nil {
pids, _ := test.watcherPIDsFetcher()
if len(pids) == 0 {
if test.stateErr != "" {
mc.EXPECT().State(context.Background()).Return(nil, errors.New(test.stateErr)).Once()
} else {
mc.EXPECT().State(context.Background()).Return(&client.AgentState{State: test.state}, nil).Once()
}
}
}

inProgress, err := IsInProgress(mc, test.watcherPIDsFetcher)
if test.expectedErr != "" {
require.Equal(t, test.expectedErr, err.Error())
} else {
require.Equal(t, test.expected, inProgress)
}
})
}
}
14 changes: 12 additions & 2 deletions internal/pkg/agent/cmd/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ import (
"io/ioutil"
"os"

"github.com/spf13/cobra"

"github.com/elastic/elastic-agent/pkg/control"
"github.com/elastic/elastic-agent/pkg/control/v2/client"
"github.com/elastic/elastic-agent/pkg/utils"

"github.com/spf13/cobra"

"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download"
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/cli"
Expand Down Expand Up @@ -64,6 +66,14 @@ func upgradeCmd(streams *cli.IOStreams, cmd *cobra.Command, args []string) error
}
defer c.Disconnect()

isBeingUpgraded, err := upgrade.IsInProgress(c, utils.GetWatcherPIDs)
if err != nil {
return fmt.Errorf("failed to check if upgrade is already in progress: %w", err)
}
if isBeingUpgraded {
return errors.New("an upgrade is already in progress; please try again later.")
}

skipVerification, _ := cmd.Flags().GetBool(flagSkipVerify)
var pgpChecks []string
if !skipVerification {
Expand Down
1 change: 1 addition & 0 deletions pkg/control/v2/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ type DiagnosticComponentResult struct {
}

// Client communicates to Elastic Agent through the control protocol.
// go:generate mockery --name Client
type Client interface {
// Connect connects to the running Elastic Agent.
Connect(ctx context.Context) error
Expand Down
Loading

0 comments on commit bc1982a

Please sign in to comment.