Skip to content

Commit 8e61536

Browse files
Feature/4890 detect fail early upgrade (#5864) (#5978)
* feature(4890): added shouldUpgrade function in the upgrade cli file * feature(4890): added shouldUpgrade check into the upgrade command * feature(4890): ran gofmt * feature(4890): added a "force" flag, marked it as hidden * feature(4890): removed dpkg, rpm and container logic * feature(4890): ran gofmt * feature(4890): updated the function signature of the upgrade command, updated tests, added new tests * feature(4890): update comments * feature(4890): added changelog fragment * feature(4890): added fatal log in case there is an error while marking force flag as hidden * feature(4890): added error checks in tests * feature(4890): updated the summary in the changelog fragment * feature(4890): removed the shorthand flag for the force flag * feature(4890): updated synchronization in the tests * Update internal/pkg/agent/cmd/upgrade_test.go Co-authored-by: Paolo Chilà <paolo.chila@elastic.co> * feature(4890): using streams err output instead of defaulting to stderr * feature(4890): use EXPECT instead of On * feature(4890): moved unconfirmed upgrade error to a package var * feature(4890): removed confirmation from upgrade check for when force flag is set * Update internal/pkg/agent/cmd/upgrade.go Co-authored-by: Paolo Chilà <paolo.chila@elastic.co> * Update internal/pkg/agent/cmd/upgrade.go Co-authored-by: Paolo Chilà <paolo.chila@elastic.co> * feature(4890): fix errors * Update internal/pkg/agent/cmd/upgrade.go Co-authored-by: Craig MacKenzie <craig.mackenzie@elastic.co> * feature(4890): update test * fearure(4890): replace ageninfo with state call - updated agentinfo proto - updated relevant generated code - implemented state call in the upgrade cmd * feature(4890): updated proto, client and server implementation * feature(4890): fix struct tag * feature(4890): added skip-verify checks * feature(4890): ran addLicenseHeaders * feature(4890): ran mage clean * feature(4890): fix typo * feature(4890): added timeout to connection * feature(4890): changed condition check order * feature(4890): fix unit tests * feature(4890): refactored tests, using mock client * Update internal/pkg/agent/cmd/upgrade.go Co-authored-by: Andrzej Stencel <andrzej.stencel@elastic.co> * feature(4890): use lower case "f" in error messages to be more consistent * feature(4890): remove duplicate line * feature(4890): ran mage controlProto with correct protoc version --------- Co-authored-by: Paolo Chilà <paolo.chila@elastic.co> Co-authored-by: Craig MacKenzie <craig.mackenzie@elastic.co> Co-authored-by: Andrzej Stencel <andrzej.stencel@elastic.co> (cherry picked from commit 8579474) Co-authored-by: Kaan Yalti <kaan.yalti@elastic.co>
1 parent 22ad8f7 commit 8e61536

File tree

10 files changed

+556
-216
lines changed

10 files changed

+556
-216
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: enhancement
12+
13+
# Change summary; a 80ish characters long description of the change.
14+
summary: Detect and fail-early cli upgrades if agent is fleet-managed
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodate a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
description: This change brings restrictions on the upgrade cli command. If an agent is fleet-managed and is running in unprivileged mode, users won't be able to upgrade the agent using the cli. If an agent is fleet-managed and is running privileged, users will only be able to upgrade the agent using the cli if they provide --force flag.
20+
21+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
22+
component: "elastic-agent"
23+
24+
# PR URL; optional; the PR number that added the changeset.
25+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
26+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
27+
# Please provide it if you are adding a fragment for a different PR.
28+
pr: https://github.com/elastic/elastic-agent/pull/5864
29+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
30+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
31+
#issue: https://github.com/owner/repo/1234

control_v2.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,8 @@ message StateAgentInfo {
169169
int32 pid = 6;
170170
// True when running as unprivileged.
171171
bool unprivileged = 7;
172+
// True when agent is managed by fleet
173+
bool isManaged = 8;
172174
}
173175

174176
// StateResponse is the current state of Elastic Agent.

internal/pkg/agent/application/coordinator/coordinator.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1304,7 +1304,7 @@ func (c *Coordinator) generateComponentModel() (err error) {
13041304
configInjector = c.monitorMgr.MonitoringConfig
13051305
}
13061306

1307-
var existingCompState = make(map[string]uint64, len(c.state.Components))
1307+
existingCompState := make(map[string]uint64, len(c.state.Components))
13081308
for _, comp := range c.state.Components {
13091309
existingCompState[comp.Component.ID] = comp.State.Pid
13101310
}

internal/pkg/agent/application/coordinator/coordinator_state.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,6 @@ func (c *Coordinator) refreshState() {
135135
// Coordinator state and sets stateNeedsRefresh.
136136
// Must be called on the main Coordinator goroutine.
137137
func (c *Coordinator) applyComponentState(state runtime.ComponentComponentState) {
138-
139138
// check for any component updates to the known PID, so we can update the component monitoring
140139
found := false
141140
for i, other := range c.state.Components {
@@ -168,7 +167,6 @@ func (c *Coordinator) applyComponentState(state runtime.ComponentComponentState)
168167
}
169168

170169
c.stateNeedsRefresh = true
171-
172170
}
173171

174172
// generateReportableState aggregates the internal state of the Coordinator

internal/pkg/agent/application/info/agent_id.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@ import (
2424
)
2525

2626
// defaultAgentConfigFile is a name of file used to store agent information
27-
const agentInfoKey = "agent"
28-
const defaultLogLevel = "info"
29-
const maxRetriesloadAgentInfo = 5
27+
const (
28+
agentInfoKey = "agent"
29+
defaultLogLevel = "info"
30+
maxRetriesloadAgentInfo = 5
31+
)
3032

3133
type persistentAgentInfo struct {
3234
ID string `json:"id" yaml:"id" config:"id"`

internal/pkg/agent/cmd/upgrade.go

Lines changed: 112 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"fmt"
1010
"os"
1111
"strings"
12+
"time"
1213

1314
"github.com/spf13/cobra"
1415
"google.golang.org/grpc/codes"
@@ -31,6 +32,14 @@ const (
3132
flagPGPBytes = "pgp"
3233
flagPGPBytesPath = "pgp-path"
3334
flagPGPBytesURI = "pgp-uri"
35+
flagForce = "force"
36+
)
37+
38+
var (
39+
unsupportedUpgradeError error = errors.New("this agent is fleet managed and must be upgraded using Fleet")
40+
nonRootExecutionError = errors.New("upgrade command needs to be executed as root for fleet managed agents")
41+
skipVerifyNotAllowedError = errors.New(fmt.Sprintf("\"%s\" flag is not allowed when upgrading a fleet managed agent using the cli", flagSkipVerify))
42+
skipVerifyNotRootError = errors.New(fmt.Sprintf("user needs to be root to use \"%s\" flag when upgrading standalone agents", flagSkipVerify))
3443
)
3544

3645
func newUpgradeCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command {
@@ -40,6 +49,7 @@ func newUpgradeCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Comman
4049
Long: "This command upgrades the currently installed Elastic Agent to the specified version.",
4150
Args: cobra.ExactArgs(1),
4251
Run: func(c *cobra.Command, args []string) {
52+
c.SetContext(context.Background())
4353
if err := upgradeCmd(streams, c, args); err != nil {
4454
fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage())
4555
os.Exit(1)
@@ -53,24 +63,119 @@ func newUpgradeCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Comman
5363
cmd.Flags().String(flagPGPBytes, "", "PGP to use for package verification")
5464
cmd.Flags().String(flagPGPBytesURI, "", "Path to a web location containing PGP to use for package verification")
5565
cmd.Flags().String(flagPGPBytesPath, "", "Path to a file containing PGP to use for package verification")
66+
cmd.Flags().BoolP(flagForce, "", false, "Advanced option to force an upgrade on a fleet managed agent")
67+
err := cmd.Flags().MarkHidden(flagForce)
68+
if err != nil {
69+
fmt.Fprintf(streams.Err, "error while setting upgrade force flag attributes: %s", err.Error())
70+
os.Exit(1)
71+
}
5672

5773
return cmd
5874
}
5975

76+
type upgradeInput struct {
77+
streams *cli.IOStreams
78+
cmd *cobra.Command
79+
args []string
80+
c client.Client
81+
agentInfo client.AgentStateInfo
82+
isRoot bool
83+
}
84+
6085
func upgradeCmd(streams *cli.IOStreams, cmd *cobra.Command, args []string) error {
6186
c := client.New()
62-
return upgradeCmdWithClient(streams, cmd, args, c)
87+
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
88+
defer cancel()
89+
90+
err := c.Connect(ctx)
91+
if err != nil {
92+
return errors.New(err, "failed communicating to running daemon", errors.TypeNetwork, errors.M("socket", control.Address()))
93+
}
94+
defer c.Disconnect()
95+
state, err := c.State(cmd.Context())
96+
if err != nil {
97+
return fmt.Errorf("error while trying to get agent state: %w", err)
98+
}
99+
100+
isRoot, err := utils.HasRoot()
101+
if err != nil {
102+
return fmt.Errorf("error while retrieving user permission: %w", err)
103+
}
104+
105+
input := &upgradeInput{
106+
streams,
107+
cmd,
108+
args,
109+
c,
110+
state.Info,
111+
isRoot,
112+
}
113+
return upgradeCmdWithClient(input)
114+
}
115+
116+
type upgradeCond struct {
117+
isManaged bool
118+
force bool
119+
isRoot bool
120+
skipVerify bool
63121
}
64122

65-
func upgradeCmdWithClient(streams *cli.IOStreams, cmd *cobra.Command, args []string, c client.Client) error {
66-
version := args[0]
123+
func checkUpgradable(cond upgradeCond) error {
124+
checkManaged := func() error {
125+
if !cond.force {
126+
return unsupportedUpgradeError
127+
}
128+
129+
if cond.skipVerify {
130+
return skipVerifyNotAllowedError
131+
}
132+
133+
if !cond.isRoot {
134+
return nonRootExecutionError
135+
}
136+
137+
return nil
138+
}
139+
140+
checkStandalone := func() error {
141+
if cond.skipVerify && !cond.isRoot {
142+
return skipVerifyNotRootError
143+
}
144+
return nil
145+
}
146+
147+
if cond.isManaged {
148+
return checkManaged()
149+
}
150+
151+
return checkStandalone()
152+
}
153+
154+
func upgradeCmdWithClient(input *upgradeInput) error {
155+
cmd := input.cmd
156+
c := input.c
157+
version := input.args[0]
67158
sourceURI, _ := cmd.Flags().GetString(flagSourceURI)
68159

69-
err := c.Connect(context.Background())
160+
force, err := cmd.Flags().GetBool(flagForce)
70161
if err != nil {
71-
return errors.New(err, "Failed communicating to running daemon", errors.TypeNetwork, errors.M("socket", control.Address()))
162+
return fmt.Errorf("failed to retrieve command flag information while trying to upgrade the agent: %w", err)
163+
}
164+
165+
skipVerification, err := cmd.Flags().GetBool(flagSkipVerify)
166+
if err != nil {
167+
return fmt.Errorf("failed to retrieve %s flag information while upgrading the agent: %w", flagSkipVerify, err)
168+
}
169+
170+
err = checkUpgradable(upgradeCond{
171+
isManaged: input.agentInfo.IsManaged,
172+
force: force,
173+
isRoot: input.isRoot,
174+
skipVerify: skipVerification,
175+
})
176+
if err != nil {
177+
return fmt.Errorf("aborting upgrade: %w", err)
72178
}
73-
defer c.Disconnect()
74179

75180
isBeingUpgraded, err := upgrade.IsInProgress(c, utils.GetWatcherPIDs)
76181
if err != nil {
@@ -80,7 +185,6 @@ func upgradeCmdWithClient(streams *cli.IOStreams, cmd *cobra.Command, args []str
80185
return errors.New("an upgrade is already in progress; please try again later.")
81186
}
82187

83-
skipVerification, _ := cmd.Flags().GetBool(flagSkipVerify)
84188
var pgpChecks []string
85189
if !skipVerification {
86190
// get local PGP
@@ -122,6 +226,6 @@ func upgradeCmdWithClient(streams *cli.IOStreams, cmd *cobra.Command, args []str
122226
return errors.New(err, "Failed trigger upgrade of daemon")
123227
}
124228
}
125-
fmt.Fprintf(streams.Out, "Upgrade triggered to version %s, Elastic Agent is currently restarting\n", version)
229+
fmt.Fprintf(input.streams.Out, "Upgrade triggered to version %s, Elastic Agent is currently restarting\n", version)
126230
return nil
127231
}

0 commit comments

Comments
 (0)