Revert "Add /liveness endpoint to elastic-agent (#4499)" (#4583)
This reverts commit 29ce53e.
cmacknz authored Apr 16, 2024
1 parent 29ce53e commit eca5bc7
Showing 22 changed files with 88 additions and 975 deletions.
13 changes: 0 additions & 13 deletions _meta/config/common.p2.yml.tmpl
@@ -66,19 +66,6 @@ inputs:
# # The name of the output to use for monitoring data.
# use_output: monitoring
# # exposes agent metrics using http, by default sockets and named pipes are used
# #
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
# # 200: Agent is healthy
# # 500: A component or unit is in a failed state
# # 503: The agent coordinator is unresponsive
# #
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
# # The possible values for `failon` are:
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
# http:
# # enables http endpoint
# enabled: false
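For context, the comment block removed above documented the reverted /liveness endpoint. A minimal Go sketch of a client-side check against that endpoint, assuming the port 6792 and the failon semantics described in the removed comments (the endpoint no longer exists after this revert):

```go
package main

import (
	"fmt"
	"net/http"
	"time"
)

// checkLiveness probes the reverted /liveness endpoint and maps its
// documented status codes to a pass/fail result. failon selects which
// states count as failures: "degraded", "failed", or "heartbeat".
func checkLiveness(baseURL, failon string) (bool, error) {
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Get(fmt.Sprintf("%s/liveness?failon=%s", baseURL, failon))
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK: // 200: agent is healthy
		return true, nil
	case http.StatusInternalServerError: // 500: a component or unit tripped the failon check
		return false, nil
	case http.StatusServiceUnavailable: // 503: the agent coordinator is unresponsive
		return false, nil
	default:
		return false, fmt.Errorf("unexpected status %d", resp.StatusCode)
	}
}

func main() {
	healthy, err := checkLiveness("http://localhost:6792", "degraded")
	if err != nil {
		fmt.Println("liveness check failed:", err)
		return
	}
	fmt.Println("agent healthy:", healthy)
}
```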
15 changes: 1 addition & 14 deletions _meta/config/common.reference.p2.yml.tmpl
@@ -144,20 +144,7 @@ inputs:
# pprof.enabled: false
# # The name of the output to use for monitoring data.
# use_output: monitoring
# # Exposes agent metrics using http, by default sockets and named pipes are used.
# #
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
# # 200: Agent is healthy
# # 500: A component or unit is in a failed state
# # 503: The agent coordinator is unresponsive
# #
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
# # The possible values for `failon` are:
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
# # exposes agent metrics using http, by default sockets and named pipes are used
# http:
# # enables http endpoint
# enabled: false
13 changes: 0 additions & 13 deletions _meta/config/elastic-agent.docker.yml.tmpl
@@ -116,19 +116,6 @@ inputs:
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
# pprof.enabled: false
# # exposes agent metrics using http, by default sockets and named pipes are used
# #
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
# # 200: Agent is healthy
# # 500: A component or unit is in a failed state
# # 503: The agent coordinator is unresponsive
# #
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
# # The possible values for `failon` are:
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
# http:
# # enables http endpoint
# enabled: false
10 changes: 1 addition & 9 deletions _meta/elastic-agent.yml
@@ -103,15 +103,7 @@ inputs:
# logs: false
# # enables metrics monitoring
# metrics: false
# # Exposes agent metrics using http, by default sockets and named pipes are used.
# # Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
# # 200: Agent is healthy
# # 500: A component or unit is in a failed state
# # 503: The agent coordinator is unresponsive
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
# # The two possible values for `failon` are `degraded` and `failed`. If no `failon` parameter is provided, the default
# # behavior is `failon=failed`
# # exposes agent metrics using http, by default sockets and named pipes are used
# http:
# # enables http endpoint
# enabled: false
32 changes: 0 additions & 32 deletions changelog/fragments/1711653910-add-liveness-endpoint.yaml

This file was deleted.

13 changes: 0 additions & 13 deletions elastic-agent.docker.yml
@@ -116,19 +116,6 @@ inputs:
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
# pprof.enabled: false
# # exposes agent metrics using http, by default sockets and named pipes are used
# #
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
# # 200: Agent is healthy
# # 500: A component or unit is in a failed state
# # 503: The agent coordinator is unresponsive
# #
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
# # The possible values for `failon` are:
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
# http:
# # enables http endpoint
# enabled: false
15 changes: 1 addition & 14 deletions elastic-agent.reference.yml
@@ -150,20 +150,7 @@ inputs:
# pprof.enabled: false
# # The name of the output to use for monitoring data.
# use_output: monitoring
# # Exposes agent metrics using http, by default sockets and named pipes are used.
# #
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
# # 200: Agent is healthy
# # 500: A component or unit is in a failed state
# # 503: The agent coordinator is unresponsive
# #
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
# # The possible values for `failon` are:
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
# # exposes agent metrics using http, by default sockets and named pipes are used
# http:
# # enables http endpoint
# enabled: false
13 changes: 0 additions & 13 deletions elastic-agent.yml
@@ -72,19 +72,6 @@ inputs:
# # The name of the output to use for monitoring data.
# use_output: monitoring
# # exposes agent metrics using http, by default sockets and named pipes are used
# #
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
# # 200: Agent is healthy
# # 500: A component or unit is in a failed state
# # 503: The agent coordinator is unresponsive
# #
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
# # The possible values for `failon` are:
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
# http:
# # enables http endpoint
# enabled: false
24 changes: 0 additions & 24 deletions internal/pkg/agent/application/coordinator/coordinator.go
@@ -279,11 +279,6 @@ type Coordinator struct {

// mx sync.RWMutex
// protection protection.Config

// a sync channel that can be called by other components to check if the main coordinator
// loop in runLoopIteration() is active and listening.
// Should only be interacted with via CoordinatorActive() or runLoopIteration()
heartbeatChan chan struct{}
}

// The channels Coordinator reads to receive updates from the various managers.
@@ -377,7 +372,6 @@ func New(logger *logger.Logger, cfg *configuration.Configuration, logLevel logp.
logLevelCh: make(chan logp.Level),
overrideStateChan: make(chan *coordinatorOverrideState),
upgradeDetailsChan: make(chan *details.Details),
heartbeatChan: make(chan struct{}),
}
// Setup communication channels for any non-nil components. This pattern
// lets us transparently accept nil managers / simulated events during
@@ -418,22 +412,6 @@ func (c *Coordinator) State() State {
return c.stateBroadcaster.Get()
}

// CoordinatorActive is a blocking method that waits for a channel response
// from the coordinator loop. This can be used to as a basic health check,
// as we'll timeout and return false if the coordinator run loop doesn't
// respond to our channel.
func (c *Coordinator) CoordinatorActive(timeout time.Duration) bool {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

select {
case <-c.heartbeatChan:
return true
case <-ctx.Done():
return false
}
}

func (c *Coordinator) RegisterMonitoringServer(s configReloader) {
c.monitoringServerReloader = s
}
@@ -999,8 +977,6 @@ func (c *Coordinator) runLoopIteration(ctx context.Context) {
case upgradeDetails := <-c.upgradeDetailsChan:
c.setUpgradeDetails(upgradeDetails)

case c.heartbeatChan <- struct{}{}:

case componentState := <-c.managerChans.runtimeManagerUpdate:
// New component change reported by the runtime manager via
// Coordinator.watchRuntimeComponents(), merge it with the
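The removed heartbeatChan and CoordinatorActive implement a standard Go liveness pattern: the run loop offers a send on an unbuffered channel as one of its select cases, so a completed receive on the other side proves the loop is still iterating. A self-contained sketch of the pattern (names are illustrative, not the agent's API):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// runLoop services its work in a select; offering a send on the unbuffered
// heartbeat channel as one case means the send can only complete while the
// loop is actively selecting, so a successful receive proves liveness.
func runLoop(ctx context.Context, heartbeat chan struct{}) {
	for {
		select {
		case <-ctx.Done():
			return
		case heartbeat <- struct{}{}: // answer a liveness probe
		}
	}
}

// active mirrors the shape of the removed CoordinatorActive: block until
// the loop answers the heartbeat or the timeout expires.
func active(heartbeat chan struct{}, timeout time.Duration) bool {
	select {
	case <-heartbeat:
		return true
	case <-time.After(timeout):
		return false
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	heartbeat := make(chan struct{}) // unbuffered on purpose

	go runLoop(ctx, heartbeat)
	fmt.Println("responsive:", active(heartbeat, time.Second)) // true

	cancel()                          // stop the loop
	time.Sleep(10 * time.Millisecond) // let it observe cancellation
	fmt.Println("responsive:", active(heartbeat, 100*time.Millisecond)) // false
}
```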
internal/pkg/agent/application/coordinator/coordinator_test.go
@@ -14,7 +14,6 @@ import (
"context"
"errors"
"fmt"
"net"
"testing"
"time"

@@ -571,7 +570,7 @@ func TestCoordinatorPolicyChangeUpdatesMonitorReloader(t *testing.T) {
}

monitoringServer := &fakeMonitoringServer{}
newServerFn := func(*monitoringCfg.MonitoringConfig) (reload.ServerController, error) {
newServerFn := func() (reload.ServerController, error) {
return monitoringServer, nil
}
monitoringReloader := reload.NewServerReloader(newServerFn, logger, monitoringCfg.DefaultConfig())
@@ -1055,7 +1054,3 @@ func (fs *fakeMonitoringServer) Reset() {
fs.stopTriggered = false
fs.startTriggered = false
}

func (fs *fakeMonitoringServer) Addr() net.Addr {
return nil
}
12 changes: 1 addition & 11 deletions internal/pkg/agent/application/monitoring/handler.go
@@ -8,9 +8,6 @@ import (
"encoding/json"
"fmt"
"net/http"
"time"

"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
)

const errTypeUnexpected = "UNEXPECTED"
@@ -19,13 +16,6 @@ type apiError interface {
Status() int
}

// CoordinatorState is used by the HTTP handlers that take a coordinator object.
// This interface exists to help make testing easier.
type CoordinatorState interface {
State() coordinator.State
CoordinatorActive(timeout time.Duration) bool
}

func createHandler(fn func(w http.ResponseWriter, r *http.Request) error) *apiHandler {
return &apiHandler{
innerFn: fn,
@@ -40,7 +30,7 @@ type apiHandler struct {
func (h *apiHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
err := h.innerFn(w, r)
if err != nil {
switch e := err.(type) { //nolint:errorlint // Will need refactor.
switch e := err.(type) { // nolint:errorlint // Will need refactor.
case apiError:
w.WriteHeader(e.Status())
default:
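The surviving apiHandler above turns error-returning handler funcs into http.Handlers, using the apiError interface to pick a status code. A self-contained sketch of how such a wrapper is typically wired up (statusError and the route are hypothetical, for illustration only):

```go
package main

import (
	"fmt"
	"net/http"
)

// apiError mirrors the interface kept in handler.go: an error that
// knows which HTTP status code it should produce.
type apiError interface {
	error
	Status() int
}

// statusError is a hypothetical concrete apiError for the example.
type statusError struct {
	code int
	msg  string
}

func (e *statusError) Error() string { return e.msg }
func (e *statusError) Status() int   { return e.code }

// apiHandler wraps an error-returning handler func, following the shape
// of createHandler/apiHandler in handler.go.
type apiHandler struct {
	innerFn func(w http.ResponseWriter, r *http.Request) error
}

func (h *apiHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if err := h.innerFn(w, r); err != nil {
		// Errors that carry a status use it; anything else becomes a 500.
		if e, ok := err.(apiError); ok {
			w.WriteHeader(e.Status())
		} else {
			w.WriteHeader(http.StatusInternalServerError)
		}
		fmt.Fprintln(w, err.Error())
	}
}

func main() {
	http.Handle("/always-fails", &apiHandler{
		innerFn: func(w http.ResponseWriter, r *http.Request) error {
			return &statusError{code: http.StatusServiceUnavailable, msg: "coordinator unresponsive"}
		},
	})
	_ = http.ListenAndServe("localhost:8080", nil)
}
```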
88 changes: 0 additions & 88 deletions internal/pkg/agent/application/monitoring/liveness.go

This file was deleted.

