Skip to content

Commit dab33d0

Browse files
committed
feat: add support for health-check flag
This is an adaptation of GoogleCloudPlatform/cloud-sql-proxy#1271.
1 parent 1e7b33e commit dab33d0

File tree

6 files changed

+596
-50
lines changed

6 files changed

+596
-50
lines changed

cmd/root.go

Lines changed: 63 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"contrib.go.opencensus.io/exporter/prometheus"
3333
"contrib.go.opencensus.io/exporter/stackdriver"
3434
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/alloydb"
35+
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/internal/healthcheck"
3536
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/internal/log"
3637
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/internal/proxy"
3738
"github.com/spf13/cobra"
@@ -88,6 +89,7 @@ type Command struct {
8889
telemetryProject string
8990
telemetryPrefix string
9091
prometheusNamespace string
92+
healthCheck bool
9193
httpPort string
9294
}
9395

@@ -186,6 +188,10 @@ the maximum time has passed. Defaults to 0s.`)
186188
"Enable Prometheus for metric collection using the provided namespace")
187189
cmd.PersistentFlags().StringVar(&c.httpPort, "http-port", "9090",
188190
"Port for the Prometheus server to use")
191+
cmd.PersistentFlags().BoolVar(&c.healthCheck, "health-check", false,
192+
`Enables HTTP endpoints /startup, /liveness, and /readiness
193+
that report on the proxy's health. Endpoints are available on localhost
194+
only. Uses the port specified by the http-port flag.`)
189195

190196
// Global and per instance flags
191197
cmd.PersistentFlags().StringVarP(&c.conf.Addr, "address", "a", "127.0.0.1",
@@ -241,18 +247,18 @@ func parseConfig(cmd *Command, conf *proxy.Config, args []string) error {
241247
cmd.logger.Infof("Using API Endpoint %v", conf.APIEndpointURL)
242248
}
243249

244-
if userHasSet("http-port") && !userHasSet("prometheus-namespace") {
245-
return newBadCommandError("cannot specify --http-port without --prometheus-namespace")
250+
if userHasSet("http-port") && !userHasSet("prometheus-namespace") && !userHasSet("health-check") {
251+
cmd.logger.Infof("Ignoring --http-port because --prometheus-namespace or --health-check was not set")
246252
}
247253

248254
if !userHasSet("telemetry-project") && userHasSet("telemetry-prefix") {
249-
cmd.logger.Infof("Ignoring telementry-prefix as telemetry-project was not set")
255+
cmd.logger.Infof("Ignoring --telementry-prefix as --telemetry-project was not set")
250256
}
251257
if !userHasSet("telemetry-project") && userHasSet("disable-metrics") {
252-
cmd.logger.Infof("Ignoring disable-metrics as telemetry-project was not set")
258+
cmd.logger.Infof("Ignoring --disable-metrics as --telemetry-project was not set")
253259
}
254260
if !userHasSet("telemetry-project") && userHasSet("disable-traces") {
255-
cmd.logger.Infof("Ignoring disable-traces as telemetry-project was not set")
261+
cmd.logger.Infof("Ignoring --disable-traces as --telemetry-project was not set")
256262
}
257263

258264
var ics []proxy.InstanceConnConfig
@@ -328,9 +334,8 @@ func runSignalWrapper(cmd *Command) error {
328334
ctx, cancel := context.WithCancel(cmd.Context())
329335
defer cancel()
330336

331-
// Configure Cloud Trace and/or Cloud Monitoring based on command
332-
// invocation. If a project has not been enabled, no traces or metrics are
333-
// enabled.
337+
// Configure collectors before the proxy has started to ensure we are
338+
// collecting metrics before *ANY* Cloud SQL Admin API calls are made.
334339
enableMetrics := !cmd.disableMetrics
335340
enableTraces := !cmd.disableTraces
336341
if cmd.telemetryProject != "" && (enableMetrics || enableTraces) {
@@ -358,40 +363,22 @@ func runSignalWrapper(cmd *Command) error {
358363
}()
359364
}
360365

361-
shutdownCh := make(chan error)
362-
366+
var (
367+
needsHTTPServer bool
368+
mux = http.NewServeMux()
369+
)
363370
if cmd.prometheusNamespace != "" {
371+
needsHTTPServer = true
364372
e, err := prometheus.NewExporter(prometheus.Options{
365373
Namespace: cmd.prometheusNamespace,
366374
})
367375
if err != nil {
368376
return err
369377
}
370-
mux := http.NewServeMux()
371378
mux.Handle("/metrics", e)
372-
addr := fmt.Sprintf("localhost:%s", cmd.httpPort)
373-
server := &http.Server{Addr: addr, Handler: mux}
374-
go func() {
375-
select {
376-
case <-ctx.Done():
377-
// Give the HTTP server a second to shutdown cleanly.
378-
ctx2, _ := context.WithTimeout(context.Background(), time.Second)
379-
if err := server.Shutdown(ctx2); err != nil {
380-
cmd.logger.Errorf("failed to shutdown Prometheus HTTP server: %v\n", err)
381-
}
382-
}
383-
}()
384-
go func() {
385-
err := server.ListenAndServe()
386-
if err == http.ErrServerClosed {
387-
return
388-
}
389-
if err != nil {
390-
shutdownCh <- fmt.Errorf("failed to start prometheus HTTP server: %v", err)
391-
}
392-
}()
393379
}
394380

381+
shutdownCh := make(chan error)
395382
// watch for sigterm / sigint signals
396383
signals := make(chan os.Signal, 1)
397384
signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT)
@@ -429,18 +416,57 @@ func runSignalWrapper(cmd *Command) error {
429416
cmd.logger.Errorf("The proxy has encountered a terminal error: %v", err)
430417
return err
431418
case p = <-startCh:
419+
cmd.logger.Infof("The proxy has started successfully and is ready for new connections!")
432420
}
433-
cmd.logger.Infof("The proxy has started successfully and is ready for new connections!")
434-
defer p.Close()
435421
defer func() {
436422
if cErr := p.Close(); cErr != nil {
437423
cmd.logger.Errorf("error during shutdown: %v", cErr)
438424
}
439425
}()
440426

441-
go func() {
442-
shutdownCh <- p.Serve(ctx)
443-
}()
427+
notify := func() {}
428+
if cmd.healthCheck {
429+
needsHTTPServer = true
430+
hc := healthcheck.NewCheck(p, cmd.logger)
431+
mux.HandleFunc("/startup", hc.HandleStartup)
432+
mux.HandleFunc("/readiness", hc.HandleReadiness)
433+
mux.HandleFunc("/liveness", hc.HandleLiveness)
434+
notify = hc.NotifyStarted
435+
}
436+
437+
// Start the HTTP server if anything requiring HTTP is specified, including:
438+
// Prometheus, health-check
439+
// enabled.
440+
if needsHTTPServer {
441+
server := &http.Server{
442+
Addr: fmt.Sprintf("localhost:%s", cmd.httpPort),
443+
Handler: mux,
444+
}
445+
// Start the HTTP server.
446+
go func() {
447+
err := server.ListenAndServe()
448+
if err == http.ErrServerClosed {
449+
return
450+
}
451+
if err != nil {
452+
shutdownCh <- fmt.Errorf("failed to start HTTP server: %v", err)
453+
}
454+
}()
455+
// Handle shutdown of the HTTP server gracefully.
456+
go func() {
457+
select {
458+
case <-ctx.Done():
459+
// Give the HTTP server a second to shutdown cleanly.
460+
ctx2, cancel := context.WithTimeout(context.Background(), time.Second)
461+
defer cancel()
462+
if err := server.Shutdown(ctx2); err != nil {
463+
cmd.logger.Errorf("failed to shutdown Prometheus HTTP server: %v\n", err)
464+
}
465+
}
466+
}()
467+
}
468+
469+
go func() { shutdownCh <- p.Serve(ctx, notify) }()
444470

445471
err := <-shutdownCh
446472
switch {

cmd/root_test.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,10 +350,6 @@ func TestNewCommandWithErrors(t *testing.T) {
350350
desc: "using the unix socket and port query params",
351351
args: []string{"projects/proj/locations/region/clusters/clust/instances/inst?unix-socket=/path&port=5000"},
352352
},
353-
{
354-
desc: "enabling a Prometheus port without a namespace",
355-
args: []string{"--http-port", "1111", "proj:region:inst"},
356-
},
357353
{
358354
desc: "using an invalid url for host flag",
359355
args: []string{"--host", "https://invalid:url[/]", "proj:region:inst"},
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Copyright 2022 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// Package healthcheck tests and communicates the health of the Cloud SQL Auth proxy.
16+
package healthcheck
17+
18+
import (
19+
"context"
20+
"errors"
21+
"fmt"
22+
"net/http"
23+
"sync"
24+
25+
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/alloydb"
26+
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/internal/proxy"
27+
)
28+
29+
// Check provides HTTP handlers for use as healthchecks typically in a
30+
// Kubernetes context.
31+
type Check struct {
32+
once *sync.Once
33+
started chan struct{}
34+
proxy *proxy.Client
35+
logger alloydb.Logger
36+
}
37+
38+
// NewCheck is the initializer for Check.
39+
func NewCheck(p *proxy.Client, l alloydb.Logger) *Check {
40+
return &Check{
41+
once: &sync.Once{},
42+
started: make(chan struct{}),
43+
proxy: p,
44+
logger: l,
45+
}
46+
}
47+
48+
// NotifyStarted notifies the check that the proxy has started up successfully.
49+
func (c *Check) NotifyStarted() {
50+
c.once.Do(func() { close(c.started) })
51+
}
52+
53+
// HandleStartup reports whether the Check has been notified of startup.
54+
func (c *Check) HandleStartup(w http.ResponseWriter, _ *http.Request) {
55+
select {
56+
case <-c.started:
57+
w.WriteHeader(http.StatusOK)
58+
w.Write([]byte("ok"))
59+
default:
60+
w.WriteHeader(http.StatusServiceUnavailable)
61+
w.Write([]byte("error"))
62+
}
63+
}
64+
65+
var errNotStarted = errors.New("proxy is not started")
66+
67+
// HandleReadiness ensures the Check has been notified of successful startup,
68+
// that the proxy has not reached maximum connections, and that all connections
69+
// are healthy.
70+
func (c *Check) HandleReadiness(w http.ResponseWriter, _ *http.Request) {
71+
ctx, cancel := context.WithCancel(context.Background())
72+
defer cancel()
73+
74+
select {
75+
case <-c.started:
76+
default:
77+
c.logger.Errorf("[Health Check] Readiness failed: %v", errNotStarted)
78+
w.WriteHeader(http.StatusServiceUnavailable)
79+
w.Write([]byte(errNotStarted.Error()))
80+
return
81+
}
82+
83+
if open, max := c.proxy.ConnCount(); max > 0 && open == max {
84+
err := fmt.Errorf("max connections reached (open = %v, max = %v)", open, max)
85+
c.logger.Errorf("[Health Check] Readiness failed: %v", err)
86+
w.WriteHeader(http.StatusServiceUnavailable)
87+
w.Write([]byte(err.Error()))
88+
return
89+
}
90+
91+
err := c.proxy.CheckConnections(ctx)
92+
if err != nil {
93+
c.logger.Errorf("[Health Check] Readiness failed: %v", err)
94+
w.WriteHeader(http.StatusServiceUnavailable)
95+
w.Write([]byte(err.Error()))
96+
return
97+
}
98+
99+
w.WriteHeader(http.StatusOK)
100+
w.Write([]byte("ok"))
101+
}
102+
103+
// HandleLiveness indicates the process is up and responding to HTTP requests.
104+
// If this check fails (because it's not reachable), the process is in a bad
105+
// state and should be restarted.
106+
func (c *Check) HandleLiveness(w http.ResponseWriter, _ *http.Request) {
107+
w.WriteHeader(http.StatusOK)
108+
w.Write([]byte("ok"))
109+
}

0 commit comments

Comments
 (0)