Skip to content

Commit e0b95b9

Browse files
authored
feat: add support for health-check flag (GoogleCloudPlatform#85)
This is an adaptation of GoogleCloudPlatform/cloud-sql-proxy#1271.
1 parent 1e7b33e commit e0b95b9

File tree

7 files changed

+596
-51
lines changed

7 files changed

+596
-51
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ and from a AlloyDB instance. The `ALL_PROXY` environment variable supports
271271
`socks5h` protocol.
272272

273273
The `HTTPS_PROXY` (or `HTTP_PROXY`) specifies the proxy for all HTTP(S) traffic
274-
to the SQL Admin API. Specifying `HTTPS_PROXY` or `HTTP_PROXY` is only necessary
274+
to the AlloyDB Admin API. Specifying `HTTPS_PROXY` or `HTTP_PROXY` is only necessary
275275
when you want to proxy this traffic. Otherwise, it is optional. See
276276
[`http.ProxyFromEnvironment`](https://pkg.go.dev/net/http@go1.17.3#ProxyFromEnvironment)
277277
for possible values.

cmd/root.go

Lines changed: 61 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"contrib.go.opencensus.io/exporter/prometheus"
3333
"contrib.go.opencensus.io/exporter/stackdriver"
3434
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/alloydb"
35+
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/internal/healthcheck"
3536
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/internal/log"
3637
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/internal/proxy"
3738
"github.com/spf13/cobra"
@@ -88,6 +89,7 @@ type Command struct {
8889
telemetryProject string
8990
telemetryPrefix string
9091
prometheusNamespace string
92+
healthCheck bool
9193
httpPort string
9294
}
9395

@@ -186,6 +188,10 @@ the maximum time has passed. Defaults to 0s.`)
186188
"Enable Prometheus for metric collection using the provided namespace")
187189
cmd.PersistentFlags().StringVar(&c.httpPort, "http-port", "9090",
188190
"Port for the Prometheus server to use")
191+
cmd.PersistentFlags().BoolVar(&c.healthCheck, "health-check", false,
192+
`Enables HTTP endpoints /startup, /liveness, and /readiness
193+
that report on the proxy's health. Endpoints are available on localhost
194+
only. Uses the port specified by the http-port flag.`)
189195

190196
// Global and per instance flags
191197
cmd.PersistentFlags().StringVarP(&c.conf.Addr, "address", "a", "127.0.0.1",
@@ -241,18 +247,18 @@ func parseConfig(cmd *Command, conf *proxy.Config, args []string) error {
241247
cmd.logger.Infof("Using API Endpoint %v", conf.APIEndpointURL)
242248
}
243249

244-
if userHasSet("http-port") && !userHasSet("prometheus-namespace") {
245-
return newBadCommandError("cannot specify --http-port without --prometheus-namespace")
250+
if userHasSet("http-port") && !userHasSet("prometheus-namespace") && !userHasSet("health-check") {
251+
cmd.logger.Infof("Ignoring --http-port because --prometheus-namespace or --health-check was not set")
246252
}
247253

248254
if !userHasSet("telemetry-project") && userHasSet("telemetry-prefix") {
249-
cmd.logger.Infof("Ignoring telementry-prefix as telemetry-project was not set")
255+
cmd.logger.Infof("Ignoring --telementry-prefix as --telemetry-project was not set")
250256
}
251257
if !userHasSet("telemetry-project") && userHasSet("disable-metrics") {
252-
cmd.logger.Infof("Ignoring disable-metrics as telemetry-project was not set")
258+
cmd.logger.Infof("Ignoring --disable-metrics as --telemetry-project was not set")
253259
}
254260
if !userHasSet("telemetry-project") && userHasSet("disable-traces") {
255-
cmd.logger.Infof("Ignoring disable-traces as telemetry-project was not set")
261+
cmd.logger.Infof("Ignoring --disable-traces as --telemetry-project was not set")
256262
}
257263

258264
var ics []proxy.InstanceConnConfig
@@ -328,9 +334,8 @@ func runSignalWrapper(cmd *Command) error {
328334
ctx, cancel := context.WithCancel(cmd.Context())
329335
defer cancel()
330336

331-
// Configure Cloud Trace and/or Cloud Monitoring based on command
332-
// invocation. If a project has not been enabled, no traces or metrics are
333-
// enabled.
337+
// Configure collectors before the proxy has started to ensure we are
338+
// collecting metrics before *ANY* AlloyDB Admin API calls are made.
334339
enableMetrics := !cmd.disableMetrics
335340
enableTraces := !cmd.disableTraces
336341
if cmd.telemetryProject != "" && (enableMetrics || enableTraces) {
@@ -358,40 +363,22 @@ func runSignalWrapper(cmd *Command) error {
358363
}()
359364
}
360365

361-
shutdownCh := make(chan error)
362-
366+
var (
367+
needsHTTPServer bool
368+
mux = http.NewServeMux()
369+
)
363370
if cmd.prometheusNamespace != "" {
371+
needsHTTPServer = true
364372
e, err := prometheus.NewExporter(prometheus.Options{
365373
Namespace: cmd.prometheusNamespace,
366374
})
367375
if err != nil {
368376
return err
369377
}
370-
mux := http.NewServeMux()
371378
mux.Handle("/metrics", e)
372-
addr := fmt.Sprintf("localhost:%s", cmd.httpPort)
373-
server := &http.Server{Addr: addr, Handler: mux}
374-
go func() {
375-
select {
376-
case <-ctx.Done():
377-
// Give the HTTP server a second to shutdown cleanly.
378-
ctx2, _ := context.WithTimeout(context.Background(), time.Second)
379-
if err := server.Shutdown(ctx2); err != nil {
380-
cmd.logger.Errorf("failed to shutdown Prometheus HTTP server: %v\n", err)
381-
}
382-
}
383-
}()
384-
go func() {
385-
err := server.ListenAndServe()
386-
if err == http.ErrServerClosed {
387-
return
388-
}
389-
if err != nil {
390-
shutdownCh <- fmt.Errorf("failed to start prometheus HTTP server: %v", err)
391-
}
392-
}()
393379
}
394380

381+
shutdownCh := make(chan error)
395382
// watch for sigterm / sigint signals
396383
signals := make(chan os.Signal, 1)
397384
signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT)
@@ -429,18 +416,55 @@ func runSignalWrapper(cmd *Command) error {
429416
cmd.logger.Errorf("The proxy has encountered a terminal error: %v", err)
430417
return err
431418
case p = <-startCh:
419+
cmd.logger.Infof("The proxy has started successfully and is ready for new connections!")
432420
}
433-
cmd.logger.Infof("The proxy has started successfully and is ready for new connections!")
434-
defer p.Close()
435421
defer func() {
436422
if cErr := p.Close(); cErr != nil {
437423
cmd.logger.Errorf("error during shutdown: %v", cErr)
438424
}
439425
}()
440426

441-
go func() {
442-
shutdownCh <- p.Serve(ctx)
443-
}()
427+
notify := func() {}
428+
if cmd.healthCheck {
429+
needsHTTPServer = true
430+
hc := healthcheck.NewCheck(p, cmd.logger)
431+
mux.HandleFunc("/startup", hc.HandleStartup)
432+
mux.HandleFunc("/readiness", hc.HandleReadiness)
433+
mux.HandleFunc("/liveness", hc.HandleLiveness)
434+
notify = hc.NotifyStarted
435+
}
436+
437+
// Start the HTTP server if anything requiring HTTP is specified.
438+
if needsHTTPServer {
439+
server := &http.Server{
440+
Addr: fmt.Sprintf("localhost:%s", cmd.httpPort),
441+
Handler: mux,
442+
}
443+
// Start the HTTP server.
444+
go func() {
445+
err := server.ListenAndServe()
446+
if err == http.ErrServerClosed {
447+
return
448+
}
449+
if err != nil {
450+
shutdownCh <- fmt.Errorf("failed to start HTTP server: %v", err)
451+
}
452+
}()
453+
// Handle shutdown of the HTTP server gracefully.
454+
go func() {
455+
select {
456+
case <-ctx.Done():
457+
// Give the HTTP server a second to shutdown cleanly.
458+
ctx2, cancel := context.WithTimeout(context.Background(), time.Second)
459+
defer cancel()
460+
if err := server.Shutdown(ctx2); err != nil {
461+
cmd.logger.Errorf("failed to shutdown Prometheus HTTP server: %v\n", err)
462+
}
463+
}
464+
}()
465+
}
466+
467+
go func() { shutdownCh <- p.Serve(ctx, notify) }()
444468

445469
err := <-shutdownCh
446470
switch {

cmd/root_test.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,10 +350,6 @@ func TestNewCommandWithErrors(t *testing.T) {
350350
desc: "using the unix socket and port query params",
351351
args: []string{"projects/proj/locations/region/clusters/clust/instances/inst?unix-socket=/path&port=5000"},
352352
},
353-
{
354-
desc: "enabling a Prometheus port without a namespace",
355-
args: []string{"--http-port", "1111", "proj:region:inst"},
356-
},
357353
{
358354
desc: "using an invalid url for host flag",
359355
args: []string{"--host", "https://invalid:url[/]", "proj:region:inst"},
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
// Copyright 2022 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// Package healthcheck tests and communicates the health of the AlloyDB Auth
16+
// proxy.
17+
package healthcheck
18+
19+
import (
20+
"context"
21+
"errors"
22+
"fmt"
23+
"net/http"
24+
"sync"
25+
26+
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/alloydb"
27+
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/internal/proxy"
28+
)
29+
30+
// Check provides HTTP handlers for use as healthchecks typically in a
31+
// Kubernetes context.
32+
type Check struct {
33+
once *sync.Once
34+
started chan struct{}
35+
proxy *proxy.Client
36+
logger alloydb.Logger
37+
}
38+
39+
// NewCheck is the initializer for Check.
40+
func NewCheck(p *proxy.Client, l alloydb.Logger) *Check {
41+
return &Check{
42+
once: &sync.Once{},
43+
started: make(chan struct{}),
44+
proxy: p,
45+
logger: l,
46+
}
47+
}
48+
49+
// NotifyStarted notifies the check that the proxy has started up successfully.
50+
func (c *Check) NotifyStarted() {
51+
c.once.Do(func() { close(c.started) })
52+
}
53+
54+
// HandleStartup reports whether the Check has been notified of startup.
55+
func (c *Check) HandleStartup(w http.ResponseWriter, _ *http.Request) {
56+
select {
57+
case <-c.started:
58+
w.WriteHeader(http.StatusOK)
59+
w.Write([]byte("ok"))
60+
default:
61+
w.WriteHeader(http.StatusServiceUnavailable)
62+
w.Write([]byte("error"))
63+
}
64+
}
65+
66+
var errNotStarted = errors.New("proxy is not started")
67+
68+
// HandleReadiness ensures the Check has been notified of successful startup,
69+
// that the proxy has not reached maximum connections, and that all connections
70+
// are healthy.
71+
func (c *Check) HandleReadiness(w http.ResponseWriter, _ *http.Request) {
72+
ctx, cancel := context.WithCancel(context.Background())
73+
defer cancel()
74+
75+
select {
76+
case <-c.started:
77+
default:
78+
c.logger.Errorf("[Health Check] Readiness failed: %v", errNotStarted)
79+
w.WriteHeader(http.StatusServiceUnavailable)
80+
w.Write([]byte(errNotStarted.Error()))
81+
return
82+
}
83+
84+
if open, max := c.proxy.ConnCount(); max > 0 && open == max {
85+
err := fmt.Errorf("max connections reached (open = %v, max = %v)", open, max)
86+
c.logger.Errorf("[Health Check] Readiness failed: %v", err)
87+
w.WriteHeader(http.StatusServiceUnavailable)
88+
w.Write([]byte(err.Error()))
89+
return
90+
}
91+
92+
err := c.proxy.CheckConnections(ctx)
93+
if err != nil {
94+
c.logger.Errorf("[Health Check] Readiness failed: %v", err)
95+
w.WriteHeader(http.StatusServiceUnavailable)
96+
w.Write([]byte(err.Error()))
97+
return
98+
}
99+
100+
w.WriteHeader(http.StatusOK)
101+
w.Write([]byte("ok"))
102+
}
103+
104+
// HandleLiveness indicates the process is up and responding to HTTP requests.
105+
// If this check fails (because it's not reachable), the process is in a bad
106+
// state and should be restarted.
107+
func (c *Check) HandleLiveness(w http.ResponseWriter, _ *http.Request) {
108+
w.WriteHeader(http.StatusOK)
109+
w.Write([]byte("ok"))
110+
}

0 commit comments

Comments
 (0)