Skip to content

Commit

Permalink
Merge pull request G-Research#40 from G-Research/healthchecks
Browse files Browse the repository at this point in the history
Makes the existing Info healthcheck used by Thanos call the query API to assert that OpenTSDB is ready to query.
Adds grpc healthcheck service endpoint.
Adds grpc_health_probe to docker image, using wget.
  • Loading branch information
robincw-gr authored Oct 2, 2019
2 parents a9fccfe + 5fbabb5 commit f0491cb
Show file tree
Hide file tree
Showing 6 changed files with 362 additions and 6 deletions.
7 changes: 5 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ RUN go build -mod=vendor -ldflags '-extldflags "-static"' -o geras ./cmd/geras/m

# final stage
FROM alpine
WORKDIR /app
COPY --from=build-env /src/geras /app/
WORKDIR /bin
COPY --from=build-env /src/geras /bin/
RUN GRPC_HEALTH_PROBE_VERSION=v0.3.0 && \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64 && \
chmod +x /bin/grpc_health_probe
ENTRYPOINT ["./geras"]
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ After the build you will have a self-contained binary (`geras`). It writes logs
Log format. One of [logfmt, json] (default "logfmt")
-log.level string
Log filtering level. One of [debug, info, warn, error] (default "error")
-healthcheck-metric string
A metric to query as a readiness health check. (default "tsd.rpc.received")
-metrics-refresh-interval duration
Time between metric name refreshes. Use negative duration to disable refreshes. (default 15m0s)
-metrics-suggestions
Expand Down
5 changes: 4 additions & 1 deletion cmd/geras/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/thanos-io/thanos/pkg/store/storepb"
"golang.org/x/net/trace"
"google.golang.org/grpc"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
)

func NewConfiguredLogger(format string, logLevel string) (log.Logger, error) {
Expand Down Expand Up @@ -97,6 +98,7 @@ func main() {
traceDumpBody := flag.Bool("trace-dumpbody", false, "Include TSDB request and response bodies in traces (can be expensive)")
logFormat := flag.String("log.format", "logfmt", "Log format. One of [logfmt, json]")
logLevel := flag.String("log.level", "error", "Log filtering level. One of [debug, info, warn, error]")
healthcheckMetric := flag.String("healthcheck-metric", "tsd.rpc.received", "A metric to query as a readiness health check.")
openTSDBAddress := flag.String("opentsdb-address", "", "<host>:<port>")
refreshInterval := flag.Duration("metrics-refresh-interval", time.Minute*15,
"Time between metric name refreshes. Use negative duration to disable refreshes.")
Expand Down Expand Up @@ -170,9 +172,10 @@ func main() {
http.Handle("/metrics", promhttp.Handler())

// create openTSDBStore and expose its api on a grpc server
srv := store.NewOpenTSDBStore(logger, client, prometheus.DefaultRegisterer, *refreshInterval, storeLabels, allowedMetricNames, blockedMetricNames, *enableMetricSuggestions)
srv := store.NewOpenTSDBStore(logger, client, prometheus.DefaultRegisterer, *refreshInterval, storeLabels, allowedMetricNames, blockedMetricNames, *enableMetricSuggestions, *healthcheckMetric)
grpcSrv := grpc.NewServer()
storepb.RegisterStoreServer(grpcSrv, srv)
healthpb.RegisterHealthServer(grpcSrv, srv)
l, err := net.Listen("tcp", *grpcListenAddr)
if err != nil {
level.Error(logger).Log("err", err)
Expand Down
26 changes: 23 additions & 3 deletions pkg/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/thanos-io/thanos/pkg/store/storepb"
"golang.org/x/net/trace"
"google.golang.org/grpc/codes"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/status"

opentsdb "github.com/G-Research/opentsdb-goclient/client"
Expand All @@ -33,9 +34,10 @@ type OpenTSDBStore struct {
allowedMetricNames, blockedMetricNames *regexp.Regexp
enableMetricSuggestions bool
storeLabels []storepb.Label
healthcheckMetric string
}

func NewOpenTSDBStore(logger log.Logger, client opentsdb.ClientContext, reg prometheus.Registerer, interval time.Duration, storeLabels []storepb.Label, allowedMetricNames, blockedMetricNames *regexp.Regexp, enableMetricSuggestions bool) *OpenTSDBStore {
func NewOpenTSDBStore(logger log.Logger, client opentsdb.ClientContext, reg prometheus.Registerer, interval time.Duration, storeLabels []storepb.Label, allowedMetricNames, blockedMetricNames *regexp.Regexp, enableMetricSuggestions bool, healthcheckMetric string) *OpenTSDBStore {
store := &OpenTSDBStore{
logger: log.With(logger, "component", "opentsdb"),
openTSDBClient: client,
Expand All @@ -45,6 +47,7 @@ func NewOpenTSDBStore(logger log.Logger, client opentsdb.ClientContext, reg prom
storeLabels: storeLabels,
allowedMetricNames: allowedMetricNames,
blockedMetricNames: blockedMetricNames,
healthcheckMetric: healthcheckMetric,
}
store.updateMetrics(context.Background(), logger)
return store
Expand Down Expand Up @@ -119,13 +122,30 @@ func (store *OpenTSDBStore) Info(
Labels: store.storeLabels,
}
var err error
store.timedTSDBOp("version", func() error {
_, err = store.openTSDBClient.WithContext(ctx).Version()
store.timedTSDBOp("query", func() error {
now := time.Now().Unix()
q := opentsdb.QueryParam{
Start: now,
End: now + 1,
Queries: []opentsdb.SubQuery{{
Metric: store.healthcheckMetric,
Aggregator: "sum",
}},
}
_, err = store.openTSDBClient.WithContext(ctx).Query(q)
return err
})
return &res, err
}

// Check implements the Check method of the gRPC health service.
//
// It unconditionally reports HealthCheckResponse_SERVING with a nil error;
// neither ctx nor req is inspected and OpenTSDB is not contacted here.
//
// A pointer receiver is used, matching the store's other methods (Info,
// Series), so the store struct is not copied on every health probe.
func (store *OpenTSDBStore) Check(ctx context.Context, req *healthpb.HealthCheckRequest) (*healthpb.HealthCheckResponse, error) {
	return &healthpb.HealthCheckResponse{Status: healthpb.HealthCheckResponse_SERVING}, nil
}

// Watch is part of the gRPC health service but is not supported by this
// store: it ignores its arguments and always returns a status error with
// code codes.Unimplemented.
//
// A pointer receiver is used, matching the store's other methods (Info,
// Series), so the store struct is not copied on every call.
func (store *OpenTSDBStore) Watch(req *healthpb.HealthCheckRequest, srv healthpb.Health_WatchServer) error {
	return status.Errorf(codes.Unimplemented, "method Watch not implemented")
}

func (store *OpenTSDBStore) Series(
req *storepb.SeriesRequest,
server storepb.Store_SeriesServer) error {
Expand Down
Loading

0 comments on commit f0491cb

Please sign in to comment.