Skip to content

Commit e421794

Browse files
achilles42gouthamve
authored andcommitted
Add support to log slow queries in Frontend (#1744)
* Add support of logging slow queries Signed-off-by: Praveen Shukla <praveen.shukla.c42@gmail.com> * Update orgId, URL and time-taken field in query logging Signed-off-by: Praveen Shukla <praveen.shukla.c42@gmail.com> * Update default value as 0 and disable slow query logging disabled Signed-off-by: Praveen Shukla <praveen.shukla.c42@gmail.com> * Add slow log feature in changelog Signed-off-by: Praveen Shukla <praveen.shukla.c42@gmail.com> * Update slow query CLI flag to use frontend scope instead of querier Signed-off-by: Praveen Shukla <praveen.shukla.c42@gmail.com> * Update org_id in slow query logs and add more info in flag description Signed-off-by: Praveen Shukla <praveen.shukla.c42@gmail.com> * Add info log for slow query Signed-off-by: Praveen Shukla <praveen.shukla.c42@gmail.com>
1 parent e3de878 commit e421794

File tree

3 files changed

+23
-4
lines changed

3 files changed

+23
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* `-ingester.max-global-series-per-metric`
1212
* [FEATURE] Flush chunks with stale markers early with `ingester.max-stale-chunk-idle`. #1759
1313
* [FEATURE] EXPERIMENTAL: Added new KV Store backend based on memberlist library. Components can gossip about tokens and ingester states, instead of using Consul or Etcd. #1721
14+
* [FEATURE] Allow Query Frontend to log slow queries with `frontend.log-queries-longer-than`. #1744
1415
* [ENHANCEMENT] Allocation improvements in adding samples to Chunk. #1706
1516
* [ENHANCEMENT] Consul client now follows recommended practices for blocking queries wrt returned Index value. #1708
1617
* [ENHANCEMENT] Consul client can optionally rate-limit itself during Watch (used e.g. by ring watchers) and WatchPrefix (used by HA feature) operations. Rate limiting is disabled by default. New flags added: `--consul.watch-rate-limit`, and `--consul.watch-burst-size`. #1708

docs/prometheus-frontend.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ server:
1616
http_listen_port: 9091
1717

1818
frontend:
19+
log_queries_longer_than: 1s
1920
split_queries_by_day: true
2021
align_queries_with_step: true
2122
cache_results: true

pkg/querier/frontend/frontend.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"context"
66
"flag"
7+
"fmt"
78
"io"
89
"io/ioutil"
910
"math/rand"
@@ -15,6 +16,7 @@ import (
1516

1617
"github.com/NYTimes/gziphandler"
1718
"github.com/go-kit/kit/log"
19+
"github.com/go-kit/kit/log/level"
1820
opentracing "github.com/opentracing/opentracing-go"
1921
"github.com/prometheus/client_golang/prometheus"
2022
"github.com/prometheus/client_golang/prometheus/promauto"
@@ -42,16 +44,18 @@ var (
4244

4345
// Config for a Frontend.
4446
type Config struct {
45-
MaxOutstandingPerTenant int `yaml:"max_outstanding_per_tenant"`
46-
CompressResponses bool `yaml:"compress_responses"`
47-
DownstreamURL string `yaml:"downstream"`
47+
MaxOutstandingPerTenant int `yaml:"max_outstanding_per_tenant"`
48+
CompressResponses bool `yaml:"compress_responses"`
49+
DownstreamURL string `yaml:"downstream"`
50+
LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
4851
}
4952

5053
// RegisterFlags adds the flags required to config this to the given FlagSet.
5154
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
5255
f.IntVar(&cfg.MaxOutstandingPerTenant, "querier.max-outstanding-requests-per-tenant", 100, "Maximum number of outstanding requests per tenant per frontend; requests beyond this error with HTTP 429.")
5356
f.BoolVar(&cfg.CompressResponses, "querier.compress-http-responses", false, "Compress HTTP responses.")
5457
f.StringVar(&cfg.DownstreamURL, "frontend.downstream-url", "", "URL of downstream Prometheus.")
58+
f.DurationVar(&cfg.LogQueriesLongerThan, "frontend.log-queries-longer-than", 0, "Log queries that are slower than the specified duration. 0 to disable.")
5559
}
5660

5761
// Frontend queues HTTP requests, dispatches them to backends, and handles retries
@@ -139,7 +143,20 @@ func (f *Frontend) Handler() http.Handler {
139143
}
140144

141145
func (f *Frontend) handle(w http.ResponseWriter, r *http.Request) {
146+
userID, err := user.ExtractOrgID(r.Context())
147+
if err != nil {
148+
server.WriteError(w, err)
149+
return
150+
}
151+
152+
startTime := time.Now()
142153
resp, err := f.roundTripper.RoundTrip(r)
154+
queryResponseTime := time.Now().Sub(startTime)
155+
156+
if f.cfg.LogQueriesLongerThan > 0 && queryResponseTime > f.cfg.LogQueriesLongerThan {
157+
level.Info(f.log).Log("msg", "slow query", "org_id", userID, "url", fmt.Sprintf("http://%s", r.Host+r.RequestURI), "time_taken", queryResponseTime.String())
158+
}
159+
143160
if err != nil {
144161
server.WriteError(w, err)
145162
return
@@ -187,7 +204,7 @@ func (c *httpgrpcHeadersCarrier) Set(key, val string) {
187204
})
188205
}
189206

190-
// RoundTripGRPC round trips a proto (instread of a HTTP request).
207+
// RoundTripGRPC round trips a proto (instead of a HTTP request).
191208
func (f *Frontend) RoundTripGRPC(ctx context.Context, req *ProcessRequest) (*ProcessResponse, error) {
192209
// Propagate trace context in gRPC too - this will be ignored if using HTTP.
193210
tracer, span := opentracing.GlobalTracer(), opentracing.SpanFromContext(ctx)

0 commit comments

Comments
 (0)