Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a new config and metric for reporting ruler query execution wall time. #4317

Merged
merged 16 commits into from
Jul 9, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Move query metric and log to separate function. Add log message for r…
…uler query time.

Signed-off-by: Tyler Reid <tyler.reid@grafana.com>
  • Loading branch information
Tyler Reid committed Jul 1, 2021
commit e5894e53e1ac9dd277a43afe6bda833e39671a8a
36 changes: 27 additions & 9 deletions pkg/ruler/compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"time"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/notifier"
Expand All @@ -20,6 +21,7 @@ import (

"github.com/cortexproject/cortex/pkg/cortexpb"
"github.com/cortexproject/cortex/pkg/querier"
util_log "github.com/cortexproject/cortex/pkg/util/log"
)

// Pusher is an ingester server that accepts pushes.
Expand Down Expand Up @@ -143,16 +145,9 @@ func EngineQueryFunc(engine *promql.Engine, q storage.Queryable, overrides Rules
}
}

func MetricsQueryFunc(qf rules.QueryFunc, queries, failedQueries prometheus.Counter, queryTime prometheus.Counter) rules.QueryFunc {
func MetricsQueryFunc(qf rules.QueryFunc, queries, failedQueries prometheus.Counter) rules.QueryFunc {
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
queries.Inc()

// If we've been passed a counter we want to record the wall time spent executing this request.
if queryTime != nil {
timer := prometheus.NewTimer(nil)
defer func() { queryTime.Add(timer.ObserveDuration().Seconds()) }()
}

result, err := qf(ctx, qs, t)
// We rely on TranslateToPromqlApiError to do its job here... it returns nil, if err is nil.
// It returns promql.ErrStorage, if error should be reported back as 500.
Expand All @@ -168,6 +163,29 @@ func MetricsQueryFunc(qf rules.QueryFunc, queries, failedQueries prometheus.Coun
}
}

func RecordAndReportRuleQueryMetrics(qf rules.QueryFunc, queryTime prometheus.Counter, logger log.Logger) rules.QueryFunc {
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
// If we've been passed a counter we want to record the wall time spent executing this request.
if queryTime != nil {
timer := prometheus.NewTimer(nil)
defer func() {
querySeconds := timer.ObserveDuration().Seconds()
queryTime.Add(querySeconds)

// Log ruler query stats.
logMessage := append([]interface{}{
"msg", "ruler query stats",
"cortex_ruler_query_seconds_total", querySeconds,
}, qs)
level.Info(util_log.WithContext(ctx, logger)).Log(logMessage...)
}()
}

result, err := qf(ctx, qs, t)
return result, err
}
}
Copy link
Contributor

@pstibrany pstibrany Jul 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can simplify this function:

  1. we don't need to return new function, if queryTime is nil. No need to check it each time.
  2. not sure what's going on with logging here, but we can just call Log, right?
Suggested change
func RecordAndReportRuleQueryMetrics(qf rules.QueryFunc, queryTime prometheus.Counter, logger log.Logger) rules.QueryFunc {
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
// If we've been passed a counter we want to record the wall time spent executing this request.
if queryTime != nil {
timer := prometheus.NewTimer(nil)
defer func() {
querySeconds := timer.ObserveDuration().Seconds()
queryTime.Add(querySeconds)
// Log ruler query stats.
logMessage := append([]interface{}{
"msg", "ruler query stats",
"cortex_ruler_query_seconds_total", querySeconds,
}, qs)
level.Info(util_log.WithContext(ctx, logger)).Log(logMessage...)
}()
}
result, err := qf(ctx, qs, t)
return result, err
}
}
func RecordAndReportRuleQueryMetrics(qf rules.QueryFunc, queryTime prometheus.Counter, logger log.Logger) rules.QueryFunc {
if queryTime == nil {
return qf
}
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
timer := prometheus.NewTimer(nil)
defer func() {
querySeconds := timer.ObserveDuration().Seconds()
queryTime.Add(querySeconds)
level.Info(util_log.WithContext(ctx, logger)).Log("msg", "ruler query stats", "cortex_ruler_query_seconds_total", querySeconds, "query", qs)
}()
result, err := qf(ctx, qs, t)
return result, err
}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure what's going on with logging here, but we can just call Log, right?

I'm trying to follow what we do for query front-end logging in case a user would also want to record this log message in the ruler and use it for billing instead of the added metric.


// This interface mimicks rules.Manager API. Interface is used to simplify tests.
type RulesManager interface {
// Starts rules manager. Blocks until Stop is called.
Expand Down Expand Up @@ -221,7 +239,7 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi
return rules.NewManager(&rules.ManagerOptions{
Appendable: NewPusherAppendable(p, userID, overrides, totalWrites, failedWrites),
Queryable: q,
QueryFunc: MetricsQueryFunc(EngineQueryFunc(engine, q, overrides, userID), totalQueries, failedQueries, queryTime),
QueryFunc: RecordAndReportRuleQueryMetrics(MetricsQueryFunc(EngineQueryFunc(engine, q, overrides, userID), totalQueries, failedQueries), queryTime, logger),
Context: user.InjectOrgID(ctx, userID),
ExternalURL: cfg.ExternalURL.URL,
NotifyFunc: SendAlerts(notifier, cfg.ExternalURL.URL.String()),
Expand Down
6 changes: 3 additions & 3 deletions pkg/ruler/compat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"testing"
"time"

"github.com/go-kit/kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
Expand Down Expand Up @@ -226,12 +227,11 @@ func TestMetricsQueryFuncErrors(t *testing.T) {
t.Run(name, func(t *testing.T) {
queries := prometheus.NewCounter(prometheus.CounterOpts{})
failures := prometheus.NewCounter(prometheus.CounterOpts{})
queryTime := prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"})

mockFunc := func(ctx context.Context, q string, t time.Time) (promql.Vector, error) {
return promql.Vector{}, tc.returnedError
}
qf := MetricsQueryFunc(mockFunc, queries, failures, queryTime.WithLabelValues("userID"))
qf := MetricsQueryFunc(mockFunc, queries, failures)

_, err := qf(context.Background(), "test", time.Now())
require.Equal(t, tc.returnedError, err)
Expand All @@ -251,7 +251,7 @@ func TestMetricsQueryFuncMetrics(t *testing.T) {
time.Sleep(1 * time.Second)
return promql.Vector{}, nil
}
qf := MetricsQueryFunc(mockFunc, queries, failures, queryTime.WithLabelValues("userID"))
qf := RecordAndReportRuleQueryMetrics(MetricsQueryFunc(mockFunc, queries, failures), queryTime.WithLabelValues("userID"), log.NewNopLogger())
treid314 marked this conversation as resolved.
Show resolved Hide resolved

_, _ = qf(context.Background(), "test", time.Now())

Expand Down