ratelimits: Fix latency calculations #7627

Merged Jul 25, 2024 (4 commits)

Changes from 1 commit
ratelimits: Fix operation latency calculation
beautifulentropy committed Jul 24, 2024
commit aec822c0215e2ca2de18c31aadef25a38a116d0a
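
In essence, the commit measures each pipelined batch with the injected clock (r.clk.Since rather than time.Since), then divides the total by the number of keys to approximate a per-entry latency, observed once per key under the new "batchset_entry" / "batchget_entry" call labels. A minimal standalone sketch of that arithmetic follows; the durations and printed output are illustrative only, not part of the PR:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Pretend the whole pipelined BatchSet took 90ms for 3 keys.
	totalLatency := 90 * time.Millisecond
	keys := []string{"a", "b", "c"}

	// Approximate per-entry latency by dividing the batch total evenly.
	perEntryLatency := totalLatency / time.Duration(len(keys))
	for range keys {
		// In source_redis.go this is r.observeLatency("batchset_entry", perEntryLatency, nil).
		fmt.Println("batchset_entry:", perEntryLatency)
	}

	// The batch as a whole is still observed once under the "batchset" label.
	fmt.Println("batchset:", totalLatency)
}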
66 changes: 49 additions & 17 deletions ratelimits/source_redis.go
@@ -42,10 +42,15 @@ func NewRedisSource(client *redis.Ring, clk clock.Clock, stats prometheus.Regist
}
}

var errMixedSuccess = errors.New("some keys not found")

// resultForError returns a string representing the result of the operation
// based on the provided error.
func resultForError(err error) string {
if errors.Is(redis.Nil, err) {
if errors.Is(errMixedSuccess, err) {
// Indicates that some of the keys in a batchset operation were not found.
return "mixedSuccess"
} else if errors.Is(redis.Nil, err) {
// Bucket key does not exist.
return "notFound"
} else if errors.Is(err, context.DeadlineExceeded) {
@@ -68,6 +73,14 @@ func resultForError(err error) string {
return "failed"
}

func (r *RedisSource) observeLatency(call string, latency time.Duration, err error) {
result := "success"
if err != nil {
result = resultForError(err)
}
r.latency.With(prometheus.Labels{"call": call, "result": result}).Observe(latency.Seconds())
}

// BatchSet stores TATs at the specified bucketKeys using a pipelined Redis
// Transaction in order to reduce the number of round-trips to each Redis shard.
// An error is returned if the operation failed and nil otherwise.
@@ -80,11 +93,17 @@ func (r *RedisSource) BatchSet(ctx context.Context, buckets map[string]time.Time
}
_, err := pipeline.Exec(ctx)
if err != nil {
r.latency.With(prometheus.Labels{"call": "batchset", "result": resultForError(err)}).Observe(time.Since(start).Seconds())
r.observeLatency("batchset", r.clk.Since(start), err)
return err
}

r.latency.With(prometheus.Labels{"call": "batchset", "result": "success"}).Observe(time.Since(start).Seconds())
totalLatency := r.clk.Since(start)
perSetLatency := totalLatency / time.Duration(len(buckets))
for range buckets {
r.observeLatency("batchset_entry", perSetLatency, nil)
}

r.observeLatency("batchset", totalLatency, nil)
return nil
}

@@ -98,14 +117,15 @@ func (r *RedisSource) Get(ctx context.Context, bucketKey string) (time.Time, err
if err != nil {
if errors.Is(err, redis.Nil) {
// Bucket key does not exist.
r.latency.With(prometheus.Labels{"call": "get", "result": "notFound"}).Observe(time.Since(start).Seconds())
r.observeLatency("get", r.clk.Since(start), err)
return time.Time{}, ErrBucketNotFound
}
r.latency.With(prometheus.Labels{"call": "get", "result": resultForError(err)}).Observe(time.Since(start).Seconds())
// An error occurred while retrieving the TAT.
r.observeLatency("get", r.clk.Since(start), err)
return time.Time{}, err
}

r.latency.With(prometheus.Labels{"call": "get", "result": "success"}).Observe(time.Since(start).Seconds())
r.observeLatency("get", r.clk.Since(start), nil)
return time.Unix(0, tatNano).UTC(), nil
}

@@ -122,27 +142,38 @@ func (r *RedisSource) BatchGet(ctx context.Context, bucketKeys []string) (map[st
}
results, err := pipeline.Exec(ctx)
if err != nil {
r.latency.With(prometheus.Labels{"call": "batchget", "result": resultForError(err)}).Observe(time.Since(start).Seconds())
if !errors.Is(err, redis.Nil) {
r.observeLatency("batchget", r.clk.Since(start), err)
return nil, err
}
// One of the keys does not exist. We'll account for this when
// processing the results.
}

totalLatency := r.clk.Since(start)
perEntryLatency := totalLatency / time.Duration(len(bucketKeys))

var batchErr error
tats := make(map[string]time.Time, len(bucketKeys))
for i, result := range results {
tatNano, err := result.(*redis.StringCmd).Int64()
if err != nil {
if errors.Is(err, redis.Nil) {
// Bucket key does not exist.
continue
if !errors.Is(err, redis.Nil) {
// This should never happen as any errors should have been
// caught after the pipeline.Exec() call.
r.observeLatency("batchget", r.clk.Since(start), err)
return nil, err
}
r.latency.With(prometheus.Labels{"call": "batchget", "result": resultForError(err)}).Observe(time.Since(start).Seconds())
return nil, err
// Bucket key does not exist.
r.observeLatency("batchget_entry", perEntryLatency, err)
batchErr = errMixedSuccess
continue
}
tats[bucketKeys[i]] = time.Unix(0, tatNano).UTC()
r.observeLatency("batchget_entry", perEntryLatency, nil)
}

r.latency.With(prometheus.Labels{"call": "batchget", "result": "success"}).Observe(time.Since(start).Seconds())
r.observeLatency("batchget", totalLatency, batchErr)
return tats, nil
}

@@ -154,11 +185,11 @@ func (r *RedisSource) Delete(ctx context.Context, bucketKey string) error {

err := r.client.Del(ctx, bucketKey).Err()
if err != nil {
r.latency.With(prometheus.Labels{"call": "delete", "result": resultForError(err)}).Observe(time.Since(start).Seconds())
r.observeLatency("delete", r.clk.Since(start), err)
return err
}

r.latency.With(prometheus.Labels{"call": "delete", "result": "success"}).Observe(time.Since(start).Seconds())
r.observeLatency("delete", r.clk.Since(start), nil)
return nil
}

@@ -171,9 +202,10 @@ func (r *RedisSource) Ping(ctx context.Context) error {
return shard.Ping(ctx).Err()
})
if err != nil {
r.latency.With(prometheus.Labels{"call": "ping", "result": resultForError(err)}).Observe(time.Since(start).Seconds())
r.observeLatency("ping", r.clk.Since(start), err)
return err
}
r.latency.With(prometheus.Labels{"call": "ping", "result": "success"}).Observe(time.Since(start).Seconds())

r.observeLatency("ping", r.clk.Since(start), nil)
return nil
}
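
For context on how these observations land in metrics, here is a hedged sketch that is not part of this PR. It assumes a HistogramVec labeled by "call" and "result", shaped like the latency histogram NewRedisSource registers, and simulates a BatchGet of three keys where one is missing, which yields per-entry observations plus a single "mixedSuccess" total:

package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Stand-in for the histogram registered by NewRedisSource (assumed shape).
	latency := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name: "ratelimits_latency",
		Help: "Latency of Redis rate limit operations.",
	}, []string{"call", "result"})

	// What observeLatency would record for a BatchGet where one of three keys
	// was missing: a per-entry observation per key, then one batch total.
	total := 45 * time.Millisecond
	perEntry := total / 3
	for i := 0; i < 3; i++ {
		result := "success"
		if i == 2 {
			result = "notFound" // the missing key
		}
		latency.With(prometheus.Labels{"call": "batchget_entry", "result": result}).
			Observe(perEntry.Seconds())
	}
	latency.With(prometheus.Labels{"call": "batchget", "result": "mixedSuccess"}).
		Observe(total.Seconds())

	// Three distinct label combinations now exist on the vector:
	// (batchget_entry, success), (batchget_entry, notFound), (batchget, mixedSuccess).
	fmt.Println("series:", testutil.CollectAndCount(latency))
}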