Skip to content

Commit e0ca67d

Browse files
authored
fix(query engine): Include lines with ts equal to end timestamp of the query range when executing range aggregations (#13448)
**Background** When performing range vector aggregations, such as `count_over_time({env="dev"}[1h])`, the query range is divided into multiple steps at which the aggregation operation (e.g. counting the log lines) is evaluated. Each step starts at `current step - step interval` and ends at `current step`, as depicted in the following chart. The select range for the logs is extended by the `step interval` into the past, in order to select logs for calculating the first step. ![screenshot_20240711_092352](https://github.com/grafana/loki/assets/281260/9ca6eaf5-148e-4743-aefa-6ff7071d64ad) However, the select range for logs is `start` inclusive and `end` exclusive (written as `[start, end)`), but the evaluation of the steps for the range aggregation is `start` exclusive and `end` inclusive (written as `(start, end]`). This leads to the problem that the very first timestamp at the beginning of the select range and the very last timestamp at the end of the select range are not included in the range aggregation. The "missing" last timestamp is not a problem, because a) in an instant query it is not supposed to be included anyway because of the `[start, end)` inclusivity of the query range and b) in a range query the last point of the previous step will be part of the next step evaluation. **Issue** The missing first timestamp, however, becomes problematic when executing an instant query and the log timestamps are exactly at the start of the query range. This can happen when the query is split in the query frontend into multiple smaller time ranges, e.g. `1h`, `30m`, ... Since the sub-queries are executed independently on the queriers, all logs whose timestamp is an exact multiple of the split interval, e.g. 00:00, 01:00, 02:00, ... for a 1h interval, are dropped and are therefore missing from the query result over the full time range of the original query.
**Fix** To avoid dropping logs whose timestamp is an exact multiple of the split interval in instant queries, we need to adjust the query range for logs to also include the `end` timestamp (written as `[start, end]`). This is done by adding a "leap nanosecond" to the `end` timestamp of the log select range. This ensures that the included `end` timestamp of the step evaluation is also included in the log selection. --- Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
1 parent 38cabf1 commit e0ca67d

File tree

5 files changed

+34
-33
lines changed

5 files changed

+34
-33
lines changed

pkg/logql/engine_test.go

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ func TestEngine_LogsRateUnwrap(t *testing.T) {
167167
}
168168
}
169169

170-
func TestEngine_LogsInstantQuery(t *testing.T) {
170+
func TestEngine_InstantQuery(t *testing.T) {
171171
t.Parallel()
172172
for _, test := range []struct {
173173
qs string
@@ -182,26 +182,6 @@ func TestEngine_LogsInstantQuery(t *testing.T) {
182182

183183
expected interface{}
184184
}{
185-
{
186-
`{app="foo"}`, time.Unix(30, 0), logproto.FORWARD, 10,
187-
[][]logproto.Stream{
188-
{newStream(testSize, identity, `{app="foo"}`)},
189-
},
190-
[]SelectLogParams{
191-
{&logproto.QueryRequest{Direction: logproto.FORWARD, Start: time.Unix(0, 0), End: time.Unix(30, 0), Limit: 10, Selector: `{app="foo"}`}},
192-
},
193-
logqlmodel.Streams([]logproto.Stream{newStream(10, identity, `{app="foo"}`)}),
194-
},
195-
{
196-
`{app="bar"} |= "foo" |~ ".+bar"`, time.Unix(30, 0), logproto.BACKWARD, 30,
197-
[][]logproto.Stream{
198-
{newStream(testSize, identity, `{app="bar"}`)},
199-
},
200-
[]SelectLogParams{
201-
{&logproto.QueryRequest{Direction: logproto.BACKWARD, Start: time.Unix(0, 0), End: time.Unix(30, 0), Limit: 30, Selector: `{app="bar"}|="foo"|~".+bar"`}},
202-
},
203-
logqlmodel.Streams([]logproto.Stream{newStream(30, identity, `{app="bar"}`)}),
204-
},
205185
{
206186
`rate({app="foo"} |~".+bar" [1m])`, time.Unix(60, 0), logproto.BACKWARD, 10,
207187
[][]logproto.Series{
@@ -975,7 +955,6 @@ func TestEngine_LogsInstantQuery(t *testing.T) {
975955
} {
976956
test := test
977957
t.Run(fmt.Sprintf("%s %s", test.qs, test.direction), func(t *testing.T) {
978-
t.Parallel()
979958

980959
eng := NewEngine(EngineOpts{}, newQuerierRecorder(t, test.data, test.params), NoLimits, log.NewNopLogger())
981960

@@ -2755,6 +2734,11 @@ func (q *querierRecorder) SelectSamples(_ context.Context, p SelectSampleParams)
27552734
}
27562735

27572736
func paramsID(p interface{}) string {
2737+
switch params := p.(type) {
2738+
case SelectLogParams:
2739+
case SelectSampleParams:
2740+
return fmt.Sprintf("%d", params.Plan.Hash())
2741+
}
27582742
b, err := json.Marshal(p)
27592743
if err != nil {
27602744
panic(err)

pkg/logql/evaluator.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -331,9 +331,12 @@ func (ev *DefaultEvaluator) NewStepEvaluator(
331331
nextEvFactory = SampleEvaluatorFunc(func(ctx context.Context, _ SampleEvaluatorFactory, _ syntax.SampleExpr, _ Params) (StepEvaluator, error) {
332332
it, err := ev.querier.SelectSamples(ctx, SelectSampleParams{
333333
&logproto.SampleQueryRequest{
334-
Start: q.Start().Add(-rangExpr.Left.Interval).Add(-rangExpr.Left.Offset),
335-
End: q.End().Add(-rangExpr.Left.Offset),
336-
Selector: e.String(), // intentionally send the vector for reducing labels.
334+
// extend startTs backwards by step
335+
Start: q.Start().Add(-rangExpr.Left.Interval).Add(-rangExpr.Left.Offset),
336+
// add leap nanosecond to endTs to include lines exactly at endTs. range iterators work on start exclusive, end inclusive ranges
337+
End: q.End().Add(-rangExpr.Left.Offset).Add(time.Nanosecond),
338+
// intentionally send the vector for reducing labels.
339+
Selector: e.String(),
337340
Shards: q.Shards(),
338341
Plan: &plan.QueryPlan{
339342
AST: expr,
@@ -351,9 +354,12 @@ func (ev *DefaultEvaluator) NewStepEvaluator(
351354
case *syntax.RangeAggregationExpr:
352355
it, err := ev.querier.SelectSamples(ctx, SelectSampleParams{
353356
&logproto.SampleQueryRequest{
354-
Start: q.Start().Add(-e.Left.Interval).Add(-e.Left.Offset),
355-
End: q.End().Add(-e.Left.Offset),
356-
Selector: expr.String(),
357+
// extend startTs backwards by step
358+
Start: q.Start().Add(-e.Left.Interval).Add(-e.Left.Offset),
359+
// add leap nanosecond to endTs to include lines exactly at endTs. range iterators work on start exclusive, end inclusive ranges
360+
End: q.End().Add(-e.Left.Offset).Add(time.Nanosecond),
361+
// intentionally send the vector for reducing labels.
362+
Selector: e.String(),
357363
Shards: q.Shards(),
358364
Plan: &plan.QueryPlan{
359365
AST: expr,

pkg/storage/stores/shipper/indexshipper/tsdb/bounds.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"github.com/prometheus/common/model"
77
)
88

9+
// TODO(chaudum): Replace with new v1.Interval struct
910
type Bounded interface {
1011
Bounds() (model.Time, model.Time)
1112
}
@@ -34,9 +35,13 @@ func newBounds(mint, maxt model.Time) bounds { return bounds{mint: mint, maxt: m
3435

3536
func (b bounds) Bounds() (model.Time, model.Time) { return b.mint, b.maxt }
3637

37-
func Overlap(a, b Bounded) bool {
38-
aFrom, aThrough := a.Bounds()
39-
bFrom, bThrough := b.Bounds()
38+
// Overlap checks whether the given chunk or index bounds
39+
// overlap with the bounds of a query range.
40+
// chunk/index bounds are defined as [from, through]
41+
// query bounds are defined as [from, through)
42+
func Overlap(chk, qry Bounded) bool {
43+
chkFrom, chkThrough := chk.Bounds()
44+
qryFrom, qryThrough := qry.Bounds()
4045

41-
return aFrom < bThrough && aThrough > bFrom
46+
return chkFrom < qryThrough && chkThrough >= qryFrom
4247
}

pkg/storage/stores/shipper/indexshipper/tsdb/bounds_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ func TestOverlap(t *testing.T) {
2828
// ensure [start,end) inclusivity works as expected
2929
a: newBounds(1, 5),
3030
b: newBounds(5, 6),
31+
overlap: true,
32+
},
33+
{
34+
// ensure [start,end) inclusivity works as expected
35+
a: newBounds(5, 6),
36+
b: newBounds(1, 5),
3137
overlap: false,
3238
},
3339
} {

pkg/storage/stores/shipper/indexshipper/tsdb/multi_file_index.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ func (i *MultiIndex) forMatchingIndices(ctx context.Context, from, through model
115115
queryBounds := newBounds(from, through)
116116

117117
return i.iter.For(ctx, i.maxParallel, func(ctx context.Context, idx Index) error {
118-
if Overlap(queryBounds, idx) {
118+
if Overlap(idx, queryBounds) {
119119

120120
if i.filterer != nil {
121121
// TODO(owen-d): Find a nicer way

0 commit comments

Comments
 (0)