
Batch Iterator optimization #5237


Merged
merged 11 commits on Mar 29, 2023
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,8 @@

## master / unreleased

* [ENHANCEMENT] Querier: Batch iterator optimization to avoid traversing the chunk multiple times when query range steps do not overlap. #5237

## 1.15.0 in progress

* [CHANGE] Storage: Make Max exemplars config per tenant instead of global configuration. #5016
14 changes: 14 additions & 0 deletions pkg/querier/batch/batch.go
@@ -43,6 +43,9 @@ type iterator interface {
// Seek or Next have returned true.
AtTime() int64

// MaxCurrentChunkTime returns the max time on the current chunk.
MaxCurrentChunkTime() int64

// Batch returns the current batch. Must only be called after Seek or Next
// have returned true.
Batch() promchunk.Batch
@@ -98,6 +101,17 @@ func (a *iteratorAdapter) Seek(t int64) bool {
a.curr.Index++
}
return true
} else if t <= a.underlying.MaxCurrentChunkTime() {
// In this case, some timestamp inside the current underlying chunk can fulfill the seek.
// We call Next until we reach that sample, which is faster than calling `a.underlying.Seek`
// directly, since Seek would restart iteration from the beginning of the chunk.
// See: https://github.com/cortexproject/cortex/blob/f69452975877c67ac307709e5f60b8d20477764c/pkg/querier/batch/chunk.go#L26-L45
// https://github.com/cortexproject/cortex/blob/f69452975877c67ac307709e5f60b8d20477764c/pkg/chunk/encoding/prometheus_chunk.go#L90-L95
for a.Next() {
if t <= a.curr.Timestamps[a.curr.Index] {
return true
}
}
}
}
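For readers skimming the diff, here is a minimal, self-contained sketch of the three-way seek strategy the hunk above introduces (an illustration only, with invented names such as `sketchIterator` and `seekPlan`; it is not the actual cortex implementation and it simplifies the current-batch check): serve the target from the current batch if possible, otherwise stay inside the current chunk via `Next` when `t <= MaxCurrentChunkTime()`, and only fall back to a full `Seek` when the target lies beyond the current chunk.

```go
package main

import "fmt"

// batch mirrors the idea of promchunk.Batch: a small window of timestamps
// (names and fields here are illustrative only).
type batch struct {
	timestamps []int64
}

// sketchIterator stands in for iteratorAdapter; only what is needed to show
// the decision is kept.
type sketchIterator struct {
	curr         batch
	maxChunkTime int64 // plays the role of underlying.MaxCurrentChunkTime()
}

// seekPlan reports which path a Seek(t) would take under this strategy.
func (it *sketchIterator) seekPlan(t int64) string {
	last := it.curr.timestamps[len(it.curr.timestamps)-1]
	switch {
	case t <= last:
		return "advance the index inside the current batch"
	case t <= it.maxChunkTime:
		return "call Next() until a sample >= t (new fast path, stays inside the current chunk)"
	default:
		return "fall back to a full underlying.Seek(t)"
	}
}

func main() {
	it := &sketchIterator{
		curr:         batch{timestamps: []int64{1000, 2000, 3000}},
		maxChunkTime: 60000, // current chunk ends at t = 60s
	}
	fmt.Println(it.seekPlan(2500))  // inside the current batch
	fmt.Println(it.seekPlan(45000)) // inside the current chunk
	fmt.Println(it.seekPlan(90000)) // beyond the current chunk
}
```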

59 changes: 54 additions & 5 deletions pkg/querier/batch/batch_test.go
@@ -35,7 +35,7 @@ func BenchmarkNewChunkMergeIterator_CreateAndIterate(b *testing.B) {
scenario.duplicationFactor,
scenario.enc.String())

chunks := createChunks(b, scenario.numChunks, scenario.numSamplesPerChunk, scenario.duplicationFactor, scenario.enc)
chunks := createChunks(b, step, scenario.numChunks, scenario.numSamplesPerChunk, scenario.duplicationFactor, scenario.enc)

b.Run(name, func(b *testing.B) {
b.ReportAllocs()
@@ -55,10 +55,59 @@ func BenchmarkNewChunkMergeIterator_CreateAndIterate(b *testing.B) {
}
}

func BenchmarkNewChunkMergeIterator_Seek(b *testing.B) {
scenarios := []struct {
numChunks int
numSamplesPerChunk int
duplicationFactor int
seekStep time.Duration
scrapeInterval time.Duration
enc promchunk.Encoding
}{
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 30 * time.Second, seekStep: 30 * time.Second / 2, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 30 * time.Second, seekStep: 30 * time.Second, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 30 * time.Second, seekStep: 30 * time.Second * 2, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 30 * time.Second, seekStep: 30 * time.Second * 10, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 30 * time.Second, seekStep: 30 * time.Second * 30, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 30 * time.Second, seekStep: 30 * time.Second * 50, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 30 * time.Second, seekStep: 30 * time.Second * 100, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 30 * time.Second, seekStep: 30 * time.Second * 200, enc: promchunk.PrometheusXorChunk},

{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 10 * time.Second, seekStep: 10 * time.Second / 2, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 10 * time.Second, seekStep: 10 * time.Second, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 10 * time.Second, seekStep: 10 * time.Second * 2, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 10 * time.Second, seekStep: 10 * time.Second * 10, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 10 * time.Second, seekStep: 10 * time.Second * 30, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 10 * time.Second, seekStep: 10 * time.Second * 50, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 10 * time.Second, seekStep: 10 * time.Second * 100, enc: promchunk.PrometheusXorChunk},
{numChunks: 1000, numSamplesPerChunk: 120, duplicationFactor: 3, scrapeInterval: 10 * time.Second, seekStep: 10 * time.Second * 200, enc: promchunk.PrometheusXorChunk},
}

for _, scenario := range scenarios {
name := fmt.Sprintf("scrapeInterval %vs seekStep: %vs",
scenario.scrapeInterval.Seconds(),
scenario.seekStep.Seconds())

chunks := createChunks(b, scenario.scrapeInterval, scenario.numChunks, scenario.numSamplesPerChunk, scenario.duplicationFactor, scenario.enc)

b.Run(name, func(b *testing.B) {
b.ReportAllocs()

for n := 0; n < b.N; n++ {
it := NewChunkMergeIterator(chunks, 0, 0)
i := int64(0)
for it.Seek(i*scenario.seekStep.Milliseconds()) != chunkenc.ValNone {
i++
}
}
})
}
}
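A rough sanity check on these scenarios (illustrative arithmetic only, not part of the PR): with 120 samples per chunk, a chunk spans 60 minutes at a 30s scrape interval and 20 minutes at 10s, so seek steps below that span should mostly stay inside the current chunk and hit the new fast path, while the largest steps force full chunk-level seeks.

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	const samplesPerChunk = 120

	for _, scrapeInterval := range []time.Duration{30 * time.Second, 10 * time.Second} {
		chunkSpan := samplesPerChunk * scrapeInterval
		for _, mult := range []int{1, 2, 10, 30, 50, 100, 200} {
			seekStep := time.Duration(mult) * scrapeInterval
			// A seek step shorter than the chunk span means consecutive seeks
			// usually land in the same chunk, i.e. the fast path applies.
			fmt.Printf("scrape=%v step=%v chunkSpan=%v likelyFastPath=%v\n",
				scrapeInterval, seekStep, chunkSpan, seekStep < chunkSpan)
		}
	}
}
```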

func TestSeekCorrectlyDealWithSinglePointChunks(t *testing.T) {
t.Parallel()
chunkOne := mkChunk(t, model.Time(1*step/time.Millisecond), 1, promchunk.PrometheusXorChunk)
chunkTwo := mkChunk(t, model.Time(10*step/time.Millisecond), 1, promchunk.PrometheusXorChunk)
chunkOne := mkChunk(t, step, model.Time(1*step/time.Millisecond), 1, promchunk.PrometheusXorChunk)
chunkTwo := mkChunk(t, step, model.Time(10*step/time.Millisecond), 1, promchunk.PrometheusXorChunk)
chunks := []chunk.Chunk{chunkOne, chunkTwo}

sut := NewChunkMergeIterator(chunks, 0, 0)
@@ -72,13 +121,13 @@ func TestSeekCorrectlyDealWithSinglePointChunks(t *testing.T) {
require.Equal(t, int64(1*time.Second/time.Millisecond), actual)
}

func createChunks(b *testing.B, numChunks, numSamplesPerChunk, duplicationFactor int, enc promchunk.Encoding) []chunk.Chunk {
func createChunks(b *testing.B, step time.Duration, numChunks, numSamplesPerChunk, duplicationFactor int, enc promchunk.Encoding) []chunk.Chunk {
result := make([]chunk.Chunk, 0, numChunks)

for d := 0; d < duplicationFactor; d++ {
for c := 0; c < numChunks; c++ {
minTime := step * time.Duration(c*numSamplesPerChunk)
result = append(result, mkChunk(b, model.Time(minTime.Milliseconds()), numSamplesPerChunk, enc))
result = append(result, mkChunk(b, step, model.Time(minTime.Milliseconds()), numSamplesPerChunk, enc))
}
}

4 changes: 4 additions & 0 deletions pkg/querier/batch/chunk.go
@@ -21,6 +21,10 @@ func (i *chunkIterator) reset(chunk GenericChunk) {
i.batch.Index = 0
}

func (i *chunkIterator) MaxCurrentChunkTime() int64 {
return i.chunk.MaxTime
}

// Seek advances the iterator forward to the value at or after
// the given timestamp.
func (i *chunkIterator) Seek(t int64, size int) bool {
4 changes: 2 additions & 2 deletions pkg/querier/batch/chunk_test.go
@@ -44,7 +44,7 @@ func forEncodings(t *testing.T, f func(t *testing.T, enc promchunk.Encoding)) {
}
}

func mkChunk(t require.TestingT, from model.Time, points int, enc promchunk.Encoding) chunk.Chunk {
func mkChunk(t require.TestingT, step time.Duration, from model.Time, points int, enc promchunk.Encoding) chunk.Chunk {
metric := labels.Labels{
{Name: model.MetricNameLabel, Value: "foo"},
}
@@ -65,7 +65,7 @@ func mkChunk(t require.TestingT, from model.Time, points int, enc promchunk.Enco
}

func mkGenericChunk(t require.TestingT, from model.Time, points int, enc promchunk.Encoding) GenericChunk {
ck := mkChunk(t, from, points, enc)
ck := mkChunk(t, step, from, points, enc)
return NewGenericChunk(int64(ck.From), int64(ck.Through), ck.Data.NewIterator)
}

8 changes: 8 additions & 0 deletions pkg/querier/batch/merge.go
@@ -128,6 +128,14 @@ func (c *mergeIterator) AtTime() int64 {
return c.batches[0].Timestamps[0]
}

func (c *mergeIterator) MaxCurrentChunkTime() int64 {
if len(c.h) < 1 {
return -1
}

return c.h[0].MaxCurrentChunkTime()
}

func (c *mergeIterator) Batch() promchunk.Batch {
return c.batches[0]
}
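One detail worth noting about the -1 sentinel above (my reading of the code, shown as a trivial standalone check rather than anything from the PR): query timestamps are non-negative milliseconds, so `t <= MaxCurrentChunkTime()` can never hold when the heap is empty, and an exhausted merge iterator simply falls through to the regular seek path.

```go
package main

import "fmt"

// takesFastPath mirrors the guard used in iteratorAdapter.Seek: the fast path
// is only taken when the target is no later than the current chunk's max time.
func takesFastPath(t, maxCurrentChunkTime int64) bool {
	return t <= maxCurrentChunkTime
}

func main() {
	fmt.Println(takesFastPath(0, -1))          // false: empty-heap sentinel, no fast path
	fmt.Println(takesFastPath(42_000, 60_000)) // true: target is inside the current chunk
}
```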
4 changes: 4 additions & 0 deletions pkg/querier/batch/non_overlapping.go
@@ -32,6 +32,10 @@ func (it *nonOverlappingIterator) Seek(t int64, size int) bool {
}
}

func (it *nonOverlappingIterator) MaxCurrentChunkTime() int64 {
return it.iter.MaxCurrentChunkTime()
}

func (it *nonOverlappingIterator) Next(size int) bool {
for {
if it.iter.Next(size) {