Skip to content

Commit bdcec71

Browse files
committed
Fix nil when ingesterQueryMaxAttempts > 1 (#7369)
* Trigger nil with test Signed-off-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> * Fix nil results Signed-off-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> * fix changelog Signed-off-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> --------- Signed-off-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com>
1 parent 7687134 commit bdcec71

3 files changed

Lines changed: 75 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
## 1.21.0 in progress
66

77
* [BUGFIX] KV store: Fix false-positive `status_code="500"` metrics for HA tracker CAS operations when using memberlist. #7408
8+
* [BUGFIX] Fix nil when ingester_query_max_attempts > 1. #7369
89

910
* [CHANGE] Ruler: Graduate Ruler API from experimental. #7312
1011
* Flag: Renamed `-experimental.ruler.enable-api` to `-ruler.enable-api`. The old flag is kept as deprecated.

pkg/querier/distributor_queryable.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,16 @@ func (q *distributorQuerier) streamingSelect(ctx context.Context, sortSeries, pa
166166
return storage.ErrSeriesSet(err)
167167
}
168168

169+
// Guard against nil results. This can happen when queryWithRetry's
170+
// backoff loop exits without executing (e.g., context cancelled before
171+
// the first attempt), returning (nil, nil). See #7364.
172+
if results == nil {
173+
if err != nil {
174+
return storage.ErrSeriesSet(err)
175+
}
176+
return storage.EmptySeriesSet()
177+
}
178+
169179
serieses := make([]storage.Series, 0, len(results.Chunkseries))
170180
for _, result := range results.Chunkseries {
171181
// Sometimes the ingester can send series that have no data.

pkg/querier/distributor_queryable_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,70 @@ func TestDistributorQuerier_Retry(t *testing.T) {
387387
}
388388
}
389389

390+
// TestDistributorQuerier_Select_CancelledContext_NoRetry verifies that with
391+
// ingesterQueryMaxAttempts=1, a cancelled context does not panic because the
392+
// direct code path (no retry loop) is used.
393+
func TestDistributorQuerier_Select_CancelledContext_NoRetry(t *testing.T) {
394+
t.Parallel()
395+
396+
ctx := user.InjectOrgID(context.Background(), "0")
397+
ctx, cancel := context.WithCancel(ctx)
398+
cancel()
399+
400+
d := &MockDistributor{}
401+
d.On("QueryStream", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(&client.QueryStreamResponse{}, context.Canceled)
402+
403+
ingesterQueryMaxAttempts := 1
404+
queryable := newDistributorQueryable(d, true, true, batch.NewChunkMergeIterator, 0, func(string) bool {
405+
return true
406+
}, ingesterQueryMaxAttempts)
407+
querier, err := queryable.Querier(mint, maxt)
408+
require.NoError(t, err)
409+
410+
require.NotPanics(t, func() {
411+
seriesSet := querier.Select(ctx, true, &storage.SelectHints{Start: mint, End: maxt})
412+
_ = seriesSet.Err()
413+
})
414+
}
415+
416+
// TestDistributorQuerier_Select_CancelledContext reproduces the panic described
417+
// in https://github.com/cortexproject/cortex/issues/7364.
418+
//
419+
// When ingesterQueryMaxAttempts > 1 and the context is cancelled before the
420+
// retry loop starts (e.g. query timeout or another querier goroutine failing),
421+
// backoff.Ongoing() returns false immediately. The result variable stays nil,
422+
// queryWithRetry returns (nil, nil), and streamingSelect dereferences the nil
423+
// result at line 169 → panic.
424+
func TestDistributorQuerier_Select_CancelledContext(t *testing.T) {
425+
t.Parallel()
426+
427+
// Create a context that is already cancelled.
428+
ctx := user.InjectOrgID(context.Background(), "0")
429+
ctx, cancel := context.WithCancel(ctx)
430+
cancel()
431+
432+
d := &MockDistributor{}
433+
// No mock expectations needed — QueryStream should never be called
434+
// because the context is already cancelled.
435+
436+
ingesterQueryMaxAttempts := 2
437+
queryable := newDistributorQueryable(d, true, true, batch.NewChunkMergeIterator, 0, func(string) bool {
438+
return true
439+
}, ingesterQueryMaxAttempts)
440+
querier, err := queryable.Querier(mint, maxt)
441+
require.NoError(t, err)
442+
443+
// This should NOT panic. Before the fix, the cancelled context causes
444+
// queryWithRetry to return (nil, nil), and streamingSelect dereferences
445+
// the nil result: panic: runtime error: invalid memory address or nil
446+
// pointer dereference [signal SIGSEGV ... addr=0x8]
447+
require.NotPanics(t, func() {
448+
seriesSet := querier.Select(ctx, true, &storage.SelectHints{Start: mint, End: maxt})
449+
// With a cancelled context, we expect either an error or an empty result.
450+
_ = seriesSet.Err()
451+
})
452+
}
453+
390454
func TestDistributorQuerier_LabelNames(t *testing.T) {
391455
t.Parallel()
392456

0 commit comments

Comments
 (0)