
Commit f0debeb

Fix race condition in timeout handler causing activator crashes
Add synchronization for HTTP header map access during request timeouts. When a timeout occurs, capture a snapshot of the response headers so that header writes from the still-running inner handler go to a private copy instead of racing on the shared map after the timeout handler has returned. Fixes #15850
1 parent 41fafd1 commit f0debeb
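
For readers who want the idea in isolation before the diff: the sketch below is a minimal, self-contained illustration of the snapshot-under-mutex pattern the commit message describes. The snapshotWriter type, its timeout method, and the small main program are hypothetical stand-ins written for this example, not the activator's actual timeoutWriter; the real change is in pkg/http/handler/timeout.go below.

package main

import (
    "fmt"
    "net/http"
    "net/http/httptest"
    "sync"
)

// snapshotWriter is a hypothetical, simplified stand-in for the commit's
// timeoutWriter. Once timed out, it serves a private copy of the header map
// so a still-running inner handler cannot race with the real ResponseWriter.
type snapshotWriter struct {
    mu       sync.Mutex
    timedOut bool
    headers  http.Header // snapshot captured at timeout
    w        http.ResponseWriter
}

// Header returns the live header map until the timeout fires, and the
// detached snapshot afterwards.
func (s *snapshotWriter) Header() http.Header {
    s.mu.Lock()
    defer s.mu.Unlock()
    if !s.timedOut {
        return s.w.Header()
    }
    if s.headers == nil {
        return make(http.Header)
    }
    return s.headers
}

// timeout deep-copies the current headers and flips the flag, so later
// Header() calls stop touching the underlying writer's map.
func (s *snapshotWriter) timeout() {
    s.mu.Lock()
    defer s.mu.Unlock()
    s.headers = make(http.Header)
    for k, v := range s.w.Header() {
        s.headers[k] = append([]string(nil), v...)
    }
    s.timedOut = true
}

func main() {
    rec := httptest.NewRecorder()
    sw := &snapshotWriter{w: rec}

    sw.Header().Set("X-Before", "kept") // written to the real header map
    sw.timeout()
    sw.Header().Set("X-After", "dropped") // written only to the detached copy

    fmt.Println(rec.Header().Get("X-Before")) // "kept"
    fmt.Println(rec.Header().Get("X-After"))  // "" - the real map is untouched after timeout
}

The trade-off this makes, as far as the diff shows, is that header changes made by the inner handler after the timeout response has been written are silently discarded rather than sent; the point of the fix is that they can no longer corrupt the shared map.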

File tree

2 files changed (+121, -1 lines)


pkg/http/handler/timeout.go

Lines changed: 27 additions & 1 deletion
@@ -169,6 +169,9 @@ type timeoutWriter struct {
 	mu sync.Mutex
 	timedOut bool
 	lastWriteTime time.Time
+	// headers is a snapshot of headers taken when timeout occurs
+	// to prevent concurrent map access
+	headers http.Header
 }
 
 var (
@@ -201,7 +204,23 @@ func (tw *timeoutWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
 	return websocket.HijackIfPossible(tw.w)
 }
 
-func (tw *timeoutWriter) Header() http.Header { return tw.w.Header() }
+func (tw *timeoutWriter) Header() http.Header {
+	tw.mu.Lock()
+	timedOut := tw.timedOut
+	headers := tw.headers
+	tw.mu.Unlock()
+
+	if timedOut {
+		// Return the snapshot of headers taken at timeout to prevent
+		// concurrent modification of the header map
+		if headers == nil {
+			// If no headers were captured, return an empty map
+			return make(http.Header)
+		}
+		return headers
+	}
+	return tw.w.Header()
+}
 
 func (tw *timeoutWriter) Write(p []byte) (int, error) {
 	tw.mu.Lock()
@@ -279,6 +298,13 @@ func (tw *timeoutWriter) tryIdleTimeoutAndWriteError(curTime time.Time, idleTime
 }
 
 func (tw *timeoutWriter) timeoutAndWriteError(msg string) {
+	// Capture a snapshot of headers before marking as timed out
+	// to prevent concurrent access to the underlying header map
+	tw.headers = make(http.Header)
+	for k, v := range tw.w.Header() {
+		tw.headers[k] = append([]string(nil), v...)
+	}
+
 	tw.w.WriteHeader(http.StatusGatewayTimeout)
 	io.WriteString(tw.w, msg)
 
pkg/http/handler/timeout_test.go

Lines changed: 94 additions & 0 deletions
@@ -25,6 +25,8 @@ import (
 	"testing"
 	"time"
 
+	"sync/atomic"
+
 	"go.uber.org/zap/zaptest"
 	"k8s.io/utils/clock"
 	clocktest "k8s.io/utils/clock/testing"
@@ -626,6 +628,98 @@ func BenchmarkTimeoutHandler(b *testing.B) {
 	})
 }
 
+func TestTimeoutHandlerConcurrentHeaderAccess(t *testing.T) {
+	// This test verifies the fix for the race condition when requests time out.
+	// It simulates the scenario where the timeout handler completes while the
+	// inner handler is still trying to modify headers. The key is that this
+	// should not panic with a concurrent map access error.
+
+	var completedCount atomic.Int32
+	var panicCount int32
+	innerHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Simulate work that takes around the same time as timeout
+		time.Sleep(55 * time.Millisecond)
+
+		// After potential context cancellation, try to access headers
+		// This simulates what the error handler does
+		if r.Context().Err() != nil {
+			// Try to modify headers - this should not cause a panic
+			// even if timeout has occurred
+			w.Header().Set("X-Test-Header", "value")
+			http.Error(w, "context canceled", http.StatusBadGateway)
+		} else {
+			// If no timeout, write normally
+			w.WriteHeader(http.StatusOK)
+		}
+		completedCount.Add(1)
+	})
+
+	timeoutHandler := NewTimeoutHandler(
+		innerHandler,
+		"timeout",
+		func(r *http.Request) (time.Duration, time.Duration, time.Duration) {
+			return 50 * time.Millisecond, 0, 0
+		},
+		zaptest.NewLogger(t).Sugar(),
+	)
+
+	// Run multiple concurrent requests to increase chances of hitting the race
+	var wg sync.WaitGroup
+	var timeoutResponses atomic.Int32
+	var normalResponses atomic.Int32
+	for i := 0; i < 10; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			defer func() {
+				if r := recover(); r != nil {
+					// Should not panic with concurrent map access
+					atomic.AddInt32(&panicCount, 1)
+					t.Errorf("Unexpected panic: %v", r)
+				}
+			}()
+
+			req, err := http.NewRequest(http.MethodGet, "/", nil)
+			if err != nil {
+				t.Error(err)
+				return
+			}
+
+			rec := httptest.NewRecorder()
+
+			// This should not panic with concurrent map access
+			timeoutHandler.ServeHTTP(rec, req)
+
+			// We may get either a timeout or a normal response depending on timing
+			// The key is that we don't panic
+			if rec.Code == http.StatusGatewayTimeout {
+				timeoutResponses.Add(1)
+			} else if rec.Code == http.StatusOK {
+				normalResponses.Add(1)
+			} else {
+				t.Errorf("Unexpected status code: %d", rec.Code)
+			}
+		}()
+	}
+
+	wg.Wait()
+
+	// Give a bit more time for any lingering goroutines to complete
+	time.Sleep(100 * time.Millisecond)
+
+	// Check that no panics occurred
+	if panicCount > 0 {
+		t.Errorf("Got %d panics, expected 0", panicCount)
+	}
+
+	// At least some requests should have timed out
+	if timeoutResponses.Load() == 0 {
+		t.Error("Expected at least some timeout responses")
+	}
+
+	t.Logf("Got %d timeout responses and %d normal responses", timeoutResponses.Load(), normalResponses.Load())
+}
+
 func StaticTimeoutFunc(timeout time.Duration, requestStart time.Duration, idle time.Duration) TimeoutFunc {
 	return func(req *http.Request) (time.Duration, time.Duration, time.Duration) {
 		return timeout, requestStart, idle
