-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Safe StreamEvents write loop #14557
Safe StreamEvents write loop #14557
Changes from 1 commit
9b44655
1bd313c
a59ddec
4159e66
d64a018
4b84fc2
cdf2241
6253939
168b26a
bf8a6c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,6 @@ import ( | |
eth "github.com/prysmaticlabs/prysm/v5/proto/prysm/v1alpha1" | ||
"github.com/prysmaticlabs/prysm/v5/runtime/version" | ||
"github.com/prysmaticlabs/prysm/v5/time/slots" | ||
log "github.com/sirupsen/logrus" | ||
) | ||
|
||
const DefaultEventFeedDepth = 1000 | ||
|
@@ -74,13 +73,6 @@ var ( | |
errWriterUnusable = errors.New("http response writer is unusable") | ||
) | ||
|
||
// StreamingResponseWriter defines a type that can be used by the eventStreamer. | ||
// This must be an http.ResponseWriter that supports flushing and hijacking. | ||
type StreamingResponseWriter interface { | ||
http.ResponseWriter | ||
http.Flusher | ||
} | ||
|
||
// The eventStreamer uses lazyReaders to defer serialization until the moment the value is ready to be written to the client. | ||
type lazyReader func() io.Reader | ||
|
||
|
@@ -159,47 +151,53 @@ func (s *Server) StreamEvents(w http.ResponseWriter, r *http.Request) { | |
return | ||
} | ||
|
||
sw, ok := w.(StreamingResponseWriter) | ||
if !ok { | ||
msg := "beacon node misconfiguration: http stack may not support required response handling features, like flushing" | ||
httputil.HandleError(w, msg, http.StatusInternalServerError) | ||
return | ||
timeout := s.EventWriteTimeout | ||
if timeout == 0 { | ||
timeout = time.Duration(params.BeaconConfig().SecondsPerSlot) * time.Second | ||
} | ||
depth := s.EventFeedDepth | ||
if depth == 0 { | ||
depth = DefaultEventFeedDepth | ||
ka := s.KeepAliveInterval | ||
if ka == 0 { | ||
ka = timeout | ||
} | ||
es, err := newEventStreamer(depth, s.KeepAliveInterval) | ||
if err != nil { | ||
httputil.HandleError(w, err.Error(), http.StatusInternalServerError) | ||
return | ||
buffSize := s.EventFeedDepth | ||
if buffSize == 0 { | ||
buffSize = DefaultEventFeedDepth | ||
} | ||
|
||
api.SetSSEHeaders(w) | ||
sw := NewStreamingResponseController(w, timeout) | ||
ctx, cancel := context.WithCancel(ctx) | ||
defer cancel() | ||
api.SetSSEHeaders(sw) | ||
defer func() { | ||
cancel() | ||
}() | ||
es := newEventStreamer(buffSize, ka) | ||
|
||
go es.outboxWriteLoop(ctx, cancel, sw) | ||
if err := es.recvEventLoop(ctx, cancel, topics, s); err != nil { | ||
log.WithError(err).Debug("Shutting down StreamEvents handler.") | ||
} | ||
es.waitForCleanup() | ||
} | ||
|
||
func newEventStreamer(buffSize int, ka time.Duration) (*eventStreamer, error) { | ||
if ka == 0 { | ||
ka = time.Duration(params.BeaconConfig().SecondsPerSlot) * time.Second | ||
} | ||
func newEventStreamer(buffSize int, ka time.Duration) *eventStreamer { | ||
return &eventStreamer{ | ||
outbox: make(chan lazyReader, buffSize), | ||
keepAlive: ka, | ||
}, nil | ||
cleanedUp: make(chan struct{}), | ||
} | ||
} | ||
|
||
type eventStreamer struct { | ||
outbox chan lazyReader | ||
keepAlive time.Duration | ||
cleanedUp chan struct{} | ||
} | ||
|
||
func (es *eventStreamer) recvEventLoop(ctx context.Context, cancel context.CancelFunc, req *topicRequest, s *Server) error { | ||
defer close(es.outbox) | ||
defer func() { | ||
cancel() | ||
}() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as discussed offline the context cancellation in |
||
eventsChan := make(chan *feed.Event, len(es.outbox)) | ||
if req.needOpsFeed { | ||
opsSub := s.OperationNotifier.OperationFeed().Subscribe(eventsChan) | ||
|
@@ -228,7 +226,6 @@ func (es *eventStreamer) recvEventLoop(ctx context.Context, cancel context.Cance | |
// channel should stay relatively empty, which gives this loop time to unsubscribe | ||
// and cleanup before the event stream channel fills and disrupts other readers. | ||
if err := es.safeWrite(ctx, lr); err != nil { | ||
cancel() | ||
// note: we could hijack the connection and close it here. Does that cause issues? What are the benefits? | ||
// A benefit of hijack and close is that it may force an error on the remote end, however just closing the context of the | ||
// http handler may be sufficient to cause the remote http response reader to close. | ||
|
@@ -265,14 +262,17 @@ func newlineReader() io.Reader { | |
|
||
// outboxWriteLoop runs in a separate goroutine. Its job is to write the values in the outbox to | ||
// the client as fast as the client can read them. | ||
func (es *eventStreamer) outboxWriteLoop(ctx context.Context, cancel context.CancelFunc, w StreamingResponseWriter) { | ||
func (es *eventStreamer) outboxWriteLoop(ctx context.Context, cancel context.CancelFunc, w *StreamingResponseWriterController) { | ||
var err error | ||
defer func() { | ||
if err != nil { | ||
log.WithError(err).Debug("Event streamer shutting down due to error.") | ||
} | ||
es.cleanup() | ||
}() | ||
defer func() { | ||
cancel() | ||
}() | ||
defer cancel() | ||
// Write a keepalive at the start to test the connection and simplify test setup. | ||
if err = es.writeOutbox(ctx, w, nil); err != nil { | ||
return | ||
|
@@ -310,18 +310,36 @@ func (es *eventStreamer) outboxWriteLoop(ctx context.Context, cancel context.Can | |
} | ||
} | ||
|
||
func writeLazyReaderWithRecover(w StreamingResponseWriter, lr lazyReader) (err error) { | ||
func (es *eventStreamer) cleanup() { | ||
drained := 0 | ||
for range es.outbox { | ||
drained += 1 | ||
} | ||
log.WithField("undelivered_events", drained).Debug("Event stream outbox drained.") | ||
close(es.cleanedUp) | ||
} | ||
|
||
func (es *eventStreamer) waitForCleanup() { | ||
<-es.cleanedUp | ||
} | ||
|
||
func writeLazyReaderWithRecover(w *StreamingResponseWriterController, lr lazyReader) (err error) { | ||
defer func() { | ||
if r := recover(); r != nil { | ||
log.WithField("panic", r).Error("Recovered from panic while writing event to client.") | ||
err = errWriterUnusable | ||
} | ||
}() | ||
_, err = io.Copy(w, lr()) | ||
r := lr() | ||
out, err := io.ReadAll(r) | ||
if err != nil { | ||
return err | ||
} | ||
_, err = w.Write(out) | ||
return err | ||
} | ||
|
||
func (es *eventStreamer) writeOutbox(ctx context.Context, w StreamingResponseWriter, first lazyReader) error { | ||
func (es *eventStreamer) writeOutbox(ctx context.Context, w *StreamingResponseWriterController, first lazyReader) error { | ||
needKeepAlive := true | ||
if first != nil { | ||
if err := writeLazyReaderWithRecover(w, first); err != nil { | ||
|
@@ -347,8 +365,7 @@ func (es *eventStreamer) writeOutbox(ctx context.Context, w StreamingResponseWri | |
return err | ||
} | ||
} | ||
w.Flush() | ||
return nil | ||
return w.Flush() | ||
} | ||
} | ||
} | ||
|
@@ -638,3 +655,49 @@ func (s *Server) currentPayloadAttributes(ctx context.Context) (lazyReader, erro | |
}) | ||
}, nil | ||
} | ||
|
||
// NewStreamingResponseController wraps rw in a StreamingResponseWriterController
// whose Write and Flush calls are each bounded by timeout.
//
// A zero or negative timeout means "no per-operation deadline": the write
// deadline is cleared rather than being set to an instant that has already
// passed.
func NewStreamingResponseController(rw http.ResponseWriter, timeout time.Duration) *StreamingResponseWriterController {
	return &StreamingResponseWriterController{
		timeout: timeout,
		rw:      rw,
		rc:      http.NewResponseController(rw),
	}
}

// StreamingResponseWriterController is an io.Writer over an http.ResponseWriter
// that arms a write deadline before every Write and Flush and clears it again
// once the operation succeeds.
type StreamingResponseWriterController struct {
	// timeout is the maximum duration allowed for a single Write or Flush.
	timeout time.Duration
	// rw receives the written bytes.
	rw http.ResponseWriter
	// rc is used to set write deadlines and to flush rw.
	rc *http.ResponseController
}

// Write sets the write deadline, writes b to the underlying response writer
// and, on success, clears the deadline.
//
// It returns 0 and the error if the deadline cannot be set. If the underlying
// write fails, the byte count and error are returned and the deadline is left
// in place. An error from clearing the deadline is returned together with the
// number of bytes written.
func (c *StreamingResponseWriterController) Write(b []byte) (int, error) {
	if err := c.setDeadline(); err != nil {
		return 0, err
	}
	n, err := c.rw.Write(b)
	if err != nil {
		return n, err
	}
	return n, c.clearDeadline()
}

// setDeadline arms the write deadline timeout from now. With a non-positive
// timeout it clears the deadline instead, because time.Now().Add(timeout)
// would already be expired and would make every subsequent write fail.
func (c *StreamingResponseWriterController) setDeadline() error {
	if c.timeout <= 0 {
		return c.clearDeadline()
	}
	return c.rc.SetWriteDeadline(time.Now().Add(c.timeout))
}

// clearDeadline removes the write deadline by setting it to the zero time.
func (c *StreamingResponseWriterController) clearDeadline() error {
	return c.rc.SetWriteDeadline(time.Time{})
}

// Flush sets the write deadline, flushes buffered data through the
// ResponseController and, on success, clears the deadline. The first error
// encountered is returned.
func (c *StreamingResponseWriterController) Flush() error {
	if err := c.setDeadline(); err != nil {
		return err
	}
	if err := c.rc.Flush(); err != nil {
		return err
	}
	return c.clearDeadline()
}

// Compile-time check that the controller satisfies io.Writer.
var _ io.Writer = (*StreamingResponseWriterController)(nil)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For other reviewers: this is key to fixing the panic here. Previously, StreamEvents could exit while outboxWriteLoop was still writing to a writer that had already been cleaned up by regular HTTP processing. Now StreamEvents waits for the write loop to finish cleaning up, and the writes inside the loop are wrapped with deadlines.