Skip to content

Commit 2598237

Browse files
zsfelfoldi authored and fjl committed
les: fix retriever logic (#16776)
This PR fixes a bug in the retriever logic. When a peer hit a soft timeout and a response arrived afterwards, the retriever always assumed the response came from that same peer, even though it could have come from a peer that was asked later and had not timed out at all yet. In that case the state machine entered an illegal state and deadlocked, causing a goroutine leak. Fixes #16243 and replaces #16359. Thanks to @riceke for finding the bug in the logic.
1 parent 049f5b3 commit 2598237

File tree

1 file changed

+17
-13
lines changed

1 file changed

+17
-13
lines changed

les/retrieve.go

Lines changed: 17 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,9 @@ type sentReq struct {
6969
lock sync.RWMutex // protect access to sentTo map
7070
sentTo map[distPeer]sentReqToPeer
7171

72-
reqQueued bool // a request has been queued but not sent
73-
reqSent bool // a request has been sent but not timed out
74-
reqSrtoCount int // number of requests that reached soft (but not hard) timeout
72+
lastReqQueued bool // last request has been queued but not sent
73+
lastReqSentTo distPeer // if not nil then last request has been sent to given peer but not timed out
74+
reqSrtoCount int // number of requests that reached soft (but not hard) timeout
7575
}
7676

7777
// sentReqToPeer notifies the request-from-peer goroutine (tryRequest) about a response
@@ -180,7 +180,7 @@ type reqStateFn func() reqStateFn
180180
// retrieveLoop is the retrieval state machine event loop
181181
func (r *sentReq) retrieveLoop() {
182182
go r.tryRequest()
183-
r.reqQueued = true
183+
r.lastReqQueued = true
184184
state := r.stateRequesting
185185

186186
for state != nil {
@@ -214,7 +214,7 @@ func (r *sentReq) stateRequesting() reqStateFn {
214214
case rpSoftTimeout:
215215
// last request timed out, try asking a new peer
216216
go r.tryRequest()
217-
r.reqQueued = true
217+
r.lastReqQueued = true
218218
return r.stateRequesting
219219
case rpDeliveredValid:
220220
r.stop(nil)
@@ -233,7 +233,7 @@ func (r *sentReq) stateNoMorePeers() reqStateFn {
233233
select {
234234
case <-time.After(retryQueue):
235235
go r.tryRequest()
236-
r.reqQueued = true
236+
r.lastReqQueued = true
237237
return r.stateRequesting
238238
case ev := <-r.eventsCh:
239239
r.update(ev)
@@ -260,22 +260,26 @@ func (r *sentReq) stateStopped() reqStateFn {
260260
func (r *sentReq) update(ev reqPeerEvent) {
261261
switch ev.event {
262262
case rpSent:
263-
r.reqQueued = false
264-
if ev.peer != nil {
265-
r.reqSent = true
266-
}
263+
r.lastReqQueued = false
264+
r.lastReqSentTo = ev.peer
267265
case rpSoftTimeout:
268-
r.reqSent = false
266+
r.lastReqSentTo = nil
269267
r.reqSrtoCount++
270-
case rpHardTimeout, rpDeliveredValid, rpDeliveredInvalid:
268+
case rpHardTimeout:
271269
r.reqSrtoCount--
270+
case rpDeliveredValid, rpDeliveredInvalid:
271+
if ev.peer == r.lastReqSentTo {
272+
r.lastReqSentTo = nil
273+
} else {
274+
r.reqSrtoCount--
275+
}
272276
}
273277
}
274278

275279
// waiting returns true if the retrieval mechanism is waiting for an answer from
276280
// any peer
277281
func (r *sentReq) waiting() bool {
278-
return r.reqQueued || r.reqSent || r.reqSrtoCount > 0
282+
return r.lastReqQueued || r.lastReqSentTo != nil || r.reqSrtoCount > 0
279283
}
280284

281285
// tryRequest tries to send the request to a new peer and waits for it to either

0 commit comments

Comments
 (0)