Skip to content

Commit 6265f73

Browse files
committed
chainrpc: retry notifier RPCs during startup lag
lnd v0.20.0-rc3 delays ChainNotifier startup which causes Loop to hit "chain notifier RPC is still in the process of starting" during initial subscriptions (LND commit c6f458e478f9ef2cf1d394972bfbc512862c6707). Add a shared retry helper in lndclient so block epoch, confirmation and spend registrations transparently retry until the sub-server is ready, along with regression tests covering the behaviour.
1 parent 6cbaf58 commit 6265f73

File tree

2 files changed

+350
-15
lines changed

2 files changed

+350
-15
lines changed

chainnotifier_client.go

Lines changed: 116 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package lndclient
33
import (
44
"context"
55
"fmt"
6+
"strings"
67
"sync"
78
"time"
89

@@ -38,6 +39,19 @@ func DefaultNotifierOptions() *NotifierOptions {
3839
// events received from the notifier.
3940
type NotifierOption func(*NotifierOptions)
4041

42+
const (
43+
// chainNotifierStartupMessage matches the error string returned by lnd
44+
// v0.20.0-rc3+ when a ChainNotifier RPC is invoked before the
45+
// sub-server finishes initialization.
46+
chainNotifierStartupMessage = "chain notifier RPC is still in the " +
47+
"process of starting"
48+
49+
// chainNotifierRetryBackoff defines the delay between successive
50+
// subscription attempts while waiting for the ChainNotifier sub-server
51+
// to become operational.
52+
chainNotifierRetryBackoff = 500 * time.Millisecond
53+
)
54+
4155
// WithIncludeBlock is an optional argument that allows the caller to specify
4256
// that the block that mined a transaction should be included in the response.
4357
func WithIncludeBlock() NotifierOption {
@@ -133,11 +147,23 @@ func (s *chainNotifierClient) RegisterSpendNtfn(ctx context.Context,
133147
}
134148
}
135149

136-
macaroonAuth := s.chainMac.WithMacaroonAuth(ctx)
137-
resp, err := s.client.RegisterSpendNtfn(macaroonAuth, &chainrpc.SpendRequest{
138-
HeightHint: uint32(heightHint),
139-
Outpoint: rpcOutpoint,
140-
Script: pkScript,
150+
var (
151+
resp chainrpc.ChainNotifier_RegisterSpendNtfnClient
152+
err error
153+
)
154+
155+
// lnd v0.20.0-rc3 changed the startup ordering so the ChainNotifier
156+
// sub-server can report "still starting" for a short window. Retry the
157+
// registration in that case to avoid aborting clients that subscribe
158+
// immediately at startup.
159+
err = s.retryChainNotifierCall(ctx, func() error {
160+
macaroonAuth := s.chainMac.WithMacaroonAuth(ctx)
161+
resp, err = s.client.RegisterSpendNtfn(macaroonAuth, &chainrpc.SpendRequest{
162+
HeightHint: uint32(heightHint),
163+
Outpoint: rpcOutpoint,
164+
Script: pkScript,
165+
})
166+
return err
141167
})
142168
if err != nil {
143169
return nil, nil, err
@@ -251,15 +277,25 @@ func (s *chainNotifierClient) RegisterConfirmationsNtfn(ctx context.Context,
251277
if txid != nil {
252278
txidSlice = txid[:]
253279
}
254-
confStream, err := s.client.RegisterConfirmationsNtfn(
255-
s.chainMac.WithMacaroonAuth(ctx), &chainrpc.ConfRequest{
256-
Script: pkScript,
257-
NumConfs: uint32(numConfs),
258-
HeightHint: uint32(heightHint),
259-
Txid: txidSlice,
260-
IncludeBlock: opts.IncludeBlock,
261-
},
280+
var (
281+
confStream chainrpc.ChainNotifier_RegisterConfirmationsNtfnClient
282+
err error
262283
)
284+
// The confirmation RPC is also subject to the post-v0.20.0-rc3 startup
285+
// ordering change, so we retry here until lnd reports the sub-server
286+
// ready.
287+
err = s.retryChainNotifierCall(ctx, func() error {
288+
confStream, err = s.client.RegisterConfirmationsNtfn(
289+
s.chainMac.WithMacaroonAuth(ctx), &chainrpc.ConfRequest{
290+
Script: pkScript,
291+
NumConfs: uint32(numConfs),
292+
HeightHint: uint32(heightHint),
293+
Txid: txidSlice,
294+
IncludeBlock: opts.IncludeBlock,
295+
},
296+
)
297+
return err
298+
})
263299
if err != nil {
264300
return nil, nil, err
265301
}
@@ -362,9 +398,18 @@ func (s *chainNotifierClient) RegisterConfirmationsNtfn(ctx context.Context,
362398
func (s *chainNotifierClient) RegisterBlockEpochNtfn(ctx context.Context) (
363399
chan int32, chan error, error) {
364400

365-
blockEpochClient, err := s.client.RegisterBlockEpochNtfn(
366-
s.chainMac.WithMacaroonAuth(ctx), &chainrpc.BlockEpoch{},
401+
var (
402+
blockEpochClient chainrpc.ChainNotifier_RegisterBlockEpochNtfnClient
403+
err error
367404
)
405+
// Block epoch subscriptions similarly need to survive the "still
406+
// starting" period introduced in lnd v0.20.0-rc3.
407+
err = s.retryChainNotifierCall(ctx, func() error {
408+
blockEpochClient, err = s.client.RegisterBlockEpochNtfn(
409+
s.chainMac.WithMacaroonAuth(ctx), &chainrpc.BlockEpoch{},
410+
)
411+
return err
412+
})
368413
if err != nil {
369414
return nil, nil, err
370415
}
@@ -393,3 +438,59 @@ func (s *chainNotifierClient) RegisterBlockEpochNtfn(ctx context.Context) (
393438

394439
return blockEpochChan, blockErrorChan, nil
395440
}
441+
442+
// retryChainNotifierCall executes the passed RPC invocation, retrying while
443+
// lnd reports that the ChainNotifier sub-server is still initialising.
444+
//
445+
// Prior to v0.20.0-rc3 the ChainNotifier sub-server finished initialization
446+
// before dependent services started, so a single RPC attempt succeeded. From
447+
// rc3 (LND commit c6f458e478f9ef2cf1d394972bfbc512862c6707) onwards lnd starts
448+
// the notifier later in the daemon lifecycle to avoid rescans from stale
449+
// heights. During the brief gap between client connection and notifier
450+
// readiness lnd returns the string "chain notifier RPC is still in the process
451+
// of starting" wrapped in an Unknown gRPC status. Clients that interact with
452+
// lnd immediately after it connects - such as Loop during integration testing -
453+
// would previously treat that error as fatal and abort startup, even though
454+
// retrying shortly after would succeed.
455+
//
456+
// This helper centralises the retry policy: when the specific "still starting"
457+
// error is encountered we back off briefly and reissue the RPC. Non-startup
458+
// errors are returned to the caller unchanged, and the caller's context
459+
// controls the overall deadline so shutdown conditions are respected.
460+
func (s *chainNotifierClient) retryChainNotifierCall(ctx context.Context,
461+
call func() error) error {
462+
463+
for {
464+
err := call()
465+
if err == nil {
466+
return nil
467+
}
468+
469+
if !isChainNotifierStartingErr(err) {
470+
return err
471+
}
472+
473+
log.Warnf("Chain notifier RPC not ready yet, retrying: %v", err)
474+
475+
select {
476+
case <-time.After(chainNotifierRetryBackoff):
477+
continue
478+
479+
case <-ctx.Done():
480+
return ctx.Err()
481+
}
482+
}
483+
}
484+
485+
// detectChainNotifierStartupError reports whether `err` is due to the lnd
486+
// ChainNotifier sub-server still starting up. Starting with lnd v0.20.0-rc3
487+
// the notifier is initialised later in the daemon lifecycle, and the RPC layer
488+
// surfaces this as an Unknown gRPC status that contains the message defined in
489+
// chainNotifierStartupMessage.
490+
func isChainNotifierStartingErr(err error) bool {
491+
if err == nil {
492+
return false
493+
}
494+
495+
return strings.Contains(err.Error(), chainNotifierStartupMessage)
496+
}

0 commit comments

Comments
 (0)