Skip to content

Commit a97dda8

Browse files
committed
netann: fix disable issue for channels
During reconnection, make sure we do not disable a channel that is already active in the switch but whose enable timer has not fired yet.
1 parent 1c2ff4a commit a97dda8

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

netann/chan_status_manager.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,24 @@ func (m *ChanStatusManager) disableInactiveChannels() {
568568
continue
569569
}
570570

571+
// Re-verify the channel is still inactive before disabling.
572+
// This prevents the race condition where a channel becomes
573+
// active again after being marked as pending disabled but
574+
// before the disable timeout expires. Without this check the
575+
// channel would be disabled by the timeout even though it
576+
// is active.
577+
chanID := lnwire.NewChanIDFromOutPoint(outpoint)
578+
if m.cfg.IsChannelActive(chanID) {
579+
// Channel became active again, cancel the pending
580+
// disable.
581+
log.Debugf("Channel(%v) became active, canceling "+
582+
"scheduled disable", outpoint)
583+
584+
m.chanStates.markEnabled(outpoint)
585+
586+
continue
587+
}
588+
571589
log.Infof("Announcing channel(%v) disabled "+
572590
"[detected]", outpoint)
573591

netann/chan_status_manager_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,58 @@ var stateMachineTests = []stateMachineTest{
909909
)
910910
},
911911
},
912+
{
913+
name: "channel reconnects before disable",
914+
startActive: true,
915+
startEnabled: true,
916+
fn: func(h testHarness) {
917+
// This test exercises the fix for the race condition
918+
// where a channel becomes inactive, gets marked as
919+
// pending disabled, then becomes active again before
920+
// the disable timeout expires.
921+
// The channel should NOT be disabled in this case.
922+
923+
// Step 1: Simulate disconnection - channel becomes
924+
// inactive.
925+
h.markInactive(h.graph.chans())
926+
927+
// Step 2: Wait for the channel to be marked as
928+
// pending disabled. Sample interval of the manager
929+
// is 50ms, so we wait for 50ms + 50ms (buffer).
930+
time.Sleep(50*time.Millisecond + 50*time.Millisecond)
931+
932+
// Step 3: Simulate reconnection - channel becomes
933+
// active again.
934+
//
935+
// NOTE: This does not reflect the actual behavior of
936+
// LND because as soon as the channel becomes active it
937+
// will start an enable timer and send an enable update.
938+
// However we want to avoid testing these timings
939+
// here. In general it is important that the channel
940+
// does not get disabled in case it reconnects before
941+
// the disable timeout expires.
942+
h.markActive(h.graph.chans())
943+
944+
// Step 4: Wait for the disable timeout to expire.
945+
// The disable timeout (1 second) expires, but our fix
946+
// should prevent the disable because the channel is
947+
// active again.
948+
time.Sleep(time.Second + 200*time.Millisecond)
949+
950+
// Step 5: Verify that NO disable update was sent.
951+
// The channel should remain enabled because it became
952+
// active again before the disable timeout expired,
953+
// and our fix re-checked its status before disabling.
954+
h.assertNoUpdates(500 * time.Millisecond)
955+
956+
// Step 6: Verify that the channel is still enabled by
957+
// checking that we can still request enable without
958+
// sending an update. This means the channel is still
959+
// enabled.
960+
h.assertEnables(h.graph.chans(), nil, false)
961+
h.assertNoUpdates(500 * time.Millisecond)
962+
},
963+
},
912964
}
913965

914966
// TestChanStatusManagerStateMachine tests the possible state transitions that

0 commit comments

Comments
 (0)