Skip to content

Commit b451094

Browse files
laanwjcodablock
authored andcommitted
Merge bitcoin#11490: Disconnect from outbound peers with bad headers chains
e065249 Add unit test for outbound peer eviction (Suhas Daftuar) 5a6d00c Permit disconnection of outbound peers on bad/slow chains (Suhas Daftuar) c60fd71 Disconnecting from bad outbound peers in IBD (Suhas Daftuar) Pull request description: The first commit will disconnect an outbound peer that serves us a headers chain with insufficient work while we're in IBD. The second commit introduces a way to disconnect outbound peers whose chains fall out of sync with ours: For a given outbound peer, we check whether their best known block (which is known from the blocks they announce to us) has at least as much work as our tip. If it doesn't, we set a 20 minute timeout, and if we still haven't heard about a block with as much work as our tip had when we set the timeout, then we send a single getheaders message, and wait 2 more minutes. If after two minutes their best known block has insufficient work, we disconnect that peer. We protect 4 of our outbound peers (who provide some "good" headers chains, ie a chain with at least as much work as our tip at some point) from being subject to this logic, to prevent excessive network topology changes as a result of this algorithm, while still ensuring that we have a reasonable number of nodes not known to be on bogus chains. We also don't require our peers to be on the same chain as us, to prevent accidental partitioning of the network in the event of a chain split. Note that if our peers are ever on a more work chain than our tip, then we will download and validate it, and then either reorg to it, or learn of a consensus incompatibility with that peer and disconnect. This PR is designed to protect against peers that are on a less work chain which we may never try to download and validate. Tree-SHA512: 2e0169a1dd8a7fb95980573ac4a201924bffdd724c19afcab5efcef076fdbe1f2cec7dc5f5d7e0a6327216f56d3828884f73642e00c8534b56ec2bb4c854a656
1 parent c35aa66 commit b451094

File tree

4 files changed

+200
-0
lines changed

4 files changed

+200
-0
lines changed

src/net_processing.cpp

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ namespace {
147147
/** Number of peers from which we're downloading blocks. */
148148
int nPeersWithValidatedDownloads = 0;
149149

150+
/** Number of outbound peers with m_chain_sync.m_protect. */
151+
int g_outbound_peers_with_protect_from_disconnect = 0;
152+
150153
/* This comment is here to force a merge conflict when bitcoin#11560 is backported
151154
* It introduces this member as int64_t while bitcoin#11824 changes it to atomic<int64_t>.
152155
* bitcoin#11824 is partially backported already, which means you'll have to take the atomic
@@ -223,6 +226,33 @@ struct CNodeState {
223226
*/
224227
bool fSupportsDesiredCmpctVersion;
225228

229+
/** State used to enforce CHAIN_SYNC_TIMEOUT
230+
* Only in effect for outbound, non-manual connections, with
231+
* m_protect == false
232+
* Algorithm: if a peer's best known block has less work than our tip,
233+
* set a timeout CHAIN_SYNC_TIMEOUT seconds in the future:
234+
* - If at timeout their best known block now has more work than our tip
235+
* when the timeout was set, then either reset the timeout or clear it
236+
* (after comparing against our current tip's work)
237+
* - If at timeout their best known block still has less work than our
238+
* tip did when the timeout was set, then send a getheaders message,
239+
* and set a shorter timeout, HEADERS_RESPONSE_TIME seconds in future.
240+
* If their best known block is still behind when that new timeout is
241+
* reached, disconnect.
242+
*/
243+
struct ChainSyncTimeoutState {
244+
//! A timeout used for checking whether our peer has sufficiently synced
245+
int64_t m_timeout;
246+
//! A header with the work we require on our peer's chain
247+
const CBlockIndex * m_work_header;
248+
//! After timeout is reached, set to true after sending getheaders
249+
bool m_sent_getheaders;
250+
//! Whether this peer is protected from disconnection due to a bad/slow chain
251+
bool m_protect;
252+
};
253+
254+
ChainSyncTimeoutState m_chain_sync;
255+
226256
CNodeState(CAddress addrIn, std::string addrNameIn) : address(addrIn), name(addrNameIn) {
227257
fCurrentlyConnected = false;
228258
nMisbehavior = 0;
@@ -243,6 +273,7 @@ struct CNodeState {
243273
fPreferHeaderAndIDs = false;
244274
fProvidesHeaderAndIDs = false;
245275
fSupportsDesiredCmpctVersion = false;
276+
m_chain_sync = { 0, nullptr, false, false };
246277
}
247278
};
248279

@@ -526,6 +557,13 @@ void FindNextBlocksToDownload(NodeId nodeid, unsigned int count, std::vector<con
526557

527558
} // namespace
528559

560+
// Returns true for outbound peers, excluding manual connections, feelers, and
561+
// one-shots
562+
bool IsOutboundDisconnectionCandidate(const CNode *node)
563+
{
564+
return !(node->fInbound || node->m_manual_connection || node->fFeeler || node->fOneShot);
565+
}
566+
529567
void PeerLogicValidation::InitializeNode(CNode *pnode) {
530568
CAddress addr = pnode->addr;
531569
std::string addrName = pnode->GetAddrName();
@@ -558,6 +596,8 @@ void PeerLogicValidation::FinalizeNode(NodeId nodeid, bool& fUpdateConnectionTim
558596
nPreferredDownload -= state->fPreferredDownload;
559597
nPeersWithValidatedDownloads -= (state->nBlocksInFlightValidHeaders != 0);
560598
assert(nPeersWithValidatedDownloads >= 0);
599+
g_outbound_peers_with_protect_from_disconnect -= state->m_chain_sync.m_protect;
600+
assert(g_outbound_peers_with_protect_from_disconnect >= 0);
561601

562602
mapNodeState.erase(nodeid);
563603

@@ -566,6 +606,7 @@ void PeerLogicValidation::FinalizeNode(NodeId nodeid, bool& fUpdateConnectionTim
566606
assert(mapBlocksInFlight.empty());
567607
assert(nPreferredDownload == 0);
568608
assert(nPeersWithValidatedDownloads == 0);
609+
assert(g_outbound_peers_with_protect_from_disconnect == 0);
569610
}
570611
LogPrint(BCLog::NET, "Cleared nodestate for peer=%d\n", nodeid);
571612
}
@@ -2623,6 +2664,10 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
26232664
assert(pindexLast);
26242665
UpdateBlockAvailability(pfrom->GetId(), pindexLast->GetBlockHash());
26252666

2667+
// From here, pindexBestKnownBlock should be guaranteed to be non-null,
2668+
// because it is set in UpdateBlockAvailability. Some nullptr checks
2669+
// are still present, however, as belt-and-suspenders.
2670+
26262671
if (nCount == MAX_HEADERS_RESULTS) {
26272672
// Headers message had its maximum size; the peer may have more headers.
26282673
// TODO: optimize: if pindexLast is an ancestor of chainActive.Tip or pindexBestHeader, continue
@@ -2680,6 +2725,35 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
26802725
}
26812726
}
26822727
}
2728+
// If we're in IBD, we want outbound peers that will serve us a useful
2729+
// chain. Disconnect peers that are on chains with insufficient work.
2730+
if (IsInitialBlockDownload() && nCount != MAX_HEADERS_RESULTS) {
2731+
// When nCount < MAX_HEADERS_RESULTS, we know we have no more
2732+
// headers to fetch from this peer.
2733+
if (nodestate->pindexBestKnownBlock && nodestate->pindexBestKnownBlock->nChainWork < nMinimumChainWork) {
2734+
// This peer has too little work on their headers chain to help
2735+
// us sync -- disconnect if using an outbound slot (unless
2736+
// whitelisted or addnode).
2737+
// Note: We compare their tip to nMinimumChainWork (rather than
2738+
// chainActive.Tip()) because we won't start block download
2739+
// until we have a headers chain that has at least
2740+
// nMinimumChainWork, even if a peer has a chain past our tip,
2741+
// as an anti-DoS measure.
2742+
if (IsOutboundDisconnectionCandidate(pfrom)) {
2743+
LogPrintf("Disconnecting outbound peer %d -- headers chain has insufficient work\n", pfrom->GetId());
2744+
pfrom->fDisconnect = true;
2745+
}
2746+
}
2747+
}
2748+
2749+
if (!pfrom->fDisconnect && IsOutboundDisconnectionCandidate(pfrom) && nodestate->pindexBestKnownBlock != nullptr) {
2750+
// If this is an outbound peer, check to see if we should protect
2751+
// it from the bad/lagging chain logic.
2752+
if (g_outbound_peers_with_protect_from_disconnect < MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT && nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) {
2753+
nodestate->m_chain_sync.m_protect = true;
2754+
++g_outbound_peers_with_protect_from_disconnect;
2755+
}
2756+
}
26832757
}
26842758
}
26852759

@@ -3117,6 +3191,58 @@ bool PeerLogicValidation::ProcessMessages(CNode* pfrom, std::atomic<bool>& inter
31173191
return fMoreWork;
31183192
}
31193193

3194+
void PeerLogicValidation::ConsiderEviction(CNode *pto, int64_t time_in_seconds)
3195+
{
3196+
AssertLockHeld(cs_main);
3197+
3198+
CNodeState &state = *State(pto->GetId());
3199+
const CNetMsgMaker msgMaker(pto->GetSendVersion());
3200+
3201+
if (!state.m_chain_sync.m_protect && IsOutboundDisconnectionCandidate(pto) && state.fSyncStarted) {
3202+
// This is an outbound peer subject to disconnection if they don't
3203+
// announce a block with as much work as the current tip within
3204+
// CHAIN_SYNC_TIMEOUT + HEADERS_RESPONSE_TIME seconds (note: if
3205+
// their chain has more work than ours, we should sync to it,
3206+
// unless it's invalid, in which case we should find that out and
3207+
// disconnect from them elsewhere).
3208+
if (state.pindexBestKnownBlock != nullptr && state.pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork) {
3209+
if (state.m_chain_sync.m_timeout != 0) {
3210+
state.m_chain_sync.m_timeout = 0;
3211+
state.m_chain_sync.m_work_header = nullptr;
3212+
state.m_chain_sync.m_sent_getheaders = false;
3213+
}
3214+
} else if (state.m_chain_sync.m_timeout == 0 || (state.m_chain_sync.m_work_header != nullptr && state.pindexBestKnownBlock != nullptr && state.pindexBestKnownBlock->nChainWork >= state.m_chain_sync.m_work_header->nChainWork)) {
3215+
// Our best block known by this peer is behind our tip, and we're either noticing
3216+
// that for the first time, OR this peer was able to catch up to some earlier point
3217+
// where we checked against our tip.
3218+
// Either way, set a new timeout based on current tip.
3219+
state.m_chain_sync.m_timeout = time_in_seconds + CHAIN_SYNC_TIMEOUT;
3220+
state.m_chain_sync.m_work_header = chainActive.Tip();
3221+
state.m_chain_sync.m_sent_getheaders = false;
3222+
} else if (state.m_chain_sync.m_timeout > 0 && time_in_seconds > state.m_chain_sync.m_timeout) {
3223+
// No evidence yet that our peer has synced to a chain with work equal to that
3224+
// of our tip, when we first detected it was behind. Send a single getheaders
3225+
// message to give the peer a chance to update us.
3226+
if (state.m_chain_sync.m_sent_getheaders) {
3227+
// They've run out of time to catch up!
3228+
LogPrintf("Disconnecting outbound peer %d for old chain, best known block = %s\n", pto->GetId(), state.pindexBestKnownBlock != nullptr ? state.pindexBestKnownBlock->GetBlockHash().ToString() : "<none>");
3229+
pto->fDisconnect = true;
3230+
} else {
3231+
LogPrint(BCLog::NET, "sending getheaders to outbound peer=%d to verify chain work (current best known block:%s, benchmark blockhash: %s)\n", pto->GetId(), state.pindexBestKnownBlock != nullptr ? state.pindexBestKnownBlock->GetBlockHash().ToString() : "<none>", state.m_chain_sync.m_work_header->GetBlockHash().ToString());
3232+
connman->PushMessage(pto, msgMaker.Make(NetMsgType::GETHEADERS, chainActive.GetLocator(state.m_chain_sync.m_work_header->pprev), uint256()));
3233+
state.m_chain_sync.m_sent_getheaders = true;
3234+
constexpr int64_t HEADERS_RESPONSE_TIME = 120; // 2 minutes
3235+
// Bump the timeout to allow a response, which could clear the timeout
3236+
// (if the response shows the peer has synced), reset the timeout (if
3237+
// the peer syncs to the required work but not to our tip), or result
3238+
// in disconnect (if we advance to the timeout and pindexBestKnownBlock
3239+
// has not sufficiently progressed)
3240+
state.m_chain_sync.m_timeout = time_in_seconds + HEADERS_RESPONSE_TIME;
3241+
}
3242+
}
3243+
}
3244+
}
3245+
31203246
class CompareInvMempoolOrder
31213247
{
31223248
CTxMemPool *mp;
@@ -3588,6 +3714,9 @@ bool PeerLogicValidation::SendMessages(CNode* pto, std::atomic<bool>& interruptM
35883714
}
35893715
}
35903716

3717+
// Check that outbound peers have reasonable chains
3718+
// GetTime() is used by this anti-DoS logic so we can test this using mocktime
3719+
ConsiderEviction(pto, GetTime());
35913720

35923721
//
35933722
// Message: getdata (blocks)

src/net_processing.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ static const int64_t ORPHAN_TX_EXPIRE_INTERVAL = 5 * 60;
2020
* Timeout = base + per_header * (expected number of headers) */
2121
static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_BASE = 15 * 60 * 1000000; // 15 minutes
2222
static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER = 1000; // 1ms/header
23+
/** Protect at least this many outbound peers from disconnection due to slow/
24+
* behind headers chain.
25+
*/
26+
static constexpr int32_t MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT = 4;
27+
/** Timeout for (unprotected) outbound peers to sync to our chainwork, in seconds */
28+
static constexpr int64_t CHAIN_SYNC_TIMEOUT = 20 * 60; // 20 minutes
2329

2430
/** Default number of orphan+recently-replaced txn to keep around for block reconstruction */
2531
static const unsigned int DEFAULT_BLOCK_RECONSTRUCTION_EXTRA_TXN = 100;
@@ -49,6 +55,8 @@ class PeerLogicValidation : public CValidationInterface, public NetEventsInterfa
4955
* @return True if there is more work to be done
5056
*/
5157
bool SendMessages(CNode* pto, std::atomic<bool>& interrupt) override;
58+
59+
void ConsiderEviction(CNode *pto, int64_t time_in_seconds);
5260
};
5361

5462
struct CNodeStateStats {

src/test/DoS_tests.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,51 @@ static NodeId id = 0;
4242

4343
BOOST_FIXTURE_TEST_SUITE(DoS_tests, TestingSetup)
4444

45+
// Test eviction of an outbound peer whose chain never advances
46+
// Mock a node connection, and use mocktime to simulate a peer
47+
// which never sends any headers messages. PeerLogic should
48+
// decide to evict that outbound peer, after the appropriate timeouts.
49+
// Note that we protect 4 outbound nodes from being subject to
50+
// this logic; this test takes advantage of that protection only
51+
// being applied to nodes which send headers with sufficient
52+
// work.
53+
BOOST_AUTO_TEST_CASE(outbound_slow_chain_eviction)
54+
{
55+
std::atomic<bool> interruptDummy(false);
56+
57+
// Mock an outbound peer
58+
CAddress addr1(ip(0xa0b0c001), NODE_NONE);
59+
CNode dummyNode1(id++, ServiceFlags(NODE_NETWORK|NODE_WITNESS), 0, INVALID_SOCKET, addr1, 0, 0, CAddress(), "", /*fInboundIn=*/ false);
60+
dummyNode1.SetSendVersion(PROTOCOL_VERSION);
61+
62+
peerLogic->InitializeNode(&dummyNode1);
63+
dummyNode1.nVersion = 1;
64+
dummyNode1.fSuccessfullyConnected = true;
65+
66+
// This test requires that we have a chain with non-zero work.
67+
BOOST_CHECK(chainActive.Tip() != nullptr);
68+
BOOST_CHECK(chainActive.Tip()->nChainWork > 0);
69+
70+
// Test starts here
71+
peerLogic->SendMessages(&dummyNode1, interruptDummy); // should result in getheaders
72+
BOOST_CHECK(dummyNode1.vSendMsg.size() > 0);
73+
dummyNode1.vSendMsg.clear();
74+
75+
int64_t nStartTime = GetTime();
76+
// Wait 21 minutes
77+
SetMockTime(nStartTime+21*60);
78+
peerLogic->SendMessages(&dummyNode1, interruptDummy); // should result in getheaders
79+
BOOST_CHECK(dummyNode1.vSendMsg.size() > 0);
80+
// Wait 3 more minutes
81+
SetMockTime(nStartTime+24*60);
82+
peerLogic->SendMessages(&dummyNode1, interruptDummy); // should result in disconnect
83+
BOOST_CHECK(dummyNode1.fDisconnect == true);
84+
SetMockTime(0);
85+
86+
bool dummy;
87+
peerLogic->FinalizeNode(dummyNode1.GetId(), dummy);
88+
}
89+
4590
BOOST_AUTO_TEST_CASE(DoS_banning)
4691
{
4792
std::atomic<bool> interruptDummy(false);
@@ -71,6 +116,10 @@ BOOST_AUTO_TEST_CASE(DoS_banning)
71116
Misbehaving(dummyNode2.GetId(), 50);
72117
peerLogic->SendMessages(&dummyNode2, interruptDummy);
73118
BOOST_CHECK(connman->IsBanned(addr2));
119+
120+
bool dummy;
121+
peerLogic->FinalizeNode(dummyNode1.GetId(), dummy);
122+
peerLogic->FinalizeNode(dummyNode2.GetId(), dummy);
74123
}
75124

76125
BOOST_AUTO_TEST_CASE(DoS_banscore)
@@ -95,6 +144,9 @@ BOOST_AUTO_TEST_CASE(DoS_banscore)
95144
peerLogic->SendMessages(&dummyNode1, interruptDummy);
96145
BOOST_CHECK(connman->IsBanned(addr1));
97146
gArgs.ForceSetArg("-banscore", std::to_string(DEFAULT_BANSCORE_THRESHOLD));
147+
148+
bool dummy;
149+
peerLogic->FinalizeNode(dummyNode1.GetId(), dummy);
98150
}
99151

100152
BOOST_AUTO_TEST_CASE(DoS_bantime)
@@ -121,6 +173,9 @@ BOOST_AUTO_TEST_CASE(DoS_bantime)
121173

122174
SetMockTime(nStartTime+60*60*24+1);
123175
BOOST_CHECK(!connman->IsBanned(addr));
176+
177+
bool dummy;
178+
peerLogic->FinalizeNode(dummyNode.GetId(), dummy);
124179
}
125180

126181
CTransactionRef RandomOrphan()

test/functional/minchainwork.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class MinimumChainWorkTest(BitcoinTestFramework):
2727
def set_test_params(self):
2828
self.setup_clean_chain = True
2929
self.num_nodes = 3
30+
3031
self.extra_args = [[], ["-minimumchainwork=0x65"], ["-minimumchainwork=0x65"]]
3132
self.node_min_work = [0, 101, 101]
3233

@@ -74,6 +75,13 @@ def run_test(self):
7475
self.nodes[0].generate(1)
7576

7677
self.log.info("Verifying nodes are all synced")
78+
79+
# Because nodes in regtest are all manual connections (eg using
80+
# addnode), node1 should not have disconnected node0. If not for that,
81+
# we'd expect node1 to have disconnected node0 for serving an
82+
# insufficient work chain, in which case we'd need to reconnect them to
83+
# continue the test.
84+
7785
self.sync_all()
7886
self.log.info("Blockcounts: %s", [n.getblockcount() for n in self.nodes])
7987

0 commit comments

Comments
 (0)