mempool: disable MaxBatchBytes (#5800)
@p4u from vocdoni.io reported that the mempool might behave incorrectly under
high load. The consequences can range from pauses between blocks to peers
disconnecting from this node.

My current theory is that the flowrate lib we're using to control flow
(multiplexed over a single TCP connection) was not designed with large blobs
(a 1MB batch of txs) in mind.

I've tried decreasing the Mempool reactor priority, but that did not
have any visible effect. What actually worked was adding a time.Sleep
into mempool.Reactor#broadcastTxRoutine after each successful send,
i.e. manual flow control of sorts (see the sketch below).
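
For illustration, a self-contained sketch of that manual throttle; the helper names and delay values here are invented for the example, not taken from the actual patch:

    package main

    import (
        "fmt"
        "time"
    )

    // sendFn stands in for p2p.Peer.Send, which returns false when the
    // peer's send queue is full.
    type sendFn func(msg []byte) bool

    // broadcastWithThrottle sleeps after every successful send, giving the
    // multiplexed TCP connection time to drain between transactions.
    func broadcastWithThrottle(txs [][]byte, send sendFn) {
        for _, tx := range txs {
            for !send(tx) {
                time.Sleep(100 * time.Millisecond) // peer busy: back off and retry
            }
            time.Sleep(50 * time.Millisecond) // manual flow control of sorts
        }
    }

    func main() {
        sent := 0
        send := func(msg []byte) bool { sent++; return true }
        broadcastWithThrottle([][]byte{{0x01}, {0x02}, {0x03}}, send)
        fmt.Println("sent", sent, "txs")
    }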

As a temporary remedy (until the mempool package is refactored),
`max-batch-bytes` was disabled. Transactions will be sent one by one,
without batching.

Closes #5796
melekes committed Dec 21, 2020
1 parent dc90cf6 commit dc101f2
Showing 6 changed files with 31 additions and 59 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG_PENDING.md
@@ -4,6 +4,12 @@

 Special thanks to external contributors on this release:

+@p4u from vocdoni.io reported that the mempool might behave incorrectly under a
+high load. The consequences can range from pauses between blocks to the peers
+disconnecting from this node. As a temporary remedy (until the mempool package
+is refactored), the `max-batch-bytes` was disabled. Transactions will be sent
+one by one without batching.
+
 Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermint).

 ### BREAKING CHANGES
@@ -29,3 +35,4 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermint).
 ### BUG FIXES

 - [crypto] \#5707 Fix infinite recursion in string formatting of Secp256k1 keys (@erikgrinaker)
+- [mempool] \#5800 Disable `max-batch-bytes` (@melekes)
16 changes: 5 additions & 11 deletions config/config.go
@@ -665,6 +665,7 @@ type MempoolConfig struct {
     MaxTxBytes int `mapstructure:"max_tx_bytes"`
     // Maximum size of a batch of transactions to send to a peer
     // Including space needed by encoding (one varint per transaction).
+    // XXX: Unused due to https://github.com/tendermint/tendermint/issues/5796
     MaxBatchBytes int `mapstructure:"max_batch_bytes"`
 }

@@ -676,11 +677,10 @@ func DefaultMempoolConfig() *MempoolConfig {
         WalPath: "",
         // Each signature verification takes .5ms, Size reduced until we implement
         // ABCI Recheck
-        Size:          5000,
-        MaxTxsBytes:   1024 * 1024 * 1024, // 1GB
-        CacheSize:     10000,
-        MaxTxBytes:    1024 * 1024,        // 1MB
-        MaxBatchBytes: 10 * 1024 * 1024,   // 10MB
+        Size:        5000,
+        MaxTxsBytes: 1024 * 1024 * 1024, // 1GB
+        CacheSize:   10000,
+        MaxTxBytes:  1024 * 1024, // 1MB
     }
 }

@@ -716,12 +716,6 @@ func (cfg *MempoolConfig) ValidateBasic() error {
     if cfg.MaxTxBytes < 0 {
         return errors.New("max_tx_bytes can't be negative")
     }
-    if cfg.MaxBatchBytes < 0 {
-        return errors.New("max_batch_bytes can't be negative")
-    }
-    if cfg.MaxBatchBytes <= cfg.MaxTxBytes {
-        return errors.New("max_batch_bytes can't be less or equal to max_tx_bytes")
-    }
     return nil
 }

1 change: 1 addition & 0 deletions config/toml.go
@@ -340,6 +340,7 @@ max_tx_bytes = {{ .Mempool.MaxTxBytes }}
 # Maximum size of a batch of transactions to send to a peer
 # Including space needed by encoding (one varint per transaction).
+# XXX: Unused due to https://github.com/tendermint/tendermint/issues/5796
 max_batch_bytes = {{ .Mempool.MaxBatchBytes }}

 #######################################################
1 change: 1 addition & 0 deletions docs/tendermint-core/configuration.md
@@ -289,6 +289,7 @@ max_tx_bytes = 1048576

 # Maximum size of a batch of transactions to send to a peer
 # Including space needed by encoding (one varint per transaction).
+# XXX: Unused due to https://github.com/tendermint/tendermint/issues/5796
 max_batch_bytes = 10485760

 #######################################################
50 changes: 12 additions & 38 deletions mempool/reactor.go
@@ -134,12 +134,18 @@ func (memR *Reactor) OnStart() error {
 // GetChannels implements Reactor by returning the list of channels for this
 // reactor.
 func (memR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
-    maxMsgSize := memR.config.MaxBatchBytes
+    largestTx := make([]byte, memR.config.MaxTxBytes)
+    batchMsg := protomem.Message{
+        Sum: &protomem.Message_Txs{
+            Txs: &protomem.Txs{Txs: [][]byte{largestTx}},
+        },
+    }
+
     return []*p2p.ChannelDescriptor{
         {
             ID:                  MempoolChannel,
             Priority:            5,
-            RecvMessageCapacity: maxMsgSize,
+            RecvMessageCapacity: batchMsg.Size(),
         },
     }
 }
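
For reference, a minimal standalone sketch of how that capacity comes out, reusing the generated proto types; the 1MB constant here mirrors the default max_tx_bytes, and Size() is the gogoproto-generated encoded length, so the capacity lands slightly above the tx itself:

    package main

    import (
        "fmt"

        protomem "github.com/tendermint/tendermint/proto/tendermint/mempool"
    )

    func main() {
        const maxTxBytes = 1024 * 1024 // mirrors the default max_tx_bytes (1MB)

        // A Message wrapping one largest-possible tx: the biggest payload a
        // peer may now legitimately send on the mempool channel.
        largestTx := make([]byte, maxTxBytes)
        msg := protomem.Message{
            Sum: &protomem.Message_Txs{
                Txs: &protomem.Txs{Txs: [][]byte{largestTx}},
            },
        }
        fmt.Println(msg.Size()) // tx bytes plus a few bytes of proto framing
    }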
@@ -232,20 +238,19 @@ func (memR *Reactor) broadcastTxRoutine(peer p2p.Peer) {
             continue
         }

-        txs := memR.txs(next, peerID, peerState.GetHeight()) // WARNING: mutates next!
+        // NOTE: Transaction batching was disabled due to
+        // https://github.com/tendermint/tendermint/issues/5796

-        // send txs
-        if len(txs) > 0 {
+        if _, ok := memTx.senders.Load(peerID); !ok {
             msg := protomem.Message{
                 Sum: &protomem.Message_Txs{
-                    Txs: &protomem.Txs{Txs: txs},
+                    Txs: &protomem.Txs{Txs: [][]byte{memTx.tx}},
                 },
             }
             bz, err := msg.Marshal()
             if err != nil {
                 panic(err)
             }
-            memR.Logger.Debug("Sending N txs to peer", "N", len(txs), "peer", peer)
             success := peer.Send(MempoolChannel, bz)
             if !success {
                 time.Sleep(peerCatchupSleepIntervalMS * time.Millisecond)
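
With batching gone, the per-peer filter that used to live in memR.txs moves inline: each mempool entry carries a senders map recording which peers already sent us that tx, so the broadcast loop skips them. A simplified, self-contained sketch of that bookkeeping (memTx here is a stripped-down stand-in for the real mempoolTx):

    package main

    import (
        "fmt"
        "sync"
    )

    // memTx is a stripped-down stand-in for the mempool's mempoolTx: senders
    // records which peers already gave us this tx, so we never echo it back.
    type memTx struct {
        tx      []byte
        senders sync.Map // peer ID (uint16) -> struct{}
    }

    func main() {
        tx := &memTx{tx: []byte("payload")}
        tx.senders.Store(uint16(7), struct{}{}) // the tx arrived from peer 7

        for _, peerID := range []uint16{7, 8} {
            if _, ok := tx.senders.Load(peerID); !ok {
                fmt.Printf("send tx to peer %d\n", peerID)
            } else {
                fmt.Printf("skip peer %d (it already has the tx)\n", peerID)
            }
        }
    }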
@@ -265,37 +270,6 @@ func (memR *Reactor) broadcastTxRoutine(peer p2p.Peer) {
     }
 }

-// txs iterates over the transaction list and builds a batch of txs. next is
-// included.
-// WARNING: mutates next!
-func (memR *Reactor) txs(next *clist.CElement, peerID uint16, peerHeight int64) [][]byte {
-    batch := make([][]byte, 0)
-
-    for {
-        memTx := next.Value.(*mempoolTx)
-
-        if _, ok := memTx.senders.Load(peerID); !ok {
-            // If current batch + this tx size is greater than max => return.
-            batchMsg := protomem.Message{
-                Sum: &protomem.Message_Txs{
-                    Txs: &protomem.Txs{Txs: append(batch, memTx.tx)},
-                },
-            }
-            if batchMsg.Size() > memR.config.MaxBatchBytes {
-                return batch
-            }
-
-            batch = append(batch, memTx.tx)
-        }
-
-        n := next.Next()
-        if n == nil {
-            return batch
-        }
-        next = n
-    }
-}
-
 //-----------------------------------------------------------------------------
 // Messages

15 changes: 5 additions & 10 deletions mempool/reactor_test.go
@@ -149,9 +149,8 @@ func TestReactorNoBroadcastToSender(t *testing.T) {
     ensureNoTxs(t, reactors[peerID], 100*time.Millisecond)
 }

-func TestReactor_MaxBatchBytes(t *testing.T) {
+func TestReactor_MaxTxBytes(t *testing.T) {
     config := cfg.TestConfig()
-    config.Mempool.MaxBatchBytes = 1024

     const N = 2
     reactors := makeAndConnectReactors(config, N)
@@ -168,9 +167,9 @@ func TestReactor_MaxBatchBytes(t *testing.T) {
         }
     }

-    // Broadcast a tx, which has the max size (minus proto overhead)
+    // Broadcast a tx, which has the max size
     // => ensure it's received by the second reactor.
-    tx1 := tmrand.Bytes(1018)
+    tx1 := tmrand.Bytes(config.Mempool.MaxTxBytes)
     err := reactors[0].mempool.CheckTx(tx1, nil, TxInfo{SenderID: UnknownPeerID})
     require.NoError(t, err)
     waitForTxsOnReactors(t, []types.Tx{tx1}, reactors)
@@ -180,13 +179,9 @@

     // Broadcast a tx, which is beyond the max size
     // => ensure it's not sent
-    tx2 := tmrand.Bytes(1020)
+    tx2 := tmrand.Bytes(config.Mempool.MaxTxBytes + 1)
     err = reactors[0].mempool.CheckTx(tx2, nil, TxInfo{SenderID: UnknownPeerID})
-    require.NoError(t, err)
-    ensureNoTxs(t, reactors[1], 100*time.Millisecond)
-    // => ensure the second reactor did not disconnect from us
-    out, in, _ := reactors[1].Switch.NumPeers()
-    assert.Equal(t, 1, out+in)
+    require.Error(t, err)
 }

 func TestBroadcastTxForPeerStopsWhenPeerStops(t *testing.T) {
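
Note: an oversized tx now fails at CheckTx itself (the mempool returns ErrTxTooLarge when a tx exceeds max_tx_bytes), so the test asserts an error up front instead of checking that nothing was gossiped and that the peer stayed connected.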
