Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add more metrics to investigate tx broadcast issue #1007

Merged
merged 8 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions eth/fetcher/tx_fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ var (
txFetcherQueueingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/hashes", nil)
txFetcherFetchingPeers = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/peers", nil)
txFetcherFetchingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/hashes", nil)

peerAnnounceTxsLenGauge = metrics.NewRegisteredGauge("eth/fetcher/peer/announce/txs", nil)
peerRetrievalTxsLenGauge = metrics.NewRegisteredGauge("eth/fetcher/peer/retrieval/txs", nil)
)

// txAnnounce is the notification of the availability of a batch
Expand Down Expand Up @@ -792,6 +795,8 @@ func (f *TxFetcher) scheduleFetches(timer *mclock.Timer, timeout chan struct{},
})

log.Debug("Scheduling transaction retrieval", "peer", peer, "len(f.announces[peer])", len(f.announces[peer]), "len(hashes)", len(hashes))
peerAnnounceTxsLenGauge.Update(int64(len(f.announces[peer])))
peerRetrievalTxsLenGauge.Update(int64(len(hashes)))

// If any hashes were allocated, request them from the peer
if len(hashes) > 0 {
Expand Down
9 changes: 9 additions & 0 deletions eth/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,17 @@ import (
"github.com/scroll-tech/go-ethereum/ethdb"
"github.com/scroll-tech/go-ethereum/event"
"github.com/scroll-tech/go-ethereum/log"
"github.com/scroll-tech/go-ethereum/metrics"
"github.com/scroll-tech/go-ethereum/p2p"
"github.com/scroll-tech/go-ethereum/params"
"github.com/scroll-tech/go-ethereum/trie"
)

// Metrics updated at the end of BroadcastTransactions:
//   - annoTxsLenGauge:   announced hashes divided by the number of announce packs
//   - directTxsLenGauge: broadcast txs divided by the number of tx packs
//
// The metric paths previously spelled "broadcast" as "broadast"; they are
// corrected here so they line up with the other broadcast metrics.
var (
	annoTxsLenGauge   = metrics.NewRegisteredGauge("eth/handler/broadcast/announce/txs", nil)
	directTxsLenGauge = metrics.NewRegisteredGauge("eth/handler/broadcast/direct/txs", nil)
)

const (
// txChanSize is the size of channel listening to NewTxsEvent.
// The number is referenced from the size of tx pool.
Expand Down Expand Up @@ -523,6 +529,9 @@ func (h *handler) BroadcastTransactions(txs types.Transactions) {
log.Debug("Transaction broadcast", "txs", len(txs),
"announce packs", annoPeers, "announced hashes", annoCount,
"tx packs", directPeers, "broadcast txs", directCount)

directTxsLenGauge.Update(int64(directCount / directPeers))
annoTxsLenGauge.Update(int64(annoCount / annoPeers))
}

// minedBroadcastLoop sends mined blocks to connected peers.
Expand Down
20 changes: 20 additions & 0 deletions eth/protocols/eth/broadcast.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ import (
"github.com/scroll-tech/go-ethereum/common"
"github.com/scroll-tech/go-ethereum/core/types"
"github.com/scroll-tech/go-ethereum/log"
"github.com/scroll-tech/go-ethereum/metrics"
)

var (
broadcastSendTxsLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/broadcast/direct/txs", nil)
broadcastSendTxsFailGauge = metrics.NewRegisteredGauge("eth/protocols/eth/broadcast/direct/fail", nil)
0xmountaintop marked this conversation as resolved.
Show resolved Hide resolved
broadcastSendHashesLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/broadcast/direct/hashes", nil)
broadcastSendQueueLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/broadcast/direct/queue", nil)
broadcastAnnoTxsLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/broadcast/anno/txs", nil)
broadcastAnnoTxsFailGauge = metrics.NewRegisteredGauge("eth/protocols/eth/broadcast/anno/fail", nil)
0xmountaintop marked this conversation as resolved.
Show resolved Hide resolved
broadcastAnnoHashesLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/broadcast/anno/hashes", nil)
broadcastAnnoQueueLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/broadcast/anno/queue", nil)
)

const (
Expand Down Expand Up @@ -94,8 +106,10 @@ func (p *Peer) broadcastTransactions() {
done = make(chan struct{})
go func() {
log.Debug("Sending transactions", "count", len(txs))
broadcastSendTxsLenGauge.Update(int64(len(txs)))
if err := p.SendTransactions(txs); err != nil {
log.Debug("Sending transactions", "count", len(txs), "err", err)
broadcastSendTxsFailGauge.Inc(1)
fail <- err
return
}
Expand All @@ -115,6 +129,8 @@ func (p *Peer) broadcastTransactions() {
// New batch of transactions to be broadcast, queue them (with cap)
queue = append(queue, hashes...)
log.Debug("Queue size in broadcastTransactions", "len(hashes)", len(hashes), "len(queue)", len(queue), "maxQueuedTxs", maxQueuedTxs)
broadcastSendHashesLenGauge.Update(int64(len(hashes)))
broadcastSendQueueLenGauge.Update(int64(len(queue)))
if len(queue) > maxQueuedTxs {
// Fancy copy and resize to ensure buffer doesn't grow indefinitely
queue = queue[:copy(queue, queue[len(queue)-maxQueuedTxs:])]
Expand Down Expand Up @@ -165,8 +181,10 @@ func (p *Peer) announceTransactions() {
done = make(chan struct{})
go func() {
log.Debug("Sending transaction announcements", "count", len(pending))
broadcastAnnoTxsLenGauge.Update(int64(len(pending)))
if err := p.sendPooledTransactionHashes(pending); err != nil {
log.Debug("Sending transaction announcements", "count", len(pending), "err", err)
broadcastAnnoTxsFailGauge.Inc(1)
fail <- err
return
}
Expand All @@ -186,6 +204,8 @@ func (p *Peer) announceTransactions() {
// New batch of transactions to be broadcast, queue them (with cap)
queue = append(queue, hashes...)
log.Debug("Queue size in announceTransactions", "len(hashes)", len(hashes), "len(queue)", len(queue), "maxQueuedTxAnns", maxQueuedTxAnns)
broadcastAnnoHashesLenGauge.Update(int64(len(hashes)))
broadcastAnnoQueueLenGauge.Update(int64(len(queue)))
if len(queue) > maxQueuedTxAnns {
// Fancy copy and resize to ensure buffer doesn't grow indefinitely
queue = queue[:copy(queue, queue[len(queue)-maxQueuedTxAnns:])]
Expand Down
27 changes: 27 additions & 0 deletions eth/protocols/eth/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,25 @@ import (
"github.com/scroll-tech/go-ethereum/common"
"github.com/scroll-tech/go-ethereum/core/types"
"github.com/scroll-tech/go-ethereum/log"
"github.com/scroll-tech/go-ethereum/metrics"
"github.com/scroll-tech/go-ethereum/rlp"
"github.com/scroll-tech/go-ethereum/trie"
)

var (
newPooledTxHashesFailGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/newpooledtxhashes/fail", nil)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change to counter

newPooledTxHashesLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/newpooledtxhashes/len", nil)
getPooledTxsFailGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/getpooledtxs/fail", nil)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change to counter

getPooledTxsQueryLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/getpooledtxs/query", nil)
getPooledTxsRetrievedLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/getpooledtxs/retrieved", nil)
handleTxsFailGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/handletxs/fail", nil)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change to counter

handleTxsLenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/handletxs/len", nil)
handleTxsNilGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/handletxs/nil", nil)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change to counter

pooledTxs66FailGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/pooledtxs66/fail", nil)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change to counter

pooledTxs66LenGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/pooledtxs66/len", nil)
pooledTxs66NillGauge = metrics.NewRegisteredGauge("eth/protocols/eth/handlers/pooledtxs66/nil", nil)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change to counter

)

// handleGetBlockHeaders66 is the eth/66 version of handleGetBlockHeaders
func handleGetBlockHeaders66(backend Backend, msg Decoder, peer *Peer) error {
// Decode the complex header query
Expand Down Expand Up @@ -324,10 +339,12 @@ func handleNewPooledTransactionHashes(backend Backend, msg Decoder, peer *Peer)
ann := new(NewPooledTransactionHashesPacket)
if err := msg.Decode(ann); err != nil {
log.Debug("Failed to decode `NewPooledTransactionHashesPacket`", "peer", peer.String(), "err", err)
newPooledTxHashesFailGauge.Inc(1)
return fmt.Errorf("%w: message %v: %v", errDecode, msg, err)
}
// Schedule all the unknown hashes for retrieval
log.Debug("handleNewPooledTransactionHashes", "peer", peer.String(), "len(ann)", len(*ann))
newPooledTxHashesLenGauge.Update(int64(len(*ann)))
for _, hash := range *ann {
peer.markTransaction(hash)
}
Expand All @@ -339,10 +356,13 @@ func handleGetPooledTransactions66(backend Backend, msg Decoder, peer *Peer) err
var query GetPooledTransactionsPacket66
if err := msg.Decode(&query); err != nil {
log.Debug("Failed to decode `GetPooledTransactionsPacket66`", "peer", peer.String(), "err", err)
getPooledTxsFailGauge.Inc(1)
return fmt.Errorf("%w: message %v: %v", errDecode, msg, err)
}
hashes, txs := answerGetPooledTransactions(backend, query.GetPooledTransactionsPacket, peer)
log.Debug("handleGetPooledTransactions", "peer", peer.String(), "RequestId", query.RequestId, "len(query)", len(query.GetPooledTransactionsPacket), "retrieved", len(hashes))
getPooledTxsQueryLenGauge.Update(int64(len(query.GetPooledTransactionsPacket)))
getPooledTxsRetrievedLenGauge.Update(int64(len(hashes)))
return peer.ReplyPooledTransactionsRLP(query.RequestId, hashes, txs)
}

Expand Down Expand Up @@ -382,13 +402,16 @@ func handleTransactions(backend Backend, msg Decoder, peer *Peer) error {
// Transactions can be processed, parse all of them and deliver to the pool
var txs TransactionsPacket
if err := msg.Decode(&txs); err != nil {
handleTxsFailGauge.Inc(1)
log.Debug("Failed to decode `TransactionsPacket`", "peer", peer.String(), "err", err)
return fmt.Errorf("%w: message %v: %v", errDecode, msg, err)
}
log.Debug("handleTransactions", "peer", peer.String(), "len(txs)", len(txs))
handleTxsLenGauge.Update(int64(len(txs)))
for i, tx := range txs {
// Validate and mark the remote transaction
if tx == nil {
handleTxsNilGauge.Inc(1)
log.Debug("handleTransactions: transaction is nil", "peer", peer.String(), "i", i)
return fmt.Errorf("%w: transaction %d is nil", errDecode, i)
}
Expand All @@ -405,12 +428,16 @@ func handlePooledTransactions66(backend Backend, msg Decoder, peer *Peer) error
// Transactions can be processed, parse all of them and deliver to the pool
var txs PooledTransactionsPacket66
if err := msg.Decode(&txs); err != nil {
pooledTxs66FailGauge.Inc(1)
log.Debug("Failed to decode `PooledTransactionsPacket66`", "peer", peer.String(), "err", err)
return fmt.Errorf("%w: message %v: %v", errDecode, msg, err)
}
log.Debug("handlePooledTransactions66", "peer", peer.String(), "len(txs)", len(txs.PooledTransactionsPacket))
pooledTxs66LenGauge.Update(int64(len(txs.PooledTransactionsPacket)))
for i, tx := range txs.PooledTransactionsPacket {
// Validate and mark the remote transaction
if tx == nil {
pooledTxs66NillGauge.Inc(1)
log.Debug("handlePooledTransactions: transaction is nil", "peer", peer.String(), "i", i)
return fmt.Errorf("%w: transaction %d is nil", errDecode, i)
}
Expand Down
6 changes: 6 additions & 0 deletions eth/protocols/eth/peer.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,15 @@ import (
"github.com/scroll-tech/go-ethereum/common"
"github.com/scroll-tech/go-ethereum/core/types"
"github.com/scroll-tech/go-ethereum/log"
"github.com/scroll-tech/go-ethereum/metrics"
"github.com/scroll-tech/go-ethereum/p2p"
"github.com/scroll-tech/go-ethereum/rlp"
)

// peerRequestTxsCntGauge holds the number of transaction hashes passed to the
// most recent RequestTxs call.
var peerRequestTxsCntGauge = metrics.NewRegisteredGauge("eth/protocols/eth/peer/request/txs", nil)

const (
// maxKnownTxs is the maximum transactions hashes to keep in the known list
// before starting to randomly evict them.
Expand Down Expand Up @@ -421,6 +426,7 @@ func (p *Peer) RequestTxs(hashes []common.Hash) error {
id := rand.Uint64()

log.Debug("Requesting transactions", "RequestId", id, "Peer.id", p.id, "count", len(hashes))
peerRequestTxsCntGauge.Update(int64(len(hashes)))

requestTracker.Track(p.id, p.version, GetPooledTransactionsMsg, PooledTransactionsMsg, id)
return p2p.Send(p.rw, GetPooledTransactionsMsg, &GetPooledTransactionsPacket66{
Expand Down
2 changes: 1 addition & 1 deletion params/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
const (
VersionMajor = 5 // Major version component of the current release
VersionMinor = 7 // Minor version component of the current release
VersionPatch = 2 // Patch version component of the current release
VersionPatch = 3 // Patch version component of the current release
VersionMeta = "mainnet" // Version metadata to append to the version string
)

Expand Down
Loading