Skip to content

Commit e89b59e

Browse files
committed
Measure bloom filter AppGossip hit rate
This commit adds a metric that measures the bloom filter hit rate, as a percentage, for gossip pull queries. Signed-off-by: Yacov Manevich <yacov.manevich@avalabs.org>
1 parent c59a07b commit e89b59e

File tree

3 files changed

+118
-0
lines changed

3 files changed

+118
-0
lines changed

network/p2p/gossip/gossip.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ type Metrics struct {
101101
tracking *prometheus.GaugeVec
102102
trackingLifetimeAverage prometheus.Gauge
103103
topValidators *prometheus.GaugeVec
104+
bloomFilterHitRate prometheus.Histogram
104105
}
105106

106107
// NewMetrics returns a common set of metrics
@@ -109,6 +110,12 @@ func NewMetrics(
109110
namespace string,
110111
) (Metrics, error) {
111112
m := Metrics{
113+
bloomFilterHitRate: prometheus.NewHistogram(prometheus.HistogramOpts{
114+
Namespace: namespace,
115+
Name: "bloomfilter_hit_rate",
116+
Help: "Hit rate (%) of the bloom filter sent by pull gossip",
117+
Buckets: prometheus.LinearBuckets(0, 25, 5),
118+
}),
112119
count: prometheus.NewCounterVec(
113120
prometheus.CounterOpts{
114121
Namespace: namespace,
@@ -148,6 +155,7 @@ func NewMetrics(
148155
),
149156
}
150157
err := errors.Join(
158+
metrics.Register(m.bloomFilterHitRate),
151159
metrics.Register(m.count),
152160
metrics.Register(m.bytes),
153161
metrics.Register(m.tracking),

network/p2p/gossip/gossip_test.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ func TestGossiperGossip(t *testing.T) {
5757
responder []*testTx // what the peer we're requesting gossip from has
5858
expectedPossibleValues []*testTx // possible values we can have
5959
expectedLen int
60+
expectedObserved bool
61+
expectedObservedVal uint64
6062
}{
6163
{
6264
name: "no gossip - no one knows anything",
@@ -75,13 +77,16 @@ func TestGossiperGossip(t *testing.T) {
7577
responder: []*testTx{{id: ids.ID{0}}},
7678
expectedPossibleValues: []*testTx{{id: ids.ID{0}}},
7779
expectedLen: 1,
80+
expectedObservedVal: 100,
81+
expectedObserved: true,
7882
},
7983
{
8084
name: "gossip - requester knows nothing",
8185
targetResponseSize: 1024,
8286
responder: []*testTx{{id: ids.ID{0}}},
8387
expectedPossibleValues: []*testTx{{id: ids.ID{0}}},
8488
expectedLen: 1,
89+
expectedObserved: true,
8590
},
8691
{
8792
name: "gossip - requester knows less than responder",
@@ -90,13 +95,16 @@ func TestGossiperGossip(t *testing.T) {
9095
responder: []*testTx{{id: ids.ID{0}}, {id: ids.ID{1}}},
9196
expectedPossibleValues: []*testTx{{id: ids.ID{0}}, {id: ids.ID{1}}},
9297
expectedLen: 2,
98+
expectedObservedVal: 50,
99+
expectedObserved: true,
93100
},
94101
{
95102
name: "gossip - target response size exceeded",
96103
targetResponseSize: 32,
97104
responder: []*testTx{{id: ids.ID{0}}, {id: ids.ID{1}}, {id: ids.ID{2}}},
98105
expectedPossibleValues: []*testTx{{id: ids.ID{0}}, {id: ids.ID{1}}, {id: ids.ID{2}}},
99106
expectedLen: 2,
107+
expectedObserved: true,
100108
},
101109
}
102110

@@ -123,6 +131,12 @@ func TestGossiperGossip(t *testing.T) {
123131

124132
metrics, err := NewMetrics(prometheus.NewRegistry(), "")
125133
require.NoError(err)
134+
135+
testHistogram := &testHistogram{
136+
Histogram: metrics.bloomFilterHitRate,
137+
}
138+
metrics.bloomFilterHitRate = testHistogram
139+
126140
marshaller := testMarshaller{}
127141
handler := NewHandler[*testTx](
128142
logging.NoLog{},
@@ -175,6 +189,8 @@ func TestGossiperGossip(t *testing.T) {
175189

176190
require.Len(requestSet.txs, tt.expectedLen)
177191
require.Subset(tt.expectedPossibleValues, maps.Values(requestSet.txs))
192+
require.Equal(tt.expectedObserved, testHistogram.observed)
193+
require.Equal(tt.expectedObservedVal, testHistogram.observedVal)
178194

179195
// we should not receive anything that we already had before we
180196
// requested the gossip
@@ -611,3 +627,58 @@ type testValidatorSet struct {
611627
// Has reports whether t.validators contains nodeID. The context argument is
// ignored.
func (t testValidatorSet) Has(_ context.Context, nodeID ids.NodeID) bool {
	isMember := t.validators.Contains(nodeID)
	return isMember
}
630+
631+
func TestComputeBloomFilterHitPercentage(t *testing.T) {
632+
tests := []struct {
633+
name string
634+
hits uint64
635+
misses uint64
636+
percentage uint64
637+
ok bool
638+
}{
639+
{
640+
name: "no hits",
641+
hits: 0,
642+
misses: 10,
643+
percentage: 0,
644+
ok: true,
645+
},
646+
{
647+
name: "no misses",
648+
hits: 10,
649+
misses: 0,
650+
percentage: 100,
651+
ok: true,
652+
},
653+
{
654+
name: "some hits",
655+
hits: 5,
656+
misses: 5,
657+
percentage: 50,
658+
ok: true,
659+
},
660+
{
661+
name: "nothing",
662+
},
663+
}
664+
665+
for _, tt := range tests {
666+
t.Run(tt.name, func(t *testing.T) {
667+
got, ok := computeBloomFilterHitPercentage(tt.hits, tt.misses, logging.NoLog{})
668+
require.Equal(t, tt.ok, ok)
669+
require.Equal(t, tt.percentage, got)
670+
})
671+
}
672+
}
673+
674+
type testHistogram struct {
675+
prometheus.Histogram
676+
observedVal uint64
677+
observed bool
678+
}
679+
680+
func (t *testHistogram) Observe(value float64) {
681+
t.Histogram.Observe(value)
682+
t.observedVal = uint64(value)
683+
t.observed = true
684+
}

network/p2p/gossip/handler.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"github.com/ava-labs/avalanchego/snow/engine/common"
1515
"github.com/ava-labs/avalanchego/utils/bloom"
1616
"github.com/ava-labs/avalanchego/utils/logging"
17+
18+
safemath "github.com/ava-labs/avalanchego/utils/math"
1719
)
1820

1921
var _ p2p.Handler = (*Handler[*testTx])(nil)
@@ -50,13 +52,16 @@ func (h Handler[T]) AppRequest(_ context.Context, _ ids.NodeID, _ time.Time, req
5052
return nil, p2p.ErrUnexpected
5153
}
5254

55+
var hits, misses uint64
56+
5357
responseSize := 0
5458
gossipBytes := make([][]byte, 0)
5559
h.set.Iterate(func(gossipable T) bool {
5660
gossipID := gossipable.GossipID()
5761

5862
// filter out what the requesting peer already knows about
5963
if bloom.Contains(filter, gossipID[:], salt[:]) {
64+
hits++
6065
return true
6166
}
6267

@@ -70,13 +75,19 @@ func (h Handler[T]) AppRequest(_ context.Context, _ ids.NodeID, _ time.Time, req
7075
// size
7176
gossipBytes = append(gossipBytes, bytes)
7277
responseSize += len(bytes)
78+
misses++
7379

7480
return responseSize <= h.targetResponseSize
7581
})
7682
if err != nil {
7783
return nil, p2p.ErrUnexpected
7884
}
7985

86+
hitsPercentage, ok := computeBloomFilterHitPercentage(hits, misses, h.log)
87+
if ok {
88+
h.metrics.bloomFilterHitRate.Observe(float64(hitsPercentage))
89+
}
90+
8091
if err := h.metrics.observeMessage(sentPullLabels, len(gossipBytes), responseSize); err != nil {
8192
return nil, p2p.ErrUnexpected
8293
}
@@ -124,3 +135,31 @@ func (h Handler[_]) AppGossip(_ context.Context, nodeID ids.NodeID, gossipBytes
124135
)
125136
}
126137
}
138+
139+
func computeBloomFilterHitPercentage(hits uint64, misses uint64, log logging.Logger) (uint64, bool) {
140+
total, err := safemath.Add(hits, misses)
141+
if err != nil {
142+
log.Warn("failed to calculate total hits and misses",
143+
zap.Uint64("hits", hits),
144+
zap.Uint64("misses", misses),
145+
zap.Error(err),
146+
)
147+
return 0, false
148+
}
149+
150+
hitsOneHundred, err := safemath.Mul(hits, 100)
151+
if err != nil {
152+
log.Warn("failed to calculate hit ratio",
153+
zap.Uint64("hits", hits),
154+
zap.Uint64("misses", misses),
155+
zap.Error(err),
156+
)
157+
return 0, false
158+
}
159+
160+
if total > 0 {
161+
return hitsOneHundred / total, true
162+
}
163+
164+
return 0, false
165+
}

0 commit comments

Comments
 (0)