Skip to content

Commit 3822500

Browse files
author
Dan Laine
authored
sync -- re-add network client metrics (#1787)
1 parent 71eb412 commit 3822500

File tree

3 files changed

+65
-22
lines changed

3 files changed

+65
-22
lines changed

x/sync/client_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ func sendRangeRequest(
5656
sender := common.NewMockSender(ctrl)
5757
handler := NewNetworkServer(sender, db, logging.NoLog{})
5858
clientNodeID, serverNodeID := ids.GenerateTestNodeID(), ids.GenerateTestNodeID()
59-
networkClient := NewNetworkClient(sender, clientNodeID, 1, logging.NoLog{})
59+
networkClient, err := NewNetworkClient(sender, clientNodeID, 1, logging.NoLog{}, "", prometheus.NewRegistry())
60+
require.NoError(err)
6061
require.NoError(networkClient.Connected(context.Background(), serverNodeID, version.CurrentApp))
6162
client := NewClient(&ClientConfig{
6263
NetworkClient: networkClient,
@@ -315,7 +316,8 @@ func sendChangeRequest(
315316
sender := common.NewMockSender(ctrl)
316317
handler := NewNetworkServer(sender, db, logging.NoLog{})
317318
clientNodeID, serverNodeID := ids.GenerateTestNodeID(), ids.GenerateTestNodeID()
318-
networkClient := NewNetworkClient(sender, clientNodeID, 1, logging.NoLog{})
319+
networkClient, err := NewNetworkClient(sender, clientNodeID, 1, logging.NoLog{}, "", prometheus.NewRegistry())
320+
require.NoError(err)
319321
require.NoError(networkClient.Connected(context.Background(), serverNodeID, version.CurrentApp))
320322
client := NewClient(&ClientConfig{
321323
NetworkClient: networkClient,

x/sync/network_client.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
"sync"
1111
"time"
1212

13+
"github.com/prometheus/client_golang/prometheus"
14+
1315
"go.uber.org/zap"
1416

1517
"golang.org/x/sync/semaphore"
@@ -73,15 +75,22 @@ func NewNetworkClient(
7375
myNodeID ids.NodeID,
7476
maxActiveRequests int64,
7577
log logging.Logger,
76-
) NetworkClient {
78+
metricsNamespace string,
79+
registerer prometheus.Registerer,
80+
) (NetworkClient, error) {
81+
peerTracker, err := newPeerTracker(log, metricsNamespace, registerer)
82+
if err != nil {
83+
return nil, fmt.Errorf("failed to create peer tracker: %w", err)
84+
}
85+
7786
return &networkClient{
7887
appSender: appSender,
7988
myNodeID: myNodeID,
8089
outstandingRequestHandlers: make(map[uint32]ResponseHandler),
8190
activeRequests: semaphore.NewWeighted(maxActiveRequests),
82-
peers: newPeerTracker(log),
91+
peers: peerTracker,
8392
log: log,
84-
}
93+
}, nil
8594
}
8695

8796
// Always returns nil because the engine considers errors

x/sync/peer_tracker.go

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import (
88
"sync"
99
"time"
1010

11+
"github.com/prometheus/client_golang/prometheus"
12+
1113
stdmath "math"
1214

1315
"go.uber.org/zap"
@@ -16,6 +18,7 @@ import (
1618
"github.com/ava-labs/avalanchego/utils/logging"
1719
"github.com/ava-labs/avalanchego/utils/math"
1820
"github.com/ava-labs/avalanchego/utils/set"
21+
"github.com/ava-labs/avalanchego/utils/wrappers"
1922
"github.com/ava-labs/avalanchego/version"
2023
)
2124

@@ -52,27 +55,56 @@ type peerTracker struct {
5255
// Peers that we're connected to that responded to the last request they were sent.
5356
responsivePeers set.Set[ids.NodeID]
5457
// Max heap that contains the average bandwidth of peers.
55-
bandwidthHeap math.AveragerHeap
56-
averageBandwidth math.Averager
57-
log logging.Logger
58-
// numTrackedPeers prometheus.Gauge
59-
// numResponsivePeers prometheus.Gauge
60-
// averageBandwidthMetric prometheus.Gauge
58+
bandwidthHeap math.AveragerHeap
59+
averageBandwidth math.Averager
60+
log logging.Logger
61+
numTrackedPeers prometheus.Gauge
62+
numResponsivePeers prometheus.Gauge
63+
averageBandwidthMetric prometheus.Gauge
6164
}
6265

63-
func newPeerTracker(log logging.Logger) *peerTracker {
64-
// TODO: initialize metrics
65-
return &peerTracker{
66+
func newPeerTracker(
67+
log logging.Logger,
68+
metricsNamespace string,
69+
registerer prometheus.Registerer,
70+
) (*peerTracker, error) {
71+
t := &peerTracker{
6672
peers: make(map[ids.NodeID]*peerInfo),
6773
trackedPeers: make(set.Set[ids.NodeID]),
6874
responsivePeers: make(set.Set[ids.NodeID]),
6975
bandwidthHeap: math.NewMaxAveragerHeap(),
7076
averageBandwidth: math.NewAverager(0, bandwidthHalflife, time.Now()),
7177
log: log,
72-
// numTrackedPeers: metrics.GetOrRegisterGauge("net_tracked_peers", nil),
73-
// numResponsivePeers: metrics.GetOrRegisterGauge("net_responsive_peers", nil),
74-
// averageBandwidthMetric: metrics.GetOrRegisterGaugeFloat64("net_average_bandwidth", nil),
78+
numTrackedPeers: prometheus.NewGauge(
79+
prometheus.GaugeOpts{
80+
Namespace: metricsNamespace,
81+
Name: "num_tracked_peers",
82+
Help: "number of tracked peers",
83+
},
84+
),
85+
numResponsivePeers: prometheus.NewGauge(
86+
prometheus.GaugeOpts{
87+
Namespace: metricsNamespace,
88+
Name: "num_responsive_peers",
89+
Help: "number of responsive peers",
90+
},
91+
),
92+
averageBandwidthMetric: prometheus.NewGauge(
93+
prometheus.GaugeOpts{
94+
Namespace: metricsNamespace,
95+
Name: "average_bandwidth",
96+
Help: "average sync bandwidth used by peers",
97+
},
98+
),
7599
}
100+
101+
errs := wrappers.Errs{}
102+
errs.Add(
103+
registerer.Register(t.numTrackedPeers),
104+
registerer.Register(t.numResponsivePeers),
105+
registerer.Register(t.averageBandwidthMetric),
106+
)
107+
return t, errs.Err
76108
}
77109

78110
// Returns true if we're not connected to enough peers.
@@ -158,7 +190,7 @@ func (p *peerTracker) TrackPeer(nodeID ids.NodeID) {
158190
defer p.lock.Unlock()
159191

160192
p.trackedPeers.Add(nodeID)
161-
// p.numTrackedPeers.Set(float64(p.trackedPeers.Len()))
193+
p.numTrackedPeers.Set(float64(p.trackedPeers.Len()))
162194
}
163195

164196
// Record that we observed that [nodeID]'s bandwidth is [bandwidth].
@@ -189,9 +221,9 @@ func (p *peerTracker) TrackBandwidth(nodeID ids.NodeID, bandwidth float64) {
189221
// TODO danlaine: shouldn't we add the observation of 0
190222
// to the average bandwidth in the if statement?
191223
p.averageBandwidth.Observe(bandwidth, now)
192-
// p.averageBandwidthMetric.Set(p.averageBandwidth.Read())
224+
p.averageBandwidthMetric.Set(p.averageBandwidth.Read())
193225
}
194-
// p.numResponsivePeers.Set(float64(p.responsivePeers.Len()))
226+
p.numResponsivePeers.Set(float64(p.responsivePeers.Len()))
195227
}
196228

197229
// Connected should be called when [nodeID] connects to this node
@@ -236,9 +268,9 @@ func (p *peerTracker) Disconnected(nodeID ids.NodeID) {
236268

237269
p.bandwidthHeap.Remove(nodeID)
238270
p.trackedPeers.Remove(nodeID)
239-
// p.numTrackedPeers.Set(float64(p.trackedPeers.Len()))
271+
p.numTrackedPeers.Set(float64(p.trackedPeers.Len()))
240272
p.responsivePeers.Remove(nodeID)
241-
// p.numResponsivePeers.Set(float64(p.responsivePeers.Len()))
273+
p.numResponsivePeers.Set(float64(p.responsivePeers.Len()))
242274
delete(p.peers, nodeID)
243275
}
244276

0 commit comments

Comments
 (0)