Skip to content

Commit 9e6bf96

Browse files
authored
Add CPU cycles and number of disk reads/writes metrics by pid (ava-labs#1334)
1 parent b459661 commit 9e6bf96

File tree

4 files changed

+118
-5
lines changed

4 files changed

+118
-5
lines changed

node/node.go

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1196,15 +1196,19 @@ func (n *Node) initVdrs() validators.Set {
11961196

11971197
// Initialize [n.resourceManager].
11981198
func (n *Node) initResourceManager(reg prometheus.Registerer) error {
1199-
n.resourceManager = resource.NewManager(
1199+
resourceManager, err := resource.NewManager(
12001200
n.Config.DatabaseConfig.Path,
12011201
n.Config.SystemTrackerFrequency,
12021202
n.Config.SystemTrackerCPUHalflife,
12031203
n.Config.SystemTrackerDiskHalflife,
1204+
reg,
12041205
)
1206+
if err != nil {
1207+
return err
1208+
}
1209+
n.resourceManager = resourceManager
12051210
n.resourceManager.TrackProcess(os.Getpid())
12061211

1207-
var err error
12081212
n.resourceTracker, err = tracker.NewResourceTracker(reg, n.resourceManager, &meter.ContinuousFactory{}, n.Config.SystemTrackerProcessingHalflife)
12091213
return err
12101214
}

utils/resource/metrics.go

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,72 @@
1+
// Copyright (C) 2019-2023, Ava Labs, Inc. All rights reserved.
2+
// See the file LICENSE for licensing terms.
3+
4+
package resource
5+
6+
import (
7+
"github.com/prometheus/client_golang/prometheus"
8+
9+
"github.com/ava-labs/avalanchego/utils/wrappers"
10+
)
11+
12+
type metrics struct {
13+
numCPUCycles *prometheus.GaugeVec
14+
numDiskReads *prometheus.GaugeVec
15+
numDiskReadBytes *prometheus.GaugeVec
16+
numDiskWrites *prometheus.GaugeVec
17+
numDiskWritesBytes *prometheus.GaugeVec
18+
}
19+
20+
func newMetrics(namespace string, registerer prometheus.Registerer) (*metrics, error) {
21+
m := &metrics{
22+
numCPUCycles: prometheus.NewGaugeVec(
23+
prometheus.GaugeOpts{
24+
Namespace: namespace,
25+
Name: "num_cpu_cycles",
26+
Help: "Total number of CPU cycles",
27+
},
28+
[]string{"processID"},
29+
),
30+
numDiskReads: prometheus.NewGaugeVec(
31+
prometheus.GaugeOpts{
32+
Namespace: namespace,
33+
Name: "num_disk_reads",
34+
Help: "Total number of disk reads",
35+
},
36+
[]string{"processID"},
37+
),
38+
numDiskReadBytes: prometheus.NewGaugeVec(
39+
prometheus.GaugeOpts{
40+
Namespace: namespace,
41+
Name: "num_disk_read_bytes",
42+
Help: "Total number of disk read bytes",
43+
},
44+
[]string{"processID"},
45+
),
46+
numDiskWrites: prometheus.NewGaugeVec(
47+
prometheus.GaugeOpts{
48+
Namespace: namespace,
49+
Name: "num_disk_writes",
50+
Help: "Total number of disk writes",
51+
},
52+
[]string{"processID"},
53+
),
54+
numDiskWritesBytes: prometheus.NewGaugeVec(
55+
prometheus.GaugeOpts{
56+
Namespace: namespace,
57+
Name: "num_disk_write_bytes",
58+
Help: "Total number of disk write bytes",
59+
},
60+
[]string{"processID"},
61+
),
62+
}
63+
errs := wrappers.Errs{}
64+
errs.Add(
65+
registerer.Register(m.numCPUCycles),
66+
registerer.Register(m.numDiskReads),
67+
registerer.Register(m.numDiskReadBytes),
68+
registerer.Register(m.numDiskWrites),
69+
registerer.Register(m.numDiskWritesBytes),
70+
)
71+
return m, errs.Err
72+
}

utils/resource/usage.go

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,12 @@ package resource
55

66
import (
77
"math"
8+
"strconv"
89
"sync"
910
"time"
1011

12+
"github.com/prometheus/client_golang/prometheus"
13+
1114
"github.com/shirou/gopsutil/process"
1215

1316
"github.com/ava-labs/avalanchego/utils/storage"
@@ -62,6 +65,8 @@ type Manager interface {
6265
}
6366

6467
type manager struct {
68+
processMetrics *metrics
69+
6570
processesLock sync.Mutex
6671
processes map[int]*proc
6772

@@ -78,14 +83,27 @@ type manager struct {
7883
onClose chan struct{}
7984
}
8085

81-
func NewManager(diskPath string, frequency, cpuHalflife, diskHalflife time.Duration) Manager {
86+
func NewManager(
87+
diskPath string,
88+
frequency,
89+
cpuHalflife,
90+
diskHalflife time.Duration,
91+
metricsRegisterer prometheus.Registerer,
92+
) (Manager, error) {
93+
processMetrics, err := newMetrics("system_resources", metricsRegisterer)
94+
if err != nil {
95+
return nil, err
96+
}
97+
8298
m := &manager{
99+
processMetrics: processMetrics,
83100
processes: make(map[int]*proc),
84101
onClose: make(chan struct{}),
85102
availableDiskBytes: math.MaxUint64,
86103
}
104+
87105
go m.update(diskPath, frequency, cpuHalflife, diskHalflife)
88-
return m
106+
return m, nil
89107
}
90108

91109
func (m *manager) CPUUsage() float64 {
@@ -187,6 +205,13 @@ func (m *manager) getActiveUsage(secondsSinceLastUpdate float64) (float64, float
187205
totalCPU += cpu
188206
totalRead += read
189207
totalWrite += write
208+
209+
processIDStr := strconv.Itoa(int(p.p.Pid))
210+
m.processMetrics.numCPUCycles.WithLabelValues(processIDStr).Set(p.lastTotalCPU)
211+
m.processMetrics.numDiskReads.WithLabelValues(processIDStr).Set(float64(p.numReads))
212+
m.processMetrics.numDiskReadBytes.WithLabelValues(processIDStr).Set(float64(p.lastReadBytes))
213+
m.processMetrics.numDiskWrites.WithLabelValues(processIDStr).Set(float64(p.numWrites))
214+
m.processMetrics.numDiskWritesBytes.WithLabelValues(processIDStr).Set(float64(p.lastWriteBytes))
190215
}
191216

192217
return totalCPU, totalRead, totalWrite
@@ -200,8 +225,13 @@ type proc struct {
200225
// [lastTotalCPU] is the most recent measurement of total CPU usage.
201226
lastTotalCPU float64
202227

228+
// [numReads] is the total number of disk reads performed.
229+
numReads uint64
203230
// [lastReadBytes] is the most recent measurement of total disk bytes read.
204231
lastReadBytes uint64
232+
233+
// [numWrites] is the total number of disk writes performed.
234+
numWrites uint64
205235
// [lastWriteBytes] is the most recent measurement of total disk bytes
206236
// written.
207237
lastWriteBytes uint64
@@ -243,7 +273,9 @@ func (p *proc) getActiveUsage(secondsSinceLastUpdate float64) (float64, float64,
243273

244274
p.initialized = true
245275
p.lastTotalCPU = totalCPU
276+
p.numReads = io.ReadCount
246277
p.lastReadBytes = io.ReadBytes
278+
p.numWrites = io.WriteCount
247279
p.lastWriteBytes = io.WriteBytes
248280

249281
return cpu, read, write

vms/registry/vm_getter_test.go

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@ import (
1111

1212
"github.com/golang/mock/gomock"
1313

14+
"github.com/prometheus/client_golang/prometheus"
15+
1416
"github.com/stretchr/testify/require"
1517

1618
"github.com/ava-labs/avalanchego/ids"
@@ -143,13 +145,16 @@ func initVMGetterTest(t *testing.T) *vmGetterTestResources {
143145

144146
mockReader := filesystem.NewMockReader(ctrl)
145147
mockManager := vms.NewMockManager(ctrl)
148+
mockRegistry := prometheus.NewRegistry()
149+
mockCPUTracker, err := resource.NewManager(" ", time.Hour, time.Hour, time.Hour, mockRegistry)
150+
require.NoError(t, err)
146151

147152
getter := NewVMGetter(
148153
VMGetterConfig{
149154
FileReader: mockReader,
150155
Manager: mockManager,
151156
PluginDirectory: pluginDir,
152-
CPUTracker: resource.NewManager("", time.Hour, time.Hour, time.Hour),
157+
CPUTracker: mockCPUTracker,
153158
},
154159
)
155160

0 commit comments

Comments
 (0)