Skip to content

Commit 88b7c1e

Browse files
gzliudan, JukLee0ira
authored and committed
metrics: add cpu counters (ethereum#26796)
This PR adds counter metrics for the CPU system and the Geth process. Currently the only metrics available for these items are gauges. Gauges are fine when the consumer scrapes metrics data at the same interval as Geth produces new values (every 3 seconds), but it is likely that most consumers will not scrape that often. Intervals of 10, 15, or maybe even 30 seconds are probably more common. So the problem is: how does the consumer estimate what the CPU was doing in between scrapes? With a counter, it's easy ... you just subtract two successive values and divide by the time to get a nice, accurate average. But with a gauge, you can't do that. A gauge reading is an instantaneous picture of what was happening at that moment, but it gives you no idea about what was going on between scrapes. Taking an average of values is meaningless.
1 parent 1c86521 commit 88b7c1e

17 files changed

+312
-13
lines changed

metrics/counter_float64.go

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
package metrics
2+
3+
import (
4+
"sync"
5+
)
6+
7+
// CounterFloat64 describes a float64-valued counter that can be raised,
// lowered, reset, and copied into a snapshot.
type CounterFloat64 interface {
	// Clear resets the counter.
	Clear()
	// Count reports the counter's current value.
	Count() float64
	// Dec lowers the counter by the given amount.
	Dec(float64)
	// Inc raises the counter by the given amount.
	Inc(float64)
	// Snapshot returns a CounterFloat64 capturing the counter's state.
	Snapshot() CounterFloat64
}
15+
16+
// GetOrRegisterCounterFloat64 returns an existing CounterFloat64 or constructs and registers
17+
// a new StandardCounterFloat64.
18+
func GetOrRegisterCounterFloat64(name string, r Registry) CounterFloat64 {
19+
if nil == r {
20+
r = DefaultRegistry
21+
}
22+
return r.GetOrRegister(name, NewCounterFloat64).(CounterFloat64)
23+
}
24+
25+
// GetOrRegisterCounterFloat64Forced returns an existing CounterFloat64 or constructs and registers a
26+
// new CounterFloat64 no matter the global switch is enabled or not.
27+
// Be sure to unregister the counter from the registry once it is of no use to
28+
// allow for garbage collection.
29+
func GetOrRegisterCounterFloat64Forced(name string, r Registry) CounterFloat64 {
30+
if nil == r {
31+
r = DefaultRegistry
32+
}
33+
return r.GetOrRegister(name, NewCounterFloat64Forced).(CounterFloat64)
34+
}
35+
36+
// NewCounterFloat64 constructs a new StandardCounterFloat64.
37+
func NewCounterFloat64() CounterFloat64 {
38+
if !Enabled {
39+
return NilCounterFloat64{}
40+
}
41+
return &StandardCounterFloat64{count: 0.0}
42+
}
43+
44+
// NewCounterFloat64Forced constructs a new StandardCounterFloat64 and returns it no matter if
45+
// the global switch is enabled or not.
46+
func NewCounterFloat64Forced() CounterFloat64 {
47+
return &StandardCounterFloat64{count: 0.0}
48+
}
49+
50+
// NewRegisteredCounterFloat64 constructs and registers a new StandardCounterFloat64.
51+
func NewRegisteredCounterFloat64(name string, r Registry) CounterFloat64 {
52+
c := NewCounterFloat64()
53+
if nil == r {
54+
r = DefaultRegistry
55+
}
56+
r.Register(name, c)
57+
return c
58+
}
59+
60+
// NewRegisteredCounterFloat64Forced constructs and registers a new StandardCounterFloat64
61+
// and launches a goroutine no matter the global switch is enabled or not.
62+
// Be sure to unregister the counter from the registry once it is of no use to
63+
// allow for garbage collection.
64+
func NewRegisteredCounterFloat64Forced(name string, r Registry) CounterFloat64 {
65+
c := NewCounterFloat64Forced()
66+
if nil == r {
67+
r = DefaultRegistry
68+
}
69+
r.Register(name, c)
70+
return c
71+
}
72+
73+
// CounterFloat64Snapshot is a read-only copy of another CounterFloat64.
74+
type CounterFloat64Snapshot float64
75+
76+
// Clear panics.
77+
func (CounterFloat64Snapshot) Clear() {
78+
panic("Clear called on a CounterFloat64Snapshot")
79+
}
80+
81+
// Count returns the value at the time the snapshot was taken.
82+
func (c CounterFloat64Snapshot) Count() float64 { return float64(c) }
83+
84+
// Dec panics.
85+
func (CounterFloat64Snapshot) Dec(float64) {
86+
panic("Dec called on a CounterFloat64Snapshot")
87+
}
88+
89+
// Inc panics.
90+
func (CounterFloat64Snapshot) Inc(float64) {
91+
panic("Inc called on a CounterFloat64Snapshot")
92+
}
93+
94+
// Snapshot returns the snapshot.
95+
func (c CounterFloat64Snapshot) Snapshot() CounterFloat64 { return c }
96+
97+
// NilCounterFloat64 is a no-op CounterFloat64.
98+
type NilCounterFloat64 struct{}
99+
100+
// Clear is a no-op.
101+
func (NilCounterFloat64) Clear() {}
102+
103+
// Count is a no-op.
104+
func (NilCounterFloat64) Count() float64 { return 0.0 }
105+
106+
// Dec is a no-op.
107+
func (NilCounterFloat64) Dec(i float64) {}
108+
109+
// Inc is a no-op.
110+
func (NilCounterFloat64) Inc(i float64) {}
111+
112+
// Snapshot is a no-op.
113+
func (NilCounterFloat64) Snapshot() CounterFloat64 { return NilCounterFloat64{} }
114+
115+
// StandardCounterFloat64 is the standard implementation of a CounterFloat64 and uses the
116+
// sync.Mutex package to manage a single float64 value.
117+
type StandardCounterFloat64 struct {
118+
mutex sync.Mutex
119+
count float64
120+
}
121+
122+
// Clear sets the counter to zero.
123+
func (c *StandardCounterFloat64) Clear() {
124+
c.mutex.Lock()
125+
defer c.mutex.Unlock()
126+
c.count = 0.0
127+
}
128+
129+
// Count returns the current value.
130+
func (c *StandardCounterFloat64) Count() float64 {
131+
c.mutex.Lock()
132+
defer c.mutex.Unlock()
133+
return c.count
134+
}
135+
136+
// Dec decrements the counter by the given amount.
137+
func (c *StandardCounterFloat64) Dec(v float64) {
138+
c.mutex.Lock()
139+
defer c.mutex.Unlock()
140+
c.count -= v
141+
}
142+
143+
// Inc increments the counter by the given amount.
144+
func (c *StandardCounterFloat64) Inc(v float64) {
145+
c.mutex.Lock()
146+
defer c.mutex.Unlock()
147+
c.count += v
148+
}
149+
150+
// Snapshot returns a read-only copy of the counter.
151+
func (c *StandardCounterFloat64) Snapshot() CounterFloat64 {
152+
return CounterFloat64Snapshot(c.Count())
153+
}

metrics/counter_float_64_test.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package metrics
2+
3+
import "testing"
4+
5+
func BenchmarkCounterFloat64(b *testing.B) {
6+
c := NewCounterFloat64()
7+
b.ResetTimer()
8+
for i := 0; i < b.N; i++ {
9+
c.Inc(1.0)
10+
}
11+
}
12+
13+
func TestCounterFloat64Clear(t *testing.T) {
14+
c := NewCounterFloat64()
15+
c.Inc(1.0)
16+
c.Clear()
17+
if count := c.Count(); count != 0 {
18+
t.Errorf("c.Count(): 0 != %v\n", count)
19+
}
20+
}
21+
22+
func TestCounterFloat64Dec1(t *testing.T) {
23+
c := NewCounterFloat64()
24+
c.Dec(1.0)
25+
if count := c.Count(); count != -1.0 {
26+
t.Errorf("c.Count(): -1.0 != %v\n", count)
27+
}
28+
}
29+
30+
func TestCounterFloat64Dec2(t *testing.T) {
31+
c := NewCounterFloat64()
32+
c.Dec(2.0)
33+
if count := c.Count(); count != -2.0 {
34+
t.Errorf("c.Count(): -2.0 != %v\n", count)
35+
}
36+
}
37+
38+
func TestCounterFloat64Inc1(t *testing.T) {
39+
c := NewCounterFloat64()
40+
c.Inc(1.0)
41+
if count := c.Count(); count != 1.0 {
42+
t.Errorf("c.Count(): 1.0 != %v\n", count)
43+
}
44+
}
45+
46+
func TestCounterFloat64Inc2(t *testing.T) {
47+
c := NewCounterFloat64()
48+
c.Inc(2.0)
49+
if count := c.Count(); count != 2.0 {
50+
t.Errorf("c.Count(): 2.0 != %v\n", count)
51+
}
52+
}
53+
54+
func TestCounterFloat64Snapshot(t *testing.T) {
55+
c := NewCounterFloat64()
56+
c.Inc(1.0)
57+
snapshot := c.Snapshot()
58+
c.Inc(1.0)
59+
if count := snapshot.Count(); count != 1.0 {
60+
t.Errorf("c.Count(): 1.0 != %v\n", count)
61+
}
62+
}
63+
64+
func TestCounterFloat64Zero(t *testing.T) {
65+
c := NewCounterFloat64()
66+
if count := c.Count(); count != 0 {
67+
t.Errorf("c.Count(): 0 != %v\n", count)
68+
}
69+
}
70+
71+
func TestGetOrRegisterCounterFloat64(t *testing.T) {
72+
r := NewRegistry()
73+
NewRegisteredCounterFloat64("foo", r).Inc(47.0)
74+
if c := GetOrRegisterCounterFloat64("foo", r); c.Count() != 47.0 {
75+
t.Fatal(c)
76+
}
77+
}

metrics/exp/exp.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ func (exp *exp) publishCounter(name string, metric metrics.Counter) {
100100
v.Set(metric.Count())
101101
}
102102

103+
func (exp *exp) publishCounterFloat64(name string, metric metrics.CounterFloat64) {
104+
v := exp.getFloat(name)
105+
v.Set(metric.Count())
106+
}
107+
103108
func (exp *exp) publishGauge(name string, metric metrics.Gauge) {
104109
v := exp.getInt(name)
105110
v.Set(metric.Value())
@@ -167,6 +172,8 @@ func (exp *exp) syncToExpvar() {
167172
switch i := i.(type) {
168173
case metrics.Counter:
169174
exp.publishCounter(name, i)
175+
case metrics.CounterFloat64:
176+
exp.publishCounterFloat64(name, i)
170177
case metrics.Gauge:
171178
exp.publishGauge(name, i)
172179
case metrics.GaugeFloat64:

metrics/graphite.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ func graphite(c *GraphiteConfig) error {
6767
switch metric := i.(type) {
6868
case Counter:
6969
fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, metric.Count(), now)
70+
case CounterFloat64:
71+
fmt.Fprintf(w, "%s.%s.count %f %d\n", c.Prefix, name, metric.Count(), now)
7072
case Gauge:
7173
fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Value(), now)
7274
case GaugeFloat64:

metrics/influxdb/influxdb.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,16 @@ func (r *reporter) send() error {
113113
},
114114
Time: now,
115115
})
116+
case metrics.CounterFloat64:
117+
count := metric.Count()
118+
pts = append(pts, client.Point{
119+
Measurement: fmt.Sprintf("%s%s.count", namespace, name),
120+
Tags: r.tags,
121+
Fields: map[string]interface{}{
122+
"value": count,
123+
},
124+
Time: now,
125+
})
116126
case metrics.Gauge:
117127
ms := metric.Snapshot()
118128
pts = append(pts, client.Point{

metrics/influxdb/influxdbv2.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ type v2Reporter struct {
2424

2525
client influxdb2.Client
2626
write api.WriteAPI
27-
28-
cache map[string]int64
2927
}
3028

3129
// InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags
@@ -39,7 +37,6 @@ func InfluxDBV2WithTags(r metrics.Registry, d time.Duration, endpoint string, to
3937
organization: organization,
4038
namespace: namespace,
4139
tags: tags,
42-
cache: make(map[string]int64),
4340
}
4441

4542
rep.client = influxdb2.NewClient(rep.endpoint, rep.token)
@@ -87,17 +84,25 @@ func (r *v2Reporter) send() {
8784

8885
case metrics.Counter:
8986
v := metric.Count()
90-
l := r.cache[name]
9187

9288
measurement := fmt.Sprintf("%s%s.count", namespace, name)
9389
fields := map[string]interface{}{
94-
"value": v - l,
90+
"value": v,
9591
}
9692

9793
pt := influxdb2.NewPoint(measurement, r.tags, fields, now)
9894
r.write.WritePoint(pt)
9995

100-
r.cache[name] = v
96+
case metrics.CounterFloat64:
97+
v := metric.Count()
98+
99+
measurement := fmt.Sprintf("%s%s.count", namespace, name)
100+
fields := map[string]interface{}{
101+
"value": v,
102+
}
103+
104+
pt := influxdb2.NewPoint(measurement, r.tags, fields, now)
105+
r.write.WritePoint(pt)
101106

102107
case metrics.Gauge:
103108
ms := metric.Snapshot()

metrics/librato/librato.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,17 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba
107107
}
108108
snapshot.Counters = append(snapshot.Counters, measurement)
109109
}
110+
case metrics.CounterFloat64:
111+
if m.Count() > 0 {
112+
measurement[Name] = fmt.Sprintf("%s.%s", name, "count")
113+
measurement[Value] = m.Count()
114+
measurement[Attributes] = map[string]interface{}{
115+
DisplayUnitsLong: Operations,
116+
DisplayUnitsShort: OperationsShort,
117+
DisplayMin: "0",
118+
}
119+
snapshot.Counters = append(snapshot.Counters, measurement)
120+
}
110121
case metrics.Gauge:
111122
measurement[Name] = name
112123
measurement[Value] = float64(m.Value())

metrics/log.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) {
2424
case Counter:
2525
l.Printf("counter %s\n", name)
2626
l.Printf(" count: %9d\n", metric.Count())
27+
case CounterFloat64:
28+
l.Printf("counter %s\n", name)
29+
l.Printf(" count: %f\n", metric.Count())
2730
case Gauge:
2831
l.Printf("gauge %s\n", name)
2932
l.Printf(" value: %9d\n", metric.Value())

metrics/metrics.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ func CollectProcessMetrics(refresh time.Duration) {
144144
cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry)
145145
cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry)
146146
cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry)
147+
cpuSysLoadTotal = GetOrRegisterCounterFloat64("system/cpu/sysload/total", DefaultRegistry)
148+
cpuSysWaitTotal = GetOrRegisterCounterFloat64("system/cpu/syswait/total", DefaultRegistry)
149+
cpuProcLoadTotal = GetOrRegisterCounterFloat64("system/cpu/procload/total", DefaultRegistry)
147150
cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry)
148151
cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry)
149152
cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil)
@@ -172,13 +175,17 @@ func CollectProcessMetrics(refresh time.Duration) {
172175
secondsSinceLastCollect := collectTime.Sub(lastCollectTime).Seconds()
173176
lastCollectTime = collectTime
174177
if secondsSinceLastCollect > 0 {
175-
sysLoad := (cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / secondsSinceLastCollect
176-
sysWait := (cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / secondsSinceLastCollect
177-
procLoad := (cpustats[now].LocalTime - cpustats[prev].LocalTime) / secondsSinceLastCollect
178+
sysLoad := cpustats[now].GlobalTime - cpustats[prev].GlobalTime
179+
sysWait := cpustats[now].GlobalWait - cpustats[prev].GlobalWait
180+
procLoad := cpustats[now].LocalTime - cpustats[prev].LocalTime
178181
// Convert to integer percentage.
179-
cpuSysLoad.Update(int64(sysLoad * 100))
180-
cpuSysWait.Update(int64(sysWait * 100))
181-
cpuProcLoad.Update(int64(procLoad * 100))
182+
cpuSysLoad.Update(int64(sysLoad / secondsSinceLastCollect * 100))
183+
cpuSysWait.Update(int64(sysWait / secondsSinceLastCollect * 100))
184+
cpuProcLoad.Update(int64(procLoad / secondsSinceLastCollect * 100))
185+
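// increment counters with the raw deltas (seconds, the unit divided by secondsSinceLastCollect above, not ms — NOTE(review): confirm cpustats units)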
186+
cpuSysLoadTotal.Inc(sysLoad)
187+
cpuSysWaitTotal.Inc(sysWait)
188+
cpuProcLoadTotal.Inc(procLoad)
182189
}
183190

184191
// Threads

0 commit comments

Comments
 (0)