diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index d4ebea74fbe7..56f19e944abf 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -135,27 +135,28 @@ func (r *reporter) send() error { }) case metrics.Histogram: ms := metric.Snapshot() - if ms.Count() > 0 { - ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + ps := ms.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p25": ps[0], + "p50": ps[1], + "p75": ps[2], + "p95": ps[3], + "p99": ps[4], + "p999": ps[5], + "p9999": ps[6], + } pts = append(pts, client.Point{ Measurement: fmt.Sprintf("%s%s.histogram", namespace, name), Tags: r.tags, - Fields: map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p50": ps[0], - "p75": ps[1], - "p95": ps[2], - "p99": ps[3], - "p999": ps[4], - "p9999": ps[5], - }, - Time: now, + Fields: fields, + Time: now, }) } case metrics.Meter: diff --git a/metrics/metrics.go b/metrics/metrics.go index 2df2404b5f60..2b8bad8bee36 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -7,7 +7,8 @@ package metrics import ( "os" - "runtime" + "runtime/metrics" + "runtime/pprof" "strings" "time" @@ -54,38 +55,106 @@ func init() { } } -// CollectProcessMetrics periodically collects various metrics about the running -// process. 
+var threadCreateProfile = pprof.Lookup("threadcreate") + +type runtimeStats struct { + GCPauses *metrics.Float64Histogram + GCAllocBytes uint64 + GCFreedBytes uint64 + + MemTotal uint64 + HeapObjects uint64 + HeapFree uint64 + HeapReleased uint64 + HeapUnused uint64 + + Goroutines uint64 + SchedLatency *metrics.Float64Histogram +} + +var runtimeSamples = []metrics.Sample{ + {Name: "/gc/pauses:seconds"}, // histogram + {Name: "/gc/heap/allocs:bytes"}, + {Name: "/gc/heap/frees:bytes"}, + {Name: "/memory/classes/total:bytes"}, + {Name: "/memory/classes/heap/objects:bytes"}, + {Name: "/memory/classes/heap/free:bytes"}, + {Name: "/memory/classes/heap/released:bytes"}, + {Name: "/memory/classes/heap/unused:bytes"}, + {Name: "/sched/goroutines:goroutines"}, + {Name: "/sched/latencies:seconds"}, // histogram +} + +func readRuntimeStats(v *runtimeStats) { + metrics.Read(runtimeSamples) + for _, s := range runtimeSamples { + // Skip invalid/unknown metrics. This is needed because some metrics + // are unavailable in older Go versions, and attempting to read a 'bad' + // metric panics. 
+ if s.Value.Kind() == metrics.KindBad { + continue + } + + switch s.Name { + case "/gc/pauses:seconds": + v.GCPauses = s.Value.Float64Histogram() + case "/gc/heap/allocs:bytes": + v.GCAllocBytes = s.Value.Uint64() + case "/gc/heap/frees:bytes": + v.GCFreedBytes = s.Value.Uint64() + case "/memory/classes/total:bytes": + v.MemTotal = s.Value.Uint64() + case "/memory/classes/heap/objects:bytes": + v.HeapObjects = s.Value.Uint64() + case "/memory/classes/heap/free:bytes": + v.HeapFree = s.Value.Uint64() + case "/memory/classes/heap/released:bytes": + v.HeapReleased = s.Value.Uint64() + case "/memory/classes/heap/unused:bytes": + v.HeapUnused = s.Value.Uint64() + case "/sched/goroutines:goroutines": + v.Goroutines = s.Value.Uint64() + case "/sched/latencies:seconds": + v.SchedLatency = s.Value.Float64Histogram() + } + } +} + +// CollectProcessMetrics periodically collects various metrics about the running process. func CollectProcessMetrics(refresh time.Duration) { // Short circuit if the metrics system is disabled if !Enabled { return } + refreshFreq := int64(refresh / time.Second) // Create the various data collectors - cpuStats := make([]*CPUStats, 2) - memstats := make([]*runtime.MemStats, 2) - diskstats := make([]*DiskStats, 2) - for i := 0; i < len(memstats); i++ { - cpuStats[i] = new(CPUStats) - memstats[i] = new(runtime.MemStats) - diskstats[i] = new(DiskStats) - } - // Define the various metrics to collect var ( - cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) - cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) - cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) - cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) - cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) - - memPauses = GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) - memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) - memFrees = 
GetOrRegisterMeter("system/memory/frees", DefaultRegistry) - memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) - memUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) + cpustats = make([]CPUStats, 2) + diskstats = make([]DiskStats, 2) + rstats = make([]runtimeStats, 2) + ) + + // This scale factor is used for the runtime's time metrics. It's useful to convert to + // ns here because the runtime gives times in float seconds, but runtimeHistogram can + // only provide integers for the minimum and maximum values. + const secondsToNs = float64(time.Second) + // Define the various metrics to collect + var ( + cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) + cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) + cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) + cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) + cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) + cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil) + memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil) + memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) + memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) + memTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry) + heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) + heapObjects = GetOrRegisterGauge("system/memory/objects", DefaultRegistry) diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) @@ -93,34 +162,43 @@ func CollectProcessMetrics(refresh time.Duration) { diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) diskWriteBytesCounter = 
GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) ) - // Iterate loading the different stats and updating the meters - for i := 1; ; i++ { - location1 := i % 2 - location2 := (i - 1) % 2 - - ReadCPUStats(cpuStats[location1]) - cpuSysLoad.Update((cpuStats[location1].GlobalTime - cpuStats[location2].GlobalTime) / refreshFreq) - cpuSysWait.Update((cpuStats[location1].GlobalWait - cpuStats[location2].GlobalWait) / refreshFreq) - cpuProcLoad.Update((cpuStats[location1].LocalTime - cpuStats[location2].LocalTime) / refreshFreq) + + // Iterate loading the different stats and updating the meters. + now, prev := 0, 1 + for ; ; now, prev = prev, now { + // CPU + ReadCPUStats(&cpustats[now]) + cpuSysLoad.Update((cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / refreshFreq) + cpuSysWait.Update((cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / refreshFreq) + cpuProcLoad.Update((cpustats[now].LocalTime - cpustats[prev].LocalTime) / refreshFreq) + + // Threads cpuThreads.Update(int64(threadCreateProfile.Count())) - cpuGoroutines.Update(int64(runtime.NumGoroutine())) - - runtime.ReadMemStats(memstats[location1]) - memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) - memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs)) - memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees)) - memHeld.Update(int64(memstats[location1].HeapSys - memstats[location1].HeapReleased)) - memUsed.Update(int64(memstats[location1].Alloc)) - - if ReadDiskStats(diskstats[location1]) == nil { - diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) - diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) - diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount) - diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) - - diskReadBytesCounter.Inc(diskstats[location1].ReadBytes - 
diskstats[location2].ReadBytes) - diskWriteBytesCounter.Inc(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) + + // Go runtime metrics + readRuntimeStats(&rstats[now]) + + cpuGoroutines.Update(int64(rstats[now].Goroutines)) + cpuSchedLatency.update(rstats[now].SchedLatency) + memPauses.update(rstats[now].GCPauses) + + memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes)) + memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes)) + + memTotal.Update(int64(rstats[now].MemTotal)) + heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased)) + heapObjects.Update(int64(rstats[now].HeapObjects)) + + // Disk + if ReadDiskStats(&diskstats[now]) == nil { + diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount) + diskReadBytes.Mark(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) + diskWrites.Mark(diskstats[now].WriteCount - diskstats[prev].WriteCount) + diskWriteBytes.Mark(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) + diskReadBytesCounter.Inc(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) + diskWriteBytesCounter.Inc(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) } + time.Sleep(refresh) } } diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 029c99870eba..e3fde1ea62ce 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -2,8 +2,6 @@ package metrics import ( "fmt" - "io" - "log" "sync" "testing" "time" @@ -11,11 +9,11 @@ import ( const FANOUT = 128 -// Stop the compiler from complaining during debugging. 
-var ( - _ = io.Discard - _ = log.LstdFlags -) +func TestReadRuntimeValues(t *testing.T) { + var v runtimeStats + readRuntimeStats(&v) + t.Logf("%+v", v) +} func BenchmarkMetrics(b *testing.B) { r := NewRegistry() @@ -26,7 +24,6 @@ func BenchmarkMetrics(b *testing.B) { m := NewRegisteredMeter("meter", r) t := NewRegisteredTimer("timer", r) RegisterDebugGCStats(r) - RegisterRuntimeMemStats(r) b.ResetTimer() ch := make(chan bool) @@ -48,24 +45,6 @@ func BenchmarkMetrics(b *testing.B) { }() //*/ - wgR := &sync.WaitGroup{} - //* - wgR.Add(1) - go func() { - defer wgR.Done() - //log.Println("go CaptureRuntimeMemStats") - for { - select { - case <-ch: - //log.Println("done CaptureRuntimeMemStats") - return - default: - CaptureRuntimeMemStatsOnce(r) - } - } - }() - //*/ - wgW := &sync.WaitGroup{} /* wgW.Add(1) @@ -104,7 +83,6 @@ func BenchmarkMetrics(b *testing.B) { wg.Wait() close(ch) wgD.Wait() - wgR.Wait() wgW.Wait() } diff --git a/metrics/runtime.go b/metrics/runtime.go deleted file mode 100644 index 9450c479bad7..000000000000 --- a/metrics/runtime.go +++ /dev/null @@ -1,212 +0,0 @@ -package metrics - -import ( - "runtime" - "runtime/pprof" - "time" -) - -var ( - memStats runtime.MemStats - runtimeMetrics struct { - MemStats struct { - Alloc Gauge - BuckHashSys Gauge - DebugGC Gauge - EnableGC Gauge - Frees Gauge - HeapAlloc Gauge - HeapIdle Gauge - HeapInuse Gauge - HeapObjects Gauge - HeapReleased Gauge - HeapSys Gauge - LastGC Gauge - Lookups Gauge - Mallocs Gauge - MCacheInuse Gauge - MCacheSys Gauge - MSpanInuse Gauge - MSpanSys Gauge - NextGC Gauge - NumGC Gauge - GCCPUFraction GaugeFloat64 - PauseNs Histogram - PauseTotalNs Gauge - StackInuse Gauge - StackSys Gauge - Sys Gauge - TotalAlloc Gauge - } - NumCgoCall Gauge - NumGoroutine Gauge - NumThread Gauge - ReadMemStats Timer - } - frees uint64 - lookups uint64 - mallocs uint64 - numGC uint32 - numCgoCalls int64 - - threadCreateProfile = pprof.Lookup("threadcreate") -) - -// Capture new values for the Go 
runtime statistics exported in -// runtime.MemStats. This is designed to be called as a goroutine. -func CaptureRuntimeMemStats(r Registry, d time.Duration) { - for range time.Tick(d) { - CaptureRuntimeMemStatsOnce(r) - } -} - -// Capture new values for the Go runtime statistics exported in -// runtime.MemStats. This is designed to be called in a background -// goroutine. Giving a registry which has not been given to -// RegisterRuntimeMemStats will panic. -// -// Be very careful with this because runtime.ReadMemStats calls the C -// functions runtime·semacquire(&runtime·worldsema) and runtime·stoptheworld() -// and that last one does what it says on the tin. -func CaptureRuntimeMemStatsOnce(r Registry) { - t := time.Now() - runtime.ReadMemStats(&memStats) // This takes 50-200us. - runtimeMetrics.ReadMemStats.UpdateSince(t) - - runtimeMetrics.MemStats.Alloc.Update(int64(memStats.Alloc)) - runtimeMetrics.MemStats.BuckHashSys.Update(int64(memStats.BuckHashSys)) - if memStats.DebugGC { - runtimeMetrics.MemStats.DebugGC.Update(1) - } else { - runtimeMetrics.MemStats.DebugGC.Update(0) - } - if memStats.EnableGC { - runtimeMetrics.MemStats.EnableGC.Update(1) - } else { - runtimeMetrics.MemStats.EnableGC.Update(0) - } - - runtimeMetrics.MemStats.Frees.Update(int64(memStats.Frees - frees)) - runtimeMetrics.MemStats.HeapAlloc.Update(int64(memStats.HeapAlloc)) - runtimeMetrics.MemStats.HeapIdle.Update(int64(memStats.HeapIdle)) - runtimeMetrics.MemStats.HeapInuse.Update(int64(memStats.HeapInuse)) - runtimeMetrics.MemStats.HeapObjects.Update(int64(memStats.HeapObjects)) - runtimeMetrics.MemStats.HeapReleased.Update(int64(memStats.HeapReleased)) - runtimeMetrics.MemStats.HeapSys.Update(int64(memStats.HeapSys)) - runtimeMetrics.MemStats.LastGC.Update(int64(memStats.LastGC)) - runtimeMetrics.MemStats.Lookups.Update(int64(memStats.Lookups - lookups)) - runtimeMetrics.MemStats.Mallocs.Update(int64(memStats.Mallocs - mallocs)) - 
runtimeMetrics.MemStats.MCacheInuse.Update(int64(memStats.MCacheInuse)) - runtimeMetrics.MemStats.MCacheSys.Update(int64(memStats.MCacheSys)) - runtimeMetrics.MemStats.MSpanInuse.Update(int64(memStats.MSpanInuse)) - runtimeMetrics.MemStats.MSpanSys.Update(int64(memStats.MSpanSys)) - runtimeMetrics.MemStats.NextGC.Update(int64(memStats.NextGC)) - runtimeMetrics.MemStats.NumGC.Update(int64(memStats.NumGC - numGC)) - runtimeMetrics.MemStats.GCCPUFraction.Update(gcCPUFraction(&memStats)) - - // - i := numGC % uint32(len(memStats.PauseNs)) - ii := memStats.NumGC % uint32(len(memStats.PauseNs)) - if memStats.NumGC-numGC >= uint32(len(memStats.PauseNs)) { - for i = 0; i < uint32(len(memStats.PauseNs)); i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - } else { - if i > ii { - for ; i < uint32(len(memStats.PauseNs)); i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - i = 0 - } - for ; i < ii; i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - } - frees = memStats.Frees - lookups = memStats.Lookups - mallocs = memStats.Mallocs - numGC = memStats.NumGC - - runtimeMetrics.MemStats.PauseTotalNs.Update(int64(memStats.PauseTotalNs)) - runtimeMetrics.MemStats.StackInuse.Update(int64(memStats.StackInuse)) - runtimeMetrics.MemStats.StackSys.Update(int64(memStats.StackSys)) - runtimeMetrics.MemStats.Sys.Update(int64(memStats.Sys)) - runtimeMetrics.MemStats.TotalAlloc.Update(int64(memStats.TotalAlloc)) - - currentNumCgoCalls := numCgoCall() - runtimeMetrics.NumCgoCall.Update(currentNumCgoCalls - numCgoCalls) - numCgoCalls = currentNumCgoCalls - - runtimeMetrics.NumGoroutine.Update(int64(runtime.NumGoroutine())) - - runtimeMetrics.NumThread.Update(int64(threadCreateProfile.Count())) -} - -// Register runtimeMetrics for the Go runtime statistics exported in runtime and -// specifically runtime.MemStats. The runtimeMetrics are named by their -// fully-qualified Go symbols, i.e. 
runtime.MemStats.Alloc. -func RegisterRuntimeMemStats(r Registry) { - runtimeMetrics.MemStats.Alloc = NewGauge() - runtimeMetrics.MemStats.BuckHashSys = NewGauge() - runtimeMetrics.MemStats.DebugGC = NewGauge() - runtimeMetrics.MemStats.EnableGC = NewGauge() - runtimeMetrics.MemStats.Frees = NewGauge() - runtimeMetrics.MemStats.HeapAlloc = NewGauge() - runtimeMetrics.MemStats.HeapIdle = NewGauge() - runtimeMetrics.MemStats.HeapInuse = NewGauge() - runtimeMetrics.MemStats.HeapObjects = NewGauge() - runtimeMetrics.MemStats.HeapReleased = NewGauge() - runtimeMetrics.MemStats.HeapSys = NewGauge() - runtimeMetrics.MemStats.LastGC = NewGauge() - runtimeMetrics.MemStats.Lookups = NewGauge() - runtimeMetrics.MemStats.Mallocs = NewGauge() - runtimeMetrics.MemStats.MCacheInuse = NewGauge() - runtimeMetrics.MemStats.MCacheSys = NewGauge() - runtimeMetrics.MemStats.MSpanInuse = NewGauge() - runtimeMetrics.MemStats.MSpanSys = NewGauge() - runtimeMetrics.MemStats.NextGC = NewGauge() - runtimeMetrics.MemStats.NumGC = NewGauge() - runtimeMetrics.MemStats.GCCPUFraction = NewGaugeFloat64() - runtimeMetrics.MemStats.PauseNs = NewHistogram(NewExpDecaySample(1028, 0.015)) - runtimeMetrics.MemStats.PauseTotalNs = NewGauge() - runtimeMetrics.MemStats.StackInuse = NewGauge() - runtimeMetrics.MemStats.StackSys = NewGauge() - runtimeMetrics.MemStats.Sys = NewGauge() - runtimeMetrics.MemStats.TotalAlloc = NewGauge() - runtimeMetrics.NumCgoCall = NewGauge() - runtimeMetrics.NumGoroutine = NewGauge() - runtimeMetrics.NumThread = NewGauge() - runtimeMetrics.ReadMemStats = NewTimer() - - r.Register("runtime.MemStats.Alloc", runtimeMetrics.MemStats.Alloc) - r.Register("runtime.MemStats.BuckHashSys", runtimeMetrics.MemStats.BuckHashSys) - r.Register("runtime.MemStats.DebugGC", runtimeMetrics.MemStats.DebugGC) - r.Register("runtime.MemStats.EnableGC", runtimeMetrics.MemStats.EnableGC) - r.Register("runtime.MemStats.Frees", runtimeMetrics.MemStats.Frees) - r.Register("runtime.MemStats.HeapAlloc", 
runtimeMetrics.MemStats.HeapAlloc) - r.Register("runtime.MemStats.HeapIdle", runtimeMetrics.MemStats.HeapIdle) - r.Register("runtime.MemStats.HeapInuse", runtimeMetrics.MemStats.HeapInuse) - r.Register("runtime.MemStats.HeapObjects", runtimeMetrics.MemStats.HeapObjects) - r.Register("runtime.MemStats.HeapReleased", runtimeMetrics.MemStats.HeapReleased) - r.Register("runtime.MemStats.HeapSys", runtimeMetrics.MemStats.HeapSys) - r.Register("runtime.MemStats.LastGC", runtimeMetrics.MemStats.LastGC) - r.Register("runtime.MemStats.Lookups", runtimeMetrics.MemStats.Lookups) - r.Register("runtime.MemStats.Mallocs", runtimeMetrics.MemStats.Mallocs) - r.Register("runtime.MemStats.MCacheInuse", runtimeMetrics.MemStats.MCacheInuse) - r.Register("runtime.MemStats.MCacheSys", runtimeMetrics.MemStats.MCacheSys) - r.Register("runtime.MemStats.MSpanInuse", runtimeMetrics.MemStats.MSpanInuse) - r.Register("runtime.MemStats.MSpanSys", runtimeMetrics.MemStats.MSpanSys) - r.Register("runtime.MemStats.NextGC", runtimeMetrics.MemStats.NextGC) - r.Register("runtime.MemStats.NumGC", runtimeMetrics.MemStats.NumGC) - r.Register("runtime.MemStats.GCCPUFraction", runtimeMetrics.MemStats.GCCPUFraction) - r.Register("runtime.MemStats.PauseNs", runtimeMetrics.MemStats.PauseNs) - r.Register("runtime.MemStats.PauseTotalNs", runtimeMetrics.MemStats.PauseTotalNs) - r.Register("runtime.MemStats.StackInuse", runtimeMetrics.MemStats.StackInuse) - r.Register("runtime.MemStats.StackSys", runtimeMetrics.MemStats.StackSys) - r.Register("runtime.MemStats.Sys", runtimeMetrics.MemStats.Sys) - r.Register("runtime.MemStats.TotalAlloc", runtimeMetrics.MemStats.TotalAlloc) - r.Register("runtime.NumCgoCall", runtimeMetrics.NumCgoCall) - r.Register("runtime.NumGoroutine", runtimeMetrics.NumGoroutine) - r.Register("runtime.NumThread", runtimeMetrics.NumThread) - r.Register("runtime.ReadMemStats", runtimeMetrics.ReadMemStats) -} diff --git a/metrics/runtime_cgo.go b/metrics/runtime_cgo.go deleted file mode 100644 
index 4307ebdba689..000000000000 --- a/metrics/runtime_cgo.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build cgo && !appengine && !js -// +build cgo,!appengine,!js - -package metrics - -import "runtime" - -func numCgoCall() int64 { - return runtime.NumCgoCall() -} diff --git a/metrics/runtime_gccpufraction.go b/metrics/runtime_gccpufraction.go deleted file mode 100644 index 28cd44752b45..000000000000 --- a/metrics/runtime_gccpufraction.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build go1.5 -// +build go1.5 - -package metrics - -import "runtime" - -func gcCPUFraction(memStats *runtime.MemStats) float64 { - return memStats.GCCPUFraction -} diff --git a/metrics/runtime_no_cgo.go b/metrics/runtime_no_cgo.go deleted file mode 100644 index 1799bef63bfb..000000000000 --- a/metrics/runtime_no_cgo.go +++ /dev/null @@ -1,8 +0,0 @@ -//go:build !cgo || appengine || js -// +build !cgo appengine js - -package metrics - -func numCgoCall() int64 { - return 0 -} diff --git a/metrics/runtime_no_gccpufraction.go b/metrics/runtime_no_gccpufraction.go deleted file mode 100644 index af1a4b63c809..000000000000 --- a/metrics/runtime_no_gccpufraction.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build !go1.5 -// +build !go1.5 - -package metrics - -import "runtime" - -func gcCPUFraction(memStats *runtime.MemStats) float64 { - return 0 -} diff --git a/metrics/runtime_test.go b/metrics/runtime_test.go deleted file mode 100644 index f85f7868f71a..000000000000 --- a/metrics/runtime_test.go +++ /dev/null @@ -1,88 +0,0 @@ -package metrics - -import ( - "runtime" - "testing" - "time" -) - -func BenchmarkRuntimeMemStats(b *testing.B) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - b.ResetTimer() - for i := 0; i < b.N; i++ { - CaptureRuntimeMemStatsOnce(r) - } -} - -func TestRuntimeMemStats(t *testing.T) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - CaptureRuntimeMemStatsOnce(r) - zero := runtimeMetrics.MemStats.PauseNs.Count() // Get a "zero" since GC may have run before these tests. 
- runtime.GC() - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 1 { - t.Fatal(count - zero) - } - runtime.GC() - runtime.GC() - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 3 { - t.Fatal(count - zero) - } - for i := 0; i < 256; i++ { - runtime.GC() - } - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 259 { - t.Fatal(count - zero) - } - for i := 0; i < 257; i++ { - runtime.GC() - } - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 515 { // We lost one because there were too many GCs between captures. - t.Fatal(count - zero) - } -} - -func TestRuntimeMemStatsNumThread(t *testing.T) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - CaptureRuntimeMemStatsOnce(r) - - if value := runtimeMetrics.NumThread.Value(); value < 1 { - t.Fatalf("got NumThread: %d, wanted at least 1", value) - } -} - -func TestRuntimeMemStatsBlocking(t *testing.T) { - if g := runtime.GOMAXPROCS(0); g < 2 { - t.Skipf("skipping TestRuntimeMemStatsBlocking with GOMAXPROCS=%d\n", g) - } - ch := make(chan int) - go testRuntimeMemStatsBlocking(ch) - var memStats runtime.MemStats - t0 := time.Now() - runtime.ReadMemStats(&memStats) - t1 := time.Now() - t.Log("i++ during runtime.ReadMemStats:", <-ch) - go testRuntimeMemStatsBlocking(ch) - d := t1.Sub(t0) - t.Log(d) - time.Sleep(d) - t.Log("i++ during time.Sleep:", <-ch) -} - -func testRuntimeMemStatsBlocking(ch chan int) { - i := 0 - for { - select { - case ch <- i: - return - default: - i++ - } - } -} diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go new file mode 100644 index 000000000000..c68939af1ef7 --- /dev/null +++ b/metrics/runtimehistogram.go @@ -0,0 +1,319 @@ +package metrics + +import ( + "math" + "runtime/metrics" + "sort" + "sync/atomic" +) + +func getOrRegisterRuntimeHistogram(name string, scale float64, r 
Registry) *runtimeHistogram { + if r == nil { + r = DefaultRegistry + } + constructor := func() Histogram { return newRuntimeHistogram(scale) } + return r.GetOrRegister(name, constructor).(*runtimeHistogram) +} + +// runtimeHistogram wraps a runtime/metrics histogram. +type runtimeHistogram struct { + v atomic.Value + scaleFactor float64 +} + +func newRuntimeHistogram(scale float64) *runtimeHistogram { + h := &runtimeHistogram{scaleFactor: scale} + h.update(&metrics.Float64Histogram{}) + return h +} + +func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) { + if mh == nil { + // The update value can be nil if the current Go version doesn't support a + // requested metric. It's just easier to handle nil here than putting + // conditionals everywhere. + return + } + + s := runtimeHistogramSnapshot{ + Counts: make([]uint64, len(mh.Counts)), + Buckets: make([]float64, len(mh.Buckets)), + } + copy(s.Counts, mh.Counts) + copy(s.Buckets, mh.Buckets) + for i, b := range s.Buckets { + s.Buckets[i] = b * h.scaleFactor + } + h.v.Store(&s) +} + +func (h *runtimeHistogram) load() *runtimeHistogramSnapshot { + return h.v.Load().(*runtimeHistogramSnapshot) +} + +func (h *runtimeHistogram) Clear() { + panic("runtimeHistogram does not support Clear") +} +func (h *runtimeHistogram) Update(int64) { + panic("runtimeHistogram does not support Update") +} +func (h *runtimeHistogram) Sample() Sample { + return NilSample{} +} + +// Snapshot returns a non-changing copy of the histogram. +func (h *runtimeHistogram) Snapshot() Histogram { + return h.load() +} + +// Count returns the sample count. +func (h *runtimeHistogram) Count() int64 { + return h.load().Count() +} + +// Mean returns an approximation of the mean. +func (h *runtimeHistogram) Mean() float64 { + return h.load().Mean() +} + +// StdDev approximates the standard deviation of the histogram. 
+func (h *runtimeHistogram) StdDev() float64 { + return h.load().StdDev() +} + +// Variance approximates the variance of the histogram. +func (h *runtimeHistogram) Variance() float64 { + return h.load().Variance() +} + +// Percentile computes the p'th percentile value. +func (h *runtimeHistogram) Percentile(p float64) float64 { + return h.load().Percentile(p) +} + +// Percentiles computes all requested percentile values. +func (h *runtimeHistogram) Percentiles(ps []float64) []float64 { + return h.load().Percentiles(ps) +} + +// Max returns the highest sample value. +func (h *runtimeHistogram) Max() int64 { + return h.load().Max() +} + +// Min returns the lowest sample value. +func (h *runtimeHistogram) Min() int64 { + return h.load().Min() +} + +// Sum returns the sum of all sample values. +func (h *runtimeHistogram) Sum() int64 { + return h.load().Sum() +} + +// runtimeHistogramSnapshot is the value a runtimeHistogram stores and hands out. +// It has the same underlying type as metrics.Float64Histogram (bucket counts plus +// bucket edges) and derives every statistic from those two slices. +type runtimeHistogramSnapshot metrics.Float64Histogram + +// Clear is not supported on a snapshot and always panics. +func (h *runtimeHistogramSnapshot) Clear() { + panic("runtimeHistogram does not support Clear") +} +// Update is not supported on a snapshot and always panics. +func (h *runtimeHistogramSnapshot) Update(int64) { + panic("runtimeHistogram does not support Update") +} +// Sample returns an empty NilSample; only bucket counts are available here. +func (h *runtimeHistogramSnapshot) Sample() Sample { + return NilSample{} +} + +// Snapshot returns the receiver itself. +func (h *runtimeHistogramSnapshot) Snapshot() Histogram { + return h +} + +// Count returns the sample count, i.e. the sum of all bucket counts. +func (h *runtimeHistogramSnapshot) Count() int64 { + var count int64 + for _, c := range h.Counts { + count += int64(c) + } + return count +} + +// Mean returns an approximation of the mean. It is 0 for a histogram without buckets. +func (h *runtimeHistogramSnapshot) Mean() float64 { + if len(h.Counts) == 0 { + return 0 + } + mean, _ := h.mean() + return mean +} + +// mean computes the mean and also the total sample count. 
+// It reports (0, 0) when no bucket holds a sample; dividing by a zero total
+// would otherwise yield NaN.
+func (h *runtimeHistogramSnapshot) mean() (mean, totalCount float64) {
+	var sum float64
+	for i, c := range h.Counts {
+		midpoint := h.midpoint(i)
+		sum += midpoint * float64(c)
+		totalCount += float64(c)
+	}
+	if totalCount == 0 {
+		// Every bucket is empty, so there is nothing to average.
+		return 0, 0
+	}
+	return sum / totalCount, totalCount
+}
+
+// midpoint returns the value used to represent every sample of the given
+// bucket: the middle of the bucket, or its finite edge when the bucket is
+// open-ended on one side.
+func (h *runtimeHistogramSnapshot) midpoint(bucket int) float64 {
+	high := h.Buckets[bucket+1]
+	low := h.Buckets[bucket]
+	if math.IsInf(high, 1) {
+		// The edge of the highest bucket can be +Inf, and it's supposed to mean that this
+		// bucket contains all remaining samples > low. We can't get the middle of an
+		// infinite range, so just return the lower bound of this bucket instead.
+		return low
+	}
+	if math.IsInf(low, -1) {
+		// Similarly, we can get -Inf in the left edge of the lowest bucket,
+		// and it means the bucket contains all remaining values < high.
+		return high
+	}
+	return (low + high) / 2
+}
+
+// lowerEdge returns the lower bound of the given bucket, or its upper bound
+// when the lower bound is -Inf.
+func (h *runtimeHistogramSnapshot) lowerEdge(bucket int) float64 {
+	if low := h.Buckets[bucket]; !math.IsInf(low, -1) {
+		return low
+	}
+	return h.Buckets[bucket+1]
+}
+
+// upperEdge returns the upper bound of the given bucket, or its lower bound
+// when the upper bound is +Inf.
+func (h *runtimeHistogramSnapshot) upperEdge(bucket int) float64 {
+	if high := h.Buckets[bucket+1]; !math.IsInf(high, 1) {
+		return high
+	}
+	return h.Buckets[bucket]
+}
+
+// StdDev approximates the standard deviation of the histogram.
+func (h *runtimeHistogramSnapshot) StdDev() float64 {
+	return math.Sqrt(h.Variance())
+}
+
+// Variance approximates the variance of the histogram. Each sample is taken
+// to sit at the midpoint of its bucket. The result is 0 for fewer than two
+// samples.
+func (h *runtimeHistogramSnapshot) Variance() float64 {
+	if len(h.Counts) == 0 {
+		return 0
+	}
+
+	mean, totalCount := h.mean()
+	if totalCount <= 1 {
+		// There is no variance when there are zero or one items.
+		return 0
+	}
+
+	var sum float64
+	for i, c := range h.Counts {
+		midpoint := h.midpoint(i)
+		d := midpoint - mean
+		sum += float64(c) * (d * d)
+	}
+	return sum / (totalCount - 1)
+}
+
+// Percentile computes the p'th percentile value. p is a fraction and is
+// clamped to [0, 1], exactly as Percentiles does for each of its inputs.
+func (h *runtimeHistogramSnapshot) Percentile(p float64) float64 {
+	p = math.Max(0, math.Min(1.0, p))
+	values := [1]float64{float64(h.Count()) * p}
+	h.computePercentiles(values[:])
+	return values[0]
+}
+
+// Percentiles computes all requested percentile values. Each element of ps is
+// a fraction, clamped to [0, 1]. The result has the same length and order as
+// ps, and ps itself is not modified.
+func (h *runtimeHistogramSnapshot) Percentiles(ps []float64) []float64 {
+	// Compute threshold values. We need these to be sorted
+	// for the percentile computation, but restore the original
+	// order later, so keep the indexes as well.
+	count := float64(h.Count())
+	thresholds := make([]float64, len(ps))
+	indexes := make([]int, len(ps))
+	for i, percentile := range ps {
+		thresholds[i] = count * math.Max(0, math.Min(1.0, percentile))
+		indexes[i] = i
+	}
+	sort.Sort(floatsAscendingKeepingIndex{thresholds, indexes})
+
+	// Now compute. The result is stored back into the thresholds slice.
+	h.computePercentiles(thresholds)
+
+	// Put the result back into the requested order.
+	sort.Sort(floatsByIndex{thresholds, indexes})
+	return thresholds
+}
+
+// computePercentiles replaces, in place, each entry of thresh by a bucket
+// value. On entry thresh holds cumulative sample counts in ascending order,
+// each within [0, total sample count].
+//
+// A threshold resolves to the lower edge of the first bucket at which the
+// running sample count exceeds it. A threshold that is never exceeded (it
+// equals the total count, i.e. p == 1) resolves to the upper edge of the last
+// non-empty bucket. Previously such an entry was left holding the raw sample
+// count. Open-ended (infinite) edges are replaced by the bucket's finite edge.
+// With no samples at all, every entry resolves to 0.
+func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) {
+	var totalCount float64
+	last := -1 // index of the last non-empty bucket seen so far
+	for i, count := range h.Counts {
+		if count == 0 {
+			// The running total is unchanged, so no non-negative threshold
+			// can be newly exceeded here.
+			continue
+		}
+		last = i
+		totalCount += float64(count)
+
+		for len(thresh) > 0 && thresh[0] < totalCount {
+			thresh[0] = h.lowerEdge(i)
+			thresh = thresh[1:]
+		}
+		if len(thresh) == 0 {
+			return
+		}
+	}
+
+	// Whatever is left was never exceeded by the running total.
+	var rest float64
+	if last >= 0 {
+		rest = h.upperEdge(last)
+	}
+	for i := range thresh {
+		thresh[i] = rest
+	}
+}
+
+// Note: runtime/metrics.Float64Histogram is a collection of float64s, but the methods
+// below need to return int64 to satisfy the interface. The histogram provided by runtime
+// also doesn't keep track of individual samples, so results are approximated.
+
+// Max returns the highest sample value, approximated by the upper edge of the
+// last non-empty bucket and rounded up. It returns 0 when there are no samples.
+func (h *runtimeHistogramSnapshot) Max() int64 {
+	for i := len(h.Counts) - 1; i >= 0; i-- {
+		if h.Counts[i] > 0 {
+			return int64(math.Ceil(h.upperEdge(i)))
+		}
+	}
+	return 0
+}
+
+// Min returns the lowest sample value, approximated by the lower edge of the
+// first non-empty bucket and rounded down. If that edge is -Inf the bucket's
+// upper edge is used, mirroring how Max treats +Inf. It returns 0 when there
+// are no samples.
+func (h *runtimeHistogramSnapshot) Min() int64 {
+	for i, count := range h.Counts {
+		if count > 0 {
+			return int64(math.Floor(h.lowerEdge(i)))
+		}
+	}
+	return 0
+}
+
+// Sum returns the sum of all sample values.
+// Every sample is counted at the lower edge of its bucket, and the total is
+// rounded up. Empty buckets are skipped: the lowest bucket may have a -Inf
+// lower edge, and -Inf multiplied by a zero count is NaN, which would poison
+// the whole sum. A non-empty bucket whose lower edge is -Inf is counted at its
+// upper edge instead.
+func (h *runtimeHistogramSnapshot) Sum() int64 {
+	var sum float64
+	for i, c := range h.Counts {
+		if c == 0 {
+			continue
+		}
+		edge := h.Buckets[i]
+		if math.IsInf(edge, -1) {
+			edge = h.Buckets[i+1]
+		}
+		sum += edge * float64(c)
+	}
+	return int64(math.Ceil(sum))
+}
+
+// floatsAscendingKeepingIndex sorts values in ascending order and applies the
+// same swaps to indexes, so the original positions can be restored afterwards.
+type floatsAscendingKeepingIndex struct {
+	values  []float64
+	indexes []int
+}
+
+func (s floatsAscendingKeepingIndex) Len() int {
+	return len(s.values)
+}
+
+func (s floatsAscendingKeepingIndex) Less(i, j int) bool {
+	return s.values[i] < s.values[j]
+}
+
+func (s floatsAscendingKeepingIndex) Swap(i, j int) {
+	s.values[i], s.values[j] = s.values[j], s.values[i]
+	s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i]
+}
+
+// floatsByIndex sorts by indexes in ascending order and applies the same swaps
+// to values; it undoes the permutation made by floatsAscendingKeepingIndex.
+type floatsByIndex struct {
+	values  []float64
+	indexes []int
+}
+
+func (s floatsByIndex) Len() int {
+	return len(s.values)
+}
+
+func (s floatsByIndex) Less(i, j int) bool {
+	return s.indexes[i] < s.indexes[j]
+}
+
+func (s floatsByIndex) Swap(i, j int) {
+	s.values[i], s.values[j] = s.values[j], s.values[i]
+	s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i]
+}
diff --git a/metrics/runtimehistogram_test.go b/metrics/runtimehistogram_test.go
new file mode 100644
index 000000000000..d53a01438311
--- /dev/null
+++ b/metrics/runtimehistogram_test.go
@@ -0,0 +1,133 @@
+package metrics
+
+import (
+	"fmt"
+	"math"
+	"reflect"
+	"runtime/metrics"
+	"testing"
+)
+
+var _ Histogram = (*runtimeHistogram)(nil)
+
+type runtimeHistogramTest struct {
+	h metrics.Float64Histogram
+
+	Count       int64
+	Min         int64
+	Max         int64
+	Sum         int64
+	Mean        float64
+	Variance    float64
+	StdDev      float64
+	Percentiles []float64 // .5 .8 .9 .99 .995
+}
+
+// This test checks the results of statistical functions implemented
+// by runtimeHistogramSnapshot.
+func TestRuntimeHistogramStats(t *testing.T) {
+	// Fractions requested from every case. The Percentiles column of the
+	// table holds the expected result for each of them, in this order.
+	quantiles := []float64{.5, .8, .9, .99, .995}
+	// Maximum absolute error accepted for the floating-point statistics.
+	const tolerance = 0.0001
+
+	cases := []runtimeHistogramTest{
+		0: {
+			// A histogram without buckets: every statistic is zero.
+			h: metrics.Float64Histogram{
+				Counts:  []uint64{},
+				Buckets: []float64{},
+			},
+			Count:       0,
+			Max:         0,
+			Min:         0,
+			Sum:         0,
+			Mean:        0,
+			Variance:    0,
+			StdDev:      0,
+			Percentiles: []float64{0, 0, 0, 0, 0},
+		},
+		1: {
+			// This checks the case where the highest bucket is +Inf.
+			h: metrics.Float64Histogram{
+				Counts:  []uint64{0, 1, 2},
+				Buckets: []float64{0, 0.5, 1, math.Inf(1)},
+			},
+			Count:       3,
+			Max:         1,
+			Min:         0,
+			Sum:         3,
+			Mean:        0.9166666,
+			Percentiles: []float64{1, 1, 1, 1, 1},
+			Variance:    0.020833,
+			StdDev:      0.144433,
+		},
+		2: {
+			// Only finite bucket edges.
+			h: metrics.Float64Histogram{
+				Counts:  []uint64{8, 6, 3, 1},
+				Buckets: []float64{12, 16, 18, 24, 25},
+			},
+			Count:       18,
+			Max:         25,
+			Min:         12,
+			Sum:         270,
+			Mean:        16.75,
+			Variance:    10.3015,
+			StdDev:      3.2096,
+			Percentiles: []float64{16, 18, 18, 24, 24},
+		},
+	}
+
+	for idx, tc := range cases {
+		t.Run(fmt.Sprint(idx), func(t *testing.T) {
+			snap := runtimeHistogramSnapshot(tc.h)
+
+			// Integer statistics must match exactly.
+			if got := snap.Count(); got != tc.Count {
+				t.Errorf("Count() = %v, want %v", got, tc.Count)
+			}
+			if got := snap.Min(); got != tc.Min {
+				t.Errorf("Min() = %v, want %v", got, tc.Min)
+			}
+			if got := snap.Max(); got != tc.Max {
+				t.Errorf("Max() = %v, want %v", got, tc.Max)
+			}
+			if got := snap.Sum(); got != tc.Sum {
+				t.Errorf("Sum() = %v, want %v", got, tc.Sum)
+			}
+
+			// Floating-point statistics are compared within the tolerance.
+			if got := snap.Mean(); !approxEqual(got, tc.Mean, tolerance) {
+				t.Errorf("Mean() = %v, want %v", got, tc.Mean)
+			}
+			if got := snap.Variance(); !approxEqual(got, tc.Variance, tolerance) {
+				t.Errorf("Variance() = %v, want %v", got, tc.Variance)
+			}
+			if got := snap.StdDev(); !approxEqual(got, tc.StdDev, tolerance) {
+				t.Errorf("StdDev() = %v, want %v", got, tc.StdDev)
+			}
+
+			// Percentiles are bucket edges, so they are compared exactly.
+			if got := snap.Percentiles(quantiles); !reflect.DeepEqual(got, tc.Percentiles) {
+				t.Errorf("Percentiles(%v) = %v, want %v", quantiles, got, tc.Percentiles)
+			}
+		})
+	}
+}
+
+// approxEqual reports whether x and y differ by less than ε. Two infinities
+// of the same sign, or two NaNs, also count as equal.
+func approxEqual(x, y, ε float64) bool {
+	switch {
+	case math.IsInf(x, -1) && math.IsInf(y, -1):
+		return true
+	case math.IsInf(x, 1) && math.IsInf(y, 1):
+		return true
+	case math.IsNaN(x) && math.IsNaN(y):
+		return true
+	default:
+		return math.Abs(x-y) < ε
+	}
+}
+
+// This test verifies that requesting Percentiles in unsorted order
+// returns them in the requested order.
+func TestRuntimeHistogramStatsPercentileOrder(t *testing.T) {
+	// Ten unit-wide buckets holding one sample each.
+	snap := runtimeHistogramSnapshot{
+		Counts:  []uint64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		Buckets: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+	}
+	want := []float64{10, 2, 5, 1, 2}
+	got := snap.Percentiles([]float64{1, 0.2, 0.5, 0.1, 0.2})
+	if !reflect.DeepEqual(got, want) {
+		t.Fatal("wrong result:", got)
+	}
+}