Implement go.schedule.duration (#5991)

Fixes #5974. This estimates the histogram sum the same way the Prometheus client_golang library does (https://github.com/prometheus/client_golang/blob/46f77a97fa1e83e7285634745bd5c92c11bf20c7/prometheus/go_collector_latest.go#L543): the lower bound of each bucket is used as the value of every observation in that bucket. It is implemented as a metric producer and is designed to be used together with the existing runtime instrumentation. --------- Co-authored-by: Sam Xie <sam@samxie.me>
open-telemetry · Sep 4, 2024 · 8f5330c · 8f5330c
1 parent 06ace3e
commit 8f5330c
Show file tree

Hide file tree

Showing 17 changed files with 423 additions and 365 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 
 ## [Unreleased]
 
+### Added
+
+- Add `NewProducer` to `go.opentelemetry.io/contrib/instrumentation/runtime`, which allows collecting the `go.schedule.duration` histogram metric from the Go runtime. (#5991)
+
 ### Removed
 
 - Drop support for [Go 1.21]. (#6046, #6047)

diff --git a/instrumentation/runtime/example/doc.go b/instrumentation/runtime/example/doc.go
diff --git a/instrumentation/runtime/example/go.mod b/instrumentation/runtime/example/go.mod
diff --git a/instrumentation/runtime/example/go.sum b/instrumentation/runtime/example/go.sum
diff --git a/instrumentation/runtime/example/main.go b/instrumentation/runtime/example/main.go
diff --git a/instrumentation/runtime/example_test.go b/instrumentation/runtime/example_test.go
@@ -0,0 +1,38 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package runtime_test
+
+import (
+	"context"
+	"log"
+	"time"
+
+	"go.opentelemetry.io/contrib/instrumentation/runtime"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/sdk/metric"
+)
+
+// Example wires the runtime histogram producer into a metric reader, installs
+// the resulting provider globally, and then starts the standard runtime
+// instrumentation.
+func Example() {
+	// The producer contributes histograms that come directly from the Go runtime.
+	producer := runtime.NewProducer()
+
+	// A manual reader stands in for a reader that would actually export
+	// data. See https://pkg.go.dev/go.opentelemetry.io/otel/exporters for
+	// exporters that can be used as or with readers.
+	reader := metric.NewManualReader(metric.WithProducer(producer))
+
+	provider := metric.NewMeterProvider(metric.WithReader(reader))
+	defer func() {
+		if err := provider.Shutdown(context.Background()); err != nil {
+			log.Fatal(err)
+		}
+	}()
+	otel.SetMeterProvider(provider)
+
+	// Begin collecting the go runtime metrics.
+	if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(time.Second)); err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/instrumentation/runtime/go.mod b/instrumentation/runtime/go.mod
@@ -6,13 +6,17 @@ require (
 	github.com/stretchr/testify v1.9.0
 	go.opentelemetry.io/otel v1.29.0
 	go.opentelemetry.io/otel/metric v1.29.0
+	go.opentelemetry.io/otel/sdk v1.29.0
+	go.opentelemetry.io/otel/sdk/metric v1.29.0
 )
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/go-logr/logr v1.4.2 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/google/uuid v1.6.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	go.opentelemetry.io/otel/trace v1.29.0 // indirect
+	golang.org/x/sys v0.24.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/instrumentation/runtime/go.sum b/instrumentation/runtime/go.sum
@@ -7,6 +7,8 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
@@ -15,8 +17,14 @@ go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
 go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
 go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
 go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
+go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo=
+go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok=
+go.opentelemetry.io/otel/sdk/metric v1.29.0 h1:K2CfmJohnRgvZ9UAj2/FhIf/okdWcNdBwe1m8xFXiSY=
+go.opentelemetry.io/otel/sdk/metric v1.29.0/go.mod h1:6zZLdCl2fkauYoZIOn/soQIDSWFmNSRcICarHfuhNJQ=
 go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4=
 go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
+golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
+golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

diff --git a/instrumentation/runtime/options.go b/instrumentation/runtime/options.go
@@ -27,6 +27,13 @@ type Option interface {
 	apply(*config)
 }
 
+// ProducerOption supports configuring optional settings for runtime metrics using a
+// metric producer in addition to standard instrumentation.
+//
+// It embeds Option, so a ProducerOption value also satisfies Option.
+type ProducerOption interface {
+	Option
+	// applyProducer applies the option to the config built for a Producer.
+	applyProducer(*config)
+}
+
 // DefaultMinimumReadMemStatsInterval is the default minimum interval
 // between calls to runtime.ReadMemStats().  Use the
 // WithMinimumReadMemStatsInterval() option to modify this setting in
@@ -48,6 +55,8 @@ func (o minimumReadMemStatsIntervalOption) apply(c *config) {
 	}
 }
 
+// applyProducer configures a producer config exactly the way apply configures
+// the standard instrumentation config.
+func (o minimumReadMemStatsIntervalOption) applyProducer(c *config) {
+	o.apply(c)
+}
+
 // WithMeterProvider sets the Metric implementation to use for
 // reporting.  If this option is not used, the global metric.MeterProvider
 // will be used.  `provider` must be non-nil.
@@ -66,11 +75,25 @@ func (o metricProviderOption) apply(c *config) {
 // newConfig computes a config from the supplied Options.
 func newConfig(opts ...Option) config {
 	c := config{
-		MeterProvider:               otel.GetMeterProvider(),
-		MinimumReadMemStatsInterval: DefaultMinimumReadMemStatsInterval,
+		MeterProvider: otel.GetMeterProvider(),
 	}
 	for _, opt := range opts {
 		opt.apply(&c)
 	}
+	if c.MinimumReadMemStatsInterval <= 0 {
+		c.MinimumReadMemStatsInterval = DefaultMinimumReadMemStatsInterval
+	}
+	return c
+}
+
+// newConfig computes a config from the supplied ProducerOptions.
+func newProducerConfig(opts ...ProducerOption) config {
+	c := config{}
+	for _, opt := range opts {
+		opt.applyProducer(&c)
+	}
+	if c.MinimumReadMemStatsInterval <= 0 {
+		c.MinimumReadMemStatsInterval = DefaultMinimumReadMemStatsInterval
+	}
 	return c
 }
diff --git a/instrumentation/runtime/producer.go b/instrumentation/runtime/producer.go
@@ -0,0 +1,120 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package runtime // import "go.opentelemetry.io/contrib/instrumentation/runtime"
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"runtime/metrics"
+	"sync"
+	"time"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/sdk/instrumentation"
+	"go.opentelemetry.io/otel/sdk/metric"
+	"go.opentelemetry.io/otel/sdk/metric/metricdata"
+)
+
+var startTime time.Time
+
+func init() {
+	startTime = time.Now()
+}
+
+// histogramMetrics lists the runtime metrics handed to the collector that
+// NewProducer creates.
+var histogramMetrics = []string{goSchedLatencies}
+
+// Producer is a metric.Producer, which provides precomputed histogram metrics from the go runtime.
+//
+// Produce takes lock while it refreshes collector and reads the histogram
+// from it; collector is created by NewProducer.
+type Producer struct {
+	lock      sync.Mutex
+	collector *goCollector
+}
+
+// Compile-time check that *Producer implements metric.Producer.
+var _ metric.Producer = (*Producer)(nil)
+
+// NewProducer returns a Producer which provides precomputed histogram metrics
+// from the go runtime. The options are resolved by newProducerConfig, and the
+// resulting MinimumReadMemStatsInterval is passed to the underlying collector
+// together with histogramMetrics.
+func NewProducer(opts ...ProducerOption) *Producer {
+	cfg := newProducerConfig(opts...)
+	collector := newCollector(cfg.MinimumReadMemStatsInterval, histogramMetrics)
+	return &Producer{collector: collector}
+}
+
+// Produce returns precomputed histogram metrics from the go runtime, or an error if unsuccessful.
+//
+// On success it returns a single scope containing the cumulative
+// go.schedule.duration histogram. It returns an error when no data point
+// could be built from the runtime's histogram.
+//
+// The collector is refreshed, read, and converted while p.lock is held, so
+// concurrent calls to Produce never touch collector state outside the lock.
+func (p *Producer) Produce(context.Context) ([]metricdata.ScopeMetrics, error) {
+	p.lock.Lock()
+	p.collector.refresh()
+	schedHist := p.collector.getHistogram(goSchedLatencies)
+	// Use the last collection time (which may or may not be now) for the timestamp.
+	// lastCollect is read under the lock: it is collector state, and
+	// NOTE(review): presumably refresh is what updates it — confirm.
+	histDp := convertRuntimeHistogram(schedHist, p.collector.lastCollect)
+	p.lock.Unlock()
+	if len(histDp) == 0 {
+		return nil, fmt.Errorf("unable to obtain go.schedule.duration metric from the runtime")
+	}
+	return []metricdata.ScopeMetrics{
+		{
+			Scope: instrumentation.Scope{
+				Name:    ScopeName,
+				Version: Version(),
+			},
+			Metrics: []metricdata.Metrics{
+				{
+					Name:        "go.schedule.duration",
+					Description: "The time goroutines have spent in the scheduler in a runnable state before actually running.",
+					Unit:        "s",
+					Data: metricdata.Histogram[float64]{
+						Temporality: metricdata.CumulativeTemporality,
+						DataPoints:  histDp,
+					},
+				},
+			},
+		},
+	}, nil
+}
+
+// emptySet is the attribute set attached to the data point built by
+// convertRuntimeHistogram.
+var emptySet = attribute.EmptySet()
+
+// convertRuntimeHistogram converts a runtime/metrics histogram into a single
+// OTel histogram data point with timestamp ts and StartTime startTime.
+//
+// It returns nil when runtimeHist is nil or does not have the shape that
+// runtime/metrics documents: at least two bucket boundaries, and exactly one
+// count per pair of adjacent boundaries.
+//
+// The returned data point shares runtimeHist.Buckets' backing array, and also
+// runtimeHist.Counts' unless the counts had to be extended with a trailing
+// zero, in which case they are copied first.
+func convertRuntimeHistogram(runtimeHist *metrics.Float64Histogram, ts time.Time) []metricdata.HistogramDataPoint[float64] {
+	if runtimeHist == nil {
+		return nil
+	}
+	bounds := runtimeHist.Buckets
+	counts := runtimeHist.Counts
+	if len(bounds) < 2 {
+		// runtime histograms are guaranteed to have at least two bucket boundaries.
+		return nil
+	}
+	if len(counts) != len(bounds)-1 {
+		// N+1 boundaries describe N buckets; anything else would index out of
+		// range below or yield a malformed data point.
+		return nil
+	}
+	// trim the first bucket since it is a lower bound. OTel histogram boundaries only have an upper bound.
+	bounds = bounds[1:]
+	if bounds[len(bounds)-1] == math.Inf(1) {
+		// trim the last bucket if it is +Inf, since the +Inf boundary is implicit in OTel.
+		bounds = bounds[:len(bounds)-1]
+	} else {
+		// if the last bucket is not +Inf, append an extra zero count since
+		// the implicit +Inf bucket won't have any observations.
+		// The full slice expression caps the capacity so append allocates a
+		// new array instead of writing into runtimeHist's backing array.
+		counts = append(counts[:len(counts):len(counts)], 0)
+	}
+	count := uint64(0)
+	sum := float64(0)
+	for i, c := range counts {
+		count += c
+		// This computed sum is an underestimate, since it assumes each
+		// observation happens at the bucket's lower bound. After the trim
+		// above, bounds[i-1] is the lower bound of bucket i. Observations in
+		// the first bucket are not added to the sum. Each bucket is weighted
+		// by its own count c, not by the running total.
+		if i > 0 && c != 0 {
+			sum += bounds[i-1] * float64(c)
+		}
+	}
+
+	return []metricdata.HistogramDataPoint[float64]{
+		{
+			StartTime:    startTime,
+			Count:        count,
+			Sum:          sum,
+			Time:         ts,
+			Bounds:       bounds,
+			BucketCounts: counts,
+			Attributes:   *emptySet,
+		},
+	}
+}