Skip to content

Commit

Permalink
Generate runtime metrics (#213)
Browse files Browse the repository at this point in the history
* Add metrics probe to the profiler

Signed-off-by: Pablo Chacin <pablochacin@gmail.com>
  • Loading branch information
pablochacin authored Jul 6, 2023
1 parent 924a68e commit 49d65e8
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 3 deletions.
6 changes: 6 additions & 0 deletions cmd/agent/commands/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package commands

import (
"context"
"time"

"github.com/grafana/xk6-disruptor/pkg/agent"
"github.com/grafana/xk6-disruptor/pkg/runtime"
Expand Down Expand Up @@ -60,6 +61,11 @@ func buildRootCmd(c *agent.Config) *cobra.Command {
rootCmd.PersistentFlags().IntVar(&c.Profiler.Memory.Rate, "mem-profile-rate", 1, "memory profiling rate")
rootCmd.PersistentFlags().BoolVar(&c.Profiler.Trace.Enabled, "trace", false, "trace agent execution")
rootCmd.PersistentFlags().StringVar(&c.Profiler.Trace.FileName, "trace-file", "trace.out", "tracing output file")
rootCmd.PersistentFlags().BoolVar(&c.Profiler.Metrics.Enabled, "metrics", false, "collect runtime metrics")
rootCmd.PersistentFlags().StringVar(&c.Profiler.Metrics.FileName, "metrics-file", "metrics.out",
"metrics output file")
rootCmd.PersistentFlags().DurationVar(&c.Profiler.Metrics.Rate, "metrics-rate", time.Second,
"frequency of metrics sampling")

return rootCmd
}
1 change: 1 addition & 0 deletions docs/01-development/01-contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ In order to facilitate debugging `xk6-disruptor-agent` offers options for genera
* `--trace`: generate traces. The `--trace-file` option allows specifying the output file for traces (default `trace.out`)
* `--cpu-profile`: generate CPU profiling information. The `--cpu-profile-file` option allows specifying the output file for profile information (default `cpu.pprof`)
* `--mem-profile`: generate memory profiling information. By default, it sets the [memory profile rate](https://pkg.go.dev/runtime#pkg-variables) to `1`, which will profile every allocation. This rate can be controlled using the `--mem-profile-rate` option. The `--mem-profile-file` option allows specifying the output file for profile information (default `mem.pprof`)
* `--metrics`: generate [go runtime metrics](https://pkg.go.dev/runtime/metrics). The metrics are collected at intervals defined by the `--metrics-rate` option (defaults to `1s`). At the end of the agent execution, the minimum, maximum and average values of each collected metric are reported to the file specified in `--metrics-file` (default `metrics.out`).

If you run the [disruptor manually](#running-manually) in a pod you have to copy them from the target pod to your local machine. For example, for copying the `trace.out` file:

Expand Down
175 changes: 175 additions & 0 deletions pkg/runtime/profiler/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
package profiler

import (
	"context"
	"fmt"
	"io"
	"os"
	"runtime/metrics"
	"sort"
	"time"
)

// MetricsConfig defines the configuration of a metrics probe
type MetricsConfig struct {
// Enabled indicates whether the metrics probe must be created
Enabled bool
// FileName is the path of the file the metrics report is written to.
// It cannot be empty.
FileName string
// Rate is the interval between two consecutive samples of the runtime metrics
Rate time.Duration
}

// metricsProbe is a Probe that samples the go runtime metrics in background
// and writes a summary report to a file when it is closed.
type metricsProbe struct {
	config MetricsConfig
	// cancel stops the background collection. It is nil until Start succeeds.
	cancel context.CancelFunc
	// collector is the running collector. It is nil until Start succeeds.
	collector *metricsCollector
}

// NewMetricsProbe creates a metrics profiling probe with the given configuration.
// It returns an error if the configuration does not define an output file.
func NewMetricsProbe(config MetricsConfig) (Probe, error) {
	if config.FileName == "" {
		return nil, fmt.Errorf("metrics output file name cannot be empty")
	}

	return &metricsProbe{
		config: config,
	}, nil
}

// Start creates the output file and starts collecting metrics in background.
// The returned Closer must be closed to stop the collection and get the report.
func (m *metricsProbe) Start() (io.Closer, error) {
	metricsFile, err := os.Create(m.config.FileName)
	if err != nil {
		return nil, fmt.Errorf("failed to create metrics output file %q: %w", m.config.FileName, err)
	}

	collector := &metricsCollector{
		metricsFile: metricsFile,
		rate:        m.config.Rate,
	}

	ctx, cancel := context.WithCancel(context.Background())
	if err := collector.Start(ctx); err != nil {
		cancel()
		// best effort: the error worth reporting is the one that prevented the start
		_ = metricsFile.Close()
		return nil, err
	}

	m.cancel = cancel
	m.collector = collector
	return m, nil
}

// Close stops the collector and waits until the report has been written to the
// output file. It returns the error found writing the report, if any.
// Closing a probe that was never started is a no-op.
func (m *metricsProbe) Close() error {
	if m.cancel == nil {
		return nil
	}

	// stops the collector
	m.cancel()

	// the report is generated by the collector's goroutine. Returning before it
	// finishes would allow the process to exit leaving an incomplete report.
	return m.collector.wait()
}

// stats accumulates the minimum, maximum and total of the values of a metric
type stats struct {
	count  uint
	minval float64
	maxval float64
	total  float64
}

// add registers a new value for the metric
func (s *stats) add(value float64) {
	// if first sample, use value as min and max
	if s.count == 0 || value < s.minval {
		s.minval = value
	}
	if s.count == 0 || value > s.maxval {
		s.maxval = value
	}
	s.total += value
	s.count++
}

// avg returns the average of the values added, or 0 if no value was added
func (s *stats) avg() float64 {
	// avoid division by 0
	if s.count == 0 {
		return 0
	}
	return s.total / float64(s.count)
}

// min returns the minimum of the values added
func (s *stats) min() float64 {
	return s.minval
}

// max returns the maximum of the values added
func (s *stats) max() float64 {
	return s.maxval
}

// metricsCollector maintains the state for collecting metrics
type metricsCollector struct {
	rate        time.Duration
	samples     []metrics.Sample
	metricsFile *os.File
	stats       map[string]*stats
	// done is closed when the background sampling finishes
	done chan struct{}
	// err is the result of generating the report. It is written by the sampling
	// goroutine before closing done, and must be read only after done is closed.
	err error
}

// Start starts the periodic metrics collection.
// When the context is cancelled, it generates a summary to the metrics file.
// It returns an error if the sampling rate is not a positive duration.
func (m *metricsCollector) Start(ctx context.Context) error {
	// time.NewTicker panics with a non-positive duration
	if m.rate <= 0 {
		return fmt.Errorf("metrics sampling rate must be greater than zero, got %s", m.rate)
	}

	m.init()
	m.done = make(chan struct{})

	// start periodic sampling in background
	go func() {
		defer close(m.done)

		ticks := time.NewTicker(m.rate)
		defer ticks.Stop()

		for {
			select {
			case <-ticks.C:
				m.sample()
			case <-ctx.Done():
				m.err = m.generate()
				return
			}
		}
	}()

	return nil
}

// wait blocks until the background sampling started by Start has finished and
// returns the error found generating the report, if any.
func (m *metricsCollector) wait() error {
	<-m.done
	return m.err
}

// init selects the metrics to be collected and takes a first sample of them
func (m *metricsCollector) init() {
	all := metrics.All()

	// start from a clean state to prevent duplicated samples if called again
	m.samples = make([]metrics.Sample, 0, len(all))
	m.stats = make(map[string]*stats, len(all))

	for _, metric := range all {
		// skip histogram values
		if metric.Kind != metrics.KindUint64 && metric.Kind != metrics.KindFloat64 {
			continue
		}

		m.samples = append(m.samples, metrics.Sample{Name: metric.Name})
		m.stats[metric.Name] = &stats{}
	}

	m.sample()
}

// sample reads the current value of the collected metrics and updates their stats
func (m *metricsCollector) sample() {
	metrics.Read(m.samples)
	for _, sample := range m.samples {
		var value float64
		switch sample.Value.Kind() {
		case metrics.KindFloat64:
			value = sample.Value.Float64()
		case metrics.KindUint64:
			value = float64(sample.Value.Uint64())
		default:
			// only numeric values can be aggregated
			continue
		}

		m.stats[sample.Name].add(value)
	}
}

// generate writes the report to the metrics file as CSV, one row per metric
// ordered by metric name, and closes the file.
func (m *metricsCollector) generate() (err error) {
	defer func() {
		closeErr := m.metricsFile.Close()
		// do not hide a previous write error with the close error
		if err == nil && closeErr != nil {
			err = fmt.Errorf("failed to close metrics output file: %w", closeErr)
		}
	}()

	// map iteration order is random. Sort the names to get a stable report.
	names := make([]string, 0, len(m.stats))
	for name := range m.stats {
		names = append(names, name)
	}
	sort.Strings(names)

	if _, err = fmt.Fprintln(m.metricsFile, "metric,min,max,average"); err != nil {
		return fmt.Errorf("failed to write metrics report: %w", err)
	}

	for _, name := range names {
		s := m.stats[name]
		_, err = fmt.Fprintf(m.metricsFile, "%s,%.2f,%.2f,%.2f\n", name, s.min(), s.max(), s.avg())
		if err != nil {
			return fmt.Errorf("failed to write metrics report: %w", err)
		}
	}

	return nil
}
15 changes: 12 additions & 3 deletions pkg/runtime/profiler/profiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ import (

// Config is the configuration of the profiler
type Config struct {
CPU CPUConfig
Memory MemoryConfig
Trace TraceConfig
CPU CPUConfig
Memory MemoryConfig
Metrics MetricsConfig
Trace TraceConfig
}

// Profiler defines the methods to control execution profiling
Expand Down Expand Up @@ -95,5 +96,13 @@ func buildProbes(config Config) ([]Probe, error) {
probes = append(probes, probe)
}

if config.Metrics.Enabled {
probe, err := NewMetricsProbe(config.Metrics)
if err != nil {
return nil, err
}
probes = append(probes, probe)
}

return probes, nil
}

0 comments on commit 49d65e8

Please sign in to comment.