Skip to content

Commit

Permalink
Merge pull request google#2611 from katarzyna-z/kk_aggregate_perf
Browse files Browse the repository at this point in the history
Aggregate perf metrics
  • Loading branch information
dashpole authored Aug 17, 2020
2 parents 888dc21 + 538a6d5 commit 65e04ec
Show file tree
Hide file tree
Showing 4 changed files with 348 additions and 34 deletions.
6 changes: 6 additions & 0 deletions docs/runtime_options.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ cAdvisor stores the latest historical data in memory. How long of a history it s
--perf_events_config="" Path to a JSON file containing configuration of perf events to measure. Empty value disables perf events measuring.
```

Core perf events can be exposed on the Prometheus endpoint either per CPU or aggregated by event. This is controlled through the `--disable_metrics` parameter with the `percpu` option, e.g.:
- `--disable_metrics="percpu"` - core perf events are aggregated
- `--disable_metrics=""` - core perf events are exposed per CPU.

The aggregated form of core perf events significantly decreases the volume of data. In the aggregated form, the scaling ratio (`container_perf_events_scaling_ratio`) reports the lowest scaling ratio observed for a specific event, i.e. it shows the worst precision.

### Perf subsystem introduction

One of the goals of the kernel perf subsystem is to instrument CPU performance counters, which makes it possible to profile applications.
Expand Down
142 changes: 108 additions & 34 deletions metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -1577,41 +1577,48 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
}...)
}
if includedMetrics.Has(container.PerfMetrics) {
if includedMetrics.Has(container.PerCpuUsageMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_perf_events_total",
help: "Perf event metric.",
valueType: prometheus.CounterValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
return getPerCPUCorePerfEvents(s)
},
},
{
name: "container_perf_events_scaling_ratio",
help: "Perf event metric scaling ratio.",
valueType: prometheus.GaugeValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
return getPerCPUCoreScalingRatio(s)
},
}}...)
} else {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_perf_events_total",
help: "Perf event metric.",
valueType: prometheus.CounterValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
return getAggregatedCorePerfEvents(s)
},
},
{
name: "container_perf_events_scaling_ratio",
help: "Perf event metric scaling ratio.",
valueType: prometheus.GaugeValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
return getMinCoreScalingRatio(s)
},
}}...)
}
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_perf_events_total",
help: "Perf event metric.",
valueType: prometheus.CounterValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfStats))
for _, metric := range s.PerfStats {
values = append(values, metricValue{
value: float64(metric.Value),
labels: []string{strconv.Itoa(metric.Cpu), metric.Name},
timestamp: s.Timestamp,
})
}
return values
},
},
{
name: "container_perf_events_scaling_ratio",
help: "Perf event metric scaling ratio.",
valueType: prometheus.GaugeValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfStats))
for _, metric := range s.PerfStats {
values = append(values, metricValue{
value: metric.ScalingRatio,
labels: []string{strconv.Itoa(metric.Cpu), metric.Name},
timestamp: s.Timestamp,
})
}
return values
},
},
{
name: "container_perf_uncore_events_total",
help: "Perf uncore event metric.",
Expand Down Expand Up @@ -1940,3 +1947,70 @@ func getNumaStatsPerNode(nodeStats map[uint8]uint64, labels []string, timestamp
}
return mValues
}

// getPerCPUCorePerfEvents emits one metric value per entry in s.PerfStats.
// Each value carries the entry's counter value converted to float64, the
// labels {cpu number, event name}, and the timestamp of s.
func getPerCPUCorePerfEvents(s *info.ContainerStats) metricValues {
	stats := s.PerfStats
	out := make(metricValues, len(stats))
	for i := range stats {
		out[i] = metricValue{
			value:     float64(stats[i].Value),
			labels:    []string{strconv.Itoa(stats[i].Cpu), stats[i].Name},
			timestamp: s.Timestamp,
		}
	}
	return out
}

// getPerCPUCoreScalingRatio emits one metric value per entry in s.PerfStats.
// Each value carries the entry's scaling ratio, the labels
// {cpu number, event name}, and the timestamp of s.
func getPerCPUCoreScalingRatio(s *info.ContainerStats) metricValues {
	stats := s.PerfStats
	out := make(metricValues, len(stats))
	for i := range stats {
		out[i] = metricValue{
			value:     stats[i].ScalingRatio,
			labels:    []string{strconv.Itoa(stats[i].Cpu), stats[i].Name},
			timestamp: s.Timestamp,
		}
	}
	return out
}

// getAggregatedCorePerfEvents sums the counter values in s.PerfStats per event
// name and emits one metric value per distinct event. The first label (the
// "cpu" position used by the per-CPU variant) is left empty because the value
// no longer belongs to a single CPU. Results follow map iteration order, so
// their order is not stable between calls.
func getAggregatedCorePerfEvents(s *info.ContainerStats) metricValues {
	// Accumulate totals keyed by event name.
	totals := map[string]uint64{}
	for i := range s.PerfStats {
		stat := &s.PerfStats[i]
		totals[stat.Name] += stat.Value
	}

	// Turn each total into a metric value.
	out := make(metricValues, 0, len(totals))
	for name, total := range totals {
		out = append(out, metricValue{
			value:     float64(total),
			labels:    []string{"", name},
			timestamp: s.Timestamp,
		})
	}
	return out
}

// getMinCoreScalingRatio finds, for every event name in s.PerfStats, the
// smallest scaling ratio among its entries and emits one metric value per
// distinct event. The first label (the "cpu" position used by the per-CPU
// variant) is left empty. Results follow map iteration order, so their order
// is not stable between calls.
func getMinCoreScalingRatio(s *info.ContainerStats) metricValues {
	// Track the minimal scaling ratio seen so far for each event.
	lowest := map[string]float64{}
	for i := range s.PerfStats {
		name, ratio := s.PerfStats[i].Name, s.PerfStats[i].ScalingRatio
		// Record the ratio when the event is new or when it undercuts the
		// current minimum.
		if current, seen := lowest[name]; !seen || ratio < current {
			lowest[name] = ratio
		}
	}

	out := make(metricValues, 0, len(lowest))
	for name, ratio := range lowest {
		out = append(out, metricValue{
			value:     ratio,
			labels:    []string{"", name},
			timestamp: s.Timestamp,
		})
	}
	return out
}
197 changes: 197 additions & 0 deletions metrics/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,21 @@ func TestPrometheusCollector(t *testing.T) {
testPrometheusCollector(t, reg, "testdata/prometheus_metrics")
}

func TestPrometheusCollectorWithPerfAggregated(t *testing.T) {
metrics := container.MetricSet{
container.PerfMetrics: struct{}{},
}
c := NewPrometheusCollector(testSubcontainersInfoProvider{}, func(container *info.ContainerInfo) map[string]string {
s := DefaultContainerLabels(container)
s["zone.name"] = "hello"
return s
}, metrics, now, v2.RequestOptions{})
reg := prometheus.NewRegistry()
reg.MustRegister(c)

testPrometheusCollector(t, reg, "testdata/prometheus_metrics_perf_aggregated")
}

func testPrometheusCollector(t *testing.T, gatherer prometheus.Gatherer, metricsFile string) {
wantMetrics, err := os.Open(metricsFile)
if err != nil {
Expand Down Expand Up @@ -122,3 +137,185 @@ func (m *mockInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
// mockLabelFunc is a label function stub: it ignores its argument and always
// returns a new, empty label map.
func mockLabelFunc(*info.ContainerInfo) map[string]string {
	return make(map[string]string)
}

// TestGetPerCpuCorePerfEvents checks that getPerCPUCorePerfEvents returns one
// value per PerfStat entry and that every input counter value is present.
func TestGetPerCpuCorePerfEvents(t *testing.T) {
	// perfStat builds a single per-CPU sample for the fixture below.
	perfStat := func(cpu int, name string, value uint64, ratio float64) info.PerfStat {
		return info.PerfStat{
			PerfValue: info.PerfValue{
				ScalingRatio: ratio,
				Value:        value,
				Name:         name,
			},
			Cpu: cpu,
		}
	}
	containerStats := &info.ContainerStats{
		Timestamp: time.Unix(1395066367, 0),
		PerfStats: []info.PerfStat{
			perfStat(0, "instructions", 123, 1.0),
			perfStat(1, "instructions", 456, 0.5),
			perfStat(0, "instructions_retired", 321, 0.7),
			perfStat(1, "instructions_retired", 789, 0.3),
		},
	}

	metricVals := getPerCPUCorePerfEvents(containerStats)
	assert.Equal(t, 4, len(metricVals))

	got := make([]float64, 0, len(metricVals))
	for i := range metricVals {
		got = append(got, metricVals[i].value)
	}
	for _, want := range []float64{123.0, 456.0, 321.0, 789.0} {
		assert.Contains(t, got, want)
	}
}

// TestGetPerCpuCoreScalingRatio checks that getPerCPUCoreScalingRatio returns
// one value per PerfStat entry and that every input scaling ratio is present.
func TestGetPerCpuCoreScalingRatio(t *testing.T) {
	// perfStat builds a single per-CPU sample for the fixture below.
	perfStat := func(cpu int, name string, value uint64, ratio float64) info.PerfStat {
		return info.PerfStat{
			PerfValue: info.PerfValue{
				ScalingRatio: ratio,
				Value:        value,
				Name:         name,
			},
			Cpu: cpu,
		}
	}
	containerStats := &info.ContainerStats{
		Timestamp: time.Unix(1395066367, 0),
		PerfStats: []info.PerfStat{
			perfStat(0, "instructions", 123, 1.0),
			perfStat(1, "instructions", 456, 0.5),
			perfStat(0, "instructions_retired", 321, 0.7),
			perfStat(1, "instructions_retired", 789, 0.3),
		},
	}

	metricVals := getPerCPUCoreScalingRatio(containerStats)
	assert.Equal(t, 4, len(metricVals))

	got := make([]float64, 0, len(metricVals))
	for i := range metricVals {
		got = append(got, metricVals[i].value)
	}
	for _, want := range []float64{1.0, 0.5, 0.7, 0.3} {
		assert.Contains(t, got, want)
	}
}

// TestGetAggCorePerfEvents checks that getAggregatedCorePerfEvents collapses
// the per-CPU samples into one value per event name, each being the sum of
// that event's counters (123+456 and 321+789).
func TestGetAggCorePerfEvents(t *testing.T) {
	// perfStat builds a single per-CPU sample for the fixture below.
	perfStat := func(cpu int, name string, value uint64, ratio float64) info.PerfStat {
		return info.PerfStat{
			PerfValue: info.PerfValue{
				ScalingRatio: ratio,
				Value:        value,
				Name:         name,
			},
			Cpu: cpu,
		}
	}
	containerStats := &info.ContainerStats{
		Timestamp: time.Unix(1395066367, 0),
		PerfStats: []info.PerfStat{
			perfStat(0, "instructions", 123, 1.0),
			perfStat(1, "instructions", 456, 0.5),
			perfStat(0, "instructions_retired", 321, 0.7),
			perfStat(1, "instructions_retired", 789, 0.3),
		},
	}

	metricVals := getAggregatedCorePerfEvents(containerStats)
	assert.Equal(t, 2, len(metricVals))

	got := make([]float64, 0, len(metricVals))
	for i := range metricVals {
		got = append(got, metricVals[i].value)
	}
	for _, want := range []float64{579.0, 1110.0} {
		assert.Contains(t, got, want)
	}
}

// TestGetMinCoreScalingRatio checks that getMinCoreScalingRatio returns one
// value per event name, each being the smallest scaling ratio among that
// event's per-CPU samples (0.5 and 0.3).
func TestGetMinCoreScalingRatio(t *testing.T) {
	// perfStat builds a single per-CPU sample for the fixture below.
	perfStat := func(cpu int, name string, value uint64, ratio float64) info.PerfStat {
		return info.PerfStat{
			PerfValue: info.PerfValue{
				ScalingRatio: ratio,
				Value:        value,
				Name:         name,
			},
			Cpu: cpu,
		}
	}
	containerStats := &info.ContainerStats{
		Timestamp: time.Unix(1395066367, 0),
		PerfStats: []info.PerfStat{
			perfStat(0, "instructions", 123, 1.0),
			perfStat(1, "instructions", 456, 0.5),
			perfStat(0, "instructions_retired", 321, 0.7),
			perfStat(1, "instructions_retired", 789, 0.3),
		},
	}

	metricVals := getMinCoreScalingRatio(containerStats)
	assert.Equal(t, 2, len(metricVals))

	got := make([]float64, 0, len(metricVals))
	for i := range metricVals {
		got = append(got, metricVals[i].value)
	}
	for _, want := range []float64{0.5, 0.3} {
		assert.Contains(t, got, want)
	}
}
Loading

0 comments on commit 65e04ec

Please sign in to comment.