Skip to content

Ignore prometheus metrics when their values are NaN or Inf #12084

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
May 24, 2019
Merged
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Fixed RabbitMQ `queue` metricset gathering when `consumer_utilisation` is set empty at the metrics source {pull}12089[12089]
- Fix direction of incoming IPv6 sockets. {pull}12248[12248]
- Validate that kibana/status metricset cannot be used when xpack is enabled. {pull}12264[12264]
- Ignore prometheus metrics when their values are NaN or Inf. {pull}12084[12084] {issue}10849[10849]

*Packetbeat*

Expand Down
31 changes: 20 additions & 11 deletions metricbeat/helper/prometheus/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,28 +135,33 @@ func (m *commonMetric) GetField() string {
func (m *commonMetric) GetValue(metric *dto.Metric) interface{} {
counter := metric.GetCounter()
if counter != nil {
return int64(counter.GetValue())
if !math.IsNaN(counter.GetValue()) && !math.IsInf(counter.GetValue(), 0) {
return int64(counter.GetValue())
}
}

gauge := metric.GetGauge()
if gauge != nil {
return gauge.GetValue()
if !math.IsNaN(gauge.GetValue()) && !math.IsInf(gauge.GetValue(), 0) {
return gauge.GetValue()
}
}

summary := metric.GetSummary()
if summary != nil {
value := common.MapStr{}
value["sum"] = summary.GetSampleSum()
value["count"] = summary.GetSampleCount()
if !math.IsNaN(summary.GetSampleSum()) && !math.IsInf(summary.GetSampleSum(), 0) {
value["sum"] = summary.GetSampleSum()
value["count"] = summary.GetSampleCount()
}

quantiles := summary.GetQuantile()
percentileMap := common.MapStr{}
for _, quantile := range quantiles {
if !math.IsNaN(quantile.GetValue()) {
key := strconv.FormatFloat((100 * quantile.GetQuantile()), 'f', -1, 64)
if !math.IsNaN(quantile.GetValue()) && !math.IsInf(quantile.GetValue(), 0) {
key := strconv.FormatFloat(100*quantile.GetQuantile(), 'f', -1, 64)
percentileMap[key] = quantile.GetValue()
}

}

if len(percentileMap) != 0 {
Expand All @@ -169,14 +174,18 @@ func (m *commonMetric) GetValue(metric *dto.Metric) interface{} {
histogram := metric.GetHistogram()
if histogram != nil {
value := common.MapStr{}
value["sum"] = histogram.GetSampleSum()
value["count"] = histogram.GetSampleCount()
if !math.IsNaN(histogram.GetSampleSum()) && !math.IsInf(histogram.GetSampleSum(), 0) {
value["sum"] = histogram.GetSampleSum()
value["count"] = histogram.GetSampleCount()
}

buckets := histogram.GetBucket()
bucketMap := common.MapStr{}
for _, bucket := range buckets {
key := strconv.FormatFloat(bucket.GetUpperBound(), 'f', -1, 64)
bucketMap[key] = bucket.GetCumulativeCount()
if bucket.GetCumulativeCount() != uint64(math.NaN()) && bucket.GetCumulativeCount() != uint64(math.Inf(0)) {
key := strconv.FormatFloat(bucket.GetUpperBound(), 'f', -1, 64)
bucketMap[key] = bucket.GetCumulativeCount()
}
}

if len(bucketMap) != 0 {
Expand Down
3 changes: 1 addition & 2 deletions metricbeat/helper/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,9 @@ func (p *prometheus) GetProcessedMetrics(mapping *MetricsMapping) ([]common.MapS

if field != "" {
event := getEvent(eventsMap, keyLabels)

// value may be a mapstr (for histograms and summaries), do a deep update to avoid smashing existing fields
update := common.MapStr{}
update.Put(field, value)
// value may be a mapstr (for histograms and summaries), do a deep update to avoid smashing existing fields
event.DeepUpdate(update)

event.DeepUpdate(labels)
Expand Down
196 changes: 196 additions & 0 deletions metricbeat/helper/prometheus/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,17 @@ metrics_one_count_total{name="jane",surname="foster"} 1
metrics_one_count_total{name="john",surname="williams"} 2
metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3

`

promGaugeKeyLabelWithNaNInf = `
# TYPE metrics_one_count_errors gauge
metrics_one_count_errors{name="jane",surname="foster"} 0
# TYPE metrics_one_count_total gauge
metrics_one_count_total{name="jane",surname="foster"} NaN
metrics_one_count_total{name="foo",surname="bar"} +Inf
metrics_one_count_total{name="john",surname="williams"} -Inf
metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3

`

promCounterKeyLabel = `
Expand All @@ -77,6 +88,16 @@ metrics_one_count_total{name="jane",surname="foster"} 1
metrics_one_count_total{name="john",surname="williams"} 2
metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3

`

promCounterKeyLabelWithNaNInf = `
# TYPE metrics_one_count_errors counter
metrics_one_count_errors{name="jane",surname="foster"} 1
# TYPE metrics_one_count_total counter
metrics_one_count_total{name="jane",surname="foster"} NaN
metrics_one_count_total{name="john",surname="williams"} +Inf
metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3

`

promHistogramKeyLabel = `
Expand All @@ -98,6 +119,19 @@ metrics_one_midichlorians_bucket{rank="padawan",alive="yes",le="+Inf"} 28
metrics_one_midichlorians_sum{rank="padawan",alive="yes"} 800001
metrics_one_midichlorians_count{rank="padawan",alive="yes"} 28

`

promHistogramKeyLabelWithNaNInf = `
# TYPE metrics_one_midichlorians histogram
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="2000"} NaN
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="4000"} +Inf
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="8000"} -Inf
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="16000"} 84
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="32000"} 86
metrics_one_midichlorians_bucket{rank="youngling",alive="yes",le="+Inf"} 86
metrics_one_midichlorians_sum{rank="youngling",alive="yes"} 1000001
metrics_one_midichlorians_count{rank="youngling",alive="yes"} 86

`

promSummaryKeyLabel = `
Expand All @@ -117,6 +151,18 @@ metrics_force_propagation_ms{kind="sith",quantile="1"} 29
metrics_force_propagation_ms_sum{kind="sith"} 112
metrics_force_propagation_ms_count{kind="sith"} 711

`

promSummaryKeyLabelWithNaNInf = `
# TYPE metrics_force_propagation_ms summary
metrics_force_propagation_ms{kind="jedi",quantile="0"} NaN
metrics_force_propagation_ms{kind="jedi",quantile="0.25"} +Inf
metrics_force_propagation_ms{kind="jedi",quantile="0.5"} -Inf
metrics_force_propagation_ms{kind="jedi",quantile="0.75"} 20
metrics_force_propagation_ms{kind="jedi",quantile="1"} 30
metrics_force_propagation_ms_sum{kind="jedi"} 50
metrics_force_propagation_ms_count{kind="jedi"} 651

`
)

Expand Down Expand Up @@ -496,6 +542,47 @@ func TestPrometheusKeyLabels(t *testing.T) {
},
},

{
testName: "Test gauge with KeyLabel With NaN Inf",
prometheusResponse: promGaugeKeyLabelWithNaNInf,
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"metrics_one_count_errors": Metric("metrics.one.count"),
"metrics_one_count_total": Metric("metrics.one.count"),
},
Labels: map[string]LabelMap{
"name": KeyLabel("metrics.one.labels.name"),
"surname": KeyLabel("metrics.one.labels.surname"),
"age": KeyLabel("metrics.one.labels.age"),
},
},
expectedEvents: []common.MapStr{
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"count": 0.0,
"labels": common.MapStr{
"name": "jane",
"surname": "foster",
},
},
},
},
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"count": 3.0,
"labels": common.MapStr{
"name": "jahn",
"surname": "baldwin",
"age": "30",
},
},
},
},
},
},

{
testName: "Test counter with KeyLabel",
prometheusResponse: promCounterKeyLabel,
Expand Down Expand Up @@ -547,6 +634,47 @@ func TestPrometheusKeyLabels(t *testing.T) {
},
},

{
testName: "Test counter with KeyLabel With NaN Inf",
prometheusResponse: promCounterKeyLabelWithNaNInf,
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"metrics_one_count_errors": Metric("metrics.one.count"),
"metrics_one_count_total": Metric("metrics.one.count"),
},
Labels: map[string]LabelMap{
"name": KeyLabel("metrics.one.labels.name"),
"surname": KeyLabel("metrics.one.labels.surname"),
"age": KeyLabel("metrics.one.labels.age"),
},
},
expectedEvents: []common.MapStr{
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"count": int64(1),
"labels": common.MapStr{
"name": "jane",
"surname": "foster",
},
},
},
},
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"count": int64(3),
"labels": common.MapStr{
"name": "jahn",
"surname": "baldwin",
"age": "30",
},
},
},
},
},
},

{
testName: "Test histogram with KeyLabel",
prometheusResponse: promHistogramKeyLabel,
Expand Down Expand Up @@ -604,6 +732,40 @@ func TestPrometheusKeyLabels(t *testing.T) {
},
},

{
testName: "Test histogram with KeyLabel With NaN Inf",
prometheusResponse: promHistogramKeyLabelWithNaNInf,
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"metrics_one_midichlorians": Metric("metrics.one.midichlorians"),
},
Labels: map[string]LabelMap{
"rank": KeyLabel("metrics.one.midichlorians.rank"),
"alive": KeyLabel("metrics.one.midichlorians.alive"),
},
},
expectedEvents: []common.MapStr{
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"midichlorians": common.MapStr{
"count": uint64(86),
"sum": 1000001.0,
"bucket": common.MapStr{
"16000": uint64(84),
"32000": uint64(86),
"+Inf": uint64(86),
},

"rank": "youngling",
"alive": "yes",
},
},
},
},
},
},

{
testName: "Test summary with KeyLabel",
prometheusResponse: promSummaryKeyLabel,
Expand Down Expand Up @@ -662,6 +824,40 @@ func TestPrometheusKeyLabels(t *testing.T) {
},
},
},

{
testName: "Test summary with KeyLabel With NaN Inf",
prometheusResponse: promSummaryKeyLabelWithNaNInf,
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"metrics_force_propagation_ms": Metric("metrics.force.propagation.ms"),
},
Labels: map[string]LabelMap{
"kind": KeyLabel("metrics.force.propagation.ms.labels.kind"),
},
},
expectedEvents: []common.MapStr{
common.MapStr{
"metrics": common.MapStr{
"force": common.MapStr{
"propagation": common.MapStr{
"ms": common.MapStr{
"count": uint64(651),
"sum": 50.0,
"percentile": common.MapStr{
"75": 20.0,
"100": 30.0,
},
"labels": common.MapStr{
"kind": "jedi",
},
},
},
},
},
},
},
},
}

for _, tc := range testCases {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# HELP kafka_consumer_records_lag_records The latest lag of the partition
# TYPE kafka_consumer_records_lag_records gauge
kafka_consumer_records_lag_records{client_id="consumer1",} NaN
kafka_consumer_records_lag_records{client_id="consumer2",} +Inf
kafka_consumer_records_lag_records{client_id="consumer3",} -Inf
kafka_consumer_records_lag_records{client_id="consumer4",} 5
# HELP http_failures Total number of http request failures
# TYPE http_failures counter
http_failures{method="GET"} 2
http_failures{method="POST"} NaN
http_failures{method="DELETE"} +Inf
# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0",} NaN
go_gc_duration_seconds{quantile="0.25",} +Inf
go_gc_duration_seconds{quantile="0.5",} -Inf
go_gc_duration_seconds{quantile="0.75"} 9.8154e-05
go_gc_duration_seconds{quantile="1",} 0.011689149
go_gc_duration_seconds_sum 3.451780079
go_gc_duration_seconds_count 13118
# HELP http_request_duration_seconds request duration histogram
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{le="0.1"} +Inf
http_request_duration_seconds_bucket{le="0.2"} -Inf
http_request_duration_seconds_bucket{le="0.5"} NaN
http_request_duration_seconds_bucket{le="1"} 1
http_request_duration_seconds_bucket{le="2"} 2
http_request_duration_seconds_bucket{le="3"} 3
http_request_duration_seconds_bucket{le="5"} 3
http_request_duration_seconds_bucket{le="+Inf"} 3
http_request_duration_seconds_sum 6
http_request_duration_seconds_count 3
Loading