add metrics otelcol_exporter_queue_capacity (#5475)
Fix #4902, add metric otelcol_exporter_queue_capacity

Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
fatsheep9146 authored Jul 19, 2022
1 parent 47b1ed6 commit 389c047
Showing 6 changed files with 25 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -9,6 +9,7 @@
- `loggingexporter`: Decouple `loglevel` field from level of logged messages (#5678)
- Expose `pcommon.NewSliceFromRaw` function (#5679)
- `loggingexporter`: create the exporter's logger from the service's logger (#5677)
- Add `otelcol_exporter_queue_capacity` metric to show the collector's exporter queue capacity (#5475)

### 🧰 Bug fixes 🧰

6 changes: 4 additions & 2 deletions docs/monitoring.md
@@ -34,9 +34,11 @@ Most exporters offer a [queue/retry mechanism](../exporter/exporterhelper/README
that is recommended as the retry mechanism for the Collector and as such should
be used in any production deployment.

**TODO:** Add metric to monitor queue length.
The `otelcol_exporter_queue_capacity` metric indicates the capacity of the retry queue (in batches). The `otelcol_exporter_queue_size` metric indicates the current size of the retry queue. Compare these two metrics to check whether the queue capacity is sufficient for your workload.
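
As a rough illustration (not part of this change), the sketch below scrapes the Collector's own Prometheus endpoint and prints the queue utilization per exporter. It assumes the default self-telemetry address `localhost:8888` and uses only the Go standard library; adjust the endpoint to your configuration.

```go
package main

import (
	"bufio"
	"fmt"
	"log"
	"net/http"
	"strconv"
	"strings"
)

// scrapeGauge collects every series of the named metric from a Prometheus
// text-format endpoint, keyed by its label set (e.g. `{exporter="otlp"}`).
func scrapeGauge(endpoint, name string) (map[string]float64, error) {
	resp, err := http.Get(endpoint)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	values := map[string]float64{}
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		// Lines look like: otelcol_exporter_queue_size{exporter="otlp"} 0
		if !strings.HasPrefix(line, name+"{") {
			continue
		}
		parts := strings.SplitN(line, "} ", 2)
		if len(parts) != 2 {
			continue
		}
		v, err := strconv.ParseFloat(strings.TrimSpace(parts[1]), 64)
		if err != nil {
			continue
		}
		values[strings.TrimPrefix(parts[0], name)+"}"] = v
	}
	return values, scanner.Err()
}

func main() {
	const endpoint = "http://localhost:8888/metrics" // default self-telemetry endpoint

	capacity, err := scrapeGauge(endpoint, "otelcol_exporter_queue_capacity")
	if err != nil {
		log.Fatal(err)
	}
	size, err := scrapeGauge(endpoint, "otelcol_exporter_queue_size")
	if err != nil {
		log.Fatal(err)
	}
	for labels, capVal := range capacity {
		if capVal > 0 {
			fmt.Printf("%s: %.0f of %.0f batches queued (%.1f%% full)\n",
				labels, size[labels], capVal, 100*size[labels]/capVal)
		}
	}
}
```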

Currently, the queue/retry mechanism only supports logging for monitoring. Check
The `otelcol_exporter_enqueue_failed_spans`, `otelcol_exporter_enqueue_failed_metric_points` and `otelcol_exporter_enqueue_failed_log_records` metrics indicate the number of spans, metric points, and log records that failed to be added to the sending queue. This may be caused by a queue full of unsettled elements, so you may need to decrease your sending rate or horizontally scale collectors.
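
Similarly (illustrative only, not part of this change), a small watcher could poll these counters and warn when they grow between scrapes, again assuming the default `localhost:8888` self-telemetry endpoint:

```go
package main

import (
	"bufio"
	"fmt"
	"net/http"
	"strconv"
	"strings"
	"time"
)

// sumCounter adds up every series of the named counter, whatever its labels.
func sumCounter(endpoint, name string) (float64, error) {
	resp, err := http.Get(endpoint)
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()

	var total float64
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) < 2 || !strings.HasPrefix(fields[0], name) {
			continue
		}
		if v, err := strconv.ParseFloat(fields[len(fields)-1], 64); err == nil {
			total += v
		}
	}
	return total, scanner.Err()
}

func main() {
	const endpoint = "http://localhost:8888/metrics"
	counters := []string{
		"otelcol_exporter_enqueue_failed_spans",
		"otelcol_exporter_enqueue_failed_metric_points",
		"otelcol_exporter_enqueue_failed_log_records",
	}
	last := map[string]float64{}
	for {
		for _, name := range counters {
			total, err := sumCounter(endpoint, name)
			if err != nil {
				fmt.Println("scrape failed:", err)
				continue
			}
			if total > last[name] {
				fmt.Printf("%s grew by %.0f; the sending queue may be full\n", name, total-last[name])
			}
			last[name] = total
		}
		time.Sleep(30 * time.Second)
	}
}
```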

The queue/retry mechanism also supports logging for monitoring. Check
the logs for messages like `"Dropping data because sending_queue is full"`.

### Receive Failures
7 changes: 7 additions & 0 deletions exporter/exporterhelper/obsreport.go
@@ -40,6 +40,7 @@ func init() {
type instruments struct {
	registry                    *metric.Registry
	queueSize                   *metric.Int64DerivedGauge
	queueCapacity               *metric.Int64DerivedGauge
	failedToEnqueueTraceSpans   *metric.Int64Cumulative
	failedToEnqueueMetricPoints *metric.Int64Cumulative
	failedToEnqueueLogRecords   *metric.Int64Cumulative
@@ -55,6 +56,12 @@ func newInstruments(registry *metric.Registry) *instruments {
		metric.WithLabelKeys(obsmetrics.ExporterKey),
		metric.WithUnit(metricdata.UnitDimensionless))

	insts.queueCapacity, _ = registry.AddInt64DerivedGauge(
		obsmetrics.ExporterKey+"/queue_capacity",
		metric.WithDescription("Fixed capacity of the retry queue (in batches)"),
		metric.WithLabelKeys(obsmetrics.ExporterKey),
		metric.WithUnit(metricdata.UnitDimensionless))

	insts.failedToEnqueueTraceSpans, _ = registry.AddInt64Cumulative(
		obsmetrics.ExporterKey+"/enqueue_failed_spans",
		metric.WithDescription("Number of spans failed to be added to the sending queue."),
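
For context on the registration above: an `Int64DerivedGauge` from `go.opencensus.io/metric` reports whatever its callback returns at read time, which is how a fixed queue capacity can be exposed by simply returning the configured value. Below is a standalone sketch of that pattern, not taken from the Collector; the metric name, label value, and hard-coded capacity are illustrative.

```go
package main

import (
	"fmt"
	"log"

	"go.opencensus.io/metric"
	"go.opencensus.io/metric/metricdata"
)

func main() {
	registry := metric.NewRegistry()

	// Register a derived gauge; its value is produced by a callback.
	gauge, err := registry.AddInt64DerivedGauge(
		"exporter/queue_capacity",
		metric.WithDescription("Fixed capacity of the retry queue (in batches)"),
		metric.WithLabelKeys("exporter"),
		metric.WithUnit(metricdata.UnitDimensionless))
	if err != nil {
		log.Fatal(err)
	}

	queueCapacity := int64(5000) // stand-in for the exporter's configured queue size
	// The callback runs every time the registry is read.
	if err := gauge.UpsertEntry(func() int64 { return queueCapacity },
		metricdata.NewLabelValue("otlp")); err != nil {
		log.Fatal(err)
	}

	// Reading the registry yields one time series per label set.
	for _, m := range registry.Read() {
		for _, ts := range m.TimeSeries {
			fmt.Println(m.Descriptor.Name, ts.LabelValues[0].Value, ts.Points[0].Value)
		}
	}
}
```
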
6 changes: 6 additions & 0 deletions exporter/exporterhelper/queued_retry_experimental.go
@@ -219,6 +219,12 @@ func (qrs *queuedRetrySender) start(ctx context.Context, host component.Host) error {
		if err != nil {
			return fmt.Errorf("failed to create retry queue size metric: %w", err)
		}
		err = globalInstruments.queueCapacity.UpsertEntry(func() int64 {
			return int64(qrs.cfg.QueueSize)
		}, metricdata.NewLabelValue(qrs.fullName()))
		if err != nil {
			return fmt.Errorf("failed to create retry queue capacity metric: %w", err)
		}
	}

	return nil
6 changes: 6 additions & 0 deletions exporter/exporterhelper/queued_retry_inmemory.go
@@ -128,6 +128,12 @@ func (qrs *queuedRetrySender) start(context.Context, component.Host) error {
		if err != nil {
			return fmt.Errorf("failed to create retry queue size metric: %w", err)
		}
		err = globalInstruments.queueCapacity.UpsertEntry(func() int64 {
			return int64(qrs.cfg.QueueSize)
		}, metricdata.NewLabelValue(qrs.fullName))
		if err != nil {
			return fmt.Errorf("failed to create retry queue capacity metric: %w", err)
		}
	}

	return nil
1 change: 1 addition & 0 deletions exporter/exporterhelper/queued_retry_test.go
@@ -342,6 +342,7 @@ func TestQueuedRetry_QueueMetricsReported(t *testing.T) {
	be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler())
	require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost()))

	checkValueForGlobalManager(t, defaultExporterTags, int64(5000), "exporter/queue_capacity")
	for i := 0; i < 7; i++ {
		require.NoError(t, be.sender.send(newErrorRequest(context.Background())))
	}
