Skip to content

Commit

Permalink
Add HighCardinalityTagsDetector (#3047)
Browse files Browse the repository at this point in the history
Introduces the concept of a detector for high cardinality tags. High cardinality tags are generally a problem for metrics; except in special circumstances, tags on metrics are expected to be low cardinality or to have an otherwise bounded cardinality. Creating high cardinality tags is an instrumentation error. A library you are using may have Micrometer instrumentation and contribute high cardinality tags without you explicitly instrumenting anything. This detector can help identify the meter that has high cardinality tags if you have noticed high memory usage or other signs of high cardinality tags.
  • Loading branch information
jonatan-ivanov authored Jun 22, 2022
1 parent e5d2442 commit 3697478
Show file tree
Hide file tree
Showing 3 changed files with 344 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/*
* Copyright 2022 VMware, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.micrometer.core.instrument;

import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;

import io.micrometer.common.lang.Nullable;
import io.micrometer.common.util.internal.logging.InternalLogger;
import io.micrometer.common.util.internal.logging.InternalLoggerFactory;
import io.micrometer.common.util.internal.logging.WarnThenDebugLogger;
import io.micrometer.core.instrument.config.MeterFilter;
import io.micrometer.core.instrument.util.NamedThreadFactory;

/**
 * Tries to detect high cardinality tags by checking if the amount of Meters with the same
 * name is above a threshold. This mechanism will not detect if you have other
 * memory-usage-related issues, like appending random values to the name of the Meters,
 * the only purpose of this class is detecting the potential presence of high cardinality
 * tags. You can use this class in two ways:
 * <ol>
 * <li>Call {@link #findFirst()} and check if you get any results, if so you probably have
 * high cardinality tags.</li>
 * <li>Call {@link #start()} which will start a scheduled job that will do this check for
 * you.</li>
 * </ol>
 *
 * You can also utilize
 * {@link MeterFilter#maximumAllowableTags(String, String, int, MeterFilter)} and
 * {@link MeterFilter#maximumAllowableMetrics(int)} to set an upper bound on the number of
 * tags/metrics.
 *
 * @author Jonatan Ivanov
 * @since 1.10.0
 */
public class HighCardinalityTagsDetector implements AutoCloseable {

    private static final InternalLogger LOGGER = InternalLoggerFactory.getInstance(HighCardinalityTagsDetector.class);

    private static final WarnThenDebugLogger WARN_THEN_DEBUG_LOGGER = new WarnThenDebugLogger(
            HighCardinalityTagsDetector.class);

    /** Delay between two scheduled checks when the caller does not provide one. */
    private static final Duration DEFAULT_DELAY = Duration.ofMinutes(5);

    private final MeterRegistry registry;

    /** A meter name is reported once more than this many Meters share it. */
    private final long threshold;

    /** Receives the name of the offending Meter; falls back to {@link #logWarning(String)}. */
    private final Consumer<String> meterNameConsumer;

    private final ScheduledExecutorService scheduledExecutorService;

    private final Duration delay;

    /**
     * Creates a detector with a threshold derived from the maximum heap size (see
     * {@code calculateThreshold()}) and a delay of 5 minutes between checks.
     * @param registry The registry to use to check the Meters in it
     */
    public HighCardinalityTagsDetector(MeterRegistry registry) {
        this(registry, calculateThreshold(), DEFAULT_DELAY);
    }

    /**
     * Creates a detector that logs a warning when a high cardinality tag is found.
     * @param registry The registry to use to check the Meters in it
     * @param threshold The threshold to use to detect high cardinality tags (if the
     * number of Meters with the same name are higher than this value, that's a high
     * cardinality tag)
     * @param delay The delay between the termination of one check and the commencement of
     * the next
     */
    public HighCardinalityTagsDetector(MeterRegistry registry, long threshold, Duration delay) {
        this(registry, threshold, delay, null);
    }

    /**
     * Creates a detector that hands the name of the offending Meter to the given
     * consumer.
     * @param registry The registry to use to check the Meters in it
     * @param threshold The threshold to use to detect high cardinality tags (if the
     * number of Meters with the same name are higher than this value, that's a high
     * cardinality tag)
     * @param delay The delay between the termination of one check and the commencement of
     * the next
     * @param meterNameConsumer The action to execute if the first high cardinality tag is
     * found; when {@code null}, a warning is logged instead
     */
    public HighCardinalityTagsDetector(MeterRegistry registry, long threshold, Duration delay,
            @Nullable Consumer<String> meterNameConsumer) {
        this.registry = registry;
        this.threshold = threshold;
        this.delay = delay;
        this.meterNameConsumer = meterNameConsumer != null ? meterNameConsumer : this::logWarning;
        this.scheduledExecutorService = Executors
                .newSingleThreadScheduledExecutor(new NamedThreadFactory("high-cardinality-tags-detector"));
    }

    /**
     * Starts a scheduled job that checks if you have high cardinality tags. The first
     * check is scheduled without initial delay, subsequent checks run with the
     * configured delay between the end of one check and the start of the next.
     */
    public void start() {
        LOGGER.info(String.format("Starting %s with threshold: %d and delay: %s", this.getClass().getSimpleName(),
                this.threshold, this.delay));
        this.scheduledExecutorService.scheduleWithFixedDelay(this::detectHighCardinalityTags, 0, this.delay.toMillis(),
                TimeUnit.MILLISECONDS);
    }

    /**
     * Shuts down the scheduled job that checks if you have high cardinality tags.
     */
    public void shutdown() {
        LOGGER.info("Stopping " + this.getClass().getSimpleName());
        this.scheduledExecutorService.shutdown();
    }

    /**
     * Same as {@link #shutdown()}; lets the detector be used as an {@link AutoCloseable}.
     */
    @Override
    public void close() {
        this.shutdown();
    }

    /**
     * Body of the scheduled job: runs one check and passes a hit to the consumer.
     */
    private void detectHighCardinalityTags() {
        try {
            findFirst().ifPresent(this.meterNameConsumer);
        }
        catch (Exception exception) {
            // Exceptions are logged instead of propagated so that a failing check does
            // not escape into the scheduler.
            LOGGER.warn("Something went wrong during high cardinality tag detection", exception);
        }
    }

    /**
     * Finds the name of the first Meter that potentially has high cardinality tags, i.e.
     * the first name that is shared by more Meters than the threshold allows.
     * @return the name of the first Meter that potentially has high cardinality tags, an
     * empty Optional if none found.
     */
    public Optional<String> findFirst() {
        Map<String, Long> meterNameFrequencies = new HashMap<>();
        for (Meter meter : this.registry.getMeters()) {
            String name = meter.getId().getName();
            // Count first, then compare: this way the very first Meter of a name is
            // checked against the threshold as well (relevant for thresholds below 1).
            long frequency = meterNameFrequencies.merge(name, 1L, Long::sum);
            if (frequency > this.threshold) {
                return Optional.of(name);
            }
        }

        return Optional.empty();
    }

    /**
     * Default action for a detected Meter name: logs a message through the
     * {@link WarnThenDebugLogger}.
     * @param name the name of the Meter that seems to have high cardinality tags
     */
    private void logWarning(String name) {
        WARN_THEN_DEBUG_LOGGER.log(String.format("It seems %s has high cardinality tags (threshold: %d meters).\n"
                + "Check your configuration for the instrumentation of %s to find and fix the cause of the high cardinality (see: https://micrometer.io/docs/concepts#_tag_values).\n"
                + "If the cardinality is expected and acceptable, raise the threshold for this %s.", name,
                this.threshold, name, this.getClass().getSimpleName()));
    }

    /**
     * Derives the default threshold from the maximum heap size reported by the runtime.
     * @return 2,000 Meters per MiB of a 10% heap allowance, but never less than 1,000
     * and never more than 2,000,000
     */
    private static long calculateThreshold() {
        // 10% of the heap in MiB
        long allowance = Runtime.getRuntime().maxMemory() / 1024 / 1024 / 10;

        // 2k Meters can take ~1MiB, 2M Meters can take ~1GiB
        return Math.max(1_000, Math.min(allowance * 2_000, 2_000_000));
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import io.micrometer.core.instrument.search.Search;
import io.micrometer.core.instrument.util.TimeUtils;

import java.time.Duration;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
Expand Down Expand Up @@ -96,6 +97,9 @@ public abstract class MeterRegistry {

private PauseDetector pauseDetector = new NoPauseDetector();

@Nullable
private HighCardinalityTagsDetector highCardinalityTagsDetector = null;

/**
* We'll use snake case as a general-purpose default for registries because it is the
* most likely to result in a portable name. Camel casing is also perfectly
Expand Down Expand Up @@ -846,6 +850,50 @@ public PauseDetector pauseDetector() {
return pauseDetector;
}

/**
 * Builds a {@link HighCardinalityTagsDetector} for the enclosing registry using the
 * detector's single-argument constructor, installs it and starts it. A detector that
 * was installed earlier is closed first.
 * @return This configuration instance.
 */
public Config withHighCardinalityTagsDetector() {
    HighCardinalityTagsDetector detector = new HighCardinalityTagsDetector(MeterRegistry.this);
    return withHighCardinalityTagsDetector(detector);
}

/**
 * Builds a {@link HighCardinalityTagsDetector} for the enclosing registry with the
 * given settings, installs it and starts it. A detector that was installed earlier
 * is closed first.
 * @param threshold The threshold to use to detect high cardinality tags (if the
 * number of Meters with the same name are higher than this value, that's a high
 * cardinality tag).
 * @param delay The delay between the termination of one check and the
 * commencement of the next.
 * @return This configuration instance.
 */
public Config withHighCardinalityTagsDetector(long threshold, Duration delay) {
    HighCardinalityTagsDetector detector = new HighCardinalityTagsDetector(MeterRegistry.this, threshold, delay);
    return withHighCardinalityTagsDetector(detector);
}

// Swaps the registry's detector: closes the one currently installed (if any), stores
// the new one in the registry field and starts it.
private Config withHighCardinalityTagsDetector(HighCardinalityTagsDetector newHighCardinalityTagsDetector) {
    HighCardinalityTagsDetector previous = highCardinalityTagsDetector;
    if (previous != null) {
        previous.close();
    }

    highCardinalityTagsDetector = newHighCardinalityTagsDetector;
    newHighCardinalityTagsDetector.start();
    return this;
}

/**
 * Gives access to the {@link HighCardinalityTagsDetector} that is currently
 * installed on this registry. To "deregister" it, call
 * {@link HighCardinalityTagsDetector#close()}; to register a different one, close
 * the previous one and create a new one.
 * @return The {@link HighCardinalityTagsDetector} that is currently in effect, or
 * {@code null} if none has been set.
 */
@Nullable
public HighCardinalityTagsDetector highCardinalityTagsDetector() {
    return MeterRegistry.this.highCardinalityTagsDetector;
}

}

/**
Expand Down Expand Up @@ -1026,6 +1074,10 @@ public void close() {
}
}
}

if (highCardinalityTagsDetector != null) {
highCardinalityTagsDetector.close();
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Copyright 2022 VMware, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.micrometer.core.instrument;

import java.time.Duration;
import java.util.function.Consumer;

import io.micrometer.common.lang.Nullable;
import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThat;
import static org.awaitility.Awaitility.await;

/**
 * Tests for {@link HighCardinalityTagsDetector}
 *
 * @author Jonatan Ivanov
 */
class HighCardinalityTagsDetectorTests {

    private TestMeterNameConsumer testMeterNameConsumer;

    private SimpleMeterRegistry registry;

    private HighCardinalityTagsDetector highCardinalityTagsDetector;

    /**
     * Creates a fresh registry and a detector with a threshold of 3 that reports into
     * the recording consumer.
     */
    @BeforeEach
    void setUp() {
        this.testMeterNameConsumer = new TestMeterNameConsumer();
        this.registry = new SimpleMeterRegistry();
        this.highCardinalityTagsDetector = new HighCardinalityTagsDetector(registry, 3, Duration.ofMinutes(1),
                testMeterNameConsumer);
    }

    @AfterEach
    void tearDown() {
        this.highCardinalityTagsDetector.shutdown();
    }

    /** Four distinct tag values exceed the threshold of 3, so the scheduled job must report. */
    @Test
    void shouldDetectTagsAboveTheThreshold() {
        for (int i = 0; i < 4; i++) {
            Counter.builder("test.counter").tag("index", String.valueOf(i)).register(registry).increment();
        }
        highCardinalityTagsDetector.start();

        await().atMost(Duration.ofSeconds(1)).until(() -> "test.counter".equals(testMeterNameConsumer.getName()));
    }

    /** Exactly three Meters with the same name equal the threshold and must not be reported. */
    @Test
    void shouldNotDetectTagsOnTheThreshold() {
        for (int i = 0; i < 3; i++) {
            Counter.builder("test.counter").tag("index", String.valueOf(i)).register(registry).increment();
        }

        assertThat(highCardinalityTagsDetector.findFirst()).isEmpty();
    }

    /** Registering the same name and tag value repeatedly must not be reported. */
    @Test
    void shouldNotDetectLowCardinalityTags() {
        for (int i = 0; i < 5; i++) {
            Counter.builder("test.counter").tag("index", "0").register(registry).increment();
        }

        assertThat(highCardinalityTagsDetector.findFirst()).isEmpty();
    }

    /** Registering the same untagged name repeatedly must not be reported. */
    @Test
    void shouldNotDetectNoTags() {
        for (int i = 0; i < 5; i++) {
            Counter.builder("test.counter").register(registry).increment();
        }

        assertThat(highCardinalityTagsDetector.findFirst()).isEmpty();
    }

    /**
     * Consumer that remembers the last Meter name it was given.
     */
    private static class TestMeterNameConsumer implements Consumer<String> {

        // Written on the detector's scheduler thread and read from the awaiting test
        // code, hence volatile to guarantee the write becomes visible to the reader.
        @Nullable
        private volatile String name;

        @Override
        public void accept(String name) {
            this.name = name;
        }

        @Nullable
        public String getName() {
            return this.name;
        }

    }

}

0 comments on commit 3697478

Please sign in to comment.