Skip to content

Commit 9deab0e

Browse files
committed
Rate limited sampling
1 parent b13a24f commit 9deab0e

File tree

83 files changed

+1679
-292
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+1679
-292
lines changed

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/configuration/Configuration.java

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public void validate() {
7474
preview.validate();
7575
}
7676

77-
// TODO (trask) investigate options for mapping lowercase values to otel enum directly
77+
@Deprecated
7878
public enum SpanKind {
7979
@JsonProperty("server")
8080
SERVER(io.opentelemetry.api.trace.SpanKind.SERVER),
@@ -94,6 +94,17 @@ public enum SpanKind {
9494
}
9595
}
9696

97+
public enum TelemetryKind {
98+
@JsonProperty("request")
99+
REQUEST,
100+
@JsonProperty("dependency")
101+
DEPENDENCY,
102+
@JsonProperty("trace")
103+
TRACE,
104+
@JsonProperty("exception")
105+
EXCEPTION
106+
}
107+
97108
public enum MatchType {
98109
@JsonProperty("strict")
99110
STRICT,
@@ -151,7 +162,12 @@ public static class Role {
151162

152163
public static class Sampling {
153164

154-
public float percentage = 100;
165+
// fixed percentage of requests
166+
@Nullable public Double percentage;
167+
168+
// default is 5 requests per second (set in ConfigurationBuilder if neither percentage nor
169+
// limitPerSecond was configured)
170+
@Nullable public Double limitPerSecond;
155171
}
156172

157173
public static class SamplingPreview {
@@ -175,6 +191,8 @@ public static class SamplingPreview {
175191
// Another (lesser) reason is because .NET SDK always propagates trace flags "00" (not
176192
// sampled)
177193
//
194+
// future goal: make parentBased sampling the default if item count is received via tracestate
195+
//
178196
// IMPORTANT if changing this default, we need to keep it at least on Azure Functions
179197
public boolean parentBased;
180198

@@ -352,7 +370,7 @@ public static class PreviewConfiguration {
352370
new HashSet<>(asList("b3", "b3multi"));
353371

354372
public void validate() {
355-
for (Configuration.SamplingOverride samplingOverride : sampling.overrides) {
373+
for (SamplingOverride samplingOverride : sampling.overrides) {
356374
samplingOverride.validate();
357375
}
358376
for (Configuration.InstrumentationKeyOverride instrumentationKeyOverride :
@@ -578,22 +596,35 @@ private static boolean isRuntimeAttached() {
578596
}
579597

580598
public static class SamplingOverride {
581-
// TODO (trask) consider making this required when moving out of preview
582-
@Nullable public SpanKind spanKind;
599+
@Deprecated @Nullable public SpanKind spanKind;
600+
601+
// TODO (trask) make this required when moving out of preview
602+
// for now, if unset, the override applies to both "request" and "dependency" telemetry (unless the deprecated spanKind narrows it), for backwards compatibility
603+
@Nullable public TelemetryKind telemetryKind;
604+
605+
// TODO (trask) should this be named "standalone" (with the meaning flipped)?
606+
// especially if we aren't going to change the meaning of "request" to include unparented
607+
// INTERNAL spans
608+
public boolean inRequest = true;
609+
583610
// not using include/exclude, because you can still get exclude with this by adding a second
584611
// (exclude) override above it
585612
// (since only the first matching override is used)
586613
public List<SamplingOverrideAttribute> attributes = new ArrayList<>();
587-
public Float percentage;
614+
public Double percentage;
588615
public String id; // optional, used for debugging purposes only
589616

617+
public boolean isForRequestTelemetry() {
618+
return telemetryKind == TelemetryKind.REQUEST
619+
|| (telemetryKind == null && spanKind != SpanKind.CLIENT);
620+
}
621+
622+
public boolean isForDependencyTelemetry() {
623+
return telemetryKind == TelemetryKind.DEPENDENCY
624+
|| (telemetryKind == null && spanKind != SpanKind.SERVER);
625+
}
626+
590627
public void validate() {
591-
if (spanKind == null && attributes.isEmpty()) {
592-
// TODO add doc and go link, similar to telemetry processors
593-
throw new FriendlyException(
594-
"A sampling override configuration is missing \"spanKind\" and has no attributes.",
595-
"Please provide at least one of \"spanKind\" or \"attributes\" for the sampling override configuration.");
596-
}
597628
if (percentage == null) {
598629
// TODO add doc and go link, similar to telemetry processors
599630
throw new FriendlyException(

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/configuration/ConfigurationBuilder.java

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ public class ConfigurationBuilder {
8383
private static final String APPLICATIONINSIGHTS_SAMPLING_PERCENTAGE =
8484
"APPLICATIONINSIGHTS_SAMPLING_PERCENTAGE";
8585

86+
private static final String APPLICATIONINSIGHTS_SAMPLING_LIMIT_PER_SECOND =
87+
"APPLICATIONINSIGHTS_SAMPLING_LIMIT_PER_SECOND";
88+
8689
private static final String APPLICATIONINSIGHTS_INSTRUMENTATION_LOGGING_LEVEL =
8790
"APPLICATIONINSIGHTS_INSTRUMENTATION_LOGGING_LEVEL";
8891

@@ -181,6 +184,15 @@ public static Configuration create(Path agentJarPath, @Nullable RpConfiguration
181184
+ " and it is now enabled by default,"
182185
+ " so no need to enable it under preview configuration");
183186
}
187+
for (SamplingOverride override : config.preview.sampling.overrides) {
188+
if (override.spanKind != null) {
189+
configurationLogger.warn(
190+
"Sampling overrides \"spanKind\" has been deprecated,"
191+
+ " and support for it will be removed in a future release, please transition from"
192+
+ " \"spanKind\" to \"telemetryKind\".");
193+
}
194+
}
195+
184196
logWarningIfUsingInternalAttributes(config);
185197

186198
overlayFromEnv(config, agentJarPath.getParent());
@@ -194,6 +206,10 @@ public static Configuration create(Path agentJarPath, @Nullable RpConfiguration
194206
overlayFromEnv(rpConfiguration);
195207
overlayRpConfiguration(config, rpConfiguration);
196208
}
209+
// only fall back to default sampling configuration after all overlays have been performed
210+
if (config.sampling.limitPerSecond == null && config.sampling.percentage == null) {
211+
config.sampling.limitPerSecond = 5.0;
212+
}
197213
// only set role instance to host name as a last resort
198214
if (config.role.instance == null) {
199215
String hostname = HostName.get();
@@ -457,6 +473,10 @@ static void overlayFromEnv(Configuration config, Path baseDir) throws IOExceptio
457473
config.sampling.percentage =
458474
overlayWithEnvVar(APPLICATIONINSIGHTS_SAMPLING_PERCENTAGE, config.sampling.percentage);
459475

476+
config.sampling.limitPerSecond =
477+
overlayWithEnvVar(
478+
APPLICATIONINSIGHTS_SAMPLING_LIMIT_PER_SECOND, config.sampling.limitPerSecond);
479+
460480
config.proxy = overlayProxyFromEnv(config.proxy);
461481

462482
config.selfDiagnostics.level =
@@ -466,10 +486,9 @@ static void overlayFromEnv(Configuration config, Path baseDir) throws IOExceptio
466486
APPLICATIONINSIGHTS_SELF_DIAGNOSTICS_FILE_PATH, config.selfDiagnostics.file.path);
467487

468488
config.preview.metricIntervalSeconds =
469-
(int)
470-
overlayWithEnvVar(
471-
APPLICATIONINSIGHTS_PREVIEW_METRIC_INTERVAL_SECONDS,
472-
config.preview.metricIntervalSeconds);
489+
overlayWithEnvVar(
490+
APPLICATIONINSIGHTS_PREVIEW_METRIC_INTERVAL_SECONDS,
491+
config.preview.metricIntervalSeconds);
473492

474493
config.preview.instrumentation.springIntegration.enabled =
475494
overlayWithEnvVar(
@@ -576,6 +595,7 @@ static void overlayRpConfiguration(Configuration config, RpConfiguration rpConfi
576595
}
577596
if (rpConfiguration.sampling != null) {
578597
config.sampling.percentage = rpConfiguration.sampling.percentage;
598+
config.sampling.limitPerSecond = rpConfiguration.sampling.limitPerSecond;
579599
}
580600
if (isTrimEmpty(config.role.name)) {
581601
// only use rp configuration role name as a fallback, similar to WEBSITE_SITE_NAME
@@ -626,13 +646,25 @@ public static String overlayWithEnvVar(String name, String defaultValue) {
626646
return defaultValue;
627647
}
628648

629-
static float overlayWithEnvVar(String name, float defaultValue) {
649+
@Nullable
650+
static Double overlayWithEnvVar(String name, @Nullable Double defaultValue) {
651+
String value = getEnvVar(name);
652+
if (value != null) {
653+
configurationLogger.debug("using environment variable: {}", name);
654+
// intentionally allowing NumberFormatException to bubble up as invalid configuration and
655+
// prevent agent from starting
656+
return Double.parseDouble(value);
657+
}
658+
return defaultValue;
659+
}
660+
661+
static int overlayWithEnvVar(String name, int defaultValue) {
630662
String value = getEnvVar(name);
631663
if (value != null) {
632664
configurationLogger.debug("using environment variable: {}", name);
633665
// intentionally allowing NumberFormatException to bubble up as invalid configuration and
634666
// prevent agent from starting
635-
return Float.parseFloat(value);
667+
return Integer.parseInt(value);
636668
}
637669
return defaultValue;
638670
}
@@ -798,17 +830,21 @@ public static Configuration loadJsonConfigFile(Path configPath) throws IOExcepti
798830
}
799831

800832
// this is for external callers, where logging is ok
801-
public static float roundToNearest(float samplingPercentage) {
833+
public static double roundToNearest(double samplingPercentage) {
802834
return roundToNearest(samplingPercentage, false);
803835
}
804836

805-
// visible for testing
806-
private static float roundToNearest(float samplingPercentage, boolean doNotLogWarnMessages) {
837+
@Nullable
838+
private static Double roundToNearest(
839+
@Nullable Double samplingPercentage, boolean doNotLogWarnMessages) {
840+
if (samplingPercentage == null) {
841+
return null;
842+
}
807843
if (samplingPercentage == 0) {
808-
return 0;
844+
return 0.0;
809845
}
810846
double itemCount = 100 / samplingPercentage;
811-
float rounded = 100.0f / Math.round(itemCount);
847+
double rounded = 100.0 / Math.round(itemCount);
812848

813849
if (Math.abs(samplingPercentage - rounded) >= 1) {
814850
// TODO include link to docs in this warning message

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/exporter/AgentLogExporter.java

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,22 @@
2828
import com.azure.monitor.opentelemetry.exporter.implementation.logging.OperationLogger;
2929
import com.azure.monitor.opentelemetry.exporter.implementation.models.TelemetryItem;
3030
import com.azure.monitor.opentelemetry.exporter.implementation.quickpulse.QuickPulse;
31+
import com.microsoft.applicationinsights.agent.internal.configuration.Configuration.SamplingOverride;
32+
import com.microsoft.applicationinsights.agent.internal.sampling.AiSampler;
33+
import com.microsoft.applicationinsights.agent.internal.sampling.SamplingOverrides;
3134
import com.microsoft.applicationinsights.agent.internal.telemetry.BatchItemProcessor;
3235
import com.microsoft.applicationinsights.agent.internal.telemetry.TelemetryClient;
3336
import com.microsoft.applicationinsights.agent.internal.telemetry.TelemetryObservers;
37+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
3438
import io.opentelemetry.api.trace.SpanContext;
3539
import io.opentelemetry.sdk.common.CompletableResultCode;
3640
import io.opentelemetry.sdk.logs.data.LogData;
3741
import io.opentelemetry.sdk.logs.data.Severity;
3842
import io.opentelemetry.sdk.logs.export.LogExporter;
43+
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
3944
import java.util.Collection;
45+
import java.util.List;
46+
import java.util.concurrent.ThreadLocalRandom;
4047
import java.util.function.Consumer;
4148
import javax.annotation.Nullable;
4249
import org.slf4j.Logger;
@@ -52,15 +59,21 @@ public class AgentLogExporter implements LogExporter {
5259
// TODO (trask) could implement this in a filtering LogExporter instead
5360
private volatile Severity threshold;
5461

62+
private final SamplingOverrides logSamplingOverrides;
63+
private final SamplingOverrides exceptionSamplingOverrides;
5564
private final LogDataMapper mapper;
5665
private final Consumer<TelemetryItem> telemetryItemConsumer;
5766

5867
public AgentLogExporter(
5968
Severity threshold,
69+
List<SamplingOverride> logSamplingOverrides,
70+
List<SamplingOverride> exceptionSamplingOverrides,
6071
LogDataMapper mapper,
6172
@Nullable QuickPulse quickPulse,
6273
BatchItemProcessor batchItemProcessor) {
6374
this.threshold = threshold;
75+
this.logSamplingOverrides = new SamplingOverrides(logSamplingOverrides);
76+
this.exceptionSamplingOverrides = new SamplingOverrides(exceptionSamplingOverrides);
6477
this.mapper = mapper;
6578
telemetryItemConsumer =
6679
telemetryItem -> {
@@ -86,18 +99,46 @@ public CompletableResultCode export(Collection<LogData> logs) {
8699
return CompletableResultCode.ofFailure();
87100
}
88101
for (LogData log : logs) {
89-
SpanContext spanContext = log.getSpanContext();
90-
if (spanContext.isValid() && !spanContext.getTraceFlags().isSampled()) {
91-
continue;
92-
}
93102
logger.debug("exporting log: {}", log);
94103
try {
95104
int severity = log.getSeverity().getSeverityNumber();
96105
int threshold = this.threshold.getSeverityNumber();
97106
if (severity < threshold) {
98107
continue;
99108
}
100-
mapper.map(log, telemetryItemConsumer);
109+
110+
String stack = log.getAttributes().get(SemanticAttributes.EXCEPTION_STACKTRACE);
111+
112+
SamplingOverrides samplingOverrides =
113+
stack != null ? exceptionSamplingOverrides : logSamplingOverrides;
114+
115+
SpanContext spanContext = log.getSpanContext();
116+
117+
boolean inRequest = spanContext.isValid();
118+
Double samplingPercentage =
119+
samplingOverrides.getOverridePercentage(inRequest, log.getAttributes());
120+
121+
if (samplingPercentage != null && !shouldSample(spanContext, samplingPercentage)) {
122+
continue;
123+
}
124+
125+
if (samplingPercentage == null
126+
&& spanContext.isValid()
127+
&& !spanContext.getTraceFlags().isSampled()) {
128+
// if there is no sampling override, and the log is part of an unsampled trace, then don't
129+
// capture it
130+
continue;
131+
}
132+
133+
Long itemCount = null;
134+
if (samplingPercentage != null) {
135+
// samplingPercentage cannot be 0 here: shouldSample() returns false for 0, so such a log was already skipped above
136+
itemCount = Math.round(100.0 / samplingPercentage);
137+
}
138+
139+
TelemetryItem telemetryItem = mapper.map(log, stack, itemCount);
140+
telemetryItemConsumer.accept(telemetryItem);
141+
101142
exportingLogLogger.recordSuccess();
102143
} catch (Throwable t) {
103144
exportingLogLogger.recordFailure(t.getMessage(), t, EXPORTER_MAPPING_ERROR);
@@ -116,4 +157,22 @@ public CompletableResultCode flush() {
116157
public CompletableResultCode shutdown() {
117158
return CompletableResultCode.ofSuccess();
118159
}
160+
161+
@SuppressFBWarnings(
162+
value = "SECPR", // Predictable pseudorandom number generator
163+
justification = "Predictable random is ok for sampling decision")
164+
private static boolean shouldSample(SpanContext spanContext, double percentage) {
165+
if (percentage == 100) {
166+
// optimization, no need to calculate score
167+
return true;
168+
}
169+
if (percentage == 0) {
170+
// optimization, no need to calculate score
171+
return false;
172+
}
173+
if (spanContext.isValid()) {
174+
return AiSampler.shouldRecordAndSample(spanContext.getTraceId(), percentage);
175+
}
176+
return ThreadLocalRandom.current().nextDouble() < percentage / 100;
177+
}
119178
}

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/init/RpConfigurationPolling.java

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import java.nio.file.Files;
3737
import java.nio.file.attribute.BasicFileAttributes;
3838
import java.nio.file.attribute.FileTime;
39+
import java.util.Objects;
3940
import java.util.concurrent.Executors;
4041
import org.slf4j.Logger;
4142
import org.slf4j.LoggerFactory;
@@ -100,24 +101,28 @@ public void run() {
100101
if (!newRpConfiguration.connectionString.equals(rpConfiguration.connectionString)) {
101102
logger.debug(
102103
"Connection string from the JSON config file is overriding the previously configured connection string.");
104+
configuration.connectionString = newRpConfiguration.connectionString;
103105
telemetryClient.updateConnectionStrings(
104-
newRpConfiguration.connectionString,
106+
configuration.connectionString,
105107
configuration.internal.statsbeat.instrumentationKey,
106108
configuration.internal.statsbeat.endpoint);
107109
appIdSupplier.updateAppId();
108110
}
109111

110-
if (newRpConfiguration.sampling.percentage != rpConfiguration.sampling.percentage) {
111-
logger.debug(
112-
"Updating sampling percentage from {} to {}",
113-
rpConfiguration.sampling.percentage,
114-
newRpConfiguration.sampling.percentage);
115-
float roundedSamplingPercentage =
116-
ConfigurationBuilder.roundToNearest(newRpConfiguration.sampling.percentage);
117-
DelegatingSampler.getInstance()
118-
.setDelegate(Samplers.getSampler(roundedSamplingPercentage, configuration));
119-
BytecodeUtilImpl.samplingPercentage = roundedSamplingPercentage;
120-
rpConfiguration.sampling.percentage = newRpConfiguration.sampling.percentage;
112+
if (!Objects.equals(
113+
newRpConfiguration.sampling.percentage, rpConfiguration.sampling.percentage)
114+
|| !Objects.equals(
115+
newRpConfiguration.sampling.limitPerSecond,
116+
rpConfiguration.sampling.limitPerSecond)) {
117+
logger.debug("Updating sampling percentage");
118+
configuration.sampling.percentage = newRpConfiguration.sampling.percentage;
119+
configuration.sampling.limitPerSecond = newRpConfiguration.sampling.limitPerSecond;
120+
DelegatingSampler.getInstance().setDelegate(Samplers.getSampler(configuration));
121+
if (configuration.sampling.percentage != null) {
122+
BytecodeUtilImpl.samplingPercentage = configuration.sampling.percentage.floatValue();
123+
} else {
124+
BytecodeUtilImpl.samplingPercentage = 100;
125+
}
121126
}
122127
rpConfiguration = newRpConfiguration;
123128
}

0 commit comments

Comments
 (0)