Skip to content

Commit 8bb4b5b

Browse files
committed
Rate limited sampling
1 parent b13a24f commit 8bb4b5b

File tree

83 files changed

+1664
-316
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

83 files changed

+1664
-316
lines changed

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/configuration/Configuration.java

Lines changed: 41 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ public void validate() {
7474
preview.validate();
7575
}
7676

77-
// TODO (trask) investigate options for mapping lowercase values to otel enum directly
77+
@Deprecated
7878
public enum SpanKind {
7979
@JsonProperty("server")
8080
SERVER(io.opentelemetry.api.trace.SpanKind.SERVER),
@@ -94,6 +94,17 @@ public enum SpanKind {
9494
}
9595
}
9696

97+
public enum TelemetryKind {
98+
@JsonProperty("request")
99+
REQUEST,
100+
@JsonProperty("dependency")
101+
DEPENDENCY,
102+
@JsonProperty("trace")
103+
TRACE,
104+
@JsonProperty("exception")
105+
EXCEPTION
106+
}
107+
97108
public enum MatchType {
98109
@JsonProperty("strict")
99110
STRICT,
@@ -151,7 +162,11 @@ public static class Role {
151162

152163
public static class Sampling {
153164

154-
public float percentage = 100;
165+
// fixed percentage of requests
166+
@Nullable public Double percentage;
167+
168+
// default is 5 requests per second
169+
@Nullable public Double limitPerSecond;
155170
}
156171

157172
public static class SamplingPreview {
@@ -175,6 +190,8 @@ public static class SamplingPreview {
175190
// Another (lesser) reason is because .NET SDK always propagates trace flags "00" (not
176191
// sampled)
177192
//
193+
// future goal: make parentBased sampling the default if item count is received via tracestate
194+
//
178195
// IMPORTANT if changing this default, we need to keep it at least on Azure Functions
179196
public boolean parentBased;
180197

@@ -352,7 +369,7 @@ public static class PreviewConfiguration {
352369
new HashSet<>(asList("b3", "b3multi"));
353370

354371
public void validate() {
355-
for (Configuration.SamplingOverride samplingOverride : sampling.overrides) {
372+
for (SamplingOverride samplingOverride : sampling.overrides) {
356373
samplingOverride.validate();
357374
}
358375
for (Configuration.InstrumentationKeyOverride instrumentationKeyOverride :
@@ -578,22 +595,34 @@ private static boolean isRuntimeAttached() {
578595
}
579596

580597
public static class SamplingOverride {
581-
// TODO (trask) consider making this required when moving out of preview
582-
@Nullable public SpanKind spanKind;
598+
@Deprecated @Nullable public SpanKind spanKind;
599+
600+
// TODO (trask) make this required when moving out of preview
601+
// for now the default is both "request" and "dependency" for backwards compatibility
602+
@Nullable public TelemetryKind telemetryKind;
603+
604+
// TODO (trask) should this be named "standalone" (and meaning flipped)
605+
// TODO (trask) need to add test for this
606+
public boolean inRequest = true;
607+
583608
// not using include/exclude, because you can still get exclude with this by adding a second
584609
// (exclude) override above it
585610
// (since only the first matching override is used)
586611
public List<SamplingOverrideAttribute> attributes = new ArrayList<>();
587-
public Float percentage;
612+
public Double percentage;
588613
public String id; // optional, used for debugging purposes only
589614

615+
public boolean isForRequestTelemetry() {
616+
return telemetryKind == TelemetryKind.REQUEST
617+
|| (telemetryKind == null && spanKind != SpanKind.CLIENT);
618+
}
619+
620+
public boolean isForDependencyTelemetry() {
621+
return telemetryKind == TelemetryKind.DEPENDENCY
622+
|| (telemetryKind == null && spanKind != SpanKind.SERVER);
623+
}
624+
590625
public void validate() {
591-
if (spanKind == null && attributes.isEmpty()) {
592-
// TODO add doc and go link, similar to telemetry processors
593-
throw new FriendlyException(
594-
"A sampling override configuration is missing \"spanKind\" and has no attributes.",
595-
"Please provide at least one of \"spanKind\" or \"attributes\" for the sampling override configuration.");
596-
}
597626
if (percentage == null) {
598627
// TODO add doc and go link, similar to telemetry processors
599628
throw new FriendlyException(

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/configuration/ConfigurationBuilder.java

Lines changed: 47 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,9 @@ public class ConfigurationBuilder {
8383
private static final String APPLICATIONINSIGHTS_SAMPLING_PERCENTAGE =
8484
"APPLICATIONINSIGHTS_SAMPLING_PERCENTAGE";
8585

86+
private static final String APPLICATIONINSIGHTS_SAMPLING_LIMIT_PER_SECOND =
87+
"APPLICATIONINSIGHTS_SAMPLING_LIMIT_PER_SECOND";
88+
8689
private static final String APPLICATIONINSIGHTS_INSTRUMENTATION_LOGGING_LEVEL =
8790
"APPLICATIONINSIGHTS_INSTRUMENTATION_LOGGING_LEVEL";
8891

@@ -181,6 +184,15 @@ public static Configuration create(Path agentJarPath, @Nullable RpConfiguration
181184
+ " and it is now enabled by default,"
182185
+ " so no need to enable it under preview configuration");
183186
}
187+
for (SamplingOverride override : config.preview.sampling.overrides) {
188+
if (override.spanKind != null) {
189+
configurationLogger.warn(
190+
"Sampling overrides \"spanKind\" has been deprecated,"
191+
+ " and support for it will be removed in a future release, please transition from"
192+
+ " \"spanKind\" to \"telemetryKind\".");
193+
}
194+
}
195+
184196
logWarningIfUsingInternalAttributes(config);
185197

186198
overlayFromEnv(config, agentJarPath.getParent());
@@ -194,6 +206,10 @@ public static Configuration create(Path agentJarPath, @Nullable RpConfiguration
194206
overlayFromEnv(rpConfiguration);
195207
overlayRpConfiguration(config, rpConfiguration);
196208
}
209+
// only fall back to default sampling configuration after all overlays have been performed
210+
if (config.sampling.limitPerSecond == null && config.sampling.percentage == null) {
211+
config.sampling.limitPerSecond = 5.0;
212+
}
197213
// only set role instance to host name as a last resort
198214
if (config.role.instance == null) {
199215
String hostname = HostName.get();
@@ -457,6 +473,10 @@ static void overlayFromEnv(Configuration config, Path baseDir) throws IOExceptio
457473
config.sampling.percentage =
458474
overlayWithEnvVar(APPLICATIONINSIGHTS_SAMPLING_PERCENTAGE, config.sampling.percentage);
459475

476+
config.sampling.limitPerSecond =
477+
overlayWithEnvVar(
478+
APPLICATIONINSIGHTS_SAMPLING_LIMIT_PER_SECOND, config.sampling.limitPerSecond);
479+
460480
config.proxy = overlayProxyFromEnv(config.proxy);
461481

462482
config.selfDiagnostics.level =
@@ -466,10 +486,9 @@ static void overlayFromEnv(Configuration config, Path baseDir) throws IOExceptio
466486
APPLICATIONINSIGHTS_SELF_DIAGNOSTICS_FILE_PATH, config.selfDiagnostics.file.path);
467487

468488
config.preview.metricIntervalSeconds =
469-
(int)
470-
overlayWithEnvVar(
471-
APPLICATIONINSIGHTS_PREVIEW_METRIC_INTERVAL_SECONDS,
472-
config.preview.metricIntervalSeconds);
489+
overlayWithEnvVar(
490+
APPLICATIONINSIGHTS_PREVIEW_METRIC_INTERVAL_SECONDS,
491+
config.preview.metricIntervalSeconds);
473492

474493
config.preview.instrumentation.springIntegration.enabled =
475494
overlayWithEnvVar(
@@ -576,6 +595,7 @@ static void overlayRpConfiguration(Configuration config, RpConfiguration rpConfi
576595
}
577596
if (rpConfiguration.sampling != null) {
578597
config.sampling.percentage = rpConfiguration.sampling.percentage;
598+
config.sampling.limitPerSecond = rpConfiguration.sampling.limitPerSecond;
579599
}
580600
if (isTrimEmpty(config.role.name)) {
581601
// only use rp configuration role name as a fallback, similar to WEBSITE_SITE_NAME
@@ -626,13 +646,25 @@ public static String overlayWithEnvVar(String name, String defaultValue) {
626646
return defaultValue;
627647
}
628648

629-
static float overlayWithEnvVar(String name, float defaultValue) {
649+
@Nullable
650+
static Double overlayWithEnvVar(String name, @Nullable Double defaultValue) {
651+
String value = getEnvVar(name);
652+
if (value != null) {
653+
configurationLogger.debug("using environment variable: {}", name);
654+
// intentionally allowing NumberFormatException to bubble up as invalid configuration and
655+
// prevent agent from starting
656+
return Double.parseDouble(value);
657+
}
658+
return defaultValue;
659+
}
660+
661+
static int overlayWithEnvVar(String name, int defaultValue) {
630662
String value = getEnvVar(name);
631663
if (value != null) {
632664
configurationLogger.debug("using environment variable: {}", name);
633665
// intentionally allowing NumberFormatException to bubble up as invalid configuration and
634666
// prevent agent from starting
635-
return Float.parseFloat(value);
667+
return Integer.parseInt(value);
636668
}
637669
return defaultValue;
638670
}
@@ -798,17 +830,21 @@ public static Configuration loadJsonConfigFile(Path configPath) throws IOExcepti
798830
}
799831

800832
// this is for external callers, where logging is ok
801-
public static float roundToNearest(float samplingPercentage) {
833+
public static double roundToNearest(double samplingPercentage) {
802834
return roundToNearest(samplingPercentage, false);
803835
}
804836

805-
// visible for testing
806-
private static float roundToNearest(float samplingPercentage, boolean doNotLogWarnMessages) {
837+
@Nullable
838+
private static Double roundToNearest(
839+
@Nullable Double samplingPercentage, boolean doNotLogWarnMessages) {
840+
if (samplingPercentage == null) {
841+
return null;
842+
}
807843
if (samplingPercentage == 0) {
808-
return 0;
844+
return 0.0;
809845
}
810846
double itemCount = 100 / samplingPercentage;
811-
float rounded = 100.0f / Math.round(itemCount);
847+
double rounded = 100.0 / Math.round(itemCount);
812848

813849
if (Math.abs(samplingPercentage - rounded) >= 1) {
814850
// TODO include link to docs in this warning message

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/exporter/AgentLogExporter.java

Lines changed: 62 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -28,15 +28,22 @@
2828
import com.azure.monitor.opentelemetry.exporter.implementation.logging.OperationLogger;
2929
import com.azure.monitor.opentelemetry.exporter.implementation.models.TelemetryItem;
3030
import com.azure.monitor.opentelemetry.exporter.implementation.quickpulse.QuickPulse;
31+
import com.microsoft.applicationinsights.agent.internal.configuration.Configuration.SamplingOverride;
32+
import com.microsoft.applicationinsights.agent.internal.sampling.AiSampler;
33+
import com.microsoft.applicationinsights.agent.internal.sampling.SamplingOverrides;
3134
import com.microsoft.applicationinsights.agent.internal.telemetry.BatchItemProcessor;
3235
import com.microsoft.applicationinsights.agent.internal.telemetry.TelemetryClient;
3336
import com.microsoft.applicationinsights.agent.internal.telemetry.TelemetryObservers;
37+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
3438
import io.opentelemetry.api.trace.SpanContext;
3539
import io.opentelemetry.sdk.common.CompletableResultCode;
3640
import io.opentelemetry.sdk.logs.data.LogData;
3741
import io.opentelemetry.sdk.logs.data.Severity;
3842
import io.opentelemetry.sdk.logs.export.LogExporter;
43+
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
3944
import java.util.Collection;
45+
import java.util.List;
46+
import java.util.concurrent.ThreadLocalRandom;
4047
import java.util.function.Consumer;
4148
import javax.annotation.Nullable;
4249
import org.slf4j.Logger;
@@ -52,15 +59,21 @@ public class AgentLogExporter implements LogExporter {
5259
// TODO (trask) could implement this in a filtering LogExporter instead
5360
private volatile Severity threshold;
5461

62+
private final SamplingOverrides logSamplingOverrides;
63+
private final SamplingOverrides exceptionSamplingOverrides;
5564
private final LogDataMapper mapper;
5665
private final Consumer<TelemetryItem> telemetryItemConsumer;
5766

5867
public AgentLogExporter(
5968
Severity threshold,
69+
List<SamplingOverride> logSamplingOverrides,
70+
List<SamplingOverride> exceptionSamplingOverrides,
6071
LogDataMapper mapper,
6172
@Nullable QuickPulse quickPulse,
6273
BatchItemProcessor batchItemProcessor) {
6374
this.threshold = threshold;
75+
this.logSamplingOverrides = new SamplingOverrides(logSamplingOverrides);
76+
this.exceptionSamplingOverrides = new SamplingOverrides(exceptionSamplingOverrides);
6477
this.mapper = mapper;
6578
telemetryItemConsumer =
6679
telemetryItem -> {
@@ -86,18 +99,44 @@ public CompletableResultCode export(Collection<LogData> logs) {
8699
return CompletableResultCode.ofFailure();
87100
}
88101
for (LogData log : logs) {
89-
SpanContext spanContext = log.getSpanContext();
90-
if (spanContext.isValid() && !spanContext.getTraceFlags().isSampled()) {
91-
continue;
92-
}
93102
logger.debug("exporting log: {}", log);
94103
try {
95104
int severity = log.getSeverity().getSeverityNumber();
96105
int threshold = this.threshold.getSeverityNumber();
97106
if (severity < threshold) {
98107
continue;
99108
}
100-
mapper.map(log, telemetryItemConsumer);
109+
110+
String stack = log.getAttributes().get(SemanticAttributes.EXCEPTION_STACKTRACE);
111+
112+
SamplingOverrides samplingOverrides =
113+
stack != null ? exceptionSamplingOverrides : logSamplingOverrides;
114+
115+
Double samplingPercentage = samplingOverrides.getOverridePercentage(log.getAttributes());
116+
117+
SpanContext spanContext = log.getSpanContext();
118+
119+
if (samplingPercentage != null && !shouldSample(spanContext, samplingPercentage)) {
120+
continue;
121+
}
122+
123+
if (samplingPercentage == null
124+
&& spanContext.isValid()
125+
&& !spanContext.getTraceFlags().isSampled()) {
126+
// if there is no sampling override, and the log is part of an unsampled trace, then don't
127+
// capture it
128+
continue;
129+
}
130+
131+
Long itemCount = null;
132+
if (samplingPercentage != null) {
133+
// samplingPercentage cannot be 0 here
134+
itemCount = Math.round(100.0 / samplingPercentage);
135+
}
136+
137+
TelemetryItem telemetryItem = mapper.map(log, stack, itemCount);
138+
telemetryItemConsumer.accept(telemetryItem);
139+
101140
exportingLogLogger.recordSuccess();
102141
} catch (Throwable t) {
103142
exportingLogLogger.recordFailure(t.getMessage(), t, EXPORTER_MAPPING_ERROR);
@@ -116,4 +155,22 @@ public CompletableResultCode flush() {
116155
public CompletableResultCode shutdown() {
117156
return CompletableResultCode.ofSuccess();
118157
}
158+
159+
@SuppressFBWarnings(
160+
value = "SECPR", // Predictable pseudorandom number generator
161+
justification = "Predictable random is ok for sampling decision")
162+
private static boolean shouldSample(SpanContext spanContext, double percentage) {
163+
if (percentage == 100) {
164+
// optimization, no need to calculate score
165+
return true;
166+
}
167+
if (percentage == 0) {
168+
// optimization, no need to calculate score
169+
return false;
170+
}
171+
if (spanContext.isValid()) {
172+
return AiSampler.shouldRecordAndSample(spanContext.getTraceId(), percentage);
173+
}
174+
return ThreadLocalRandom.current().nextDouble() < percentage / 100;
175+
}
119176
}

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/init/RpConfigurationPolling.java

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
import java.nio.file.Files;
3737
import java.nio.file.attribute.BasicFileAttributes;
3838
import java.nio.file.attribute.FileTime;
39+
import java.util.Objects;
3940
import java.util.concurrent.Executors;
4041
import org.slf4j.Logger;
4142
import org.slf4j.LoggerFactory;
@@ -100,24 +101,28 @@ public void run() {
100101
if (!newRpConfiguration.connectionString.equals(rpConfiguration.connectionString)) {
101102
logger.debug(
102103
"Connection string from the JSON config file is overriding the previously configured connection string.");
104+
configuration.connectionString = newRpConfiguration.connectionString;
103105
telemetryClient.updateConnectionStrings(
104-
newRpConfiguration.connectionString,
106+
configuration.connectionString,
105107
configuration.internal.statsbeat.instrumentationKey,
106108
configuration.internal.statsbeat.endpoint);
107109
appIdSupplier.updateAppId();
108110
}
109111

110-
if (newRpConfiguration.sampling.percentage != rpConfiguration.sampling.percentage) {
111-
logger.debug(
112-
"Updating sampling percentage from {} to {}",
113-
rpConfiguration.sampling.percentage,
114-
newRpConfiguration.sampling.percentage);
115-
float roundedSamplingPercentage =
116-
ConfigurationBuilder.roundToNearest(newRpConfiguration.sampling.percentage);
117-
DelegatingSampler.getInstance()
118-
.setDelegate(Samplers.getSampler(roundedSamplingPercentage, configuration));
119-
BytecodeUtilImpl.samplingPercentage = roundedSamplingPercentage;
120-
rpConfiguration.sampling.percentage = newRpConfiguration.sampling.percentage;
112+
if (!Objects.equals(
113+
newRpConfiguration.sampling.percentage, rpConfiguration.sampling.percentage)
114+
|| !Objects.equals(
115+
newRpConfiguration.sampling.limitPerSecond,
116+
rpConfiguration.sampling.limitPerSecond)) {
117+
logger.debug("Updating sampling percentage");
118+
configuration.sampling.percentage = newRpConfiguration.sampling.percentage;
119+
configuration.sampling.limitPerSecond = newRpConfiguration.sampling.limitPerSecond;
120+
DelegatingSampler.getInstance().setDelegate(Samplers.getSampler(configuration));
121+
if (configuration.sampling.percentage != null) {
122+
BytecodeUtilImpl.samplingPercentage = configuration.sampling.percentage.floatValue();
123+
} else {
124+
BytecodeUtilImpl.samplingPercentage = 100;
125+
}
121126
}
122127
rpConfiguration = newRpConfiguration;
123128
}

0 commit comments

Comments
 (0)