Skip to content

Commit bd15ec7

Browse files
committed
Rate limited sampling
Also changed mapping of unparented INTERNAL spans from Dependencies to Requests.
1 parent cfb9ca6 commit bd15ec7

File tree

47 files changed

+2355
-570
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2355
-570
lines changed

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/configuration/Configuration.java

Lines changed: 86 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,8 @@ public void validate() {
7474
preview.validate();
7575
}
7676

77-
// TODO (trask) investigate options for mapping lowercase values to otel enum directly
77+
// used for now to still support sampling overrides v1
78+
@Deprecated
7879
public enum SpanKind {
7980
@JsonProperty("server")
8081
SERVER(io.opentelemetry.api.trace.SpanKind.SERVER),
@@ -151,11 +152,24 @@ public static class Role {
151152

152153
public static class Sampling {
153154

154-
public float percentage = 100;
155+
// simple story: fixed percentage of all telemetry (traceId-based if traceId is available)
156+
@Nullable public Float percentage;
157+
158+
// when this is set, percentage above is ignored, when neither is set, 5 req/sec is the default
159+
@Nullable public Double targetRequestsPerSecondLimit;
160+
161+
// TODO (trask) print sampling percentages when it changes, make sure that with small adaption
162+
// it never goes too low, in which case consistent sampling across nodes could be compromised
155163
}
156164

157165
public static class SamplingPreview {
158166

167+
// used for now to still support sampling overrides v1
168+
@Deprecated public List<SamplingOverrideV1> overrides = new ArrayList<>();
169+
170+
@JsonProperty("overrides-v2")
171+
public SamplingOverridesV2Configuration newOverrides = new SamplingOverridesV2Configuration();
172+
159173
// this is not the default for now at least, because
160174
//
161175
// parent not-sampled -> child not-sampled (always, to avoid broken traces)
@@ -177,8 +191,15 @@ public static class SamplingPreview {
177191
//
178192
// IMPORTANT if changing this default, we need to keep it at least on Azure Functions
179193
public boolean parentBased;
194+
}
195+
196+
public static class SamplingOverridesV2Configuration {
180197

181-
public List<SamplingOverride> overrides = new ArrayList<>();
198+
public List<SamplingOverrideV2> requests = new ArrayList<>();
199+
public List<SamplingOverrideV2> dependencies = new ArrayList<>();
200+
public List<SamplingOverrideV2> logs = new ArrayList<>();
201+
public List<SamplingOverrideV2> exceptions = new ArrayList<>();
202+
// FUTURE: public List<SamplingOverride> customEvents = new ArrayList<>();
182203
}
183204

184205
public static class JmxMetric {
@@ -352,9 +373,21 @@ public static class PreviewConfiguration {
352373
new HashSet<>(asList("b3", "b3multi"));
353374

354375
public void validate() {
355-
for (Configuration.SamplingOverride samplingOverride : sampling.overrides) {
376+
for (SamplingOverrideV1 samplingOverride : sampling.overrides) {
356377
samplingOverride.validate();
357378
}
379+
for (SamplingOverrideV2 samplingOverride : sampling.newOverrides.requests) {
380+
samplingOverride.validate(false);
381+
}
382+
for (SamplingOverrideV2 samplingOverride : sampling.newOverrides.dependencies) {
383+
samplingOverride.validate(true);
384+
}
385+
for (SamplingOverrideV2 samplingOverride : sampling.newOverrides.logs) {
386+
samplingOverride.validate(true);
387+
}
388+
for (SamplingOverrideV2 samplingOverride : sampling.newOverrides.exceptions) {
389+
samplingOverride.validate(true);
390+
}
358391
for (Configuration.InstrumentationKeyOverride instrumentationKeyOverride :
359392
instrumentationKeyOverrides) {
360393
instrumentationKeyOverride.validate();
@@ -577,7 +610,9 @@ private static boolean isRuntimeAttached() {
577610
return Boolean.getBoolean("applicationinsights.internal.runtime.attached");
578611
}
579612

580-
public static class SamplingOverride {
613+
// used for now to still support sampling overrides v1
614+
@Deprecated
615+
public static class SamplingOverrideV1 {
581616
// TODO (trask) consider making this required when moving out of preview
582617
@Nullable public SpanKind spanKind;
583618
// not using include/exclude, because you can still get exclude with this by adding a second
@@ -612,6 +647,52 @@ public void validate() {
612647
}
613648
}
614649

650+
public static class SamplingOverrideV2 {
651+
// not using include/exclude, because you can still get exclude with this by adding a second
652+
// (exclude) override above it (since only the first matching override is used)
653+
public List<SamplingOverrideAttribute> attributes = new ArrayList<>();
654+
@Nullable public Double targetPerSecondLimit;
655+
@Nullable public Float fixedPercentage;
656+
657+
@Nullable public Boolean standalone;
658+
659+
public void validate(boolean allowStandalone) {
660+
if (targetPerSecondLimit == null && fixedPercentage == null) {
661+
// TODO add doc and go link, similar to telemetry processors
662+
throw new FriendlyException(
663+
"A sampling override configuration is missing one of \"targetPerSecondLimit\" or"
664+
+ " \"fixedPercentage\".",
665+
"Please provide one of \"targetPerSecondLimit\" or \"fixedPercentage\" for the sampling"
666+
+ " override configuration.");
667+
}
668+
if (targetPerSecondLimit != null && fixedPercentage != null) {
669+
// TODO add doc and go link, similar to telemetry processors
670+
throw new FriendlyException(
671+
"A sampling override configuration has both a \"targetPerSecondLimit\" and a"
672+
+ " \"fixedPercentage\".",
673+
"Please provide only one of \"targetPerSecondLimit\" or \"fixedPercentage\" for the"
674+
+ " sampling override configuration.");
675+
}
676+
if (fixedPercentage != null && (fixedPercentage < 0 || fixedPercentage > 100)) {
677+
// TODO add doc and go link, similar to telemetry processors
678+
throw new FriendlyException(
679+
"A sampling override configuration has a \"fixedPercentage\" that is not between 0 and"
680+
+ " 100.",
681+
"Please provide a \"fixedPercentage\" that is between 0 and 100 for the sampling"
682+
+ " override configuration.");
683+
}
684+
if (standalone != null && !allowStandalone) {
685+
// TODO add doc and go link, similar to telemetry processors
686+
throw new FriendlyException(
687+
"A sampling \"requestOverride\" configuration has specified \"standalone\".",
688+
"Please remove \"standalone\" from the \"requestOverride\" configuration.");
689+
}
690+
for (SamplingOverrideAttribute attribute : attributes) {
691+
attribute.validate();
692+
}
693+
}
694+
}
695+
615696
public static class SamplingOverrideAttribute {
616697
public String key;
617698
@Nullable public String value;

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/configuration/ConfigurationBuilder.java

Lines changed: 48 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232
import com.microsoft.applicationinsights.agent.bootstrap.diagnostics.DiagnosticsHelper;
3333
import com.microsoft.applicationinsights.agent.internal.common.FriendlyException;
3434
import com.microsoft.applicationinsights.agent.internal.configuration.Configuration.JmxMetric;
35-
import com.microsoft.applicationinsights.agent.internal.configuration.Configuration.SamplingOverride;
35+
import com.microsoft.applicationinsights.agent.internal.configuration.Configuration.SamplingOverrideV1;
3636
import java.io.IOException;
3737
import java.io.InputStream;
3838
import java.net.URL;
@@ -181,19 +181,35 @@ public static Configuration create(Path agentJarPath, @Nullable RpConfiguration
181181
+ " and it is now enabled by default,"
182182
+ " so no need to enable it under preview configuration");
183183
}
184+
if (!config.preview.sampling.overrides.isEmpty()) {
185+
configurationLogger.warn(
186+
"\"preview\": { \"sampling\": { \"overrides\": ... } } has been deprecated,"
187+
+ " and support for it will be removed in a future release, please transition to"
188+
+ " \"preview\": { \"sampling\": { \"requestOverrides\": ... } } and"
189+
+ " \"preview\": { \"sampling\": { \"dependencyOverrides\": ... } }");
190+
}
191+
184192
logWarningIfUsingInternalAttributes(config);
185193

186194
overlayFromEnv(config, agentJarPath.getParent());
187-
config.sampling.percentage = roundToNearest(config.sampling.percentage, true);
188-
for (SamplingOverride override : config.preview.sampling.overrides) {
189-
override.percentage = roundToNearest(override.percentage, true);
195+
if (config.sampling.percentage != null) {
196+
config.sampling.percentage = roundToNearest(config.sampling.percentage, true);
190197
}
198+
roundFixedPercentageToNearest(config.preview.sampling.newOverrides.requests);
199+
roundFixedPercentageToNearest(config.preview.sampling.newOverrides.dependencies);
200+
roundFixedPercentageToNearest(config.preview.sampling.newOverrides.logs);
201+
roundFixedPercentageToNearest(config.preview.sampling.newOverrides.exceptions);
191202
// rp configuration should always be last (so it takes precedence)
192203
// currently applicationinsights-rp.json is only used by Azure Spring Cloud
193204
if (rpConfiguration != null) {
194205
overlayFromEnv(rpConfiguration);
195206
overlayRpConfiguration(config, rpConfiguration);
196207
}
208+
// only fall back to default sampling configuration after all overlays have been performed
209+
if (config.sampling.targetRequestsPerSecondLimit == null
210+
&& config.sampling.percentage == null) {
211+
config.sampling.targetRequestsPerSecondLimit = 5.0;
212+
}
197213
// only set role instance to host name as a last resort
198214
if (config.role.instance == null) {
199215
String hostname = HostName.get();
@@ -202,6 +218,15 @@ public static Configuration create(Path agentJarPath, @Nullable RpConfiguration
202218
return config;
203219
}
204220

221+
private static void roundFixedPercentageToNearest(
222+
List<Configuration.SamplingOverrideV2> overrides) {
223+
for (Configuration.SamplingOverrideV2 override : overrides) {
224+
if (override.fixedPercentage != null) {
225+
override.fixedPercentage = roundToNearest(override.fixedPercentage, true);
226+
}
227+
}
228+
}
229+
205230
private static void logWarningIfUsingInternalAttributes(Configuration config) {
206231
for (Configuration.ProcessorConfig processor : config.preview.processors) {
207232
if (processor.include != null) {
@@ -219,7 +244,7 @@ private static void logWarningIfUsingInternalAttributes(Configuration config) {
219244
}
220245
}
221246
}
222-
for (SamplingOverride override : config.preview.sampling.overrides) {
247+
for (SamplingOverrideV1 override : config.preview.sampling.overrides) {
223248
for (Configuration.SamplingOverrideAttribute attribute : override.attributes) {
224249
logWarningIfUsingInternalAttributes(attribute.key);
225250
}
@@ -466,10 +491,9 @@ static void overlayFromEnv(Configuration config, Path baseDir) throws IOExceptio
466491
APPLICATIONINSIGHTS_SELF_DIAGNOSTICS_FILE_PATH, config.selfDiagnostics.file.path);
467492

468493
config.preview.metricIntervalSeconds =
469-
(int)
470-
overlayWithEnvVar(
471-
APPLICATIONINSIGHTS_PREVIEW_METRIC_INTERVAL_SECONDS,
472-
config.preview.metricIntervalSeconds);
494+
overlayWithEnvVar(
495+
APPLICATIONINSIGHTS_PREVIEW_METRIC_INTERVAL_SECONDS,
496+
config.preview.metricIntervalSeconds);
473497

474498
config.preview.instrumentation.springIntegration.enabled =
475499
overlayWithEnvVar(
@@ -576,6 +600,8 @@ static void overlayRpConfiguration(Configuration config, RpConfiguration rpConfi
576600
}
577601
if (rpConfiguration.sampling != null) {
578602
config.sampling.percentage = rpConfiguration.sampling.percentage;
603+
config.sampling.targetRequestsPerSecondLimit =
604+
rpConfiguration.sampling.targetRequestsPerSecondLimit;
579605
}
580606
if (isTrimEmpty(config.role.name)) {
581607
// only use rp configuration role name as a fallback, similar to WEBSITE_SITE_NAME
@@ -626,7 +652,8 @@ public static String overlayWithEnvVar(String name, String defaultValue) {
626652
return defaultValue;
627653
}
628654

629-
static float overlayWithEnvVar(String name, float defaultValue) {
655+
@Nullable
656+
static Float overlayWithEnvVar(String name, @Nullable Float defaultValue) {
630657
String value = getEnvVar(name);
631658
if (value != null) {
632659
configurationLogger.debug("using environment variable: {}", name);
@@ -637,6 +664,17 @@ static float overlayWithEnvVar(String name, float defaultValue) {
637664
return defaultValue;
638665
}
639666

667+
static int overlayWithEnvVar(String name, int defaultValue) {
668+
String value = getEnvVar(name);
669+
if (value != null) {
670+
configurationLogger.debug("using environment variable: {}", name);
671+
// intentionally allowing NumberFormatException to bubble up as invalid configuration and
672+
// prevent agent from starting
673+
return Integer.parseInt(value);
674+
}
675+
return defaultValue;
676+
}
677+
640678
static boolean overlayWithEnvVar(String name, boolean defaultValue) {
641679
String value = getEnvVar(name);
642680
if (value != null) {
@@ -802,7 +840,6 @@ public static float roundToNearest(float samplingPercentage) {
802840
return roundToNearest(samplingPercentage, false);
803841
}
804842

805-
// visible for testing
806843
private static float roundToNearest(float samplingPercentage, boolean doNotLogWarnMessages) {
807844
if (samplingPercentage == 0) {
808845
return 0;

agent/agent-tooling/src/main/java/com/microsoft/applicationinsights/agent/internal/exporter/AgentLogExporter.java

Lines changed: 62 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -28,15 +28,22 @@
2828
import com.azure.monitor.opentelemetry.exporter.implementation.logging.OperationLogger;
2929
import com.azure.monitor.opentelemetry.exporter.implementation.models.TelemetryItem;
3030
import com.azure.monitor.opentelemetry.exporter.implementation.quickpulse.QuickPulse;
31+
import com.microsoft.applicationinsights.agent.internal.configuration.Configuration;
32+
import com.microsoft.applicationinsights.agent.internal.sampling.AiSampler;
33+
import com.microsoft.applicationinsights.agent.internal.sampling.SamplingOverridesV2;
3134
import com.microsoft.applicationinsights.agent.internal.telemetry.BatchItemProcessor;
3235
import com.microsoft.applicationinsights.agent.internal.telemetry.TelemetryClient;
3336
import com.microsoft.applicationinsights.agent.internal.telemetry.TelemetryObservers;
37+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
3438
import io.opentelemetry.api.trace.SpanContext;
3539
import io.opentelemetry.sdk.common.CompletableResultCode;
3640
import io.opentelemetry.sdk.logs.data.LogData;
3741
import io.opentelemetry.sdk.logs.data.Severity;
3842
import io.opentelemetry.sdk.logs.export.LogExporter;
43+
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
3944
import java.util.Collection;
45+
import java.util.List;
46+
import java.util.concurrent.ThreadLocalRandom;
4047
import java.util.function.Consumer;
4148
import javax.annotation.Nullable;
4249
import org.slf4j.Logger;
@@ -52,15 +59,21 @@ public class AgentLogExporter implements LogExporter {
5259
// TODO (trask) could implement this in a filtering LogExporter instead
5360
private volatile Severity threshold;
5461

62+
private final SamplingOverridesV2 logSamplingOverrides;
63+
private final SamplingOverridesV2 exceptionSamplingOverrides;
5564
private final LogDataMapper mapper;
5665
private final Consumer<TelemetryItem> telemetryItemConsumer;
5766

5867
public AgentLogExporter(
5968
Severity threshold,
69+
List<Configuration.SamplingOverrideV2> logSamplingOverrides,
70+
List<Configuration.SamplingOverrideV2> exceptionSamplingOverrides,
6071
LogDataMapper mapper,
6172
@Nullable QuickPulse quickPulse,
6273
BatchItemProcessor batchItemProcessor) {
6374
this.threshold = threshold;
75+
this.logSamplingOverrides = new SamplingOverridesV2(logSamplingOverrides);
76+
this.exceptionSamplingOverrides = new SamplingOverridesV2(exceptionSamplingOverrides);
6477
this.mapper = mapper;
6578
telemetryItemConsumer =
6679
telemetryItem -> {
@@ -86,18 +99,44 @@ public CompletableResultCode export(Collection<LogData> logs) {
8699
return CompletableResultCode.ofFailure();
87100
}
88101
for (LogData log : logs) {
89-
SpanContext spanContext = log.getSpanContext();
90-
if (spanContext.isValid() && !spanContext.getTraceFlags().isSampled()) {
91-
continue;
92-
}
93102
logger.debug("exporting log: {}", log);
94103
try {
95104
int severity = log.getSeverity().getSeverityNumber();
96105
int threshold = this.threshold.getSeverityNumber();
97106
if (severity < threshold) {
98107
continue;
99108
}
100-
mapper.map(log, telemetryItemConsumer);
109+
110+
String stack = log.getAttributes().get(SemanticAttributes.EXCEPTION_STACKTRACE);
111+
112+
SamplingOverridesV2 samplingOverrides =
113+
stack != null ? exceptionSamplingOverrides : logSamplingOverrides;
114+
115+
Float samplingPercentage = samplingOverrides.getOverridePercentage(log.getAttributes());
116+
117+
SpanContext spanContext = log.getSpanContext();
118+
119+
if (samplingPercentage != null && !shouldSample(spanContext, samplingPercentage)) {
120+
continue;
121+
}
122+
123+
if (samplingPercentage == null
124+
&& spanContext.isValid()
125+
&& !spanContext.getTraceFlags().isSampled()) {
126+
// if there is no sampling override, and the log is part of an unsampled trace, then don't
127+
// capture it
128+
continue;
129+
}
130+
131+
Long itemCount = null;
132+
if (samplingPercentage != null) {
133+
// samplingPercentage cannot be 0 here
134+
itemCount = Math.round(100.0 / samplingPercentage);
135+
}
136+
137+
TelemetryItem telemetryItem = mapper.map(log, stack, itemCount);
138+
telemetryItemConsumer.accept(telemetryItem);
139+
101140
exportingLogLogger.recordSuccess();
102141
} catch (Throwable t) {
103142
exportingLogLogger.recordFailure(t.getMessage(), t, EXPORTER_MAPPING_ERROR);
@@ -116,4 +155,22 @@ public CompletableResultCode flush() {
116155
public CompletableResultCode shutdown() {
117156
return CompletableResultCode.ofSuccess();
118157
}
158+
159+
@SuppressFBWarnings(
160+
value = "SECPR", // Predictable pseudorandom number generator
161+
justification = "Predictable random is ok for sampling decision")
162+
private static boolean shouldSample(SpanContext spanContext, float percentage) {
163+
if (percentage == 100) {
164+
// optimization, no need to calculate score
165+
return true;
166+
}
167+
if (percentage == 0) {
168+
// optimization, no need to calculate score
169+
return false;
170+
}
171+
if (spanContext.isValid()) {
172+
return AiSampler.shouldRecordAndSample(spanContext.getTraceId(), percentage);
173+
}
174+
return ThreadLocalRandom.current().nextDouble() < percentage / 100;
175+
}
119176
}

0 commit comments

Comments
 (0)