Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
8a59b29
UTF8 caching for v0.4
dougqh Aug 28, 2025
68fdcb9
spotless
dougqh Aug 28, 2025
95767a6
Tweaking comments
dougqh Aug 28, 2025
5270f9c
Tweaking comments
dougqh Aug 28, 2025
69c4983
Comparing results with caching off
dougqh Aug 28, 2025
d725543
Merge branch 'master' into dougqh/utf8-caching
dougqh Aug 28, 2025
ebc3fb0
Fixing silly oversight when cache is disabled
dougqh Aug 29, 2025
247bb02
Adding comments about benchmark data being used
dougqh Aug 29, 2025
69c94d1
Misc improvements
dougqh Aug 29, 2025
d017b02
Merge branch 'master' into dougqh/utf8-caching
dougqh Aug 29, 2025
01aa284
Tweaking the cache heuristics
dougqh Sep 2, 2025
bde8118
Merge branch 'dougqh/utf8-caching' of github.com:DataDog/dd-trace-jav…
dougqh Sep 2, 2025
f15e1cc
spotless
dougqh Sep 2, 2025
947734a
Merge branch 'master' into dougqh/utf8-caching
dougqh Sep 2, 2025
f509c0a
Clean-up & tweaking
dougqh Sep 2, 2025
6bfbf88
Merge branch 'dougqh/utf8-caching' of github.com:DataDog/dd-trace-jav…
dougqh Sep 2, 2025
000de35
Merge branch 'master' into dougqh/utf8-caching
dougqh Sep 2, 2025
db82394
Tweaking settings to be good at multiple memory levels
dougqh Sep 2, 2025
ff6e0f8
Merge branch 'dougqh/utf8-caching' of github.com:DataDog/dd-trace-jav…
dougqh Sep 2, 2025
41d059d
Fixing oversight from marking change
dougqh Sep 3, 2025
3b69e62
Fixing bug introduced with different probes lengths for eden & tenured
dougqh Sep 3, 2025
4102a26
More clean-up
dougqh Sep 3, 2025
6902e80
Merge branch 'master' into dougqh/utf8-caching
dougqh Sep 3, 2025
9b78df7
Misc fixes
dougqh Sep 3, 2025
3c33c38
Fixing benchmarks brought over from standalone prototype
dougqh Sep 3, 2025
41af3df
Merge branch 'dougqh/utf8-caching' of github.com:DataDog/dd-trace-jav…
dougqh Sep 3, 2025
0b9f0d0
test & benchmark clean-up
dougqh Sep 3, 2025
bdc1859
Added some explanatory comments
dougqh Sep 3, 2025
6ab19b0
Update dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent…
dougqh Sep 4, 2025
49100cb
Making cache more configurable & clean-up
dougqh Sep 4, 2025
75bff75
Merge branch 'master' into dougqh/utf8-caching
dougqh Sep 4, 2025
bd17af9
fix: small compilation fix
bric3 Sep 5, 2025
f53ed6e
Merge branch 'master' into dougqh/utf8-caching
dougqh Sep 5, 2025
6d035cc
Merge branch 'master' into dougqh/utf8-caching
dougqh Sep 5, 2025
c923194
Adding missing size parameters tp benchmark
dougqh Sep 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ public final class GeneralConfig {
public static final String JDK_SOCKET_ENABLED = "jdk.socket.enabled";

public static final String OPTIMIZED_MAP_ENABLED = "optimized.map.enabled";
public static final String TAG_NAME_UTF8_CACHE_SIZE = "tag.name.utf8.cache.size";
public static final String TAG_VALUE_UTF8_CACHE_SIZE = "tag.value.utf8.cache.size";
public static final String STACK_TRACE_LENGTH_LIMIT = "stack.trace.length.limit";

public static final String SSI_INJECTION_ENABLED = "injection.enabled";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package datadog.trace.common.writer.ddagent;

import java.nio.charset.StandardCharsets;
import java.util.concurrent.ThreadLocalRandom;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.infra.Blackhole;

/**
* This benchmark isn't really intended to be used to measure throughput, but rather to be used
* with "-prof gc" to check bytes / op.
*
* <p>Since {@link String#getBytes(java.nio.charset.Charset)} is intrinsified, the caches
* typically perform worse throughput-wise; the benefit of the caches is reduced allocation. The
* intention of this benchmark is to create data that roughly resembles what might be seen in a
* trace payload: tag names are quite static, tag values are mostly low cardinality, but some tag
* values have effectively unbounded cardinality.
*/
@BenchmarkMode(Mode.Throughput)
public class Utf8Benchmark {
// Number of tag/value pairs processed per invocation by each of the valueUtf8_* benchmarks.
static final int NUM_LOOKUPS = 10_000;

// Tag names that nextTag() cycles through in order.
// "custom" is the one tag for which nextValue() produces high cardinality values.
static final String[] TAGS = {
"_dd.asm.keep",
"ci.provider",
"language",
"db.statement",
"ci.job.url",
"ci.pipeline.url",
"db.pool",
"http.forwarder",
"db.warehouse",
"custom"
};

// Index into TAGS of the tag most recently returned by nextTag().
// Plain static field with no synchronization, shared by every benchmark method in this class.
static int pos = 0;
// Not referenced anywhere in the visible portion of this file.
static int standardVal = 0;

/**
 * Moves the shared cursor to the next entry of TAGS, wrapping to index 0 after the last entry,
 * and returns the tag at the new position.
 */
static final String nextTag() {
  pos = (pos == TAGS.length - 1) ? 0 : pos + 1;
  return TAGS[pos];
}

/**
 * Produces a value for the given tag: a high cardinality value for the "custom" tag, a moderate
 * cardinality value for every other tag.
 */
static final String nextValue(String tag) {
  return tag.equals("custom") ? nextCustomValue(tag) : nextStandardValue(tag);
}

/*
 * Builds a high cardinality value: the tag followed by an unbounded random int, so there are
 * thousands of distinct values per tag and many of them show up only once.
 */
static final String nextCustomValue(String tag) {
  int suffix = ThreadLocalRandom.current().nextInt();
  return tag + suffix;
}

/*
 * Builds a moderate cardinality value: the tag followed by a random int in [0, 20), i.e. at most
 * twenty distinct values per tag.
 */
static final String nextStandardValue(String tag) {
  int suffix = ThreadLocalRandom.current().nextInt(20);
  return tag + suffix;
}

/** Baseline for the tag benchmarks: only cycles to the next tag, no UTF-8 encoding is done. */
@Benchmark
public static final String tagUtf8_baseline() {
  String tag = nextTag();
  return tag;
}

/** Encodes the next tag to UTF-8 directly through String#getBytes, without any cache. */
@Benchmark
public static final byte[] tagUtf8_nocache() {
  return nextTag().getBytes(StandardCharsets.UTF_8);
}

// Cache used by tagUtf8_w_cache.
// NOTE(review): 128 is presumably the requested capacity - confirm against SimpleUtf8Cache.
static final SimpleUtf8Cache TAG_CACHE = new SimpleUtf8Cache(128);

/**
 * Encodes the next tag through TAG_CACHE, falling back to a direct String#getBytes call when the
 * cache hands back null.
 */
@Benchmark
public static final byte[] tagUtf8_w_cache() {
  String tag = nextTag();
  byte[] cached = TAG_CACHE.getUtf8(tag);
  return (cached != null) ? cached : tag.getBytes(StandardCharsets.UTF_8);
}

@Benchmark
public static final void valueUtf8_baseline(Blackhole bh) {
for (int i = 0; i < NUM_LOOKUPS; ++i) {
String tag = nextTag();
String value = nextValue(tag);
Comment on lines +97 to +98
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: Out of curiosity, would it be better to generate a tag / value dataset outside the benchmark methods? Maybe this would allow datasets with a wider range of values.


I believe some customers have wide-character values (e.g. in Korean) in their tags. Would it be useful to have a benchmark for that? Could the gains be more pronounced in that case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, probably. I need to experiment some more to figure out what's possible with JMH.
The x_baseline methods exist, so that I can do a comparison to the "same" logic without the encoding.


bh.consume(tag);
bh.consume(value);
}
}

// Cache used by valueUtf8_cache_generational.
// NOTE(review): 64 and 128 are presumably the sizes of the two generations - confirm against
// GenerationalUtf8Cache.
static final GenerationalUtf8Cache VALUE_CACHE = new GenerationalUtf8Cache(64, 128);

/**
 * Recalibrates VALUE_CACHE once, then performs NUM_LOOKUPS value lookups through it, handing each
 * result to the Blackhole.
 */
@Benchmark
public static final void valueUtf8_cache_generational(Blackhole bh) {
  final GenerationalUtf8Cache cache = VALUE_CACHE;
  cache.recalibrate();

  int remaining = NUM_LOOKUPS;
  while (remaining-- > 0) {
    String generated = nextValue(nextTag());
    bh.consume(cache.getUtf8(generated));
  }
}

// Cache used by valueUtf8_cache_simple.
// NOTE(review): 128 is presumably the requested capacity - confirm against SimpleUtf8Cache.
static final SimpleUtf8Cache SIMPLE_VALUE_CACHE = new SimpleUtf8Cache(128);

/**
 * Recalibrates SIMPLE_VALUE_CACHE once, then performs NUM_LOOKUPS value lookups through it,
 * handing each result to the Blackhole.
 */
@Benchmark
public static final void valueUtf8_cache_simple(Blackhole bh) {
  final SimpleUtf8Cache cache = SIMPLE_VALUE_CACHE;
  cache.recalibrate();

  for (int remaining = NUM_LOOKUPS; remaining > 0; --remaining) {
    String generated = nextValue(nextTag());
    byte[] utf8 = cache.getUtf8(generated);
    bh.consume(utf8);
  }
}

/**
 * Generates NUM_LOOKUPS tag/value pairs and encodes each value with String#getBytes directly,
 * without any cache. Both the tag and the encoded bytes are handed to the Blackhole.
 */
@Benchmark
public static final void valueUtf8_nocache(Blackhole bh) {
  int remaining = NUM_LOOKUPS;
  while (remaining-- > 0) {
    final String tag = nextTag();
    final String generated = nextValue(tag);

    bh.consume(tag);
    bh.consume(generated.getBytes(StandardCharsets.UTF_8));
  }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package datadog.trace.common.writer.ddagent;

import java.util.Arrays;

/**
 * Common static functions used by the simple & generational caches: power-of-two sizing, non-zero
 * hashing, bucket indexing, and the slot "marking" heuristic that decides when a value has been
 * seen often enough to deserve a cache entry.
 */
final class Caching {
  /** Largest power of two that fits in a positive int; upper bound of {@link #cacheSizeFor}. */
  private static final int MAX_CACHE_SIZE = 1 << 30;

  private Caching() {}

  /**
   * Provides the cache size that holds the requestedCapacity: the smallest power of two that is
   * greater than or equal to it.
   *
   * <p>Requests above 2^30 are clamped to 2^30, since no larger power of two fits in an int.
   * Without the clamp the doubling loop would overflow to a negative value, then to zero, and
   * never terminate.
   *
   * @param requestedCapacity &gt; 0 (values &lt;= 1 yield 1)
   * @return a power of two; &gt;= requestedCapacity whenever requestedCapacity &lt;= 2^30
   */
  static final int cacheSizeFor(int requestedCapacity) {
    if (requestedCapacity >= MAX_CACHE_SIZE) {
      return MAX_CACHE_SIZE;
    }

    int size = 1;
    while (size < requestedCapacity) {
      size <<= 1;
    }
    return size;
  }

  /**
   * Provides an "adjusted" (e.g. non-zero) hash for the given String.
   *
   * <p>Zero has to be avoided because {@link #mark} uses zero as the "unmarked" slot value, and a
   * zero hash would match every slot.
   */
  static final int adjHash(String value) {
    int hash = value.hashCode();
    return (hash == 0) ? 0xDA7AD06 : hash;
  }

  /** Resets markers to zero */
  static final void reset(int[] marks) {
    Arrays.fill(marks, 0);
  }

  /**
   * Changes the mark status of the corresponding slot in the marking array.
   *
   * <ul>
   *   <li>If there was previously a matching mark, resets the slot to zero and returns true.
   *   <li>If there was previously a mismatching mark, updates the slot and returns false.
   * </ul>
   *
   * <p>A return value of true indicates that the requested value has likely been seen previously
   * and a cache entry should be created.
   *
   * @param marks marking array; its length must be a power of two
   * @param newAdjHash adjusted (non-zero) hash of the requested value, see {@link #adjHash}
   * @return true if every bit of newAdjHash was already set in the slot
   */
  static final boolean mark(int[] marks, int newAdjHash) {
    int index = bucketIndex(marks, newAdjHash);

    // This is the 4th iteration of the marking strategy
    // First version - used a mark entry, but that would prematurely
    // burn a slot in the cache
    // Second version - used a mark boolean, that worked well, but
    // was overly permissive in allowing the next request to the same slot
    // to immediately create a CacheEntry
    // Third version - used a mark hash that had to match exactly,
    // that could lead to access order fights over the cache slot
    // So this version is a hybrid of 2nd & 3rd, using a bloom filter
    // that effectively degenerates to a boolean

    // This approach provides a nice balance when there's an A-B-A access pattern
    // The first A will mark the slot
    // Then B will mark the slot with A | B
    // Then either A or B can claim and reset the slot

    int priorMarkHash = marks[index];
    boolean match = ((priorMarkHash & newAdjHash) == newAdjHash);
    if (match) {
      marks[index] = 0;
    } else {
      marks[index] = priorMarkHash | newAdjHash;
    }
    return match;
  }

  /**
   * Provides the corresponding index into the marking array. The hash is masked with
   * {@code length - 1}, so the array length must be a power of two.
   */
  static final int bucketIndex(int[] marks, int adjHash) {
    return adjHash & (marks.length - 1);
  }

  /**
   * Provides the corresponding index into an entry array. Assumes that the array size was
   * determined by using {@link Caching#cacheSizeFor}.
   */
  static final <E> int bucketIndex(E[] entries, int adjHash) {
    return adjHash & (entries.length - 1);
  }
}
Loading