
Commit 4abe3ff

dougqh and bric3 authored
UTF8 caching for v0.4 (#9434)
* UTF8 caching for v0.4 This change adds UTF-8 encoding caching to optimize v0.4 payload construction. Since String#getBytes is intrinsified these caches actually perform worse throughput wise than an uncached conversion. However, the caches are useful in reducing allocation from UTF-8 conversions. For tags, a "simple" cache is used. The simple cache is a single level cache -- that uses hashing combined with linear probing. To avoid, cache churn and unnecessary allocation of a CacheEntry, the simple cache uses a first request marking scheme that typically avoids creating a CacheEntry for values that are requested only once. Eviction from the "simple" cache is done based on LFU policy. For tag values, a more complicated generational cache is used. The generational cache combines the delayed CacheEntry logic of the simple cache with a 2nd-level for resilience. Frequently used entries are "promoted" to the higher level cache. The 1st level of the generational cache uses a LFU eviction policy. The 2nd level of the generational cache uses a LRU eviction policy. For the value use cache, the generational policy provided 2x increase in hit rate over the simple cache. * spotless * Tweaking comments * Tweaking comments * Comparing results with caching off * Fixing silly oversight when cache is disabled * Adding comments about benchmark data being used * Misc improvements - implementing review feedback - experimenting with exact hash based marking scheme - fixed issue with not updating entry after hit in simple cache - re-enabling cache by default for benchmarking - spotless * Tweaking the cache heuristics - altered marking strategy to use a bloom filter of previously requested values, once a new entry hits the filter the filter is reset to zero - tweaking cache sizes * spotless * Clean-up & tweaking - clean-up based on review feedback - making naming consistent - some vestiges of prior names for second level cache updated - tweaked generational cache to check tenured entries first - * Tweaking settings to be good at multiple memory levels - switching generational cache to use different probe lengths for eden vs tenured generation - these settings are neutral or better throughput wise for petclinic for 64m, 80m, 96m, and 128m heaps * Fixing oversight from marking change Should be using adjHash not value.hashCode * Fixing bug introduced with different probes lengths for eden & tenured * More clean-up - more explanatory comments - more naming updates: local -> eden * Misc fixes - adding protections against storing large strings in cache - fixed errant use of CacheEntry.utf8(String) instead of entry.utf8() - removed unnecessary lookupTimeMs variable * Fixing benchmarks brought over from standalone prototype * test & benchmark clean-up Added tests to verify that big strings are not cached * Added some explanatory comments * Update dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java Co-authored-by: Brice Dutheil <brice.dutheil@gmail.com> * Making cache more configurable & clean-up - added ability to configure cache size - for both tag names & values - factored shared code into Caching static utility class - added tests for Caching class & size determination logic * fix: small compilation fix --------- Co-authored-by: Brice Dutheil <brice.dutheil@gmail.com>
1 parent cb08250 commit 4abe3ff

File tree

10 files changed, +1379 / -14 lines changed

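For orientation before the file diffs: the commit message describes a first-request marking scheme that delays allocating a CacheEntry until a value has probably been seen before, plus hit counting to drive LFU eviction. Below is a minimal sketch of that idea only; it is not the actual SimpleUtf8Cache (it omits linear probing and real LFU bookkeeping), and the names SketchUtf8Cache and Entry are purely illustrative. The generational cache layers an eden/tenured split and promotion on top of the same mechanism.

import java.nio.charset.StandardCharsets;

// Hypothetical sketch only -- not part of the commit and not the real SimpleUtf8Cache.
final class SketchUtf8Cache {
  private static final class Entry {
    final String value;
    final byte[] utf8;
    int hits; // the real cache uses hit counts like this to drive LFU eviction

    Entry(String value) {
      this.value = value;
      this.utf8 = value.getBytes(StandardCharsets.UTF_8);
    }
  }

  private final int[] marks; // first-request filter: a slot must be marked before an Entry is allocated
  private final Entry[] entries;

  SketchUtf8Cache(int capacity) {
    int size = 1;
    while (size < capacity) size *= 2; // next power of two, like Caching.cacheSizeFor
    this.marks = new int[size];
    this.entries = new Entry[size];
  }

  /** Returns cached UTF-8 bytes, or null when the caller should encode the value itself. */
  byte[] getUtf8(String value) {
    int hash = value.hashCode();
    int adjHash = (hash == 0) ? 0xDA7AD06 : hash; // non-zero hash, like Caching.adjHash
    int index = adjHash & (entries.length - 1);   // direct-mapped here; the real cache probes several slots

    Entry entry = entries[index];
    if (entry != null && entry.value.equals(value)) {
      entry.hits += 1;
      return entry.utf8; // hit: reuse the cached encoding, no allocation
    }

    // Miss: only allocate an Entry once the slot's mark suggests this value was requested before.
    int prior = marks[index];
    if ((prior & adjHash) == adjHash) { // bloom-filter style match, like Caching.mark
      marks[index] = 0;
      entry = new Entry(value); // the real cache would consult hit counts (LFU) before evicting
      entries[index] = entry;
      return entry.utf8;
    }
    marks[index] = prior | adjHash; // first sighting: mark only, no allocation
    return null;                    // caller falls back to value.getBytes(UTF_8)
  }
}

A null return is exactly what the benchmark's tagUtf8_w_cache method below handles by falling back to String#getBytes.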

dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java

Lines changed: 2 additions & 0 deletions
@@ -105,6 +105,8 @@ public final class GeneralConfig {
   public static final String JDK_SOCKET_ENABLED = "jdk.socket.enabled";

   public static final String OPTIMIZED_MAP_ENABLED = "optimized.map.enabled";
+  public static final String TAG_NAME_UTF8_CACHE_SIZE = "tag.name.utf8.cache.size";
+  public static final String TAG_VALUE_UTF8_CACHE_SIZE = "tag.value.utf8.cache.size";
   public static final String STACK_TRACE_LENGTH_LIMIT = "stack.trace.length.limit";

   public static final String SSI_INJECTION_ENABLED = "injection.enabled";
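The two new keys follow the naming pattern of the surrounding GeneralConfig entries. Assuming the usual dd-trace-java convention for such keys (the mapping itself is not shown in this diff), they would most likely be settable as the dd.tag.name.utf8.cache.size and dd.tag.value.utf8.cache.size system properties, or the corresponding DD_TAG_NAME_UTF8_CACHE_SIZE / DD_TAG_VALUE_UTF8_CACHE_SIZE environment variables; per the commit message they size the tag-name and tag-value UTF-8 caches respectively.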
dd-trace-core/src/jmh/java/datadog/trace/common/writer/ddagent/Utf8Benchmark.java

Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
package datadog.trace.common.writer.ddagent;

import java.nio.charset.StandardCharsets;
import java.util.concurrent.ThreadLocalRandom;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.infra.Blackhole;

/**
 * This benchmark isn't really intended to be used to measure throughput, but rather to be used
 * with "-prof gc" to check bytes / op.
 *
 * <p>Since {@link String#getBytes(java.nio.charset.Charset)} is intrinsified, the caches typically
 * perform worse throughput-wise; the benefit of the caches is to reduce allocation. The intention
 * of this benchmark is to create data that roughly resembles what might be seen in a trace
 * payload. Tag names are quite static, tag values are mostly low cardinality, but some tag values
 * have infinite cardinality.
 */
@BenchmarkMode(Mode.Throughput)
public class Utf8Benchmark {
  static final int NUM_LOOKUPS = 10_000;

  static final String[] TAGS = {
    "_dd.asm.keep",
    "ci.provider",
    "language",
    "db.statement",
    "ci.job.url",
    "ci.pipeline.url",
    "db.pool",
    "http.forwarder",
    "db.warehouse",
    "custom"
  };

  static int pos = 0;
  static int standardVal = 0;

  static final String nextTag() {
    if (pos == TAGS.length - 1) {
      pos = 0;
    } else {
      pos += 1;
    }
    return TAGS[pos];
  }

  static final String nextValue(String tag) {
    if (tag.equals("custom")) {
      return nextCustomValue(tag);
    } else {
      return nextStandardValue(tag);
    }
  }

  /*
   * Produces a high cardinality value - thousands of distinct values per tag - many 1-time values
   */
  static final String nextCustomValue(String tag) {
    return tag + ThreadLocalRandom.current().nextInt();
  }

  /*
   * Produces a moderate cardinality value - tens of distinct values per tag
   */
  static final String nextStandardValue(String tag) {
    return tag + ThreadLocalRandom.current().nextInt(20);
  }

  @Benchmark
  public static final String tagUtf8_baseline() {
    return nextTag();
  }

  @Benchmark
  public static final byte[] tagUtf8_nocache() {
    String tag = nextTag();
    return tag.getBytes(StandardCharsets.UTF_8);
  }

  static final SimpleUtf8Cache TAG_CACHE = new SimpleUtf8Cache(128);

  @Benchmark
  public static final byte[] tagUtf8_w_cache() {
    String tag = nextTag();

    byte[] cache = TAG_CACHE.getUtf8(tag);
    if (cache != null) return cache;

    return tag.getBytes(StandardCharsets.UTF_8);
  }

  @Benchmark
  public static final void valueUtf8_baseline(Blackhole bh) {
    for (int i = 0; i < NUM_LOOKUPS; ++i) {
      String tag = nextTag();
      String value = nextValue(tag);

      bh.consume(tag);
      bh.consume(value);
    }
  }

  static final GenerationalUtf8Cache VALUE_CACHE = new GenerationalUtf8Cache(64, 128);

  @Benchmark
  public static final void valueUtf8_cache_generational(Blackhole bh) {
    GenerationalUtf8Cache valueCache = VALUE_CACHE;
    valueCache.recalibrate();

    for (int i = 0; i < NUM_LOOKUPS; ++i) {
      String tag = nextTag();
      String value = nextValue(tag);

      byte[] lookup = valueCache.getUtf8(value);
      bh.consume(lookup);
    }
  }

  static final SimpleUtf8Cache SIMPLE_VALUE_CACHE = new SimpleUtf8Cache(128);

  @Benchmark
  public static final void valueUtf8_cache_simple(Blackhole bh) {
    SimpleUtf8Cache valueCache = SIMPLE_VALUE_CACHE;
    valueCache.recalibrate();

    for (int i = 0; i < NUM_LOOKUPS; ++i) {
      String tag = nextTag();
      String value = nextValue(tag);

      byte[] lookup = valueCache.getUtf8(value);
      bh.consume(lookup);
    }
  }

  @Benchmark
  public static final void valueUtf8_nocache(Blackhole bh) {
    for (int i = 0; i < NUM_LOOKUPS; ++i) {
      String tag = nextTag();
      String value = nextValue(tag);

      bh.consume(tag);
      bh.consume(value.getBytes(StandardCharsets.UTF_8));
    }
  }
}
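The class Javadoc suggests running this with "-prof gc" to look at normalized allocation rather than throughput. As a hedged example, one way to do that with the standard JMH runner API is sketched below; the class name Utf8BenchmarkRunner is made up here, and the project's actual JMH/Gradle wiring may differ.

import org.openjdk.jmh.profile.GCProfiler;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

// Hypothetical launcher, equivalent to passing "-prof gc" on the JMH command line.
public class Utf8BenchmarkRunner {
  public static void main(String[] args) throws RunnerException {
    Options options =
        new OptionsBuilder()
            .include("Utf8Benchmark")      // run all benchmark methods in Utf8Benchmark
            .addProfiler(GCProfiler.class) // reports gc.alloc.rate.norm, i.e. bytes / op
            .forks(1)
            .build();
    new Runner(options).run();
  }
}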
datadog/trace/common/writer/ddagent/Caching.java

Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
package datadog.trace.common.writer.ddagent;

import java.util.Arrays;

/** Some common static functions used by the simple & generational caches */
final class Caching {
  private Caching() {}

  /**
   * Provides the cache size that holds the requestedCapacity
   *
   * @param requestedCapacity > 0
   * @return size >= requestedCapacity
   */
  static final int cacheSizeFor(int requestedCapacity) {
    int pow;
    for (pow = 1; pow < requestedCapacity; pow *= 2) ;
    return pow;
  }

  /** Provides an "adjusted" (i.e. non-zero) hash for the given String */
  static final int adjHash(String value) {
    int hash = value.hashCode();
    return (hash == 0) ? 0xDA7AD06 : hash;
  }

  /** Resets markers to zero */
  static final void reset(int[] marks) {
    Arrays.fill(marks, 0);
  }

  /**
   * Changes the mark status of the corresponding slot in the marking array. If there was
   * previously a matching mark, resets the slot to zero and returns true. If there was previously
   * a mismatching mark, updates the slot and returns false.
   *
   * <p>A return value of true indicates that the requested value has likely been seen previously
   * and a cache entry should be created.
   */
  static final boolean mark(int[] marks, int newAdjHash) {
    int index = bucketIndex(marks, newAdjHash);

    // This is the 4th iteration of the marking strategy
    // First version - used a mark entry, but that would prematurely
    // burn a slot in the cache
    // Second version - used a mark boolean; that worked well, but
    // was overly permissive in allowing the next request to the same slot
    // to immediately create a CacheEntry
    // Third version - used a mark hash that had to match exactly,
    // which could lead to access-order fights over the cache slot
    // So this version is a hybrid of the 2nd & 3rd, using a bloom filter
    // that effectively degenerates to a boolean

    // This approach provides a nice balance when there's an A-B-A access pattern
    // The first A will mark the slot
    // Then B will mark the slot with A | B
    // Then either A or B can claim and reset the slot

    int priorMarkHash = marks[index];
    boolean match = ((priorMarkHash & newAdjHash) == newAdjHash);
    if (match) {
      marks[index] = 0;
    } else {
      marks[index] = priorMarkHash | newAdjHash;
    }
    return match;
  }

  /** Provides the corresponding index into the marking array */
  static final int bucketIndex(int[] marks, int adjHash) {
    return adjHash & (marks.length - 1);
  }

  /**
   * Provides the corresponding index into an entry array. Assumes that the array size was
   * determined by using {@link Caching#cacheSizeFor}
   */
  static final <E> int bucketIndex(E[] entries, int adjHash) {
    return adjHash & (entries.length - 1);
  }
}
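To make the marking behaviour concrete, here is a small hypothetical snippet (not part of the commit) exercising the helpers above; it would need to live in the same package, since Caching and its methods are package-private, and the class name CachingMarkExample is made up.

package datadog.trace.common.writer.ddagent;

// Illustrative only: shows the first-request marking flow described in mark()'s comments.
final class CachingMarkExample {
  public static void main(String[] args) {
    int[] marks = new int[Caching.cacheSizeFor(100)]; // rounds up to 128 slots (a power of two)
    int adjHash = Caching.adjHash("db.statement");    // guaranteed non-zero hash for the value

    System.out.println(Caching.mark(marks, adjHash)); // false: first sighting only marks the filter
    System.out.println(Caching.mark(marks, adjHash)); // true: repeat sighting, a CacheEntry may now be created
    System.out.println(Caching.mark(marks, adjHash)); // false: the successful mark reset the slot to zero

    // When two values land in the same slot their marks are OR-ed together (bloom-filter style),
    // so either value can later match and claim the slot, as in the A-B-A example in the comments.
  }
}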

0 commit comments