-
Notifications
You must be signed in to change notification settings - Fork 318
UTF8 caching for v0.4 #9434
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
UTF8 caching for v0.4 #9434
Changes from all commits
8a59b29
68fdcb9
95767a6
5270f9c
69c4983
d725543
ebc3fb0
247bb02
69c94d1
d017b02
01aa284
bde8118
f15e1cc
947734a
f509c0a
6bfbf88
000de35
db82394
ff6e0f8
41d059d
3b69e62
4102a26
6902e80
9b78df7
3c33c38
41af3df
0b9f0d0
bdc1859
6ab19b0
49100cb
75bff75
bd17af9
f53ed6e
6d035cc
c923194
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,147 @@ | ||
| package datadog.trace.common.writer.ddagent; | ||
|
|
||
| import java.nio.charset.StandardCharsets; | ||
| import java.util.concurrent.ThreadLocalRandom; | ||
| import org.openjdk.jmh.annotations.Benchmark; | ||
| import org.openjdk.jmh.annotations.BenchmarkMode; | ||
| import org.openjdk.jmh.annotations.Mode; | ||
| import org.openjdk.jmh.infra.Blackhole; | ||
|
|
||
| /** | ||
| * This benchmark isn't really intended to be used to measure throughput, but rather to be used with | ||
| * "-prof gc" to check bytes / op. | ||
| * | ||
| * <p>Since {@link String#getBytes(java.nio.charset.Charset)} is intrinsified, the caches typically | ||
| * perform worse throughput-wise; the benefit of the caches is to reduce allocation. The intention of | ||
| * this benchmark is to create data that roughly resembles what might be seen in a trace payload. | ||
| * Tag names are quite static, tag values are mostly low cardinality, but some tag values have | ||
| * infinite cardinality. | ||
| */ | ||
| @BenchmarkMode(Mode.Throughput) | ||
| public class Utf8Benchmark { | ||
| static final int NUM_LOOKUPS = 10_000; | ||
|
|
||
| static final String[] TAGS = { | ||
| "_dd.asm.keep", | ||
| "ci.provider", | ||
| "language", | ||
| "db.statement", | ||
| "ci.job.url", | ||
| "ci.pipeline.url", | ||
| "db.pool", | ||
| "http.forwarder", | ||
| "db.warehouse", | ||
| "custom" | ||
| }; | ||
|
|
||
| static int pos = 0; | ||
| static int standardVal = 0; | ||
|
|
||
| static final String nextTag() { | ||
| if (pos == TAGS.length - 1) { | ||
| pos = 0; | ||
| } else { | ||
| pos += 1; | ||
| } | ||
| return TAGS[pos]; | ||
| } | ||
|
|
||
| static final String nextValue(String tag) { | ||
| if (tag.equals("custom")) { | ||
| return nextCustomValue(tag); | ||
| } else { | ||
| return nextStandardValue(tag); | ||
| } | ||
| } | ||
|
|
||
| /* | ||
| * Produces a high cardinality value - > thousands of distinct values per tag - many 1-time values | ||
| */ | ||
| static final String nextCustomValue(String tag) { | ||
| return tag + ThreadLocalRandom.current().nextInt(); | ||
| } | ||
|
|
||
| /* | ||
| * Produces a moderate cardinality value - tens of distinct values per tag | ||
| */ | ||
| static final String nextStandardValue(String tag) { | ||
| return tag + ThreadLocalRandom.current().nextInt(20); | ||
| } | ||
|
|
||
| @Benchmark | ||
| public static final String tagUtf8_baseline() { | ||
| return nextTag(); | ||
| } | ||
|
|
||
| @Benchmark | ||
| public static final byte[] tagUtf8_nocache() { | ||
| String tag = nextTag(); | ||
| return tag.getBytes(StandardCharsets.UTF_8); | ||
| } | ||
|
|
||
| static final SimpleUtf8Cache TAG_CACHE = new SimpleUtf8Cache(128); | ||
|
|
||
| @Benchmark | ||
| public static final byte[] tagUtf8_w_cache() { | ||
| String tag = nextTag(); | ||
|
|
||
| byte[] cache = TAG_CACHE.getUtf8(tag); | ||
| if (cache != null) return cache; | ||
|
|
||
| return tag.getBytes(StandardCharsets.UTF_8); | ||
| } | ||
|
|
||
| @Benchmark | ||
| public static final void valueUtf8_baseline(Blackhole bh) { | ||
| for (int i = 0; i < NUM_LOOKUPS; ++i) { | ||
| String tag = nextTag(); | ||
| String value = nextValue(tag); | ||
|
Comment on lines
+97
to
+98
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. question: Out of curiosity, would it be better to generate a tag / value dataset outside the benchmark methods? Maybe this could allow datasets with a wider range of values. I believe some customers have wide-char values (e.g. in Korean) in their tags; would it be useful to have a benchmark for that, and could the gains be more pronounced in this case?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, probably. I need to experiment some more to figure out what's possible with JMH. |
||
|
|
||
| bh.consume(tag); | ||
| bh.consume(value); | ||
| } | ||
| } | ||
|
|
||
| static final GenerationalUtf8Cache VALUE_CACHE = new GenerationalUtf8Cache(64, 128); | ||
|
|
||
| @Benchmark | ||
| public static final void valueUtf8_cache_generational(Blackhole bh) { | ||
| GenerationalUtf8Cache valueCache = VALUE_CACHE; | ||
| valueCache.recalibrate(); | ||
|
|
||
| for (int i = 0; i < NUM_LOOKUPS; ++i) { | ||
| String tag = nextTag(); | ||
| String value = nextValue(tag); | ||
|
|
||
| byte[] lookup = valueCache.getUtf8(value); | ||
| bh.consume(lookup); | ||
| } | ||
| } | ||
|
|
||
| static final SimpleUtf8Cache SIMPLE_VALUE_CACHE = new SimpleUtf8Cache(128); | ||
|
|
||
| @Benchmark | ||
| public static final void valueUtf8_cache_simple(Blackhole bh) { | ||
| SimpleUtf8Cache valueCache = SIMPLE_VALUE_CACHE; | ||
| valueCache.recalibrate(); | ||
|
|
||
| for (int i = 0; i < NUM_LOOKUPS; ++i) { | ||
| String tag = nextTag(); | ||
| String value = nextValue(tag); | ||
|
|
||
| byte[] lookup = valueCache.getUtf8(value); | ||
| bh.consume(lookup); | ||
| } | ||
| } | ||
|
|
||
| @Benchmark | ||
| public static final void valueUtf8_nocache(Blackhole bh) { | ||
| for (int i = 0; i < NUM_LOOKUPS; ++i) { | ||
| String tag = nextTag(); | ||
| String value = nextValue(tag); | ||
|
|
||
| bh.consume(tag); | ||
| bh.consume(value.getBytes(StandardCharsets.UTF_8)); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| package datadog.trace.common.writer.ddagent; | ||
|
|
||
| import java.util.Arrays; | ||
|
|
||
/**
 * Common static functions used by the simple and generational caches: power-of-two sizing,
 * non-zero hashing, slot indexing, and the marking filter that decides when a value is worth a
 * cache entry.
 */
final class Caching {
  /** Largest power of two that fits in a positive {@code int}. */
  private static final int MAX_CACHE_SIZE = 1 << 30;

  private Caching() {}

  /**
   * Provides the cache size that holds the requestedCapacity.
   *
   * <p>The result is always a power of two, which is what lets {@code bucketIndex} mask with
   * {@code length - 1}. Requests above 2^30 cannot be rounded up within an {@code int}; they are
   * clamped to 2^30 instead of overflowing (the doubling loop this replaces never terminated for
   * such inputs).
   *
   * @param requestedCapacity desired number of slots; values {@code <= 1} yield 1
   * @return the smallest power of two {@code >= requestedCapacity}, capped at 2^30
   */
  static final int cacheSizeFor(int requestedCapacity) {
    if (requestedCapacity <= 1) {
      return 1;
    }
    if (requestedCapacity >= MAX_CACHE_SIZE) {
      return MAX_CACHE_SIZE;
    }
    // highestOneBit(n - 1) is the largest power of two strictly below n; doubling it rounds up.
    return Integer.highestOneBit(requestedCapacity - 1) << 1;
  }

  /**
   * Provides an "adjusted" (i.e. non-zero) hash for the given String.
   *
   * <p>Zero is the "unmarked" value of a marking slot (see {@link #reset(int[])} and {@link
   * #mark(int[], int)}), so a real hash of zero is replaced with a fixed non-zero constant.
   *
   * @param value the String to hash; must not be null
   * @return {@code value.hashCode()}, or {@code 0xDA7AD06} when that hash is zero
   */
  static final int adjHash(String value) {
    int hash = value.hashCode();
    return (hash == 0) ? 0xDA7AD06 : hash;
  }

  /** Resets all markers to zero. */
  static final void reset(int[] marks) {
    Arrays.fill(marks, 0);
  }

  /**
   * Changes the mark status of the corresponding slot in the marking array.
   *
   * <ul>
   *   <li>If the slot already contains every bit of {@code newAdjHash}, resets the slot to zero
   *       and returns true.
   *   <li>Otherwise, ORs {@code newAdjHash} into the slot and returns false.
   * </ul>
   *
   * <p>A return value of true indicates that the requested value has likely been seen previously
   * and a cache entry should be created.
   *
   * @param marks marking array; its length must be a power of two
   * @param newAdjHash non-zero hash as produced by {@link #adjHash(String)}
   * @return true if the slot matched and was cleared, false if the slot was updated
   */
  static final boolean mark(int[] marks, int newAdjHash) {
    int index = bucketIndex(marks, newAdjHash);

    // This is the 4th iteration of the marking strategy
    // First version - used a mark entry, but that would prematurely
    // burn a slot in the cache
    // Second version - used a mark boolean, that worked well, but
    // was overly permissive in allowing the next request to the same slot
    // to immediately create a CacheEntry
    // Third version - used a mark hash that had to match exactly,
    // that could lead to access order fights over the cache slot
    // So this version is a hybrid of 2nd & 3rd, using a bloom filter
    // that effectively degenerates to a boolean

    // This approach provides a nice balance when there's an A-B-A access pattern
    // The first A will mark the slot
    // Then B will mark the slot with A | B
    // Then either A or B can claim and reset the slot

    int priorMarkHash = marks[index];
    // An empty slot (0) can never match, because newAdjHash is non-zero.
    boolean match = ((priorMarkHash & newAdjHash) == newAdjHash);
    if (match) {
      marks[index] = 0;
    } else {
      marks[index] = priorMarkHash | newAdjHash;
    }
    return match;
  }

  /**
   * Provides the corresponding index into the marking array. Assumes that the array length is a
   * power of two, e.g. as determined by {@link Caching#cacheSizeFor}.
   */
  static final int bucketIndex(int[] marks, int adjHash) {
    return adjHash & (marks.length - 1);
  }

  /**
   * Provides the corresponding index into an entry array. Assumes that the array size was
   * determined by using {@link Caching#cacheSizeFor}.
   */
  static final <E> int bucketIndex(E[] entries, int adjHash) {
    return adjHash & (entries.length - 1);
  }
}
Uh oh!
There was an error while loading. Please reload this page.