Skip to content

Commit 6d20dbc

Browse files
Intern IndexFieldCapabilities Type String on Read (#76405)
When handling a large number of these messages, i.e. when fetching field caps for many indices (and/or when those indices contain lots of fields), the type string is repeated many times over. As these strings are already interned because they are constants, taking the performance hit of interning them on deserialization seems a reasonable trade-off: it saves a non-trivial amount of memory for large clusters and speeds up `org.elasticsearch.action.fieldcaps.TransportFieldCapabilitiesAction#merge`, which uses these strings in map lookups and will run significantly faster with interned strings than with fresh strings that do not have their hash values cached yet.
1 parent a073752 commit 6d20dbc

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

server/src/main/java/org/elasticsearch/action/fieldcaps/IndexFieldCapabilities.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.StreamOutput;
1313
import org.elasticsearch.common.io.stream.Writeable;
14+
import org.elasticsearch.common.util.StringLiteralDeduplicator;
1415

1516
import java.io.IOException;
1617
import java.util.Map;
@@ -21,6 +22,8 @@
2122
*/
2223
public class IndexFieldCapabilities implements Writeable {
2324

25+
private static final StringLiteralDeduplicator typeStringDeduplicator = new StringLiteralDeduplicator();
26+
2427
private final String name;
2528
private final String type;
2629
private final boolean isMetadatafield;
@@ -50,7 +53,7 @@ public class IndexFieldCapabilities implements Writeable {
5053

5154
IndexFieldCapabilities(StreamInput in) throws IOException {
5255
this.name = in.readString();
53-
this.type = in.readString();
56+
this.type = typeStringDeduplicator.deduplicate(in.readString());
5457
this.isMetadatafield = in.readBoolean();
5558
this.isSearchable = in.readBoolean();
5659
this.isAggregatable = in.readBoolean();
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
package org.elasticsearch.common.util;
9+
10+
import org.apache.logging.log4j.LogManager;
11+
import org.apache.logging.log4j.Logger;
12+
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
13+
14+
import java.util.Map;
15+
16+
/**
17+
* A cache in front of Java's string interning. This class assumes that it is only called with strings that are already part of the
18+
* JVM's string pool so that interning them does not grow the pool. Calling it with strings not in the interned string pool is not
19+
* advisable as its performance may deteriorate to slower than outright calls to {@link String#intern()}.
20+
*/
21+
public final class StringLiteralDeduplicator {
22+
23+
private static final Logger logger = LogManager.getLogger(StringLiteralDeduplicator.class);
24+
25+
private static final int MAX_SIZE = 1000;
26+
27+
private final Map<String, String> map = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
28+
29+
public StringLiteralDeduplicator() {
30+
}
31+
32+
public String deduplicate(String string) {
33+
final String res = map.get(string);
34+
if (res != null) {
35+
return res;
36+
}
37+
final String interned = string.intern();
38+
if (map.size() > MAX_SIZE) {
39+
map.clear();
40+
logger.debug("clearing intern cache");
41+
}
42+
map.put(interned, interned);
43+
return interned;
44+
}
45+
}

0 commit comments

Comments
 (0)