Skip to content

Commit e724fe9

Browse files
Implement Setting Deduplication via String Interning (#80493) (#82659)
This is a somewhat crude solution to #78892 that addresses 95%+ of duplicate setting entry memory consumption in large clusters. The remaining duplicate structures (lists of all the same strings) are comparatively cheap in their heap consumption. In heavy benchmarking for #77466 no runtime impact of adding this extra step to setting creation has been found despite pushing setting creation harder than is expected in real-world usage (part of the low relative impact here is the fact that populating a tree-map is quite expensive to begin with so adding the string interning which is fast via the CHM cache doesn't add much overhead). On the other hand, the heap use impact for use-cases that come with a large number of duplicate settings (many similar indices) is significant. As an example, 10k AuditBeat indices consume about 500M of heap for duplicate settings data structures without this change. This cahnge brings the heap consumption from duplicate settings down to O(1M) on every node in the cluster. Relates and addresses most of #78892 Relates #77466
1 parent bc2a6f6 commit e724fe9

File tree

4 files changed

+92
-11
lines changed

4 files changed

+92
-11
lines changed

server/src/internalClusterTest/java/org/elasticsearch/indices/settings/UpdateSettingsIT.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,4 +880,47 @@ public void testNoopUpdate() {
880880
assertNotSame(currentState, clusterService.state());
881881
}
882882

883+
public void testAllSettingStringInterned() {
884+
final String masterNode = internalCluster().startMasterOnlyNode();
885+
final String dataNode = internalCluster().startDataOnlyNode();
886+
887+
final String index1 = "index-1";
888+
final String index2 = "index-2";
889+
createIndex(index1, index2);
890+
final ClusterService clusterServiceMaster = internalCluster().getInstance(ClusterService.class, masterNode);
891+
final ClusterService clusterServiceData = internalCluster().getInstance(ClusterService.class, dataNode);
892+
final Settings index1SettingsMaster = clusterServiceMaster.state().metadata().index(index1).getSettings();
893+
final Settings index1SettingsData = clusterServiceData.state().metadata().index(index1).getSettings();
894+
assertNotSame(index1SettingsMaster, index1SettingsData);
895+
assertSame(index1SettingsMaster.get(IndexMetadata.SETTING_INDEX_UUID), index1SettingsData.get(IndexMetadata.SETTING_INDEX_UUID));
896+
897+
// Create a list of not interned strings to make sure interning setting values works
898+
final List<String> queryFieldsSetting = org.elasticsearch.core.List.of(new String("foo"), new String("bar"), new String("bla"));
899+
assertAcked(
900+
admin().indices()
901+
.prepareUpdateSettings(index1, index2)
902+
.setSettings(Settings.builder().putList("query.default_field", queryFieldsSetting))
903+
);
904+
final Settings updatedIndex1SettingsMaster = clusterServiceMaster.state().metadata().index(index1).getSettings();
905+
final Settings updatedIndex1SettingsData = clusterServiceData.state().metadata().index(index1).getSettings();
906+
assertNotSame(updatedIndex1SettingsMaster, updatedIndex1SettingsData);
907+
assertEqualsAndStringsInterned(queryFieldsSetting, updatedIndex1SettingsMaster);
908+
assertEqualsAndStringsInterned(queryFieldsSetting, updatedIndex1SettingsData);
909+
assertEqualsAndStringsInterned(queryFieldsSetting, clusterServiceMaster.state().metadata().index(index2).getSettings());
910+
assertEqualsAndStringsInterned(queryFieldsSetting, clusterServiceData.state().metadata().index(index2).getSettings());
911+
}
912+
913+
private void assertEqualsAndStringsInterned(List<String> queryFieldsSetting, Settings settings) {
914+
final List<String> defaultFields = settings.getAsList("index.query.default_field");
915+
assertEquals(queryFieldsSetting, defaultFields);
916+
assertNotSame(queryFieldsSetting, defaultFields);
917+
// all setting strings should be interned
918+
assertSame("foo", defaultFields.get(0));
919+
assertSame("bar", defaultFields.get(1));
920+
assertSame("bla", defaultFields.get(2));
921+
for (String key : settings.keySet()) {
922+
assertSame(key, key.intern());
923+
}
924+
}
925+
883926
}

server/src/main/java/org/elasticsearch/common/settings/Setting.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,7 +2030,7 @@ public static class SimpleKey implements Key {
20302030
protected final String key;
20312031

20322032
public SimpleKey(String key) {
2033-
this.key = key;
2033+
this.key = Settings.internKeyOrValue(key);
20342034
}
20352035

20362036
@Override
@@ -2121,7 +2121,7 @@ public static final class AffixKey implements Key {
21212121
sb.append('.');
21222122
sb.append(suffix);
21232123
}
2124-
keyString = sb.toString();
2124+
keyString = Settings.internKeyOrValue(sb.toString());
21252125
}
21262126

21272127
@Override

server/src/main/java/org/elasticsearch/common/settings/Settings.java

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.common.unit.ByteSizeUnit;
2323
import org.elasticsearch.common.unit.ByteSizeValue;
2424
import org.elasticsearch.common.unit.MemorySizeValue;
25+
import org.elasticsearch.common.util.StringLiteralDeduplicator;
2526
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
2627
import org.elasticsearch.common.xcontent.XContentParserUtils;
2728
import org.elasticsearch.core.Booleans;
@@ -99,7 +100,27 @@ private static Settings of(Map<String, Object> settings, SecureSettings secureSe
99100

100101
private Settings(Map<String, Object> settings, SecureSettings secureSettings) {
101102
// we use a sorted map for consistent serialization when using getAsMap()
102-
this.settings = Collections.unmodifiableNavigableMap(new TreeMap<>(settings));
103+
final TreeMap<String, Object> tree = new TreeMap<>();
104+
for (Map.Entry<String, Object> settingEntry : settings.entrySet()) {
105+
final Object value = settingEntry.getValue();
106+
final Object internedValue;
107+
if (value instanceof String) {
108+
internedValue = internKeyOrValue((String) value);
109+
} else if (value instanceof List) {
110+
@SuppressWarnings("unchecked")
111+
List<String> valueList = (List<String>) value;
112+
final int listSize = valueList.size();
113+
final String[] internedArr = new String[listSize];
114+
for (int i = 0; i < valueList.size(); i++) {
115+
internedArr[i] = internKeyOrValue(valueList.get(i));
116+
}
117+
internedValue = org.elasticsearch.core.List.of(internedArr);
118+
} else {
119+
internedValue = value;
120+
}
121+
tree.put(internKeyOrValue(settingEntry.getKey()), internedValue);
122+
}
123+
this.settings = Collections.unmodifiableNavigableMap(tree);
103124
this.secureSettings = secureSettings;
104125
}
105126

@@ -418,7 +439,7 @@ public List<String> getAsList(String key, List<String> defaultValue, Boolean com
418439
if (valueFromPrefix instanceof List) {
419440
@SuppressWarnings("unchecked")
420441
final List<String> valuesAsList = (List<String>) valueFromPrefix;
421-
return Collections.unmodifiableList(valuesAsList);
442+
return valuesAsList;
422443
} else if (commaDelimited) {
423444
String[] strings = Strings.splitStringByCommaToArray(get(key));
424445
if (strings.length > 0) {
@@ -1222,11 +1243,19 @@ public boolean shouldRemoveMissingPlaceholder(String placeholderName) {
12221243
}
12231244
if (entry.getValue() instanceof List) {
12241245
@SuppressWarnings("unchecked")
1225-
final ListIterator<String> li = ((List<String>) entry.getValue()).listIterator();
1246+
final List<String> mutableList = new ArrayList<>((List<String>) entry.getValue());
1247+
final ListIterator<String> li = mutableList.listIterator();
1248+
boolean changed = false;
12261249
while (li.hasNext()) {
12271250
final String settingValueRaw = li.next();
12281251
final String settingValueResolved = propertyPlaceholder.replacePlaceholders(settingValueRaw, placeholderResolver);
1229-
li.set(settingValueResolved);
1252+
if (settingValueResolved.equals(settingValueRaw) == false) {
1253+
li.set(settingValueResolved);
1254+
changed = true;
1255+
}
1256+
}
1257+
if (changed) {
1258+
entry.setValue(org.elasticsearch.core.List.copyOf(mutableList));
12301259
}
12311260
continue;
12321261
}
@@ -1478,4 +1507,18 @@ private static String toString(Object o) {
14781507
return o == null ? null : o.toString();
14791508
}
14801509

1510+
private static final StringLiteralDeduplicator settingLiteralDeduplicator = new StringLiteralDeduplicator();
1511+
1512+
/**
1513+
* Interns the given string which should be either a setting key or value or part of a setting value list. This is used to reduce the
1514+
* memory footprint of similar setting instances like index settings that may contain mostly the same keys and values. Interning these
1515+
* strings at some runtime cost is considered a reasonable trade-off here since neither setting keys nor values change frequently
1516+
* while duplicate keys values may consume significant amounts of memory.
1517+
*
1518+
* @param s string to intern
1519+
* @return interned string
1520+
*/
1521+
static String internKeyOrValue(String s) {
1522+
return settingLiteralDeduplicator.deduplicate(s);
1523+
}
14811524
}

server/src/main/java/org/elasticsearch/common/util/StringLiteralDeduplicator.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
*/
88
package org.elasticsearch.common.util;
99

10-
import org.apache.logging.log4j.LogManager;
11-
import org.apache.logging.log4j.Logger;
1210
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
1311

1412
import java.util.Map;
@@ -20,8 +18,6 @@
2018
*/
2119
public final class StringLiteralDeduplicator {
2220

23-
private static final Logger logger = LogManager.getLogger(StringLiteralDeduplicator.class);
24-
2521
private static final int MAX_SIZE = 1000;
2622

2723
private final Map<String, String> map = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
@@ -36,7 +32,6 @@ public String deduplicate(String string) {
3632
final String interned = string.intern();
3733
if (map.size() > MAX_SIZE) {
3834
map.clear();
39-
logger.debug("clearing intern cache");
4035
}
4136
map.put(interned, interned);
4237
return interned;

0 commit comments

Comments
 (0)