Skip to content

Commit 3eca4af

Browse files
committed
Reduce ML_ONLY heap size, so that direct memory is accounted for.
1 parent f262f81 commit 3eca4af

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/MachineDependentHeap.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,19 @@ protected int getHeapSizeMb(Settings nodeSettings, MachineNodeRole role, long av
9898
* could result in ML processes crashing with OOM errors or repeated autoscaling up and down.
9999
*/
100100
case ML_ONLY -> {
101+
/*
102+
* An ML node used to have 40% of the total memory for the Java heap, and the remainder
103+
* for ML and overhead (the operating system). This did not account for the Java direct
104+
* memory, which equals half of the heap size (see JvmErgonomics).
105+
* Right now, a factor of 2/3 is applied to the heap size here, leaving the ML memory
106+
* formula the same. That means the formula now also correctly accounts for direct memory,
107+
* since the heap (2/3 * 40% of the memory) plus the direct memory (1/3 * 40% of the memory)
108+
* equals the original 40% of the total memory.
109+
*/
101110
if (availableMemory <= (GB * 16)) {
102-
yield mb((long) (availableMemory * .4), 4);
111+
yield mb((long) (availableMemory * .4 * 2/3), 4);
103112
} else {
104-
yield mb((long) min((GB * 16) * .4 + (availableMemory - GB * 16) * .1, MAX_HEAP_SIZE), 4);
113+
yield mb((long) min(((GB * 16) * .4 + (availableMemory - GB * 16) * .1) * 2/3, MAX_HEAP_SIZE), 4);
105114
}
106115
}
107116
/*

distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/MachineDependentHeapTests.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,13 @@ public void testMasterOnlyOptions() throws Exception {
5656
}
5757

5858
public void testMlOnlyOptions() throws Exception {
59-
assertHeapOptions(1, containsInAnyOrder("-Xmx408m", "-Xms408m"), "ml");
60-
assertHeapOptions(4, containsInAnyOrder("-Xmx1636m", "-Xms1636m"), "ml");
61-
assertHeapOptions(32, containsInAnyOrder("-Xmx8192m", "-Xms8192m"), "ml");
62-
assertHeapOptions(64, containsInAnyOrder("-Xmx11468m", "-Xms11468m"), "ml");
59+
assertHeapOptions(1, containsInAnyOrder("-Xmx272m", "-Xms272m"), "ml");
60+
assertHeapOptions(4, containsInAnyOrder("-Xmx1092m", "-Xms1092m"), "ml");
61+
assertHeapOptions(32, containsInAnyOrder("-Xmx5460m", "-Xms5460m"), "ml");
62+
assertHeapOptions(64, containsInAnyOrder("-Xmx7644m", "-Xms7644m"), "ml");
6363
// We'd never see a node this big in Cloud, but this assertion proves that the 31GB absolute maximum
6464
// eventually kicks in (because 0.4 * 16 + 0.1 * (263 - 16) > 31)
65-
assertHeapOptions(263, containsInAnyOrder("-Xmx31744m", "-Xms31744m"), "ml");
65+
assertHeapOptions(263, containsInAnyOrder("-Xmx21228m", "-Xms21228m"), "ml");
6666
}
6767

6868
public void testDataNodeOptions() throws Exception {

0 commit comments

Comments
 (0)