Skip to content

Commit cf8ae49

Browse files
authored
Handle cgroups v2 in OsProbe (#77128) (#77306)
* Handle cgroups v2 in `OsProbe` (#77128) Closes #76812. Closes #77126. OsProbe was only capable of handling cgroup data in the v1 format. However, Debian 11 uses cgroups v2 by default, and Elasticsearch isn't capable of reporting any cgroup information. Therefore, add support for the v2 layout. Note that we have to open access to all of /sys/fs/cgroup because with cgroups v2, the files we need are in an unpredictable location. * Handle a max memory value of 'max' (#77289) * Handle a max memory value of 'max' * Update docs/changelog/77289.yaml * Delete 77289.yaml * Fixes to backport * Fix
1 parent 551216f commit cf8ae49

File tree

7 files changed

+320
-115
lines changed

7 files changed

+320
-115
lines changed

docs/changelog/77128.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
pr: 77128
2+
summary: Handle cgroups v2 in `OsProbe`
3+
area: Infra/Core
4+
type: enhancement
5+
issues:
6+
- 77126
7+
- 76812

qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -838,7 +838,6 @@ public void test131InitProcessHasCorrectPID() {
838838
/**
839839
* Check that Elasticsearch reports per-node cgroup information.
840840
*/
841-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/76812")
842841
public void test140CgroupOsStatsAreAvailable() throws Exception {
843842
waitForElasticsearch(installation);
844843

qa/os/src/test/java/org/elasticsearch/packaging/util/Packages.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,12 +252,18 @@ private static void verifyDefaultInstallation(Installation es, Distribution dist
252252
/**
253253
* Starts Elasticsearch, without checking that startup is successful.
254254
*/
255-
public static Shell.Result runElasticsearchStartCommand(Shell sh) throws IOException {
255+
public static Shell.Result runElasticsearchStartCommand(Shell sh) {
256256
if (isSystemd()) {
257+
Packages.JournaldWrapper journald = new Packages.JournaldWrapper(sh);
257258
sh.run("systemctl daemon-reload");
258259
sh.run("systemctl enable elasticsearch.service");
259260
sh.run("systemctl is-enabled elasticsearch.service");
260-
return sh.runIgnoreExitCode("systemctl start elasticsearch.service");
261+
Result exitCode = sh.runIgnoreExitCode("systemctl start elasticsearch.service");
262+
if (exitCode.isSuccess() == false) {
263+
logger.warn(sh.runIgnoreExitCode("systemctl status elasticsearch.service").stdout);
264+
logger.warn(journald.getLogs().stdout);
265+
}
266+
return exitCode;
261267
}
262268
return sh.runIgnoreExitCode("service elasticsearch start");
263269
}

server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java

Lines changed: 217 additions & 55 deletions
Large diffs are not rendered by default.

server/src/main/resources/org/elasticsearch/bootstrap/security.policy

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -144,14 +144,11 @@ grant {
144144
permission java.io.FilePermission "/proc/self/mountinfo", "read";
145145
permission java.io.FilePermission "/proc/diskstats", "read";
146146

147-
// control group stats on Linux
147+
// control group stats on Linux. cgroup v2 stats are in an unpredictable
148+
// location under `/sys/fs/cgroup`, so unfortunately we have to allow
149+
// read access to the entire directory hierarchy.
148150
permission java.io.FilePermission "/proc/self/cgroup", "read";
149-
permission java.io.FilePermission "/sys/fs/cgroup/cpu", "read";
150-
permission java.io.FilePermission "/sys/fs/cgroup/cpu/-", "read";
151-
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct", "read";
152-
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct/-", "read";
153-
permission java.io.FilePermission "/sys/fs/cgroup/memory", "read";
154-
permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read";
151+
permission java.io.FilePermission "/sys/fs/cgroup/-", "read";
155152

156153
// system memory on Linux systems affected by JDK bug (#66629)
157154
permission java.io.FilePermission "/proc/meminfo", "read";

server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java

Lines changed: 83 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ public void testOsInfo() throws IOException {
4343
final OsProbe osProbe = new OsProbe() {
4444

4545
@Override
46-
List<String> readOsRelease() throws IOException {
46+
List<String> readOsRelease() {
4747
assert Constants.LINUX : Constants.OS_NAME;
4848
if (prettyName != null) {
4949
final String quote = randomFrom("\"", "'", "");
@@ -78,8 +78,10 @@ public void testOsStats() {
7878
OsStats stats = osProbe.osStats();
7979
assertNotNull(stats);
8080
assertThat(stats.getTimestamp(), greaterThan(0L));
81-
assertThat(stats.getCpu().getPercent(), anyOf(equalTo((short) -1),
82-
is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100)))));
81+
assertThat(
82+
stats.getCpu().getPercent(),
83+
anyOf(equalTo((short) -1), is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100))))
84+
);
8385
double[] loadAverage = stats.getCpu().getLoadAverage();
8486
if (loadAverage != null) {
8587
assertThat(loadAverage.length, equalTo(3));
@@ -141,8 +143,11 @@ public void testOsStats() {
141143
assertThat(stats.getCgroup().getCpuStat().getTimeThrottledNanos(), greaterThanOrEqualTo(0L));
142144
// These could be null if transported from a node running an older version, but shouldn't be null on the current node
143145
assertThat(stats.getCgroup().getMemoryControlGroup(), notNullValue());
144-
assertThat(stats.getCgroup().getMemoryLimitInBytes(), notNullValue());
145-
assertThat(new BigInteger(stats.getCgroup().getMemoryLimitInBytes()), greaterThan(BigInteger.ZERO));
146+
String memoryLimitInBytes = stats.getCgroup().getMemoryLimitInBytes();
147+
assertThat(memoryLimitInBytes, notNullValue());
148+
if (memoryLimitInBytes.equals("max") == false) {
149+
assertThat(new BigInteger(memoryLimitInBytes), greaterThan(BigInteger.ZERO));
150+
}
146151
assertThat(stats.getCgroup().getMemoryUsageInBytes(), notNullValue());
147152
assertThat(new BigInteger(stats.getCgroup().getMemoryUsageInBytes()), greaterThan(BigInteger.ZERO));
148153
}
@@ -173,16 +178,14 @@ String readProcLoadavg() {
173178
}
174179

175180
public void testCgroupProbe() {
176-
assumeTrue("test runs on Linux only", Constants.LINUX);
177-
178-
final boolean areCgroupStatsAvailable = randomBoolean();
181+
final int availableCgroupsVersion = randomFrom(0, 1, 2);
179182
final String hierarchy = randomAlphaOfLength(16);
180183

181-
final OsProbe probe = buildStubOsProbe(areCgroupStatsAvailable, hierarchy);
184+
final OsProbe probe = buildStubOsProbe(availableCgroupsVersion, hierarchy);
182185

183186
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
184187

185-
if (areCgroupStatsAvailable) {
188+
if (availableCgroupsVersion > 0) {
186189
assertNotNull(cgroup);
187190
assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy));
188191
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063112L));
@@ -200,61 +203,53 @@ public void testCgroupProbe() {
200203
}
201204

202205
public void testCgroupProbeWithMissingCpuAcct() {
203-
assumeTrue("test runs on Linux only", Constants.LINUX);
204-
205206
final String hierarchy = randomAlphaOfLength(16);
206207

207208
// This cgroup data is missing a line about cpuacct
208-
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
209-
.stream()
209+
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
210210
.map(line -> line.replaceFirst(",cpuacct", ""))
211211
.collect(Collectors.toList());
212212

213-
final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
213+
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);
214214

215215
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
216216

217217
assertNull(cgroup);
218218
}
219219

220220
public void testCgroupProbeWithMissingCpu() {
221-
assumeTrue("test runs on Linux only", Constants.LINUX);
222-
223221
final String hierarchy = randomAlphaOfLength(16);
224222

225223
// This cgroup data is missing a line about cpu
226-
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
227-
.stream()
224+
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
228225
.map(line -> line.replaceFirst(":cpu,", ":"))
229226
.collect(Collectors.toList());
230227

231-
232-
final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
228+
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);
233229

234230
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
235231

236232
assertNull(cgroup);
237233
}
238234

239235
public void testCgroupProbeWithMissingMemory() {
240-
assumeTrue("test runs on Linux only", Constants.LINUX);
241-
242236
final String hierarchy = randomAlphaOfLength(16);
243237

244238
// This cgroup data is missing a line about memory
245-
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
246-
.stream()
239+
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
247240
.filter(line -> line.contains(":memory:") == false)
248241
.collect(Collectors.toList());
249242

250-
final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
243+
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);
251244

252245
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
253246

254247
assertNull(cgroup);
255248
}
256249

257250
public void testGetTotalMemFromProcMeminfo() throws Exception {
251+
int cgroupsVersion = randomFrom(1, 2);
252+
258253
// missing MemTotal line
259254
List<String> meminfoLines = Arrays.asList(
260255
"MemFree: 8467692 kB",
@@ -265,7 +260,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
265260
"Active: 43637908 kB",
266261
"Inactive: 8130280 kB"
267262
);
268-
OsProbe probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
263+
OsProbe probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
269264
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));
270265

271266
// MemTotal line with invalid value
@@ -279,7 +274,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
279274
"Active: 43637908 kB",
280275
"Inactive: 8130280 kB"
281276
);
282-
probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
277+
probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
283278
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));
284279

285280
// MemTotal line with invalid unit
@@ -293,7 +288,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
293288
"Active: 43637908 kB",
294289
"Inactive: 8130280 kB"
295290
);
296-
probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
291+
probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
297292
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));
298293

299294
// MemTotal line with random valid value
@@ -308,7 +303,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception {
308303
"Active: 43637908 kB",
309304
"Inactive: 8130280 kB"
310305
);
311-
probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
306+
probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
312307
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(memTotalInKb * 1024L));
313308
}
314309

@@ -319,7 +314,13 @@ public void testGetTotalMemoryOnDebian8() throws Exception {
319314
assertThat(osProbe.getTotalPhysicalMemorySize(), greaterThan(0L));
320315
}
321316

322-
private static List<String> getProcSelfGroupLines(String hierarchy) {
317+
private static List<String> getProcSelfGroupLines(int cgroupsVersion, String hierarchy) {
318+
// It doesn't really matter if cgroupsVersion == 0 here
319+
320+
if (cgroupsVersion == 2) {
321+
return org.elasticsearch.core.List.of("0::/" + hierarchy);
322+
}
323+
323324
return Arrays.asList(
324325
"10:freezer:/",
325326
"9:net_cls,net_prio:/",
@@ -331,32 +332,40 @@ private static List<String> getProcSelfGroupLines(String hierarchy) {
331332
"3:perf_event:/",
332333
"2:cpu,cpuacct,cpuset:/" + hierarchy,
333334
"1:name=systemd:/user.slice/user-1000.slice/session-2359.scope",
334-
"0::/cgroup2");
335+
"0::/cgroup2"
336+
);
335337
}
336338

337-
private static OsProbe buildStubOsProbe(final boolean areCgroupStatsAvailable, final String hierarchy) {
338-
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy);
339+
private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy) {
340+
List<String> procSelfCgroupLines = getProcSelfGroupLines(availableCgroupsVersion, hierarchy);
339341

340-
return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines);
342+
return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines);
341343
}
342344

343345
/**
344346
* Builds a test instance of OsProbe. Methods that ordinarily read from the filesystem are overridden to return values based upon
345347
* the arguments to this method.
346348
*
347-
* @param areCgroupStatsAvailable whether or not cgroup data is available. Normally OsProbe establishes this for itself.
349+
* @param availableCgroupsVersion which version of cgroups is available: 1 or 2, or 0 for no cgroups. Normally OsProbe establishes this
350+
* for itself.
348351
* @param hierarchy a mock value used to generate a cgroup hierarchy.
349352
* @param procSelfCgroupLines the lines that will be used as the content of <code>/proc/self/cgroup</code>
350353
* @param procMeminfoLines lines that will be used as the content of <code>/proc/meminfo</code>
351354
* @return a test instance
352355
*/
353356
private static OsProbe buildStubOsProbe(
354-
final boolean areCgroupStatsAvailable,
357+
final int availableCgroupsVersion,
355358
final String hierarchy,
356359
List<String> procSelfCgroupLines,
357360
List<String> procMeminfoLines
358361
) {
359362
return new OsProbe() {
363+
@Override
364+
OsStats.Cgroup getCgroup(boolean isLinux) {
365+
// Pretend we're always on Linux so that we can run the cgroup tests
366+
return super.getCgroup(true);
367+
}
368+
360369
@Override
361370
List<String> readProcSelfCgroup() {
362371
return procSelfCgroupLines;
@@ -382,10 +391,7 @@ String readSysFsCgroupCpuAcctCpuAcctCfsQuota(String controlGroup) {
382391

383392
@Override
384393
List<String> readSysFsCgroupCpuAcctCpuStat(String controlGroup) {
385-
return Arrays.asList(
386-
"nr_periods 17992",
387-
"nr_throttled 1311",
388-
"throttled_time 139298645489");
394+
return Arrays.asList("nr_periods 17992", "nr_throttled 1311", "throttled_time 139298645489");
389395
}
390396

391397
@Override
@@ -403,22 +409,50 @@ String readSysFsCgroupMemoryUsageInBytes(String controlGroup) {
403409

404410
@Override
405411
boolean areCgroupStatsAvailable() {
406-
return areCgroupStatsAvailable;
412+
return availableCgroupsVersion > 0;
407413
}
408414

409415
@Override
410-
List<String> readProcMeminfo() throws IOException {
416+
List<String> readProcMeminfo() {
411417
return procMeminfoLines;
412418
}
419+
420+
@Override
421+
String readSysFsCgroupV2MemoryLimitInBytes(String controlGroup) {
422+
assertThat(controlGroup, equalTo("/" + hierarchy));
423+
// This is the highest value that can be stored in an unsigned 64 bit number, hence too big for long
424+
return "18446744073709551615";
425+
}
426+
427+
@Override
428+
String readSysFsCgroupV2MemoryUsageInBytes(String controlGroup) {
429+
assertThat(controlGroup, equalTo("/" + hierarchy));
430+
return "4796416";
431+
}
432+
433+
@Override
434+
List<String> readCgroupV2CpuStats(String controlGroup) {
435+
assertThat(controlGroup, equalTo("/" + hierarchy));
436+
return org.elasticsearch.core.List.of(
437+
"usage_usec 364869866063112",
438+
"user_usec 34636",
439+
"system_usec 9896",
440+
"nr_periods 17992",
441+
"nr_throttled 1311",
442+
"throttled_usec 139298645489"
443+
);
444+
}
445+
446+
@Override
447+
String readCgroupV2CpuLimit(String controlGroup) {
448+
assertThat(controlGroup, equalTo("/" + hierarchy));
449+
return "50000 100000";
450+
}
413451
};
414452
}
415453

416-
private static OsProbe buildStubOsProbe(
417-
final boolean areCgroupStatsAvailable,
418-
final String hierarchy,
419-
List<String> procSelfCgroupLines
420-
) {
421-
return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines, org.elasticsearch.core.List.of());
454+
private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy, List<String> procSelfCgroupLines) {
455+
return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines, org.elasticsearch.core.List.of());
422456
}
423457

424458
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1212,7 +1212,7 @@ static long machineMemoryFromStats(OsStats stats) {
12121212
OsStats.Cgroup cgroup = stats.getCgroup();
12131213
if (cgroup != null) {
12141214
String containerLimitStr = cgroup.getMemoryLimitInBytes();
1215-
if (containerLimitStr != null) {
1215+
if (containerLimitStr != null && containerLimitStr.equals("max") == false) {
12161216
BigInteger containerLimit = new BigInteger(containerLimitStr);
12171217
if ((containerLimit.compareTo(BigInteger.valueOf(mem)) < 0 && containerLimit.compareTo(BigInteger.ZERO) > 0)
12181218
// mem <= 0 means the value couldn't be obtained for some reason

0 commit comments

Comments
 (0)