30
30
import java .util .regex .Matcher ;
31
31
import java .util .regex .Pattern ;
32
32
import java .util .stream .Collectors ;
33
+ import java .util .stream .Stream ;
33
34
34
35
/**
35
36
* The {@link OsProbe} class retrieves information about the physical and swap size of the machine
36
37
* memory, as well as the system load average and cpu load.
37
38
*
38
- * In some exceptional cases, it's possible the underlying native methods used by
39
+ * <p> In some exceptional cases, it's possible the underlying native methods used by
39
40
* {@link #getFreePhysicalMemorySize()}, {@link #getTotalPhysicalMemorySize()},
40
41
* {@link #getFreeSwapSpaceSize()}, and {@link #getTotalSwapSpaceSize()} can return a
41
42
* negative value. Because of this, we prevent those methods from returning negative values,
42
43
* returning 0 instead.
43
44
*
44
- * The OS can report a negative number in a number of cases:
45
- * - Non-supported OSes (HP-UX, or AIX)
46
- * - A failure of macOS to initialize host statistics
47
- * - An OS that does not support the {@code _SC_PHYS_PAGES} or {@code _SC_PAGE_SIZE} flags for the {@code sysconf()} linux kernel call
48
- * - An overflow of the product of {@code _SC_PHYS_PAGES} and {@code _SC_PAGE_SIZE}
49
- * - An error case retrieving these values from a linux kernel
50
- * - A non-standard libc implementation not implementing the required values
51
- * For a more exhaustive explanation, see https://github.com/elastic/elasticsearch/pull/42725
45
+ * <p>The OS can report a negative number in a number of cases:
46
+ *
47
+ * <ul>
48
+ * <li>Non-supported OSes (HP-UX, or AIX)
49
+ * <li>A failure of macOS to initialize host statistics
50
+ * <li>An OS that does not support the {@code _SC_PHYS_PAGES} or {@code _SC_PAGE_SIZE} flags for the {@code sysconf()} linux kernel call
51
+ * <li>An overflow of the product of {@code _SC_PHYS_PAGES} and {@code _SC_PAGE_SIZE}
52
+ * <li>An error case retrieving these values from a linux kernel
53
+ * <li>A non-standard libc implementation not implementing the required values
54
+ * </ul>
55
+ *
56
+ * <p>For a more exhaustive explanation, see <a href="https://github.com/elastic/elasticsearch/pull/42725"
57
+ * >https://github.com/elastic/elasticsearch/pull/42725</a>
52
58
*/
53
59
public class OsProbe {
54
60
@@ -178,7 +184,7 @@ final double[] getSystemLoadAverage() {
178
184
final String procLoadAvg = readProcLoadavg ();
179
185
assert procLoadAvg .matches ("(\\ d+\\ .\\ d+\\ s+){3}\\ d+/\\ d+\\ s+\\ d+" );
180
186
final String [] fields = procLoadAvg .split ("\\ s+" );
181
- return new double []{ Double .parseDouble (fields [0 ]), Double .parseDouble (fields [1 ]), Double .parseDouble (fields [2 ])};
187
+ return new double [] { Double .parseDouble (fields [0 ]), Double .parseDouble (fields [1 ]), Double .parseDouble (fields [2 ]) };
182
188
} catch (final IOException e ) {
183
189
if (logger .isDebugEnabled ()) {
184
190
logger .debug ("error reading /proc/loadavg" , e );
@@ -192,7 +198,7 @@ final double[] getSystemLoadAverage() {
192
198
}
193
199
try {
194
200
final double oneMinuteLoadAverage = (double ) getSystemLoadAverage .invoke (osMxBean );
195
- return new double []{ oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1 , -1 , -1 };
201
+ return new double [] { oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1 , -1 , -1 };
196
202
} catch (IllegalAccessException | InvocationTargetException e ) {
197
203
if (logger .isDebugEnabled ()) {
198
204
logger .debug ("error reading one minute load average from operating system" , e );
@@ -318,6 +324,23 @@ String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOEx
318
324
return readSingleLine (PathUtils .get ("/sys/fs/cgroup/cpuacct" , controlGroup , "cpuacct.usage" ));
319
325
}
320
326
327
+ private long [] getCgroupV2CpuLimit (String controlGroup ) throws IOException {
328
+ String entry = readCgroupV2CpuLimit (controlGroup );
329
+ String [] parts = entry .split ("\\ s+" );
330
+ assert parts .length == 2 : "Expected 2 fields in [cpu.max]" ;
331
+
332
+ long [] values = new long [2 ];
333
+
334
+ values [0 ] = "max" .equals (parts [0 ]) ? -1L : Long .parseLong (parts [0 ]);
335
+ values [1 ] = Long .parseLong (parts [1 ]);
336
+ return values ;
337
+ }
338
+
339
+ @ SuppressForbidden (reason = "access /sys/fs/cgroup/cpu.max" )
340
+ String readCgroupV2CpuLimit (String controlGroup ) throws IOException {
341
+ return readSingleLine (PathUtils .get ("/sys/fs/cgroup/" , controlGroup , "cpu.max" ));
342
+ }
343
+
321
344
/**
322
345
* The total period of time in microseconds for how frequently the Elasticsearch control group's access to CPU resources will be
323
346
* reallocated.
@@ -454,6 +477,35 @@ String readSysFsCgroupMemoryLimitInBytes(final String controlGroup) throws IOExc
454
477
return readSingleLine (PathUtils .get ("/sys/fs/cgroup/memory" , controlGroup , "memory.limit_in_bytes" ));
455
478
}
456
479
480
+ /**
481
+ * The maximum amount of user memory (including file cache).
482
+ * If there is no limit then some Linux versions return the maximum value that can be stored in an
483
+ * unsigned 64 bit number, and this will overflow a long, hence the result type is <code>String</code>.
484
+ * (The alternative would have been <code>BigInteger</code> but then it would not be possible to index
485
+ * the OS stats document into Elasticsearch without losing information, as <code>BigInteger</code> is
486
+ * not a supported Elasticsearch type.)
487
+ *
488
+ * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
489
+ * @return the maximum amount of user memory (including file cache)
490
+ * @throws IOException if an I/O exception occurs reading {@code memory.limit_in_bytes} for the control group
491
+ */
492
+ private String getCgroupV2MemoryLimitInBytes (final String controlGroup ) throws IOException {
493
+ return readSysFsCgroupV2MemoryLimitInBytes (controlGroup );
494
+ }
495
+
496
+ /**
497
+ * Returns the line from {@code memory.max} for the control group to which the Elasticsearch process belongs for the
498
+ * {@code memory} subsystem. This line represents the maximum amount of user memory (including file cache).
499
+ *
500
+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
501
+ * @return the line from {@code memory.max}
502
+ * @throws IOException if an I/O exception occurs reading {@code memory.max} for the control group
503
+ */
504
+ @ SuppressForbidden (reason = "access /sys/fs/cgroup/memory.max" )
505
+ String readSysFsCgroupV2MemoryLimitInBytes (final String controlGroup ) throws IOException {
506
+ return readSingleLine (PathUtils .get ("/sys/fs/cgroup/" , controlGroup , "memory.max" ));
507
+ }
508
+
457
509
/**
458
510
* The total current memory usage by processes in the cgroup (in bytes).
459
511
* If there is no limit then some Linux versions return the maximum value that can be stored in an
@@ -483,27 +535,94 @@ String readSysFsCgroupMemoryUsageInBytes(final String controlGroup) throws IOExc
483
535
return readSingleLine (PathUtils .get ("/sys/fs/cgroup/memory" , controlGroup , "memory.usage_in_bytes" ));
484
536
}
485
537
538
+ /**
539
+ * The total current memory usage by processes in the cgroup (in bytes).
540
+ * If there is no limit then some Linux versions return the maximum value that can be stored in an
541
+ * unsigned 64 bit number, and this will overflow a long, hence the result type is <code>String</code>.
542
+ * (The alternative would have been <code>BigInteger</code> but then it would not be possible to index
543
+ * the OS stats document into Elasticsearch without losing information, as <code>BigInteger</code> is
544
+ * not a supported Elasticsearch type.)
545
+ *
546
+ * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
547
+ * @return the total current memory usage by processes in the cgroup (in bytes)
548
+ * @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group
549
+ */
550
+ private String getCgroupV2MemoryUsageInBytes (final String controlGroup ) throws IOException {
551
+ return readSysFsCgroupV2MemoryUsageInBytes (controlGroup );
552
+ }
553
+
554
+ /**
555
+ * Returns the line from {@code memory.current} for the control group to which the Elasticsearch process belongs for the
556
+ * {@code memory} subsystem. This line represents the total current memory usage by processes in the cgroup (in bytes).
557
+ *
558
+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
559
+ * @return the line from {@code memory.current}
560
+ * @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group
561
+ */
562
+ @ SuppressForbidden (reason = "access /sys/fs/cgroup/memory.current" )
563
+ String readSysFsCgroupV2MemoryUsageInBytes (final String controlGroup ) throws IOException {
564
+ return readSingleLine (PathUtils .get ("/sys/fs/cgroup/" , controlGroup , "memory.current" ));
565
+ }
566
+
486
567
/**
487
568
* Checks if cgroup stats are available by checking for the existence of {@code /proc/self/cgroup}, {@code /sys/fs/cgroup/cpu},
488
569
* {@code /sys/fs/cgroup/cpuacct} and {@code /sys/fs/cgroup/memory}.
489
570
*
490
571
* @return {@code true} if the stats are available, otherwise {@code false}
491
572
*/
492
573
@ SuppressForbidden (reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, /sys/fs/cgroup/cpuacct and /sys/fs/cgroup/memory" )
493
- boolean areCgroupStatsAvailable () {
574
+ boolean areCgroupStatsAvailable () throws IOException {
494
575
if (Files .exists (PathUtils .get ("/proc/self/cgroup" )) == false ) {
495
576
return false ;
496
577
}
497
- if (Files .exists (PathUtils .get ("/sys/fs/cgroup/cpu" )) == false ) {
498
- return false ;
499
- }
500
- if (Files .exists (PathUtils .get ("/sys/fs/cgroup/cpuacct" )) == false ) {
501
- return false ;
578
+
579
+ List <String > lines = readProcSelfCgroup ();
580
+
581
+ // cgroup v2
582
+ if (lines .size () == 1 && lines .get (0 ).startsWith ("0::" )) {
583
+ return Stream .of ("/sys/fs/cgroup/cpu.stat" , "/sys/fs/cgroup/memory.stat" ).allMatch (path -> Files .exists (PathUtils .get (path )));
502
584
}
503
- if (Files .exists (PathUtils .get ("/sys/fs/cgroup/memory" )) == false ) {
504
- return false ;
585
+
586
+ return Stream .of ("/sys/fs/cgroup/cpu" , "/sys/fs/cgroup/cpuacct" , "/sys/fs/cgroup/memory" )
587
+ .allMatch (path -> Files .exists (PathUtils .get (path )));
588
+ }
589
+
590
+ /**
591
+ * The CPU statistics for all tasks in the Elasticsearch control group.
592
+ *
593
+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
594
+ * @return the CPU statistics
595
+ * @throws IOException if an I/O exception occurs reading {@code cpu.stat} for the control group
596
+ */
597
+ private Map <String , Long > getCgroupV2CpuStats (String controlGroup ) throws IOException {
598
+ final List <String > lines = readCgroupV2CpuStats (controlGroup );
599
+ final Map <String , Long > stats = new HashMap <>();
600
+
601
+ for (String line : lines ) {
602
+ String [] parts = line .split ("\\ s+" );
603
+ assert parts .length == 2 : "Corrupt cpu.stat line: [" + line + "]" ;
604
+ stats .put (parts [0 ], Long .parseLong (parts [1 ]));
505
605
}
506
- return true ;
606
+
607
+ final List <String > expectedKeys = org .elasticsearch .core .List .of (
608
+ "nr_periods" ,
609
+ "nr_throttled" ,
610
+ "system_usec" ,
611
+ "throttled_usec" ,
612
+ "usage_usec" ,
613
+ "user_usec"
614
+ );
615
+ expectedKeys .forEach (key -> {
616
+ assert stats .containsKey (key ) : key ;
617
+ assert stats .get (key ) != -1 : stats .get (key );
618
+ });
619
+
620
+ return stats ;
621
+ }
622
+
623
+ @ SuppressForbidden (reason = "access /sys/fs/cgroup/cpu.stat" )
624
+ List <String > readCgroupV2CpuStats (final String controlGroup ) throws IOException {
625
+ return Files .readAllLines (PathUtils .get ("/sys/fs/cgroup" , controlGroup , "cpu.stat" ));
507
626
}
508
627
509
628
/**
@@ -515,45 +634,79 @@ private OsStats.Cgroup getCgroup() {
515
634
try {
516
635
if (areCgroupStatsAvailable () == false ) {
517
636
return null ;
518
- } else {
519
- final Map <String , String > controllerMap = getControlGroups ();
520
- assert controllerMap .isEmpty () == false ;
637
+ }
638
+
639
+ final Map <String , String > controllerMap = getControlGroups ();
640
+ assert controllerMap .isEmpty () == false ;
521
641
522
- final String cpuAcctControlGroup = controllerMap .get ("cpuacct" );
642
+ final String cpuAcctControlGroup ;
643
+ final long cgroupCpuAcctUsageNanos ;
644
+ final long cgroupCpuAcctCpuCfsPeriodMicros ;
645
+ final long cgroupCpuAcctCpuCfsQuotaMicros ;
646
+ final String cpuControlGroup ;
647
+ final OsStats .Cgroup .CpuStat cpuStat ;
648
+ final String memoryControlGroup ;
649
+ final String cgroupMemoryLimitInBytes ;
650
+ final String cgroupMemoryUsageInBytes ;
651
+
652
+ if (controllerMap .size () == 1 && controllerMap .containsKey ("" )) {
653
+ // There's a single hierarchy for all controllers
654
+ cpuControlGroup = cpuAcctControlGroup = memoryControlGroup = controllerMap .get ("" );
655
+
656
+ // `cpuacct` was merged with `cpu` in v2
657
+ final Map <String , Long > cpuStatsMap = getCgroupV2CpuStats (cpuControlGroup );
658
+
659
+ cgroupCpuAcctUsageNanos = cpuStatsMap .get ("usage_usec" );
660
+
661
+ long [] cpuLimits = getCgroupV2CpuLimit (cpuControlGroup );
662
+ cgroupCpuAcctCpuCfsQuotaMicros = cpuLimits [0 ];
663
+ cgroupCpuAcctCpuCfsPeriodMicros = cpuLimits [1 ];
664
+
665
+ cpuStat = new OsStats .Cgroup .CpuStat (
666
+ cpuStatsMap .get ("nr_periods" ),
667
+ cpuStatsMap .get ("nr_throttled" ),
668
+ cpuStatsMap .get ("throttled_usec" )
669
+ );
670
+
671
+ cgroupMemoryLimitInBytes = getCgroupV2MemoryLimitInBytes (memoryControlGroup );
672
+ cgroupMemoryUsageInBytes = getCgroupV2MemoryUsageInBytes (memoryControlGroup );
673
+ } else {
674
+ cpuAcctControlGroup = controllerMap .get ("cpuacct" );
523
675
if (cpuAcctControlGroup == null ) {
524
676
logger .debug ("no [cpuacct] data found in cgroup stats" );
525
677
return null ;
526
678
}
527
- final long cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos (cpuAcctControlGroup );
679
+ cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos (cpuAcctControlGroup );
528
680
529
- final String cpuControlGroup = controllerMap .get ("cpu" );
681
+ cpuControlGroup = controllerMap .get ("cpu" );
530
682
if (cpuControlGroup == null ) {
531
683
logger .debug ("no [cpu] data found in cgroup stats" );
532
684
return null ;
533
685
}
534
- final long cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros (cpuControlGroup );
535
- final long cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros (cpuControlGroup );
536
- final OsStats . Cgroup . CpuStat cpuStat = getCgroupCpuAcctCpuStat (cpuControlGroup );
686
+ cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros (cpuControlGroup );
687
+ cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros (cpuControlGroup );
688
+ cpuStat = getCgroupCpuAcctCpuStat (cpuControlGroup );
537
689
538
- final String memoryControlGroup = controllerMap .get ("memory" );
690
+ memoryControlGroup = controllerMap .get ("memory" );
539
691
if (memoryControlGroup == null ) {
540
692
logger .debug ("no [memory] data found in cgroup stats" );
541
693
return null ;
542
694
}
543
- final String cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes (memoryControlGroup );
544
- final String cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes (memoryControlGroup );
545
-
546
- return new OsStats .Cgroup (
547
- cpuAcctControlGroup ,
548
- cgroupCpuAcctUsageNanos ,
549
- cpuControlGroup ,
550
- cgroupCpuAcctCpuCfsPeriodMicros ,
551
- cgroupCpuAcctCpuCfsQuotaMicros ,
552
- cpuStat ,
553
- memoryControlGroup ,
554
- cgroupMemoryLimitInBytes ,
555
- cgroupMemoryUsageInBytes );
695
+ cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes (memoryControlGroup );
696
+ cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes (memoryControlGroup );
556
697
}
698
+
699
+ return new OsStats .Cgroup (
700
+ cpuAcctControlGroup ,
701
+ cgroupCpuAcctUsageNanos ,
702
+ cpuControlGroup ,
703
+ cgroupCpuAcctCpuCfsPeriodMicros ,
704
+ cgroupCpuAcctCpuCfsQuotaMicros ,
705
+ cpuStat ,
706
+ memoryControlGroup ,
707
+ cgroupMemoryLimitInBytes ,
708
+ cgroupMemoryUsageInBytes
709
+ );
557
710
} catch (final IOException e ) {
558
711
logger .debug ("error reading control group stats" , e );
559
712
return null ;
@@ -576,13 +729,14 @@ public static OsProbe getInstance() {
576
729
577
730
OsInfo osInfo (long refreshInterval , int allocatedProcessors ) throws IOException {
578
731
return new OsInfo (
579
- refreshInterval ,
580
- Runtime .getRuntime ().availableProcessors (),
581
- allocatedProcessors ,
582
- Constants .OS_NAME ,
583
- getPrettyName (),
584
- Constants .OS_ARCH ,
585
- Constants .OS_VERSION );
732
+ refreshInterval ,
733
+ Runtime .getRuntime ().availableProcessors (),
734
+ allocatedProcessors ,
735
+ Constants .OS_NAME ,
736
+ getPrettyName (),
737
+ Constants .OS_ARCH ,
738
+ Constants .OS_VERSION
739
+ );
586
740
}
587
741
588
742
private String getPrettyName () throws IOException {
@@ -594,11 +748,13 @@ private String getPrettyName() throws IOException {
594
748
* wrapped in single- or double-quotes.
595
749
*/
596
750
final List <String > etcOsReleaseLines = readOsRelease ();
597
- final List <String > prettyNameLines =
598
- etcOsReleaseLines .stream ().filter (line -> line .startsWith ("PRETTY_NAME" )).collect (Collectors .toList ());
751
+ final List <String > prettyNameLines = etcOsReleaseLines .stream ()
752
+ .filter (line -> line .startsWith ("PRETTY_NAME" ))
753
+ .collect (Collectors .toList ());
599
754
assert prettyNameLines .size () <= 1 : prettyNameLines ;
600
- final Optional <String > maybePrettyNameLine =
601
- prettyNameLines .size () == 1 ? Optional .of (prettyNameLines .get (0 )) : Optional .empty ();
755
+ final Optional <String > maybePrettyNameLine = prettyNameLines .size () == 1
756
+ ? Optional .of (prettyNameLines .get (0 ))
757
+ : Optional .empty ();
602
758
if (maybePrettyNameLine .isPresent ()) {
603
759
// we trim since some OS contain trailing space, for example, Oracle Linux Server 6.9 has a trailing space after the quote
604
760
final String trimmedPrettyNameLine = maybePrettyNameLine .get ().trim ();
@@ -695,11 +851,15 @@ boolean isDebian8() throws IOException {
695
851
return Constants .LINUX && getPrettyName ().equals ("Debian GNU/Linux 8 (jessie)" );
696
852
}
697
853
854
+ OsStats .Cgroup getCgroup (boolean isLinux ) {
855
+ return isLinux ? getCgroup () : null ;
856
+ }
857
+
698
858
public OsStats osStats () {
699
859
final OsStats .Cpu cpu = new OsStats .Cpu (getSystemCpuPercent (), getSystemLoadAverage ());
700
860
final OsStats .Mem mem = new OsStats .Mem (getTotalPhysicalMemorySize (), getFreePhysicalMemorySize ());
701
861
final OsStats .Swap swap = new OsStats .Swap (getTotalSwapSpaceSize (), getFreeSwapSpaceSize ());
702
- final OsStats .Cgroup cgroup = Constants .LINUX ? getCgroup () : null ;
862
+ final OsStats .Cgroup cgroup = getCgroup ( Constants .LINUX ) ;
703
863
return new OsStats (System .currentTimeMillis (), cpu , mem , swap , cgroup );
704
864
}
705
865
0 commit comments