Skip to content

Commit 20c931e

Browse files
github-actions[bot]niver2d2joperezr
authored
[release/9.4] Improve CPU metrics calculations for CgroupV2 (#6318)
* Getting metrics from path proc/self/cgroup
* Calculate usage by considering actual time elapsed since last usage calculation
* Decoupling actual slices read logic in V2 Parser.
* Fixing Broken Tests
* Mark Experimental
* A few test cases
* Nanoseconds for time interval
* Remove Min to reflect actual usages
* Resolving comments and adding tests.
* Resolving comments and test fixes
* Correctness Warning minor fix
* Fixing conflicting correctness warnings
* Minor change to retrigger pipelines
* New log line and minor fixes
* Acceptance test
* Increasing patch version for 9.4.1
---------
Co-authored-by: Nidhi Verma <nive@microsoft.com>
Co-authored-by: Jose Perez Rodriguez <joperezr@microsoft.com>
1 parent 779cae3 commit 20c931e

File tree

52 files changed

+575
-85
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+575
-85
lines changed

eng/Versions.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<PropertyGroup Label="Version settings">
33
<MajorVersion>9</MajorVersion>
44
<MinorVersion>4</MinorVersion>
5-
<PatchVersion>0</PatchVersion>
5+
<PatchVersion>1</PatchVersion>
66
<PreReleaseVersionLabel>preview</PreReleaseVersionLabel>
77
<PreReleaseVersionIteration>1</PreReleaseVersionIteration>
88
<VersionPrefix>$(MajorVersion).$(MinorVersion).$(PatchVersion)</VersionPrefix>

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/ILinuxUtilizationParser.cs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ internal interface ILinuxUtilizationParser
2323
/// <returns>nanoseconds.</returns>
2424
long GetCgroupCpuUsageInNanoseconds();
2525

26+
/// <summary>
27+
/// For CgroupV2 only and experimental. Reads the cpu.stat file, which is part of the cgroup v2 CPU controller, from the cgroup path resolved via /proc/self/cgroup.
28+
/// It provides statistics about the CPU usage of a cgroup from its actual slice.
29+
/// </summary>
30+
/// <returns>nanoseconds.</returns>
31+
long GetCgroupCpuUsageInNanosecondsV2();
32+
2633
/// <summary>
2734
/// Reads the file /sys/fs/cgroup/cpu.max, which is part of the cgroup v2 CPU controller.
2835
/// It is used to set the maximum amount of CPU time that can be used by a cgroup.
@@ -33,6 +40,16 @@ internal interface ILinuxUtilizationParser
3340
/// <returns>cpuUnits.</returns>
3441
float GetCgroupLimitedCpus();
3542

43+
/// <summary>
44+
/// For CgroupV2 only and experimental. Reads the cpu.max file, which is part of the cgroup v2 CPU controller, from the cgroup path resolved via /proc/self/cgroup.
45+
/// It is used to set the maximum amount of CPU time that can be used by a cgroup from actual slice.
46+
/// The file contains two fields, separated by a space.
47+
/// The first field is the quota, which specifies the maximum amount of CPU time (in microseconds) that can be used by the cgroup during one period.
48+
/// The second value is the period, which specifies the length of a period in microseconds.
49+
/// </summary>
50+
/// <returns>cpuUnits.</returns>
51+
float GetCgroupLimitV2();
52+
3653
/// <summary>
3754
/// Reads the file /proc/stat, which provides information about the system’s memory usage.
3855
/// It contains information about the total amount of installed memory, the amount of free and used memory, and the amount of memory used by the kernel and buffers/cache.
@@ -66,4 +83,10 @@ internal interface ILinuxUtilizationParser
6683
/// </summary>
6784
/// <returns>cpuPodRequest.</returns>
6885
float GetCgroupRequestCpu();
86+
87+
/// <summary>
88+
/// For CgroupV2 only and experimental. Reads the cpu.weight file from the cgroup path resolved via /proc/self/cgroup and calculates the Pod CPU Request in millicores based on the actual slice.
89+
/// </summary>
90+
/// <returns>cpuPodRequest.</returns>
91+
float GetCgroupRequestCpuV2();
6992
}

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationParserCgroupV1.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ public LinuxUtilizationParserCgroupV1(IFileSystem fileSystem, IUserHz userHz)
9494
_userHz = userHz.Value;
9595
}
9696

97+
public float GetCgroupLimitV2() => throw new NotSupportedException();
98+
public float GetCgroupRequestCpuV2() => throw new NotSupportedException();
99+
public long GetCgroupCpuUsageInNanosecondsV2() => throw new NotSupportedException();
100+
97101
public long GetCgroupCpuUsageInNanoseconds()
98102
{
99103
using ReturnableBufferWriter<char> bufferWriter = new(_sharedBufferWriterPool);

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationParserCgroupV2.cs

Lines changed: 154 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ internal sealed class LinuxUtilizationParserCgroupV2 : ILinuxUtilizationParser
2121
{
2222
private const int Thousand = 1000;
2323
private const int CpuShares = 1024;
24+
private const string CpuStat = "cpu.stat"; // File containing CPU usage statistics; usage_usec is reported in microseconds.
25+
private const string CpuLimit = "cpu.max"; // File with amount of CPU time available to the group along with the accounting period in microseconds.
26+
private const string CpuRequest = "cpu.weight"; // CPU weights, also known as shares in cgroup v1, is used for resource allocation.
2427
private static readonly ObjectPool<BufferWriter<char>> _sharedBufferWriterPool = BufferWriterPool.CreateBufferWriterPool<char>();
2528

2629
/// <remarks>
@@ -86,47 +89,76 @@ internal sealed class LinuxUtilizationParserCgroupV2 : ILinuxUtilizationParser
8689
/// </summary>
8790
private static readonly FileInfo _cpuPodWeight = new("/sys/fs/cgroup/cpu.weight");
8891

92+
private static readonly FileInfo _cpuCgroupInfoFile = new("/proc/self/cgroup");
93+
8994
private readonly IFileSystem _fileSystem;
9095
private readonly long _userHz;
9196

97+
// Cache for the trimmed path string to avoid repeated file reads and processing
98+
private string? _cachedCgroupPath;
99+
92100
public LinuxUtilizationParserCgroupV2(IFileSystem fileSystem, IUserHz userHz)
93101
{
94102
_fileSystem = fileSystem;
95103
_userHz = userHz.Value;
96104
}
97105

98-
public long GetCgroupCpuUsageInNanoseconds()
106+
public string GetCgroupPath(string filename)
99107
{
100-
// The value we are interested in starts with this. We just want to make sure it is true.
101-
const string Usage_usec = "usage_usec";
102-
103-
// If the file doesn't exist, we assume that the system is a Host and we read the CPU usage from /proc/stat.
104-
if (!_fileSystem.Exists(_cpuacctUsage))
108+
// If we've already parsed the path, use the cached value
109+
if (_cachedCgroupPath != null)
105110
{
106-
return GetHostCpuUsageInNanoseconds();
111+
return $"{_cachedCgroupPath}{filename}";
107112
}
108113

109114
using ReturnableBufferWriter<char> bufferWriter = new(_sharedBufferWriterPool);
110-
_fileSystem.ReadAll(_cpuacctUsage, bufferWriter.Buffer);
111-
ReadOnlySpan<char> usage = bufferWriter.Buffer.WrittenSpan;
112115

113-
if (!usage.StartsWith(Usage_usec))
116+
// Read the content of the file
117+
_fileSystem.ReadFirstLine(_cpuCgroupInfoFile, bufferWriter.Buffer);
118+
ReadOnlySpan<char> fileContent = bufferWriter.Buffer.WrittenSpan;
119+
120+
// Ensure the file content is not empty
121+
if (fileContent.IsEmpty)
114122
{
115-
Throw.InvalidOperationException($"Could not parse '{_cpuacctUsage}'. We expected first line of the file to start with '{Usage_usec}' but it was '{new string(usage)}' instead.");
123+
Throw.InvalidOperationException($"The file '{_cpuCgroupInfoFile}' is empty or could not be read.");
116124
}
117125

118-
ReadOnlySpan<char> cpuUsage = usage.Slice(Usage_usec.Length, usage.Length - Usage_usec.Length);
126+
// Find the index of the last colon (:)
127+
int colonIndex = fileContent.LastIndexOf(':');
128+
if (colonIndex == -1 || colonIndex + 1 >= fileContent.Length)
129+
{
130+
Throw.InvalidOperationException($"Invalid format in file '{_cpuCgroupInfoFile}'. Expected content with ':' separator.");
131+
}
119132

120-
int next = GetNextNumber(cpuUsage, out long microseconds);
133+
// Extract the part after the last colon and cache it for future use
134+
ReadOnlySpan<char> trimmedPath = fileContent.Slice(colonIndex + 1);
135+
_cachedCgroupPath = "/sys/fs/cgroup" + trimmedPath.ToString().TrimEnd('/') + "/";
121136

122-
if (microseconds == -1)
137+
return $"{_cachedCgroupPath}{filename}";
138+
}
139+
140+
public long GetCgroupCpuUsageInNanoseconds()
141+
{
142+
// If the file doesn't exist, we assume that the system is a Host and we read the CPU usage from /proc/stat.
143+
if (!_fileSystem.Exists(_cpuacctUsage))
123144
{
124-
Throw.InvalidOperationException($"Could not get cpu usage from '{_cpuacctUsage}'. Expected positive number, but got '{new string(usage)}'.");
145+
return GetHostCpuUsageInNanoseconds();
125146
}
126147

127-
// In cgroup v2, the Units are microseconds for usage_usec.
128-
// We multiply by 1000 to convert to nanoseconds to keep the common calculation logic.
129-
return microseconds * Thousand;
148+
return ParseCpuUsageFromFile(_fileSystem, _cpuacctUsage);
149+
}
150+
151+
public long GetCgroupCpuUsageInNanosecondsV2()
152+
{
153+
FileInfo cpuUsageFile = new(GetCgroupPath(CpuStat));
154+
155+
// If the file doesn't exist, we assume that the system is a Host and we read the CPU usage from /proc/stat.
156+
if (!_fileSystem.Exists(cpuUsageFile))
157+
{
158+
return GetHostCpuUsageInNanoseconds();
159+
}
160+
161+
return ParseCpuUsageFromFile(_fileSystem, cpuUsageFile);
130162
}
131163

132164
public long GetHostCpuUsageInNanoseconds()
@@ -184,6 +216,22 @@ public float GetCgroupLimitedCpus()
184216
return GetHostCpuCount();
185217
}
186218

219+
/// <remarks>
220+
/// When CGroup limits are set, we can calculate number of cores based on the file settings.
221+
/// This should cover 99% of the cases when the app is hosted in a container environment.
222+
/// Otherwise, we assume that all host's CPUs are available, which we read from proc/stat file.
223+
/// </remarks>
224+
public float GetCgroupLimitV2()
225+
{
226+
FileInfo cpuLimitsFile = new(GetCgroupPath(CpuLimit));
227+
if (LinuxUtilizationParserCgroupV2.TryGetCpuLimitFromCgroupsV2(_fileSystem, cpuLimitsFile, out float cpus))
228+
{
229+
return cpus;
230+
}
231+
232+
return GetHostCpuCount();
233+
}
234+
187235
/// <remarks>
188236
/// If we are able to read the CPU share, we calculate the CPU request based on the weight by dividing it by 1024.
189237
/// If we can't read the CPU weight, we fall back to the host CPU count.
@@ -198,6 +246,21 @@ public float GetCgroupRequestCpu()
198246
return GetHostCpuCount();
199247
}
200248

249+
/// <remarks>
250+
/// If we are able to read the CPU share, we calculate the CPU request based on the weight by dividing it by 1024.
251+
/// If we can't read the CPU weight, we fall back to the host CPU count.
252+
/// </remarks>
253+
public float GetCgroupRequestCpuV2()
254+
{
255+
FileInfo cpuRequestsFile = new(GetCgroupPath(CpuRequest));
256+
if (TryGetCgroupRequestCpuV2(_fileSystem, cpuRequestsFile, out float cpuPodRequest))
257+
{
258+
return cpuPodRequest / CpuShares;
259+
}
260+
261+
return GetHostCpuCount();
262+
}
263+
201264
/// <remarks>
202265
/// If the file doesn't exist, we assume that the system is a Host and we read the memory from /proc/meminfo.
203266
/// </remarks>
@@ -447,6 +510,34 @@ static void ThrowException(ReadOnlySpan<char> content) =>
447510
$"Could not parse '{_cpuSetCpus}'. Expected comma-separated list of integers, with dashes (\"-\") based ranges (\"0\", \"2-6,12\") but got '{new string(content)}'.");
448511
}
449512

513+
private static long ParseCpuUsageFromFile(IFileSystem fileSystem, FileInfo cpuUsageFile)
514+
{
515+
// The value we are interested in starts with this. We just want to make sure it is true.
516+
const string Usage_usec = "usage_usec";
517+
518+
using ReturnableBufferWriter<char> bufferWriter = new(_sharedBufferWriterPool);
519+
fileSystem.ReadAll(cpuUsageFile, bufferWriter.Buffer);
520+
ReadOnlySpan<char> usage = bufferWriter.Buffer.WrittenSpan;
521+
522+
if (!usage.StartsWith(Usage_usec))
523+
{
524+
Throw.InvalidOperationException($"Could not parse '{cpuUsageFile}'. We expected first line of the file to start with '{Usage_usec}' but it was '{new string(usage)}' instead.");
525+
}
526+
527+
ReadOnlySpan<char> cpuUsage = usage.Slice(Usage_usec.Length, usage.Length - Usage_usec.Length);
528+
529+
int next = GetNextNumber(cpuUsage, out long microseconds);
530+
531+
if (microseconds == -1)
532+
{
533+
Throw.InvalidOperationException($"Could not get cpu usage from '{cpuUsageFile}'. Expected positive number, but got '{new string(usage)}'.");
534+
}
535+
536+
// In cgroup v2, the Units are microseconds for usage_usec.
537+
// We multiply by 1000 to convert to nanoseconds to keep the common calculation logic.
538+
return microseconds * Thousand;
539+
}
540+
450541
/// <remarks>
451542
/// The input must contain only number. If there is something more than whitespace before the number, it will return failure (-1).
452543
/// </remarks>
@@ -492,8 +583,27 @@ private static bool TryGetCpuUnitsFromCgroups(IFileSystem fileSystem, out float
492583
return false;
493584
}
494585

586+
return TryParseCpuQuotaAndPeriodFromFile(fileSystem, _cpuCfsQuaotaPeriodUs, out cpuUnits);
587+
}
588+
589+
/// <remarks>
590+
/// If the file doesn't exist, cpuUnits is set to 0 and false is returned so that the caller can fall back to the host CPU count.
591+
/// </remarks>
592+
private static bool TryGetCpuLimitFromCgroupsV2(IFileSystem fileSystem, FileInfo cpuLimitsFile, out float cpuUnits)
593+
{
594+
if (!fileSystem.Exists(cpuLimitsFile))
595+
{
596+
cpuUnits = 0;
597+
return false;
598+
}
599+
600+
return TryParseCpuQuotaAndPeriodFromFile(fileSystem, cpuLimitsFile, out cpuUnits);
601+
}
602+
603+
private static bool TryParseCpuQuotaAndPeriodFromFile(IFileSystem fileSystem, FileInfo cpuLimitsFile, out float cpuUnits)
604+
{
495605
using ReturnableBufferWriter<char> bufferWriter = new(_sharedBufferWriterPool);
496-
fileSystem.ReadFirstLine(_cpuCfsQuaotaPeriodUs, bufferWriter.Buffer);
606+
fileSystem.ReadFirstLine(cpuLimitsFile, bufferWriter.Buffer);
497607

498608
ReadOnlySpan<char> quotaBuffer = bufferWriter.Buffer.WrittenSpan;
499609

@@ -513,7 +623,7 @@ private static bool TryGetCpuUnitsFromCgroups(IFileSystem fileSystem, out float
513623

514624
if (quota == -1)
515625
{
516-
Throw.InvalidOperationException($"Could not parse '{_cpuCfsQuaotaPeriodUs}'. Expected an integer but got: '{new string(quotaBuffer)}'.");
626+
Throw.InvalidOperationException($"Could not parse '{cpuLimitsFile}'. Expected an integer but got: '{new string(quotaBuffer)}'.");
517627
}
518628

519629
string quotaString = quota.ToString(CultureInfo.CurrentCulture);
@@ -523,7 +633,7 @@ private static bool TryGetCpuUnitsFromCgroups(IFileSystem fileSystem, out float
523633

524634
if (period == -1)
525635
{
526-
Throw.InvalidOperationException($"Could not parse '{_cpuCfsQuaotaPeriodUs}'. Expected to get an integer but got: '{new string(cpuPeriodSlice)}'.");
636+
Throw.InvalidOperationException($"Could not parse '{cpuLimitsFile}'. Expected to get an integer but got: '{new string(cpuPeriodSlice)}'.");
527637
}
528638

529639
cpuUnits = (float)quota / period;
@@ -533,37 +643,53 @@ private static bool TryGetCpuUnitsFromCgroups(IFileSystem fileSystem, out float
533643

534644
private static bool TryGetCgroupRequestCpu(IFileSystem fileSystem, out float cpuUnits)
535645
{
536-
const long CpuPodWeightPossibleMax = 10_000;
537-
const long CpuPodWeightPossibleMin = 1;
538-
539646
if (!fileSystem.Exists(_cpuPodWeight))
540647
{
541648
cpuUnits = 0;
542649
return false;
543650
}
544651

652+
return TryParseCpuWeightFromFile(fileSystem, _cpuPodWeight, out cpuUnits);
653+
}
654+
655+
private static bool TryGetCgroupRequestCpuV2(IFileSystem fileSystem, FileInfo cpuRequestsFile, out float cpuUnits)
656+
{
657+
if (!fileSystem.Exists(cpuRequestsFile))
658+
{
659+
cpuUnits = 0;
660+
return false;
661+
}
662+
663+
return TryParseCpuWeightFromFile(fileSystem, cpuRequestsFile, out cpuUnits);
664+
}
665+
666+
private static bool TryParseCpuWeightFromFile(IFileSystem fileSystem, FileInfo cpuWeightFile, out float cpuUnits)
667+
{
668+
const long CpuPodWeightPossibleMax = 10_000;
669+
const long CpuPodWeightPossibleMin = 1;
670+
545671
using ReturnableBufferWriter<char> bufferWriter = new(_sharedBufferWriterPool);
546-
fileSystem.ReadFirstLine(_cpuPodWeight, bufferWriter.Buffer);
672+
fileSystem.ReadFirstLine(cpuWeightFile, bufferWriter.Buffer);
547673
ReadOnlySpan<char> cpuPodWeightBuffer = bufferWriter.Buffer.WrittenSpan;
548674

549675
if (cpuPodWeightBuffer.IsEmpty || (cpuPodWeightBuffer.Length == 2 && cpuPodWeightBuffer[0] == '-' && cpuPodWeightBuffer[1] == '1'))
550676
{
551677
Throw.InvalidOperationException(
552-
$"Could not parse '{_cpuPodWeight}' content. Expected to find CPU weight but got '{new string(cpuPodWeightBuffer)}' instead.");
678+
$"Could not parse '{cpuWeightFile}' content. Expected to find CPU weight but got '{new string(cpuPodWeightBuffer)}' instead.");
553679
}
554680

555681
_ = GetNextNumber(cpuPodWeightBuffer, out long cpuPodWeight);
556682

557683
if (cpuPodWeight == -1)
558684
{
559685
Throw.InvalidOperationException(
560-
$"Could not parse '{_cpuPodWeight}' content. Expected to get an integer but got: '{cpuPodWeightBuffer}'.");
686+
$"Could not parse '{cpuWeightFile}' content. Expected to get an integer but got: '{cpuPodWeightBuffer}'.");
561687
}
562688

563689
if (cpuPodWeight < CpuPodWeightPossibleMin || cpuPodWeight > CpuPodWeightPossibleMax)
564690
{
565691
Throw.ArgumentOutOfRangeException("CPU weight",
566-
$"Expected to find CPU weight in range [{CpuPodWeightPossibleMin}-{CpuPodWeightPossibleMax}] in '{_cpuPodWeight}', but got '{cpuPodWeight}' instead.");
692+
$"Expected to find CPU weight in range [{CpuPodWeightPossibleMin}-{CpuPodWeightPossibleMax}] in '{cpuWeightFile}', but got '{cpuPodWeight}' instead.");
567693
}
568694

569695
// The formula to calculate CPU pod weight (measured in millicores) from CPU share:

0 commit comments

Comments
 (0)