Skip to content

Commit aa55cff

Browse files
update, wip
1 parent ecc6d5d commit aa55cff

File tree

1 file changed

+41
-23
lines changed

1 file changed

+41
-23
lines changed

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
using System;
55
using System.Collections.Generic;
66
using System.Diagnostics.Metrics;
7-
using System.Linq;
87
using System.Threading;
98
using Microsoft.Extensions.Logging;
109
using Microsoft.Extensions.Logging.Abstractions;
@@ -17,6 +16,7 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
1716
{
1817
private const double One = 1.0;
1918
private const long Hundred = 100L;
19+
private const double NanosecondsInSecond = 1_000_000_000;
2020

2121
private readonly object _cpuLocker = new();
2222
private readonly object _memoryLocker = new();
@@ -82,41 +82,46 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
8282
(_previousCgroupCpuTime, _previousCgroupCpuPeriodCounter) = _parser.GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2();
8383

8484
_ = meter.CreateObservableGauge(
85-
ResourceUtilizationInstruments.ContainerCpuLimitUtilization,
86-
() => GetMeasurementWithRetry(() => CpuUtilizationLimit(cpuLimit)),
87-
"1");
85+
name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization,
86+
observeValues: () => GetMeasurementsWithRetry(() => CpuUtilizationLimit(cpuLimit)),
87+
unit: "1");
8888

8989
_ = meter.CreateObservableGauge(
9090
name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization,
91-
observeValues: () => GetMeasurementWithRetry(() => CpuUtilizationRequest(cpuRequest)),
91+
observeValues: () => GetMeasurementsWithRetry(() => CpuUtilizationRequest(cpuRequest)),
92+
unit: "1");
93+
94+
_ = meter.CreateObservableGauge(
95+
name: ResourceUtilizationInstruments.ContainerCpuTime,
96+
observeValues: GetCpuTime,
9297
unit: "1");
9398
}
9499
else
95100
{
96101
_ = meter.CreateObservableGauge(
97102
name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization,
98-
observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * scaleRelativeToCpuLimit),
103+
observeValues: () => GetMeasurementsWithRetry(() => CpuUtilization() * scaleRelativeToCpuLimit),
99104
unit: "1");
100105

101106
_ = meter.CreateObservableGauge(
102107
name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization,
103-
observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * scaleRelativeToCpuRequest),
108+
observeValues: () => GetMeasurementsWithRetry(() => CpuUtilization() * scaleRelativeToCpuRequest),
104109
unit: "1");
105110

106111
_ = meter.CreateObservableGauge(
107112
name: ResourceUtilizationInstruments.ProcessCpuUtilization,
108-
observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * scaleRelativeToCpuRequest),
113+
observeValues: () => GetMeasurementsWithRetry(() => CpuUtilization() * scaleRelativeToCpuRequest),
109114
unit: "1");
110115
}
111116

112117
_ = meter.CreateObservableGauge(
113118
name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization,
114-
observeValues: () => GetMeasurementWithRetry(() => MemoryUtilization()),
119+
observeValues: () => GetMeasurementsWithRetry(MemoryUtilization),
115120
unit: "1");
116121

117122
_ = meter.CreateObservableGauge(
118123
name: ResourceUtilizationInstruments.ProcessMemoryUtilization,
119-
observeValues: () => GetMeasurementWithRetry(() => MemoryUtilization()),
124+
observeValues: () => GetMeasurementsWithRetry(MemoryUtilization),
120125
unit: "1");
121126

122127
// cpuRequest is the CPU request (i.e., the guaranteed number of CPU units) for a pod; for a host, it's 1 core
@@ -259,23 +264,32 @@ public Snapshot GetSnapshot()
259264
memoryUsageInBytes: memoryUsed);
260265
}
261266

262-
private IEnumerable<Measurement<double>> GetMeasurementWithRetry(Func<double> func)
267+
private Measurement<double>[] GetMeasurementsWithRetry(Func<double> func)
268+
{
269+
if (!TryGetValueWithRetry(func, out double value))
270+
{
271+
return Array.Empty<Measurement<double>>();
272+
}
273+
274+
return new[] { new Measurement<double>(value) };
275+
}
276+
277+
private bool TryGetValueWithRetry<T>(Func<T> func, out T value)
278+
where T : struct
263279
{
280+
value = default;
264281
if (Volatile.Read(ref _measurementsUnavailable) == 1 &&
265282
_timeProvider.GetUtcNow() - _lastFailure < _retryInterval)
266283
{
267-
return Enumerable.Empty<Measurement<double>>();
284+
return false;
268285
}
269286

270287
try
271288
{
272-
double result = func();
273-
if (Volatile.Read(ref _measurementsUnavailable) == 1)
274-
{
275-
_ = Interlocked.Exchange(ref _measurementsUnavailable, 0);
276-
}
289+
value = func();
290+
_ = Interlocked.CompareExchange(ref _measurementsUnavailable, 0, 1);
277291

278-
return new[] { new Measurement<double>(result) };
292+
return true;
279293
}
280294
catch (Exception ex) when (
281295
ex is System.IO.FileNotFoundException ||
@@ -285,7 +299,7 @@ ex is System.IO.DirectoryNotFoundException ||
285299
_lastFailure = _timeProvider.GetUtcNow();
286300
_ = Interlocked.Exchange(ref _measurementsUnavailable, 1);
287301

288-
return Enumerable.Empty<Measurement<double>>();
302+
return false;
289303
}
290304
}
291305

@@ -296,10 +310,14 @@ ex is System.IO.DirectoryNotFoundException ||
296310

297311
private IEnumerable<Measurement<double>> GetCpuTime()
298312
{
299-
long hostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
300-
double cgroupCpuTime = CpuUtilizationWithoutHostDelta();
313+
if (TryGetValueWithRetry(_parser.GetHostCpuUsageInNanoseconds, out long systemCpuTime))
314+
{
315+
yield return new Measurement<double>(systemCpuTime / NanosecondsInSecond, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
316+
}
301317

302-
yield return new Measurement<double>(cgroupCpuTime / NanosecondsInSecond, [new KeyValuePair<string, object?>("cpu.mode", "user")]);
303-
yield return new Measurement<double>(hostCpuTime / NanosecondsInSecond, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
318+
if (TryGetValueWithRetry(CpuUtilizationV2, out double userCpuTime))
319+
{
320+
yield return new Measurement<double>(userCpuTime, [new KeyValuePair<string, object?>("cpu.mode", "user")]);
321+
}
304322
}
305323
}

0 commit comments

Comments
 (0)