4
4
using System ;
5
5
using System . Collections . Generic ;
6
6
using System . Diagnostics . Metrics ;
7
- using System . Linq ;
8
7
using System . Threading ;
9
8
using Microsoft . Extensions . Logging ;
10
9
using Microsoft . Extensions . Logging . Abstractions ;
@@ -17,6 +16,7 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
17
16
{
18
17
private const double One = 1.0 ;
19
18
private const long Hundred = 100L ;
19
+ private const double NanosecondsInSecond = 1_000_000_000 ;
20
20
21
21
private readonly object _cpuLocker = new ( ) ;
22
22
private readonly object _memoryLocker = new ( ) ;
@@ -82,41 +82,46 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
82
82
( _previousCgroupCpuTime , _previousCgroupCpuPeriodCounter ) = _parser . GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2 ( ) ;
83
83
84
84
_ = meter . CreateObservableGauge (
85
- ResourceUtilizationInstruments . ContainerCpuLimitUtilization ,
86
- ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilizationLimit ( cpuLimit ) ) ,
87
- "1" ) ;
85
+ name : ResourceUtilizationInstruments . ContainerCpuLimitUtilization ,
86
+ observeValues : ( ) => GetMeasurementsWithRetry ( ( ) => CpuUtilizationLimit ( cpuLimit ) ) ,
87
+ unit : "1" ) ;
88
88
89
89
_ = meter . CreateObservableGauge (
90
90
name : ResourceUtilizationInstruments . ContainerCpuRequestUtilization ,
91
- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilizationRequest ( cpuRequest ) ) ,
91
+ observeValues : ( ) => GetMeasurementsWithRetry ( ( ) => CpuUtilizationRequest ( cpuRequest ) ) ,
92
+ unit : "1" ) ;
93
+
94
+ _ = meter . CreateObservableGauge (
95
+ name : ResourceUtilizationInstruments . ContainerCpuTime ,
96
+ observeValues : GetCpuTime ,
92
97
unit : "1" ) ;
93
98
}
94
99
else
95
100
{
96
101
_ = meter . CreateObservableGauge (
97
102
name : ResourceUtilizationInstruments . ContainerCpuLimitUtilization ,
98
- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilization ( ) * scaleRelativeToCpuLimit ) ,
103
+ observeValues : ( ) => GetMeasurementsWithRetry ( ( ) => CpuUtilization ( ) * scaleRelativeToCpuLimit ) ,
99
104
unit : "1" ) ;
100
105
101
106
_ = meter . CreateObservableGauge (
102
107
name : ResourceUtilizationInstruments . ContainerCpuRequestUtilization ,
103
- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilization ( ) * scaleRelativeToCpuRequest ) ,
108
+ observeValues : ( ) => GetMeasurementsWithRetry ( ( ) => CpuUtilization ( ) * scaleRelativeToCpuRequest ) ,
104
109
unit : "1" ) ;
105
110
106
111
_ = meter . CreateObservableGauge (
107
112
name : ResourceUtilizationInstruments . ProcessCpuUtilization ,
108
- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilization ( ) * scaleRelativeToCpuRequest ) ,
113
+ observeValues : ( ) => GetMeasurementsWithRetry ( ( ) => CpuUtilization ( ) * scaleRelativeToCpuRequest ) ,
109
114
unit : "1" ) ;
110
115
}
111
116
112
117
_ = meter . CreateObservableGauge (
113
118
name : ResourceUtilizationInstruments . ContainerMemoryLimitUtilization ,
114
- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => MemoryUtilization ( ) ) ,
119
+ observeValues : ( ) => GetMeasurementsWithRetry ( MemoryUtilization ) ,
115
120
unit : "1" ) ;
116
121
117
122
_ = meter . CreateObservableGauge (
118
123
name : ResourceUtilizationInstruments . ProcessMemoryUtilization ,
119
- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => MemoryUtilization ( ) ) ,
124
+ observeValues : ( ) => GetMeasurementsWithRetry ( MemoryUtilization ) ,
120
125
unit : "1" ) ;
121
126
122
127
// cpuRequest is the CPU request (i.e. the guaranteed number of CPU units) for a pod; for a host it is 1 core
@@ -259,23 +264,32 @@ public Snapshot GetSnapshot()
259
264
memoryUsageInBytes : memoryUsed ) ;
260
265
}
261
266
262
- private IEnumerable < Measurement < double > > GetMeasurementWithRetry ( Func < double > func )
267
/// <summary>
/// Wraps a single reading obtained through TryGetValueWithRetry in the array shape
/// handed back to the observable gauge callbacks.
/// </summary>
/// <param name="func">Delegate that produces the value to report.</param>
/// <returns>
/// A one-element array containing the value when TryGetValueWithRetry returns true;
/// otherwise the shared empty array, so no measurement is reported.
/// </returns>
+ private Measurement < double > [ ] GetMeasurementsWithRetry ( Func < double > func )
268
+ {
269
+ if ( ! TryGetValueWithRetry ( func , out double value ) )
270
+ {
271
+ return Array . Empty < Measurement < double > > ( ) ;
272
+ }
273
+
274
+ return new [ ] { new Measurement < double > ( value ) } ;
275
+ }
276
+
277
+ private bool TryGetValueWithRetry < T > ( Func < T > func , out T value )
278
+ where T : struct
263
279
{
280
+ value = default ;
264
281
if ( Volatile . Read ( ref _measurementsUnavailable ) == 1 &&
265
282
_timeProvider . GetUtcNow ( ) - _lastFailure < _retryInterval )
266
283
{
267
- return Enumerable . Empty < Measurement < double > > ( ) ;
284
+ return false ;
268
285
}
269
286
270
287
try
271
288
{
272
- double result = func ( ) ;
273
- if ( Volatile . Read ( ref _measurementsUnavailable ) == 1 )
274
- {
275
- _ = Interlocked . Exchange ( ref _measurementsUnavailable , 0 ) ;
276
- }
289
+ value = func ( ) ;
290
+ _ = Interlocked . CompareExchange ( ref _measurementsUnavailable , 0 , 1 ) ;
277
291
278
- return new [ ] { new Measurement < double > ( result ) } ;
292
+ return true ;
279
293
}
280
294
catch ( Exception ex ) when (
281
295
ex is System . IO . FileNotFoundException ||
@@ -285,7 +299,7 @@ ex is System.IO.DirectoryNotFoundException ||
285
299
_lastFailure = _timeProvider . GetUtcNow ( ) ;
286
300
_ = Interlocked . Exchange ( ref _measurementsUnavailable , 1 ) ;
287
301
288
- return Enumerable . Empty < Measurement < double > > ( ) ;
302
+ return false ;
289
303
}
290
304
}
291
305
@@ -296,10 +310,14 @@ ex is System.IO.DirectoryNotFoundException ||
296
310
297
311
/// <summary>
/// Yields CPU time measurements, each tagged with a "cpu.mode" key ("system" or "user").
/// Every reading is fetched through TryGetValueWithRetry; a mode whose reading fails is
/// omitted from the sequence instead of surfacing an exception to the caller.
/// </summary>
/// <returns>Zero, one, or two measurements, produced lazily as the sequence is enumerated.</returns>
private IEnumerable < Measurement < double > > GetCpuTime ( )
298
312
{
299
// The removed lines ('-') read both values up front, without the retry guard.
- long hostCpuTime = _parser . GetHostCpuUsageInNanoseconds ( ) ;
300
- double cgroupCpuTime = CpuUtilizationWithoutHostDelta ( ) ;
313
// "system" mode: the parser's reading is divided by NanosecondsInSecond before it is reported.
+ if ( TryGetValueWithRetry ( _parser . GetHostCpuUsageInNanoseconds , out long systemCpuTime ) )
314
+ {
315
+ yield return new Measurement < double > ( systemCpuTime / NanosecondsInSecond , [ new KeyValuePair < string , object ? > ( "cpu.mode" , "system" ) ] ) ;
316
+ }
301
317
302
- yield return new Measurement < double > ( cgroupCpuTime / NanosecondsInSecond , [ new KeyValuePair < string , object ? > ( "cpu.mode" , "user" ) ] ) ;
303
- yield return new Measurement < double > ( hostCpuTime / NanosecondsInSecond , [ new KeyValuePair < string , object ? > ( "cpu.mode" , "system" ) ] ) ;
318
// "user" mode: the value from CpuUtilizationV2 is reported as-is.
// NOTE(review): unlike the "system" reading, no division by NanosecondsInSecond is applied here;
// CpuUtilizationV2 is not visible in this view - confirm it already returns a value in the same unit.
+ if ( TryGetValueWithRetry ( CpuUtilizationV2 , out double userCpuTime ) )
319
+ {
320
+ yield return new Measurement < double > ( userCpuTime , [ new KeyValuePair < string , object ? > ( "cpu.mode" , "user" ) ] ) ;
321
+ }
304
322
}
305
323
}
0 commit comments