@@ -9,14 +9,19 @@ import (
9
9
10
10
corev1 "k8s.io/api/core/v1"
11
11
"k8s.io/apimachinery/pkg/api/resource"
12
+ "k8s.io/apimachinery/pkg/util/errors"
13
+ utilfeature "k8s.io/apiserver/pkg/util/feature"
12
14
recommendermodel "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model"
13
15
"k8s.io/klog/v2"
14
16
"sigs.k8s.io/yaml"
15
17
16
18
predictionapi "github.com/gocrane/api/prediction/v1alpha1"
17
19
20
+ "github.com/gocrane/crane/pkg/common"
21
+ "github.com/gocrane/crane/pkg/features"
18
22
"github.com/gocrane/crane/pkg/metricnaming"
19
23
"github.com/gocrane/crane/pkg/oom"
24
+ "github.com/gocrane/crane/pkg/prediction"
20
25
"github.com/gocrane/crane/pkg/prediction/config"
21
26
"github.com/gocrane/crane/pkg/recommend/types"
22
27
"github.com/gocrane/crane/pkg/recommendation/framework"
@@ -90,6 +95,8 @@ func (rr *ResourceRecommender) Recommend(ctx *framework.RecommendationContext) e
90
95
}
91
96
92
97
resourceRecommendation := & types.ResourceRequestRecommendation {}
98
+ namespace := ctx .Object .GetNamespace ()
99
+ caller := fmt .Sprintf (callerFormat , klog .KObj (ctx .Recommendation ), ctx .Recommendation .UID )
93
100
94
101
var newContainers []corev1.Container
95
102
var oldContainers []corev1.Container
@@ -99,82 +106,56 @@ func (rr *ResourceRecommender) Recommend(ctx *framework.RecommendationContext) e
99
106
return err
100
107
}
101
108
102
- namespace := ctx .Object .GetNamespace ()
103
- for _ , c := range ctx .Pods [0 ].Spec .Containers {
104
- cr := types.ContainerRecommendation {
105
- ContainerName : c .Name ,
106
- Target : map [corev1.ResourceName ]string {},
107
- }
108
-
109
- caller := fmt .Sprintf (callerFormat , klog .KObj (ctx .Recommendation ), ctx .Recommendation .UID )
110
- metricNamer := metricnaming .ResourceToContainerMetricNamer (namespace , ctx .Recommendation .Spec .TargetRef .APIVersion ,
111
- ctx .Recommendation .Spec .TargetRef .Kind , ctx .Recommendation .Spec .TargetRef .Name , c .Name , corev1 .ResourceCPU , caller )
112
- klog .Infof ("%s: CPU query for resource request recommendation: %s" , ctx .String (), metricNamer .BuildUniqueKey ())
113
- cpuConfig := rr .makeCpuConfig ()
114
- tsList , err := utils .QueryPredictedValuesOnce (ctx .Recommendation , predictor , caller , cpuConfig , metricNamer )
109
+ // pod
110
+ if utilfeature .DefaultFeatureGate .Enabled (features .EnablePodRecommendation ) {
111
+ cpuTsList , memoryTsList , usePodMetrics , err := rr .getPodCpuAndMemoryTsList (ctx , namespace , caller , predictor )
115
112
if err != nil {
116
- return err
113
+ klog . Warningf ( "getPodCpuAndMemoryTsList err: %v" , err )
117
114
}
118
- if len (tsList ) < 1 || len (tsList [0 ].Samples ) < 1 {
119
- return fmt .Errorf ("no value retured for queryExpr: %s" , metricNamer .BuildUniqueKey ())
120
- }
121
- // Check timestamp is completed
122
- if rr .HistoryCompletionCheck {
123
- completion , existDays , err := utils .DetectTimestampCompletion (tsList , rr .CpuModelHistoryLength , time .Now ())
124
- if ! completion || err != nil {
125
- return fmt .Errorf ("%s: cpu timestamps are not completed, expect %s actual %d days" , metricNamer .BuildUniqueKey (), rr .CpuModelHistoryLength , existDays )
115
+ if usePodMetrics {
116
+ klog .V (4 ).Infof ("use pod metrics for pod %s" , ctx .Pods [0 ].Name )
117
+ pr := types.PodRecommendation {
118
+ PodName : ctx .Pods [0 ].Name ,
119
+ Target : map [corev1.ResourceName ]string {},
126
120
}
127
- }
128
121
129
- v := int64 (tsList [0 ].Samples [0 ].Value * 1000 )
130
- cpuQuantity := resource .NewMilliQuantity (v , resource .DecimalSI )
131
- klog .Infof ("%s: container %s recommended cpu %s" , ctx .String (), c .Name , cpuQuantity .String ())
122
+ cpuQuantity , memQuantity , err := rr .recommendCpuAndMemResources (ctx , cpuTsList , memoryTsList , oomRecords , namespace , ctx .Object .GetName (), ctx .Pods [0 ].Name )
123
+ if err != nil {
124
+ klog .Errorf ("recommendCpuAndMemResources %v" , err )
125
+ }
132
126
133
- metricNamer = metricnaming .ResourceToContainerMetricNamer (namespace , ctx .Recommendation .Spec .TargetRef .APIVersion ,
134
- ctx .Recommendation .Spec .TargetRef .Kind , ctx .Recommendation .Spec .TargetRef .Name , c .Name , corev1 .ResourceMemory , caller )
135
- klog .Infof ("%s Memory query for resource request recommendation: %s" , ctx .String (), metricNamer .BuildUniqueKey ())
136
- memConfig := rr .makeMemConfig ()
137
- tsList , err = utils .QueryPredictedValuesOnce (ctx .Recommendation , predictor , caller , memConfig , metricNamer )
127
+ if cpuQuantity != nil {
128
+ pr .Target [corev1 .ResourceCPU ] = cpuQuantity .String ()
129
+ }
130
+ if memQuantity != nil {
131
+ pr .Target [corev1 .ResourceMemory ] = memQuantity .String ()
132
+ }
133
+
134
+ resourceRecommendation .Pod = pr
135
+ } else {
136
+ klog .V (4 ).Infof ("not use pod metrics for pod %s" , ctx .Pods [0 ].Name )
137
+ }
138
+ }
139
+
140
+ // containers
141
+ for _ , c := range ctx .Pods [0 ].Spec .Containers {
142
+ cpuTsList , memTsList , err := rr .getContainerCpuAndMemoryTsList (ctx , predictor , caller , namespace , c .Name )
138
143
if err != nil {
139
144
return err
140
145
}
141
- if len (tsList ) < 1 || len (tsList [0 ].Samples ) < 1 {
142
- return fmt .Errorf ("no value retured for queryExpr: %s" , metricNamer .BuildUniqueKey ())
143
- }
144
- // Check timestamp is completed
145
- if rr .HistoryCompletionCheck {
146
- completion , existDays , err := utils .DetectTimestampCompletion (tsList , rr .MemHistoryLength , time .Now ())
147
- if ! completion || err != nil {
148
- return fmt .Errorf ("%s: memory timestamps are not completed, expect %s actual %d days " , metricNamer .BuildUniqueKey (), rr .MemHistoryLength , existDays )
149
- }
150
- }
151
146
152
- v = int64 ( tsList [ 0 ]. Samples [ 0 ]. Value )
153
- if v <= 0 {
154
- return fmt . Errorf ( "no enough metrics" )
147
+ cpuQuantity , memQuantity , err := rr . recommendCpuAndMemResources ( ctx , cpuTsList , memTsList , oomRecords , namespace , ctx . Object . GetName (), c . Name )
148
+ if err != nil {
149
+ return err
155
150
}
156
- memQuantity := resource .NewQuantity (v , resource .BinarySI )
157
- klog .Infof ("%s: container %s recommended memory %s" , ctx .String (), c .Name , memQuantity .String ())
158
-
159
- // Use oom protected memory if exist
160
- if rr .OOMProtection {
161
- oomProtectMem := rr .MemoryOOMProtection (oomRecords , namespace , ctx .Object .GetName (), c .Name )
162
- if oomProtectMem != nil && ! oomProtectMem .IsZero () && oomProtectMem .Cmp (* memQuantity ) > 0 {
163
- klog .Infof ("%s: container %s using oomProtect Memory %s" , ctx .String (), c .Name , oomProtectMem .String ())
164
- memQuantity = oomProtectMem
165
- }
151
+ if cpuQuantity == nil || memQuantity == nil {
152
+ return fmt .Errorf ("resource recommendation failed for container %s: cpu=%v, memory=%v" , c .Name , cpuQuantity != nil , memQuantity != nil )
166
153
}
167
154
168
- // Resource Specification enabled
169
- if rr .Specification {
170
- normalizedCpu , normalizedMem := GetNormalizedResource (cpuQuantity , memQuantity , rr .SpecificationConfigs )
171
- klog .Infof ("GetNormalizedResource currentCpu %s normalizedCpu %s currentMem %s normalizedMem %s" , cpuQuantity .String (), normalizedCpu .String (), memQuantity .String (), normalizedMem .String ())
172
- if normalizedCpu .Value () > 0 && normalizedMem .Value () > 0 {
173
- cpuQuantity = & normalizedCpu
174
- memQuantity = & normalizedMem
175
- }
155
+ cr := types.ContainerRecommendation {
156
+ ContainerName : c .Name ,
157
+ Target : map [corev1.ResourceName ]string {},
176
158
}
177
-
178
159
cr .Target [corev1 .ResourceCPU ] = cpuQuantity .String ()
179
160
cr .Target [corev1 .ResourceMemory ] = memQuantity .String ()
180
161
@@ -269,3 +250,162 @@ func (rr *ResourceRecommender) MemoryOOMProtection(oomRecords []oom.OOMRecord, n
269
250
270
251
return nil
271
252
}
253
+
254
+ // getContainerCpuAndMemoryTsList gets container metrics data
255
+ func (rr * ResourceRecommender ) getContainerCpuAndMemoryTsList (ctx * framework.RecommendationContext ,
256
+ predictor prediction.Interface ,
257
+ caller string ,
258
+ namespace , containerName string ) ([]* common.TimeSeries , []* common.TimeSeries , error ) {
259
+
260
+ // cpu
261
+ cpuNamer := metricnaming .ResourceToContainerMetricNamer (namespace ,
262
+ ctx .Recommendation .Spec .TargetRef .APIVersion ,
263
+ ctx .Recommendation .Spec .TargetRef .Kind ,
264
+ ctx .Recommendation .Spec .TargetRef .Name ,
265
+ containerName ,
266
+ corev1 .ResourceCPU ,
267
+ caller )
268
+
269
+ cpuTs , err := utils .QueryPredictedValuesOnce (ctx .Recommendation , predictor , caller , rr .makeCpuConfig (), cpuNamer )
270
+ if err != nil {
271
+ return nil , nil , err
272
+ }
273
+
274
+ // memory
275
+ memNamer := metricnaming .ResourceToContainerMetricNamer (namespace ,
276
+ ctx .Recommendation .Spec .TargetRef .APIVersion ,
277
+ ctx .Recommendation .Spec .TargetRef .Kind ,
278
+ ctx .Recommendation .Spec .TargetRef .Name ,
279
+ containerName ,
280
+ corev1 .ResourceMemory ,
281
+ caller )
282
+
283
+ memTs , err := utils .QueryPredictedValuesOnce (ctx .Recommendation , predictor , caller , rr .makeMemConfig (), memNamer )
284
+ if err != nil {
285
+ return nil , nil , err
286
+ }
287
+
288
+ return cpuTs , memTs , nil
289
+ }
290
+
291
+ func (rr * ResourceRecommender ) getPodCpuAndMemoryTsList (ctx * framework.RecommendationContext , namespace , caller string , predictor prediction.Interface ) ([]* common.TimeSeries , []* common.TimeSeries , bool , error ) {
292
+ var errs []error
293
+ cpuOK , memOK := true , true
294
+
295
+ // cpu
296
+ cpuMetricNamer := metricnaming .ResourceToPodMetricNamer (namespace ,
297
+ ctx .Pods [0 ].Name ,
298
+ corev1 .ResourceCPU ,
299
+ caller )
300
+ cpuTsList , err := utils .QueryPredictedValuesOnce (ctx .Recommendation , predictor , caller , rr .makeCpuConfig (), cpuMetricNamer )
301
+ if err != nil {
302
+ cpuOK = false
303
+ errs = append (errs , err )
304
+ }
305
+
306
+ // memory
307
+ memoryMetricNamer := metricnaming .ResourceToPodMetricNamer (namespace ,
308
+ ctx .Pods [0 ].Name ,
309
+ corev1 .ResourceMemory ,
310
+ caller )
311
+ memTsList , err := utils .QueryPredictedValuesOnce (ctx .Recommendation , predictor , caller , rr .makeMemConfig (), memoryMetricNamer )
312
+ if err != nil {
313
+ memOK = false
314
+ errs = append (errs , err )
315
+ }
316
+
317
+ if ! cpuOK && ! memOK {
318
+ return nil , nil , false , errors .NewAggregate (errs )
319
+ }
320
+
321
+ return cpuTsList , memTsList , true , errors .NewAggregate (errs )
322
+ }
323
+
324
+ // recommendCpuAndMemResources recommends CPU and memory resources based on historical monitoring data, OOM records, and resource specification normalization
325
+ func (rr * ResourceRecommender ) recommendCpuAndMemResources (ctx * framework.RecommendationContext ,
326
+ cpuTsList []* common.TimeSeries ,
327
+ memTsList []* common.TimeSeries ,
328
+ oomRecords []oom.OOMRecord ,
329
+ namespace , workloadName , containerName string ) (* resource.Quantity , * resource.Quantity , error ) {
330
+
331
+ var errs []error
332
+ cpuOK , memOK := true , true
333
+
334
+ // cpu
335
+ cpuQuantity , err := rr .recommendSingleResource (ctx , cpuTsList , rr .CpuModelHistoryLength , corev1 .ResourceCPU , containerName )
336
+ if err != nil {
337
+ cpuOK = false
338
+ errs = append (errs , err )
339
+ }
340
+
341
+ // memory
342
+ memQuantity , err := rr .recommendSingleResource (ctx , memTsList , rr .MemHistoryLength , corev1 .ResourceMemory , containerName )
343
+ if err != nil {
344
+ memOK = false
345
+ errs = append (errs , err )
346
+ }
347
+
348
+ if ! cpuOK && ! memOK {
349
+ return nil , nil , errors .NewAggregate (errs )
350
+ }
351
+
352
+ // adjust memory recommendations by analyzing historical OOM events
353
+ if memOK && rr .OOMProtection {
354
+ if oomMem := rr .MemoryOOMProtection (oomRecords , namespace , workloadName , containerName ); oomMem != nil {
355
+ if ! oomMem .IsZero () && oomMem .Cmp (* memQuantity ) > 0 {
356
+ klog .Infof ("%s: %s using oomProtect Memory %s" , ctx .String (), containerName , oomMem .String ())
357
+ memQuantity = oomMem
358
+ }
359
+ }
360
+ }
361
+
362
+ // standardize resource recommendations to predefined specifications
363
+ if rr .Specification {
364
+ if cpuOK && memOK {
365
+ normalizedCpu , normalizedMem := GetNormalizedResource (cpuQuantity , memQuantity , rr .SpecificationConfigs )
366
+ klog .Infof ("GetNormalizedResource currentCpu %s normalizedCpu %s currentMem %s normalizedMem %s" ,
367
+ cpuQuantity .String (), normalizedCpu .String (), memQuantity .String (), normalizedMem .String ())
368
+ if normalizedCpu .Value () > 0 && normalizedMem .Value () > 0 {
369
+ cpuQuantity = & normalizedCpu
370
+ memQuantity = & normalizedMem
371
+ }
372
+ } else {
373
+ return nil , nil , fmt .Errorf ("cpu or memory recommendation failed, cannot standardize resource recommendations to predefined specifications" )
374
+ }
375
+ }
376
+
377
+ return cpuQuantity , memQuantity , nil
378
+ }
379
+
380
+ func (rr * ResourceRecommender ) recommendSingleResource (ctx * framework.RecommendationContext ,
381
+ tsList []* common.TimeSeries ,
382
+ historyLength string ,
383
+ resourceType corev1.ResourceName ,
384
+ containerName string ) (* resource.Quantity , error ) {
385
+
386
+ if len (tsList ) == 0 || len (tsList [0 ].Samples ) == 0 {
387
+ return nil , fmt .Errorf ("no metrics data for %s" , resourceType )
388
+ }
389
+
390
+ if rr .HistoryCompletionCheck {
391
+ completion , existDays , err := utils .DetectTimestampCompletion (tsList , historyLength , time .Now ())
392
+ if ! completion || err != nil {
393
+ return nil , fmt .Errorf ("%s timestamps not completed: expect %s actual %d days" , resourceType , historyLength , existDays )
394
+ }
395
+ }
396
+
397
+ value := tsList [0 ].Samples [0 ].Value
398
+ var quantity * resource.Quantity
399
+ if resourceType == corev1 .ResourceCPU {
400
+ value *= 1000
401
+ quantity = resource .NewQuantity (int64 (value ), resource .DecimalSI )
402
+ } else if resourceType == corev1 .ResourceMemory {
403
+ quantity = resource .NewQuantity (int64 (value ), resource .BinarySI )
404
+ if value <= 0 {
405
+ return nil , fmt .Errorf ("invalid %s value: %f" , resourceType , value )
406
+ }
407
+ }
408
+
409
+ klog .Infof ("%s: %s recommended %s %s" , ctx .String (), containerName , resourceType , quantity .String ())
410
+ return quantity , nil
411
+ }
0 commit comments