Skip to content

Commit ba930bb

Browse files
committed
Make machineConfigLabels-related misconfiguration more visible
NTO Tuned CRs support matching based on machine config labels (mcLabels). This involves finding a MachineConfigPool whose machineConfigSelector matches the mcLabels and setting a TuneD profile on all nodes assigned to that MachineConfigPool. We do not support configurations where more than one MachineConfigPool matches the mcLabels. Although this configuration is not supported, users often unwittingly configure their clusters this way. In this case, NTO only issues an error message in the operator logs. This is often not sufficient, so we make this misconfiguration more visible by marking the ClusterOperator/node-tuning object as Degraded.
1 parent bb78f6c commit ba930bb

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

pkg/operator/controller.go

+7
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ type Controller struct {
8787
// tracked as having kernel command-line conflict due to belonging
8888
// to the same MCP.
8989
bootcmdlineConflict map[string]bool
90+
91+
// mcLabelsAcrossMCP is the internal operator's cache of Profiles
92+
// tracked as using machineConfigLabels that match across multiple MCPs.
93+
mcLabelsAcrossMCP map[string]bool
9094
}
9195

9296
type wqKey struct {
@@ -118,6 +122,7 @@ func NewController() (*Controller, error) {
118122
}
119123

120124
controller.bootcmdlineConflict = map[string]bool{}
125+
controller.mcLabelsAcrossMCP = map[string]bool{}
121126

122127
// Initial event to bootstrap CR if it doesn't exist.
123128
controller.workqueue.AddRateLimited(wqKey{kind: wqKindTuned, name: tunedv1.TunedDefaultResourceName})
@@ -592,6 +597,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
592597

593598
profileMf.Name = nodeName
594599
delete(c.bootcmdlineConflict, nodeName)
600+
delete(c.mcLabelsAcrossMCP, nodeName)
595601
nodeLabels, err := c.pc.nodeLabelsGet(nodeName)
596602
if err != nil {
597603
// Remove Profiles for Nodes which no longer exist.
@@ -775,6 +781,7 @@ func (c *Controller) syncMachineConfig(labels map[string]string, profile *tunedv
775781
// Log an error and do not requeue, this is a configuration issue.
776782
klog.Errorf("profile %v uses machineConfigLabels that match across multiple MCPs (%v); this is not supported",
777783
profile.Name, printMachineConfigPoolsNames(pools))
784+
c.mcLabelsAcrossMCP[profile.Name] = true
778785
return nil
779786
}
780787

pkg/operator/status.go

+22
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,20 @@ func (c *Controller) numProfilesWithBootcmdlineConflict(profileList []*tunedv1.P
163163
return numConflict
164164
}
165165

166+
// numMCLabelsAcrossMCP returns the total number
167+
// of Profiles in the internal operator's cache (mcLabelsAcrossMCP)
168+
// tracked as using machineConfigLabels that match across multiple MCPs.
169+
func (c *Controller) numMCLabelsAcrossMCP(profileList []*tunedv1.Profile) int {
170+
n := 0
171+
for _, profile := range profileList {
172+
if c.mcLabelsAcrossMCP[profile.Name] {
173+
n++
174+
}
175+
}
176+
177+
return n
178+
}
179+
166180
// computeStatus computes the operator's current status.
167181
func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.ClusterOperatorStatusCondition) ([]configv1.ClusterOperatorStatusCondition, string, error) {
168182
const (
@@ -318,6 +332,14 @@ func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.C
318332
degradedCondition.Message = fmt.Sprintf("%v/%v Profiles with bootcmdline conflict", numConflict, len(profileList))
319333
}
320334

335+
numMCLabelsAcrossMCP := c.numMCLabelsAcrossMCP(profileList)
336+
if numMCLabelsAcrossMCP > 0 {
337+
klog.Infof("%v/%v Profiles use machineConfigLabels that match across multiple MCPs", numMCLabelsAcrossMCP, len(profileList))
338+
degradedCondition.Status = configv1.ConditionTrue
339+
degradedCondition.Reason = "MCLabelsAcrossMCPs"
340+
degradedCondition.Message = fmt.Sprintf("%v/%v Profiles use machineConfigLabels that match across multiple MCPs", numMCLabelsAcrossMCP, len(profileList))
341+
}
342+
321343
// If the operator is not available for an extensive period of time, set the Degraded operator status.
322344
conditions = clusteroperator.SetStatusCondition(conditions, &availableCondition)
323345
now := metav1.Now().Unix()

0 commit comments

Comments
 (0)