@@ -302,211 +302,192 @@ var _ = Describe("[performance] Checking IRQBalance settings", Ordered, func() {
302302 Expect (out ).To (Equal ("0" ), "file %s does not contain the expect output; expected=0 actual=%s" , fullPath , out )
303303 })
304304 })
305- Context ("Testing the new annotation value for irq-load-balancing.crio.io" , Label (string (label .Tier0 )), func () {
306305
307- Context ("Verify irqbalance housekeeping annotation exists with sibling cpus" , func () {
308- testpod := & corev1.Pod {}
306+ Context ("Testing the new housekeeping annotation value for irq-load-balancing.crio.io" , Label (string (label .Tier0 )), func () {
307+ DescribeTable ("Validate housekeeping CPU annotation propagation and enforcement" , func (ctx context.Context , numOfContainersInPod int ) {
308+ testpod , err := createPodWithHouskeeping (numOfContainersInPod , isolatedCPUSet , profile , context .TODO (), targetNode )
309+ Expect (err ).ToNot (HaveOccurred (), "Failed to create pod with housekeeping annotation" )
309310
310- BeforeAll (func () {
311- cpuRequest := 4
312- if cpuRequest >= isolatedCPUSet .Size () {
313- Skip (fmt .Sprintf ("cpus request %d is greater than the available on the node as the isolated cpus are %d" , cpuRequest , isolatedCPUSet .Size ()))
314- }
311+ defer deleteTestPod (context .TODO (), testpod )
315312
316- annotations := map [ string ] string {
317- "irq-load-balancing.crio.io" : "housekeeping" ,
318- }
319- testpod = getTestPodWithProfileAndAnnotations ( profile , annotations , cpuRequest )
320- testpod . Spec . NodeName = targetNode . Name
313+ for i := 0 ; i < numOfContainersInPod ; i ++ {
314+ containerName := testpod . Spec . Containers [ i ]. Name
315+ // Get housekeeping CPUs for this container and convert to cpuset
316+ housekeepingCPUSet , err := getContainerHouskeepCpuSet ( testpod , containerName )
317+ Expect ( err ). ToNot ( HaveOccurred (), "Failed to Get OPENSHIFT_HOUSEKEEPING_CPUS from container %s" , containerName )
321318
322- data , _ := json .Marshal (testpod )
323- testlog .Infof ("using testpod:\n %s" , string (data ))
319+ // Get container's assigned CPUs and convert to cpuset
320+ containerCpuSet , err := getContainerCpusSet (testpod , containerName )
321+ Expect (err ).ToNot (HaveOccurred (), "Unable to get CPUs used by container %s" , containerName )
324322
325- err = testclient .DataPlaneClient .Create (context .TODO (), testpod )
323+ // Check first cpu in container is first cpu in the housekeeping cpus
324+ Expect (housekeepingCPUSet .List ()[0 ]).To (Equal (containerCpuSet .List ()[0 ]), "housekeeping and container first cpu mismatch for %s" , containerName )
325+
326+ // Verify that all housekeeping CPUs are a valid set of siblings
327+ areValidHousekeepingSiblings , err := isHouskeepValidSiblings (context .TODO (), targetNode , housekeepingCPUSet )
326328 Expect (err ).ToNot (HaveOccurred ())
329+ Expect (areValidHousekeepingSiblings ).To (BeTrue ())
327330
328- testpod , err = pods .WaitForCondition (context .TODO (), client .ObjectKeyFromObject (testpod ), corev1 .PodReady , corev1 .ConditionTrue , 10 * time .Minute )
331+ // Check housekeeping pod is reflected correctly under irq_smp_affinity
332+ isHousekeepingReflected , err := isSmpReflectingHousekeeping (context .TODO (), targetNode , housekeepingCPUSet , containerCpuSet )
329333 Expect (err ).ToNot (HaveOccurred ())
330- logEventsForPod (testpod )
331- })
332-
333- It ("verifies housekeeping env var 'OPENSHIFT_HOUSEKEEPING_CPUS' is present" , func () {
334- getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
335- _ , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , testpod .Spec .Containers [0 ].Name , getHousekeepingCpus )
336- Expect (err ).ToNot (HaveOccurred (), "OPENSHIFT_HOUSEKEEPING_CPUS wasnt found on the container" )
337- })
338-
339- It ("checks that houskeeping takes first cpu from cpu's asigned to container" , func () {
340- getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
341- testpodHousekeepCpusByte , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , testpod .Spec .Containers [0 ].Name , getHousekeepingCpus )
342- Expect (err ).ToNot (HaveOccurred (), "OPENSHIFT_HOUSEKEEPING_CPUS wasnt found on the container" )
343-
344- // Parse to cpuset for stable comparison
345- testpodHousekeepCpusStr := strings .TrimSpace (string (testpodHousekeepCpusByte ))
346- housekeepingCPUSet , err := cpuset .Parse (testpodHousekeepCpusStr )
347- Expect (err ).ToNot (HaveOccurred (), "Failed to parse OPENSHIFT_HOUSEKEEPING_CPUS: %s" , testpodHousekeepCpusStr )
348-
349- // Get all cpus used by container
350- containerName := testpod .Spec .Containers [0 ].Name
351- tasksetcmd := []string {"/bin/taskset" , "-pc" , "1" }
352- containerAffinityBytes , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , containerName , tasksetcmd )
353- Expect (err ).ToNot (HaveOccurred (), "Failed to get CPUs used by container" )
354- containerAffinityStr := string (containerAffinityBytes )
355- parts := strings .Split (strings .TrimSpace (containerAffinityStr ), ":" )
356- containerCpusStr := strings .TrimSpace (parts [1 ])
357- containerCpuSet , err := cpuset .Parse (containerCpusStr )
358- Expect (err ).ToNot (HaveOccurred (), "Unable to parse CPUs %s used by container %s" , containerCpusStr , containerName )
359- testlog .Infof ("Container Cpu's are: %s" , containerCpuSet )
334+ Expect (isHousekeepingReflected ).To (BeTrue (), "irq_smp_affinity does not correctly reflect the housekeeping cpuset for %s" , containerName )
360335
361- // Check first cpu in container is first cpu in the housekeeping cpus
362- listHousekeepingCPUSet := housekeepingCPUSet .List ()
363- listContainerCpuSet := containerCpuSet .List ()
364- testlog .Infof ("First housekeeping cpu: %v - First container cpu: %v" , listHousekeepingCPUSet [0 ], listHousekeepingCPUSet [0 ])
365- Expect (listHousekeepingCPUSet [0 ]).To (Equal (listContainerCpuSet [0 ]))
366- })
367-
368- It ("verifies houskeeping cpus are all valid siblings" , func () {
369- getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
370- testpodHousekeepCpusByte , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , testpod .Spec .Containers [0 ].Name , getHousekeepingCpus )
371- Expect (err ).ToNot (HaveOccurred (), "OPENSHIFT_HOUSEKEEPING_CPUS wasnt found on the container" )
372-
373- // Parse to cpuset for stable comparison
374- testpodHousekeepCpusStr := strings .TrimSpace (string (testpodHousekeepCpusByte ))
375- housekeepingCPUSet , err := cpuset .Parse (testpodHousekeepCpusStr )
376- Expect (err ).ToNot (HaveOccurred (), "Failed to parse OPENSHIFT_HOUSEKEEPING_CPUS: %s" , testpodHousekeepCpusStr )
377-
378- // Get core sibling topology
379- coreSiblings , err := nodes .GetCoreSiblings (context .TODO (), targetNode )
380- Expect (err ).ToNot (HaveOccurred (), "Failed to get Core topology" )
381-
382- // Verify all housekeeping CPUs are siblings (belong to the same core)
383- found := false
384- for _ , numaMap := range coreSiblings {
385- for _ , siblings := range numaMap {
386- siblingSet := cpuset .New (siblings ... )
387- if housekeepingCPUSet .Equals (siblingSet ) {
388- testlog .Infof ("Housekeep Cpu's: %v --- %v :Siblings set from core affinity\n " , housekeepingCPUSet , siblingSet )
389- found = true
390- break
391- }
392- }
393- if found {
394- break
395- }
396- }
397- Expect (found ).To (BeTrue ())
336+ // Check housekeeping pod is reflected correctly under IRQBALANCE_BANNED_CPUS (in /etc/sysconfig/irqbalance) using the new function
337+ isIrqBalanceHousekeepingReflected , err := isIrqBalanceReflectingHousekeeping (context .TODO (), targetNode , housekeepingCPUSet , containerCpuSet )
338+ Expect (err ).ToNot (HaveOccurred ())
339+ Expect (isIrqBalanceHousekeepingReflected ).To (BeTrue (), "IRQBALANCE_BANNED_CPUS does not correctly reflect the housekeeping cpuset for %s" , containerName )
398340
399- })
400- })
341+ }
342+ },
343+ Entry ("[test_id:TBD] houskeeping pod with single container" , context .TODO (), 1 ),
344+ Entry ("[test_id:TBD] houskeeping pod with multiple containers" , context .TODO (), 2 ),
345+ )
346+ })
401347
402- When ("a pod has two containers" , func () {
403- testpod := & corev1.Pod {}
404- numContainers := 2
405- cpusPerContainer := 2
406- BeforeAll (func () {
348+ })
407349
408- cpuRequest := numContainers * cpusPerContainer
409- if cpuRequest >= isolatedCPUSet .Size () {
410- Skip (fmt .Sprintf ("cpus request %d is greater than the available on the node as the isolated cpus are %d" , cpuRequest , isolatedCPUSet .Size ()))
411- }
350+ func createPodWithHouskeeping (numOfContainersInPod int , isolatedCPUSet cpuset.CPUSet , perfProf * performancev2.PerformanceProfile , ctx context.Context , targetNode * corev1.Node ) (* corev1.Pod , error ) {
351+ cpusPerContainer := 4
352+ cpuRequest := cpusPerContainer * numOfContainersInPod
353+ if cpuRequest >= isolatedCPUSet .Size () {
354+ err := fmt .Errorf ("cpus request %d is greater than the available on the node as the isolated cpus are %d" , cpuRequest , isolatedCPUSet .Size ())
355+ testlog .Errorf ("cannot create pod: %v" , err )
356+ return nil , err
357+ }
412358
413- annotations := map [string ]string {
414- "irq-load-balancing.crio.io" : "housekeeping" ,
415- }
359+ annotations := map [string ]string {
360+ "irq-load-balancing.crio.io" : "housekeeping" ,
361+ }
362+ testpod := getTestPodWithProfileAndAnnotations (perfProf , annotations , cpuRequest )
363+ testpod .Spec .NodeName = targetNode .Name
364+
365+ if numOfContainersInPod == 2 {
366+ secondContainer := corev1.Container {
367+ Name : testpod .Spec .Containers [0 ].Name + "-2" ,
368+ Image : testpod .Spec .Containers [0 ].Image ,
369+ Command : testpod .Spec .Containers [0 ].Command ,
370+ Resources : corev1.ResourceRequirements {
371+ Limits : corev1.ResourceList {
372+ corev1 .ResourceCPU : resource .MustParse (fmt .Sprintf ("%v" , cpusPerContainer )),
373+ corev1 .ResourceMemory : resource .MustParse ("256Mi" ),
374+ },
375+ },
376+ }
377+ testpod .Spec .Containers = append (testpod .Spec .Containers , secondContainer )
378+ }
379+ data , _ := json .Marshal (testpod )
380+ testlog .Infof ("using testpod:\n %s" , string (data ))
416381
417- // Create base pod with first container (2 CPUs)
418- testpod = getTestPodWithProfileAndAnnotations (profile , annotations , cpusPerContainer )
419- testpod .Spec .NodeName = targetNode .Name
420-
421- // Create an additional continaer
422- secondContainer := corev1.Container {
423- Name : testpod .Spec .Containers [0 ].Name + "-2" ,
424- Image : testpod .Spec .Containers [0 ].Image ,
425- Command : testpod .Spec .Containers [0 ].Command ,
426- Resources : corev1.ResourceRequirements {
427- Limits : corev1.ResourceList {
428- corev1 .ResourceCPU : resource .MustParse (fmt .Sprintf ("%v" , cpusPerContainer )),
429- corev1 .ResourceMemory : resource .MustParse ("256Mi" ),
430- },
431- },
432- }
433- testpod .Spec .Containers = append (testpod .Spec .Containers , secondContainer )
382+ err := testclient .DataPlaneClient .Create (ctx , testpod )
383+ if err != nil {
384+ testlog .Errorf ("failed to create pod: %v" , err )
385+ return nil , err
386+ }
434387
435- data , _ := json .Marshal (testpod )
436- testlog .Infof ("using testpod:\n %s" , string (data ))
388+ testpod , err = pods .WaitForCondition (ctx , client .ObjectKeyFromObject (testpod ), corev1 .PodReady , corev1 .ConditionTrue , 10 * time .Minute )
389+ if err != nil {
390+ testlog .Errorf ("pod did not become ready: %v" , err )
391+ return nil , err
392+ }
393+ logEventsForPod (testpod )
437394
438- err = testclient . DataPlaneClient . Create ( context . TODO (), testpod )
439- Expect ( err ). ToNot ( HaveOccurred ())
395+ return testpod , nil
396+ }
440397
441- testpod , err = pods .WaitForCondition (context .TODO (), client .ObjectKeyFromObject (testpod ), corev1 .PodReady , corev1 .ConditionTrue , 10 * time .Minute )
442- Expect (err ).ToNot (HaveOccurred ())
443- logEventsForPod (testpod )
444- })
445-
446- It ("verifies housekeeping env var 'OPENSHIFT_HOUSEKEEPING_CPUS' is available on both containers" , func () {
447- for i := 0 ; i < numContainers ; i ++ {
448- getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
449- _ , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , testpod .Spec .Containers [i ].Name , getHousekeepingCpus )
450- Expect (err ).ToNot (HaveOccurred ())
451- }
452- })
398+ func isSmpReflectingHousekeeping (ctx context.Context , node * corev1.Node , housekeepCPUSet , containerCpuSet cpuset.CPUSet ) (bool , error ) {
399+ minusCpuSet := containerCpuSet .Difference (housekeepCPUSet )
400+ irqSmpAffinity , err := nodes .GetDefaultSmpAffinitySet (ctx , node )
401+ if err != nil {
402+ testlog .Errorf ("failed getting default SMP affinity set" )
403+ return false , err
404+ }
405+ testlog .Infof ("SMP Affinity: %v" , irqSmpAffinity )
453406
454- It ("verifies each container has OPENSHIFT_HOUSEKEEPING_CPUS aligned with its first CPU siblings" , func () {
455- // Get core sibling topology once
456- coreSiblings , err := nodes .GetCoreSiblings (context .TODO (), targetNode )
457- Expect (err ).ToNot (HaveOccurred ())
407+ if minusCpuSet .IsEmpty () {
408+ // All container CPUs are housekeeping CPUs; just ensure all of housekeeping is in SMP affinity.
409+ if housekeepCPUSet .IsSubsetOf (irqSmpAffinity ) {
410+ return true , nil
411+ }
412+ testlog .Errorf ("housekeeping cpuset is not a subset of SMP affinity; housekeepCPUSet=%v, irqSmpAffinity=%v" , housekeepCPUSet , irqSmpAffinity )
413+ return false , nil
414+ }
458415
459- for i := 0 ; i < numContainers ; i ++ {
460- containerName := testpod .Spec .Containers [i ].Name
461-
462- // Get housekeeping CPUs for this container and convert to cpuset
463- getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
464- housekeepCpusByte , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , containerName , getHousekeepingCpus )
465- Expect (err ).ToNot (HaveOccurred ())
466- housekeepCpusStr := strings .TrimSpace (string (housekeepCpusByte ))
467- housekeepingCPUSet , err := cpuset .Parse (housekeepCpusStr )
468- Expect (err ).ToNot (HaveOccurred (), "Failed to parse OPENSHIFT_HOUSEKEEPING_CPUS for container %s: %s" , containerName , housekeepCpusStr )
469-
470- // Get container's assigned CPUs and convert to cpuset
471- tasksetcmd := []string {"/bin/taskset" , "-pc" , "1" }
472- containerAffinityBytes , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , containerName , tasksetcmd )
473- Expect (err ).ToNot (HaveOccurred ())
474- containerAffinityStr := string (containerAffinityBytes )
475- parts := strings .Split (strings .TrimSpace (containerAffinityStr ), ":" )
476- containerCpusStr := strings .TrimSpace (parts [1 ])
477- containerCpuSet , err := cpuset .Parse (containerCpusStr )
478- Expect (err ).ToNot (HaveOccurred (), "Unable to parse CPUs %s used by container %s" , containerCpusStr , containerName )
479-
480- // Check 1: First CPU in housekeeping matches first CPU in container
481- listHousekeepingCPUs := housekeepingCPUSet .List ()
482- listContainerCpus := containerCpuSet .List ()
483- testlog .Infof ("Container %s: First housekeeping cpu: %v - First container cpu: %v" , containerName , listHousekeepingCPUs [0 ], listContainerCpus [0 ])
484- Expect (listHousekeepingCPUs [0 ]).To (Equal (listContainerCpus [0 ]))
485-
486- // Check 2: All housekeeping CPUs are a single siblings set
487- found := false
488- for _ , numaMap := range coreSiblings {
489- for _ , siblings := range numaMap {
490- siblingSet := cpuset .New (siblings ... )
491- if housekeepingCPUSet .Equals (siblingSet ) {
492- found = true
493- testlog .Infof ("Container %s: housekeeping CPUs %v match sibling set %v" , containerName , housekeepingCPUSet , siblingSet )
494- break
495- }
496- }
497- if found {
498- break
499- }
500- }
501- Expect (found ).To (BeTrue ())
502- }
503- })
416+ // There are CPUs in minus set, i.e., assigned cpus which are not housekeeping.
417+ // All of housekeeping must still be in SMP, but also, minus set must NOT be in SMP (should not be subset).
418+ isHousekeepingSubset := housekeepCPUSet .IsSubsetOf (irqSmpAffinity )
419+ isMinusSetSubset := minusCpuSet .IsSubsetOf (irqSmpAffinity )
504420
505- })
421+ if isHousekeepingSubset && ! isMinusSetSubset {
422+ return true , nil
423+ }
506424
507- })
425+ testlog .Errorf ("unexpected SMP affinity, housekeeping subset: %v, non-housekeeping subset: %v, housekeepCPUSet=%v, minusCpuSet=%v, irqSmpAffinity=%v" , isHousekeepingSubset , isMinusSetSubset , housekeepCPUSet , minusCpuSet , irqSmpAffinity )
426+ return false , nil
427+ }
508428
509- })
429+ func isIrqBalanceReflectingHousekeeping (ctx context.Context , node * corev1.Node , housekeepCPUSet , containerCpuSet cpuset.CPUSet ) (bool , error ) {
430+ minusCpuSet := containerCpuSet .Difference (housekeepCPUSet )
431+ irqBalanceBannedCpus , err := getIrqBalanceBannedCPUs (ctx , node )
432+ if err != nil {
433+ testlog .Error ("failed to get IRQBALANCE_BANNED_CPUS" )
434+ return false , err
435+ }
436+ testlog .Infof ("IRQBALANCE_BANNED_CPUS: %v" , irqBalanceBannedCpus )
437+ isHousekeepingSubset := housekeepCPUSet .IsSubsetOf (irqBalanceBannedCpus )
438+ isMinusSetSubset := minusCpuSet .IsSubsetOf (irqBalanceBannedCpus )
439+ if ! isHousekeepingSubset && isMinusSetSubset {
440+ return true , nil
441+ }
442+ testlog .Errorf ("unexpected IRQBALANCE_BANNED_CPUS, housekeeping subset: %v, non-housekeeping subset: %v, housekeepCPUSet=%v, minusCpuSet=%v, irqBalanceBannedCpus=%v" , isHousekeepingSubset , isMinusSetSubset , housekeepCPUSet , minusCpuSet , irqBalanceBannedCpus )
443+ return false , nil
444+ }
445+
446+ func isHouskeepValidSiblings (ctx context.Context , node * corev1.Node , housekeepCPUSet cpuset.CPUSet ) (bool , error ) {
447+ coreSiblings , err := nodes .GetCoreSiblings (ctx , node )
448+ if err != nil {
449+ testlog .Errorf ("Failed to get core siblings: %v" , err )
450+ return false , err
451+ }
452+ for _ , numaMap := range coreSiblings {
453+ for _ , siblings := range numaMap {
454+ siblingSet := cpuset .New (siblings ... )
455+ if housekeepCPUSet .Equals (siblingSet ) {
456+ return true , nil
457+ }
458+ }
459+ }
460+ return false , nil
461+ }
462+
463+ func getContainerHouskeepCpuSet (testpod * corev1.Pod , containerName string ) (cpuset.CPUSet , error ) {
464+ getHousekeepCpusCmd := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
465+ testpodHousekeepCpusByte , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , containerName , getHousekeepCpusCmd )
466+ if err != nil {
467+ testlog .Warningf ("error exec pod for housekeeping cpus: %v" , err )
468+ return cpuset .New (), err
469+ }
470+ testpodHousekeepCpusStr := strings .TrimSpace (string (testpodHousekeepCpusByte ))
471+ housekeepingCPUSet , err := cpuset .Parse (testpodHousekeepCpusStr )
472+ if err != nil {
473+ testlog .Warningf ("error parsing housekeeping cpuset: %v" , err )
474+ return cpuset .New (), err
475+ }
476+ return housekeepingCPUSet , err
477+ }
478+
479+ func getContainerCpusSet (testpod * corev1.Pod , containerName string ) (cpuset.CPUSet , error ) {
480+ tasksetcmd := []string {"taskset" , "-pc" , "1" }
481+ testpodCpusByte , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , containerName , tasksetcmd )
482+ if err != nil {
483+ return cpuset .New (), err
484+ }
485+ testpodCpusStr := string (testpodCpusByte )
486+ parts := strings .Split (strings .TrimSpace (testpodCpusStr ), ":" )
487+ cpus := strings .TrimSpace (parts [1 ])
488+ containerCpuSet , err := cpuset .Parse (cpus )
489+ return containerCpuSet , err
490+ }
510491
511492// nodes.BannedCPUs fails (!!!) if the current banned list is empty because, deep down, ExecCommandOnNode expects non-empty stdout.
512493// In turn, we do this to at least have a chance to detect failed commands vs failed to execute commands (we had this issue in
0 commit comments