@@ -11,6 +11,7 @@ import (
1111 "time"
1212
1313 corev1 "k8s.io/api/core/v1"
14+ "k8s.io/apimachinery/pkg/api/resource"
1415 "k8s.io/utils/cpuset"
1516 "sigs.k8s.io/controller-runtime/pkg/client"
1617
@@ -301,6 +302,210 @@ var _ = Describe("[performance] Checking IRQBalance settings", Ordered, func() {
301302 Expect (out ).To (Equal ("0" ), "file %s does not contain the expect output; expected=0 actual=%s" , fullPath , out )
302303 })
303304 })
305+ Context ("Testing the new annotation value for irq-load-balancing.crio.io" , Label (string (label .Tier0 ), "omer" ), func () {
306+
307+ Context ("Verify irqbalance housekeeping annotation exists with sibling cpus" , func () {
308+ testpod := & corev1.Pod {}
309+
310+ BeforeAll (func () {
311+ cpuRequest := 4
312+ if cpuRequest >= isolatedCPUSet .Size () {
313+ Skip (fmt .Sprintf ("cpus request %d is greater than the available on the node as the isolated cpus are %d" , cpuRequest , isolatedCPUSet .Size ()))
314+ }
315+
316+ annotations := map [string ]string {
317+ "irq-load-balancing.crio.io" : "housekeeping" ,
318+ }
319+ testpod = getTestPodWithProfileAndAnnotations (profile , annotations , cpuRequest )
320+ testpod .Spec .NodeName = targetNode .Name
321+
322+ data , _ := json .Marshal (testpod )
323+ testlog .Infof ("using testpod:\n %s" , string (data ))
324+
325+ err = testclient .DataPlaneClient .Create (context .TODO (), testpod )
326+ Expect (err ).ToNot (HaveOccurred ())
327+
328+ testpod , err = pods .WaitForCondition (context .TODO (), client .ObjectKeyFromObject (testpod ), corev1 .PodReady , corev1 .ConditionTrue , 10 * time .Minute )
329+ Expect (err ).ToNot (HaveOccurred ())
330+ logEventsForPod (testpod )
331+ })
332+
333+ It ("verifies housekeeping env var 'OPENSHIFT_HOUSEKEEPING_CPUS' is present" , func () {
334+ getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
335+ _ , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , testpod .Spec .Containers [0 ].Name , getHousekeepingCpus )
336+ Expect (err ).ToNot (HaveOccurred (), "OPENSHIFT_HOUSEKEEPING_CPUS wasnt found on the container" )
337+ })
338+
339+ It ("checks that houskeeping takes first cpu from cpu's asigned to container" , func () {
340+ getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
341+ testpodHousekeepCpusByte , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , testpod .Spec .Containers [0 ].Name , getHousekeepingCpus )
342+ Expect (err ).ToNot (HaveOccurred (), "OPENSHIFT_HOUSEKEEPING_CPUS wasnt found on the container" )
343+
344+ // Parse to cpuset for stable comparison
345+ testpodHousekeepCpusStr := strings .TrimSpace (string (testpodHousekeepCpusByte ))
346+ housekeepingCPUSet , err := cpuset .Parse (testpodHousekeepCpusStr )
347+ Expect (err ).ToNot (HaveOccurred (), "Failed to parse OPENSHIFT_HOUSEKEEPING_CPUS: %s" , testpodHousekeepCpusStr )
348+
349+ // Get all cpus used by container
350+ containerName := testpod .Spec .Containers [0 ].Name
351+ tasksetcmd := []string {"/bin/taskset" , "-pc" , "1" }
352+ containerAffinityBytes , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , containerName , tasksetcmd )
353+ Expect (err ).ToNot (HaveOccurred (), "Failed to get CPUs used by container" )
354+ containerAffinityStr := string (containerAffinityBytes )
355+ parts := strings .Split (strings .TrimSpace (containerAffinityStr ), ":" )
356+ containerCpusStr := strings .TrimSpace (parts [1 ])
357+ containerCpuSet , err := cpuset .Parse (containerCpusStr )
358+ Expect (err ).ToNot (HaveOccurred (), "Unable to parse CPUs %s used by container %s" , containerCpusStr , containerName )
359+ testlog .Infof ("Container Cpu's are: %s" , containerCpuSet )
360+
361+ // Check first cpu in container is first cpu in the housekeeping cpus
362+ listHousekeepingCPUSet := housekeepingCPUSet .List ()
363+ listContainerCpuSet := containerCpuSet .List ()
364+ testlog .Infof ("First housekeeping cpu: %v - First container cpu: %v" , listHousekeepingCPUSet [0 ], listHousekeepingCPUSet [0 ])
365+ Expect (listHousekeepingCPUSet [0 ]).To (Equal (listContainerCpuSet [0 ]))
366+ })
367+
368+ It ("verifies houskeeping cpus are all valid siblings" , func () {
369+ getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
370+ testpodHousekeepCpusByte , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , testpod .Spec .Containers [0 ].Name , getHousekeepingCpus )
371+ Expect (err ).ToNot (HaveOccurred (), "OPENSHIFT_HOUSEKEEPING_CPUS wasnt found on the container" )
372+
373+ // Parse to cpuset for stable comparison
374+ testpodHousekeepCpusStr := strings .TrimSpace (string (testpodHousekeepCpusByte ))
375+ housekeepingCPUSet , err := cpuset .Parse (testpodHousekeepCpusStr )
376+ Expect (err ).ToNot (HaveOccurred (), "Failed to parse OPENSHIFT_HOUSEKEEPING_CPUS: %s" , testpodHousekeepCpusStr )
377+
378+ // Get core sibling topology
379+ coreSiblings , err := nodes .GetCoreSiblings (context .TODO (), targetNode )
380+ Expect (err ).ToNot (HaveOccurred (), "Failed to get Core topology" )
381+
382+ // Verify all housekeeping CPUs are siblings (belong to the same core)
383+ found := false
384+ for _ , numaMap := range coreSiblings {
385+ for _ , siblings := range numaMap {
386+ siblingSet := cpuset .New (siblings ... )
387+ if housekeepingCPUSet .Equals (siblingSet ) {
388+ testlog .Infof ("Housekeep Cpu's: %v --- %v :Siblings set from core affinity\n " , housekeepingCPUSet , siblingSet )
389+ found = true
390+ break
391+ }
392+ }
393+ if found {
394+ break
395+ }
396+ }
397+ Expect (found ).To (BeTrue ())
398+
399+ })
400+ })
401+
402+ When ("a pod has two containers" , func () {
403+ testpod := & corev1.Pod {}
404+ numContainers := 2
405+ cpusPerContainer := 2
406+ BeforeAll (func () {
407+
408+ cpuRequest := numContainers * cpusPerContainer
409+ if cpuRequest >= isolatedCPUSet .Size () {
410+ Skip (fmt .Sprintf ("cpus request %d is greater than the available on the node as the isolated cpus are %d" , cpuRequest , isolatedCPUSet .Size ()))
411+ }
412+
413+ annotations := map [string ]string {
414+ "irq-load-balancing.crio.io" : "housekeeping" ,
415+ }
416+
417+ // Create base pod with first container (2 CPUs)
418+ testpod = getTestPodWithProfileAndAnnotations (profile , annotations , cpusPerContainer )
419+ testpod .Spec .NodeName = targetNode .Name
420+
421+ // Create an additional continaer
422+ secondContainer := corev1.Container {
423+ Name : testpod .Spec .Containers [0 ].Name + "-2" ,
424+ Image : testpod .Spec .Containers [0 ].Image ,
425+ Command : testpod .Spec .Containers [0 ].Command ,
426+ Resources : corev1.ResourceRequirements {
427+ Limits : corev1.ResourceList {
428+ corev1 .ResourceCPU : resource .MustParse (fmt .Sprintf ("%v" , cpusPerContainer )),
429+ corev1 .ResourceMemory : resource .MustParse ("256Mi" ),
430+ },
431+ },
432+ }
433+ testpod .Spec .Containers = append (testpod .Spec .Containers , secondContainer )
434+
435+ data , _ := json .Marshal (testpod )
436+ testlog .Infof ("using testpod:\n %s" , string (data ))
437+
438+ err = testclient .DataPlaneClient .Create (context .TODO (), testpod )
439+ Expect (err ).ToNot (HaveOccurred ())
440+
441+ testpod , err = pods .WaitForCondition (context .TODO (), client .ObjectKeyFromObject (testpod ), corev1 .PodReady , corev1 .ConditionTrue , 10 * time .Minute )
442+ Expect (err ).ToNot (HaveOccurred ())
443+ logEventsForPod (testpod )
444+ })
445+
446+ It ("verifies housekeeping env var 'OPENSHIFT_HOUSEKEEPING_CPUS' is available on both containers" , func () {
447+ for i := 0 ; i < numContainers ; i ++ {
448+ getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
449+ _ , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , testpod .Spec .Containers [i ].Name , getHousekeepingCpus )
450+ Expect (err ).ToNot (HaveOccurred ())
451+ }
452+ })
453+
454+ It ("verifies each container has OPENSHIFT_HOUSEKEEPING_CPUS aligned with its first CPU siblings" , func () {
455+ // Get core sibling topology once
456+ coreSiblings , err := nodes .GetCoreSiblings (context .TODO (), targetNode )
457+ Expect (err ).ToNot (HaveOccurred ())
458+
459+ for i := 0 ; i < numContainers ; i ++ {
460+ containerName := testpod .Spec .Containers [i ].Name
461+
462+ // Get housekeeping CPUs for this container and convert to cpuset
463+ getHousekeepingCpus := []string {"printenv" , "OPENSHIFT_HOUSEKEEPING_CPUS" }
464+ housekeepCpusByte , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , containerName , getHousekeepingCpus )
465+ Expect (err ).ToNot (HaveOccurred ())
466+ housekeepCpusStr := strings .TrimSpace (string (housekeepCpusByte ))
467+ housekeepingCPUSet , err := cpuset .Parse (housekeepCpusStr )
468+ Expect (err ).ToNot (HaveOccurred (), "Failed to parse OPENSHIFT_HOUSEKEEPING_CPUS for container %s: %s" , containerName , housekeepCpusStr )
469+
470+ // Get container's assigned CPUs and convert to cpuset
471+ tasksetcmd := []string {"/bin/taskset" , "-pc" , "1" }
472+ containerAffinityBytes , err := pods .ExecCommandOnPod (testclient .K8sClient , testpod , containerName , tasksetcmd )
473+ Expect (err ).ToNot (HaveOccurred ())
474+ containerAffinityStr := string (containerAffinityBytes )
475+ parts := strings .Split (strings .TrimSpace (containerAffinityStr ), ":" )
476+ containerCpusStr := strings .TrimSpace (parts [1 ])
477+ containerCpuSet , err := cpuset .Parse (containerCpusStr )
478+ Expect (err ).ToNot (HaveOccurred (), "Unable to parse CPUs %s used by container %s" , containerCpusStr , containerName )
479+
480+ // Check 1: First CPU in housekeeping matches first CPU in container
481+ listHousekeepingCPUs := housekeepingCPUSet .List ()
482+ listContainerCpus := containerCpuSet .List ()
483+ testlog .Infof ("Container %s: First housekeeping cpu: %v - First container cpu: %v" , containerName , listHousekeepingCPUs [0 ], listContainerCpus [0 ])
484+ Expect (listHousekeepingCPUs [0 ]).To (Equal (listContainerCpus [0 ]))
485+
486+ // Check 2: All housekeeping CPUs are a single siblings set
487+ found := false
488+ for _ , numaMap := range coreSiblings {
489+ for _ , siblings := range numaMap {
490+ siblingSet := cpuset .New (siblings ... )
491+ if housekeepingCPUSet .Equals (siblingSet ) {
492+ found = true
493+ testlog .Infof ("Container %s: housekeeping CPUs %v match sibling set %v" , containerName , housekeepingCPUSet , siblingSet )
494+ break
495+ }
496+ }
497+ if found {
498+ break
499+ }
500+ }
501+ Expect (found ).To (BeTrue ())
502+ }
503+ })
504+
505+ })
506+
507+ })
508+
304509})
305510
306511// nodes.BannedCPUs fails (!!!) if the current banned list is empty because, deep down, ExecCommandOnNode expects non-empty stdout.
0 commit comments