Skip to content

Commit ba8d935

Browse files
authored
Add performanceProfile API offline cores (openshift#345)
Signed-off-by: Mario Fernandez <mariofer@redhat.com>
1 parent 994352a commit ba8d935

File tree

17 files changed

+212
-20
lines changed

17 files changed

+212
-20
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/bash

# Takes the CPUs listed in OFFLINE_CPUS offline by writing 0 to
# /sys/devices/system/cpu/cpu<N>/online.
#
# OFFLINE_CPUS is a comma-separated list of CPU ids (for example "6,7").
# The script runs two passes: the first only verifies that every requested CPU
# has an "online" control file, the second performs the writes. Nothing is
# written when the first pass finds a missing file.

# -e: stop on the first failing command, -u: treat unset variables as errors,
# pipefail: a pipeline fails when any command in it fails.
set -euo pipefail

# Pass 1: validate. The unquoted expansion is intentional, it turns the commas
# into spaces so the shell word-splits the list into individual ids.
for cpu in ${OFFLINE_CPUS//,/ }; do
    online_cpu_file="/sys/devices/system/cpu/cpu$cpu/online"
    if [ ! -f "${online_cpu_file}" ]; then
        # Diagnostics belong on stderr so they are not mixed with progress output.
        echo "ERROR: ${online_cpu_file} does not exist, abort script execution" >&2
        exit 1
    fi
done

echo "All CPUs to be offlined exist, setting them offline"

# Pass 2: apply. Every control file was checked above.
for cpu in ${OFFLINE_CPUS//,/ }; do
    online_cpu_file="/sys/devices/system/cpu/cpu$cpu/online"
    echo 0 > "${online_cpu_file}"
    echo "offline cpu num $cpu"
done
22+

docs/performanceprofile/performance_profile.md

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ CPU defines a set of CPU related features.
3131
| reserved | Reserved defines a set of CPUs that will not be used for any container workloads initiated by kubelet. | *[CPUSet](#cpuset) | true |
3232
| isolated | Isolated defines a set of CPUs that will be used to give to application threads the most execution time possible, which means removing as many extraneous tasks off a CPU as possible. It is important to notice the CPU manager can choose any CPU to run the workload except the reserved CPUs. In order to guarantee that your workload will run on the isolated CPU:\n 1. The union of reserved CPUs and isolated CPUs should include all online CPUs\n 2. The isolated CPUs field should be the complementary to reserved CPUs field | *[CPUSet](#cpuset) | true |
3333
| balanceIsolated | BalanceIsolated toggles whether or not the Isolated CPU set is eligible for load balancing work loads. When this option is set to \"false\", the Isolated CPU set will be static, meaning workloads have to explicitly assign each thread to a specific cpu in order to work across multiple CPUs. Setting this to \"true\" allows workloads to be balanced across CPUs. Setting this to \"false\" offers the most predictable performance for guaranteed workloads, but it offloads the complexity of cpu load balancing to the application. Defaults to \"true\" | *bool | false |
34+
| offlined | Offline defines a set of CPUs that will be unused and set offline | *[CPUSet](#cpuset) | false |
3435

3536
[Back to TOC](#table-of-contents)
3637

examples/performanceprofile/samples/performance_v2_performanceprofile.yaml

+3-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@ spec:
1111
- "idle=poll"
1212
- "intel_idle.max_cstate=0"
1313
cpu:
14-
isolated: "2-3"
15-
reserved: "0-1"
14+
isolated: "2"
15+
reserved: "0-1"
16+
offlined: "3"
1617
hugepages:
1718
defaultHugepagesSize: "1G"
1819
pages:

manifests/20-performance-profile.crd.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,9 @@ spec:
370370
isolated:
371371
description: 'Isolated defines a set of CPUs that will be used to give to application threads the most execution time possible, which means removing as many extraneous tasks off a CPU as possible. It is important to notice the CPU manager can choose any CPU to run the workload except the reserved CPUs. In order to guarantee that your workload will run on the isolated CPU: 1. The union of reserved CPUs and isolated CPUs should include all online CPUs 2. The isolated CPUs field should be the complementary to reserved CPUs field'
372372
type: string
373+
offlined:
374+
description: Offline defines a set of CPUs that will be unused and set offline
375+
type: string
373376
reserved:
374377
description: Reserved defines a set of CPUs that will not be used for any container workloads initiated by kubelet.
375378
type: string

pkg/apis/performanceprofile/v2/performanceprofile_types.go

+3
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ type CPU struct {
9898
// Defaults to "true"
9999
// +optional
100100
BalanceIsolated *bool `json:"balanceIsolated,omitempty"`
101+
// Offline defines a set of CPUs that will be unused and set offline
102+
// +optional
103+
Offlined *CPUSet `json:"offlined,omitempty"`
101104
}
102105

103106
// HugePageSize defines size of huge pages, can be 2M or 1G.

pkg/apis/performanceprofile/v2/performanceprofile_validation.go

+13-2
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,12 @@ func (r *PerformanceProfile) validateCPUs() field.ErrorList {
127127
}
128128

129129
if r.Spec.CPU.Isolated != nil && r.Spec.CPU.Reserved != nil {
130-
cpuLists, err := components.NewCPULists(string(*r.Spec.CPU.Reserved), string(*r.Spec.CPU.Isolated))
130+
var offlined string
131+
if r.Spec.CPU.Offlined != nil {
132+
offlined = string(*r.Spec.CPU.Offlined)
133+
}
134+
135+
cpuLists, err := components.NewCPULists(string(*r.Spec.CPU.Reserved), string(*r.Spec.CPU.Isolated), offlined)
131136
if err != nil {
132137
allErrs = append(allErrs, field.InternalError(field.NewPath("spec.cpu"), err))
133138
}
@@ -141,10 +146,16 @@ func (r *PerformanceProfile) validateCPUs() field.ErrorList {
141146
allErrs = append(allErrs, field.Invalid(field.NewPath("spec.cpu.isolated"), r.Spec.CPU.Isolated, "isolated CPUs can not be empty"))
142147
}
143148

144-
if overlap := cpuLists.Intersect(); len(overlap) != 0 {
149+
if overlap := components.Intersect(cpuLists.GetIsolated(), cpuLists.GetReserved()); len(overlap) != 0 {
145150
allErrs = append(allErrs, field.Invalid(field.NewPath("spec.cpu"), r.Spec.CPU, fmt.Sprintf("reserved and isolated cpus overlap: %v", overlap)))
146151
}
147152
}
153+
154+
if r.Spec.CPU.Offlined != nil {
155+
if overlap := components.Intersect(cpuLists.GetReserved(), cpuLists.GetOfflined()); len(overlap) != 0 {
156+
allErrs = append(allErrs, field.Invalid(field.NewPath("spec.cpu"), r.Spec.CPU, fmt.Sprintf("reserved and offlined cpus overlap: %v", overlap)))
157+
}
158+
}
148159
}
149160
}
150161

pkg/apis/performanceprofile/v2/performanceprofile_validation_test.go

+32-4
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@ const (
2020
// HugePagesCount defines the huge page count used for tests
2121
HugePagesCount = 4
2222
// IsolatedCPUs defines the isolated CPU set used for tests
23-
IsolatedCPUs = CPUSet("4-7")
23+
IsolatedCPUs = CPUSet("4-6")
2424
// ReservedCPUs defines the reserved CPU set used for tests
2525
ReservedCPUs = CPUSet("0-3")
26+
// OfflinedCPUs defines the offlined CPU set used for tests
27+
OfflinedCPUs = CPUSet("7")
2628
// SingleNUMAPolicy defines the topologyManager policy used for tests
2729
SingleNUMAPolicy = "single-numa-node"
2830

@@ -48,6 +50,7 @@ func NewPerformanceProfile(name string) *PerformanceProfile {
4850
size := HugePageSize1G
4951
isolatedCPUs := IsolatedCPUs
5052
reservedCPUs := ReservedCPUs
53+
offlinedCPUs := OfflinedCPUs
5154
numaPolicy := SingleNUMAPolicy
5255

5356
netDeviceName := NetDeviceName
@@ -64,6 +67,7 @@ func NewPerformanceProfile(name string) *PerformanceProfile {
6467
CPU: &CPU{
6568
Isolated: &isolatedCPUs,
6669
Reserved: &reservedCPUs,
70+
Offlined: &offlinedCPUs,
6771
},
6872
HugePages: &HugePages{
6973
DefaultHugePagesSize: &size,
@@ -140,9 +144,11 @@ var _ = Describe("PerformanceProfile", func() {
140144

141145
It("should reject cpus allocation with no reserved CPUs", func() {
142146
reservedCPUs := CPUSet("")
143-
isolatedCPUs := CPUSet("0-7")
144-
profile.Spec.CPU.Isolated = &isolatedCPUs
147+
isolatedCPUs := CPUSet("0-6")
148+
offlinedCPUs := CPUSet("7")
145149
profile.Spec.CPU.Reserved = &reservedCPUs
150+
profile.Spec.CPU.Isolated = &isolatedCPUs
151+
profile.Spec.CPU.Offlined = &offlinedCPUs
146152
errors := profile.validateCPUs()
147153
Expect(errors[0].Error()).To(ContainSubstring("reserved CPUs can not be empty"))
148154
})
@@ -157,7 +163,17 @@ var _ = Describe("PerformanceProfile", func() {
157163
Expect(errors[0].Error()).To(ContainSubstring("isolated CPUs can not be empty"))
158164
})
159165

160-
It("should reject cpus allocation with overlapping sets", func() {
166+
It("should allow cpus allocation with no offlined CPUs", func() {
167+
cpusIsolaled := CPUSet("0")
168+
cpusReserved := CPUSet("1")
169+
profile.Spec.CPU.Isolated = &cpusIsolaled
170+
profile.Spec.CPU.Reserved = &cpusReserved
171+
profile.Spec.CPU.Offlined = nil
172+
errors := profile.validateCPUs()
173+
Expect(errors).To(BeEmpty())
174+
})
175+
176+
It("should reject cpus allocation with overlapping sets between reserved and isolated", func() {
161177
reservedCPUs := CPUSet("0-7")
162178
isolatedCPUs := CPUSet("0-15")
163179
profile.Spec.CPU.Reserved = &reservedCPUs
@@ -166,6 +182,18 @@ var _ = Describe("PerformanceProfile", func() {
166182
Expect(errors).NotTo(BeEmpty(), "should have validation error when reserved and isolation CPUs have overlap")
167183
Expect(errors[0].Error()).To(ContainSubstring("reserved and isolated cpus overlap"))
168184
})
185+
186+
It("should reject cpus allocation with overlapping sets between reserved and offlined", func() {
187+
reservedCPUs := CPUSet("0-7")
188+
isolatedCPUs := CPUSet("8-11")
189+
offlinedCPUs := CPUSet("0,12-15")
190+
profile.Spec.CPU.Reserved = &reservedCPUs
191+
profile.Spec.CPU.Isolated = &isolatedCPUs
192+
profile.Spec.CPU.Offlined = &offlinedCPUs
193+
errors := profile.validateCPUs()
194+
Expect(errors).NotTo(BeEmpty(), "should have validation error when reserved and isolation CPUs have overlap")
195+
Expect(errors[0].Error()).To(ContainSubstring("reserved and offlined cpus overlap"))
196+
})
169197
})
170198

171199
Describe("Label selectors validation", func() {

pkg/apis/performanceprofile/v2/zz_generated.deepcopy.go

+5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/performanceprofile/controller/performanceprofile/components/machineconfig/machineconfig.go

+43-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
igntypes "github.com/coreos/ignition/v2/config/v3_2/types"
1616
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1717
"k8s.io/apimachinery/pkg/runtime"
18+
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
1819
"k8s.io/utils/pointer"
1920

2021
performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
@@ -48,6 +49,7 @@ const (
4849
udevRpsRules = "99-netdev-rps.rules"
4950
// scripts
5051
hugepagesAllocation = "hugepages-allocation"
52+
setCPUsOffline = "set-cpus-offline"
5153
ociHooks = "low-latency-hooks"
5254
setRPSMask = "set-rps-mask"
5355
)
@@ -76,6 +78,7 @@ const (
7678
environmentHugepagesSize = "HUGEPAGES_SIZE"
7779
environmentHugepagesCount = "HUGEPAGES_COUNT"
7880
environmentNUMANode = "NUMA_NODE"
81+
environmentOfflineCpus = "OFFLINE_CPUS"
7982
)
8083

8184
const (
@@ -139,7 +142,7 @@ func getIgnitionConfig(profile *performancev2.PerformanceProfile) (*igntypes.Con
139142

140143
// add script files under the node /usr/local/bin directory
141144
mode := 0700
142-
for _, script := range []string{hugepagesAllocation, ociHooks, setRPSMask} {
145+
for _, script := range []string{hugepagesAllocation, ociHooks, setRPSMask, setCPUsOffline} {
143146
dst := getBashScriptPath(script)
144147
content, err := assets.Scripts.ReadFile(fmt.Sprintf("scripts/%s.sh", script))
145148
if err != nil {
@@ -221,6 +224,24 @@ func getIgnitionConfig(profile *performancev2.PerformanceProfile) (*igntypes.Con
221224
})
222225
}
223226

227+
if profile.Spec.CPU.Offlined != nil {
228+
offlinedCPUSList, err := cpuset.Parse(string(*profile.Spec.CPU.Offlined))
229+
if err != nil {
230+
return nil, err
231+
}
232+
offlinedCPUSstring := components.ListToString(offlinedCPUSList.ToSlice())
233+
offlineCPUsService, err := getSystemdContent(getOfflineCPUs(offlinedCPUSstring))
234+
if err != nil {
235+
return nil, err
236+
}
237+
238+
ignitionConfig.Systemd.Units = append(ignitionConfig.Systemd.Units, igntypes.Unit{
239+
Contents: &offlineCPUsService,
240+
Enabled: pointer.BoolPtr(true),
241+
Name: getSystemdService(setCPUsOffline),
242+
})
243+
}
244+
224245
return ignitionConfig, nil
225246
}
226247

@@ -305,6 +326,27 @@ func getHugepagesAllocationUnitOptions(hugepagesSize string, hugepagesCount int3
305326
}
306327
}
307328

329+
func getOfflineCPUs(offlineCpus string) []*unit.UnitOption {
330+
return []*unit.UnitOption{
331+
// [Unit]
332+
// Description
333+
unit.NewUnitOption(systemdSectionUnit, systemdDescription, fmt.Sprintf("Set cpus offline: %s", offlineCpus)),
334+
// Before
335+
unit.NewUnitOption(systemdSectionUnit, systemdBefore, systemdServiceKubelet),
336+
// Environment
337+
unit.NewUnitOption(systemdSectionService, systemdEnvironment, getSystemdEnvironment(environmentOfflineCpus, offlineCpus)),
338+
// Type
339+
unit.NewUnitOption(systemdSectionService, systemdType, systemdServiceTypeOneshot),
340+
// RemainAfterExit
341+
unit.NewUnitOption(systemdSectionService, systemdRemainAfterExit, systemdTrue),
342+
// ExecStart
343+
unit.NewUnitOption(systemdSectionService, systemdExecStart, getBashScriptPath(setCPUsOffline)),
344+
// [Install]
345+
// WantedBy
346+
unit.NewUnitOption(systemdSectionInstall, systemdWantedBy, systemdTargetMultiUser),
347+
}
348+
}
349+
308350
func getRPSUnitOptions(rpsMask string) []*unit.UnitOption {
309351
cmd := fmt.Sprintf("%s %%i %s", getBashScriptPath(setRPSMask), rpsMask)
310352
return []*unit.UnitOption{

pkg/performanceprofile/controller/performanceprofile/components/machineconfig/machineconfig_test.go

+32-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,27 @@ const hugepagesAllocationService = `
3333
name: hugepages-allocation-1048576kB-NUMA0.service
3434
`
3535

36+
const offlineCPUS = `
37+
- contents: |
38+
[Unit]
39+
Description=Set cpus offline: 6,7
40+
Before=kubelet.service
41+
42+
[Service]
43+
Environment=OFFLINE_CPUS=6,7
44+
Type=oneshot
45+
RemainAfterExit=true
46+
ExecStart=/usr/local/bin/set-cpus-offline.sh
47+
48+
[Install]
49+
WantedBy=multi-user.target
50+
enabled: true
51+
name: set-cpus-offline.service
52+
`
53+
54+
var CPUs = []int{1, 2, 3, 4, 5, 6, 7, 8, 9}
55+
var CPUstring = "1,2,3,4,5,6,7,8,9"
56+
3657
var _ = Describe("Machine Config", func() {
3758

3859
Context("machine config creation ", func() {
@@ -45,7 +66,7 @@ var _ = Describe("Machine Config", func() {
4566
})
4667
})
4768

48-
Context("with hugepages with specified NUMA node", func() {
69+
Context("with hugepages with specified NUMA node and offlinedCPUs", func() {
4970
var manifest string
5071

5172
BeforeEach(func() {
@@ -73,5 +94,15 @@ var _ = Describe("Machine Config", func() {
7394
Expect(manifest).To(ContainSubstring(hugepagesAllocationService))
7495
})
7596

97+
It("should add systemd unit to offlineCPUs", func() {
98+
Expect(manifest).To(ContainSubstring(offlineCPUS))
99+
})
100+
})
101+
102+
Context("check listToString ", func() {
103+
It("should create string from CPUSet", func() {
104+
res := components.ListToString(CPUs)
105+
Expect(res).To(Equal(CPUstring))
106+
})
76107
})
77108
})

pkg/performanceprofile/controller/performanceprofile/components/tuned/tuned_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ var _ = Describe("Tuned", func() {
5656
manifest := getTunedManifest(profile)
5757

5858
Expect(manifest).To(ContainSubstring(expectedMatchSelector))
59-
Expect(manifest).To(ContainSubstring("isolated_cores=4-7"))
59+
Expect(manifest).To(ContainSubstring("isolated_cores=4-5"))
6060
Expect(manifest).To(ContainSubstring("governor=performance"))
6161
Expect(manifest).To(ContainSubstring("service.stalld=start,enable"))
6262
Expect(manifest).To(ContainSubstring("sched_rt_runtime_us=-1"))

pkg/performanceprofile/controller/performanceprofile/components/utils.go

+21-3
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,12 @@ func CPUListToMaskList(cpulist string) (hexMask string, err error) {
7979
type CPULists struct {
8080
reserved cpuset.CPUSet
8181
isolated cpuset.CPUSet
82+
offlined cpuset.CPUSet
8283
}
8384

8485
// Intersect returns cpu ids found in both the provided cpuLists, if any
85-
func (c *CPULists) Intersect() []int {
86-
commonSet := c.reserved.Intersection(c.isolated)
86+
func Intersect(firstSet cpuset.CPUSet, secondSet cpuset.CPUSet) []int {
	// Intersect the two sets and hand the common ids back as a slice.
	return firstSet.Intersection(secondSet).ToSlice()
}
8990

@@ -95,8 +96,12 @@ func (c *CPULists) GetReserved() cpuset.CPUSet {
9596
return c.reserved
9697
}
9798

99+
// GetOfflined returns the offlined CPU set stored in the CPULists.
func (c *CPULists) GetOfflined() cpuset.CPUSet {
100+
return c.offlined
101+
}
102+
98103
// NewCPULists parse text representations of reserved and isolated cpusets definiton and returns a CPULists object
99-
func NewCPULists(reservedList, isolatedList string) (*CPULists, error) {
104+
func NewCPULists(reservedList, isolatedList, offlinedList string) (*CPULists, error) {
100105
var err error
101106
reserved, err := cpuset.Parse(reservedList)
102107
if err != nil {
@@ -106,9 +111,14 @@ func NewCPULists(reservedList, isolatedList string) (*CPULists, error) {
106111
if err != nil {
107112
return nil, err
108113
}
114+
offlined, err := cpuset.Parse(offlinedList)
115+
if err != nil {
116+
return nil, err
117+
}
109118
return &CPULists{
110119
reserved: reserved,
111120
isolated: isolated,
121+
offlined: offlined,
112122
}, nil
113123
}
114124

@@ -142,3 +152,11 @@ func CPUMaskToCPUSet(cpuMask string) (cpuset.CPUSet, error) {
142152

143153
return builder.Result(), nil
144154
}
155+
156+
// ListToString renders the given CPU ids as a comma-separated string of
// base-10 numbers, e.g. []int{1, 2, 3} becomes "1,2,3".
//
// The ids are written in the order they appear in cpus; no sorting or
// de-duplication is performed. A nil or empty slice yields the empty string.
func ListToString(cpus []int) string {
	items := make([]string, len(cpus))
	for idx, cpu := range cpus {
		items[idx] = strconv.Itoa(cpu)
	}
	return strings.Join(items, ",")
}

0 commit comments

Comments
 (0)