Skip to content

Commit 445cc60

Browse files
authored
OCPBUGS-18052: feat: added logic to handle legacy sno install (#778)
* feat: added logic to handle legacy sno install updated documentation to correctly refer to feature as 4.14 changed variables to be more consistent with current naming pattern Signed-off-by: ehila <ehila@redhat.com> * doc: added comment and updated var name Signed-off-by: ehila <ehila@redhat.com> * refactor: updated function calls to use non-deprecated versions Signed-off-by: ehila <ehila@redhat.com> * feat: add e2e test for rendering default and legacy cpu partitioning Signed-off-by: ehila <ehila@redhat.com> --------- Signed-off-by: ehila <ehila@redhat.com>
1 parent 6784795 commit 445cc60

File tree

12 files changed

+244
-21
lines changed

12 files changed

+244
-21
lines changed

cmd/cluster-node-tuning-operator/main.go

+7-2
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,14 @@ func removePerformanceOLMOperator(cfg *rest.Config) error {
266266
return nil
267267
}
268268

269-
// During upgrade from 4.12 -> 4.13, this logic will update the authoritative flag in the new
269+
// During upgrade from 4.13 -> 4.14, this logic will update the authoritative flag in the new
270270
// Infrastructures.Status.CPUPartitioning to migrate Single Node clusters to the new method.
271-
// TODO: Revisit after 4.13 to remove logic when no longer needed.
271+
// Note:
272+
// This method will also execute during fresh installs on SNO using the legacy method. We are
273+
// generating the bootstrap files during that process, when this method then executes, it will only
274+
// update the API flag as intended, and since the bootstrap configs will already exist, nothing will happen.
275+
//
276+
// TODO: Revisit after 4.14 to remove logic when no longer needed.
272277
func migratePinnedSingleNodeInfraStatus(cfg *rest.Config, scheme *apiruntime.Scheme) error {
273278
k8sclient, err := client.New(cfg, client.Options{Scheme: scheme})
274279
if err != nil {

pkg/performanceprofile/cmd/render/render.go

+59-6
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@ limitations under the License.
1616
package render
1717

1818
import (
19+
"encoding/json"
1920
"fmt"
2021
"os"
2122
"path/filepath"
2223
"strings"
2324

2425
"sigs.k8s.io/yaml"
2526

27+
igntypes "github.com/coreos/ignition/v2/config/v3_2/types"
2628
apicfgv1 "github.com/openshift/api/config/v1"
2729
mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
2830

@@ -86,6 +88,7 @@ func render(inputDir, outputDir string) error {
8688
var (
8789
perfProfiles []*performancev2.PerformanceProfile
8890
mcPools []*mcfgv1.MachineConfigPool
91+
mcConfigs []*mcfgv1.MachineConfig
8992
infra *apicfgv1.Infrastructure
9093
ctrcfgs []*mcfgv1.ContainerRuntimeConfig
9194
)
@@ -102,7 +105,7 @@ func render(inputDir, outputDir string) error {
102105
return fmt.Errorf("error parsing manifests from %s: %w", file.Name(), err)
103106
}
104107

105-
// Decode manifest files and
108+
// Decode manifest files
106109
for idx, m := range manifests {
107110
obji, err := runtime.Decode(runtimeDecoder, m.Raw)
108111
if err != nil {
@@ -118,6 +121,8 @@ func render(inputDir, outputDir string) error {
118121
perfProfiles = append(perfProfiles, obj)
119122
case *mcfgv1.MachineConfigPool:
120123
mcPools = append(mcPools, obj)
124+
case *mcfgv1.MachineConfig:
125+
mcConfigs = append(mcConfigs, obj)
121126
case *apicfgv1.Infrastructure:
122127
if obj.Name == clusterConfigResourceName {
123128
infra = obj
@@ -134,18 +139,23 @@ func render(inputDir, outputDir string) error {
134139
klog.Warning("zero performance profiles were found")
135140
}
136141

137-
var pinnedMode *apicfgv1.CPUPartitioningMode
142+
var partitioningMode *apicfgv1.CPUPartitioningMode
138143
if infra != nil {
139-
pinnedMode = &infra.Status.CPUPartitioning
144+
partitioningMode = &infra.Status.CPUPartitioning
140145
}
141146

142-
if err := genBootstrapWorkloadPinningManifests(pinnedMode, outputDir, defaultMCPNames...); err != nil {
147+
if isLegacySNOWorkloadPinningMethod(mcConfigs, infra, partitioningMode) {
148+
legacyAllNodes := apicfgv1.CPUPartitioningAllNodes
149+
partitioningMode = &legacyAllNodes
150+
}
151+
152+
if err := genBootstrapWorkloadPinningManifests(partitioningMode, outputDir, defaultMCPNames...); err != nil {
143153
return err
144154
}
145155

146156
// If the user supplies extra machine pools, we ingest them here
147157
for _, pool := range mcPools {
148-
if err := genBootstrapWorkloadPinningManifests(pinnedMode, outputDir, pool.Name); err != nil {
158+
if err := genBootstrapWorkloadPinningManifests(partitioningMode, outputDir, pool.Name); err != nil {
149159
return err
150160
}
151161
}
@@ -166,7 +176,7 @@ func render(inputDir, outputDir string) error {
166176
return fmt.Errorf("render: could not determine high-performance runtime class container-runtime for profile %q; %w", pp.Name, err)
167177
}
168178

169-
components, err := manifestset.GetNewComponents(pp, mcp, pinnedMode, defaultRuntime)
179+
components, err := manifestset.GetNewComponents(pp, mcp, partitioningMode, defaultRuntime)
170180
if err != nil {
171181
return err
172182
}
@@ -210,6 +220,49 @@ func render(inputDir, outputDir string) error {
210220
return nil
211221
}
212222

223+
// isLegacySNOWorkloadPinningMethod provides a check for situations where the user is creating an SNO cluster with the
224+
// legacy method for CPU Partitioning. In order to make sure the bootstrap and running cluster MCs are synced up we check the MCs
225+
// provided by the user, if any one of them contain the file addition to `/etc/kubernetes/openshift-workload-pinning` and have not set
226+
// the API CPUPartitioningAllNodes at install time, we assume a legacy intention and alter the flag to generate the new bootstrap manifests.
227+
//
228+
// Note:
229+
// - This will only trigger when Control plane is SNO or the CPU Partitioning API is NOT set to AllNodes
230+
// - We do not alter the API flag here, when NTO starts up in cluster, it will notice the flag and
231+
// update the flag and ignore the create error since the files already exist.
232+
func isLegacySNOWorkloadPinningMethod(mcs []*mcfgv1.MachineConfig, infra *apicfgv1.Infrastructure, partitioningMode *apicfgv1.CPUPartitioningMode) bool {
233+
234+
// If we can't determine SNO topology, we return.
235+
if infra == nil {
236+
return false
237+
}
238+
239+
if infra.Status.ControlPlaneTopology != apicfgv1.SingleReplicaTopologyMode || (partitioningMode != nil && *partitioningMode == apicfgv1.CPUPartitioningAllNodes) {
240+
return false
241+
}
242+
243+
// This file name is stable and currently hardcoded in kubelet
244+
// https://github.com/openshift/kubernetes/blob/ba1825544533d273d86b405195ee791e500b74c7/pkg/kubelet/managed/managed.go#L31
245+
const kubernetesPinningConfFile = "/etc/kubernetes/openshift-workload-pinning"
246+
247+
for _, mc := range mcs {
248+
ign := &igntypes.Config{}
249+
err := json.Unmarshal(mc.Spec.Config.Raw, ign)
250+
if err != nil {
251+
klog.Errorf("skipping legacy check on mc (%s) unable to marshal raw config to ignition struct: %s", mc.Name, err)
252+
continue
253+
}
254+
255+
for _, file := range ign.Storage.Files {
256+
if file.Node.Path == kubernetesPinningConfFile {
257+
klog.Infof("mc (%s) contains file path (%s), using legacy signal for workload pinning", mc.Name, kubernetesPinningConfFile)
258+
return true
259+
}
260+
}
261+
}
262+
263+
return false
264+
}
265+
213266
// genBootstrapWorkloadPinningManifests is used to generate the appropriate bootstrap workload pinning manifests
214267
// based on the cluster CPU Partitioning Mode and the given MachineConfigPool names. The created manifests are the
215268
// default state configs for the node which make no assumption for which CPU's are used for workload pinning.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
apiVersion: config.openshift.io/v1
2+
kind: Infrastructure
3+
metadata:
4+
name: cluster
5+
spec:
6+
cloudConfig:
7+
name: ""
8+
platformSpec:
9+
aws: {}
10+
type: AWS
11+
status:
12+
cpuPartitioning: AllNodes
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
apiVersion: config.openshift.io/v1
2+
kind: Infrastructure
3+
metadata:
4+
name: cluster
5+
spec:
6+
cloudConfig:
7+
name: ""
8+
platformSpec:
9+
aws: {}
10+
type: AWS
11+
status:
12+
controlPlaneTopology: SingleReplica
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
apiVersion: machineconfiguration.openshift.io/v1
2+
kind: MachineConfig
3+
metadata:
4+
labels:
5+
machineconfiguration.openshift.io/role: master
6+
name: 02-master-workload-partitioning
7+
spec:
8+
config:
9+
ignition:
10+
version: 3.2.0
11+
storage:
12+
files:
13+
- contents:
14+
source: data:text/plain;charset=utf-8;base64,ewogICJtYW5hZ2VtZW50IjogewogICAgImNwdXNldCI6ICIiCiAgfQp9Cg==
15+
mode: 420
16+
overwrite: true
17+
path: /etc/kubernetes/openshift-workload-pinning

test/e2e/performanceprofile/functests-render-command/1_render_command/render_test.go

+53-13
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package __render_command_test
22

33
import (
44
"fmt"
5-
"io/ioutil"
65
"os"
76
"os/exec"
87
"path/filepath"
@@ -12,11 +11,18 @@ import (
1211
. "github.com/onsi/gomega"
1312
)
1413

14+
const (
15+
defaultExpectedDir = "default"
16+
pinnedExpectedDir = "pinned"
17+
)
18+
1519
var (
16-
assetsOutDir string
17-
assetsInDirs []string
18-
ppDir string
19-
testDataPath string
20+
assetsOutDir string
21+
assetsInDirs []string
22+
ppDir string
23+
testDataPath string
24+
defaultPinnedDir string
25+
snoLegacyPinnedDir string
2026
)
2127

2228
var _ = Describe("render command e2e test", func() {
@@ -25,6 +31,8 @@ var _ = Describe("render command e2e test", func() {
2531
assetsOutDir = createTempAssetsDir()
2632
assetsInDir := filepath.Join(workspaceDir, "test", "e2e", "performanceprofile", "cluster-setup", "base", "performance")
2733
ppDir = filepath.Join(workspaceDir, "test", "e2e", "performanceprofile", "cluster-setup", "manual-cluster", "performance")
34+
defaultPinnedDir = filepath.Join(workspaceDir, "test", "e2e", "performanceprofile", "cluster-setup", "pinned-cluster", "default")
35+
snoLegacyPinnedDir = filepath.Join(workspaceDir, "test", "e2e", "performanceprofile", "cluster-setup", "pinned-cluster", "single-node-legacy")
2836
testDataPath = filepath.Join(workspaceDir, "test", "e2e", "performanceprofile", "testdata")
2937
assetsInDirs = []string{assetsInDir, ppDir}
3038
})
@@ -41,7 +49,7 @@ var _ = Describe("render command e2e test", func() {
4149
fmt.Fprintf(GinkgoWriter, "running: %v\n", cmdline)
4250

4351
cmd := exec.Command(cmdline[0], cmdline[1:]...)
44-
runAndCompare(cmd)
52+
runAndCompare(cmd, defaultExpectedDir)
4553

4654
})
4755

@@ -57,7 +65,39 @@ var _ = Describe("render command e2e test", func() {
5765
fmt.Sprintf("ASSET_INPUT_DIR=%s", strings.Join(assetsInDirs, ",")),
5866
fmt.Sprintf("ASSET_OUTPUT_DIR=%s", assetsOutDir),
5967
)
60-
runAndCompare(cmd)
68+
runAndCompare(cmd, defaultExpectedDir)
69+
})
70+
})
71+
72+
Context("With pinned cluster resources", func() {
73+
It("Given default pinned infrastructure status, should render cpu partitioning configs", func() {
74+
75+
cmdline := []string{
76+
filepath.Join(binPath, "cluster-node-tuning-operator"),
77+
"render",
78+
"--asset-input-dir", defaultPinnedDir,
79+
"--asset-output-dir", assetsOutDir,
80+
}
81+
fmt.Fprintf(GinkgoWriter, "running: %v\n", cmdline)
82+
83+
cmd := exec.Command(cmdline[0], cmdline[1:]...)
84+
runAndCompare(cmd, pinnedExpectedDir)
85+
86+
})
87+
88+
It("Given legacy SNO pinned infrastructure status, should render cpu partitioning configs", func() {
89+
90+
cmdline := []string{
91+
filepath.Join(binPath, "cluster-node-tuning-operator"),
92+
"render",
93+
"--asset-input-dir", snoLegacyPinnedDir,
94+
"--asset-output-dir", assetsOutDir,
95+
}
96+
fmt.Fprintf(GinkgoWriter, "running: %v\n", cmdline)
97+
98+
cmd := exec.Command(cmdline[0], cmdline[1:]...)
99+
runAndCompare(cmd, pinnedExpectedDir)
100+
61101
})
62102
})
63103

@@ -68,7 +108,7 @@ var _ = Describe("render command e2e test", func() {
68108
})
69109

70110
func createTempAssetsDir() string {
71-
assets, err := ioutil.TempDir("", "assets")
111+
assets, err := os.MkdirTemp("", "assets")
72112
Expect(err).ToNot(HaveOccurred())
73113
fmt.Printf("assets` output dir at: %q\n", assets)
74114
return assets
@@ -78,21 +118,21 @@ func cleanArtifacts() {
78118
os.RemoveAll(assetsOutDir)
79119
}
80120

81-
func runAndCompare(cmd *exec.Cmd) {
121+
func runAndCompare(cmd *exec.Cmd, dir string) {
82122
_, err := cmd.Output()
83123
Expect(err).ToNot(HaveOccurred())
84124

85-
outputAssetsFiles, err := ioutil.ReadDir(assetsOutDir)
125+
outputAssetsFiles, err := os.ReadDir(assetsOutDir)
86126
Expect(err).ToNot(HaveOccurred())
87127

88-
refPath := filepath.Join(testDataPath, "render-expected-output")
128+
refPath := filepath.Join(testDataPath, "render-expected-output", dir)
89129
fmt.Fprintf(GinkgoWriter, "reference data at: %q\n", refPath)
90130

91131
for _, f := range outputAssetsFiles {
92-
refData, err := ioutil.ReadFile(filepath.Join(refPath, f.Name()))
132+
refData, err := os.ReadFile(filepath.Join(refPath, f.Name()))
93133
Expect(err).ToNot(HaveOccurred())
94134

95-
data, err := ioutil.ReadFile(filepath.Join(assetsOutDir, f.Name()))
135+
data, err := os.ReadFile(filepath.Join(assetsOutDir, f.Name()))
96136
Expect(err).ToNot(HaveOccurred())
97137

98138
diff, err := getFilesDiff(data, refData)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
apiVersion: machineconfiguration.openshift.io/v1
2+
kind: MachineConfig
3+
metadata:
4+
creationTimestamp: null
5+
labels:
6+
machineconfiguration.openshift.io/role: master
7+
name: 01-master-cpu-partitioning
8+
spec:
9+
baseOSExtensionsContainerImage: ""
10+
config:
11+
ignition:
12+
config:
13+
replace:
14+
verification: {}
15+
proxy: {}
16+
security:
17+
tls: {}
18+
timeouts: {}
19+
version: 3.2.0
20+
passwd: {}
21+
storage:
22+
files:
23+
- contents:
24+
source: data:text/plain;charset=utf-8;base64,CnsKICAibWFuYWdlbWVudCI6IHsKICAgICJjcHVzZXQiOiAiIgogIH0KfQo=
25+
verification: {}
26+
group: {}
27+
mode: 420
28+
path: /etc/kubernetes/openshift-workload-pinning
29+
user: {}
30+
- contents:
31+
source: data:text/plain;charset=utf-8;base64,CltjcmlvLnJ1bnRpbWUud29ya2xvYWRzLm1hbmFnZW1lbnRdCmFjdGl2YXRpb25fYW5ub3RhdGlvbiA9ICJ0YXJnZXQud29ya2xvYWQub3BlbnNoaWZ0LmlvL21hbmFnZW1lbnQiCmFubm90YXRpb25fcHJlZml4ID0gInJlc291cmNlcy53b3JrbG9hZC5vcGVuc2hpZnQuaW8iCnJlc291cmNlcyA9IHsgImNwdXNoYXJlcyIgPSAwLCAiY3B1c2V0IiA9ICIiIH0K
32+
verification: {}
33+
group: {}
34+
mode: 420
35+
path: /etc/crio/crio.conf.d/01-workload-pinning-default.conf
36+
user: {}
37+
systemd: {}
38+
extensions: null
39+
fips: false
40+
kernelArguments: null
41+
kernelType: ""
42+
osImageURL: ""

0 commit comments

Comments
 (0)