MGMT-19860: Remove dependencies from OpenShift AI operator (#7274)
With the introduction of operator bundles it is no longer necessary to
add dependencies to the OpenShift AI operator because those will be part
of the bundle.

Related: https://issues.redhat.com/browse/MGMT-19860

Signed-off-by: Juan Hernandez <juan.hernandez@redhat.com>
jhernand authored Feb 5, 2025
1 parent 1e8f04f commit 03c6b94
Showing 3 changed files with 1 addition and 163 deletions.
internal/operators/openshiftai/openshift_ai_config.go (0 additions, 6 deletions)

@@ -7,15 +7,9 @@ type Config struct {
 	MinWorkerNodes     int64 `envconfig:"OPENSHIFT_AI_MIN_WORKER_NODES" default:"2"`
 	MinWorkerMemoryGiB int64 `envconfig:"OPENSHIFT_AI_MIN_WORKER_MEMORY_GIB" default:"32"`
 	MinWorkerCPUCores  int64 `envconfig:"OPENSHIFT_AI_MIN_WORKER_CPU_CORES" default:"8"`
-	RequireGPU         bool  `envconfig:"OPENSHIFT_AI_REQUIRE_GPU" default:"true"`
 
 	// TODO: Currently we use the controller image to run the setup tools because all we need is the shell and the
 	// `oc` command, and that way we don't need an additional image. But in the future we will probably want to have
 	// a separate image that contains the things that we need to run these setup jobs.
 	ControllerImage string `envconfig:"CONTROLLER_IMAGE" default:"quay.io/edge-infrastructure/assisted-installer-controller:latest"`
-
-	// SupportedGPUS is a comma separated list of vendor identifiers of supported GPUs. For examaple, to enable
-	// support for NVIDIA and Virtio GPUS the value should be `10de,1af4`. By default only NVIDIA GPUs are
-	// supported.
-	SupportedGPUs []string `envconfig:"OPENSHIFT_AI_SUPPORTED_GPUS" default:"10de"`
 }
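For context, the fields that remain in this struct are populated from environment variables through the github.com/kelseyhightower/envconfig tags, with the `default` tag used when a variable is unset. Below is a minimal, standalone sketch of that loading pattern; it is an illustration only, not code from this repository:

package main

import (
	"fmt"

	"github.com/kelseyhightower/envconfig"
)

// Config mirrors the reduced struct above: after this commit only the worker
// sizing knobs and the controller image remain.
type Config struct {
	MinWorkerNodes     int64  `envconfig:"OPENSHIFT_AI_MIN_WORKER_NODES" default:"2"`
	MinWorkerMemoryGiB int64  `envconfig:"OPENSHIFT_AI_MIN_WORKER_MEMORY_GIB" default:"32"`
	MinWorkerCPUCores  int64  `envconfig:"OPENSHIFT_AI_MIN_WORKER_CPU_CORES" default:"8"`
	ControllerImage    string `envconfig:"CONTROLLER_IMAGE" default:"quay.io/edge-infrastructure/assisted-installer-controller:latest"`
}

func main() {
	var cfg Config
	// Process reads the environment and falls back to the `default` tags
	// for any variable that is not set.
	if err := envconfig.Process("", &cfg); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", cfg)
}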
internal/operators/openshiftai/openshift_ai_operator.go (1 addition, 81 deletions)

@@ -3,20 +3,13 @@ package openshiftai
 import (
 	"context"
 	"fmt"
-	"strings"
 	"text/template"
 
 	"github.com/kelseyhightower/envconfig"
 	"github.com/lib/pq"
 	"github.com/openshift/assisted-service/internal/common"
 	"github.com/openshift/assisted-service/internal/operators/api"
-	"github.com/openshift/assisted-service/internal/operators/authorino"
 	operatorscommon "github.com/openshift/assisted-service/internal/operators/common"
-	"github.com/openshift/assisted-service/internal/operators/nvidiagpu"
-	"github.com/openshift/assisted-service/internal/operators/odf"
-	"github.com/openshift/assisted-service/internal/operators/pipelines"
-	"github.com/openshift/assisted-service/internal/operators/serverless"
-	"github.com/openshift/assisted-service/internal/operators/servicemesh"
 	"github.com/openshift/assisted-service/internal/templating"
 	"github.com/openshift/assisted-service/models"
 	"github.com/openshift/assisted-service/pkg/conversions"
@@ -70,19 +63,7 @@ func (o *operator) GetFullName() string {
 
 // GetDependencies provides a list of dependencies of the Operator
 func (o *operator) GetDependencies(c *common.Cluster) (result []string, err error) {
-	// TODO: We shold probably add the node feature discovery and NVIDIA GPU operators only if there is at least one
-	// NVIDIA GPU in the cluster, but unfortunatelly this is calculated and saved to the database only when the
-	// cluster is created or updated via the API, and at that point we don't have the host inventory yet to
-	// determine if there are NVIDIA GPU.
-	result = []string{
-		authorino.Operator.Name,
-		nvidiagpu.Operator.Name,
-		odf.Operator.Name,
-		pipelines.Operator.Name,
-		serverless.Operator.Name,
-		servicemesh.Operator.Name,
-	}
-	return result, nil
+	return nil, nil
 }
 
 // GetClusterValidationID returns cluster validation ID for the operator.
@@ -112,33 +93,6 @@ func (o *operator) ValidateCluster(ctx context.Context, cluster *common.Cluster)
 		)
 	}
 
-	// Check that there is at least one supported GPU:
-	if o.config.RequireGPU {
-		var gpuList []*models.Gpu
-		gpuList, err = o.gpusInCluster(cluster)
-		if err != nil {
-			return
-		}
-		var supportedGpuCount int64
-		for _, gpu := range gpuList {
-			var isSupported bool
-			isSupported, err = o.isSupportedGpu(gpu)
-			if err != nil {
-				return
-			}
-			if isSupported {
-				supportedGpuCount++
-			}
-		}
-		if supportedGpuCount == 0 {
-			result.Reasons = append(
-				result.Reasons,
-				"OpenShift AI requires at least one supported GPU, but there is none in the "+
-					"discovered hosts.",
-			)
-		}
-	}
-
 	if len(result.Reasons) > 0 {
 		result.Status = api.Failure
 	} else {
@@ -147,40 +101,6 @@ func (o *operator) ValidateCluster(ctx context.Context, cluster *common.Cluster)
 	return
 }
 
-func (o *operator) gpusInCluster(cluster *common.Cluster) (result []*models.Gpu, err error) {
-	for _, host := range cluster.Hosts {
-		var gpus []*models.Gpu
-		gpus, err = o.gpusInHost(host)
-		if err != nil {
-			return
-		}
-		result = append(result, gpus...)
-	}
-	return
-}
-
-func (o *operator) gpusInHost(host *models.Host) (result []*models.Gpu, err error) {
-	if host.Inventory == "" {
-		return
-	}
-	inventory, err := common.UnmarshalInventory(host.Inventory)
-	if err != nil {
-		return
-	}
-	result = inventory.Gpus
-	return
-}
-
-func (o *operator) isSupportedGpu(gpu *models.Gpu) (result bool, err error) {
-	for _, supportedGpu := range o.config.SupportedGPUs {
-		if strings.EqualFold(gpu.VendorID, supportedGpu) {
-			result = true
-			return
-		}
-	}
-	return
-}
-
 // ValidateHost returns validationResult based on node type requirements such as memory and CPU.
 func (o *operator) ValidateHost(ctx context.Context, cluster *common.Cluster, host *models.Host,
 	_ *models.ClusterHostRequirementsDetails) (result api.ValidationResult, err error) {
internal/operators/openshiftai/openshift_ai_operator_test.go (0 additions, 76 deletions)

@@ -2,7 +2,6 @@ package openshiftai
 
 import (
 	"context"
-	"os"
 
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/ginkgo/extensions/table"
@@ -97,79 +96,4 @@ var _ = Describe("Operator", func() {
 			},
 		),
 	)
-
-	DescribeTable(
-		"Is supported GPU",
-		func(env map[string]string, gpu *models.Gpu, expected bool) {
-			// Set the environment variables and restore the previous values when finished:
-			for name, value := range env {
-				oldValue, present := os.LookupEnv(name)
-				if present {
-					defer func() {
-						err := os.Setenv(name, oldValue)
-						Expect(err).ToNot(HaveOccurred())
-					}()
-				} else {
-					defer func() {
-						err := os.Unsetenv(name)
-						Expect(err).ToNot(HaveOccurred())
-					}()
-				}
-				err := os.Setenv(name, value)
-				Expect(err).ToNot(HaveOccurred())
-			}
-
-			// Create the operator:
-			operator = NewOpenShiftAIOperator(common.GetTestLog())
-
-			// Run the check:
-			actual, err := operator.isSupportedGpu(gpu)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(actual).To(Equal(expected))
-		},
-		Entry(
-			"NVIDIA is supported by default",
-			nil,
-			&models.Gpu{
-				VendorID: "10de",
-			},
-			true,
-		),
-		Entry(
-			"Virtio isn't supported by default",
-			nil,
-			&models.Gpu{
-				VendorID: "1af4",
-			},
-			false,
-		),
-		Entry(
-			"Virtio is supported if explicitly added",
-			map[string]string{
-				"OPENSHIFT_AI_SUPPORTED_GPUS": "10de,1af4",
-			},
-			&models.Gpu{
-				VendorID: "1af4",
-			},
-			true,
-		),
-		Entry(
-			"Order isn't relevant",
-			map[string]string{
-				"OPENSHIFT_AI_SUPPORTED_GPUS": "1af4,10de",
-			},
-			&models.Gpu{
-				VendorID: "10de",
-			},
-			true,
-		),
-		Entry(
-			"Case isn't relevant",
-			nil,
-			&models.Gpu{
-				VendorID: "10DE",
-			},
-			true,
-		),
-	)
 })
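For reference, the GPU gate removed by this commit (exercised by the deleted table above) reduced to a case-insensitive match of a GPU's PCI vendor ID against the OPENSHIFT_AI_SUPPORTED_GPUS list, with 10de (NVIDIA) as the only default entry. The following standalone sketch illustrates that check; the function name is hypothetical and not part of the repository:

package main

import (
	"fmt"
	"strings"
)

// isSupportedVendor reports whether vendorID matches any entry in supported,
// comparing case-insensitively, mirroring the removed isSupportedGpu helper.
func isSupportedVendor(vendorID string, supported []string) bool {
	for _, candidate := range supported {
		if strings.EqualFold(vendorID, candidate) {
			return true
		}
	}
	return false
}

func main() {
	supported := []string{"10de"} // previous default: NVIDIA only
	fmt.Println(isSupportedVendor("10DE", supported)) // true: case is ignored
	fmt.Println(isSupportedVendor("1af4", supported)) // false: Virtio unless explicitly added
}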
