MGMT-19860: Remove dependencies from OpenShift AI operator (openshift#7274)

With the introduction of operator bundles it is no longer necessary to
add dependencies to the OpenShift AI operator because those will be part
of the bundle.

Related: https://issues.redhat.com/browse/MGMT-19860

Signed-off-by: Juan Hernandez <juan.hernandez@redhat.com>
jhernand authored Feb 5, 2025
1 parent 1e8f04f commit 03c6b94
Showing 3 changed files with 1 addition and 163 deletions.
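
Note on the hunks below: the rendered diff carries no +/- markers, but per the summary above almost everything shown is deleted code; the single added line is the `return nil, nil` in GetDependencies. To illustrate what that change means for callers, here is a hedged sketch of a dependency-resolution loop; the Operator interface and resolve helper are simplified stand-ins invented for this example, not the actual assisted-service resolver.

package main

import "fmt"

// Operator is a trimmed-down, hypothetical version of the contract: each
// operator can name other operators that should be installed alongside it.
type Operator interface {
    GetName() string
    GetDependencies() ([]string, error)
}

// resolve collects the dependencies of every requested operator. An operator
// that returns nil, nil (as OpenShift AI does after this commit) contributes
// nothing here; its requirements are expected to ship inside its own bundle.
func resolve(requested []Operator) ([]string, error) {
    seen := map[string]bool{}
    var deps []string
    for _, op := range requested {
        names, err := op.GetDependencies()
        if err != nil {
            return nil, err
        }
        for _, name := range names {
            if !seen[name] {
                seen[name] = true
                deps = append(deps, name)
            }
        }
    }
    return deps, nil
}

type openshiftAI struct{}

func (openshiftAI) GetName() string                    { return "openshift-ai" }
func (openshiftAI) GetDependencies() ([]string, error) { return nil, nil }

func main() {
    deps, err := resolve([]Operator{openshiftAI{}})
    fmt.Println(deps, err) // [] <nil>: the service no longer schedules authorino, ODF, serverless, etc.
}
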
6 changes: 0 additions & 6 deletions internal/operators/openshiftai/openshift_ai_config.go
@@ -7,15 +7,9 @@ type Config struct {
MinWorkerNodes int64 `envconfig:"OPENSHIFT_AI_MIN_WORKER_NODES" default:"2"`
MinWorkerMemoryGiB int64 `envconfig:"OPENSHIFT_AI_MIN_WORKER_MEMORY_GIB" default:"32"`
MinWorkerCPUCores int64 `envconfig:"OPENSHIFT_AI_MIN_WORKER_CPU_CORES" default:"8"`
RequireGPU bool `envconfig:"OPENSHIFT_AI_REQUIRE_GPU" default:"true"`

// TODO: Currently we use the controller image to run the setup tools because all we need is the shell and the
// `oc` command, and that way we don't need an additional image. But in the future we will probably want to have
// a separate image that contains the things that we need to run these setup jobs.
ControllerImage string `envconfig:"CONTROLLER_IMAGE" default:"quay.io/edge-infrastructure/assisted-installer-controller:latest"`

// SupportedGPUs is a comma-separated list of vendor identifiers of supported GPUs. For example, to enable
// support for NVIDIA and Virtio GPUs the value should be `10de,1af4`. By default only NVIDIA GPUs are
// supported.
SupportedGPUs []string `envconfig:"OPENSHIFT_AI_SUPPORTED_GPUS" default:"10de"`
}
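
For reference, the struct tags above are consumed by the kelseyhightower/envconfig package. A minimal sketch of how such a configuration is loaded follows; the field set is trimmed for illustration and the surrounding main is not repository code.

package main

import (
    "fmt"
    "log"

    "github.com/kelseyhightower/envconfig"
)

// Config carries a subset of the fields above purely for illustration; each
// tag names the environment variable to read and the default to use when
// that variable is unset.
type Config struct {
    MinWorkerNodes     int64 `envconfig:"OPENSHIFT_AI_MIN_WORKER_NODES" default:"2"`
    MinWorkerMemoryGiB int64 `envconfig:"OPENSHIFT_AI_MIN_WORKER_MEMORY_GIB" default:"32"`
    MinWorkerCPUCores  int64 `envconfig:"OPENSHIFT_AI_MIN_WORKER_CPU_CORES" default:"8"`
}

func main() {
    var cfg Config
    // Process reads the tagged environment variables (no prefix here) and
    // falls back to the declared defaults for anything that is not set.
    if err := envconfig.Process("", &cfg); err != nil {
        log.Fatal(err)
    }
    fmt.Printf("%+v\n", cfg)
}
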
82 changes: 1 addition & 81 deletions internal/operators/openshiftai/openshift_ai_operator.go
@@ -3,20 +3,13 @@ package openshiftai
import (
"context"
"fmt"
"strings"
"text/template"

"github.com/kelseyhightower/envconfig"
"github.com/lib/pq"
"github.com/openshift/assisted-service/internal/common"
"github.com/openshift/assisted-service/internal/operators/api"
"github.com/openshift/assisted-service/internal/operators/authorino"
operatorscommon "github.com/openshift/assisted-service/internal/operators/common"
"github.com/openshift/assisted-service/internal/operators/nvidiagpu"
"github.com/openshift/assisted-service/internal/operators/odf"
"github.com/openshift/assisted-service/internal/operators/pipelines"
"github.com/openshift/assisted-service/internal/operators/serverless"
"github.com/openshift/assisted-service/internal/operators/servicemesh"
"github.com/openshift/assisted-service/internal/templating"
"github.com/openshift/assisted-service/models"
"github.com/openshift/assisted-service/pkg/conversions"
@@ -70,19 +63,7 @@ func (o *operator) GetFullName() string {

// GetDependencies provides a list of dependencies of the Operator
func (o *operator) GetDependencies(c *common.Cluster) (result []string, err error) {
// TODO: We should probably add the node feature discovery and NVIDIA GPU operators only if there is at least one
// NVIDIA GPU in the cluster, but unfortunately this is calculated and saved to the database only when the
// cluster is created or updated via the API, and at that point we don't have the host inventory yet to
// determine if there are NVIDIA GPUs.
result = []string{
authorino.Operator.Name,
nvidiagpu.Operator.Name,
odf.Operator.Name,
pipelines.Operator.Name,
serverless.Operator.Name,
servicemesh.Operator.Name,
}
return result, nil
return nil, nil
}

// GetClusterValidationID returns cluster validation ID for the operator.
@@ -112,33 +93,6 @@ func (o *operator) ValidateCluster(ctx context.Context, cluster *common.Cluster)
)
}

// Check that there is at least one supported GPU:
if o.config.RequireGPU {
var gpuList []*models.Gpu
gpuList, err = o.gpusInCluster(cluster)
if err != nil {
return
}
var supportedGpuCount int64
for _, gpu := range gpuList {
var isSupported bool
isSupported, err = o.isSupportedGpu(gpu)
if err != nil {
return
}
if isSupported {
supportedGpuCount++
}
}
if supportedGpuCount == 0 {
result.Reasons = append(
result.Reasons,
"OpenShift AI requires at least one supported GPU, but there is none in the "+
"discovered hosts.",
)
}
}

if len(result.Reasons) > 0 {
result.Status = api.Failure
} else {
@@ -147,40 +101,6 @@ func (o *operator) ValidateCluster(ctx context.Context, cluster *common.Cluster)
return
}

func (o *operator) gpusInCluster(cluster *common.Cluster) (result []*models.Gpu, err error) {
for _, host := range cluster.Hosts {
var gpus []*models.Gpu
gpus, err = o.gpusInHost(host)
if err != nil {
return
}
result = append(result, gpus...)
}
return
}

func (o *operator) gpusInHost(host *models.Host) (result []*models.Gpu, err error) {
if host.Inventory == "" {
return
}
inventory, err := common.UnmarshalInventory(host.Inventory)
if err != nil {
return
}
result = inventory.Gpus
return
}

func (o *operator) isSupportedGpu(gpu *models.Gpu) (result bool, err error) {
for _, supportedGpu := range o.config.SupportedGPUs {
if strings.EqualFold(gpu.VendorID, supportedGpu) {
result = true
return
}
}
return
}

// ValidateHost returns validationResult based on node type requirements such as memory and CPU.
func (o *operator) ValidateHost(ctx context.Context, cluster *common.Cluster, host *models.Host,
_ *models.ClusterHostRequirementsDetails) (result api.ValidationResult, err error) {
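
To make explicit what validation this file loses: the deleted helpers walked every host's inventory and matched GPU vendor IDs case-insensitively against the configured list. A self-contained sketch of that check follows; the sample data and main are invented for illustration.

package main

import (
    "fmt"
    "strings"
)

// isSupportedGPU mirrors the removed helper: a GPU counts as supported when
// its PCI vendor ID matches one of the configured IDs, ignoring case.
func isSupportedGPU(vendorID string, supported []string) bool {
    for _, s := range supported {
        if strings.EqualFold(vendorID, s) {
            return true
        }
    }
    return false
}

func main() {
    supported := []string{"10de"} // the previous default: NVIDIA only
    fmt.Println(isSupportedGPU("10DE", supported)) // true, matching ignores case
    fmt.Println(isSupportedGPU("1af4", supported)) // false, Virtio was not in the default list
}
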
76 changes: 0 additions & 76 deletions internal/operators/openshiftai/openshift_ai_operator_test.go
@@ -2,7 +2,6 @@ package openshiftai

import (
"context"
"os"

. "github.com/onsi/ginkgo"
. "github.com/onsi/ginkgo/extensions/table"
@@ -97,79 +96,4 @@ var _ = Describe("Operator", func() {
},
),
)

DescribeTable(
"Is supported GPU",
func(env map[string]string, gpu *models.Gpu, expected bool) {
// Set the environment variables and restore the previous values when finished:
for name, value := range env {
oldValue, present := os.LookupEnv(name)
if present {
defer func() {
err := os.Setenv(name, oldValue)
Expect(err).ToNot(HaveOccurred())
}()
} else {
defer func() {
err := os.Unsetenv(name)
Expect(err).ToNot(HaveOccurred())
}()
}
err := os.Setenv(name, value)
Expect(err).ToNot(HaveOccurred())
}

// Create the operator:
operator = NewOpenShiftAIOperator(common.GetTestLog())

// Run the check:
actual, err := operator.isSupportedGpu(gpu)
Expect(err).ToNot(HaveOccurred())
Expect(actual).To(Equal(expected))
},
Entry(
"NVIDIA is supported by default",
nil,
&models.Gpu{
VendorID: "10de",
},
true,
),
Entry(
"Virtio isn't supported by default",
nil,
&models.Gpu{
VendorID: "1af4",
},
false,
),
Entry(
"Virtio is supported if explicitly added",
map[string]string{
"OPENSHIFT_AI_SUPPORTED_GPUS": "10de,1af4",
},
&models.Gpu{
VendorID: "1af4",
},
true,
),
Entry(
"Order isn't relevant",
map[string]string{
"OPENSHIFT_AI_SUPPORTED_GPUS": "1af4,10de",
},
&models.Gpu{
VendorID: "10de",
},
true,
),
Entry(
"Case isn't relevant",
nil,
&models.Gpu{
VendorID: "10DE",
},
true,
),
)
})
