From 5c6aee050c6ef4b5229b8d36c5c48bde871ab140 Mon Sep 17 00:00:00 2001 From: lucklypriyansh-2 Date: Thu, 5 Oct 2023 17:09:01 +0530 Subject: [PATCH] GPU, Memory, Restart policy, Country codes, GPU and networking --- go.mod | 8 ++-- go.sum | 3 ++ internal/provider/provider.go | 79 ++++++++++++++++------------------- internal/utils/utils.go | 4 +- sample-deployment.yaml | 7 ++++ 5 files changed, 51 insertions(+), 50 deletions(-) diff --git a/go.mod b/go.mod index 13208c9..3585c87 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/SaladTechnologies/virtual-kubelet-saladcloud go 1.20 require ( - github.com/lucklypriyansh-2/salad-client v0.0.0-20230902193233-7c5a02b4c6d7 + github.com/lucklypriyansh-2/salad-client v0.0.0-20231005112118-e2e44b2b9380 github.com/prometheus/client_model v0.4.0 github.com/sirupsen/logrus v1.9.3 github.com/virtual-kubelet/virtual-kubelet v1.10.0 @@ -37,7 +37,7 @@ require ( github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-cmp v0.5.9 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/uuid v1.3.0 // indirect + github.com/google/uuid v1.3.0 github.com/gorilla/mux v1.8.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect @@ -52,7 +52,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/pkg/errors v0.9.1 // indirect + github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.16.0 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.10.1 // indirect @@ -97,7 +97,7 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apimachinery v0.28.2 k8s.io/apiserver v0.28.1 // indirect - k8s.io/client-go v0.28.1 // indirect + k8s.io/client-go v0.28.1 k8s.io/component-base v0.28.1 // indirect k8s.io/klog/v2 v2.100.1 // indirect k8s.io/kms v0.28.1 // indirect diff --git a/go.sum b/go.sum index c6aec46..f3fc423 100644 --- a/go.sum +++ b/go.sum @@ -133,6 +133,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lucklypriyansh-2/salad-client v0.0.0-20230902193233-7c5a02b4c6d7 h1:N7D71xaSmphJuSZdtJnhq/B5sF2/qEkId00LhrqWusg= github.com/lucklypriyansh-2/salad-client v0.0.0-20230902193233-7c5a02b4c6d7/go.mod h1:VjkYRyCaHqCXvxGIDnUCRg8QEcvOuOkhNnbHsrAPETc= +github.com/lucklypriyansh-2/salad-client v0.0.0-20231005112118-e2e44b2b9380 h1:LQo11CfzUSuxI3VNrm0/ZNxBnIpZ3slvfXWWun5Buvo= +github.com/lucklypriyansh-2/salad-client v0.0.0-20231005112118-e2e44b2b9380/go.mod h1:aFjkAhEyFj9mGwJ+w4rE0tWMTjwE9OBNhl5/Q9dFNDE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= @@ -184,6 +186,7 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= github.com/virtual-kubelet/virtual-kubelet v1.10.0 h1:eV/mFFqThOJLz7Gjn1Ev8LchanGKGA2qZlsW6wipb4g= github.com/virtual-kubelet/virtual-kubelet v1.10.0/go.mod h1:7Pvdei1p82C9uWS1VzLrnXbHTwQcGBoqShahChpacgI= diff --git a/internal/provider/provider.go b/internal/provider/provider.go index 252a097..ab158e0 100644 --- a/internal/provider/provider.go +++ b/internal/provider/provider.go @@ -338,12 +338,9 @@ func (p *SaladCloudProvider) getContainerEnvironment(podMetadata metav1.ObjectMe } func (p *SaladCloudProvider) createContainersObject(pod *corev1.Pod) []saladclient.CreateContainer { - cpu, memory := utils.GetPodResource(pod.Spec) - creteContainersArray := make([]saladclient.CreateContainer, 0) for _, container := range pod.Spec.Containers { - containerResourceRequirement := saladclient.NewContainerResourceRequirements(int32(cpu), int32(memory)) createContainer := saladclient.NewCreateContainer(container.Image, *containerResourceRequirement) @@ -351,36 +348,19 @@ func (p *SaladCloudProvider) createContainersObject(pod *corev1.Pod) []saladclie if container.Command != nil { createContainer.SetCommand(container.Command) } - gpuClass, err := p.getGPUClasses(pod) - if err == nil && gpuClass != nil { - createContainer.Resources.GpuClass = *gpuClass + gpuClasses, err := p.getGPUClasses(pod) + if err == nil && gpuClasses != nil && len(gpuClasses) > 0 { + createContainer.Resources.SetGpuClasses(gpuClasses) } creteContainersArray = append(creteContainersArray, *createContainer) - // TODO Add support for container Registry auth } return creteContainersArray - } func (p *SaladCloudProvider) createContainerGroup(createContainerList []saladclient.CreateContainer, pod *corev1.Pod) []saladclient.CreateContainerGroup { - createContainerGroups := make([]saladclient.CreateContainerGroup, 0) - - if pod.ObjectMeta.GetAnnotations()["countryCodes"] == "" { - pod.ObjectMeta.SetAnnotations(map[string]string{ - "countryCodes": "US", - }) - } - - var countryCodesEnum []saladclient.CountryCode - for _, countryCode := range strings.Split(pod.ObjectMeta.GetAnnotations()["countryCodes"], ",") { - countryCodeEnum := saladclient.CountryCode(countryCode) - countryCodesEnum = append(countryCodesEnum, countryCodeEnum) - } - for _, container := range createContainerList { createContainerGroupRequest := *saladclient.NewCreateContainerGroup(utils.GetPodName(pod.Namespace, pod.Name, pod), container, "always", 1) - createContainerGroupRequest.SetCountryCodes(countryCodesEnum) readinessProbe, err := p.getWorkloadContainerProbeFrom(pod.Spec.Containers[0].ReadinessProbe) if err == nil { createContainerGroupRequest.ReadinessProbe = *readinessProbe @@ -451,34 +431,45 @@ func (p *SaladCloudProvider) getWorkloadContainerProbeFrom(k8sProbe *corev1.Prob return saladclient.NewNullableContainerGroupProbe(probe), nil } -func (p *SaladCloudProvider) getGPUClasses(pod *corev1.Pod) (*saladclient.NullableString, error) { - gpuClasses, _, err := p.apiClient.OrganizationDataAPI.ListGpuClasses(context.Background(), p.inputVars.OrganizationName).Execute() - if err != nil { - log.G(context.Background()).Errorf("Failed to get gpuClasses ", err) - return nil, err - } - gpuRequested, ok := pod.Annotations["salad.com/gpu-classes"] +func (p *SaladCloudProvider) getGPUClasses(pod *corev1.Pod) ([]string, error) { + gpuRequestedString, ok := pod.ObjectMeta.Annotations["salad.com/gpu-classes"] if !ok { return nil, nil } - gpuRequestedIsUUID := false - _, err = uuid.Parse(gpuRequested) - if err == nil { - gpuRequestedIsUUID = true - } - for _, gpu := range gpuClasses.Items { - if !gpuRequestedIsUUID { - if gpu.Name == gpuRequested { - return saladclient.NewNullableString(&gpu.Name), nil + gpuRequested := strings.Split(gpuRequestedString, ",") + saladClientGpuIds := make([]string, 0) + var gpuClasses *saladclient.GpuClassesList = nil + + for _, gpu := range gpuRequested { + gpuCleaned := strings.TrimSpace(strings.ToLower(gpu)) + _, uuidErr := uuid.Parse(gpuCleaned) + if uuidErr == nil { + saladClientGpuIds = append(saladClientGpuIds, gpuCleaned) + } else { + if gpuClasses == nil { + classes, _, err := p.apiClient.OrganizationDataAPI.ListGpuClasses(context.Background(), p.inputVars.OrganizationName).Execute() + if err != nil { + log.G(context.Background()).Errorf("Failed to get gpuClasses ", err) + return nil, err + } else { + gpuClasses = classes + } + } + for _, gpuClass := range gpuClasses.Items { + if strings.TrimSpace(strings.ToLower(gpuClass.Name)) == gpuCleaned { + saladClientGpuIds = append(saladClientGpuIds, gpuClass.Id) + break + } } } } - return nil, nil + return saladClientGpuIds, nil } func (p *SaladCloudProvider) getCountryCodes(pod *corev1.Pod) ([]saladclient.CountryCode, error) { countryCodes := make([]saladclient.CountryCode, 0) - countryCodesFromAnnotation, ok := pod.Annotations["salad.com/country-codes"] + countryCodes = append(countryCodes, "US") + countryCodesFromAnnotation, ok := pod.ObjectMeta.Annotations["salad.com/country-codes"] if !ok { return countryCodes, nil } @@ -494,9 +485,9 @@ func (p *SaladCloudProvider) getCountryCodes(pod *corev1.Pod) ([]saladclient.Cou } func (p *SaladCloudProvider) getNetworking(pod *corev1.Pod) (*saladclient.CreateContainerGroupNetworking, error) { - protocol, hasProtocol := pod.Annotations["salad.com/networking-protocol"] - port, hasPort := pod.Annotations["salad.com/networking-port"] - auth, hasAuth := pod.Annotations["salad.com/networking-auth"] + protocol, hasProtocol := pod.ObjectMeta.Annotations["salad.com/networking-protocol"] + port, hasPort := pod.ObjectMeta.Annotations["salad.com/networking-port"] + auth, hasAuth := pod.ObjectMeta.Annotations["salad.com/networking-auth"] if !hasProtocol || !hasPort || !hasAuth { return nil, nil } diff --git a/internal/utils/utils.go b/internal/utils/utils.go index f3c7d08..a477d42 100644 --- a/internal/utils/utils.go +++ b/internal/utils/utils.go @@ -19,7 +19,7 @@ func roundUpToNearest(value int64, list []int64) int64 { func GetPodResource(podSpec corev1.PodSpec) (cpu int64, memory int64) { allowedCPUValues := []int64{1, 2, 3, 4, 6, 8, 12, 16} - allowedMemoryValues := []int64{1024, 2048, 3, 4, 5, 6, 12} // in GB + allowedMemoryValues := []int64{1024, 2048, 3, 4, 5, 6, 12, 16, 24, 30} // in GB for _, container := range podSpec.Containers { // Convert milliCPU to cores and round to nearest value in the list @@ -50,7 +50,7 @@ func GetPodName(nameSpace, containerGroup string, pod *corev1.Pod) string { func GetPodPhaseFromContainerGroupState(containerGroupState saladclient.ContainerGroupState) corev1.PodPhase { switch containerGroupState.Status { - case saladclient.CONTAINERGROUPSTATUS_PENDING: + case saladclient.CONTAINER_GROUP_STATUS_PENDING: return corev1.PodPending case saladclient.CONTAINERGROUPSTATUS_RUNNING: { diff --git a/sample-deployment.yaml b/sample-deployment.yaml index 56fbbaf..b0d042e 100644 --- a/sample-deployment.yaml +++ b/sample-deployment.yaml @@ -2,6 +2,12 @@ apiVersion: apps/v1 kind: Deployment metadata: name: demo-deployment + annotations: + salad.com/country-codes: "ca,us,mx" + salad.com/gpu-classes: "GTX 1070 (8 GB), ffc51032-64d2-4df3-855a-f3a649895c0f" + salad.com/networking-protocol: "http" + salad.com/networking-port: "80" + salad.com/networking-auth: "true" spec: replicas: 1 selector: @@ -29,6 +35,7 @@ spec: cpu: "1" nodeSelector: kubernetes.io/role: agent + restartPolicy: Always tolerations: - key: "virtual-kubelet.io/provider" operator: "Equal"