Added target and metric to components (kubeflow#2082)

* Added target and metric to components Added scaleTarget and scaleMetric to ComponentSpec Made changes to ksvc_reconciler to add annotations to component as required Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Added validation Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Fixed validation bugs Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Updated comments Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Updated python sdk and openapi Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Added test for autoscaling Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Added tests for autoscaling Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Added e2e for autoscaling changes Removed validation for metric concurrency Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Updated CRD Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Fixed liniting issues Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Made ScaleMetric as enum Added a test for raw deployment Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Fixed issues with type after changes in ScaleMetric type Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Updated crd Updated crd Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Updated test case. Modified to check deployment mode before checking hpa or kpa. Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Added debug logs to debug e2e failure in workflow alone Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Disabling all tests temporarily to debug failing e2e Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Reverting debugging changes Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Temporary changes to verify if e2e is passing after Dan's port forwarding Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Removed e2e changes. Removed comments. Updated python sdk version to debug test failure. Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Updated test variable Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Fixed linting error Added test logs Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Adding logs to print spec Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Added more debugging logs Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Reverting python definition log in run e2e script Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Updated python sdk and docs Fixed liniting error Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Fix for failing protobuf issue Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Added debug logs for kserve controller Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Added logs to check controller changes show up Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Added knative hpa Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Updated resource requirements Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Moved knative hpa installation to setup-deps Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Removed skip for tests Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com> Signed-off-by: Dan Sun <dsun20@bloomberg.net> * Mutate and add deployment mode annotation if RawDeployment or ModelMesh Signed-off-by: Dan Sun <dsun20@bloomberg.net> Co-authored-by: Dan Sun <dsun20@bloomberg.net>
magdalenakuhn17 · Jun 11, 2022 · aa68b4c · aa68b4c
1 parent 7bcd11e
commit aa68b4c
Show file tree

Hide file tree

Showing 29 changed files with 852 additions and 35 deletions.
diff --git a/config/crd/serving.kserve.io_inferenceservices.yaml b/config/crd/serving.kserve.io_inferenceservices.yaml
@@ -2782,6 +2782,15 @@ spec:
                       type: string
                     runtimeClassName:
                       type: string
+                    scaleMetric:
+                      enum:
+                        - cpu
+                        - memory
+                        - concurrency
+                        - rps
+                      type: string
+                    scaleTarget:
+                      type: integer
                     schedulerName:
                       type: string
                     securityContext:
@@ -8027,6 +8036,15 @@ spec:
                       type: string
                     runtimeClassName:
                       type: string
+                    scaleMetric:
+                      enum:
+                        - cpu
+                        - memory
+                        - concurrency
+                        - rps
+                      type: string
+                    scaleTarget:
+                      type: integer
                     schedulerName:
                       type: string
                     securityContext:
@@ -12133,6 +12151,15 @@ spec:
                       type: string
                     runtimeClassName:
                       type: string
+                    scaleMetric:
+                      enum:
+                        - cpu
+                        - memory
+                        - concurrency
+                        - rps
+                      type: string
+                    scaleTarget:
+                      type: integer
                     schedulerName:
                       type: string
                     securityContext:

diff --git a/pkg/apis/serving/v1beta1/component.go b/pkg/apis/serving/v1beta1/component.go
@@ -80,6 +80,16 @@ type ComponentExtensionSpec struct {
 	// Maximum number of replicas for autoscaling.
 	// +optional
 	MaxReplicas int `json:"maxReplicas,omitempty"`
+	// ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for.
+	// concurrency and rps targets are supported by Knative Pod Autoscaler
+	//(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/).
+	// +optional
+	ScaleTarget *int `json:"scaleTarget,omitempty"`
+	// ScaleMetric defines the scaling metric type watched by autoscaler
+	// possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via
+	// Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics).
+	// +optional
+	ScaleMetric *ScaleMetric `json:"scaleMetric,omitempty"`
 	// ContainerConcurrency specifies how many requests can be processed concurrently, this sets the hard limit of the container
 	// concurrency(https://knative.dev/docs/serving/autoscaling/concurrency).
 	// +optional
@@ -98,6 +108,17 @@ type ComponentExtensionSpec struct {
 	Batcher *Batcher `json:"batcher,omitempty"`
 }
 
+// ScaleMetric enum
+// +kubebuilder:validation:Enum=cpu;memory;concurrency;rps
+type ScaleMetric string
+
+const (
+	MetricCPU         ScaleMetric = "cpu"
+	MetricMemory      ScaleMetric = "memory"
+	MetricConcurrency ScaleMetric = "concurrency"
+	MetricRPS         ScaleMetric = "rps"
+)
+
 // Default the ComponentExtensionSpec
 func (s *ComponentExtensionSpec) Default(config *InferenceServicesConfig) {}
 

diff --git a/pkg/apis/serving/v1beta1/inference_service_defaults.go b/pkg/apis/serving/v1beta1/inference_service_defaults.go
@@ -72,10 +72,18 @@ func (isvc *InferenceService) Default() {
 	if err != nil {
 		panic(err)
 	}
-	isvc.DefaultInferenceService(configMap)
+	deployConfig, err := NewDeployConfig(cli)
+	if err != nil {
+		panic(err)
+	}
+	isvc.DefaultInferenceService(configMap, deployConfig)
 }
 
-func (isvc *InferenceService) DefaultInferenceService(config *InferenceServicesConfig) {
+func (isvc *InferenceService) DefaultInferenceService(config *InferenceServicesConfig, deployConfig *DeployConfig) {
+	if deployConfig.DefaultDeploymentMode == string(constants.ModelMeshDeployment) ||
+		deployConfig.DefaultDeploymentMode == string(constants.RawDeployment) {
+		isvc.ObjectMeta.Annotations[constants.DeploymentMode] = deployConfig.DefaultDeploymentMode
+	}
 	deploymentMode, ok := isvc.ObjectMeta.Annotations[constants.DeploymentMode]
 	if !ok || deploymentMode != string(constants.ModelMeshDeployment) {
 		// Only attempt to assign runtimes for non-modelmesh predictors

diff --git a/pkg/apis/serving/v1beta1/inference_service_defaults_test.go b/pkg/apis/serving/v1beta1/inference_service_defaults_test.go
@@ -45,6 +45,9 @@ func TestInferenceServiceDefaults(t *testing.T) {
 			},
 		},
 	}
+	deployConfig := &DeployConfig{
+		DefaultDeploymentMode: "Serverless",
+	}
 	isvc := InferenceService{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      "foo",
@@ -83,7 +86,7 @@ func TestInferenceServiceDefaults(t *testing.T) {
 	}
 	resources := v1.ResourceRequirements{Requests: defaultResource, Limits: defaultResource}
 	isvc.Spec.DeepCopy()
-	isvc.DefaultInferenceService(config)
+	isvc.DefaultInferenceService(config, deployConfig)
 	g.Expect(*&isvc.Spec.Predictor.Tensorflow).To(gomega.BeNil())
 	g.Expect(*&isvc.Spec.Predictor.Model).NotTo(gomega.BeNil())
 
@@ -111,6 +114,9 @@ func TestCustomPredictorDefaults(t *testing.T) {
 			},
 		},
 	}
+	deployConfig := &DeployConfig{
+		DefaultDeploymentMode: "Serverless",
+	}
 	isvc := InferenceService{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      "foo",
@@ -135,13 +141,16 @@ func TestCustomPredictorDefaults(t *testing.T) {
 	}
 	resources := v1.ResourceRequirements{Requests: defaultResource, Limits: defaultResource}
 	isvc.Spec.DeepCopy()
-	isvc.DefaultInferenceService(config)
+	isvc.DefaultInferenceService(config, deployConfig)
 	g.Expect(isvc.Spec.Predictor.PodSpec.Containers[0].Resources).To(gomega.Equal(resources))
 }
 
 func TestInferenceServiceDefaultsModelMeshAnnotation(t *testing.T) {
 	g := gomega.NewGomegaWithT(t)
 	config := &InferenceServicesConfig{}
+	deployConfig := &DeployConfig{
+		DefaultDeploymentMode: "Serverless",
+	}
 	isvc := InferenceService{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      "foo",
@@ -161,7 +170,7 @@ func TestInferenceServiceDefaultsModelMeshAnnotation(t *testing.T) {
 		},
 	}
 	isvc.Spec.DeepCopy()
-	isvc.DefaultInferenceService(config)
+	isvc.DefaultInferenceService(config, deployConfig)
 	g.Expect(isvc.Spec.Predictor.Model).To(gomega.BeNil())
 	g.Expect(isvc.Spec.Predictor.Tensorflow).ToNot(gomega.BeNil())
 }
diff --git a/pkg/apis/serving/v1beta1/inference_service_validation.go b/pkg/apis/serving/v1beta1/inference_service_validation.go
@@ -26,6 +26,7 @@ import (
 	"github.com/kserve/kserve/pkg/constants"
 	"github.com/kserve/kserve/pkg/utils"
 	"k8s.io/apimachinery/pkg/runtime"
+	"knative.dev/serving/pkg/apis/autoscaling"
 	logf "sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/webhook"
 )
@@ -49,6 +50,8 @@ var _ webhook.Validator = &InferenceService{}
 func (isvc *InferenceService) ValidateCreate() error {
 	validatorLogger.Info("validate create", "name", isvc.Name)
 
+	annotations := isvc.Annotations
+
 	if err := validateInferenceServiceName(isvc); err != nil {
 		return err
 	}
@@ -60,6 +63,7 @@ func (isvc *InferenceService) ValidateCreate() error {
 	if err := validateAutoscalerTargetUtilizationPercentage(isvc); err != nil {
 		return err
 	}
+
 	for _, component := range []Component{
 		&isvc.Spec.Predictor,
 		isvc.Spec.Transformer,
@@ -72,6 +76,7 @@ func (isvc *InferenceService) ValidateCreate() error {
 			if err := utils.FirstNonNilError([]error{
 				component.GetImplementation().Validate(),
 				component.GetExtensions().Validate(),
+				validateAutoScalingCompExtension(annotations, component.GetExtensions()),
 			}); err != nil {
 				return err
 			}
@@ -80,6 +85,18 @@ func (isvc *InferenceService) ValidateCreate() error {
 	return nil
 }
 
+// Validate scaling options component extensions
+func validateAutoScalingCompExtension(annotations map[string]string, compExtSpec *ComponentExtensionSpec) error {
+	deploymentMode := annotations["serving.kserve.io/deploymentMode"]
+	annotationClass := annotations[autoscaling.ClassAnnotationKey]
+	if deploymentMode == string(constants.RawDeployment) || annotationClass == string(autoscaling.HPA) {
+		return validateScalingHPACompExtension(compExtSpec)
+	}
+
+	return validateScalingKPACompExtension(compExtSpec)
+
+}
+
 // ValidateUpdate implements webhook.Validator so a webhook will be registered for the type
 func (isvc *InferenceService) ValidateUpdate(old runtime.Object) error {
 	validatorLogger.Info("validate update", "name", isvc.Name)
@@ -118,7 +135,7 @@ func validateInferenceServiceAutoscaler(isvc *InferenceService) error {
 				switch class {
 				case constants.AutoscalerClassHPA:
 					if metric, ok := annotations[constants.AutoscalerMetrics]; ok {
-						return validateHPAMetrics(metric)
+						return validateHPAMetrics(ScaleMetric(metric))
 					} else {
 						return nil
 					}
@@ -134,7 +151,7 @@ func validateInferenceServiceAutoscaler(isvc *InferenceService) error {
 }
 
 //Validate of autoscaler HPA metrics
-func validateHPAMetrics(metric string) error {
+func validateHPAMetrics(metric ScaleMetric) error {
 	for _, item := range constants.AutoscalerAllowedMetricsList {
 		if item == constants.AutoscalerMetricsType(metric) {
 			return nil
@@ -157,5 +174,67 @@ func validateAutoscalerTargetUtilizationPercentage(isvc *InferenceService) error
 			}
 		}
 	}
+
+	return nil
+}
+
+func validateScalingHPACompExtension(compExtSpec *ComponentExtensionSpec) error {
+	metric := MetricCPU
+	if compExtSpec.ScaleMetric != nil {
+		metric = *compExtSpec.ScaleMetric
+	}
+
+	err := validateHPAMetrics(metric)
+
+	if err != nil {
+		return err
+	}
+
+	if compExtSpec.ScaleTarget != nil {
+		target := *compExtSpec.ScaleTarget
+		if metric == MetricCPU && target < 1 || target > 100 {
+			return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
+		}
+
+		if metric == MetricMemory && target < 1 {
+			return fmt.Errorf("The target memory should be greater than 1 MiB")
+		}
+
+	}
+
+	return nil
+}
+
+func validateKPAMetrics(metric ScaleMetric) error {
+	for _, item := range constants.AutoScalerKPAMetricsAllowedList {
+		if item == constants.AutoScalerKPAMetricsType(metric) {
+			return nil
+		}
+	}
+	return fmt.Errorf("[%s] is not a supported metric.\n", metric)
+
+}
+
+func validateScalingKPACompExtension(compExtSpec *ComponentExtensionSpec) error {
+	metric := MetricConcurrency
+	if compExtSpec.ScaleMetric != nil {
+		metric = *compExtSpec.ScaleMetric
+	}
+
+	err := validateKPAMetrics(metric)
+
+	if err != nil {
+		return err
+	}
+
+	if compExtSpec.ScaleTarget != nil {
+		target := *compExtSpec.ScaleTarget
+
+		if metric == MetricRPS && target < 1 {
+			return fmt.Errorf("The target for rps should be greater than 1")
+		}
+
+	}
+
 	return nil
 }
diff --git a/pkg/apis/serving/v1beta1/openapi_generated.go b/pkg/apis/serving/v1beta1/openapi_generated.go