Skip to content

Commit

Permalink
Added target and metric to components (kubeflow#2082)
Browse files Browse the repository at this point in the history
* Added target and metric to components
Added scaleTarget and scaleMetric to ComponentSpec
Made changes to ksvc_reconciler to add annotations to component as required

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Added validation

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Fixed validation bugs

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Updated comments

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Updated python sdk and openapi

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Added test for autoscaling

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Added tests for autoscaling

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Added e2e for autoscaling changes
Removed validation for metric concurrency

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Updated CRD

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Fixed liniting issues

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Made ScaleMetric as enum
Added a test for raw deployment

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Fixed issues with type after changes in ScaleMetric type

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Updated crd

Updated crd

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Updated test case.
Modified to check deployment mode before checking hpa or kpa.

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Added debug logs to debug e2e failure in workflow alone

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Disabling all tests temporarily to debug failing e2e

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Reverting debugging changes

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Temporary changes to verify if e2e is passing after Dan's port forwarding

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Removed e2e changes.
Removed comments.
Updated python sdk version to debug test failure.

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Updated test variable

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Fixed linting error
Added test logs

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Adding logs to print spec

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Added more debugging logs

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Reverting python definition log in run e2e script

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Updated python sdk and docs
Fixed liniting error

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Fix for failing protobuf issue

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Added debug logs for kserve controller

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Added logs to check controller changes show up

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Added knative hpa

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Updated resource requirements

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Moved knative hpa installation to setup-deps

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>

Removed skip for tests

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>
Signed-off-by: Dan Sun <dsun20@bloomberg.net>

* Mutate and add deployment mode annotation if RawDeployment or ModelMesh

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

Co-authored-by: Dan Sun <dsun20@bloomberg.net>
  • Loading branch information
andyi2it and yuzisun authored Jun 11, 2022
1 parent 7bcd11e commit aa68b4c
Show file tree
Hide file tree
Showing 29 changed files with 852 additions and 35 deletions.
27 changes: 27 additions & 0 deletions config/crd/serving.kserve.io_inferenceservices.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2782,6 +2782,15 @@ spec:
type: string
runtimeClassName:
type: string
scaleMetric:
enum:
- cpu
- memory
- concurrency
- rps
type: string
scaleTarget:
type: integer
schedulerName:
type: string
securityContext:
Expand Down Expand Up @@ -8027,6 +8036,15 @@ spec:
type: string
runtimeClassName:
type: string
scaleMetric:
enum:
- cpu
- memory
- concurrency
- rps
type: string
scaleTarget:
type: integer
schedulerName:
type: string
securityContext:
Expand Down Expand Up @@ -12133,6 +12151,15 @@ spec:
type: string
runtimeClassName:
type: string
scaleMetric:
enum:
- cpu
- memory
- concurrency
- rps
type: string
scaleTarget:
type: integer
schedulerName:
type: string
securityContext:
Expand Down
21 changes: 21 additions & 0 deletions pkg/apis/serving/v1beta1/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@ type ComponentExtensionSpec struct {
// Maximum number of replicas for autoscaling.
// +optional
MaxReplicas int `json:"maxReplicas,omitempty"`
// ScaleTarget specifies the integer target value of the metric type the Autoscaler watches for.
// concurrency and rps targets are supported by Knative Pod Autoscaler
//(https://knative.dev/docs/serving/autoscaling/autoscaling-targets/).
// +optional
ScaleTarget *int `json:"scaleTarget,omitempty"`
// ScaleMetric defines the scaling metric type watched by autoscaler
// possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via
// Knative Pod Autoscaler(https://knative.dev/docs/serving/autoscaling/autoscaling-metrics).
// +optional
ScaleMetric *ScaleMetric `json:"scaleMetric,omitempty"`
// ContainerConcurrency specifies how many requests can be processed concurrently, this sets the hard limit of the container
// concurrency(https://knative.dev/docs/serving/autoscaling/concurrency).
// +optional
Expand All @@ -98,6 +108,17 @@ type ComponentExtensionSpec struct {
Batcher *Batcher `json:"batcher,omitempty"`
}

// ScaleMetric enum
// +kubebuilder:validation:Enum=cpu;memory;concurrency;rps
type ScaleMetric string

const (
MetricCPU ScaleMetric = "cpu"
MetricMemory ScaleMetric = "memory"
MetricConcurrency ScaleMetric = "concurrency"
MetricRPS ScaleMetric = "rps"
)

// Default the ComponentExtensionSpec
func (s *ComponentExtensionSpec) Default(config *InferenceServicesConfig) {}

Expand Down
12 changes: 10 additions & 2 deletions pkg/apis/serving/v1beta1/inference_service_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,18 @@ func (isvc *InferenceService) Default() {
if err != nil {
panic(err)
}
isvc.DefaultInferenceService(configMap)
deployConfig, err := NewDeployConfig(cli)
if err != nil {
panic(err)
}
isvc.DefaultInferenceService(configMap, deployConfig)
}

func (isvc *InferenceService) DefaultInferenceService(config *InferenceServicesConfig) {
func (isvc *InferenceService) DefaultInferenceService(config *InferenceServicesConfig, deployConfig *DeployConfig) {
if deployConfig.DefaultDeploymentMode == string(constants.ModelMeshDeployment) ||
deployConfig.DefaultDeploymentMode == string(constants.RawDeployment) {
isvc.ObjectMeta.Annotations[constants.DeploymentMode] = deployConfig.DefaultDeploymentMode
}
deploymentMode, ok := isvc.ObjectMeta.Annotations[constants.DeploymentMode]
if !ok || deploymentMode != string(constants.ModelMeshDeployment) {
// Only attempt to assign runtimes for non-modelmesh predictors
Expand Down
15 changes: 12 additions & 3 deletions pkg/apis/serving/v1beta1/inference_service_defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ func TestInferenceServiceDefaults(t *testing.T) {
},
},
}
deployConfig := &DeployConfig{
DefaultDeploymentMode: "Serverless",
}
isvc := InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Expand Down Expand Up @@ -83,7 +86,7 @@ func TestInferenceServiceDefaults(t *testing.T) {
}
resources := v1.ResourceRequirements{Requests: defaultResource, Limits: defaultResource}
isvc.Spec.DeepCopy()
isvc.DefaultInferenceService(config)
isvc.DefaultInferenceService(config, deployConfig)
g.Expect(*&isvc.Spec.Predictor.Tensorflow).To(gomega.BeNil())
g.Expect(*&isvc.Spec.Predictor.Model).NotTo(gomega.BeNil())

Expand Down Expand Up @@ -111,6 +114,9 @@ func TestCustomPredictorDefaults(t *testing.T) {
},
},
}
deployConfig := &DeployConfig{
DefaultDeploymentMode: "Serverless",
}
isvc := InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Expand All @@ -135,13 +141,16 @@ func TestCustomPredictorDefaults(t *testing.T) {
}
resources := v1.ResourceRequirements{Requests: defaultResource, Limits: defaultResource}
isvc.Spec.DeepCopy()
isvc.DefaultInferenceService(config)
isvc.DefaultInferenceService(config, deployConfig)
g.Expect(isvc.Spec.Predictor.PodSpec.Containers[0].Resources).To(gomega.Equal(resources))
}

func TestInferenceServiceDefaultsModelMeshAnnotation(t *testing.T) {
g := gomega.NewGomegaWithT(t)
config := &InferenceServicesConfig{}
deployConfig := &DeployConfig{
DefaultDeploymentMode: "Serverless",
}
isvc := InferenceService{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Expand All @@ -161,7 +170,7 @@ func TestInferenceServiceDefaultsModelMeshAnnotation(t *testing.T) {
},
}
isvc.Spec.DeepCopy()
isvc.DefaultInferenceService(config)
isvc.DefaultInferenceService(config, deployConfig)
g.Expect(isvc.Spec.Predictor.Model).To(gomega.BeNil())
g.Expect(isvc.Spec.Predictor.Tensorflow).ToNot(gomega.BeNil())
}
83 changes: 81 additions & 2 deletions pkg/apis/serving/v1beta1/inference_service_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/kserve/kserve/pkg/constants"
"github.com/kserve/kserve/pkg/utils"
"k8s.io/apimachinery/pkg/runtime"
"knative.dev/serving/pkg/apis/autoscaling"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/webhook"
)
Expand All @@ -49,6 +50,8 @@ var _ webhook.Validator = &InferenceService{}
func (isvc *InferenceService) ValidateCreate() error {
validatorLogger.Info("validate create", "name", isvc.Name)

annotations := isvc.Annotations

if err := validateInferenceServiceName(isvc); err != nil {
return err
}
Expand All @@ -60,6 +63,7 @@ func (isvc *InferenceService) ValidateCreate() error {
if err := validateAutoscalerTargetUtilizationPercentage(isvc); err != nil {
return err
}

for _, component := range []Component{
&isvc.Spec.Predictor,
isvc.Spec.Transformer,
Expand All @@ -72,6 +76,7 @@ func (isvc *InferenceService) ValidateCreate() error {
if err := utils.FirstNonNilError([]error{
component.GetImplementation().Validate(),
component.GetExtensions().Validate(),
validateAutoScalingCompExtension(annotations, component.GetExtensions()),
}); err != nil {
return err
}
Expand All @@ -80,6 +85,18 @@ func (isvc *InferenceService) ValidateCreate() error {
return nil
}

// Validate scaling options component extensions
func validateAutoScalingCompExtension(annotations map[string]string, compExtSpec *ComponentExtensionSpec) error {
deploymentMode := annotations["serving.kserve.io/deploymentMode"]
annotationClass := annotations[autoscaling.ClassAnnotationKey]
if deploymentMode == string(constants.RawDeployment) || annotationClass == string(autoscaling.HPA) {
return validateScalingHPACompExtension(compExtSpec)
}

return validateScalingKPACompExtension(compExtSpec)

}

// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type
func (isvc *InferenceService) ValidateUpdate(old runtime.Object) error {
validatorLogger.Info("validate update", "name", isvc.Name)
Expand Down Expand Up @@ -118,7 +135,7 @@ func validateInferenceServiceAutoscaler(isvc *InferenceService) error {
switch class {
case constants.AutoscalerClassHPA:
if metric, ok := annotations[constants.AutoscalerMetrics]; ok {
return validateHPAMetrics(metric)
return validateHPAMetrics(ScaleMetric(metric))
} else {
return nil
}
Expand All @@ -134,7 +151,7 @@ func validateInferenceServiceAutoscaler(isvc *InferenceService) error {
}

//Validate of autoscaler HPA metrics
func validateHPAMetrics(metric string) error {
func validateHPAMetrics(metric ScaleMetric) error {
for _, item := range constants.AutoscalerAllowedMetricsList {
if item == constants.AutoscalerMetricsType(metric) {
return nil
Expand All @@ -157,5 +174,67 @@ func validateAutoscalerTargetUtilizationPercentage(isvc *InferenceService) error
}
}
}

return nil
}

func validateScalingHPACompExtension(compExtSpec *ComponentExtensionSpec) error {
metric := MetricCPU
if compExtSpec.ScaleMetric != nil {
metric = *compExtSpec.ScaleMetric
}

err := validateHPAMetrics(metric)

if err != nil {
return err
}

if compExtSpec.ScaleTarget != nil {
target := *compExtSpec.ScaleTarget
if metric == MetricCPU && target < 1 || target > 100 {
return fmt.Errorf("The target utilization percentage should be a [1-100] integer.")
}

if metric == MetricMemory && target < 1 {
return fmt.Errorf("The target memory should be greater than 1 MiB")
}

}

return nil
}

func validateKPAMetrics(metric ScaleMetric) error {
for _, item := range constants.AutoScalerKPAMetricsAllowedList {
if item == constants.AutoScalerKPAMetricsType(metric) {
return nil
}
}
return fmt.Errorf("[%s] is not a supported metric.\n", metric)

}

func validateScalingKPACompExtension(compExtSpec *ComponentExtensionSpec) error {
metric := MetricConcurrency
if compExtSpec.ScaleMetric != nil {
metric = *compExtSpec.ScaleMetric
}

err := validateKPAMetrics(metric)

if err != nil {
return err
}

if compExtSpec.ScaleTarget != nil {
target := *compExtSpec.ScaleTarget

if metric == MetricRPS && target < 1 {
return fmt.Errorf("The target for rps should be greater than 1")
}

}

return nil
}
62 changes: 62 additions & 0 deletions pkg/apis/serving/v1beta1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit aa68b4c

Please sign in to comment.