Skip to content

Commit

Permalink
Remove default queue proxy resource limit and Add KFServing benchmark…
Browse files Browse the repository at this point in the history
…ing (kubeflow#894)

* Add benchmark script

* Add sklearn benchmarking

* Add tf benchmark

* Add perf result for autoscaler and activator

* Add HPA experiments

* Address comments

* Bump up tf flower example cpu limit

* Bump cpu limit for tf benchmark

* Update hpa result for tf

* Fix hpa benchmark
  • Loading branch information
yuzisun committed Jul 6, 2020
1 parent 97d1c74 commit 941a38e
Show file tree
Hide file tree
Showing 14 changed files with 482 additions and 58 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ Error Set:
[Developer Guide](/docs/DEVELOPER_GUIDE.md).

### Performance Tests
[KFServing benchmark test comparing Knative and Kubernetes Deployment with HPA](test/benchmark/README.md)
[Performance Tests](https://docs.google.com/document/d/1ss7M3cx1qD1PVpTaKTu_Y3C80JJz4nvMZlIyuZutZoE/edit#)

### Contributor Guide
Expand Down
22 changes: 8 additions & 14 deletions pkg/controller/inferenceservice/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ import (

"knative.dev/pkg/network"

"github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/knative"

"k8s.io/apimachinery/pkg/api/errors"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -174,7 +172,6 @@ func TestInferenceServiceWithOnlyPredictor(t *testing.T) {
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
"queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage,
constants.StorageInitializerSourceUriInternalAnnotationKey: defaultInstance.Spec.Default.Predictor.Tensorflow.StorageURI,
},
},
Expand Down Expand Up @@ -495,7 +492,6 @@ func TestInferenceServiceWithDefaultAndCanaryPredictor(t *testing.T) {
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
"queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage,
constants.StorageInitializerSourceUriInternalAnnotationKey: canary.Spec.Canary.Predictor.Tensorflow.StorageURI,
},
},
Expand Down Expand Up @@ -1097,11 +1093,10 @@ func TestInferenceServiceWithTransformer(t *testing.T) {
constants.KServiceComponentLabel: constants.Transformer.String(),
},
Annotations: map[string]string{
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
"queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage,
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -1661,11 +1656,10 @@ func TestInferenceServiceWithExplainer(t *testing.T) {
constants.KServiceComponentLabel: constants.Explainer.String(),
},
Annotations: map[string]string{
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
"queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage,
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
},
},
Spec: knservingv1.RevisionSpec{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ import (
"testing"
"time"

"github.com/kubeflow/kfserving/pkg/controller/inferenceservice/resources/knative"

"github.com/google/go-cmp/cmp"
"github.com/kubeflow/kfserving/pkg/apis/serving/v1alpha2"
"github.com/kubeflow/kfserving/pkg/constants"
Expand Down Expand Up @@ -130,7 +128,6 @@ func TestKnativeServiceReconcile(t *testing.T) {
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"internal.serving.kubeflow.org/storage-initializer-sourceuri": "gs://testuri",
"queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage,
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -175,7 +172,6 @@ func TestKnativeServiceReconcile(t *testing.T) {
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"internal.serving.kubeflow.org/storage-initializer-sourceuri": "gs://testuri2",
"queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage,
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -238,7 +234,6 @@ func TestKnativeServiceReconcile(t *testing.T) {
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"internal.serving.kubeflow.org/storage-initializer-sourceuri": "gs://testuri",
"queue.sidecar.serving.knative.dev/resourcePercentage": knative.DefaultQueueSideCarResourcePercentage,
},
},
Spec: knservingv1.RevisionSpec{
Expand Down
10 changes: 0 additions & 10 deletions pkg/controller/inferenceservice/resources/knative/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"knative.dev/serving/pkg/apis/autoscaling"
"knative.dev/serving/pkg/apis/serving"
knservingv1 "knative.dev/serving/pkg/apis/serving/v1"
)

Expand All @@ -40,12 +39,6 @@ var serviceAnnotationDisallowedList = []string{
"kubectl.kubernetes.io/last-applied-configuration",
}

const (
// DefaultQueueSideCarResourcePercentage is the default value used for the
// queue-proxy sidecar resource percentage annotation.
// It is set to 20% of the main container's resources; InferenceService
// defaults to 1 CPU, which works out to 200m for the queue-proxy.
// https://github.com/knative/serving/blob/1d263950f9f2fea85a4dd394948a029c328af9d9/pkg/reconciler/revision/resources/resourceboundary.go#L30
DefaultQueueSideCarResourcePercentage = "20"
)

type ServiceBuilder struct {
inferenceServiceConfig *v1alpha2.InferenceServicesConfig
credentialBuilder *credentials.CredentialBuilder
Expand Down Expand Up @@ -393,9 +386,6 @@ func (c *ServiceBuilder) buildAnnotations(metadata metav1.ObjectMeta, minReplica
annotations[autoscaling.MaxScaleAnnotationKey] = fmt.Sprint(maxReplicas)
}

if _, ok := annotations[serving.QueueSideCarResourcePercentageAnnotation]; !ok {
annotations[serving.QueueSideCarResourcePercentageAnnotation] = DefaultQueueSideCarResourcePercentage
}
// User can pass down scaling target annotation to overwrite the target default 1
if _, ok := annotations[autoscaling.TargetAnnotationKey]; !ok {
if parallelism == 0 {
Expand Down
45 changes: 16 additions & 29 deletions pkg/controller/inferenceservice/resources/knative/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ var isvc = v1alpha2.InferenceService{
Name: "mnist",
Namespace: "default",
Annotations: map[string]string{
constants.InferenceServiceGKEAcceleratorAnnotationKey: "nvidia-tesla-t4",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
constants.InferenceServiceGKEAcceleratorAnnotationKey: "nvidia-tesla-t4",
},
},
Spec: v1alpha2.InferenceServiceSpec{
Expand Down Expand Up @@ -100,7 +99,6 @@ var defaultService = &knservingv1.Service{
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/maxScale": "3",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
constants.InferenceServiceGKEAcceleratorAnnotationKey: "nvidia-tesla-t4",
constants.StorageInitializerSourceUriInternalAnnotationKey: isvc.Spec.Default.Predictor.Tensorflow.StorageURI,
},
Expand Down Expand Up @@ -149,7 +147,6 @@ var canaryService = &knservingv1.Service{
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/maxScale": "3",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
constants.InferenceServiceGKEAcceleratorAnnotationKey: "nvidia-tesla-t4",
constants.StorageInitializerSourceUriInternalAnnotationKey: "s3://test/mnist-2/export",
},
Expand Down Expand Up @@ -195,8 +192,7 @@ func TestInferenceServiceToKnativeService(t *testing.T) {
Name: "mnist",
Namespace: "default",
Annotations: map[string]string{
constants.InferenceServiceGKEAcceleratorAnnotationKey: "nvidia-tesla-t4",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
constants.InferenceServiceGKEAcceleratorAnnotationKey: "nvidia-tesla-t4",
},
},
Spec: v1alpha2.InferenceServiceSpec{
Expand Down Expand Up @@ -273,7 +269,6 @@ func TestInferenceServiceToKnativeService(t *testing.T) {
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -334,7 +329,6 @@ func TestInferenceServiceToKnativeService(t *testing.T) {
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -396,7 +390,6 @@ func TestInferenceServiceToKnativeService(t *testing.T) {
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -434,7 +427,6 @@ func TestInferenceServiceToKnativeService(t *testing.T) {
"autoscaling.knative.dev/target": "2",
constants.StorageInitializerSourceUriInternalAnnotationKey: "test",
"kubectl.kubernetes.io/last-applied-configuration": "test2",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
},
},
Spec: v1alpha2.InferenceServiceSpec{
Expand Down Expand Up @@ -472,7 +464,6 @@ func TestInferenceServiceToKnativeService(t *testing.T) {
"autoscaling.knative.dev/minScale": "1",
"sourceName": "srcName",
"prop1": "val1",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -613,11 +604,10 @@ func TestTransformerToKnativeService(t *testing.T) {
constants.KServiceComponentLabel: constants.Transformer.String(),
},
Annotations: map[string]string{
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/maxScale": "3",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/maxScale": "3",
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -660,11 +650,10 @@ func TestTransformerToKnativeService(t *testing.T) {
constants.KServiceComponentLabel: constants.Transformer.String(),
},
Annotations: map[string]string{
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/minScale": "2",
"autoscaling.knative.dev/maxScale": "4",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/minScale": "2",
"autoscaling.knative.dev/maxScale": "4",
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -807,10 +796,9 @@ func TestExplainerToKnativeService(t *testing.T) {
constants.KServiceComponentLabel: constants.Explainer.String(),
},
Annotations: map[string]string{
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
},
},
Spec: knservingv1.RevisionSpec{
Expand Down Expand Up @@ -853,10 +841,9 @@ func TestExplainerToKnativeService(t *testing.T) {
constants.KServiceComponentLabel: constants.Explainer.String(),
},
Annotations: map[string]string{
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"queue.sidecar.serving.knative.dev/resourcePercentage": DefaultQueueSideCarResourcePercentage,
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
},
},
Spec: knservingv1.RevisionSpec{
Expand Down
Loading

0 comments on commit 941a38e

Please sign in to comment.