Skip to content

Commit

Permalink
add model initializer (kubeflow#156)
Browse files Browse the repository at this point in the history
* add download injector

* add download injector

* merge deployment mutators

* add framework method to control mapping from source to local

* move annotations to constants

* move default mount path to constants

* adding missing import for constants in frameworks

* fix tests to account for mapping

* mess around with paths

* actually make the downloader an init contianer

* add pvc:// support

* make parts of pvc path explicit in test

* rename dockerfile

* provisioner makefile

* use logging

* refactor as predictor-initializer

* rename to model initailizer

* udpate message when user container not found

* use only one annotation

* linting

* rename injector to model initializer

* actually call renamed function

* more renames

* move uri validation to validating webhook

* rename to mutator

* make slice of mutators

* use container from gcr.io

* add get model source uri to tensorrt spec

* fix merge error

* fix tests to account for annotation

* use constant in tests

* rename mount location

* fix typo

* fix linter errors

* fix annotation name.. the old one failed with this error:
    message: 'Revision creation failed with message: Revision.serving.knative.dev
      "tensorrt-simple-string-default-qp5q2" is invalid: metadata.annotations: Invalid
      value: "serving.kubeflow.org/internal/model-initializer/sourceURI": a qualified
      name must consist of alphanumeric characters, ''-'', ''_'' or ''.'', and must
      start and end with an alphanumeric character (e.g. ''MyName'',  or ''my.name'',  or
      ''123-abc'', regex used for validation is ''([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]'')
      with an optional DNS subdomain prefix and ''/'' (e.g. ''example.com/MyName'').'

* fix annotation name in reconciler tests

* add method to pytorch

* attempt to make constants nicer

* use ref for user contaner and add failure test

* add comments to clarify usage

* revert bad change
  • Loading branch information
rakelkar authored and k8s-ci-robot committed Jun 30, 2019
1 parent 7d7f108 commit 1d9b9fa
Show file tree
Hide file tree
Showing 23 changed files with 717 additions and 112 deletions.
6 changes: 6 additions & 0 deletions pkg/apis/serving/v1alpha1/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
)

type FrameworkHandler interface {
GetModelSourceUri() string
CreateModelServingContainer(modelName string, config *FrameworksConfig) *v1.Container
ApplyDefaults()
Validate() error
Expand All @@ -40,6 +41,11 @@ var (
DefaultCPURequests = resource.MustParse("1")
)

// Returns a URI to the model. This URI is passed to the model-initializer via the ModelInitializerSourceUriInternalAnnotationKey
func (m *ModelSpec) GetModelSourceUri() string {
return getHandler(m).GetModelSourceUri()
}

func (m *ModelSpec) CreateModelServingContainer(modelName string, config *FrameworksConfig) *v1.Container {
return getHandler(m).CreateModelServingContainer(modelName, config)
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/serving/v1alpha1/framework_custom.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
)

func (c *CustomSpec) GetModelSourceUri() string {
return ""
}

func (c *CustomSpec) CreateModelServingContainer(modelName string, config *FrameworksConfig) *v1.Container {
return &c.Container
}
Expand Down
7 changes: 6 additions & 1 deletion pkg/apis/serving/v1alpha1/framework_scikit.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"strings"

"github.com/kubeflow/kfserving/pkg/constants"
"github.com/kubeflow/kfserving/pkg/utils"
v1 "k8s.io/api/core/v1"
)
Expand All @@ -33,6 +34,10 @@ var (

var _ FrameworkHandler = (*SKLearnSpec)(nil)

func (s *SKLearnSpec) GetModelSourceUri() string {
return s.ModelURI
}

func (s *SKLearnSpec) CreateModelServingContainer(modelName string, config *FrameworksConfig) *v1.Container {
imageName := SKLearnServerImageName
if config.SKlearn.ContainerImage != "" {
Expand All @@ -43,7 +48,7 @@ func (s *SKLearnSpec) CreateModelServingContainer(modelName string, config *Fram
Resources: s.Resources,
Args: []string{
"--model_name=" + modelName,
"--model_dir=" + s.ModelURI,
"--model_dir=" + constants.DefaultModelLocalMountPath,
},
}
}
Expand Down
7 changes: 6 additions & 1 deletion pkg/apis/serving/v1alpha1/framework_tensorflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"strings"

"github.com/kubeflow/kfserving/pkg/constants"
"github.com/kubeflow/kfserving/pkg/utils"
v1 "k8s.io/api/core/v1"
)
Expand Down Expand Up @@ -44,6 +45,10 @@ var (
InvalidTensorflowRuntimeExcludesGPU = "RuntimeVersion is GPU enabled but GPU resources are not requested. " + InvalidTensorflowRuntimeVersionError
)

func (t *TensorflowSpec) GetModelSourceUri() string {
return t.ModelURI
}

func (t *TensorflowSpec) CreateModelServingContainer(modelName string, config *FrameworksConfig) *v1.Container {
imageName := TensorflowServingImageName
if config.Tensorflow.ContainerImage != "" {
Expand All @@ -58,7 +63,7 @@ func (t *TensorflowSpec) CreateModelServingContainer(modelName string, config *F
"--port=" + TensorflowServingGRPCPort,
"--rest_api_port=" + TensorflowServingRestPort,
"--model_name=" + modelName,
"--model_base_path=" + t.ModelURI,
"--model_base_path=" + constants.DefaultModelLocalMountPath,
},
}
}
Expand Down
7 changes: 6 additions & 1 deletion pkg/apis/serving/v1alpha1/framework_xgboost.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"strings"

"github.com/kubeflow/kfserving/pkg/constants"
"github.com/kubeflow/kfserving/pkg/utils"
v1 "k8s.io/api/core/v1"
)
Expand All @@ -31,6 +32,10 @@ var (
DefaultXGBoostRuntimeVersion = "latest"
)

func (x *XGBoostSpec) GetModelSourceUri() string {
return x.ModelURI
}

func (x *XGBoostSpec) CreateModelServingContainer(modelName string, config *FrameworksConfig) *v1.Container {
imageName := XGBoostServerImageName
if config.Xgboost.ContainerImage != "" {
Expand All @@ -41,7 +46,7 @@ func (x *XGBoostSpec) CreateModelServingContainer(modelName string, config *Fram
Resources: x.Resources,
Args: []string{
"--model_name=" + modelName,
"--model_dir=" + x.ModelURI,
"--model_dir=" + constants.DefaultModelLocalMountPath,
},
}
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/serving/v1alpha1/kfservice_framework_pytorch.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ var (

var _ FrameworkHandler = (*PyTorchSpec)(nil)

func (s *PyTorchSpec) GetModelSourceUri() string {
return s.ModelURI
}

func (s *PyTorchSpec) CreateModelServingContainer(modelName string, config *FrameworksConfig) *v1.Container {
imageName := PyTorchServerImageName
if config.PyTorch.ContainerImage != "" {
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/serving/v1alpha1/kfservice_framework_tensorrt.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ var (
TensorRTISRestPort = int32(8080)
)

func (t *TensorRTSpec) GetModelSourceUri() string {
return t.ModelURI
}

func (t *TensorRTSpec) CreateModelServingContainer(modelName string, config *FrameworksConfig) *v1.Container {
imageName := DefaultTensorRTISImageName
if config.TensorRT.ContainerImage != "" {
Expand Down
30 changes: 30 additions & 0 deletions pkg/apis/serving/v1alpha1/kfservice_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package v1alpha1

import (
"fmt"
"regexp"
"strings"

runtime "k8s.io/apimachinery/pkg/runtime"
)
Expand All @@ -29,6 +31,11 @@ const (
MaxReplicasLowerBoundExceededError = "MaxReplicas cannot be less than 0."
TrafficBoundsExceededError = "TrafficPercent must be between [0, 100]."
TrafficProvidedWithoutCanaryError = "Canary must be specified when CanaryTrafficPercent > 0."
UnsupportedModelURIFormatError = "ModelURI, must be one of: [%s] or be an absolute or relative local path. Model URI [%s] is not supported."
)

var (
SupportedModelSourceURIPrefixList = []string{"gs://", "s3://", "pvc://", "file://"}
)

// ValidateCreate implements https://godoc.org/sigs.k8s.io/controller-runtime/pkg/webhook/admission#Validator
Expand Down Expand Up @@ -77,12 +84,35 @@ func validateModelSpec(spec *ModelSpec) error {
if err := spec.Validate(); err != nil {
return err
}
if err := validateModelURI(spec.GetModelSourceUri()); err != nil {
return err
}
if err := validateReplicas(spec.MinReplicas, spec.MaxReplicas); err != nil {
return err
}
return nil
}

func validateModelURI(modelSourceURI string) error {
if modelSourceURI == "" {
return nil
}

// local path (not some protocol?)
if !regexp.MustCompile("\\w+?://").MatchString(modelSourceURI) {
return nil
}

// one of the prefixes we know?
for _, prefix := range SupportedModelSourceURIPrefixList {
if strings.HasPrefix(modelSourceURI, prefix) {
return nil
}
}

return fmt.Errorf(UnsupportedModelURIFormatError, strings.Join(SupportedModelSourceURIPrefixList, ", "), modelSourceURI)
}

func validateReplicas(minReplicas int, maxReplicas int) error {
if minReplicas < 0 {
return fmt.Errorf(MinReplicasLowerBoundExceededError)
Expand Down
36 changes: 36 additions & 0 deletions pkg/apis/serving/v1alpha1/kfservice_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,42 @@ func makeTestKFService() KFService {
return kfservice
}

func TestValidModelURIPrefixOK(t *testing.T) {
g := gomega.NewGomegaWithT(t)
for _, prefix := range SupportedModelSourceURIPrefixList {
kfsvc := makeTestKFService()
kfsvc.Spec.Default.Tensorflow.ModelURI = prefix + "foo/bar"
g.Expect(kfsvc.ValidateCreate()).Should(gomega.Succeed())
}
}

func TestEmptyModelURIPrefixOK(t *testing.T) {
g := gomega.NewGomegaWithT(t)
kfsvc := makeTestKFService()
kfsvc.Spec.Default.Tensorflow.ModelURI = ""
g.Expect(kfsvc.ValidateCreate()).Should(gomega.Succeed())
}

func TestLocalPathModelURIPrefixOK(t *testing.T) {
g := gomega.NewGomegaWithT(t)
kfsvc := makeTestKFService()
kfsvc.Spec.Default.Tensorflow.ModelURI = "some/relative/path"
g.Expect(kfsvc.ValidateCreate()).Should(gomega.Succeed())
kfsvc.Spec.Default.Tensorflow.ModelURI = "/some/absolute/path"
g.Expect(kfsvc.ValidateCreate()).Should(gomega.Succeed())
kfsvc.Spec.Default.Tensorflow.ModelURI = "/"
g.Expect(kfsvc.ValidateCreate()).Should(gomega.Succeed())
kfsvc.Spec.Default.Tensorflow.ModelURI = "foo"
g.Expect(kfsvc.ValidateCreate()).Should(gomega.Succeed())
}

func TestUnkownModelURIPrefixFails(t *testing.T) {
g := gomega.NewGomegaWithT(t)
kfsvc := makeTestKFService()
kfsvc.Spec.Default.Tensorflow.ModelURI = "blob://foo/bar"
g.Expect(kfsvc.ValidateCreate()).ShouldNot(gomega.Succeed())
}

func TestRejectMultipleModelSpecs(t *testing.T) {
g := gomega.NewGomegaWithT(t)
kfsvc := makeTestKFService()
Expand Down
27 changes: 18 additions & 9 deletions pkg/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ var (
KFServiceGKEAcceleratorAnnotationKey = KFServingAPIGroupName + "/gke-accelerator"
)

// KFService Internal Annotations
var (
KFServiceInternalAnnotationsPrefix = "internal." + KFServingAPIGroupName
ModelInitializerSourceUriInternalAnnotationKey = KFServiceInternalAnnotationsPrefix + "/model-initializer-sourceuri"
)

// Controller Constants
var (
ControllerLabelName = KFServingName + "-controller-manager"
Expand All @@ -52,22 +58,25 @@ var (

// Webhook Constants
var (
WebhookServerName = KFServingName + "-webhook-server"
WebhookServerServiceName = WebhookServerName + "-service"
WebhookServerSecretName = WebhookServerName + "-secret"
KFServiceValidatingWebhookConfigName = strings.Join([]string{KFServiceName, KFServingAPIGroupName}, ".")
KFServiceMutatingWebhookConfigName = strings.Join([]string{KFServiceName, KFServingAPIGroupName}, ".")
KFServiceValidatingWebhookName = strings.Join([]string{KFServiceName, WebhookServerName, "validator"}, ".")
KFServiceDefaultingWebhookName = strings.Join([]string{KFServiceName, WebhookServerName, "defaulter"}, ".")
AcceleratorInjectorMutatorWebhookName = strings.Join([]string{KFServiceName, WebhookServerName, "accelerator-injector"}, ".")
WebhookFailurePolicy = v1beta1.Fail
WebhookServerName = KFServingName + "-webhook-server"
WebhookServerServiceName = WebhookServerName + "-service"
WebhookServerSecretName = WebhookServerName + "-secret"
KFServiceValidatingWebhookConfigName = strings.Join([]string{KFServiceName, KFServingAPIGroupName}, ".")
KFServiceMutatingWebhookConfigName = strings.Join([]string{KFServiceName, KFServingAPIGroupName}, ".")
KFServiceValidatingWebhookName = strings.Join([]string{KFServiceName, WebhookServerName, "validator"}, ".")
KFServiceDefaultingWebhookName = strings.Join([]string{KFServiceName, WebhookServerName, "defaulter"}, ".")
DeploymentMutatorWebhookName = strings.Join([]string{KFServiceName, WebhookServerName, "deployment-mutator"}, ".")
WebhookFailurePolicy = v1beta1.Fail
)

// GPU Constants
const (
NvidiaGPUResourceType = "nvidia.com/gpu"
)

// DefaultModelLocalMountPath is where models will be mounted by the model-initializer
const DefaultModelLocalMountPath = "/mnt/models"

func getEnvOrDefault(key string, fallback string) string {
if value, ok := os.LookupEnv(key); ok {
return value
Expand Down
27 changes: 15 additions & 12 deletions pkg/controller/kfservice/kfservice_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ limitations under the License.
package service

import (
"testing"
"time"

"github.com/knative/pkg/apis"
"github.com/knative/serving/pkg/apis/serving/v1beta1"
"github.com/kubeflow/kfserving/pkg/constants"
testutils "github.com/kubeflow/kfserving/pkg/testing"
v1 "k8s.io/api/core/v1"
"testing"
"time"

duckv1beta1 "github.com/knative/pkg/apis/duck/v1beta1"
knservingv1alpha1 "github.com/knative/serving/pkg/apis/serving/v1alpha1"
Expand Down Expand Up @@ -165,10 +166,11 @@ func TestReconcile(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{"serving.kubeflow.org/kfservice": "foo"},
Annotations: map[string]string{
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
constants.ModelInitializerSourceUriInternalAnnotationKey: instance.Spec.Default.Tensorflow.ModelURI,
},
},
Spec: knservingv1alpha1.RevisionSpec{
Expand All @@ -183,7 +185,7 @@ func TestReconcile(t *testing.T) {
"--port=" + servingv1alpha1.TensorflowServingGRPCPort,
"--rest_api_port=" + servingv1alpha1.TensorflowServingRestPort,
"--model_name=" + instance.Name,
"--model_base_path=" + instance.Spec.Default.Tensorflow.ModelURI,
"--model_base_path=" + constants.DefaultModelLocalMountPath,
},
},
},
Expand Down Expand Up @@ -301,10 +303,11 @@ func TestCanaryReconcile(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{"serving.kubeflow.org/kfservice": "bar"},
Annotations: map[string]string{
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
"autoscaling.knative.dev/target": "1",
"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
"autoscaling.knative.dev/maxScale": "3",
"autoscaling.knative.dev/minScale": "1",
constants.ModelInitializerSourceUriInternalAnnotationKey: canary.Spec.Canary.Tensorflow.ModelURI,
},
},
Spec: knservingv1alpha1.RevisionSpec{
Expand All @@ -319,7 +322,7 @@ func TestCanaryReconcile(t *testing.T) {
"--port=" + servingv1alpha1.TensorflowServingGRPCPort,
"--rest_api_port=" + servingv1alpha1.TensorflowServingRestPort,
"--model_name=" + canary.Name,
"--model_base_path=" + canary.Spec.Canary.Tensorflow.ModelURI,
"--model_base_path=" + constants.DefaultModelLocalMountPath,
},
},
},
Expand Down
Loading

0 comments on commit 1d9b9fa

Please sign in to comment.