Commit: Add support for generic URI in Storage object (kubeflow#979)

* add generic URI for Storage (kubeflow#826)
* handle query parameters
* add http as available storage type
* update the README
* cleanup README
* add unit tests
* use urlparse to get file name
* fail loudly when no filename in uri
* include http(s) in SupportedStorageURIPrefixList
* use regex to check http(s) uri in storage
* fix storageURI validation
* go fmt
* draft for zip & tar archives
* fix imports
* update tests
* support for gzip
* draft version of URI examples
* newline changes
* unit test for http(s) storageUri validation
* use mimetypes.guess_type to derive MIME type from url
* fix Content-Type validations in storage initializer
* update sample README for new ingress access instructions
Showing 10 changed files with 381 additions and 13 deletions.
# Predict on an `InferenceService` with a saved model from a URI

This allows you to specify a model object via the URI (Uniform Resource Identifier) of the model object exposed via an `http` or `https` endpoint.

This `storageUri` option supports single-file models, like a `sklearn` model specified by a [joblib](https://joblib.readthedocs.io/en/latest/) file, as well as archives (e.g. `tar` or `zip`) that contain all the necessary dependencies for other model types (e.g. `tensorflow` or `pytorch`). Below, we'll show examples of both.

## Setup
1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving) (see the quick checks below).
2. Your cluster's Istio Ingress gateway must be network accessible.
3. Your cluster's Istio Egress gateway must [allow http / https traffic](https://knative.dev/docs/serving/outbound-network-access/).
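If you want to sanity-check the first two prerequisites, the following assumes a default KFServing install with Istio running in the `istio-system` namespace:

```bash
# The InferenceService CRD is present once KFServing is installed
kubectl get crd inferenceservices.serving.kubeflow.org

# The Istio ingress gateway service is the network entry point used later on
kubectl get svc istio-ingressgateway -n istio-system
```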
## Sklearn
### Train and freeze the model
Here, we'll train a simple iris model. Please note that `kfserving` requires `sklearn==0.20.3`.

```python
from sklearn import svm
from sklearn import datasets
import joblib

def train(X, y):
    clf = svm.SVC(gamma='auto')
    clf.fit(X, y)
    return clf

def freeze(clf, path='../frozen'):
    joblib.dump(clf, f'{path}/model.joblib')
    return True

if __name__ == '__main__':
    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    clf = train(X, y)
    freeze(clf)
```
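As a quick check, assuming the script above is saved as `train.py` (a hypothetical file name) in a directory alongside `frozen/`, you can produce and inspect the frozen model with:

```bash
# Create the output directory the script expects, then train and freeze
mkdir -p ../frozen
python train.py
ls -lh ../frozen/model.joblib   # this is the file the storageUri will point at
```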
Now, you'll need to take that frozen model object and put it somewhere on the web to expose it. For instance, by pushing the `model.joblib` file to some repo on GitHub and using its raw-file URL.
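Before wiring the URI into an `InferenceService`, it's worth confirming it is publicly reachable and returns a sensible `Content-Type`, since the storage initializer in this change derives the MIME type from the URL and validates the response's `Content-Type`. The URL here is just the example used below; substitute your own:

```bash
# HEAD request (following redirects) to check the model URI and its Content-Type
curl -sIL "https://github.com/tduffy000/kfserving-uri-examples/blob/master/sklearn/frozen/model.joblib?raw=true" \
  | grep -iE '^(HTTP|content-type)'
```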
### Specify and create the `InferenceService`
```yaml
apiVersion: serving.kubeflow.org/v1alpha2
kind: InferenceService
metadata:
  name: sklearn-from-uri
spec:
  default:
    predictor:
      sklearn:
        storageUri: https://github.com/tduffy000/kfserving-uri-examples/blob/master/sklearn/frozen/model.joblib?raw=true
```

Apply the YAML to create the `InferenceService`:
```bash
kubectl apply -f sklearn_uri.yaml
```
Expected Output
```
$ inferenceservice.serving.kubeflow.org/sklearn-from-uri created
```
### Run a prediction
The first step is to [determine the ingress IP and ports](https://github.com/kubeflow/kfserving/blob/master/README.md#determine-the-ingress-ip-and-ports) and set `INGRESS_HOST` and `INGRESS_PORT`.

Now, if everything went according to plan, you should be able to hit the endpoint exposing the model we just uploaded. The request body comes from `input.json`, a small file of iris examples that is also added in this commit.
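Before sending the request, confirm the service is ready and set `SERVICE_HOSTNAME`. A common way to derive it (assuming the service's URL is published under `status.url`) is:

```bash
# Wait until the READY column reports True
kubectl get inferenceservice sklearn-from-uri

# Use the host part of the status URL as the Host header for ingress routing
SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-from-uri -o jsonpath='{.status.url}' | cut -d "/" -f 3)
```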
```bash
MODEL_NAME=sklearn-from-uri
INPUT_PATH=@./input.json
curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
```
Expected Output
```
$ *   Trying 10.0.1.16...
* TCP_NODELAY set
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0* Connected to 10.0.1.16 (10.0.1.16) port 30749 (#0)
> POST /v1/models/sklearn-from-uri:predict HTTP/1.1
> Host: sklearn-from-uri.kfserving-uri-storage.example.com
> User-Agent: curl/7.58.0
> Accept: */*
> Content-Length: 86
> Content-Type: application/x-www-form-urlencoded
>
} [86 bytes data]
* upload completely sent off: 86 out of 86 bytes
< HTTP/1.1 200 OK
< content-length: 23
< content-type: application/json; charset=UTF-8
< date: Thu, 06 Aug 2020 23:13:42 GMT
< server: istio-envoy
< x-envoy-upstream-service-time: 7
<
{ [23 bytes data]
100   109  100    23  100    86    605   2263 --:--:-- --:--:-- --:--:--  2868
* Connection #0 to host 10.0.1.16 left intact
{
  "predictions": [
    1,
    1
  ]
}
```
## Tensorflow
This example also shows how to pull in a tarball containing all of the required model artifacts; `tensorflow`, for instance, requires multiple files in a strict directory structure in order to be servable.
### Train and freeze the model

```python
from sklearn import datasets
import numpy as np
import tensorflow as tf

def _ohe(targets):
    y = np.zeros((150, 3))
    for i, label in enumerate(targets):
        y[i, label] = 1.0
    return y

def train(X, y, epochs, batch_size=16):
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(4,)),
        tf.keras.layers.Dense(16, activation=tf.nn.relu),
        tf.keras.layers.Dense(16, activation=tf.nn.relu),
        tf.keras.layers.Dense(3, activation='softmax')
    ])
    model.compile(tf.keras.optimizers.RMSprop(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(X, y, epochs=epochs, batch_size=batch_size)
    return model

def freeze(model, path='../frozen'):
    model.save(f'{path}/0001')
    return True

if __name__ == '__main__':
    iris = datasets.load_iris()
    X, targets = iris.data, iris.target
    y = _ohe(targets)
    model = train(X, y, epochs=50)
    freeze(model)
```
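Assuming the script above is saved as `train_tf.py` (again, a hypothetical name) in a directory alongside `frozen/`, running it writes a TensorFlow SavedModel under `../frozen/0001/`:

```bash
mkdir -p ../frozen
python train_tf.py
ls ../frozen/0001   # should contain saved_model.pb and a variables/ directory
```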
The post-training procedure here is a bit different. Instead of directly pushing the frozen output to some URI, we'll need to package it into a tarball. To do so,
```bash
cd ../frozen
tar -cvf artifacts.tar 0001/
gzip < artifacts.tar > artifacts.tgz
```
Here we assume the `0001/` directory has the structure:
```
|-- 0001/
    |-- saved_model.pb
    |-- variables/
        |-- variables.data-00000-of-00001
        |-- variables.index
```
Note that building the tarball from the directory specifying a version number is required for `tensorflow`.
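To double-check that the archive has the expected layout before uploading it, list its contents:

```bash
# The entries should mirror the directory tree shown above
tar -tzf artifacts.tgz
```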
Now, push either the `.tar` or `.tgz` file to some remote URI; the example below uses an archive named `model_artifacts.tar.gz`.
### Specify and create the `InferenceService`
And again, if everything went to plan, we should be able to pull down the tarball and expose the endpoint.

```yaml
apiVersion: serving.kubeflow.org/v1alpha2
kind: InferenceService
metadata:
  name: tensorflow-from-uri
spec:
  default:
    predictor:
      tensorflow:
        storageUri: https://raw.githubusercontent.com/tduffy000/kfserving-uri-examples/master/tensorflow/frozen/model_artifacts.tar.gz
```
Apply the YAML to create the `InferenceService`:
```bash
kubectl apply -f tensorflow_uri.yaml
```
Expected Output
```
$ inferenceservice.serving.kubeflow.org/tensorflow-from-uri created
```
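As with the sklearn example, you can watch for the service to become ready before querying it:

```bash
# Wait until the READY column reports True
kubectl get inferenceservice tensorflow-from-uri
```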
### Run a prediction
Again, make sure to first [determine the ingress IP and ports](https://github.com/kubeflow/kfserving/blob/master/README.md#determine-the-ingress-ip-and-ports) and set `INGRESS_HOST` and `INGRESS_PORT`.

Now that our endpoint is up and running, we can get some predictions.
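As before, `SERVICE_HOSTNAME` can be derived from the service's status URL (assuming the standard `status.url` field):

```bash
SERVICE_HOSTNAME=$(kubectl get inferenceservice tensorflow-from-uri -o jsonpath='{.status.url}' | cut -d "/" -f 3)
```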
```bash
MODEL_NAME=tensorflow-from-uri
INPUT_PATH=@./input.json
curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
```
Expected Output
```
$ *   Trying 10.0.1.16...
* TCP_NODELAY set
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0* Connected to 10.0.1.16 (10.0.1.16) port 30749 (#0)
> POST /v1/models/tensorflow-from-uri:predict HTTP/1.1
> Host: tensorflow-from-uri.default.example.com
> User-Agent: curl/7.58.0
> Accept: */*
> Content-Length: 86
> Content-Type: application/x-www-form-urlencoded
>
} [86 bytes data]
* upload completely sent off: 86 out of 86 bytes
< HTTP/1.1 200 OK
< content-length: 112
< content-type: application/json
< date: Thu, 06 Aug 2020 23:21:19 GMT
< x-envoy-upstream-service-time: 151
< server: istio-envoy
<
{ [112 bytes data]
100   198  100   112  100    86    722    554 --:--:-- --:--:-- --:--:--  1285
* Connection #0 to host 10.0.1.16 left intact
{
  "predictions": [
    [
      0.0204100646,
      0.680984616,
      0.298605353
    ],
    [
      0.0296604875,
      0.658412039,
      0.311927497
    ]
  ]
}
```
The commit also adds the `input.json` used in the prediction requests above:
```json
{
  "instances": [
    [6.8, 2.8, 4.8, 1.4],
    [6.0, 3.4, 4.5, 1.6]
  ]
}
```
It also adds `sklearn_uri.yaml`, the manifest applied in the sklearn example:
```yaml
apiVersion: serving.kubeflow.org/v1alpha2
kind: InferenceService
metadata:
  name: sklearn-from-uri
spec:
  default:
    predictor:
      sklearn:
        storageUri: https://github.com/tduffy000/kfserving-uri-examples/blob/master/sklearn/frozen/model.joblib?raw=true
```
and `tensorflow_uri.yaml`, the manifest applied in the tensorflow example:
```yaml
apiVersion: serving.kubeflow.org/v1alpha2
kind: InferenceService
metadata:
  name: tensorflow-from-uri
spec:
  default:
    predictor:
      tensorflow:
        storageUri: https://raw.githubusercontent.com/tduffy000/kfserving-uri-examples/master/tensorflow/frozen/model_artifacts.tar.gz
```