FaaS positioning & supporting user-defined methods to handle more HTTP methods #2111


Merged — 33 commits from `feature/faas-and-http-verbs` into `master`, Apr 26, 2021

Commits (33)
2c5fb83
WIP Faas/HTTP
RobertLucian Apr 19, 2021
9f27b39
WIP Faas/HTTP
RobertLucian Apr 20, 2021
2bfe940
WIP Faas/HTTP - Go
RobertLucian Apr 20, 2021
170afe2
WIP Faas/HTTP - Python
RobertLucian Apr 20, 2021
99ba78e
Allow handle_async method for async workload
RobertLucian Apr 20, 2021
0e8ed54
WIP Faas/HTTP - Python
RobertLucian Apr 20, 2021
b1e2f9d
Adjust examples + fix batch
RobertLucian Apr 20, 2021
bdeefd1
WIP on the realtime API
RobertLucian Apr 21, 2021
ef6fbc9
WIP on the realtime API
RobertLucian Apr 21, 2021
86975d2
WIP on the realtime gRPC
RobertLucian Apr 21, 2021
041bf25
Ensure there is always a single gRPC service defined
RobertLucian Apr 22, 2021
d6db5cd
Python client and fixes
RobertLucian Apr 22, 2021
4410704
Nits
RobertLucian Apr 22, 2021
02b6f13
Fix typo in the batch TF validation
RobertLucian Apr 22, 2021
67bbc6f
Address some PR comments
RobertLucian Apr 22, 2021
cfa114b
Remove python client from batch API
RobertLucian Apr 22, 2021
1a64e12
Fix batch
RobertLucian Apr 22, 2021
3098901
Fix batch
RobertLucian Apr 22, 2021
e5b0b7b
Fix batch
RobertLucian Apr 22, 2021
6fca832
Merge branch 'master' into feature/faas-and-http-verbs
RobertLucian Apr 22, 2021
474fdd5
Merge branch 'master' into feature/faas-and-http-verbs
RobertLucian Apr 22, 2021
5725226
Add preliminary docs
RobertLucian Apr 23, 2021
a6518ed
Address PR comments
RobertLucian Apr 23, 2021
efdbdcc
Merge branch 'master' into feature/faas-and-http-verbs
RobertLucian Apr 23, 2021
f130f40
Update docs
RobertLucian Apr 24, 2021
7ba8b36
Fixes
RobertLucian Apr 26, 2021
a352e60
Changes to the docs
RobertLucian Apr 26, 2021
0b7b678
Merge branch 'master' into feature/faas-and-http-verbs
RobertLucian Apr 26, 2021
e7419f4
Changes to the docs
RobertLucian Apr 26, 2021
97c8f5f
Docs fixes
RobertLucian Apr 26, 2021
2e47a80
Misc changes
RobertLucian Apr 26, 2021
24c721a
Update handler naming
deliahu Apr 26, 2021
bcc2a8d
Address PR comments
RobertLucian Apr 26, 2021
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -219,6 +219,6 @@ If you are only modifying the CLI, `make cli-watch` will build the CLI and re-bu

If you are only modifying the operator, `make operator-local` will build and start the operator locally, and build/restart it when files are changed.

If you are modifying code in the API images (i.e. any of the Python serving code), `make images-dev` may build more images than you need during testing. For example, if you are only testing using the `python-predictor-cpu` image, you can run `./dev/registry.sh update-single python-predictor-cpu`.
If you are modifying code in the API images (i.e. any of the Python serving code), `make images-dev` may build more images than you need during testing. For example, if you are only testing using the `python-handler-cpu` image, you can run `./dev/registry.sh update-single python-handler-cpu`.

See `Makefile` for additional dev commands.
2 changes: 1 addition & 1 deletion Makefile
@@ -144,7 +144,7 @@ images-api-skip-push:
images-manager-skip-push:
@./dev/registry.sh update-single manager --skip-push
images-iris:
@./dev/registry.sh update-single python-predictor-cpu
@./dev/registry.sh update-single python-handler-cpu

registry-create:
@./dev/registry.sh create
2 changes: 1 addition & 1 deletion build/build-image.sh
@@ -29,7 +29,7 @@ fi

build_args=""

if [ "${image}" == "python-predictor-gpu" ]; then
if [ "${image}" == "python-handler-gpu" ]; then
cuda=("10.0" "10.1" "10.1" "10.2" "10.2" "11.0" "11.1")
cudnn=("7" "7" "8" "7" "8" "8" "8")
for i in ${!cudnn[@]}; do
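The `cuda` and `cudnn` arrays above are parallel lists: each index pairs one CUDA version with one cuDNN version, and the loop builds one image variant per pair. A rough Python sketch of the same pairing (the `cuda<ver>-cudnn<ver>` tag format is inferred from the `dev/registry.sh` diff below, which tags the GPU image `${CORTEX_VERSION}-cuda10.2-cudnn8`):

```python
# Sketch: pair each CUDA version with its cuDNN version, as the bash loop does.
cuda = ["10.0", "10.1", "10.1", "10.2", "10.2", "11.0", "11.1"]
cudnn = ["7", "7", "8", "7", "8", "8", "8"]

# One build per (cuda, cudnn) pair, e.g. for tagging python-handler-gpu images.
tags = [f"cuda{cu}-cudnn{cn}" for cu, cn in zip(cuda, cudnn)]
print(tags[0])    # cuda10.0-cudnn7
print(len(tags))  # 7
```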
8 changes: 4 additions & 4 deletions build/images.sh
@@ -20,10 +20,10 @@
set -euo pipefail

api_images=(
"python-predictor-cpu"
"python-predictor-gpu"
"tensorflow-predictor"
"python-predictor-inf"
"python-handler-cpu"
"python-handler-gpu"
"tensorflow-handler"
"python-handler-inf"
)

dev_images=(
2 changes: 1 addition & 1 deletion build/push-image.sh
@@ -24,7 +24,7 @@ image=$2

echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin

if [ "$image" == "python-predictor-gpu" ]; then
if [ "$image" == "python-handler-gpu" ]; then
cuda=("10.0" "10.1" "10.1" "10.2" "10.2" "11.0" "11.1")
cudnn=("7" "7" "8" "7" "8" "8" "8")
for i in ${!cudnn[@]}; do
8 changes: 4 additions & 4 deletions cli/cmd/errors.go
@@ -286,13 +286,13 @@ func ErrorAPINotFoundInConfig(apiName string) error {
})
}

func ErrorNotSupportedForKindAndType(kind userconfig.Kind, predictorType userconfig.PredictorType) error {
func ErrorNotSupportedForKindAndType(kind userconfig.Kind, handlerType userconfig.HandlerType) error {
return errors.WithStack(&errors.Error{
Kind: ErrNotSupportedForKindAndType,
Message: fmt.Sprintf("this command is still in beta and currently only supports %s with type %s", userconfig.RealtimeAPIKind.String(), userconfig.PythonPredictorType.String()),
Message: fmt.Sprintf("this command is still in beta and currently only supports %s with type %s", userconfig.RealtimeAPIKind.String(), userconfig.PythonHandlerType.String()),
Metadata: map[string]interface{}{
"apiKind": kind.String(),
"predictorType": predictorType.String(),
"apiKind": kind.String(),
"handlerType": handlerType.String(),
},
})
}
5 changes: 0 additions & 5 deletions cli/cmd/lib_async_apis.go
@@ -25,7 +25,6 @@ import (
"github.com/cortexlabs/cortex/pkg/lib/table"
libtime "github.com/cortexlabs/cortex/pkg/lib/time"
"github.com/cortexlabs/cortex/pkg/operator/schema"
"github.com/cortexlabs/cortex/pkg/types/userconfig"
)

const (
@@ -47,10 +46,6 @@ func asyncAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (stri

out += "\n" + console.Bold("endpoint: ") + asyncAPI.Endpoint + "\n"

if !(asyncAPI.Spec.Predictor.Type == userconfig.PythonPredictorType && asyncAPI.Spec.Predictor.MultiModelReloading == nil) {
out += "\n" + describeModelInput(asyncAPI.Status, asyncAPI.Spec.Predictor, asyncAPI.Endpoint)
}

out += "\n" + apiHistoryTable(asyncAPI.APIVersions)

if !_flagVerbose {
83 changes: 18 additions & 65 deletions cli/cmd/lib_realtime_apis.go
@@ -29,7 +29,6 @@ import (
"github.com/cortexlabs/cortex/pkg/consts"
"github.com/cortexlabs/cortex/pkg/lib/console"
"github.com/cortexlabs/cortex/pkg/lib/errors"
"github.com/cortexlabs/cortex/pkg/lib/json"
s "github.com/cortexlabs/cortex/pkg/lib/strings"
"github.com/cortexlabs/cortex/pkg/lib/table"
libtime "github.com/cortexlabs/cortex/pkg/lib/time"
@@ -52,15 +51,18 @@ func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment)
out += "\n" + console.Bold("metrics dashboard: ") + *realtimeAPI.DashboardURL + "\n"
}

if realtimeAPI.Spec.Predictor.IsGRPC() {
if realtimeAPI.Spec.Handler.IsGRPC() {
out += "\n" + console.Bold("insecure endpoint: ") + fmt.Sprintf("%s:%d", realtimeAPI.Endpoint, realtimeAPI.GRPCPorts["insecure"])
out += "\n" + console.Bold("secure endpoint: ") + fmt.Sprintf("%s:%d", realtimeAPI.Endpoint, realtimeAPI.GRPCPorts["secure"]) + "\n"
} else {
out += "\n" + console.Bold("endpoint: ") + realtimeAPI.Endpoint + "\n"
}

if !(realtimeAPI.Spec.Predictor.Type == userconfig.PythonPredictorType && realtimeAPI.Spec.Predictor.MultiModelReloading == nil) && realtimeAPI.Spec.Predictor.ProtobufPath == nil {
out += "\n" + describeModelInput(realtimeAPI.Status, realtimeAPI.Spec.Predictor, realtimeAPI.Endpoint)
if !(realtimeAPI.Spec.Handler.Type == userconfig.PythonHandlerType && realtimeAPI.Spec.Handler.MultiModelReloading == nil) && realtimeAPI.Spec.Handler.ProtobufPath == nil {
describedModels := describeModelInput(realtimeAPI.Status, realtimeAPI.RealtimeModelMetadata.TFModelSummary, realtimeAPI.RealtimeModelMetadata.PythonModelSummary)
if describedModels != "" {
out += "\n" + describedModels
}
}

out += "\n" + apiHistoryTable(realtimeAPI.APIVersions)
@@ -158,39 +160,28 @@ func code5XXStr(metrics *metrics.Metrics) string {
return s.Int(metrics.NetworkStats.Code5XX)
}

func describeModelInput(status *status.Status, predictor *userconfig.Predictor, apiEndpoint string) string {
func describeModelInput(status *status.Status, apiTFLiveReloadingSummary *schema.TFLiveReloadingSummary, apiModelSummary *schema.PythonModelSummary) string {
if status.Updated.Ready+status.Stale.Ready == 0 {
return "the models' metadata schema will be available when the api is live\n"
}

cachingEnabled := predictor.Models != nil && predictor.Models.CacheSize != nil && predictor.Models.DiskCacheSize != nil
if predictor.Type == userconfig.TensorFlowPredictorType && !cachingEnabled {
apiTFLiveReloadingSummary, err := getAPITFLiveReloadingSummary(apiEndpoint)
if err != nil {
if strings.Contains(errors.Message(err), "context deadline exceeded") {
return "error retrieving the models' metadata schema: unable to connect to the API, you either do not have access or the API is too busy" + "\n"
}
return "error retrieving the models' metadata schema: " + errors.Message(err) + "\n"
}
if apiTFLiveReloadingSummary != nil {
t, err := parseAPITFLiveReloadingSummary(apiTFLiveReloadingSummary)
if err != nil {
return "error retrieving the model's input schema: " + errors.Message(err) + "\n"
return "error parsing the model's input schema: " + errors.Message(err) + "\n"
}
return t
}

apiModelSummary, err := getAPIModelSummary(apiEndpoint)
if err != nil {
if strings.Contains(errors.Message(err), "context deadline exceeded") {
return "error retrieving the models' metadata schema: unable to connect to the API, you either do not have access or the API is too busy" + "\n"
if apiModelSummary != nil {
t, err := parseAPIModelSummary(apiModelSummary)
if err != nil {
return "error parsing the models' metadata schema: " + errors.Message(err) + "\n"
}
return "error retrieving the models' metadata schema: " + errors.Message(err) + "\n"
}
t, err := parseAPIModelSummary(apiModelSummary)
if err != nil {
return "error retrieving the models' metadata schema: " + errors.Message(err) + "\n"
return t
}
return t

return ""
}

func getModelFromModelID(modelID string) (modelName string, modelVersion int64, err error) {
@@ -229,45 +220,7 @@ func makeRequest(request *http.Request) (http.Header, []byte, error) {
return response.Header, bodyBytes, nil
}

func getAPIModelSummary(apiEndpoint string) (*schema.APIModelSummary, error) {
req, err := http.NewRequest("GET", apiEndpoint, nil)
if err != nil {
return nil, errors.Wrap(err, "unable to request api summary")
}
req.Header.Set("Content-Type", "application/json")
_, response, err := makeRequest(req)
if err != nil {
return nil, err
}

var apiModelSummary schema.APIModelSummary
err = json.DecodeWithNumber(response, &apiModelSummary)
if err != nil {
return nil, errors.Wrap(err, "unable to parse api summary response")
}
return &apiModelSummary, nil
}

func getAPITFLiveReloadingSummary(apiEndpoint string) (*schema.APITFLiveReloadingSummary, error) {
req, err := http.NewRequest("GET", apiEndpoint, nil)
if err != nil {
return nil, errors.Wrap(err, "unable to request api summary")
}
req.Header.Set("Content-Type", "application/json")
_, response, err := makeRequest(req)
if err != nil {
return nil, err
}

var apiTFLiveReloadingSummary schema.APITFLiveReloadingSummary
err = json.DecodeWithNumber(response, &apiTFLiveReloadingSummary)
if err != nil {
return nil, errors.Wrap(err, "unable to parse api summary response")
}
return &apiTFLiveReloadingSummary, nil
}

func parseAPIModelSummary(summary *schema.APIModelSummary) (string, error) {
func parseAPIModelSummary(summary *schema.PythonModelSummary) (string, error) {
rows := make([][]interface{}, 0)

for modelName, modelMetadata := range summary.ModelMetadata {
@@ -324,7 +277,7 @@ func parseAPIModelSummary(summary *schema.APIModelSummary) (string, error) {
return t.MustFormat(), nil
}

func parseAPITFLiveReloadingSummary(summary *schema.APITFLiveReloadingSummary) (string, error) {
func parseAPITFLiveReloadingSummary(summary *schema.TFLiveReloadingSummary) (string, error) {
latestVersions := make(map[string]int64)

numRows := 0
8 changes: 4 additions & 4 deletions cli/cmd/prepare_debug.go
@@ -84,10 +84,10 @@ var _prepareDebugCmd = &cobra.Command{
}

if apiToPrepare.Kind != userconfig.RealtimeAPIKind {
exit.Error(ErrorNotSupportedForKindAndType(apiToPrepare.Kind, userconfig.UnknownPredictorType))
exit.Error(ErrorNotSupportedForKindAndType(apiToPrepare.Kind, userconfig.UnknownHandlerType))
}
if apiToPrepare.Predictor.Type != userconfig.PythonPredictorType {
exit.Error(ErrorNotSupportedForKindAndType(apiToPrepare.Kind, apiToPrepare.Predictor.Type))
if apiToPrepare.Handler.Type != userconfig.PythonHandlerType {
exit.Error(ErrorNotSupportedForKindAndType(apiToPrepare.Kind, apiToPrepare.Handler.Type))
}

apiSpec := spec.API{
Expand All @@ -107,6 +107,6 @@ docker run -p 9000:8888 \
-e "CORTEX_VERSION=%s" \
-e "CORTEX_API_SPEC=/mnt/project/%s" \
-v %s:/mnt/project \
%s`, consts.CortexVersion, debugFileName, path.Clean(projectRoot), apiToPrepare.Predictor.Image))
%s`, consts.CortexVersion, debugFileName, path.Clean(projectRoot), apiToPrepare.Handler.Image))
},
}
2 changes: 1 addition & 1 deletion dev/load/cortex.yaml
@@ -14,7 +14,7 @@

- name: load
kind: RealtimeAPI
predictor:
handler:
type: python
path: predictor.py
log_level: debug
4 changes: 2 additions & 2 deletions dev/load/predictor.py
@@ -17,7 +17,7 @@
from cortex_internal.lib.log import logger as cortex_logger


class PythonPredictor:
class Handler:
def __init__(self, config):
num_success = 0
num_fail = 0
@@ -58,5 +58,5 @@ def __init__(self, config):
extra={"finished": True, "num_success": num_success, "num_fail": num_fail},
)

def predict(self, payload):
def handle_post(self, payload):
return "ok"
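This diff illustrates the PR's core change: instead of a `PythonPredictor` class with a single `predict` method, a `Handler` class defines one `handle_<verb>` method per HTTP method it serves. A minimal, hypothetical sketch (the class shape and `handle_post` follow the diff above; `handle_get` and its signature are illustrative assumptions, not taken from this PR):

```python
class Handler:
    def __init__(self, config):
        # config comes from the handler section of the API spec
        self.responses = {"status": "ok"}

    def handle_post(self, payload):
        # invoked for POST requests; payload is the parsed request body
        return "ok"

    def handle_get(self):
        # hypothetical: invoked for GET requests
        return self.responses
```

With this shape, requests are routed to the `handle_<verb>` method matching their HTTP method, if one is defined.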
2 changes: 1 addition & 1 deletion dev/registry.sh
@@ -138,7 +138,7 @@ function build_and_push() {
set -euo pipefail # necessary since this is called in a new shell by parallel

tag=$CORTEX_VERSION
if [ "${image}" == "python-predictor-gpu" ]; then
if [ "${image}" == "python-handler-gpu" ]; then
tag="${CORTEX_VERSION}-cuda10.2-cudnn8"
fi

6 changes: 3 additions & 3 deletions dev/versions.md
@@ -191,7 +191,7 @@ Note: it's ok if example training notebooks aren't upgraded, as long as the expo
1. Check if there are any updates
to [Dockerfile.neuron-rtd](https://github.com/aws/aws-neuron-sdk/blob/master/docs/neuron-container-tools/docker-example/Dockerfile.neuron-rtd)
which should be brought in to `images/neuron-rtd/Dockerfile`
1. Set the version of `aws-neuron-tools` and `aws-neuron-runtime` in `images/python-predictor-inf/Dockerfile`
1. Set the version of `aws-neuron-tools` and `aws-neuron-runtime` in `images/python-handler-inf/Dockerfile`
and `images/tensorflow-serving-inf/Dockerfile`
1. Run `docker run --rm -it ubuntu:18.04`
1. Run the first `RUN` command used in `images/tensorflow-serving-inf/Dockerfile`, having omitted the version specified
@@ -214,15 +214,15 @@ Note: it's ok if example training notebooks aren't upgraded, as long as the expo

## S6-overlay supervisor

1. Locate the `s6-overlay` installation in `images/python-predictor-*/Dockerfile` and `images/tensorflow-predictor/Dockerfile`.
1. Locate the `s6-overlay` installation in `images/python-handler-*/Dockerfile` and `images/tensorflow-handler/Dockerfile`.
1. Update the version in each serving image with the newer one in https://github.com/just-containers/s6-overlay.

## Nginx

1. Run a base image of ubuntu that matches the version tag used for the serving images. The running command
is `docker run -it --rm <base-image>`
1. Run `apt update && apt-cache policy nginx`. Notice the latest minor version of nginx (e.g. `1.14`)
1. Locate the `nginx` package in `images/python-predictor-*/Dockerfile` and `images/tensorflow-predictor/Dockerfile`.
1. Locate the `nginx` package in `images/python-handler-*/Dockerfile` and `images/tensorflow-handler/Dockerfile`.
1. Update the version for all `nginx` appearances using the minor version from step 2 and add an asterisk at the end to
denote any version (e.g. `1.14.*`)

8 changes: 4 additions & 4 deletions docs/clients/python.md
@@ -81,18 +81,18 @@ Delete an environment configured on this machine.
<!-- CORTEX_VERSION_MINOR -->

```python
| create_api(api_spec: dict, predictor=None, task=None, requirements=[], conda_packages=[], project_dir: Optional[str] = None, force: bool = True, wait: bool = False) -> list
| create_api(api_spec: dict, handler=None, task=None, requirements=[], conda_packages=[], project_dir: Optional[str] = None, force: bool = True, wait: bool = False) -> list
```

Deploy an API.

**Arguments**:

- `api_spec` - A dictionary defining a single Cortex API. See https://docs.cortex.dev/v/master/ for schema.
- `predictor` - A Cortex Predictor class implementation. Not required for TaskAPI/TrafficSplitter kinds.
- `handler` - A Cortex handler class implementation. Not required for TaskAPI/TrafficSplitter kinds.
- `task` - A callable class/function implementation. Not required for RealtimeAPI/BatchAPI/TrafficSplitter kinds.
- `requirements` - A list of PyPI dependencies that will be installed before the predictor class implementation is invoked.
- `conda_packages` - A list of Conda dependencies that will be installed before the predictor class implementation is invoked.
- `requirements` - A list of PyPI dependencies that will be installed before the handler class implementation is invoked.
- `conda_packages` - A list of Conda dependencies that will be installed before the handler class implementation is invoked.
- `project_dir` - Path to a python project.
- `force` - Override any in-progress api updates.
- `wait` - Streams logs until the APIs are ready.
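As a rough usage sketch of the renamed `handler` argument (the environment name, API name, and spec values are placeholders for illustration, not from this PR):

```python
# Hypothetical handler: __init__ receives the handler config from the API spec.
class Handler:
    def __init__(self, config):
        self.greeting = config.get("greeting", "hello")

    def handle_post(self, payload):
        return {"message": self.greeting, "echo": payload}

# Placeholder API spec; see the configuration docs for the full schema.
api_spec = {
    "name": "greeter",
    "kind": "RealtimeAPI",
    "handler": {
        "type": "python",
        "config": {"greeting": "hi"},
    },
}

# With the Python client (assumes an environment named "aws" is configured):
# import cortex
# cx = cortex.client("aws")
# cx.create_api(api_spec, handler=Handler, requirements=[], wait=True)
```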
6 changes: 3 additions & 3 deletions docs/clusters/observability/logging.md
@@ -72,7 +72,7 @@ enable you to add custom metadata to the logs.

See the structured logging docs for each API kind:

- [RealtimeAPI](../../workloads/realtime/predictors.md#structured-logging)
- [AsyncAPI](../../workloads/async/predictors.md#structured-logging)
- [BatchAPI](../../workloads/batch/predictors.md#structured-logging)
- [RealtimeAPI](../../workloads/realtime/handler.md#structured-logging)
- [AsyncAPI](../../workloads/async/handler.md#structured-logging)
- [BatchAPI](../../workloads/batch/handler.md#structured-logging)
- [TaskAPI](../../workloads/task/definitions.md#structured-logging)
2 changes: 1 addition & 1 deletion docs/clusters/observability/metrics.md
@@ -72,7 +72,7 @@ You can use any of these metrics to set up your own dashboards.

## Custom user metrics

It is possible to export your own custom metrics by using the `MetricsClient` class in your predictor code. This allows
It is possible to export your own custom metrics by using the `MetricsClient` class in your handler code. This allows
you to create custom metrics from your deployed API that can later be used on your own custom dashboards.

Code examples on how to use custom metrics for each API kind can be found here:
18 changes: 10 additions & 8 deletions docs/summary.md
@@ -30,32 +30,34 @@

* Realtime APIs
* [Example](workloads/realtime/example.md)
* [Predictor](workloads/realtime/predictors.md)
* [Configuration](workloads/realtime/configuration.md)
* [Handler](workloads/realtime/handler.md)
* [Models](workloads/realtime/models.md)
* Multi-model
* [Example](workloads/realtime/multi-model/example.md)
* [Configuration](workloads/realtime/multi-model/configuration.md)
* [Caching](workloads/realtime/multi-model/caching.md)
* [Configuration](workloads/realtime/configuration.md)
* [Parallelism](workloads/realtime/parallelism.md)
* [Server-side batching](workloads/realtime/server-side-batching.md)
* [Autoscaling](workloads/realtime/autoscaling.md)
* [Statuses](workloads/realtime/statuses.md)
* [Metrics](workloads/realtime/metrics.md)
* Multi-model
* [Example](workloads/realtime/multi-model/example.md)
* [Configuration](workloads/realtime/multi-model/configuration.md)
* [Caching](workloads/realtime/multi-model/caching.md)
* Traffic Splitter
* [Example](workloads/realtime/traffic-splitter/example.md)
* [Configuration](workloads/realtime/traffic-splitter/configuration.md)
* [Troubleshooting](workloads/realtime/troubleshooting.md)
* [Async APIs](workloads/async/async-apis.md)
* [Example](workloads/async/example.md)
* [Predictor](workloads/async/predictors.md)
* [Handler](workloads/async/handler.md)
* [Models](workloads/async/models.md)
* [Configuration](workloads/async/configuration.md)
* [Statuses](workloads/async/statuses.md)
* [Webhooks](workloads/async/webhooks.md)
* [Metrics](workloads/async/metrics.md)
* Batch APIs
* [Example](workloads/batch/example.md)
* [Predictor](workloads/batch/predictors.md)
* [Handler](workloads/batch/handler.md)
* [Models](workloads/batch/models.md)
* [Configuration](workloads/batch/configuration.md)
* [Jobs](workloads/batch/jobs.md)
* [Statuses](workloads/batch/statuses.md)
2 changes: 1 addition & 1 deletion docs/workloads/async/autoscaling.md
@@ -103,6 +103,6 @@ image and for the api to initialize (via its `__init__()` method).
If you want the autoscaler to react as quickly as possible, set `upscale_stabilization_period` and `window` to their
minimum values (0s and 10s respectively).

If it takes a long time to initialize your API replica (i.e. install dependencies and run your predictor's `__init__()`
If it takes a long time to initialize your API replica (i.e. install dependencies and run your handler's `__init__()`
function), consider building your own API image to use instead of the default image. With this approach, you can
pre-download/build/install any custom dependencies and bake them into the image.