Initial docs pass #2192

Merged · 3 commits · May 25, 2021

19 changes: 3 additions & 16 deletions dev/export_images.sh
@@ -40,23 +40,10 @@ for image in "${all_images[@]}"; do
done
echo

cuda=("10.0" "10.1" "10.1" "10.2" "10.2" "11.0" "11.1")
cudnn=("7" "7" "8" "7" "8" "8" "8")

# pull the images from source registry and push them to ECR
for image in "${all_images[@]}"; do
# copy the different cuda/cudnn variations of the python handler image
if [ "$image" = "python-handler-gpu" ]; then
for i in "${!cuda[@]}"; do
full_image="$image:$cortex_version-cuda${cuda[$i]}-cudnn${cudnn[$i]}"
echo "copying $full_image from $source_registry to $destination_registry"
skopeo copy --src-no-creds "docker://$source_registry/$full_image" "docker://$destination_registry/$full_image"
echo
done
else
echo "copying $image:$cortex_version from $source_registry to $destination_registry"
skopeo copy --src-no-creds "docker://$source_registry/$image:$cortex_version" "docker://$destination_registry/$image:$cortex_version"
echo
fi
echo "copying $image:$cortex_version from $source_registry to $destination_registry"
skopeo copy --src-no-creds "docker://$source_registry/$image:$cortex_version" "docker://$destination_registry/$image:$cortex_version"
echo
done
echo "done ✓"
47 changes: 0 additions & 47 deletions dev/python_version_test.sh

This file was deleted.

4 changes: 2 additions & 2 deletions docs/clusters/advanced/self-hosted-images.md
@@ -1,6 +1,6 @@
# Self-hosted Docker images

Self-hosted Docker images can be useful for reducing the ingress costs, for accelerating image pulls, or for eliminating the dependency on Cortex's public container registry.
Self-hosting the Cortex cluster's internal Docker images can be useful for reducing ingress costs, accelerating image pulls, or eliminating the dependency on Cortex's public container registry.

In this guide, we'll use [ECR](https://aws.amazon.com/ecr/) as the destination container registry. When an ECR repository resides in the same region as your Cortex cluster, there are no costs incurred when pulling images.

@@ -33,7 +33,7 @@ Feel free to modify the script if you would like to export the images to a diffe
./cortex/dev/export_images.sh <AWS_REGION> <AWS_ACCOUNT_ID>
```

You can now configure Cortex to use your images when creating a cluster (see [here](../management/create.md) for how to specify cluster images) and/or when deploying APIs (see the configuration docs corresponding to your API type for how to specify API images).
You can now configure Cortex to use your images when creating a cluster (see [here](../management/create.md) for instructions).
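
As a hedged illustration, one way to wire the exported images into a new cluster — the registry path and version tag are placeholders, and the two field names are taken from the default cluster configuration in `docs/clusters/management/create.md`:

```bash
# append image overrides to cluster.yaml (account ID, region, and version are
# placeholders), then create the cluster using the self-hosted images
cat >> cluster.yaml <<'EOF'
image_async_gateway: 123456789012.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/async-gateway:0.35.0
image_cluster_autoscaler: 123456789012.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/cluster-autoscaler:0.35.0
EOF
cortex cluster up cluster.yaml
```
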

## Cleanup

26 changes: 26 additions & 0 deletions docs/clusters/instances/multi.md
@@ -20,11 +20,15 @@ Cortex can be configured to provision different instance types to improve worklo
node_groups:
- name: cpu-spot
instance_type: m5.large
min_instances: 0
max_instances: 5
spot: true
spot_config:
instance_distribution: [m5a.large, m5d.large, m5n.large, m5ad.large, m5dn.large, m4.large, t3.large, t3a.large, t2.large]
- name: cpu-on-demand
instance_type: m5.large
min_instances: 0
max_instances: 5
```

### On-demand cluster supporting CPU, GPU, and Inferentia
@@ -35,10 +39,16 @@ node_groups:
node_groups:
- name: cpu
instance_type: m5.large
min_instances: 0
max_instances: 5
- name: gpu
instance_type: g4dn.xlarge
min_instances: 0
max_instances: 5
- name: inf
instance_type: inf1.xlarge
min_instances: 0
max_instances: 5
```

### Spot cluster supporting CPU and GPU (with on-demand backup)
@@ -49,16 +59,24 @@ node_groups:
node_groups:
- name: cpu-spot
instance_type: m5.large
min_instances: 0
max_instances: 5
spot: true
spot_config:
instance_distribution: [m5a.large, m5d.large, m5n.large, m5ad.large, m5dn.large, m4.large, t3.large, t3a.large, t2.large]
- name: cpu-on-demand
instance_type: m5.large
min_instances: 0
max_instances: 5
- name: gpu-spot
instance_type: g4dn.xlarge
min_instances: 0
max_instances: 5
spot: true
- name: gpu-on-demand
instance_type: g4dn.xlarge
min_instances: 0
max_instances: 5
```

### CPU spot cluster with multiple instance types and on-demand backup
@@ -69,13 +87,21 @@ node_groups:
node_groups:
- name: cpu-1
instance_type: t3.medium
min_instances: 0
max_instances: 5
spot: true
- name: cpu-2
instance_type: m5.2xlarge
min_instances: 0
max_instances: 5
spot: true
- name: cpu-3
instance_type: m5.8xlarge
min_instances: 0
max_instances: 5
spot: true
- name: cpu-4
instance_type: m5.24xlarge
min_instances: 0
max_instances: 5
```
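
Each of the node group configurations above is applied the same way when creating a cluster; a minimal sketch, assuming the configuration is saved as `cluster.yaml`:

```bash
# create the cluster with the node groups defined in cluster.yaml
cortex cluster up cluster.yaml
```
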
2 changes: 2 additions & 0 deletions docs/clusters/instances/spot.md
@@ -43,6 +43,8 @@ There is a spot instance limit associated with your AWS account for each instanc
node_groups:
- name: cpu-spot
instance_type: m5.large
min_instances: 0
max_instances: 5
spot: true
spot_config:
instance_distribution: [m5a.large, m5d.large, m5n.large, m5ad.large, m5dn.large, m4.large, t3.large, t3a.large, t2.large]
1 change: 0 additions & 1 deletion docs/clusters/management/create.md
@@ -104,7 +104,6 @@ image_async_gateway: quay.io/cortexlabs/async-gateway:master
image_cluster_autoscaler: quay.io/cortexlabs/cluster-autoscaler:master
image_metrics_server: quay.io/cortexlabs/metrics-server:master
image_inferentia: quay.io/cortexlabs/inferentia:master
image_neuron_rtd: quay.io/cortexlabs/neuron-rtd:master
image_nvidia: quay.io/cortexlabs/nvidia:master
image_fluent_bit: quay.io/cortexlabs/fluent-bit:master
image_istio_proxy: quay.io/cortexlabs/istio-proxy:master
2 changes: 1 addition & 1 deletion docs/clusters/management/update.md
@@ -27,7 +27,7 @@ cortex cluster up cluster.yaml
In production environments, you can upgrade your cluster without downtime if you have a backend service or DNS in front of your Cortex cluster:

1. Spin up a new cluster. For example: `cortex cluster up new-cluster.yaml --configure-env cortex2` (this will create a CLI environment named `cortex2` for accessing the new cluster).
1. Re-deploy your APIs in your new cluster. For example, if the name of your CLI environment for your existing cluster is `cortex`, you can use `cortex get --env cortex` to list all running APIs in your cluster, and re-deploy them in the new cluster by changing directories to each API's project folder and running `cortex deploy --env cortex2`. Alternatively, you can run `cortex cluster export --name <previous_cluster_name> --region <region>` to export all of your API specifications, change directories the folder that was exported, and run `cortex deploy --env cortex2 <file_name>` for each API that you want to deploy in the new cluster.
1. Re-deploy your APIs in your new cluster. For example, if the name of your CLI environment for your existing cluster is `cortex`, you can use `cortex get --env cortex` to list all running APIs in your cluster, and re-deploy them in the new cluster by running `cortex deploy --env cortex2` for each API. Alternatively, you can run `cortex cluster export --name <previous_cluster_name> --region <region>` to export the API specifications for all of your running APIs, change directories to the folder that was exported, and run `cortex deploy --env cortex2 <file_name>` for each API that you want to deploy in the new cluster (a condensed sketch of this workflow follows the list).
1. Route requests to your new cluster.
* If you are using a custom domain: update the A record in your Route 53 hosted zone to point to your new cluster's API load balancer.
* If you have a backend service which makes requests to Cortex: update your backend service to make requests to the new cluster's endpoints.
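
A condensed sketch of steps 1 and 2 above, keeping the placeholder names from the text — the exported directory layout (one spec file per API) is an assumption:

```bash
# spin up the new cluster and create a CLI environment named cortex2 for it
cortex cluster up new-cluster.yaml --configure-env cortex2

# export the API specifications from the existing cluster
cortex cluster export --name <previous_cluster_name> --region <region>

# re-deploy each exported API to the new cluster
cd <exported_folder>            # placeholder for the directory created by the export
for spec in */cortex.yaml; do   # assumed layout: one folder per API
  cortex deploy --env cortex2 "$spec"
done
```
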
10 changes: 3 additions & 7 deletions docs/clusters/networking/custom-domain.md
@@ -115,13 +115,9 @@ You could run into connectivity issues if you make a request to your API without

To test connectivity, try the following steps:

1. Deploy any api (e.g. examples/pytorch/iris-classifier).
1. Make a GET request to the your api (e.g. `curl https://api.cortexlabs.dev/iris-classifier` or paste the url into your browser).
1. If you run into an error such as `curl: (6) Could not resolve host: api.cortexlabs.dev` wait a few minutes and make the GET request from another device that hasn't made a request to that url in a while. A successful request looks like this:

```text
{"message":"make a request by sending a POST to this endpoint with a json payload",...}
```
1. Deploy an API.
1. Make a request to your API (e.g. `curl https://api.cortexlabs.dev/my-api`, or paste the url into your browser if your API supports GET requests).
1. If you run into an error such as `curl: (6) Could not resolve host: api.cortexlabs.dev`, wait a few minutes and make the request from another device that hasn't made a request to that url in a while (a DNS check is sketched below).
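
One way to check whether DNS has propagated, sketched with the example domain used above:

```bash
# resolve the custom domain; an empty answer means DNS hasn't propagated yet
dig +short api.cortexlabs.dev

# once an address comes back, retry the request
curl https://api.cortexlabs.dev/my-api
```
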

## Cleanup

8 changes: 4 additions & 4 deletions docs/clusters/networking/https.md
@@ -56,13 +56,13 @@ Copy your "Invoke URL"
You may now use the "Invoke URL" in place of your API load balancer endpoint in your client. For example, this curl request:

```bash
curl http://a9eaf69fd125947abb1065f62de59047-81cdebc0275f7d96.elb.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json
curl http://a9eaf69fd125947abb1065f62de59047-81cdebc0275f7d96.elb.us-west-2.amazonaws.com/my-api -X POST -H "Content-Type: application/json" -d @sample.json
```

Would become:

```bash
curl https://31qjv48rs6.execute-api.us-west-2.amazonaws.com/dev/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json
curl https://31qjv48rs6.execute-api.us-west-2.amazonaws.com/dev/my-api -X POST -H "Content-Type: application/json" -d @sample.json
```
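
For reference, a hedged CLI alternative to the console steps, using API Gateway's quick-create with the example load balancer endpoint above (the API name is arbitrary, and quick-create's default stage and route are assumed to be acceptable):

```bash
# create an HTTP API that proxies all requests to the load balancer endpoint
aws apigatewayv2 create-api \
  --name cortex-gateway \
  --protocol-type HTTP \
  --target "http://a9eaf69fd125947abb1065f62de59047-81cdebc0275f7d96.elb.us-west-2.amazonaws.com"
```
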

### Cleanup
@@ -134,13 +134,13 @@ Copy your "Invoke URL"
You may now use the "Invoke URL" in place of your API load balancer endpoint in your client. For example, this curl request:

```bash
curl http://a5044e34a352d44b0945adcd455c7fa3-32fa161d3e5bcbf9.elb.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json
curl http://a5044e34a352d44b0945adcd455c7fa3-32fa161d3e5bcbf9.elb.us-west-2.amazonaws.com/my-api -X POST -H "Content-Type: application/json" -d @sample.json
```

Would become:

```bash
curl https://lrivodooqh.execute-api.us-west-2.amazonaws.com/dev/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json
curl https://lrivodooqh.execute-api.us-west-2.amazonaws.com/dev/my-api -X POST -H "Content-Type: application/json" -d @sample.json
```

### Cleanup
12 changes: 0 additions & 12 deletions docs/clusters/observability/logging.md
@@ -64,15 +64,3 @@ fields @timestamp, message
| sort @timestamp asc
| limit 1000
```

## Structured logging

You can use Cortex's logger in your Python code to log in JSON, which will enrich your logs with Cortex's metadata, and
enable you to add custom metadata to the logs.

See the structured logging docs for each API kind:

- [RealtimeAPI](../../workloads/realtime/handler.md#structured-logging)
- [AsyncAPI](../../workloads/async/handler.md#structured-logging)
- [BatchAPI](../../workloads/batch/handler.md#structured-logging)
- [TaskAPI](../../workloads/task/definitions.md#structured-logging)
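
The Logs Insights query shown above can also be run from the CLI; a sketch, where the log group name is a placeholder for your cluster's log group:

```bash
# start the query over the last hour (log group name is a placeholder)
query_id=$(aws logs start-query \
  --log-group-name "<log_group>" \
  --start-time "$(($(date +%s) - 3600))" \
  --end-time "$(date +%s)" \
  --query-string 'fields @timestamp, message | sort @timestamp asc | limit 1000' \
  --output text --query queryId)

# fetch the results once the query completes
aws logs get-query-results --query-id "$query_id"
```
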
24 changes: 12 additions & 12 deletions docs/clusters/observability/metrics.md
@@ -96,23 +96,23 @@ Currently, we only support 3 different metric types that will be converted to it

### Pushing metrics

- Counter

  ```python
  metrics.increment('my_counter', value=1, tags={"tag": "tag_name"})
  ```

- Gauge

  ```python
  metrics.gauge('active_connections', value=1001, tags={"tag": "tag_name"})
  ```

- Histogram

  ```python
  metrics.histogram('inference_time_milliseconds', 120, tags={"tag": "tag_name"})
  ```

### Metrics client class reference

2 changes: 1 addition & 1 deletion docs/start.md
@@ -21,7 +21,7 @@ cortex cluster up cluster.yaml
cortex deploy apis.yaml
```

* [RealtimeAPI](workloads/realtime/example.md) - create HTTP/gRPC APIs that respond to requests in real-time.
* [RealtimeAPI](workloads/realtime/example.md) - create APIs that respond to requests in real-time.
* [AsyncAPI](workloads/async/example.md) - create APIs that respond to requests asynchronously.
* [BatchAPI](workloads/batch/example.md) - create APIs that run distributed batch jobs.
* [TaskAPI](workloads/task/example.md) - create APIs that run jobs on-demand.
28 changes: 4 additions & 24 deletions docs/summary.md
@@ -29,52 +29,32 @@

## Workloads

* Realtime APIs
* [Realtime APIs](workloads/realtime/realtime-apis.md)
* [Example](workloads/realtime/example.md)
* [Handler](workloads/realtime/handler.md)
* [Configuration](workloads/realtime/configuration.md)
* [Parallelism](workloads/realtime/parallelism.md)
* [Autoscaling](workloads/realtime/autoscaling.md)
* [Models](workloads/realtime/models.md)
* Multi-model
* [Example](workloads/realtime/multi-model/example.md)
* [Configuration](workloads/realtime/multi-model/configuration.md)
* [Caching](workloads/realtime/multi-model/caching.md)
* [Server-side batching](workloads/realtime/server-side-batching.md)
* [Traffic Splitter](workloads/realtime/traffic-splitter.md)
* [Metrics](workloads/realtime/metrics.md)
* [Statuses](workloads/realtime/statuses.md)
* Traffic Splitter
* [Example](workloads/realtime/traffic-splitter/example.md)
* [Configuration](workloads/realtime/traffic-splitter/configuration.md)
* [Troubleshooting](workloads/realtime/troubleshooting.md)
* [Async APIs](workloads/async/async-apis.md)
* [Example](workloads/async/example.md)
* [Handler](workloads/async/handler.md)
* [Configuration](workloads/async/configuration.md)
* [TensorFlow Models](workloads/async/models.md)
* [Metrics](workloads/async/metrics.md)
* [Statuses](workloads/async/statuses.md)
* [Webhooks](workloads/async/webhooks.md)
* Batch APIs
* [Batch APIs](workloads/batch/batch-apis.md)
* [Example](workloads/batch/example.md)
* [Handler](workloads/batch/handler.md)
* [Configuration](workloads/batch/configuration.md)
* [Jobs](workloads/batch/jobs.md)
* [TensorFlow Models](workloads/batch/models.md)
* [Metrics](workloads/batch/metrics.md)
* [Statuses](workloads/batch/statuses.md)
* Task APIs
* [Task APIs](workloads/task/task-apis.md)
* [Example](workloads/task/example.md)
* [Definition](workloads/task/definitions.md)
* [Configuration](workloads/task/configuration.md)
* [Jobs](workloads/task/jobs.md)
* [Metrics](workloads/task/metrics.md)
* [Statuses](workloads/task/statuses.md)
* Dependencies
* [Example](workloads/dependencies/example.md)
* [Python packages](workloads/dependencies/python-packages.md)
* [System packages](workloads/dependencies/system-packages.md)
* [Custom images](workloads/dependencies/images.md)

## Clients

6 changes: 1 addition & 5 deletions docs/workloads/async/async-apis.md
@@ -1,4 +1,4 @@
# AsyncAPI
# Async APIs

The AsyncAPI kind is designed for asynchronous workloads, in which the user submits a request to start the processing
and retrieves the result later, either by polling or through a webhook.
@@ -14,7 +14,3 @@ workload status and results. Cortex fully manages the Async Gateway and the queu

AsyncAPI is a good fit for users who want to submit longer workloads (such as video, audio
or document processing), and do not need the result immediately or synchronously.

{% hint style="info" %}
AsyncAPI is still in a beta state.
{% endhint %}
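
A hypothetical client-side sketch of the submit-then-poll flow described above; the endpoint is a placeholder, and the `id` and `status` fields (and the `completed` value) are assumptions about the response schema rather than a documented contract:

```bash
endpoint="https://api.cortexlabs.dev/my-async-api"  # placeholder endpoint

# submit the workload; assume the response body carries a JSON "id" field
id=$(curl -s -X POST "$endpoint" -H "Content-Type: application/json" -d @sample.json | jq -r .id)

# poll until the assumed terminal status appears, then fetch the result
until curl -s "$endpoint/$id" | grep -q '"status":"completed"'; do
  sleep 2
done
curl -s "$endpoint/$id"
```
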