
Commit cbbfcb9: Label config as local or aws api-configuration.md (#1017)
(cherry picked from commit 2806379)
1 parent: c7985a6

1 file changed: docs/deployments/api-configuration.md (+15, -12)
@@ -8,22 +8,23 @@ Reference the section below which corresponds to your Predictor type: [Python](#
 
 ```yaml
 - name: <string> # API name (required)
-  endpoint: <string> # the endpoint for the API (default: <api_name>)
+  endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
+  local_port: <int> # specify the port for API (local only) (default: 8888)
   predictor:
     type: python
     path: <string> # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
     config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
     image: <string> # docker image to use for the Predictor (default: cortexlabs/python-predictor-cpu or cortexlabs/python-predictor-gpu based on compute)
     env: <string: string> # dictionary of environment variables
-  tracker:
+  tracker: # (aws only)
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
     model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
   compute:
     cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int> # GPU request per replica (default: 0)
     mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
-  autoscaling:
+  autoscaling: # (aws only)
     min_replicas: <int> # minimum number of replicas (default: 1)
     max_replicas: <int> # maximum number of replicas (default: 100)
     init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -38,7 +39,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
     downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
     upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
-  update_strategy:
+  update_strategy: # (aws only)
     max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
     max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
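
For illustration, a minimal sketch of a local deployment that uses the new `local_port` field might look like the following; the API name, file path, and port value are hypothetical, not values from this commit:

```yaml
# Hypothetical cortex.yaml sketch for a local deployment.
# The name, path, and port are illustrative examples only.
- name: my-classifier
  local_port: 9000      # local only; defaults to 8888 if omitted
  predictor:
    type: python
    path: predictor.py  # file containing a PythonPredictor class
```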
@@ -49,7 +50,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
 
 ```yaml
 - name: <string> # API name (required)
-  endpoint: <string> # the endpoint for the API (default: <api_name>)
+  endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
+  local_port: <int> # specify the port for API (local only) (default: 8888)
   predictor:
     type: tensorflow
     path: <string> # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required)
@@ -60,14 +62,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     image: <string> # docker image to use for the Predictor (default: cortexlabs/tensorflow-predictor)
     tensorflow_serving_image: <string> # docker image to use for the TensorFlow Serving container (default: cortexlabs/tensorflow-serving-gpu or cortexlabs/tensorflow-serving-cpu based on compute)
     env: <string: string> # dictionary of environment variables
-  tracker:
+  tracker: # (aws only)
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
     model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
   compute:
     cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int> # GPU request per replica (default: 0)
     mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
-  autoscaling:
+  autoscaling: # (aws only)
     min_replicas: <int> # minimum number of replicas (default: 1)
     max_replicas: <int> # maximum number of replicas (default: 100)
     init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -82,7 +84,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
     downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
     upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
-  update_strategy:
+  update_strategy: # (aws only)
     max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
     max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
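
And a sketch showing the aws-only fields in context; the name, endpoint, model path, and replica counts are hypothetical, and the `model` field sits in a part of the TensorFlow Predictor schema elided from this diff:

```yaml
# Hypothetical sketch of an AWS deployment; all values are illustrative.
- name: my-text-generator
  endpoint: /generate              # aws only
  predictor:
    type: tensorflow
    path: predictor.py             # file containing a TensorFlowPredictor class
    model: s3://my-bucket/model    # assumed field; elided from this diff
  tracker:                         # aws only
    model_type: classification
  autoscaling:                     # aws only
    min_replicas: 1
    max_replicas: 5
```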
@@ -93,7 +95,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
 
 ```yaml
 - name: <string> # API name (required)
-  endpoint: <string> # the endpoint for the API (default: <api_name>)
+  endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
+  local_port: <int> # specify the port for API (local only) (default: 8888)
   predictor:
     type: onnx
     path: <string> # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
@@ -102,14 +105,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
     image: <string> # docker image to use for the Predictor (default: cortexlabs/onnx-predictor-gpu or cortexlabs/onnx-predictor-cpu based on compute)
     env: <string: string> # dictionary of environment variables
-  tracker:
+  tracker: # (aws only)
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
     model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
   compute:
     cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int> # GPU request per replica (default: 0)
     mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
-  autoscaling:
+  autoscaling: # (aws only)
     min_replicas: <int> # minimum number of replicas (default: 1)
     max_replicas: <int> # maximum number of replicas (default: 100)
     init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -124,7 +127,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
     downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
     upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
-  update_strategy:
+  update_strategy: # (aws only)
     max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
     max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
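
Similarly, a sketch of `update_strategy` (aws only) on an ONNX API; the name, model path, and values are hypothetical, and `model` again comes from a part of the schema elided above:

```yaml
# Hypothetical sketch; per the schema comment above, max_surge: 0
# disables rolling updates.
- name: my-onnx-api
  predictor:
    type: onnx
    path: predictor.py                # file containing an ONNXPredictor class
    model: s3://my-bucket/model.onnx  # assumed field; elided from this diff
  update_strategy:                    # aws only
    max_surge: 0                      # disable rolling updates
    max_unavailable: 25%              # the default
```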
