You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: docs/deployments/api-configuration.md
+15-12Lines changed: 15 additions & 12 deletions
Original file line number
Diff line number
Diff line change
@@ -8,22 +8,23 @@ Reference the section below which corresponds to your Predictor type: [Python](#
8
8
9
9
```yaml
10
10
- name: <string> # API name (required)
11
-
endpoint: <string> # the endpoint for the API (default: <api_name>)
11
+
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
12
+
local_port: <int> # the port for the API (local only) (default: 8888)
12
13
predictor:
13
14
type: python
14
15
path: <string> # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
15
16
config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
16
17
python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
17
18
image: <string> # docker image to use for the Predictor (default: cortexlabs/python-predictor-cpu or cortexlabs/python-predictor-gpu based on compute)
18
19
env: <string: string> # dictionary of environment variables
19
-
tracker:
20
+
tracker:  # (aws only)
20
21
key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
21
22
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
22
23
compute:
23
24
cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
24
25
gpu: <int> # GPU request per replica (default: 0)
25
26
mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
26
-
autoscaling:
27
+
autoscaling:  # (aws only)
27
28
min_replicas: <int> # minimum number of replicas (default: 1)
28
29
max_replicas: <int> # maximum number of replicas (default: 100)
29
30
init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -38,7 +39,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
38
39
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
39
40
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
40
41
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
41
-
update_strategy:
42
+
update_strategy:  # (aws only)
42
43
max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
43
44
max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
44
45
```
@@ -49,7 +50,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
49
50
50
51
```yaml
51
52
- name: <string> # API name (required)
52
-
endpoint: <string> # the endpoint for the API (default: <api_name>)
53
+
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
54
+
local_port: <int> # the port for the API (local only) (default: 8888)
53
55
predictor:
54
56
type: tensorflow
55
57
path: <string> # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required)
@@ -60,14 +62,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
60
62
image: <string> # docker image to use for the Predictor (default: cortexlabs/tensorflow-predictor)
61
63
tensorflow_serving_image: <string> # docker image to use for the TensorFlow Serving container (default: cortexlabs/tensorflow-serving-gpu or cortexlabs/tensorflow-serving-cpu based on compute)
62
64
env: <string: string> # dictionary of environment variables
63
-
tracker:
65
+
tracker:  # (aws only)
64
66
key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
65
67
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
66
68
compute:
67
69
cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
68
70
gpu: <int> # GPU request per replica (default: 0)
69
71
mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
70
-
autoscaling:
72
+
autoscaling:  # (aws only)
71
73
min_replicas: <int> # minimum number of replicas (default: 1)
72
74
max_replicas: <int> # maximum number of replicas (default: 100)
73
75
init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -82,7 +84,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
82
84
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
83
85
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
84
86
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
85
-
update_strategy:
87
+
update_strategy:  # (aws only)
86
88
max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
87
89
max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
88
90
```
@@ -93,7 +95,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
93
95
94
96
```yaml
95
97
- name: <string> # API name (required)
96
-
endpoint: <string> # the endpoint for the API (default: <api_name>)
98
+
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
99
+
local_port: <int> # the port for the API (local only) (default: 8888)
97
100
predictor:
98
101
type: onnx
99
102
path: <string> # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
@@ -102,14 +105,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
102
105
python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
103
106
image: <string> # docker image to use for the Predictor (default: cortexlabs/onnx-predictor-gpu or cortexlabs/onnx-predictor-cpu based on compute)
104
107
env: <string: string> # dictionary of environment variables
105
-
tracker:
108
+
tracker:  # (aws only)
106
109
key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
107
110
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
108
111
compute:
109
112
cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
110
113
gpu: <int> # GPU request per replica (default: 0)
111
114
mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
112
-
autoscaling:
115
+
autoscaling:  # (aws only)
113
116
min_replicas: <int> # minimum number of replicas (default: 1)
114
117
max_replicas: <int> # maximum number of replicas (default: 100)
115
118
init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -124,7 +127,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
124
127
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
125
128
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
126
129
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
127
-
update_strategy:
130
+
update_strategy:  # (aws only)
128
131
max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
129
132
max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
0 commit comments