Skip to content

Commit e855371

Browse files
author
Miguel Varela Ramos
authored
Add replica autoscaling to GCP clusters (#1879)
1 parent 50a36fb commit e855371

File tree

8 files changed

+111
-175
lines changed

8 files changed

+111
-175
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -213,6 +213,7 @@ max_instances: 5
213213
image_operator: /cortexlabs/operator:latest
214214
image_manager: gcr.io/<project_id>/cortexlabs/manager:latest
215215
image_downloader: gcr.io/<project_id>/cortexlabs/downloader:latest
216+
image_request_monitor: gcr.io/<project_id>/cortexlabs/request-monitor:latest
216217
image_istio_proxy: gcr.io/<project_id>/cortexlabs/istio-proxy:latest
217218
image_istio_pilot: gcr.io/<project_id>/cortexlabs/istio-pilot:latest
218219
image_google_pause: gcr.io/<project_id>/cortexlabs/google-pause:latest

docs/clusters/gcp/install.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,7 @@ The docker images used by the Cortex cluster can also be overridden, although th
7474
image_operator: quay.io/cortexlabs/operator:master
7575
image_manager: quay.io/cortexlabs/manager:master
7676
image_downloader: quay.io/cortexlabs/downloader:master
77+
image_request_monitor: quay.io/cortexlabs/request-monitor:master
7778
image_istio_proxy: quay.io/cortexlabs/istio-proxy:master
7879
image_istio_pilot: quay.io/cortexlabs/istio-pilot:master
7980
image_google_pause: quay.io/cortexlabs/google-pause:master

pkg/operator/operator/k8s.go

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -930,9 +930,16 @@ func neuronRuntimeDaemonContainer(api *spec.API, volumeMounts []kcore.VolumeMoun
930930
}
931931

932932
func RequestMonitorContainer(api *spec.API) kcore.Container {
933+
var image string
934+
if config.Provider == types.AWSProviderType {
935+
image = config.CoreConfig.ImageRequestMonitor
936+
} else if config.Provider == types.GCPProviderType {
937+
image = config.GCPCoreConfig.ImageRequestMonitor
938+
}
939+
933940
return kcore.Container{
934941
Name: _requestMonitorContainerName,
935-
Image: config.CoreConfig.ImageRequestMonitor,
942+
Image: image,
936943
ImagePullPolicy: kcore.PullAlways,
937944
Args: []string{"-p", DefaultRequestMonitorPortStr},
938945
Ports: []kcore.ContainerPort{

pkg/operator/resources/realtimeapi/api.go

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@ import (
2929
"github.com/cortexlabs/cortex/pkg/operator/lib/routines"
3030
"github.com/cortexlabs/cortex/pkg/operator/operator"
3131
"github.com/cortexlabs/cortex/pkg/operator/schema"
32-
"github.com/cortexlabs/cortex/pkg/types"
3332
"github.com/cortexlabs/cortex/pkg/types/spec"
3433
"github.com/cortexlabs/cortex/pkg/types/status"
3534
"github.com/cortexlabs/cortex/pkg/types/userconfig"
@@ -325,10 +324,8 @@ func applyK8sDeployment(api *spec.API, prevDeployment *kapps.Deployment) error {
325324
}
326325
}
327326

328-
if config.Provider == types.AWSProviderType {
329-
if err := UpdateAutoscalerCron(newDeployment, api); err != nil {
330-
return err
331-
}
327+
if err := UpdateAutoscalerCron(newDeployment, api); err != nil {
328+
return err
332329
}
333330

334331
return nil

pkg/operator/resources/realtimeapi/k8s_specs.go

Lines changed: 3 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,7 @@ package realtimeapi
1919
import (
2020
"github.com/cortexlabs/cortex/pkg/lib/k8s"
2121
"github.com/cortexlabs/cortex/pkg/lib/pointer"
22-
"github.com/cortexlabs/cortex/pkg/operator/config"
2322
"github.com/cortexlabs/cortex/pkg/operator/operator"
24-
"github.com/cortexlabs/cortex/pkg/types"
2523
"github.com/cortexlabs/cortex/pkg/types/spec"
2624
"github.com/cortexlabs/cortex/pkg/types/userconfig"
2725
istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
@@ -45,12 +43,8 @@ func deploymentSpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Depl
4543
}
4644

4745
func tensorflowAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deployment {
48-
4946
containers, volumes := operator.TensorFlowPredictorContainers(api)
50-
51-
if config.Provider == types.AWSProviderType {
52-
containers = append(containers, operator.RequestMonitorContainer(api))
53-
}
47+
containers = append(containers, operator.RequestMonitorContainer(api))
5448

5549
return k8s.Deployment(&k8s.DeploymentSpec{
5650
Name: operator.K8sName(api.Name),
@@ -98,10 +92,7 @@ func tensorflowAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.D
9892

9993
func pythonAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deployment {
10094
containers, volumes := operator.PythonPredictorContainers(api)
101-
102-
if config.Provider == types.AWSProviderType {
103-
containers = append(containers, operator.RequestMonitorContainer(api))
104-
}
95+
containers = append(containers, operator.RequestMonitorContainer(api))
10596

10697
return k8s.Deployment(&k8s.DeploymentSpec{
10798
Name: operator.K8sName(api.Name),
@@ -149,10 +140,7 @@ func pythonAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deplo
149140

150141
func onnxAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deployment {
151142
containers, volumes := operator.ONNXPredictorContainers(api)
152-
153-
if config.Provider == types.AWSProviderType {
154-
containers = append(containers, operator.RequestMonitorContainer(api))
155-
}
143+
containers = append(containers, operator.RequestMonitorContainer(api))
156144

157145
return k8s.Deployment(&k8s.DeploymentSpec{
158146
Name: operator.K8sName(api.Name),

pkg/types/clusterconfig/cluster_config_gcp.go

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ type GCPCoreConfig struct {
4747
ImageOperator string `json:"image_operator" yaml:"image_operator"`
4848
ImageManager string `json:"image_manager" yaml:"image_manager"`
4949
ImageDownloader string `json:"image_downloader" yaml:"image_downloader"`
50+
ImageRequestMonitor string `json:"image_request_monitor" yaml:"image_request_monitor"`
5051
ImageClusterAutoscaler string `json:"image_cluster_autoscaler" yaml:"image_cluster_autoscaler"`
5152
ImageFluentBit string `json:"image_fluent_bit" yaml:"image_fluent_bit"`
5253
ImageIstioProxy string `json:"image_istio_proxy" yaml:"image_istio_proxy"`
@@ -167,6 +168,13 @@ var GCPCoreConfigStructFieldValidations = []*cr.StructFieldValidation{
167168
Validator: validateImageVersion,
168169
},
169170
},
171+
{
172+
StructField: "ImageRequestMonitor",
173+
StringValidation: &cr.StringValidation{
174+
Default: "quay.io/cortexlabs/request-monitor:" + consts.CortexVersion,
175+
Validator: validateImageVersion,
176+
},
177+
},
170178
{
171179
StructField: "ImageClusterAutoscaler",
172180
StringValidation: &cr.StringValidation{
@@ -655,6 +663,7 @@ func (cc *GCPCoreConfig) UserTable() table.KeyValuePairs {
655663
items.Add(ImageOperatorUserKey, cc.ImageOperator)
656664
items.Add(ImageManagerUserKey, cc.ImageManager)
657665
items.Add(ImageDownloaderUserKey, cc.ImageDownloader)
666+
items.Add(ImageRequestMonitorUserKey, cc.ImageRequestMonitor)
658667
items.Add(ImageClusterAutoscalerUserKey, cc.ImageClusterAutoscaler)
659668
items.Add(ImageFluentBitUserKey, cc.ImageFluentBit)
660669
items.Add(ImageIstioProxyUserKey, cc.ImageIstioProxy)
@@ -739,6 +748,9 @@ func (cc *GCPCoreConfig) TelemetryEvent() map[string]interface{} {
739748
if !strings.HasPrefix(cc.ImageDownloader, "cortexlabs/") {
740749
event["image_downloader._is_custom"] = true
741750
}
751+
if !strings.HasPrefix(cc.ImageRequestMonitor, "cortexlabs/") {
752+
event["image_request_monitor._is_custom"] = true
753+
}
742754
if !strings.HasPrefix(cc.ImageClusterAutoscaler, "cortexlabs/") {
743755
event["image_cluster_autoscaler._is_custom"] = true
744756
}

0 commit comments

Comments
 (0)