Skip to content

Commit c8da085

Browse files
authored
Improve API inter-process queue fairness (#1526)
1 parent 326df0c commit c8da085

File tree

26 files changed

+362
-77
lines changed

26 files changed

+362
-77
lines changed

cli/local/docker_spec.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package local
1919
import (
2020
"context"
2121
"fmt"
22-
"math"
2322
"path/filepath"
2423
"strings"
2524

@@ -92,9 +91,7 @@ func getAPIEnv(api *spec.API, awsClient *aws.Client) []string {
9291
"CORTEX_PROJECT_DIR="+_projectDir,
9392
"CORTEX_PROCESSES_PER_REPLICA="+s.Int32(api.Predictor.ProcessesPerReplica),
9493
"CORTEX_THREADS_PER_PROCESS="+s.Int32(api.Predictor.ThreadsPerProcess),
95-
// add 1 because it was required to achieve the target concurrency for 1 process, 1 thread
96-
"CORTEX_MAX_PROCESS_CONCURRENCY="+s.Int64(1+int64(math.Round(float64(consts.DefaultMaxReplicaConcurrency)/float64(api.Predictor.ProcessesPerReplica)))),
97-
"CORTEX_SO_MAX_CONN="+s.Int64(consts.DefaultMaxReplicaConcurrency+100), // add a buffer to be safe
94+
"CORTEX_MAX_REPLICA_CONCURRENCY="+s.Int32(api.Predictor.ProcessesPerReplica*api.Predictor.ThreadsPerProcess+1024), // allow a queue of 1024
9895
"AWS_REGION="+awsClient.Region,
9996
)
10097

dev/versions.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,18 @@ Note: it's ok if example training notebooks aren't upgraded, as long as the expo
172172
* be careful not to update any of the versions for Inferentia that are not latest in `images/python-predictor-inf/Dockerfile`
173173
1. Rerun all examples and check their logs
174174

175+
## S6-overlay supervisor
176+
177+
1. Locate the `s6-overlay` installation in `images/python-predictor-*/Dockerfile`, `images/tensorflow-predictor/Dockerfile` and `images/onnx-predictor-*/Dockerfile`
178+
1. Update the version in each serving image with the newer one in https://github.com/just-containers/s6-overlay.
179+
180+
## Nginx
181+
182+
1. Run a base image of Ubuntu that matches the version tag used for the serving images. The command to run is `docker run -it --rm <base-image>`
183+
1. Run `apt update && apt-cache policy nginx`. Note the latest minor version of nginx (e.g. `1.14`)
184+
1. Locate the `nginx` package in `images/python-predictor-*/Dockerfile`, `images/tensorflow-predictor/Dockerfile` and `images/onnx-predictor-*/Dockerfile`
185+
1. Update the version for all `nginx` appearances using the minor version from step 2 and add an asterisk at the end to denote any version (e.g. `1.14.*`)
186+
175187
## Istio
176188

177189
1. Find the latest [release](https://istio.io/latest/news/releases) and check the release notes (here are the [latest IstioOperator Options](https://istio.io/latest/docs/reference/config/istio.operator.v1alpha1/))

examples/sklearn/iris-classifier/cortex.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@
1212
model_type: classification
1313
compute:
1414
cpu: 0.2
15-
mem: 100M
15+
mem: 200M
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
boto3
2+
scikit-learn==0.21.3

images/neuron-rtd/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ RUN yum install -y \
1313
aws-neuron-runtime-1.0.9592.0 \
1414
procps-ng-3.3.10-26.amzn2.x86_64 \
1515
gzip \
16-
tar
16+
tar \
17+
curl
1718

1819
ENV PATH="/opt/aws/neuron/bin:${PATH}"
1920

images/onnx-predictor-cpu/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ RUN apt-get update -qq && apt-get install -y -q \
99
unzip \
1010
zlib1g-dev \
1111
locales \
12+
nginx=1.14.* \
1213
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1314

15+
RUN cd /tmp/ && \
16+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
17+
cd - && \
18+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
19+
1420
RUN locale-gen en_US.UTF-8
1521
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1622

@@ -68,4 +74,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
6874
COPY pkg/workloads/cortex/lib /src/cortex/lib
6975
COPY pkg/workloads/cortex/serve /src/cortex/serve
7076

71-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
77+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
78+
79+
ENTRYPOINT ["/init"]

images/onnx-predictor-gpu/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ RUN apt-get update -qq && apt-get install -y -q \
99
unzip \
1010
zlib1g-dev \
1111
locales \
12+
nginx=1.14.* \
1213
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1314

15+
RUN cd /tmp/ && \
16+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
17+
cd - && \
18+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
19+
1420
RUN locale-gen en_US.UTF-8
1521
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1622

@@ -68,4 +74,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
6874
COPY pkg/workloads/cortex/lib /src/cortex/lib
6975
COPY pkg/workloads/cortex/serve /src/cortex/serve
7076

71-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
77+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
78+
79+
ENTRYPOINT ["/init"]

images/python-predictor-cpu/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ RUN apt-get update -qq && apt-get install -y -q \
99
unzip \
1010
zlib1g-dev \
1111
locales \
12+
nginx=1.14.* \
1213
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1314

15+
RUN cd /tmp/ && \
16+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
17+
cd - && \
18+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
19+
1420
RUN locale-gen en_US.UTF-8
1521
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1622

@@ -88,4 +94,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
8894
COPY pkg/workloads/cortex/lib /src/cortex/lib
8995
COPY pkg/workloads/cortex/serve /src/cortex/serve
9096

91-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
97+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
98+
99+
ENTRYPOINT ["/init"]

images/python-predictor-gpu/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,14 @@ RUN apt-get update -qq && apt-get install -y -q \
1111
unzip \
1212
zlib1g-dev \
1313
locales \
14+
nginx=1.14.* \
1415
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1516

17+
RUN cd /tmp/ && \
18+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
19+
cd - && \
20+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
21+
1622
RUN locale-gen en_US.UTF-8
1723
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1824

@@ -93,4 +99,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
9399
COPY pkg/workloads/cortex/lib /src/cortex/lib
94100
COPY pkg/workloads/cortex/serve /src/cortex/serve
95101

96-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
102+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
103+
104+
ENTRYPOINT ["/init"]

images/python-predictor-inf/Dockerfile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ RUN apt-get update -qq && apt-get install -y -q \
1010
aws-neuron-runtime=1.1.1402.0 && \
1111
apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1212

13+
RUN wget -P /tmp/ https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer && \
14+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
15+
1316
ENV PATH=/opt/aws/neuron/bin/:$PATH
1417

1518
RUN apt-get update -qq && apt-get install -y -q \
@@ -21,6 +24,7 @@ RUN apt-get update -qq && apt-get install -y -q \
2124
unzip \
2225
zlib1g-dev \
2326
locales \
27+
nginx=1.14.* \
2428
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
2529

2630
RUN locale-gen en_US.UTF-8
@@ -100,4 +104,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
100104
COPY pkg/workloads/cortex/lib /src/cortex/lib
101105
COPY pkg/workloads/cortex/serve /src/cortex/serve
102106

103-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
107+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
108+
109+
ENTRYPOINT ["/init"]

images/tensorflow-predictor/Dockerfile

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ RUN apt-get update -qq && apt-get install -y -q \
99
unzip \
1010
zlib1g-dev \
1111
locales \
12+
nginx=1.14.* \
1213
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1314

15+
RUN cd /tmp/ && \
16+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
17+
cd - && \
18+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
19+
1420
RUN locale-gen en_US.UTF-8
1521
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1622

@@ -68,8 +74,10 @@ RUN test "${SLIM}" = "true" || ( \
6874
tensorflow-hub==0.9.0 \
6975
)
7076

71-
COPY pkg/workloads/cortex/consts.py /src/cortex/
77+
COPY pkg/workloads/cortex/consts.py /src/cortex
7278
COPY pkg/workloads/cortex/lib /src/cortex/lib
7379
COPY pkg/workloads/cortex/serve /src/cortex/serve
7480

75-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
81+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
82+
83+
ENTRYPOINT ["/init"]

images/tensorflow-serving-cpu/Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
FROM tensorflow/serving:2.3.0
22

3+
RUN apt-get update -qq && apt-get install -y -q \
4+
curl \
5+
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
6+
37
COPY images/tensorflow-serving-cpu/run.sh /src/
48
RUN chmod +x /src/run.sh
59

images/tensorflow-serving-gpu/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ FROM tensorflow/serving:2.3.0-gpu
33
RUN apt-get update -qq && apt-get install -y --no-install-recommends -q \
44
libnvinfer6=6.0.1-1+cuda10.1 \
55
libnvinfer-plugin6=6.0.1-1+cuda10.1 \
6+
curl \
67
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
78

89
COPY images/tensorflow-serving-gpu/run.sh /src/

images/tensorflow-serving-inf/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ FROM ubuntu:18.04
44
RUN apt-get update -qq && apt-get install -y -q \
55
gettext-base \
66
supervisor \
7+
curl \
78
wget \
89
netcat \
910
gnupg && \

pkg/operator/operator/k8s.go

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"encoding/base64"
2121
"encoding/json"
2222
"fmt"
23-
"math"
2423
"path"
2524
"strings"
2625

@@ -179,6 +178,7 @@ func PythonPredictorContainers(api *spec.API) ([]kcore.Container, []kcore.Volume
179178
VolumeMounts: apiPodVolumeMounts,
180179
ReadinessProbe: FileExistsProbe(_apiReadinessFile),
181180
LivenessProbe: _apiLivenessProbe,
181+
Lifecycle: nginxGracefulStopper(api.Kind),
182182
Resources: kcore.ResourceRequirements{
183183
Requests: apiPodResourceList,
184184
Limits: apiPodResourceLimitsList,
@@ -267,6 +267,7 @@ func TensorFlowPredictorContainers(api *spec.API) ([]kcore.Container, []kcore.Vo
267267
VolumeMounts: volumeMounts,
268268
ReadinessProbe: FileExistsProbe(_apiReadinessFile),
269269
LivenessProbe: _apiLivenessProbe,
270+
Lifecycle: nginxGracefulStopper(api.Kind),
270271
Resources: kcore.ResourceRequirements{
271272
Requests: apiResourceList,
272273
},
@@ -320,6 +321,7 @@ func ONNXPredictorContainers(api *spec.API) []kcore.Container {
320321
VolumeMounts: DefaultVolumeMounts,
321322
ReadinessProbe: FileExistsProbe(_apiReadinessFile),
322323
LivenessProbe: _apiLivenessProbe,
324+
Lifecycle: nginxGracefulStopper(api.Kind),
323325
Resources: kcore.ResourceRequirements{
324326
Requests: resourceList,
325327
Limits: resourceLimitsList,
@@ -409,15 +411,6 @@ func getEnvVars(api *spec.API, container string) []kcore.EnvVar {
409411
Name: "CORTEX_MAX_REPLICA_CONCURRENCY",
410412
Value: s.Int64(api.Autoscaling.MaxReplicaConcurrency),
411413
},
412-
kcore.EnvVar{
413-
Name: "CORTEX_MAX_PROCESS_CONCURRENCY",
414-
// add 1 because it was required to achieve the target concurrency for 1 process, 1 thread
415-
Value: s.Int64(1 + int64(math.Round(float64(api.Autoscaling.MaxReplicaConcurrency)/float64(api.Predictor.ProcessesPerReplica)))),
416-
},
417-
kcore.EnvVar{
418-
Name: "CORTEX_SO_MAX_CONN",
419-
Value: s.Int64(api.Autoscaling.MaxReplicaConcurrency + 100), // add a buffer to be safe
420-
},
421414
)
422415
}
423416

@@ -699,6 +692,7 @@ func tensorflowServingContainer(api *spec.API, volumeMounts []kcore.VolumeMount,
699692
FailureThreshold: 2,
700693
Handler: probeHandler,
701694
},
695+
Lifecycle: waitAPIContainerToStop(api.Kind),
702696
Resources: resources,
703697
Ports: ports,
704698
}
@@ -720,6 +714,7 @@ func neuronRuntimeDaemonContainer(api *spec.API, volumeMounts []kcore.VolumeMoun
720714
},
721715
VolumeMounts: volumeMounts,
722716
ReadinessProbe: socketExistsProbe(_neuronRTDSocket),
717+
Lifecycle: waitAPIContainerToStop(api.Kind),
723718
Resources: kcore.ResourceRequirements{
724719
Requests: kcore.ResourceList{
725720
"hugepages-2Mi": *kresource.NewQuantity(totalHugePages, kresource.BinarySI),
@@ -794,6 +789,34 @@ func socketExistsProbe(socketName string) *kcore.Probe {
794789
}
795790
}
796791

792+
func nginxGracefulStopper(apiKind userconfig.Kind) *kcore.Lifecycle {
793+
if apiKind == userconfig.RealtimeAPIKind {
794+
return &kcore.Lifecycle{
795+
PreStop: &kcore.Handler{
796+
Exec: &kcore.ExecAction{
797+
// the sleep is required to wait for any k8s-related race conditions
798+
// as described in https://medium.com/codecademy-engineering/kubernetes-nginx-and-zero-downtime-in-production-2c910c6a5ed8
799+
Command: []string{"/bin/sh", "-c", "sleep 5; /usr/sbin/nginx -s quit; while pgrep -x nginx; do sleep 1; done"},
800+
},
801+
},
802+
}
803+
}
804+
return nil
805+
}
806+
807+
func waitAPIContainerToStop(apiKind userconfig.Kind) *kcore.Lifecycle {
808+
if apiKind == userconfig.RealtimeAPIKind {
809+
return &kcore.Lifecycle{
810+
PreStop: &kcore.Handler{
811+
Exec: &kcore.ExecAction{
812+
Command: []string{"/bin/sh", "-c", fmt.Sprintf("while curl localhost:%s/nginx_status; do sleep 1; done", DefaultPortStr)},
813+
},
814+
},
815+
}
816+
}
817+
return nil
818+
}
819+
797820
var BaseEnvVars = []kcore.EnvFromSource{
798821
{
799822
ConfigMapRef: &kcore.ConfigMapEnvSource{

pkg/operator/resources/realtimeapi/k8s_specs.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ import (
2727
kcore "k8s.io/api/core/v1"
2828
)
2929

30+
var _terminationGracePeriodSeconds int64 = 60 // seconds
31+
3032
func deploymentSpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deployment {
3133
switch api.Predictor.Type {
3234
case userconfig.TensorFlowPredictorType:
@@ -74,7 +76,8 @@ func tensorflowAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.D
7476
"traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0",
7577
},
7678
K8sPodSpec: kcore.PodSpec{
77-
RestartPolicy: "Always",
79+
RestartPolicy: "Always",
80+
TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
7881
InitContainers: []kcore.Container{
7982
operator.InitContainer(api),
8083
},
@@ -123,7 +126,8 @@ func pythonAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deplo
123126
"traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0",
124127
},
125128
K8sPodSpec: kcore.PodSpec{
126-
RestartPolicy: "Always",
129+
RestartPolicy: "Always",
130+
TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
127131
InitContainers: []kcore.Container{
128132
operator.InitContainer(api),
129133
},
@@ -175,7 +179,8 @@ func onnxAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deploym
175179
InitContainers: []kcore.Container{
176180
operator.InitContainer(api),
177181
},
178-
Containers: containers,
182+
TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
183+
Containers: containers,
179184
NodeSelector: map[string]string{
180185
"workload": "true",
181186
},

pkg/types/spec/validations.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -399,9 +399,11 @@ func autoscalingValidation(provider types.ProviderType) *cr.StructFieldValidatio
399399
{
400400
StructField: "MaxReplicaConcurrency",
401401
Int64Validation: &cr.Int64Validation{
402-
Default: consts.DefaultMaxReplicaConcurrency,
403-
GreaterThan: pointer.Int64(0),
404-
LessThanOrEqualTo: pointer.Int64(math.MaxUint16),
402+
Default: consts.DefaultMaxReplicaConcurrency,
403+
GreaterThan: pointer.Int64(0),
404+
// our configured nginx can theoretically accept up to 32768 connections, but during testing,
405+
// it has been observed that the number is just slightly lower, so it has been offset by 2768
406+
LessThanOrEqualTo: pointer.Int64(30000),
405407
},
406408
},
407409
{

0 commit comments

Comments
 (0)