Skip to content

Commit c8da085

Browse files
authored
Improve API inter-process queue fairness (#1526)
1 parent 326df0c commit c8da085

File tree

26 files changed

+362
-77
lines changed

26 files changed

+362
-77
lines changed

cli/local/docker_spec.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package local
1919
import (
2020
"context"
2121
"fmt"
22-
"math"
2322
"path/filepath"
2423
"strings"
2524

@@ -92,9 +91,7 @@ func getAPIEnv(api *spec.API, awsClient *aws.Client) []string {
9291
"CORTEX_PROJECT_DIR="+_projectDir,
9392
"CORTEX_PROCESSES_PER_REPLICA="+s.Int32(api.Predictor.ProcessesPerReplica),
9493
"CORTEX_THREADS_PER_PROCESS="+s.Int32(api.Predictor.ThreadsPerProcess),
95-
// add 1 because it was required to achieve the target concurrency for 1 process, 1 thread
96-
"CORTEX_MAX_PROCESS_CONCURRENCY="+s.Int64(1+int64(math.Round(float64(consts.DefaultMaxReplicaConcurrency)/float64(api.Predictor.ProcessesPerReplica)))),
97-
"CORTEX_SO_MAX_CONN="+s.Int64(consts.DefaultMaxReplicaConcurrency+100), // add a buffer to be safe
94+
"CORTEX_MAX_REPLICA_CONCURRENCY="+s.Int32(api.Predictor.ProcessesPerReplica*api.Predictor.ThreadsPerProcess+1024), // allow a queue of 1024
9895
"AWS_REGION="+awsClient.Region,
9996
)
10097

dev/versions.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,18 @@ Note: it's ok if example training notebooks aren't upgraded, as long as the expo
172172
* be careful not to update any of the versions for Inferentia that are not latest in `images/python-predictor-inf/Dockerfile`
173173
1. Rerun all examples and check their logs
174174

175+
## S6-overlay supervisor
176+
177+
1. Locate the `s6-overlay` installation in `images/python-predictor-*/Dockerfile`, `images/tensorflow-predictor/Dockerfile` and `images/onnx-predictor-*/Dockerfile`
178+
1. Update the version in each serving image with the newer one in https://github.com/just-containers/s6-overlay.
179+
180+
## Nginx
181+
182+
1. Run a base image of Ubuntu that matches the version tag used for the serving images. The command to run is `docker run -it --rm <base-image>`
183+
1. Run `apt update && apt-cache policy nginx`. Note the latest minor version of nginx (e.g. `1.14`)
184+
1. Locate the `nginx` package in `images/python-predictor-*/Dockerfile`, `images/tensorflow-predictor/Dockerfile` and `images/onnx-predictor-*/Dockerfile`
185+
1. Update the version for all `nginx` appearances using the minor version from step 2 and add an asterisk at the end to denote any version (e.g. `1.14.*`)
186+
175187
## Istio
176188

177189
1. Find the latest [release](https://istio.io/latest/news/releases) and check the release notes (here are the [latest IstioOperator Options](https://istio.io/latest/docs/reference/config/istio.operator.v1alpha1/))

examples/sklearn/iris-classifier/cortex.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@
1212
model_type: classification
1313
compute:
1414
cpu: 0.2
15-
mem: 100M
15+
mem: 200M
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
boto3
2+
scikit-learn==0.21.3

images/neuron-rtd/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ RUN yum install -y \
1313
aws-neuron-runtime-1.0.9592.0 \
1414
procps-ng-3.3.10-26.amzn2.x86_64 \
1515
gzip \
16-
tar
16+
tar \
17+
curl
1718

1819
ENV PATH="/opt/aws/neuron/bin:${PATH}"
1920

images/onnx-predictor-cpu/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ RUN apt-get update -qq && apt-get install -y -q \
99
unzip \
1010
zlib1g-dev \
1111
locales \
12+
nginx=1.14.* \
1213
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1314

15+
RUN cd /tmp/ && \
16+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
17+
cd - && \
18+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
19+
1420
RUN locale-gen en_US.UTF-8
1521
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1622

@@ -68,4 +74,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
6874
COPY pkg/workloads/cortex/lib /src/cortex/lib
6975
COPY pkg/workloads/cortex/serve /src/cortex/serve
7076

71-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
77+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
78+
79+
ENTRYPOINT ["/init"]

images/onnx-predictor-gpu/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ RUN apt-get update -qq && apt-get install -y -q \
99
unzip \
1010
zlib1g-dev \
1111
locales \
12+
nginx=1.14.* \
1213
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1314

15+
RUN cd /tmp/ && \
16+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
17+
cd - && \
18+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
19+
1420
RUN locale-gen en_US.UTF-8
1521
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1622

@@ -68,4 +74,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
6874
COPY pkg/workloads/cortex/lib /src/cortex/lib
6975
COPY pkg/workloads/cortex/serve /src/cortex/serve
7076

71-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
77+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
78+
79+
ENTRYPOINT ["/init"]

images/python-predictor-cpu/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ RUN apt-get update -qq && apt-get install -y -q \
99
unzip \
1010
zlib1g-dev \
1111
locales \
12+
nginx=1.14.* \
1213
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1314

15+
RUN cd /tmp/ && \
16+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
17+
cd - && \
18+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
19+
1420
RUN locale-gen en_US.UTF-8
1521
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1622

@@ -88,4 +94,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
8894
COPY pkg/workloads/cortex/lib /src/cortex/lib
8995
COPY pkg/workloads/cortex/serve /src/cortex/serve
9096

91-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
97+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
98+
99+
ENTRYPOINT ["/init"]

images/python-predictor-gpu/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,14 @@ RUN apt-get update -qq && apt-get install -y -q \
1111
unzip \
1212
zlib1g-dev \
1313
locales \
14+
nginx=1.14.* \
1415
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1516

17+
RUN cd /tmp/ && \
18+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
19+
cd - && \
20+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
21+
1622
RUN locale-gen en_US.UTF-8
1723
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1824

@@ -93,4 +99,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
9399
COPY pkg/workloads/cortex/lib /src/cortex/lib
94100
COPY pkg/workloads/cortex/serve /src/cortex/serve
95101

96-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
102+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
103+
104+
ENTRYPOINT ["/init"]

images/python-predictor-inf/Dockerfile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ RUN apt-get update -qq && apt-get install -y -q \
1010
aws-neuron-runtime=1.1.1402.0 && \
1111
apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1212

13+
RUN wget -P /tmp/ https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer && \
14+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
15+
1316
ENV PATH=/opt/aws/neuron/bin/:$PATH
1417

1518
RUN apt-get update -qq && apt-get install -y -q \
@@ -21,6 +24,7 @@ RUN apt-get update -qq && apt-get install -y -q \
2124
unzip \
2225
zlib1g-dev \
2326
locales \
27+
nginx=1.14.* \
2428
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
2529

2630
RUN locale-gen en_US.UTF-8
@@ -100,4 +104,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
100104
COPY pkg/workloads/cortex/lib /src/cortex/lib
101105
COPY pkg/workloads/cortex/serve /src/cortex/serve
102106

103-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
107+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
108+
109+
ENTRYPOINT ["/init"]

images/tensorflow-predictor/Dockerfile

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ RUN apt-get update -qq && apt-get install -y -q \
99
unzip \
1010
zlib1g-dev \
1111
locales \
12+
nginx=1.14.* \
1213
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
1314

15+
RUN cd /tmp/ && \
16+
curl -L --output s6-overlay-amd64-installer "https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer" && \
17+
cd - && \
18+
chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer / && rm /tmp/s6-overlay-amd64-installer
19+
1420
RUN locale-gen en_US.UTF-8
1521
ENV LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
1622

@@ -68,8 +74,10 @@ RUN test "${SLIM}" = "true" || ( \
6874
tensorflow-hub==0.9.0 \
6975
)
7076

71-
COPY pkg/workloads/cortex/consts.py /src/cortex/
77+
COPY pkg/workloads/cortex/consts.py /src/cortex
7278
COPY pkg/workloads/cortex/lib /src/cortex/lib
7379
COPY pkg/workloads/cortex/serve /src/cortex/serve
7480

75-
ENTRYPOINT ["/src/cortex/serve/run.sh"]
81+
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh
82+
83+
ENTRYPOINT ["/init"]

images/tensorflow-serving-cpu/Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
FROM tensorflow/serving:2.3.0
22

3+
RUN apt-get update -qq && apt-get install -y -q \
4+
curl \
5+
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
6+
37
COPY images/tensorflow-serving-cpu/run.sh /src/
48
RUN chmod +x /src/run.sh
59

images/tensorflow-serving-gpu/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ FROM tensorflow/serving:2.3.0-gpu
33
RUN apt-get update -qq && apt-get install -y --no-install-recommends -q \
44
libnvinfer6=6.0.1-1+cuda10.1 \
55
libnvinfer-plugin6=6.0.1-1+cuda10.1 \
6+
curl \
67
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*
78

89
COPY images/tensorflow-serving-gpu/run.sh /src/

images/tensorflow-serving-inf/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ FROM ubuntu:18.04
44
RUN apt-get update -qq && apt-get install -y -q \
55
gettext-base \
66
supervisor \
7+
curl \
78
wget \
89
netcat \
910
gnupg && \

pkg/operator/operator/k8s.go

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"encoding/base64"
2121
"encoding/json"
2222
"fmt"
23-
"math"
2423
"path"
2524
"strings"
2625

@@ -179,6 +178,7 @@ func PythonPredictorContainers(api *spec.API) ([]kcore.Container, []kcore.Volume
179178
VolumeMounts: apiPodVolumeMounts,
180179
ReadinessProbe: FileExistsProbe(_apiReadinessFile),
181180
LivenessProbe: _apiLivenessProbe,
181+
Lifecycle: nginxGracefulStopper(api.Kind),
182182
Resources: kcore.ResourceRequirements{
183183
Requests: apiPodResourceList,
184184
Limits: apiPodResourceLimitsList,
@@ -267,6 +267,7 @@ func TensorFlowPredictorContainers(api *spec.API) ([]kcore.Container, []kcore.Vo
267267
VolumeMounts: volumeMounts,
268268
ReadinessProbe: FileExistsProbe(_apiReadinessFile),
269269
LivenessProbe: _apiLivenessProbe,
270+
Lifecycle: nginxGracefulStopper(api.Kind),
270271
Resources: kcore.ResourceRequirements{
271272
Requests: apiResourceList,
272273
},
@@ -320,6 +321,7 @@ func ONNXPredictorContainers(api *spec.API) []kcore.Container {
320321
VolumeMounts: DefaultVolumeMounts,
321322
ReadinessProbe: FileExistsProbe(_apiReadinessFile),
322323
LivenessProbe: _apiLivenessProbe,
324+
Lifecycle: nginxGracefulStopper(api.Kind),
323325
Resources: kcore.ResourceRequirements{
324326
Requests: resourceList,
325327
Limits: resourceLimitsList,
@@ -409,15 +411,6 @@ func getEnvVars(api *spec.API, container string) []kcore.EnvVar {
409411
Name: "CORTEX_MAX_REPLICA_CONCURRENCY",
410412
Value: s.Int64(api.Autoscaling.MaxReplicaConcurrency),
411413
},
412-
kcore.EnvVar{
413-
Name: "CORTEX_MAX_PROCESS_CONCURRENCY",
414-
// add 1 because it was required to achieve the target concurrency for 1 process, 1 thread
415-
Value: s.Int64(1 + int64(math.Round(float64(api.Autoscaling.MaxReplicaConcurrency)/float64(api.Predictor.ProcessesPerReplica)))),
416-
},
417-
kcore.EnvVar{
418-
Name: "CORTEX_SO_MAX_CONN",
419-
Value: s.Int64(api.Autoscaling.MaxReplicaConcurrency + 100), // add a buffer to be safe
420-
},
421414
)
422415
}
423416

@@ -699,6 +692,7 @@ func tensorflowServingContainer(api *spec.API, volumeMounts []kcore.VolumeMount,
699692
FailureThreshold: 2,
700693
Handler: probeHandler,
701694
},
695+
Lifecycle: waitAPIContainerToStop(api.Kind),
702696
Resources: resources,
703697
Ports: ports,
704698
}
@@ -720,6 +714,7 @@ func neuronRuntimeDaemonContainer(api *spec.API, volumeMounts []kcore.VolumeMoun
720714
},
721715
VolumeMounts: volumeMounts,
722716
ReadinessProbe: socketExistsProbe(_neuronRTDSocket),
717+
Lifecycle: waitAPIContainerToStop(api.Kind),
723718
Resources: kcore.ResourceRequirements{
724719
Requests: kcore.ResourceList{
725720
"hugepages-2Mi": *kresource.NewQuantity(totalHugePages, kresource.BinarySI),
@@ -794,6 +789,34 @@ func socketExistsProbe(socketName string) *kcore.Probe {
794789
}
795790
}
796791

792+
func nginxGracefulStopper(apiKind userconfig.Kind) *kcore.Lifecycle {
793+
if apiKind == userconfig.RealtimeAPIKind {
794+
return &kcore.Lifecycle{
795+
PreStop: &kcore.Handler{
796+
Exec: &kcore.ExecAction{
797+
// the sleep is required to wait for any k8s-related race conditions
798+
// as described in https://medium.com/codecademy-engineering/kubernetes-nginx-and-zero-downtime-in-production-2c910c6a5ed8
799+
Command: []string{"/bin/sh", "-c", "sleep 5; /usr/sbin/nginx -s quit; while pgrep -x nginx; do sleep 1; done"},
800+
},
801+
},
802+
}
803+
}
804+
return nil
805+
}
806+
807+
func waitAPIContainerToStop(apiKind userconfig.Kind) *kcore.Lifecycle {
808+
if apiKind == userconfig.RealtimeAPIKind {
809+
return &kcore.Lifecycle{
810+
PreStop: &kcore.Handler{
811+
Exec: &kcore.ExecAction{
812+
Command: []string{"/bin/sh", "-c", fmt.Sprintf("while curl localhost:%s/nginx_status; do sleep 1; done", DefaultPortStr)},
813+
},
814+
},
815+
}
816+
}
817+
return nil
818+
}
819+
797820
var BaseEnvVars = []kcore.EnvFromSource{
798821
{
799822
ConfigMapRef: &kcore.ConfigMapEnvSource{

pkg/operator/resources/realtimeapi/k8s_specs.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ import (
2727
kcore "k8s.io/api/core/v1"
2828
)
2929

30+
var _terminationGracePeriodSeconds int64 = 60 // seconds
31+
3032
func deploymentSpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deployment {
3133
switch api.Predictor.Type {
3234
case userconfig.TensorFlowPredictorType:
@@ -74,7 +76,8 @@ func tensorflowAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.D
7476
"traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0",
7577
},
7678
K8sPodSpec: kcore.PodSpec{
77-
RestartPolicy: "Always",
79+
RestartPolicy: "Always",
80+
TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
7881
InitContainers: []kcore.Container{
7982
operator.InitContainer(api),
8083
},
@@ -123,7 +126,8 @@ func pythonAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deplo
123126
"traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0",
124127
},
125128
K8sPodSpec: kcore.PodSpec{
126-
RestartPolicy: "Always",
129+
RestartPolicy: "Always",
130+
TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
127131
InitContainers: []kcore.Container{
128132
operator.InitContainer(api),
129133
},
@@ -175,7 +179,8 @@ func onnxAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deploym
175179
InitContainers: []kcore.Container{
176180
operator.InitContainer(api),
177181
},
178-
Containers: containers,
182+
TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
183+
Containers: containers,
179184
NodeSelector: map[string]string{
180185
"workload": "true",
181186
},

pkg/types/spec/validations.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -399,9 +399,11 @@ func autoscalingValidation(provider types.ProviderType) *cr.StructFieldValidatio
399399
{
400400
StructField: "MaxReplicaConcurrency",
401401
Int64Validation: &cr.Int64Validation{
402-
Default: consts.DefaultMaxReplicaConcurrency,
403-
GreaterThan: pointer.Int64(0),
404-
LessThanOrEqualTo: pointer.Int64(math.MaxUint16),
402+
Default: consts.DefaultMaxReplicaConcurrency,
403+
GreaterThan: pointer.Int64(0),
404+
// our configured nginx can theoretically accept up to 32768 connections, but during testing,
405+
// it has been observed that the number is just slightly lower, so it has been offset by 2768
406+
LessThanOrEqualTo: pointer.Int64(30000),
405407
},
406408
},
407409
{

0 commit comments

Comments
 (0)