Skip to content

Commit eb9b30a

Browse files
authored
Add option to set shared memory (shm) size for Task API (#2132)
1 parent 9206086 commit eb9b30a

File tree

6 files changed

+40
-5
lines changed

6 files changed

+40
-5
lines changed

docs/workloads/task/configuration.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
conda: <string> # relative path to conda-packages.txt (default: conda-packages.txt)
1313
shell: <string> # relative path to a shell script for system package installation (default: dependencies.sh)
1414
python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
15+
shm_size: <string> # size of shared memory (/dev/shm) for sharing data between multiple processes, e.g. 64Mi or 1Gi (default: Null)
1516
image: <string> # docker image to use for the Task (default: quay.io/cortexlabs/python-handler-cpu:master, quay.io/cortexlabs/python-handler-gpu:master-cuda10.2-cudnn8, or quay.io/cortexlabs/python-handler-inf:master based on compute)
1617
env: <string: string> # dictionary of environment variables
1718
log_level: <string> # log level that can be "debug", "info", "warning" or "error" (default: "info")

pkg/operator/operator/k8s.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,22 @@ func TaskContainers(api *spec.API) ([]kcore.Container, []kcore.Volume) {
277277
}
278278
}
279279

280+
if api.TaskDefinition.ShmSize != nil {
281+
volumes = append(volumes, kcore.Volume{
282+
Name: "dshm",
283+
VolumeSource: kcore.VolumeSource{
284+
EmptyDir: &kcore.EmptyDirVolumeSource{
285+
Medium: kcore.StorageMediumMemory,
286+
SizeLimit: k8s.QuantityPtr(api.TaskDefinition.ShmSize.Quantity),
287+
},
288+
},
289+
})
290+
apiPodVolumeMounts = append(apiPodVolumeMounts, kcore.VolumeMount{
291+
Name: "dshm",
292+
MountPath: "/dev/shm",
293+
})
294+
}
295+
280296
containers = append(containers, kcore.Container{
281297
Name: APIContainerName,
282298
Image: api.TaskDefinition.Image,

pkg/types/spec/errors.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,10 +232,10 @@ func ErrorSurgeAndUnavailableBothZero() error {
232232
})
233233
}
234234

235-
func ErrorShmSizeCannotExceedMem(shmSize k8s.Quantity, mem k8s.Quantity) error {
235+
func ErrorShmSizeCannotExceedMem(parentFieldName string, shmSize k8s.Quantity, mem k8s.Quantity) error {
236236
return errors.WithStack(&errors.Error{
237237
Kind: ErrShmSizeCannotExceedMem,
238-
Message: fmt.Sprintf("handler.shm_size (%s) cannot exceed compute.mem (%s)", shmSize.UserString, mem.UserString),
238+
Message: fmt.Sprintf("%s.shm_size (%s) cannot exceed compute.mem (%s)", parentFieldName, shmSize.UserString, mem.UserString),
239239
})
240240
}
241241

pkg/types/spec/validations.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,14 @@ func taskDefinitionValidation() *cr.StructFieldValidation {
310310
DockerImageOrEmpty: true,
311311
},
312312
},
313+
{
314+
StructField: "ShmSize",
315+
StringPtrValidation: &cr.StringPtrValidation{
316+
Default: nil,
317+
AllowExplicitNull: true,
318+
},
319+
Parser: k8s.QuantityParser(&k8s.QuantityValidation{}),
320+
},
313321
{
314322
StructField: "LogLevel",
315323
StringValidation: &cr.StringValidation{
@@ -803,7 +811,13 @@ func ValidateAPI(
803811

804812
if api.Handler != nil && api.Handler.ShmSize != nil && api.Compute.Mem != nil {
805813
if api.Handler.ShmSize.Cmp(api.Compute.Mem.Quantity) > 0 {
806-
return ErrorShmSizeCannotExceedMem(*api.Handler.ShmSize, *api.Compute.Mem)
814+
return ErrorShmSizeCannotExceedMem(userconfig.HandlerKey, *api.Handler.ShmSize, *api.Compute.Mem)
815+
}
816+
}
817+
818+
if api.TaskDefinition != nil && api.TaskDefinition.ShmSize != nil && api.Compute.Mem != nil {
819+
if api.TaskDefinition.ShmSize.Cmp(api.Compute.Mem.Quantity) > 0 {
820+
return ErrorShmSizeCannotExceedMem(userconfig.TaskDefinitionKey, *api.TaskDefinition.ShmSize, *api.Compute.Mem)
807821
}
808822
}
809823

pkg/types/userconfig/api.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ type TaskDefinition struct {
6969
Path string `json:"path" yaml:"path"`
7070
PythonPath *string `json:"python_path" yaml:"python_path"`
7171
Image string `json:"image" yaml:"image"`
72+
ShmSize *k8s.Quantity `json:"shm_size" yaml:"shm_size"`
7273
LogLevel LogLevel `json:"log_level" yaml:"log_level"`
7374
Config map[string]interface{} `json:"config" yaml:"config"`
7475
Env map[string]string `json:"env" yaml:"env"`
@@ -397,6 +398,9 @@ func (task *TaskDefinition) UserStr() string {
397398
sb.WriteString(fmt.Sprintf("%s: %s\n", PythonPathKey, *task.PythonPath))
398399
}
399400
sb.WriteString(fmt.Sprintf("%s: %s\n", ImageKey, task.Image))
401+
if task.ShmSize != nil {
402+
sb.WriteString(fmt.Sprintf("%s: %s\n", ShmSizeKey, task.ShmSize.String()))
403+
}
400404
sb.WriteString(fmt.Sprintf("%s: %s\n", LogLevelKey, task.LogLevel))
401405
if len(task.Config) > 0 {
402406
sb.WriteString(fmt.Sprintf("%s:\n", ConfigKey))
@@ -442,7 +446,7 @@ func (handler *Handler) UserStr() string {
442446
sb.WriteString(fmt.Sprintf("%s: %s\n", ThreadsPerProcessKey, s.Int32(handler.ThreadsPerProcess)))
443447

444448
if handler.ShmSize != nil {
445-
sb.WriteString(fmt.Sprintf("%s: %s\n", ShmSize, handler.ShmSize.UserString))
449+
sb.WriteString(fmt.Sprintf("%s: %s\n", ShmSizeKey, handler.ShmSize.UserString))
446450
}
447451

448452
if len(handler.Config) > 0 {

pkg/types/userconfig/config_key.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ const (
4242
TensorFlowServingImageKey = "tensorflow_serving_image"
4343
ProcessesPerReplicaKey = "processes_per_replica"
4444
ThreadsPerProcessKey = "threads_per_process"
45-
ShmSize = "shm_size"
45+
ShmSizeKey = "shm_size"
4646
LogLevelKey = "log_level"
4747
ConfigKey = "config"
4848
EnvKey = "env"

0 commit comments

Comments
 (0)