Skip to content

Commit

Permalink
feat(backend): add support for resource requests. Closes #7047. Closes
Browse files Browse the repository at this point in the history
…#6354. (#9088)

* Add support for resource requests

* Update pipelinespec reference

* Stash unit tests

* Fix tests

* Update license

* Update driver image

* Fix tests

* Add resource requests sample test

* Revert sample test and add TODO
  • Loading branch information
gkcalat authored Apr 10, 2023
1 parent 0634943 commit 37dac45
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 13 deletions.
4 changes: 2 additions & 2 deletions backend/src/apiserver/template/template_test.go

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion backend/src/v2/compiler/argocompiler/argo.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func Compile(jobArg *pipelinespec.PipelineJob, kubernetesSpecArg *pipelinespec.S
wf: wf,
templates: make(map[string]*wfapi.Template),
// TODO(chensun): release process and update the images.
driverImage: "gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c",
driverImage: "gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc",
launcherImage: "gcr.io/ml-pipeline-test/kfp-launcher-v2@sha256:2b29da85580823f524a349d2e94db3822be00bd49afa82c9f4a718d51a3b7c06",
job: job,
spec: spec,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ spec:
- '{{inputs.parameters.kubernetes-config}}'
command:
- driver
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
name: ""
resources:
limits:
Expand Down Expand Up @@ -297,7 +297,7 @@ spec:
- '{{outputs.parameters.condition.path}}'
command:
- driver
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
name: ""
resources:
limits:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ spec:
- '{{inputs.parameters.kubernetes-config}}'
command:
- driver
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
name: ""
resources:
limits:
Expand Down Expand Up @@ -207,7 +207,7 @@ spec:
- '{{outputs.parameters.condition.path}}'
command:
- driver
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
name: ""
resources:
limits:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ spec:
- '{{outputs.parameters.condition.path}}'
command:
- driver
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
name: ""
resources:
limits:
Expand Down
19 changes: 18 additions & 1 deletion backend/src/v2/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ func makePodSpecPatch(
"--", // separater before user command and args
}
res := k8score.ResourceRequirements{
Limits: map[k8score.ResourceName]k8sres.Quantity{},
Limits: map[k8score.ResourceName]k8sres.Quantity{},
Requests: map[k8score.ResourceName]k8sres.Quantity{},
}
memoryLimit := container.GetResources().GetMemoryLimit()
if memoryLimit != 0 {
Expand All @@ -354,6 +355,14 @@ func makePodSpecPatch(
}
res.Limits[k8score.ResourceMemory] = q
}
memoryRequest := container.GetResources().GetMemoryRequest()
if memoryRequest != 0 {
q, err := k8sres.ParseQuantity(fmt.Sprintf("%vG", memoryRequest))
if err != nil {
return "", err
}
res.Requests[k8score.ResourceMemory] = q
}
cpuLimit := container.GetResources().GetCpuLimit()
if cpuLimit != 0 {
q, err := k8sres.ParseQuantity(fmt.Sprintf("%v", cpuLimit))
Expand All @@ -362,6 +371,14 @@ func makePodSpecPatch(
}
res.Limits[k8score.ResourceCPU] = q
}
cpuRequest := container.GetResources().GetCpuRequest()
if cpuRequest != 0 {
q, err := k8sres.ParseQuantity(fmt.Sprintf("%v", cpuRequest))
if err != nil {
return "", err
}
res.Requests[k8score.ResourceCPU] = q
}
accelerator := container.GetResources().GetAccelerator()
if accelerator != nil {
if accelerator.GetType() != "" && accelerator.GetCount() > 0 {
Expand Down
105 changes: 105 additions & 0 deletions backend/src/v2/driver/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,108 @@ func Test_makePodSpecPatch_acceleratorConfig(t *testing.T) {
})
}
}

func Test_makePodSpecPatch_resourceRequests(t *testing.T) {
viper.Set("KFP_POD_NAME", "MyWorkflowPod")
viper.Set("KFP_POD_UID", "a1b2c3d4-a1b2-a1b2-a1b2-a1b2c3d4e5f6")
type args struct {
container *pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec
componentSpec *pipelinespec.ComponentSpec
executorInput *pipelinespec.ExecutorInput
executionID int64
pipelineName string
runID string
}
tests := []struct {
name string
args args
want string
notWant string
}{
{
"Valid - with requests",
args{
&pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec{
Image: "python:3.7",
Args: []string{"--function_to_execute", "add"},
Command: []string{"sh", "-ec", "python3 -m kfp.components.executor_main"},
Resources: &pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec_ResourceSpec{
CpuLimit: 2.0,
MemoryLimit: 1.5,
CpuRequest: 1.0,
MemoryRequest: 0.65,
},
},
&pipelinespec.ComponentSpec{
Implementation: &pipelinespec.ComponentSpec_ExecutorLabel{ExecutorLabel: "addition"},
InputDefinitions: &pipelinespec.ComponentInputsSpec{
Parameters: map[string]*pipelinespec.ComponentInputsSpec_ParameterSpec{
"a": {Type: pipelinespec.PrimitiveType_DOUBLE},
"b": {Type: pipelinespec.PrimitiveType_DOUBLE},
},
},
OutputDefinitions: &pipelinespec.ComponentOutputsSpec{
Parameters: map[string]*pipelinespec.ComponentOutputsSpec_ParameterSpec{
"Output": {Type: pipelinespec.PrimitiveType_DOUBLE},
},
},
},
nil,
1,
"MyPipeline",
"a1b2c3d4-a1b2-a1b2-a1b2-a1b2c3d4e5f6",
},
`"resources":{"limits":{"cpu":"2","memory":"1500M"},"requests":{"cpu":"1","memory":"650M"}}`,
"",
},
{
"Valid - zero requests",
args{
&pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec{
Image: "python:3.7",
Args: []string{"--function_to_execute", "add"},
Command: []string{"sh", "-ec", "python3 -m kfp.components.executor_main"},
Resources: &pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec_ResourceSpec{
CpuLimit: 2.0,
MemoryLimit: 1.5,
CpuRequest: 0,
MemoryRequest: 0,
},
},
&pipelinespec.ComponentSpec{
Implementation: &pipelinespec.ComponentSpec_ExecutorLabel{ExecutorLabel: "addition"},
InputDefinitions: &pipelinespec.ComponentInputsSpec{
Parameters: map[string]*pipelinespec.ComponentInputsSpec_ParameterSpec{
"a": {Type: pipelinespec.PrimitiveType_DOUBLE},
"b": {Type: pipelinespec.PrimitiveType_DOUBLE},
},
},
OutputDefinitions: &pipelinespec.ComponentOutputsSpec{
Parameters: map[string]*pipelinespec.ComponentOutputsSpec_ParameterSpec{
"Output": {Type: pipelinespec.PrimitiveType_DOUBLE},
},
},
},
nil,
1,
"MyPipeline",
"a1b2c3d4-a1b2-a1b2-a1b2-a1b2c3d4e5f6",
},
`"resources":{"limits":{"cpu":"2","memory":"1500M"}}`,
`"requests"`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := makePodSpecPatch(tt.args.container, tt.args.componentSpec, tt.args.executorInput, tt.args.executionID, tt.args.pipelineName, tt.args.runID)
assert.Nil(t, err)
assert.NotEmpty(t, got)
if tt.want != "" {
assert.Contains(t, got, tt.want)
}
if tt.notWant != "" {
assert.NotContains(t, got, tt.notWant)
}
})
}
}
2 changes: 1 addition & 1 deletion backend/third_party_licenses/apiserver.csv
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ github.com/json-iterator/go,https://github.com/json-iterator/go/blob/v1.1.12/LIC
github.com/klauspost/compress/flate,https://github.com/klauspost/compress/blob/v1.14.2/LICENSE,Apache-2.0
github.com/klauspost/cpuid,https://github.com/klauspost/cpuid/blob/v1.3.1/LICENSE,MIT
github.com/klauspost/pgzip,https://github.com/klauspost/pgzip/blob/v1.2.5/LICENSE,MIT
github.com/kubeflow/pipelines/api/v2alpha1/go,https://github.com/kubeflow/pipelines/blob/10c4db9ebed9/api/LICENSE,Apache-2.0
github.com/kubeflow/pipelines/api/v2alpha1/go,https://github.com/kubeflow/pipelines/blob/758c91f76784/api/LICENSE,Apache-2.0
github.com/kubeflow/pipelines/backend,https://github.com/kubeflow/pipelines/blob/HEAD/LICENSE,Apache-2.0
github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata,https://github.com/kubeflow/pipelines/blob/e78ed557ddcb/third_party/ml-metadata/LICENSE,Apache-2.0
github.com/lann/builder,https://github.com/lann/builder/blob/47ae307949d0/LICENSE,MIT
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ require (
github.com/jinzhu/gorm v1.9.1
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.4 // indirect
github.com/kubeflow/pipelines/api v0.0.0-20230313210723-10c4db9ebed9
github.com/kubeflow/pipelines/api v0.0.0-20230331215358-758c91f76784
github.com/kubeflow/pipelines/third_party/ml-metadata v0.0.0-20220118175555-e78ed557ddcb
github.com/lestrrat-go/strftime v1.0.4
github.com/mattn/go-sqlite3 v1.14.16
Expand Down
4 changes: 2 additions & 2 deletions go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions samples/core/resource_spec/resource_spec_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def my_pipeline(n: int = 11234567):
# Note, with v2 python components, there's a larger memory overhead caused
# by installing KFP SDK in the component, so we had to increase memory limit to 650M.
training_task = training_op(n=n).set_cpu_limit('1').set_memory_limit('650M')

# TODO(gkcalat): enable requests once SDK implements the feature
# training_task = training_task.set_cpu_request('1').set_memory_request('650M')

# TODO(Bobgy): other resource specs like cpu requests, memory requests and
# GPU limits are not available yet: https://github.com/kubeflow/pipelines/issues/6354.
Expand Down

0 comments on commit 37dac45

Please sign in to comment.