feat(backend): add support for resource requests. Closes #7047. Closes …

…#6354. (#9088) * Add support for resource requests * Update pipelinespec reference * Stash unit tests * Fix tests * Update license * Update driver image * Fix tests * Add resource requests sample test * Revert sample test and add TODO
kubeflow · Apr 10, 2023 · 37dac45 · 37dac45
1 parent 0634943
commit 37dac45
Show file tree

Hide file tree

Showing 11 changed files with 138 additions and 13 deletions.
diff --git a/backend/src/apiserver/template/template_test.go b/backend/src/apiserver/template/template_test.go
diff --git a/backend/src/v2/compiler/argocompiler/argo.go b/backend/src/v2/compiler/argocompiler/argo.go
@@ -118,7 +118,7 @@ func Compile(jobArg *pipelinespec.PipelineJob, kubernetesSpecArg *pipelinespec.S
 		wf:        wf,
 		templates: make(map[string]*wfapi.Template),
 		// TODO(chensun): release process and update the images.
-		driverImage:   "gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c",
+		driverImage:   "gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc",
 		launcherImage: "gcr.io/ml-pipeline-test/kfp-launcher-v2@sha256:2b29da85580823f524a349d2e94db3822be00bd49afa82c9f4a718d51a3b7c06",
 		job:           job,
 		spec:          spec,

diff --git a/backend/src/v2/compiler/argocompiler/testdata/create_mount_delete_dynamic_pvc.yaml b/backend/src/v2/compiler/argocompiler/testdata/create_mount_delete_dynamic_pvc.yaml
@@ -67,7 +67,7 @@ spec:
       - '{{inputs.parameters.kubernetes-config}}'
       command:
       - driver
-      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
+      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
       name: ""
       resources:
         limits:
@@ -297,7 +297,7 @@ spec:
       - '{{outputs.parameters.condition.path}}'
       command:
       - driver
-      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
+      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
       name: ""
       resources:
         limits:

diff --git a/backend/src/v2/compiler/argocompiler/testdata/hello_world.yaml b/backend/src/v2/compiler/argocompiler/testdata/hello_world.yaml
@@ -50,7 +50,7 @@ spec:
       - '{{inputs.parameters.kubernetes-config}}'
       command:
       - driver
-      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
+      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
       name: ""
       resources:
         limits:
@@ -207,7 +207,7 @@ spec:
       - '{{outputs.parameters.condition.path}}'
       command:
       - driver
-      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
+      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
       name: ""
       resources:
         limits:

diff --git a/backend/src/v2/compiler/argocompiler/testdata/importer.yaml b/backend/src/v2/compiler/argocompiler/testdata/importer.yaml
@@ -120,7 +120,7 @@ spec:
       - '{{outputs.parameters.condition.path}}'
       command:
       - driver
-      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:690e078d675f9cc91b25465aaabd6bf8d7fa2b745741ad36ac2fce0ce217c11c
+      image: gcr.io/ml-pipeline-test/kfp-driver@sha256:fd53656e109ff9269d9041a49aade07fd9fa5b0a00e6e0c76440b7ce0cf449fc
       name: ""
       resources:
         limits:

diff --git a/backend/src/v2/driver/driver.go b/backend/src/v2/driver/driver.go
@@ -344,7 +344,8 @@ func makePodSpecPatch(
 		"--", // separater before user command and args
 	}
 	res := k8score.ResourceRequirements{
-		Limits: map[k8score.ResourceName]k8sres.Quantity{},
+		Limits:   map[k8score.ResourceName]k8sres.Quantity{},
+		Requests: map[k8score.ResourceName]k8sres.Quantity{},
 	}
 	memoryLimit := container.GetResources().GetMemoryLimit()
 	if memoryLimit != 0 {
@@ -354,6 +355,14 @@ func makePodSpecPatch(
 		}
 		res.Limits[k8score.ResourceMemory] = q
 	}
+	memoryRequest := container.GetResources().GetMemoryRequest()
+	if memoryRequest != 0 {
+		q, err := k8sres.ParseQuantity(fmt.Sprintf("%vG", memoryRequest))
+		if err != nil {
+			return "", err
+		}
+		res.Requests[k8score.ResourceMemory] = q
+	}
 	cpuLimit := container.GetResources().GetCpuLimit()
 	if cpuLimit != 0 {
 		q, err := k8sres.ParseQuantity(fmt.Sprintf("%v", cpuLimit))
@@ -362,6 +371,14 @@ func makePodSpecPatch(
 		}
 		res.Limits[k8score.ResourceCPU] = q
 	}
+	cpuRequest := container.GetResources().GetCpuRequest()
+	if cpuRequest != 0 {
+		q, err := k8sres.ParseQuantity(fmt.Sprintf("%v", cpuRequest))
+		if err != nil {
+			return "", err
+		}
+		res.Requests[k8score.ResourceCPU] = q
+	}
 	accelerator := container.GetResources().GetAccelerator()
 	if accelerator != nil {
 		if accelerator.GetType() != "" && accelerator.GetCount() > 0 {

diff --git a/backend/src/v2/driver/driver_test.go b/backend/src/v2/driver/driver_test.go
@@ -236,3 +236,108 @@ func Test_makePodSpecPatch_acceleratorConfig(t *testing.T) {
 		})
 	}
 }
+
+func Test_makePodSpecPatch_resourceRequests(t *testing.T) {
+	viper.Set("KFP_POD_NAME", "MyWorkflowPod")
+	viper.Set("KFP_POD_UID", "a1b2c3d4-a1b2-a1b2-a1b2-a1b2c3d4e5f6")
+	type args struct {
+		container     *pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec
+		componentSpec *pipelinespec.ComponentSpec
+		executorInput *pipelinespec.ExecutorInput
+		executionID   int64
+		pipelineName  string
+		runID         string
+	}
+	tests := []struct {
+		name    string
+		args    args
+		want    string
+		notWant string
+	}{
+		{
+			"Valid - with requests",
+			args{
+				&pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec{
+					Image:   "python:3.7",
+					Args:    []string{"--function_to_execute", "add"},
+					Command: []string{"sh", "-ec", "python3 -m kfp.components.executor_main"},
+					Resources: &pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec_ResourceSpec{
+						CpuLimit:      2.0,
+						MemoryLimit:   1.5,
+						CpuRequest:    1.0,
+						MemoryRequest: 0.65,
+					},
+				},
+				&pipelinespec.ComponentSpec{
+					Implementation: &pipelinespec.ComponentSpec_ExecutorLabel{ExecutorLabel: "addition"},
+					InputDefinitions: &pipelinespec.ComponentInputsSpec{
+						Parameters: map[string]*pipelinespec.ComponentInputsSpec_ParameterSpec{
+							"a": {Type: pipelinespec.PrimitiveType_DOUBLE},
+							"b": {Type: pipelinespec.PrimitiveType_DOUBLE},
+						},
+					},
+					OutputDefinitions: &pipelinespec.ComponentOutputsSpec{
+						Parameters: map[string]*pipelinespec.ComponentOutputsSpec_ParameterSpec{
+							"Output": {Type: pipelinespec.PrimitiveType_DOUBLE},
+						},
+					},
+				},
+				nil,
+				1,
+				"MyPipeline",
+				"a1b2c3d4-a1b2-a1b2-a1b2-a1b2c3d4e5f6",
+			},
+			`"resources":{"limits":{"cpu":"2","memory":"1500M"},"requests":{"cpu":"1","memory":"650M"}}`,
+			"",
+		},
+		{
+			"Valid - zero requests",
+			args{
+				&pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec{
+					Image:   "python:3.7",
+					Args:    []string{"--function_to_execute", "add"},
+					Command: []string{"sh", "-ec", "python3 -m kfp.components.executor_main"},
+					Resources: &pipelinespec.PipelineDeploymentConfig_PipelineContainerSpec_ResourceSpec{
+						CpuLimit:      2.0,
+						MemoryLimit:   1.5,
+						CpuRequest:    0,
+						MemoryRequest: 0,
+					},
+				},
+				&pipelinespec.ComponentSpec{
+					Implementation: &pipelinespec.ComponentSpec_ExecutorLabel{ExecutorLabel: "addition"},
+					InputDefinitions: &pipelinespec.ComponentInputsSpec{
+						Parameters: map[string]*pipelinespec.ComponentInputsSpec_ParameterSpec{
+							"a": {Type: pipelinespec.PrimitiveType_DOUBLE},
+							"b": {Type: pipelinespec.PrimitiveType_DOUBLE},
+						},
+					},
+					OutputDefinitions: &pipelinespec.ComponentOutputsSpec{
+						Parameters: map[string]*pipelinespec.ComponentOutputsSpec_ParameterSpec{
+							"Output": {Type: pipelinespec.PrimitiveType_DOUBLE},
+						},
+					},
+				},
+				nil,
+				1,
+				"MyPipeline",
+				"a1b2c3d4-a1b2-a1b2-a1b2-a1b2c3d4e5f6",
+			},
+			`"resources":{"limits":{"cpu":"2","memory":"1500M"}}`,
+			`"requests"`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := makePodSpecPatch(tt.args.container, tt.args.componentSpec, tt.args.executorInput, tt.args.executionID, tt.args.pipelineName, tt.args.runID)
+			assert.Nil(t, err)
+			assert.NotEmpty(t, got)
+			if tt.want != "" {
+				assert.Contains(t, got, tt.want)
+			}
+			if tt.notWant != "" {
+				assert.NotContains(t, got, tt.notWant)
+			}
+		})
+	}
+}
diff --git a/backend/third_party_licenses/apiserver.csv b/backend/third_party_licenses/apiserver.csv
@@ -59,7 +59,7 @@ github.com/json-iterator/go,https://github.com/json-iterator/go/blob/v1.1.12/LIC
 github.com/klauspost/compress/flate,https://github.com/klauspost/compress/blob/v1.14.2/LICENSE,Apache-2.0
 github.com/klauspost/cpuid,https://github.com/klauspost/cpuid/blob/v1.3.1/LICENSE,MIT
 github.com/klauspost/pgzip,https://github.com/klauspost/pgzip/blob/v1.2.5/LICENSE,MIT
-github.com/kubeflow/pipelines/api/v2alpha1/go,https://github.com/kubeflow/pipelines/blob/10c4db9ebed9/api/LICENSE,Apache-2.0
+github.com/kubeflow/pipelines/api/v2alpha1/go,https://github.com/kubeflow/pipelines/blob/758c91f76784/api/LICENSE,Apache-2.0
 github.com/kubeflow/pipelines/backend,https://github.com/kubeflow/pipelines/blob/HEAD/LICENSE,Apache-2.0
 github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata,https://github.com/kubeflow/pipelines/blob/e78ed557ddcb/third_party/ml-metadata/LICENSE,Apache-2.0
 github.com/lann/builder,https://github.com/lann/builder/blob/47ae307949d0/LICENSE,MIT

diff --git a/go.mod b/go.mod
@@ -28,7 +28,7 @@ require (
 	github.com/jinzhu/gorm v1.9.1
 	github.com/jinzhu/inflection v1.0.0 // indirect
 	github.com/jinzhu/now v1.1.4 // indirect
-	github.com/kubeflow/pipelines/api v0.0.0-20230313210723-10c4db9ebed9
+	github.com/kubeflow/pipelines/api v0.0.0-20230331215358-758c91f76784
 	github.com/kubeflow/pipelines/third_party/ml-metadata v0.0.0-20220118175555-e78ed557ddcb
 	github.com/lestrrat-go/strftime v1.0.4
 	github.com/mattn/go-sqlite3 v1.14.16

diff --git a/go.sum b/go.sum
diff --git a/samples/core/resource_spec/resource_spec_v2.py b/samples/core/resource_spec/resource_spec_v2.py
@@ -38,6 +38,9 @@ def my_pipeline(n: int = 11234567):
     # Note, with v2 python components, there's a larger memory overhead caused
     # by installing KFP SDK in the component, so we had to increase memory limit to 650M.
     training_task = training_op(n=n).set_cpu_limit('1').set_memory_limit('650M')
+
+    # TODO(gkcalat): enable requests once SDK implements the feature
+    # training_task = training_task.set_cpu_request('1').set_memory_request('650M')
 
     # TODO(Bobgy): other resource specs like cpu requests, memory requests and
     # GPU limits are not available yet: https://github.com/kubeflow/pipelines/issues/6354.