Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SDK - Lightweight - Convert the names of file inputs and outputs #2260

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions sdk/python/kfp/components/_python_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from ._yaml_utils import dump_yaml
from ._components import _create_task_factory_from_component_spec
from ._data_passing import serialize_value, type_name_to_deserializer, type_name_to_serializer, type_to_type_name
from ._naming import _make_name_unique_by_adding_index
from ._structures import *

import inspect
Expand All @@ -38,6 +39,9 @@
T = TypeVar('T')


_FEATURE_STRIP_FILE_IO_NAME_PARTS = True
Ark-kun marked this conversation as resolved.
Show resolved Hide resolved


# InputPath(list) or InputPath('JsonObject')

class InputPath:
Expand Down Expand Up @@ -232,35 +236,53 @@ def annotation_to_type_struct(annotation):
type_name = type_to_type_name[type_name]
return type_name

input_names = set()
output_names = set()
for parameter in parameters:
parameter_annotation = parameter.annotation
passing_style = None
io_name = parameter.name
if isinstance(parameter_annotation, (InputPath, InputTextFile, InputBinaryFile, OutputPath, OutputTextFile, OutputBinaryFile)):
passing_style = type(parameter_annotation)
parameter_annotation = parameter_annotation.type
if parameter.default is not inspect.Parameter.empty:
raise ValueError('Default values for file inputs/outputs are not supported. If you need them for some reason, please create an issue and write about your usage scenario.')
# TODO: Fix the input names: "number_file_path" parameter should be exposed as "number" input
if _FEATURE_STRIP_FILE_IO_NAME_PARTS:
# Removing the "_path" and "_file" suffixes from the input/output names as the argument passed to the component needs to be the data itself, not local file path.
# Problem: When accepting file inputs (outputs), the function inside the component receives file paths (or file streams), so it's natural to call the function parameter "something_file_path" (e.g. model_file_path or number_file_path).
# But from the outside perspective, there are no files or paths - the actual data objects (or references to them) are passed in.
# It looks very strange when argument passing code looks like this: `component(number_file_path=42)`. This looks like an error since 42 is not a path. It's not even a string.
# It's much more natural to strip the names of file inputs and outputs of "_file" or "_path" suffixes. Then the argument passing code will look natural: "component(number=42)".
if isinstance(parameter.annotation, (InputPath, OutputPath)) and io_name.endswith('_path'):
io_name = io_name[0:-len('_path')]
if io_name.endswith('_file'):
io_name = io_name[0:-len('_file')]
type_struct = annotation_to_type_struct(parameter_annotation)
#TODO: Humanize the input/output names

if isinstance(parameter.annotation, (OutputPath, OutputTextFile, OutputBinaryFile)):
io_name = _make_name_unique_by_adding_index(io_name, output_names, '_')
output_names.add(io_name)
output_spec = OutputSpec(
name=parameter.name,
name=io_name,
type=type_struct,
)
output_spec._passing_style = passing_style
output_spec._parameter_name = parameter.name
outputs.append(output_spec)
else:
io_name = _make_name_unique_by_adding_index(io_name, input_names, '_')
input_names.add(io_name)
input_spec = InputSpec(
name=parameter.name,
name=io_name,
type=type_struct,
)
if parameter.default is not inspect.Parameter.empty:
input_spec.optional = True
if parameter.default is not None:
input_spec.default = serialize_value(parameter.default, type_struct)
input_spec._passing_style = passing_style
input_spec._parameter_name = parameter.name
inputs.append(input_spec)

#Analyzing the return type annotations.
Expand All @@ -271,16 +293,21 @@ def annotation_to_type_struct(annotation):
if hasattr(return_ann, '_field_types'):
type_struct = annotation_to_type_struct(return_ann._field_types.get(field_name, None))

output_name = _make_name_unique_by_adding_index(field_name, output_names, '_')
output_names.add(output_name)
output_spec = OutputSpec(
name=field_name,
name=output_name,
type=type_struct,
)
output_spec._passing_style = None
output_spec._return_tuple_field_name = field_name
outputs.append(output_spec)
elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty:
output_name = _make_name_unique_by_adding_index(single_output_name_const, output_names, '_') # Fixes exotic, but possible collision: `def func(output_path: OutputPath()) -> str: ...`
output_names.add(output_name)
type_struct = annotation_to_type_struct(signature.return_annotation)
output_spec = OutputSpec(
name=single_output_name_const,
name=output_name,
type=type_struct,
)
output_spec._passing_style = None
Expand Down Expand Up @@ -398,7 +425,7 @@ def get_serializer_and_register_definitions(type_name) -> str:
is_required = isinstance(input, OutputSpec) or not input.optional
line = '_parser.add_argument("{param_flag}", dest="{param_var}", type={param_type}, required={is_required}, default=argparse.SUPPRESS)'.format(
param_flag=param_flag,
param_var=input.name,
param_var=input._parameter_name, # Not input.name, since the inputs could have been renamed
param_type=get_argparse_type_for_input_file(input._passing_style) or get_deserializer_and_register_definitions(input.type),
is_required=str(is_required),
)
Expand Down
116 changes: 104 additions & 12 deletions sdk/python/tests/components/test_python_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,8 +541,7 @@ def consume_file_path(number_file_path: InputPath(int)) -> int:

self.assertEqual(task_factory.component_spec.inputs[0].type, 'Integer')

# TODO: Fix the input names: "number_file_path" parameter should be exposed as "number" input
self.helper_test_component_using_local_call(task_factory, arguments={'number_file_path': "42"}, expected_output_values={'output': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={'number': "42"}, expected_output_values={'output': '42'})


def test_input_text_file(self):
Expand All @@ -556,8 +555,7 @@ def consume_file_path(number_file: InputTextFile(int)) -> int:

self.assertEqual(task_factory.component_spec.inputs[0].type, 'Integer')

# TODO: Fix the input names: "number_file" parameter should be exposed as "number" input
self.helper_test_component_using_local_call(task_factory, arguments={'number_file': "42"}, expected_output_values={'output': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={'number': "42"}, expected_output_values={'output': '42'})


def test_input_binary_file(self):
Expand All @@ -571,8 +569,7 @@ def consume_file_path(number_file: InputBinaryFile(int)) -> int:

self.assertEqual(task_factory.component_spec.inputs[0].type, 'Integer')

# TODO: Fix the input names: "number_file" parameter should be exposed as "number" input
self.helper_test_component_using_local_call(task_factory, arguments={'number_file': "42"}, expected_output_values={'output': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={'number': "42"}, expected_output_values={'output': '42'})


def test_output_path(self):
Expand All @@ -587,8 +584,7 @@ def write_to_file_path(number_file_path: OutputPath(int)):
self.assertEqual(len(task_factory.component_spec.outputs), 1)
self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')

# TODO: Fix the output names: "number_file_path" should be exposed as "number" output
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number_file_path': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number': '42'})


def test_output_text_file(self):
Expand All @@ -602,8 +598,7 @@ def write_to_file_path(number_file: OutputTextFile(int)):
self.assertEqual(len(task_factory.component_spec.outputs), 1)
self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')

# TODO: Fix the output names: "number_file" should be exposed as "number" output
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number_file': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number': '42'})


def test_output_binary_file(self):
Expand All @@ -617,8 +612,105 @@ def write_to_file_path(number_file: OutputBinaryFile(int)):
self.assertEqual(len(task_factory.component_spec.outputs), 1)
self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')

# TODO: Fix the output names: "number_file" should be exposed as "number" output
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number_file': '42'})
self.helper_test_component_using_local_call(task_factory, arguments={}, expected_output_values={'number': '42'})


def test_file_input_name_conversion(self):
# Checking the input name conversion rules for file inputs:
# For InputPath, the "_path" suffix is removed
# For Input*, the "_file" suffix is removed

from kfp.components import InputPath, InputTextFile, InputBinaryFile, OutputPath, OutputTextFile, OutputBinaryFile
def consume_file_path(
number: int,

number_1a_path: str,
number_1b_file: str,
number_1c_file_path: str,
number_1d_path_file: str,

number_2a_path: InputPath(str),
number_2b_file: InputPath(str),
number_2c_file_path: InputPath(str),
number_2d_path_file: InputPath(str),

number_3a_path: InputTextFile(str),
number_3b_file: InputTextFile(str),
number_3c_file_path: InputTextFile(str),
number_3d_path_file: InputTextFile(str),

number_4a_path: InputBinaryFile(str),
number_4b_file: InputBinaryFile(str),
number_4c_file_path: InputBinaryFile(str),
number_4d_path_file: InputBinaryFile(str),

output_number_2a_path: OutputPath(str),
output_number_2b_file: OutputPath(str),
output_number_2c_file_path: OutputPath(str),
output_number_2d_path_file: OutputPath(str),

output_number_3a_path: OutputTextFile(str),
output_number_3b_file: OutputTextFile(str),
output_number_3c_file_path: OutputTextFile(str),
output_number_3d_path_file: OutputTextFile(str),

output_number_4a_path: OutputBinaryFile(str),
output_number_4b_file: OutputBinaryFile(str),
output_number_4c_file_path: OutputBinaryFile(str),
output_number_4d_path_file: OutputBinaryFile(str),
):
pass

task_factory = comp.func_to_container_op(consume_file_path)
actual_input_names = [input.name for input in task_factory.component_spec.inputs]
actual_output_names = [output.name for output in task_factory.component_spec.outputs]

self.assertEqual(
[
'number',

'number_1a_path',
'number_1b_file',
'number_1c_file_path',
'number_1d_path_file',

'number_2a',
'number_2b',
'number_2c',
'number_2d_path',

'number_3a_path',
'number_3b',
'number_3c_file_path',
'number_3d_path',

'number_4a_path',
'number_4b',
'number_4c_file_path',
'number_4d_path',
],
actual_input_names
)

self.assertEqual(
[
'output_number_2a',
'output_number_2b',
'output_number_2c',
'output_number_2d_path',

'output_number_3a_path',
'output_number_3b',
'output_number_3c_file_path',
'output_number_3d_path',

'output_number_4a_path',
'output_number_4b',
'output_number_4c_file_path',
'output_number_4d_path',
],
actual_output_names
)


def test_end_to_end_python_component_pipeline_compilation(self):
Expand Down