SDK - Made outputs with original names available in ContainerOp.outputs (#3734)

* SDK - Made outputs with original names available in ContainerOp.outputs

Previously, ContainerOp had strict requirements for the output names, so we had to convert all the names before passing them to the ContainerOp constructor. Outputs with non-pythonic names could not be accessed using their original names.
Now ContainerOp supports any output names, so we use the original output names. However, to support legacy pipelines, we also add output references with pythonic names (a usage sketch follows this list of changes).

* Fixed the compiler test data

* Fixed the duplicate parameter outputs in the compiled workflow

* Fixed long line

* Stabilized the output naming conflict resolution

* Fix case of missing special outputs
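
A usage sketch of the new dual naming (the component definition, names, and pipeline function below are illustrative and not part of this commit; it assumes an SDK version that includes this change):

# Hypothetical component used only to illustrate the dual output naming.
import kfp.components as components

producer_op = components.load_component_from_text('''
name: Produce stats
outputs:
- {name: Number of rows, type: Integer}
implementation:
  container:
    image: alpine
    command: [sh, -c, 'echo 42 > "$0"', {outputPath: Number of rows}]
''')

def my_pipeline():
    task = producer_op()
    # The original output name is now available directly:
    rows = task.outputs['Number of rows']
    # The sanitized pythonic alias is kept for legacy pipelines and
    # refers to the same PipelineParam object:
    assert rows is task.outputs['number_of_rows']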
Ark-kun authored May 13, 2020
1 parent fe30d54 commit 8ba366b
Showing 5 changed files with 98 additions and 76 deletions.
2 changes: 1 addition & 1 deletion sdk/python/kfp/compiler/_op_to_template.py
@@ -155,7 +155,7 @@ def _outputs_to_json(op: BaseOp,
     else:
         value_from_key = "path"
     output_parameters = []
-    for param in outputs.values():
+    for param in set(outputs.values()):  # set() dedupes output references
         output_parameters.append({
             'name': param.full_name,
             'valueFrom': {
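
Why the set() matters: after this change a single PipelineParam can be registered in op.outputs under both its original and its pythonic name, so iterating over the raw values would emit the same Argo output parameter twice. A standalone illustration of the dedup (class and names are made up):

# Illustrative only: two dict keys that alias one object collapse to a single
# entry when deduplicated with set(), because sets compare by identity/hash here.
class Param:
    def __init__(self, full_name):
        self.full_name = full_name

param = Param('produce-stats-Number-of-rows')
outputs = {'Number of rows': param, 'number_of_rows': param}

assert len(list(outputs.values())) == 2   # duplicate references
assert len(set(outputs.values())) == 1    # what the compiler now iterates over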
27 changes: 15 additions & 12 deletions sdk/python/kfp/dsl/_component_bridge.py
@@ -40,19 +40,14 @@ def _create_container_op_from_component_and_arguments(
         arguments=arguments,
     )
 
-    #Renaming outputs to conform with ContainerOp/Argo
-    output_names = (resolved_cmd.output_paths or {}).keys()
-    output_name_to_python = generate_unique_name_conversion_table(output_names, _sanitize_python_function_name)
-    output_paths_for_container_op = {output_name_to_python[name]: path for name, path in resolved_cmd.output_paths.items()}
-
     container_spec = component_spec.implementation.container
 
     task = dsl.ContainerOp(
         name=component_spec.name or _default_component_name,
         image=container_spec.image,
         command=resolved_cmd.command,
         arguments=resolved_cmd.args,
-        file_outputs=output_paths_for_container_op,
+        file_outputs=resolved_cmd.output_paths,
         artifact_argument_paths=[
             dsl.InputArgumentPath(
                 argument=arguments[input_name],
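
The file_outputs argument now receives the component's original output names mapped to their paths; previously the keys were pre-sanitized. A hypothetical before/after of the dict passed to ContainerOp (the path is made up):

# Hypothetical output path produced by the command-line resolver.
output_paths = {'Number of rows': '/tmp/outputs/Number_of_rows/data'}

# Old behavior (removed above): keys were sanitized before reaching ContainerOp,
# so only the pythonic name existed downstream.
legacy_file_outputs = {'number_of_rows': '/tmp/outputs/Number_of_rows/data'}

# New behavior: the resolver's dict is passed through with original names intact.
file_outputs = dict(output_paths)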
@@ -62,18 +57,26 @@
             for input_name, path in resolved_cmd.input_paths.items()
         ],
     )
-    # Fixing ContainerOp output types
-    if component_spec.outputs:
-        for output in component_spec.outputs:
-            pythonic_name = output_name_to_python[output.name]
-            if pythonic_name in task.outputs:
-                task.outputs[pythonic_name].param_type = output.type
 
     component_meta = copy.copy(component_spec)
     component_meta.implementation = None
     task._set_metadata(component_meta)
     task._component_ref = component_ref
 
+    # Previously, ContainerOp had strict requirements for the output names, so we had to
+    # convert all the names before passing them to the ContainerOp constructor.
+    # Outputs with non-pythonic names could not be accessed using their original names.
+    # Now ContainerOp supports any output names, so we're now using the original output names.
+    # However to support legacy pipelines, we're also adding output references with pythonic names.
+    # TODO: Add warning when people use the legacy output names.
+    output_names = [output_spec.name for output_spec in component_spec.outputs or []]  # Stabilizing the ordering
+    output_name_to_python = generate_unique_name_conversion_table(output_names, _sanitize_python_function_name)
+    for output_name in output_names:
+        pythonic_output_name = output_name_to_python[output_name]
+        # Note: Some component outputs are currently missing from task.outputs (e.g. MLPipeline UI Metadata)
+        if pythonic_output_name not in task.outputs and output_name in task.outputs:
+            task.outputs[pythonic_output_name] = task.outputs[output_name]
+
     if container_spec.env:
         from kubernetes import client as k8s_client
         for name, value in container_spec.env.items():
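
A condensed, self-contained sketch of the aliasing loop above (the sanitize helper is a stand-in for _sanitize_python_function_name plus generate_unique_name_conversion_table, which additionally resolves naming conflicts; the output names are illustrative):

import re

def sanitize(name: str) -> str:
    # Stand-in sanitizer: replace non-alphanumeric runs with underscores and lowercase.
    return re.sub(r'[^0-9a-zA-Z]+', '_', name).strip('_').lower()

output_names = ['Number of rows', 'MLPipeline UI metadata']
task_outputs = {'Number of rows': object()}  # the UI metadata output is not exposed here

for output_name in output_names:
    pythonic_name = sanitize(output_name)
    # Guard both lookups: the output may be missing from task_outputs
    # (e.g. the special UI metadata output) or the alias may already exist.
    if pythonic_name not in task_outputs and output_name in task_outputs:
        task_outputs[pythonic_name] = task_outputs[output_name]

print(sorted(task_outputs))  # ['Number of rows', 'number_of_rows']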
13 changes: 7 additions & 6 deletions sdk/python/kfp/dsl/_container_op.py
@@ -1098,9 +1098,13 @@ def _decorated(*args, **kwargs):
             name: _pipeline_param.PipelineParam(name, op_name=self.name)
             for name in file_outputs.keys()
         }
 
-        if len(self.outputs) == 1:
-            self.output = list(self.outputs.values())[0]
+        # Syntactic sugar: Add task.output attribute if the component has a single output.
+        # TODO: Currently the "MLPipeline UI Metadata" output is removed from outputs to preserve backwards compatibility.
+        # Maybe stop excluding it from outputs, but rather exclude it from unique_outputs.
+        unique_outputs = set(self.outputs.values())
+        if len(unique_outputs) == 1:
+            self.output = list(unique_outputs)[0]
         else:
             self.output = _MultipleOutputsError()
 
@@ -1164,9 +1168,6 @@ def _set_metadata(self, metadata):
                 output_type = output_meta.type
                 self.outputs[output].param_type = output_type
 
-        if len(self.outputs) == 1:
-            self.output = list(self.outputs.values())[0]
-
     def add_pvolumes(self,
                      pvolumes: Dict[str, V1Volume] = None):
         """Updates the existing pvolumes dict, extends volumes and volume_mounts