From ab549efc1efcdf7344e01bd61c8e2ca27b32d9d5 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 14 Mar 2024 15:01:05 -0700 Subject: [PATCH] feat(components): Release Forecasting training pipelines to V1 namespace PiperOrigin-RevId: 615914679 --- components/google-cloud/RELEASE.md | 1 - .../preview/automl/forecasting/__init__.py | 51 +- ...ep_hyperparameter_tuning_job_pipeline.yaml | 4 +- .../wide_and_deep_trainer_pipeline.yaml | 4 +- .../v1/automl/forecasting/__init__.py | 49 - .../learn_to_learn_forecasting_pipeline.yaml | 7586 ----------------- ...ence_to_sequence_forecasting_pipeline.yaml | 7545 ---------------- ...sion_transformer_forecasting_pipeline.yaml | 7531 ---------------- ...es_dense_encoder_forecasting_pipeline.yaml | 7586 ----------------- .../v1/automl/forecasting/utils.py | 920 +- 10 files changed, 45 insertions(+), 31232 deletions(-) delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 8027c394856..7f6e6491917 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,4 @@ ## Upcoming release -* Add `v1.automl.forecasting.learn_to_learn_forecasting_pipeline`, `v1.automl.forecasting.sequence_to_sequence_forecasting_pipeline`, `v1.automl.forecasting.temporal_fusion_transformer_forecasting_pipeline`, `v1.automl.forecasting.time_series_dense_encoder_forecasting_pipeline` as Forecasting on Pipelines moves to GA. * Fix bug in `preview.llm.rlhf_pipeline` that caused wrong output artifact to be used for inference after training. * Fix issue where AutoSxS was not propagating location to all sub-components. * Add CMEK support to `preview.llm.infer_pipeline`. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py index 79bdd605f84..6843d095b53 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py @@ -12,24 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Preview AutoML forecasting components.""" - +"""Experimental AutoML forecasting components.""" import os from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_ensemble import automl_forecasting_ensemble as ForecastingEnsembleOp from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_1_tuner import automl_forecasting_stage_1_tuner as ForecastingStage1TunerOp from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_2_tuner import automl_forecasting_stage_2_tuner as ForecastingStage2TunerOp -from google_cloud_pipeline_components.v1.automl.forecasting import learn_to_learn_forecasting_pipeline -from google_cloud_pipeline_components.v1.automl.forecasting import sequence_to_sequence_forecasting_pipeline -from google_cloud_pipeline_components.v1.automl.forecasting import temporal_fusion_transformer_forecasting_pipeline -from google_cloud_pipeline_components.v1.automl.forecasting import time_series_dense_encoder_forecasting_pipeline -from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_learn_to_learn_forecasting_pipeline_and_parameters -from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_sequence_to_sequence_forecasting_pipeline_and_parameters -from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_temporal_fusion_transformer_forecasting_pipeline_and_parameters -from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_time_series_dense_encoder_forecasting_pipeline_and_parameters +from google_cloud_pipeline_components.preview.automl.forecasting.utils import get_learn_to_learn_forecasting_pipeline_and_parameters +from google_cloud_pipeline_components.preview.automl.forecasting.utils import get_sequence_to_sequence_forecasting_pipeline_and_parameters +from google_cloud_pipeline_components.preview.automl.forecasting.utils import get_temporal_fusion_transformer_forecasting_pipeline_and_parameters +from google_cloud_pipeline_components.preview.automl.forecasting.utils import get_time_series_dense_encoder_forecasting_pipeline_and_parameters from kfp import components - __all__ = [ 'ForecastingEnsembleOp', 'ForecastingStage1TunerOp', @@ -43,3 +37,38 @@ 'temporal_fusion_transformer_forecasting_pipeline', 'time_series_dense_encoder_forecasting_pipeline', ] + +learn_to_learn_forecasting_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join( + os.path.dirname(__file__), 'learn_to_learn_forecasting_pipeline.yaml' + ) +) + +sequence_to_sequence_forecasting_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join( + os.path.dirname(__file__), + 'sequence_to_sequence_forecasting_pipeline.yaml', + ) +) + +temporal_fusion_transformer_forecasting_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join( + os.path.dirname(__file__), + 'temporal_fusion_transformer_forecasting_pipeline.yaml', + ) +) + +time_series_dense_encoder_forecasting_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. 
+ os.path.join( + os.path.dirname(__file__), + 'time_series_dense_encoder_forecasting_pipeline.yaml', + ) +) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml index b0c697bc833..731e7c6b71c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml @@ -49,7 +49,7 @@ # test_fraction: float [Default: -1.0] # tf_auto_transform_features: dict # tf_custom_transformation_definitions: list -# tf_transform_execution_engine: str [Default: 'bigquery'] +# tf_transform_execution_engine: str [Default: ''] # tf_transformations_path: str [Default: ''] # training_fraction: float [Default: -1.0] # transform_dataflow_disk_size_gb: int [Default: 40.0] @@ -3819,7 +3819,7 @@ root: isOptional: true parameterType: LIST tf_transform_execution_engine: - defaultValue: bigquery + defaultValue: '' description: 'Execution engine to run TF-based transformations. Currently supports "dataflow" or "bigquery"' diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml index ce122d5c7be..b6448773b17 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml @@ -65,7 +65,7 @@ # test_fraction: float [Default: -1.0] # tf_auto_transform_features: dict # tf_custom_transformation_definitions: list -# tf_transform_execution_engine: str [Default: 'bigquery'] +# tf_transform_execution_engine: str [Default: ''] # tf_transformations_path: str [Default: ''] # training_fraction: float [Default: -1.0] # transform_dataflow_disk_size_gb: int [Default: 40.0] @@ -3839,7 +3839,7 @@ root: isOptional: true parameterType: LIST tf_transform_execution_engine: - defaultValue: bigquery + defaultValue: '' description: 'Execution engine to run TF-based transformations. Currently supports "dataflow" or "bigquery"' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py index e7b9dbd4f97..d56ec1b4a2b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py @@ -13,18 +13,12 @@ # limitations under the License. 
"""GA AutoML forecasting components.""" -import os from google_cloud_pipeline_components.v1.automl.forecasting.prophet_trainer import prophet_trainer as ProphetTrainerOp from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_bqml_arima_predict_pipeline_and_parameters from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_bqml_arima_train_pipeline_and_parameters -from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_learn_to_learn_forecasting_pipeline_and_parameters from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_prophet_prediction_pipeline_and_parameters from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_prophet_train_pipeline_and_parameters -from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_sequence_to_sequence_forecasting_pipeline_and_parameters -from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_temporal_fusion_transformer_forecasting_pipeline_and_parameters -from google_cloud_pipeline_components.v1.automl.forecasting.utils import get_time_series_dense_encoder_forecasting_pipeline_and_parameters -from kfp import components __all__ = [ 'ProphetTrainerOp', @@ -32,47 +26,4 @@ 'get_bqml_arima_train_pipeline_and_parameters', 'get_prophet_prediction_pipeline_and_parameters', 'get_prophet_train_pipeline_and_parameters', - 'get_learn_to_learn_forecasting_pipeline_and_parameters', - 'get_sequence_to_sequence_forecasting_pipeline_and_parameters', - 'get_temporal_fusion_transformer_forecasting_pipeline_and_parameters', - 'get_time_series_dense_encoder_forecasting_pipeline_and_parameters', - 'learn_to_learn_forecasting_pipeline', - 'sequence_to_sequence_forecasting_pipeline', - 'temporal_fusion_transformer_forecasting_pipeline', - 'time_series_dense_encoder_forecasting_pipeline', ] - -learn_to_learn_forecasting_pipeline = components.load_component_from_file( - # Note, please don't name it as `component.yaml` which will conflict with - # the generated file. - os.path.join( - os.path.dirname(__file__), 'learn_to_learn_forecasting_pipeline.yaml' - ) -) - -sequence_to_sequence_forecasting_pipeline = components.load_component_from_file( - # Note, please don't name it as `component.yaml` which will conflict with - # the generated file. - os.path.join( - os.path.dirname(__file__), - 'sequence_to_sequence_forecasting_pipeline.yaml', - ) -) - -temporal_fusion_transformer_forecasting_pipeline = components.load_component_from_file( - # Note, please don't name it as `component.yaml` which will conflict with - # the generated file. - os.path.join( - os.path.dirname(__file__), - 'temporal_fusion_transformer_forecasting_pipeline.yaml', - ) -) - -time_series_dense_encoder_forecasting_pipeline = components.load_component_from_file( - # Note, please don't name it as `component.yaml` which will conflict with - # the generated file. 
- os.path.join( - os.path.dirname(__file__), - 'time_series_dense_encoder_forecasting_pipeline.yaml', - ) -) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml deleted file mode 100644 index f2acd9d17f7..00000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +++ /dev/null @@ -1,7586 +0,0 @@ -# PIPELINE DEFINITION -# Name: learn-to-learn-forecasting -# Description: The AutoML Forecasting pipeline. -# Inputs: -# available_at_forecast_columns: list -# context_window: int [Default: 0.0] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# enable_probabilistic_inference: bool [Default: False] -# encryption_spec_key_name: str [Default: ''] -# evaluated_examples_bigquery_path: str [Default: ''] -# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_explain_max_replica_count: int [Default: 22.0] -# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] -# evaluation_batch_predict_max_replica_count: int [Default: 25.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] -# evaluation_dataflow_max_num_workers: int [Default: 25.0] -# evaluation_dataflow_starting_num_workers: int [Default: 22.0] -# fast_testing: bool [Default: False] -# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] -# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] -# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] -# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] -# forecast_horizon: int [Default: 0.0] -# group_columns: list -# group_temporal_total_weight: float [Default: 0.0] -# group_total_weight: float [Default: 0.0] -# holiday_regions: list -# location: str -# model_description: str [Default: ''] -# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] -# num_selected_trials: int [Default: 10.0] -# optimization_objective: str -# parent_model: system.Artifact -# predefined_split_key: str [Default: ''] -# project: str -# quantiles: list -# root_dir: str -# run_evaluation: bool [Default: False] -# stage_1_num_parallel_trials: int [Default: 35.0] -# stage_1_tuner_worker_pool_specs_override: list -# stage_1_tuning_result_artifact_uri: str [Default: ''] -# stage_2_num_parallel_trials: int [Default: 35.0] -# stage_2_trainer_worker_pool_specs_override: list -# study_spec_parameters_override: list -# target_column: str -# temporal_total_weight: float [Default: 0.0] -# test_fraction: float [Default: -1.0] -# time_column: str -# time_series_attribute_columns: list -# time_series_identifier_columns: list -# timestamp_split_key: str [Default: ''] -# train_budget_milli_node_hours: float -# training_fraction: float [Default: -1.0] -# transformations: dict -# unavailable_at_forecast_columns: list -# validation_fraction: float [Default: -1.0] -# 
vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# window_max_count: int [Default: 0.0] -# window_predefined_column: str [Default: ''] -# window_stride_length: int [Default: 0.0] -# Outputs: -# feature-attribution-2-feature_attributions: system.Metrics -# feature-attribution-feature_attributions: system.Metrics -components: - comp-automl-forecasting-ensemble: - executorLabel: exec-automl-forecasting-ensemble - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The instance baseline used to calculate explanations. - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the instance schema, describing the input data - for the tf_model at serving time. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: URI of the Docker image to be used as the container for serving - predictions. This URI must identify an image in Artifact Registry or Container - Registry. - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - example_instance: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: An example instance which may be used as an input for predictions. - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-ensemble-2: - executorLabel: exec-automl-forecasting-ensemble-2 - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The instance baseline used to calculate explanations. 
- instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the instance schema, describing the input data - for the tf_model at serving time. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: URI of the Docker image to be used as the container for serving - predictions. This URI must identify an image in Artifact Registry or Container - Registry. - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - example_instance: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: An example instance which may be used as an input for predictions. - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-stage-1-tuner: - executorLabel: exec-automl-forecasting-stage-1-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: Number of hours the hyperparameter tuning should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the hyperparameter tuning. 
- parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: Number of selected trials. The number of weak learners in the - final model is 5 * num_selected_trials. - parameterType: NUMBER_INTEGER - project: - description: Project to run hyperparameter tuning. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible values: "regular" (default), - "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., [{"parameter_id": "activation","categorical_value_spec": - {"values": ["tanh"]}}]' - isOptional: true - parameterType: LIST - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-stage-2-tuner: - executorLabel: exec-automl-forecasting-stage-2-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The forecasting example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path to the json of hyperparameter tuning results to use when - evaluating models. - parameters: - deadline_hours: - description: Number of hours the cross-validation trainer should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: 'Cloud region for running the component: us-central1).' - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: Number of selected trials. The number of weak learners in the - final model. - parameterType: NUMBER_INTEGER - project: - description: Project to run stage 2 tuner. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. 
- parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained (private) model artifact paths and their hyperparameters. - parameters: - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. 
- parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - tasks: - automl-forecasting-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble - dependentTasks: - - automl-forecasting-stage-2-tuner - - get-prediction-image-uri - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-2-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble - automl-forecasting-stage-2-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-2-tuner - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input_path: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - 
taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-2-tuner - calculate-training-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: true - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-forecasting-ensemble - - model-upload - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: 
pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description - get-prediction-image-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri - inputs: - parameters: - model_type: - runtimeValue: - constant: l2l - taskInfo: - name: get-prediction-image-uri - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: get-hyperparameter-tuning-results - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-forecasting-ensemble - - get-or-create-model-description - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - parent_model: - componentInputArtifact: pipelinechannel--parent_model - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - 
inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - 
artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - force_runner_mode: - runtimeValue: - constant: Dataflow - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - runtimeValue: - constant: forecasting - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - finalize-eval-quantile-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters - inputs: - parameters: - quantiles: - componentInputParameter: pipelinechannel--quantiles - taskInfo: - name: finalize-eval-quantile-parameters - get-predictions-column: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column - dependentTasks: - - finalize-eval-quantile-parameters - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: true - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: 
batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: false - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation-forecasting: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting - dependentTasks: - - finalize-eval-quantile-parameters - - get-predictions-column - - model-batch-predict - - table-to-uri - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - ground_truth_format: - 
runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation-forecasting - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - model-batch-predict - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - use_bq_prefix: - runtimeValue: - constant: true - taskInfo: - name: table-to-uri - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - 
pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - tasks: - automl-forecasting-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble-2 - dependentTasks: - - automl-forecasting-stage-1-tuner - - get-prediction-image-uri-2 - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-1-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri-2 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble-2 - automl-forecasting-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: 
pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - reduce_search_space_mode: - runtimeValue: - constant: full - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-1-tuner - calculate-training-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: false - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-forecasting-ensemble-2 - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - 
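calculate-training-parameters-2 turns the overall train_budget_milli_node_hours and the parallel-trial counts into the stage_1_deadline_hours and stage_1_single_run_max_secs consumed by the stage-1 tuner above. The arithmetic below is purely hypothetical and only illustrates that contract; the real formula lives inside the component:

def calculate_training_parameters(
    train_budget_milli_node_hours: float,
    stage_1_num_parallel_trials: int,
) -> tuple[float, int]:
    # Hypothetical: spread the node-hour budget across parallel trials.
    # The shipped component also weighs selected_trials, the stage-2
    # trial count, fast_testing, and is_skip_architecture_search.
    node_hours = train_budget_milli_node_hours / 1000.0
    stage_1_deadline_hours = node_hours / max(stage_1_num_parallel_trials, 1)
    stage_1_single_run_max_secs = int(stage_1_deadline_hours * 3600)
    return stage_1_deadline_hours, stage_1_single_run_max_secs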
pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description-2 - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description-2 - get-prediction-image-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri-2 - inputs: - parameters: - model_type: - runtimeValue: - constant: l2l - taskInfo: - name: get-prediction-image-uri-2 - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - dependentTasks: - - automl-forecasting-ensemble-2 - - get-or-create-model-description-2 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - parent_model: - componentInputArtifact: pipelinechannel--parent_model - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description-2 - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - 
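get-prediction-image-uri-2 resolves the hard-coded model_type constant (l2l here) to a serving container image for the ensemble. A hypothetical sketch of that lookup; the placeholder strings stand in for real image URIs, which are pinned inside the component:

def get_prediction_image_uri(model_type: str) -> str:
    # Hypothetical mapping; the model types follow the l2l/seq2seq/tft/tide
    # naming used elsewhere in this spec.
    images = {
        'l2l': '<l2l prediction server image>',
        'seq2seq': '<seq2seq prediction server image>',
        'tft': '<tft prediction server image>',
        'tide': '<tide prediction server image>',
    }
    if model_type not in images:
        raise ValueError(f'Unsupported model type: {model_type}')
    return images[model_type]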
explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - 
pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - force_runner_mode: - runtimeValue: - constant: Dataflow - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - runtimeValue: - constant: forecasting - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - finalize-eval-quantile-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters-2 - inputs: - parameters: - quantiles: - componentInputParameter: pipelinechannel--quantiles - taskInfo: - name: finalize-eval-quantile-parameters-2 - get-predictions-column-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: 
pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: true - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: false - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-forecasting-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - - get-predictions-column-2 - - model-batch-predict-2 - - table-to-uri-2 - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - 
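The job_display_name values above embed {{$.pipeline_job_uuid}} and {{$.pipeline_task_uuid}} so every evaluation run gets a unique batch-job name. Assuming this spec was compiled with the KFP v2 SDK, the source form of such a name uses the DSL placeholder constants:

from kfp import dsl

job_display_name = (
    'batch-predict-forecasting-evaluation-'
    f'{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}'
)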
outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters-2 - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column-2 - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting-2 - model-evaluation-import-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-forecasting-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import-2 - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - use_bq_prefix: - runtimeValue: - constant: true - taskInfo: - name: table-to-uri-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER 
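finalize-eval-quantile-parameters-2 picks the forecasting_type and quantile list that model-evaluation-forecasting-2 consumes above. A hypothetical sketch of its contract, assuming a non-empty quantile list is what switches evaluation into quantile mode:

def finalize_eval_quantile_parameters(quantiles: list) -> tuple:
    # 0.5, when present, is the median forecast.
    if quantiles:
        return 'quantile', quantiles
    return 'point', []

assert finalize_eval_quantile_parameters([0.1, 0.5, 0.9])[0] == 'quantile'
assert finalize_eval_quantile_parameters([])[0] == 'point'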
- pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - tasks: - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - 
pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - 
pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - 
pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - 
encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - forecasting_context_window: - componentInputParameter: pipelinechannel--context_window - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_holiday_regions: - componentInputParameter: pipelinechannel--holiday_regions - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_predefined_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_columns: - componentInputParameter: pipelinechannel--time_series_identifier_columns - forecasting_unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - location: - componentInputParameter: pipelinechannel--location - model_type: - runtimeValue: - constant: l2l - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - stats_gen_execution_engine: - runtimeValue: - constant: bigquery - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--transformations - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: check-if-hyperparameter-tuning-results-are-supplied-by-user - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - 
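The two condition branches above (stage_1_tuning_result_artifact_uri_not_empty and stage_1_tuning_result_artifact_uri_empty) plus the should_run_model_evaluation gate are plain KFP trigger policies keyed off the string-not-empty check. A minimal sketch of the authoring pattern, assuming the KFP v2 Python DSL, with trivial components standing in for the real ones:

from kfp import dsl

@dsl.component
def string_not_empty(value: str) -> str:
    # Mirrors check-if-hyperparameter-tuning-results-are-supplied-by-user.
    return 'true' if value else 'false'

@dsl.component
def log(msg: str):
    print(msg)

@dsl.pipeline(name='trigger-policy-sketch')
def sketch(stage_1_tuning_result_artifact_uri: str = ''):
    check = string_not_empty(value=stage_1_tuning_result_artifact_uri)
    with dsl.Condition(check.output == 'true'):
        log(msg='reuse the user-supplied stage-1 tuning results')
    with dsl.Condition(check.output == 'false'):
        log(msg='run the stage-1 architecture search')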
taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - context_window: - componentInputParameter: pipelinechannel--context_window - enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_model_type: - runtimeValue: - constant: l2l - forecasting_transformations: - componentInputParameter: pipelinechannel--set-optional-inputs-transformations - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - prediction_type: - runtimeValue: - constant: time_series - quantiles: - componentInputParameter: pipelinechannel--quantiles - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_columns: - componentInputParameter: pipelinechannel--time_series_identifier_columns - unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - artifacts: - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--available_at_forecast_columns: - parameterType: LIST - pipelinechannel--context_window: - parameterType: NUMBER_INTEGER - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--enable_probabilistic_inference: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - 
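Every pipelinechannel-- parameter in these inputDefinitions is surfaced through the helper functions in utils.py. A usage sketch, assuming the keyword names mirror the channel names, that the helper returns a (template_path, parameter_values) pair, and with illustrative values throughout:

from google_cloud_pipeline_components.preview.automl import forecasting

template_path, parameter_values = (
    forecasting.get_learn_to_learn_forecasting_pipeline_and_parameters(
        project='my-project',
        location='us-central1',
        root_dir='gs://my-bucket/pipeline_root',
        target_column='sales',
        optimization_objective='minimize-rmse',
        transformations={'auto': ['sales', 'date', 'store_id']},
        train_budget_milli_node_hours=1000,
        time_column='date',
        time_series_identifier_columns=['store_id'],
        forecast_horizon=30,
        context_window=30,
        run_evaluation=True,
    )
)

# The evaluation Dataflow knobs declared in these inputDefinitions can be
# overridden the same way (illustrative values):
parameter_values.update({
    'evaluation_dataflow_machine_type': 'n1-standard-8',
    'evaluation_dataflow_starting_num_workers': 2,
    'evaluation_dataflow_max_num_workers': 10,
    'evaluation_dataflow_disk_size_gb': 100,
})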
pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_transform_engine_dataflow_machine_type: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--group_columns: - parameterType: LIST - pipelinechannel--group_temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--group_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--holiday_regions: - parameterType: LIST - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-transformations: - parameterType: STRUCT - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_attribute_columns: - parameterType: LIST - pipelinechannel--time_series_identifier_columns: - parameterType: LIST - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transformations: - parameterType: STRUCT - pipelinechannel--unavailable_at_forecast_columns: - parameterType: LIST - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_predefined_column: - parameterType: STRING - pipelinechannel--window_stride_length: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - 
artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size_gb: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - force_runner_mode: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size_gb: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - force_runner_mode: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: Dataset in "projectId.datasetId" format for storing intermediate-FTE - BigQuery tables. If the specified dataset does not exist in BigQuery, - FTE will create the dataset. If no bigquery_staging_full_dataset_id is - specified, all intermediate tables will be stored in a dataset created - under the provided project in the input data source's location during - FTE execution called "vertex_feature_transform_engine_staging_{location.replace('-', - '_')}". All tables generated by FTE will have a 30 day TTL. - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: BigQuery input data source to run feature transform on. - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: CSV input data source to run feature transform on. - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: The disk size, in gigabytes, to use on each Dataflow worker - instance. If not set, default to 40. - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: The machine type used for dataflow jobs. If not set, default - to n1-standard-16. 
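Per the bigquery_staging_full_dataset_id description above, when no staging dataset is supplied FTE derives one from the data source's location. Spelled out:

location = 'us-central1'
staging_dataset = (
    'vertex_feature_transform_engine_staging_' + location.replace('-', '_'))
assert staging_dataset == 'vertex_feature_transform_engine_staging_us_central1'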
- isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: The number of workers to run the dataflow job. If not set, - default to 25. - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: Custom service account to run Dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty the - default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: Specifies whether Dataflow workers use public IP addresses. - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: 'List of dataset-level custom transformation definitions. Custom, - bring-your-own dataset-level transform functions, where users can define - and import their own transform function and use it with FTE''s built-in - transformations. Using custom transformations is an experimental feature - and it is currently not supported during batch prediction. - - [ { "transformation": "ConcatCols", "module_path": "/path/to/custom_transform_fn_dlt.py", - "function_name": "concat_cols" } ] Using custom transform function together - with FTE''s built-in transformations: .. code-block:: python [ { "transformation": - "Join", "right_table_uri": "bq://test-project.dataset_test.table", "join_keys": - [["join_key_col", "join_key_col"]] },{ "transformation": "ConcatCols", - "cols": ["feature_1", "feature_2"], "output_col": "feature_1_2" } ]' - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level transformations.\n[ { \"transformation\"\ - : \"Join\", \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - , \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }, ... ] Additional\ - \ information about FTE's currently supported built-in\n transformations:\n\ - \ Join: Joins features from right_table_uri. For each join key, the\ - \ left table keys will be included and the right table keys will be dropped.\n\ - \ Example: .. code-block:: python { \"transformation\": \"Join\"\ - , \"right_table_uri\": \"bq://test-project.dataset_test.table\", \"join_keys\"\ - : [[\"join_key_col\", \"join_key_col\"]] }\n Arguments:\n \ - \ right_table_uri: Right table BigQuery uri to join with input_full_table_id.\n\ - \ join_keys: Features to join on. For each nested list, the\ - \ first element is a left table column and the second is its corresponding\ - \ right table column.\n TimeAggregate: Creates a new feature composed\ - \ of values of an existing feature from a fixed time period ago or in\ - \ the future.\n Ex: A feature for sales by store 1 year ago.\n \ - \ Example: .. 
code-block:: python { \"transformation\": \"TimeAggregate\"\ - , \"time_difference\": 40, \"time_difference_units\": \"DAY\", \"time_series_identifier_columns\"\ - : [\"store_id\"], \"time_column\": \"time_col\", \"time_difference_target_column\"\ - : \"target_col\", \"output_column\": \"output_col\" }\n Arguments:\n\ - \ time_difference: Number of time_difference_units to look\ - \ back or into the future on our time_difference_target_column.\n \ - \ time_difference_units: Units of time_difference to look back\ - \ or into the future on our time_difference_target_column. Must be one\ - \ of * 'DAY' * 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER' * 'YEAR'\n\ - \ time_series_identifier_columns: Names of the time series\ - \ identifier columns.\n time_column: Name of the time column.\n\ - \ time_difference_target_column: Column we wish to get the\ - \ value of time_difference time_difference_units in the past or future.\n\ - \ output_column: Name of our new time aggregate feature.\n\ - \ is_future: Whether we wish to look forward in time. Defaults\ - \ to False. PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\ - \ Performs a partition by reduce operation (one of max, min, avg, or sum)\ - \ with a fixed historic time period. Ex: Getting avg sales (the reduce\ - \ column) for each store (partition_by_column) over the previous 5 days\ - \ (time_column, time_ago_units, and time_ago).\n Example: .. code-block::\ - \ python { \"transformation\": \"PartitionByMax\", \"reduce_column\"\ - : \"sell_price\", \"partition_by_columns\": [\"store_id\", \"state_id\"\ - ], \"time_column\": \"date\", \"time_ago\": 1, \"time_ago_units\": \"\ - WEEK\", \"output_column\": \"partition_by_reduce_max_output\" }\n \ - \ Arguments:\n reduce_column: Column to apply the reduce\ - \ operation on. Reduce operations include the\n following:\ - \ Max, Min, Avg, Sum.\n partition_by_columns: List of columns\ - \ to partition by.\n time_column: Time column for the partition\ - \ by operation's window function.\n time_ago: Number of time_ago_units\ - \ to look back on our target_column, starting from time_column (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on our target_column.\ - \ Must be one of * 'DAY' * 'WEEK'\n output_column: Name of\ - \ our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature selection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\". The algorithms available\ - \ are: AMI(Adjusted Mutual Information):\nReference: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional Mutual\ - \ Information Maximization): Reference paper: Mohamed Bennasar, Yulia\ - \ Hicks, Rossitza Setchi, \u201CFeature selection using Joint Mutual Information\ - \ Maximisation,\u201D Expert Systems with Applications, vol. 42, issue\ - \ 22, 1 December 2015, Pages 8520-8532. JMIM(Joint Mutual Information\ - \ Maximization\nReference:\n paper: Mohamed Bennasar, Yulia Hicks, Rossitza\ - \ Setchi, \u201CFeature selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert Systems with Applications, vol. 42, issue 22, 1 December 2015,\ - \ Pages 8520-8532. 
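Gathering the ConcatCols, Join, TimeAggregate, and PartitionByMax examples embedded in the two descriptions above into the Python values a caller would actually pass:

dataset_level_custom_transformation_definitions = [
    {
        'transformation': 'ConcatCols',
        'module_path': '/path/to/custom_transform_fn_dlt.py',
        'function_name': 'concat_cols',
    },
]

dataset_level_transformations = [
    {
        'transformation': 'Join',
        'right_table_uri': 'bq://test-project.dataset_test.table',
        'join_keys': [['join_key_col', 'join_key_col']],
    },
    {
        'transformation': 'TimeAggregate',
        'time_difference': 40,
        'time_difference_units': 'DAY',
        'time_series_identifier_columns': ['store_id'],
        'time_column': 'time_col',
        'time_difference_target_column': 'target_col',
        'output_column': 'output_col',
    },
    {
        'transformation': 'PartitionByMax',
        'reduce_column': 'sell_price',
        'partition_by_columns': ['store_id', 'state_id'],
        'time_column': 'date',
        'time_ago': 1,
        'time_ago_units': 'WEEK',
        'output_column': 'partition_by_reduce_max_output',
    },
]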
MRMR(MIQ Minimum-redundancy Maximum-relevance): Reference\
- \ paper: Hanchuan Peng, Fuhui Long, and Chris Ding. \"Feature selection\
- \ based on mutual information criteria of max-dependency, max-relevance,\
- \ and min-redundancy.\" IEEE Transactions on pattern analysis and machine\
- \ intelligence 27, no.\n 8: 1226-1238."
- isOptional: true
- parameterType: STRING
- feature_selection_execution_engine:
- defaultValue: dataflow
- description: Execution engine to run feature selection; the value can be
- dataflow or bigquery.
- isOptional: true
- parameterType: STRING
- forecasting_apply_windowing:
- defaultValue: true
- description: Whether to apply the window strategy.
- isOptional: true
- parameterType: BOOLEAN
- forecasting_available_at_forecast_columns:
- defaultValue: []
- description: Forecasting available at forecast columns.
- isOptional: true
- parameterType: LIST
- forecasting_context_window:
- defaultValue: -1.0
- description: Forecasting context window.
- isOptional: true
- parameterType: NUMBER_INTEGER
- forecasting_forecast_horizon:
- defaultValue: -1.0
- description: Forecasting horizon.
- isOptional: true
- parameterType: NUMBER_INTEGER
- forecasting_holiday_regions:
- defaultValue: []
- description: 'The geographical region based on which the holiday effect
- is applied in modeling by adding a holiday categorical array feature that
- includes all holidays matching the date. This option is only allowed when
- data granularity is day. By default, holiday effect modeling is disabled.
- To turn it on, specify the holiday region using this option.
-
- Top level: * ''GLOBAL''
-
- Second level: continental regions: * ''NA'': North America
-
- * ''JAPAC'': Japan and Asia Pacific
-
- * ''EMEA'': Europe, the Middle East and Africa
-
- * ''LAC'': Latin America and the Caribbean
-
- Third level: countries from ISO 3166-1 Country codes.
-
- Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC''
- * ''AE''
-
- * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL''
- * ''CN'' * ''CO''
-
- * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES''
- * ''FI'' * ''FR''
-
- * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN''
- * ''IR'' * ''IT''
-
- * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL''
- * ''NO'' * ''NZ''
-
- * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU''
- * ''SA'' * ''SE''
-
- * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US''
- * ''VE'' * ''VN''
-
- * ''ZA'''
- isOptional: true
- parameterType: LIST
- forecasting_predefined_window_column:
- defaultValue: ''
- description: Forecasting predefined window column.
- isOptional: true
- parameterType: STRING
- forecasting_time_column:
- defaultValue: ''
- description: Forecasting time column.
- isOptional: true
- parameterType: STRING
- forecasting_time_series_attribute_columns:
- defaultValue: []
- description: Forecasting time series attribute columns.
- isOptional: true
- parameterType: LIST
- forecasting_time_series_identifier_column:
- description: '[Deprecated] A forecasting time series identifier column.
- Raises an exception if used - use the "time_series_identifier_column"
- field instead.'
- isOptional: true
- parameterType: STRING
- forecasting_time_series_identifier_columns:
- defaultValue: []
- description: The list of forecasting time series identifier columns.
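The dataset-level transformation entries documented above are plain JSON-style dictionaries. As a minimal sketch (the table URI and column names below are hypothetical, not taken from this pipeline), a `dataset_level_transformations` value combining the Join and TimeAggregate built-ins could look like:

.. code-block:: python

    # Hypothetical dataset_level_transformations value; table and column
    # names are illustrative only.
    dataset_level_transformations = [
        {
            # Join pulls features in from a second BigQuery table.
            "transformation": "Join",
            "right_table_uri": "bq://my-project.my_dataset.weather",
            # Each nested pair is [left_table_column, right_table_column].
            "join_keys": [["store_id", "store_id"]],
        },
        {
            # TimeAggregate creates a lagged copy of an existing feature,
            # e.g. sales for the same store one year earlier.
            "transformation": "TimeAggregate",
            "time_difference": 1,
            "time_difference_units": "YEAR",
            "time_series_identifier_columns": ["store_id"],
            "time_column": "date",
            "time_difference_target_column": "sales",
            "output_column": "sales_one_year_ago",
        },
    ]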
- isOptional: true
- parameterType: LIST
- forecasting_unavailable_at_forecast_columns:
- defaultValue: []
- description: Forecasting unavailable at forecast columns.
- isOptional: true
- parameterType: LIST
- forecasting_window_max_count:
- defaultValue: -1.0
- description: Forecasting window max count.
- isOptional: true
- parameterType: NUMBER_INTEGER
- forecasting_window_stride_length:
- defaultValue: -1.0
- description: Forecasting window stride length.
- isOptional: true
- parameterType: NUMBER_INTEGER
- group_columns:
- isOptional: true
- parameterType: LIST
- group_temporal_total_weight:
- defaultValue: 0.0
- isOptional: true
- parameterType: NUMBER_DOUBLE
- group_total_weight:
- defaultValue: 0.0
- isOptional: true
- parameterType: NUMBER_DOUBLE
- legacy_transformations_path:
- defaultValue: ''
- isOptional: true
- parameterType: STRING
- location:
- description: Location for the created GCP services.
- parameterType: STRING
- materialized_examples_format:
- defaultValue: tfrecords_gzip
- description: The format to use for the materialized examples. Should be
- either 'tfrecords_gzip' (default) or 'parquet'.
- isOptional: true
- parameterType: STRING
- max_selected_features:
- defaultValue: 1000.0
- description: Maximum number of features to select. If specified, the transform
- config will be pruned to keep only the selected features that ranked
- highest in the feature ranking, which has the ranking value for all supported
- features. If the number of input features is smaller than the specified
- max_selected_features, the feature selection process still runs and generates
- the feature ranking, but no features will be excluded. The value defaults
- to 1000 if run_feature_selection is enabled.
- isOptional: true
- parameterType: NUMBER_INTEGER
- model_type:
- description: 'Model type, which we wish to engineer features for. Can be
- one of: neural_network, boosted_trees, l2l, seq2seq, tft, or tide. Defaults
- to the empty value, `None`.'
- isOptional: true
- parameterType: STRING
- multimodal_image_columns:
- defaultValue: []
- description: List of multimodal image columns. Defaults to an empty list.
- isOptional: true
- parameterType: LIST
- multimodal_tabular_columns:
- defaultValue: []
- description: List of multimodal tabular columns. Defaults to an empty list.
- isOptional: true
- parameterType: LIST
- multimodal_text_columns:
- defaultValue: []
- description: List of multimodal text columns. Defaults to an empty list.
- isOptional: true
- parameterType: LIST
- multimodal_timeseries_columns:
- defaultValue: []
- description: List of multimodal timeseries columns. Defaults to an empty
- list.
- isOptional: true
- parameterType: LIST
- predefined_split_key:
- defaultValue: ''
- description: Predefined split key.
- isOptional: true
- parameterType: STRING
- prediction_type:
- defaultValue: ''
- description: Model prediction type. One of "classification", "regression",
- "time_series".
- isOptional: true
- parameterType: STRING
- project:
- description: Project to run feature transform engine.
- parameterType: STRING
- root_dir:
- description: The Cloud Storage location to store the output.
- parameterType: STRING
- run_distill:
- defaultValue: false
- description: (deprecated) Whether the distillation should be applied to
- the training.
- isOptional: true
- parameterType: BOOLEAN
- run_feature_selection:
- defaultValue: false
- description: Whether the feature selection should be applied to the dataset.
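Several of the parameters above work together: feature selection only runs when `run_feature_selection` is enabled, and `feature_selection_algorithm`, `max_selected_features`, and `feature_selection_execution_engine` then control how the ranking is produced and truncated. A minimal sketch of that slice of a pipeline's parameter values (all other required, project-specific parameters are omitted here):

.. code-block:: python

    # Illustrative slice of pipeline parameter values; only the
    # feature-selection knobs documented above are shown.
    parameter_values = {
        "run_feature_selection": True,
        # One of "AMI" (default), "CMIM", "JMIM", "MRMR".
        "feature_selection_algorithm": "AMI",
        # Keep only the 500 top-ranked features in the transform config.
        "max_selected_features": 500,
        # "dataflow" (default) or "bigquery".
        "feature_selection_execution_engine": "dataflow",
    }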
- isOptional: true
- parameterType: BOOLEAN
- stats_gen_execution_engine:
- defaultValue: dataflow
- description: 'Execution engine to perform statistics generation. Can be
- one of: "dataflow" (by default) or "bigquery". Using "bigquery" as the
- execution engine is experimental.'
- isOptional: true
- parameterType: STRING
- stratified_split_key:
- defaultValue: ''
- description: Stratified split key.
- isOptional: true
- parameterType: STRING
- target_column:
- defaultValue: ''
- description: Target column of input data.
- isOptional: true
- parameterType: STRING
- temporal_total_weight:
- defaultValue: 0.0
- isOptional: true
- parameterType: NUMBER_DOUBLE
- test_fraction:
- defaultValue: -1.0
- description: Fraction of input data for testing.
- isOptional: true
- parameterType: NUMBER_DOUBLE
- tf_auto_transform_features:
- defaultValue: {}
- description: 'Dict mapping auto and/or type-resolutions to TF transform
- features. FTE will automatically configure a set of built-in transformations
- for each feature based on its data statistics. If users do not want auto
- type resolution, but want the set of transformations for a given type
- to be automatically generated, they may specify pre-resolved transformation
- types. The following type hint dict keys are supported: * ''auto'' * ''categorical''
- * ''numeric'' * ''text'' * ''timestamp'' Example: `{ "auto": ["feature1"],
- "categorical": ["feature2", "feature3"], }`. Note that the target and
- weight column may not be included as an auto transformation unless users
- are running forecasting.'
- isOptional: true
- parameterType: STRUCT
- tf_custom_transformation_definitions:
- defaultValue: []
- description: 'List of TensorFlow-based custom transformation definitions. Custom,
- bring-your-own transform functions, where users can define and import
- their own transform function and use it with FTE''s built-in transformations.
- `[ { "transformation": "PlusOne", "module_path": "gs://bucket/custom_transform_fn.py",
- "function_name": "plus_one_transform" }, { "transformation": "MultiplyTwo",
- "module_path": "gs://bucket/custom_transform_fn.py", "function_name":
- "multiply_two_transform" } ] Using custom transform function together
- with FTE''s built-in transformations: .. code-block:: python [ { "transformation":
- "CastToFloat", "input_columns": ["feature_1"], "output_columns": ["feature_1"]
- },{ "transformation": "PlusOne", "input_columns": ["feature_1"], "output_columns":
- ["feature_1_plused_one"] },{ "transformation": "MultiplyTwo", "input_columns":
- ["feature_1"], "output_columns": ["feature_1_multiplied_two"] } ]'
- isOptional: true
- parameterType: LIST
- tf_transform_execution_engine:
- defaultValue: dataflow
- description: 'Execution engine to perform row-level TF transformations.
- Can be one of: "dataflow" (by default) or "bigquery". Using "bigquery"
- as the execution engine is experimental and is for allowlisted customers
- only. In addition, executing on "bigquery" only supports auto transformations
- (i.e., specified by tf_auto_transform_features) and will raise an error
- when tf_custom_transformation_definitions or tf_transformations_path is
- set.'
- isOptional: true
- parameterType: STRING
- tf_transformations_path:
- defaultValue: ''
- description: "Path to TensorFlow-based transformation configuration. Path\
- \ to a JSON file used to specify FTE's TF transformation configurations.\
- \ In the following, we provide some sample transform configurations to\
- \ demonstrate FTE's capabilities.
All transformations on input columns\
- \ are explicitly specified with FTE's built-in transformations. Chaining\
- \ of multiple transformations on a single column is also supported. For\
- \ example: .. code-block:: python [ { \"transformation\": \"ZScale\"\
- , \"input_columns\": [\"feature_1\"] }, { \"transformation\": \"ZScale\"\
- , \"input_columns\": [\"feature_2\"] } ]`. Additional information about\
- \ FTE's currently supported built-in\ntransformations:\nDatetime: Extracts\
- \ datetime features from a column containing timestamp strings.\n Example:\
- \ .. code-block:: python { \"transformation\": \"Datetime\", \"input_columns\"\
- : [\"feature_1\"], \"time_format\": \"%Y-%m-%d\" }\n Arguments:\n \
- \ input_columns: A list with a single column to perform the datetime\
- \ transformation on.\n output_columns: Names of output columns,\
- \ one for each datetime_features element.\n time_format: Datetime\
- \ format string. Time format is a combination of Date + Time Delimiter\
- \ (optional) + Time (optional) directives. Valid date directives are as\
- \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' # 2018/11/30 * '%y-%m-%d'\
- \ # 18-11-30 * '%y/%m/%d' # 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y'\
- \ # 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' # 11/30/18 * '%d-%m-%Y'\
- \ # 30-11-2018 * '%d/%m/%Y' # 30/11/2018 * '%d-%B-%Y' # 30-November-2018\
- \ * '%d-%m-%y' # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' # 30-November-18\
- \ * '%d%m%Y' # 30112018 * '%m%d%Y' # 11302018 * '%Y%m%d' # 20181130\
- \ Valid time delimiters are as follows * 'T' * ' ' Valid time directives\
- \ are as follows * '%H:%M' # 23:59 * '%H:%M:%S' #\n \
- \ 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456] * '%H:%M:%S.%f%z'\
- \ # 23:59:58[.123456]+0000 * '%H:%M:%S%z', # 23:59:58+0000\n \
- \ datetime_features: List of datetime features to be extracted. Each entry\
- \ must be one of * 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK' * 'DAY_OF_YEAR'\
- \ * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR' * 'MINUTE' * 'SECOND' Defaults\
- \ to ['YEAR', 'MONTH', 'DAY', 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\
- Log: Performs the natural log on a numeric column.\n Example: .. code-block::\
- \ python { \"transformation\": \"Log\", \"input_columns\": [\"feature_1\"\
- ] }\n Arguments:\n input_columns: A list with a single column\
- \ to perform the log transformation on.\n output_columns: A list\
- \ with a single output column name, corresponding to the output of our\
- \ transformation.\nZScale: Performs Z-scale normalization on a numeric\
- \ column.\n Example: .. code-block:: python { \"transformation\"\
- : \"ZScale\", \"input_columns\": [\"feature_1\"] }\n Arguments:\n \
- \ input_columns: A list with a single column to perform the z-scale\
- \ transformation on.\n output_columns: A list with a single output\
- \ column name, corresponding to the output of our transformation.\nVocabulary:\
- \ Converts strings to integers, where each unique string gets a unique\
- \ integer representation.\n Example: .. code-block:: python { \"\
- transformation\": \"Vocabulary\", \"input_columns\": [\"feature_1\"] }\n\
- \ Arguments:\n input_columns: A list with a single column to\
- \ perform the vocabulary transformation on.\n output_columns: A\
- \ list with a single output column name, corresponding to the output of\
- \ our transformation.\n top_k: Number of the most frequent words\
- \ in the vocabulary to use for generating dictionary lookup indices. If\
- \ not specified, all words in the vocabulary will be used.
Defaults to\ - \ None.\n frequency_threshold: Limit the vocabulary only to words\ - \ whose number of occurrences in the input exceeds frequency_threshold.\ - \ If not specified, all words in the vocabulary will be included. If both\ - \ top_k and frequency_threshold are specified, a word must satisfy both\ - \ conditions to be included. Defaults to None.\nCategorical: Transforms\ - \ categorical columns to integer columns.\n Example: .. code-block::\ - \ python { \"transformation\": \"Categorical\", \"input_columns\": [\"\ - feature_1\"], \"top_k\": 10 }\n Arguments:\n input_columns:\ - \ A list with a single column to perform the categorical transformation\ - \ on.\n output_columns: A list with a single output column name,\ - \ corresponding to the output of our transformation.\n top_k: Number\ - \ of the most frequent words in the vocabulary to use for generating dictionary\ - \ lookup indices. If not specified, all words in the vocabulary will be\ - \ used.\n frequency_threshold: Limit the vocabulary only to words\ - \ whose number of occurrences in the input exceeds frequency_threshold.\ - \ If not specified, all words in the vocabulary will be included. If both\ - \ top_k and frequency_threshold are specified, a word must satisfy both\ - \ conditions to be included.\nReduce: Given a column where each entry\ - \ is a numeric array, reduces arrays according to our reduce_mode.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Reduce\"\ - , \"input_columns\": [\"feature_1\"], \"reduce_mode\": \"MEAN\", \"output_columns\"\ - : [\"feature_1_mean\"] }\n Arguments:\n input_columns: A list\ - \ with a single column to perform the reduce transformation on.\n \ - \ output_columns: A list with a single output column name, corresponding\ - \ to the output of our transformation.\n reduce_mode: One of *\ - \ 'MAX' * 'MIN' * 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k:\ - \ The number of last k elements when 'LAST_K' reduce mode is used. Defaults\ - \ to 1.\nSplitString: Given a column of strings, splits strings into token\ - \ arrays.\n Example: .. code-block:: python { \"transformation\"\ - : \"SplitString\", \"input_columns\": [\"feature_1\"], \"separator\":\ - \ \"$\" }\n Arguments:\n input_columns: A list with a single\ - \ column to perform the split string transformation on.\n output_columns:\ - \ A list with a single output column name, corresponding to the output\ - \ of our transformation.\n separator: Separator to split input\ - \ string into tokens. Defaults to ' '.\n missing_token: Missing\ - \ token to use when no string is included. Defaults to ' _MISSING_ '.\n\ - NGram: Given a column of strings, splits strings into token arrays where\ - \ each token is an integer.\n Example: .. code-block:: python { \"\ - transformation\": \"NGram\", \"input_columns\": [\"feature_1\"], \"min_ngram_size\"\ - : 1, \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n \ - \ input_columns: A list with a single column to perform the n-gram\ - \ transformation on.\n output_columns: A list with a single output\ - \ column name, corresponding to the output of our transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must be a positive number\ - \ and <= max_ngram_size. Defaults to 1.\n max_ngram_size: Maximum\ - \ n-gram size. Must be a positive number and >= min_ngram_size. Defaults\ - \ to 2.\n top_k: Number of the most frequent words in the vocabulary\ - \ to use for generating dictionary lookup indices. If not specified, all\ - \ words in the vocabulary will be used. 
Defaults to None.\n frequency_threshold:\
- \ Limit the dictionary's vocabulary only to words whose number of occurrences\
- \ in the input exceeds frequency_threshold. If not specified, all words\
- \ in the vocabulary will be included. If both top_k and frequency_threshold\
- \ are specified, a word must satisfy both conditions to be included. Defaults\
- \ to None.\n separator: Separator to split input string into tokens.\
- \ Defaults to ' '.\n missing_token: Missing token to use when no\
- \ string is included. Defaults to ' _MISSING_ '.\nClip: Given a numeric\
- \ column, clips elements such that elements < min_value are assigned min_value,\
- \ and elements > max_value are assigned max_value.\n Example: .. code-block::\
- \ python { \"transformation\": \"Clip\", \"input_columns\": [\"col1\"\
- ], \"output_columns\": [\"col1_clipped\"], \"min_value\": 1., \"max_value\"\
- : 10., }\n Arguments:\n input_columns: A list with a single\
- \ column to perform the clip transformation on.\n output_columns:\
- \ A list with a single output column name, corresponding to the output\
- \ of our transformation.\n min_value: Number where all values below\
- \ min_value are set to min_value. If no min_value is provided, min clipping\
- \ will not occur. Defaults to None.\n max_value: Number where all\
- \ values above max_value are set to max_value. If no max_value is provided,\
- \ max clipping will not occur. Defaults to None.\nMultiHotEncoding: Performs\
- \ multi-hot encoding on a categorical array column.\n Example: ..\
- \ code-block:: python { \"transformation\": \"MultiHotEncoding\", \"\
- input_columns\": [\"col1\"], } The number of classes is determined by\
- \ the largest number included in the input if it is numeric or the total\
- \ number of unique values of the input if it is type str. If the input\
- \ has type str and an element contains separator tokens, the input\
- \ will be split at separator indices, and each element of the split\
- \ list will be considered a separate class. For example,\n Input: \
- \ .. code-block:: python [ [\"foo bar\"], # Example 0 [\"foo\",\
- \ \"bar\"], # Example 1 [\"foo\"], # Example 2 [\"bar\"], \
- \ # Example 3 ] Output (with default separator=\" \"): .. code-block::\
- \ python [ [1, 1], # Example 0 [1, 1], # Example 1 [1,\
- \ 0], # Example 2 [0, 1], # Example 3 ]\n Arguments:\n\
- \ input_columns: A list with a single column to perform the multi-hot-encoding\
- \ on.\n output_columns: A list with a single output column name,\
- \ corresponding to the output of our transformation.\n top_k: Number\
- \ of the most frequent words in the vocabulary to use for generating dictionary\
- \ lookup indices. If not specified, all words in the vocabulary will be\
- \ used. Defaults to None.\n frequency_threshold: Limit the dictionary's\
- \ vocabulary only to words whose number of occurrences in the input exceeds\
- \ frequency_threshold. If not specified, all words in the vocabulary will\
- \ be included. If both top_k and frequency_threshold are specified, a\
- \ word must satisfy both conditions to be included. Defaults to None.\n\
- \ separator: Separator to split input string into tokens. Defaults\
- \ to ' '.\nMaxAbsScale: Performs maximum absolute scaling on a numeric\
- \ column.\n Example: ..
code-block:: python { \"transformation\"\ - : \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\":\ - \ [\"col1_max_abs_scaled\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to perform max-abs-scale on.\n output_columns:\ - \ A list with a single output column name, corresponding to the output\ - \ of our transformation.\nCustom: Transformations defined in tf_custom_transformation_definitions\ - \ are included here in the TensorFlow-based transformation configuration.\ - \ For example, given the following tf_custom_transformation_definitions:\ - \ .. code-block:: python [ { \"transformation\": \"PlusX\", \"module_path\"\ - : \"gs://bucket/custom_transform_fn.py\", \"function_name\": \"plus_one_transform\"\ - \ } ] We can include the following transformation: .. code-block:: python\ - \ { \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"], \"\ - output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note that input_columns\ - \ must still be included in our arguments and output_columns is optional.\ - \ All other arguments are those defined in custom_transform_fn.py, which\ - \ includes `\"x\"` in this case. See tf_custom_transformation_definitions\ - \ above. legacy_transformations_path (Optional[str]) Deprecated. Prefer\ - \ tf_auto_transform_features. Path to a GCS file containing JSON string\ - \ for legacy style transformations. Note that legacy_transformations_path\ - \ and tf_auto_transform_features cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The ranking of features, all features supported in the dataset - will be included. For "AMI" algorithm, array features won't be available - in the ranking as arrays are not supported yet. - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: BigQuery URI for the downsampled test split to pass to the - batch prediction component during batch explain. - parameterType: STRING - bigquery_test_split_uri: - description: BigQuery URI for the test split to pass to the batch prediction - component during evaluation. - parameterType: STRING - bigquery_train_split_uri: - description: BigQuery URI for the train split to pass to the batch prediction - component during distillation. 
- parameterType: STRING - bigquery_validation_split_uri: - description: BigQuery URI for the validation split to pass to the batch - prediction component during distillation. - parameterType: STRING - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - split_example_counts: - description: JSON string of data split example counts for train, validate, - and test splits. - parameterType: STRING - comp-finalize-eval-quantile-parameters: - executorLabel: exec-finalize-eval-quantile-parameters - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-finalize-eval-quantile-parameters-2: - executorLabel: exec-finalize-eval-quantile-parameters-2 - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-get-or-create-model-description: - executorLabel: exec-get-or-create-model-description - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-or-create-model-description-2: - executorLabel: exec-get-or-create-model-description-2 - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri: - executorLabel: exec-get-prediction-image-uri - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri-2: - executorLabel: exec-get-prediction-image-uri-2 - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column: - executorLabel: exec-get-predictions-column - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column-2: - executorLabel: exec-get-predictions-column-2 - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - 
parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - 
parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - `unmanaged_container_model` must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - `prediction__` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - `predictions`, and `errors`. If the Model has both `instance` - - and `prediction` schemata defined then the tables have columns as - - follows: The `predictions` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. 
The `errors` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has [google.rpc.Status](Status) - - represented as a STRUCT, and containing only `code` and - - `message`. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - key_field is not specified. - - When `excluded_fields` is populated, `included_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - `prediction--`, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - - `predictions_0001.`, `predictions_0002.`, - - ..., `predictions_N.` are created where `` - - depends on chosen `predictions_format`, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both `instance` and `prediction` schemata defined - - then each such file contains predictions as per the - - `predictions_format`. If prediction for any instance failed - - (partially or completely), then an additional - - `errors_0001.`, `errors_0002.`,..., - - `errors_N.` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional `error` field which as - - value has `google.rpc.Status` containing only `code` and - - `message` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: 'Google Cloud Storage URI(-s) to your instances to run batch - prediction - - on. They must match `instances_format`. May contain wildcards. For more - - information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). - - For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If `instance_type` is `array`, the order of field names in - - `included_fields` also determines the order of the values in the array. - - When `included_fields` is populated, `excluded_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model\naccepts. Vertex\ - \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ - to the specified format. 
Supported values are:\n`object`: Each input is\
- \ converted to JSON object format.\n * For `bigquery`, each row is converted\
- \ to an object.\n * For `jsonl`, each line of the JSONL input must be\
- \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\
- \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\
- \ * For `bigquery`, each row is converted to an array. The order\n \
- \ of columns is determined by the BigQuery column order, unless\n \
- \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\
- \ is populated.\n `included_fields` must be populated for specifying\
- \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\
- \ object,\n `included_fields` must be populated for specifying field\
- \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\
- \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\
- \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\
- \ is the same as `array`. The\n order of columns is the same as defined\
- \ in the file or table, unless\n included_fields is populated.\n * For\
- \ `jsonl`, the prediction instance format is determined by\n each line\
- \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\
- \ be converted to\n an object in the format of `{\"b64\": <value>}`,\
- \ where `<value>` is\n the Base64-encoded string of the content of the\
- \ record.\n * For `file-list`, each file in the list will be converted\
- \ to an\n object in the format of `{\"b64\": <value>}`, where `<value>`\
- \ is\n the Base64-encoded string of the content of the file."
- isOptional: true
- parameterType: STRING
- instances_format:
- defaultValue: jsonl
- description: 'The format in which instances are
-
- given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s
- supportedInputStorageFormats.
-
- For more details about this input config, see
-
- [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)'
- isOptional: true
- parameterType: STRING
- job_display_name:
- description: The user-defined name of this BatchPredictionJob.
- parameterType: STRING
- key_field:
- defaultValue: ''
- description: "The name of the field that is considered as a key.\nThe values\
- \ identified by the key field are not included in the\ntransformed instances\
- \ that are sent to the Model. This is similar to\nspecifying this name\
- \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\
- \ In addition,\nthe batch prediction output will not include the instances.\
- \ Instead the\noutput will only include the value of the key field, in\
- \ a field named\n`key` in the output:\n * For `jsonl` output format, the\
- \ output will have a `key` field\n instead of the `instance` field.\n\
- \ * For `csv`/`bigquery` output format, the output will have a `key`\n\
- \ column instead of the instance feature columns.\nThe input must be\
- \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord."
- isOptional: true
- parameterType: STRING
- labels:
- defaultValue: {}
- description: 'The labels with user-defined metadata to
-
- organize your BatchPredictionJobs. 
Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: Location for creating the BatchPredictionJob. - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: 'The format in which Vertex AI gives the predictions. Must - be one of the - - Model''s supportedOutputStorageFormats. - - For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' - isOptional: true - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - description: Project to create the BatchPredictionJob. Defaults to the project - in which the PipelineJob is run. - isOptional: true - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. 
This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - `unmanaged_container_model` must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - `prediction__` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - `predictions`, and `errors`. If the Model has both `instance` - - and `prediction` schemata defined then the tables have columns as - - follows: The `predictions` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The `errors` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has [google.rpc.Status](Status) - - represented as a STRUCT, and containing only `code` and - - `message`. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - key_field is not specified. - - When `excluded_fields` is populated, `included_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - `prediction--`, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - `predictions_0001.`, `predictions_0002.`, - - ..., `predictions_N.` are created where `` - - depends on chosen `predictions_format`, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both `instance` and `prediction` schemata defined - - then each such file contains predictions as per the - - `predictions_format`. If prediction for any instance failed - - (partially or completely), then an additional - - `errors_0001.`, `errors_0002.`,..., - - `errors_N.` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional `error` field which as - - value has `google.rpc.Status` containing only `code` and - - `message` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: 'Google Cloud Storage URI(-s) to your instances to run batch - prediction - - on. They must match `instances_format`. May contain wildcards. For more - - information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). - - For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If `instance_type` is `array`, the order of field names in - - `included_fields` also determines the order of the values in the array. - - When `included_fields` is populated, `excluded_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model\naccepts. Vertex\ - \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ - to the specified format. Supported values are:\n`object`: Each input is\ - \ converted to JSON object format.\n * For `bigquery`, each row is converted\ - \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ - \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ - \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ - \ * For `bigquery`, each row is converted to an array. 
The order\n \ - \ of columns is determined by the BigQuery column order, unless\n \ - \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ - \ is populated.\n `included_fields` must be populated for specifying\ - \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ - \ object,\n `included_fields` must be populated for specifying field\ - \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ - \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ - \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ - \ is the same as `array`. The\n order of columns is the same as defined\ - \ in the file or table, unless\n included_fields is populated.\n * For\ - \ `jsonl`, the prediction instance format is determined by\n each line\ - \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ - \ be converted to\n an object in the format of `{\"b64\": <value>}`,\ - \ where `<value>` is\n the Base64-encoded string of the content of the\ - \ record.\n * For `file-list`, each file in the list will be converted\ - \ to an\n object in the format of `{\"b64\": <value>}`, where `<value>`\ - \ is\n the Base64-encoded string of the content of the file." - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: 'The format in which instances are - - given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s supportedInputStorageFormats. - - For more details about this input config, see - - [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field are not included in the\ntransformed instances\ - \ that are sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ - \ In addition,\nthe batch prediction output will not include the instances.\ - \ Instead the\noutput will only include the value of the key field, in\ - \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ - \ output will have a `key` field\n instead of the `instance` field.\n\ - \ * For `csv`/`bigquery` output format, the output will have a `key`\n\ - \ column instead of the instance feature columns.\nThe input must be\ - \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: Location for creating the BatchPredictionJob.
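For orientation, here is a minimal sketch of how the batch-prediction inputs documented above are typically wired up from Python. It assumes the v1 `ModelBatchPredictOp` component plus `dsl.importer`; the project, bucket, and display names are placeholders, not values from this patch:

from kfp import dsl
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp


@dsl.pipeline(name='batch-predict-sketch')
def batch_predict_sketch(project: str, model_resource_name: str):
    # Bring an existing Vertex model into the pipeline by resource name.
    model = dsl.importer(
        artifact_uri=model_resource_name,
        artifact_class=artifact_types.VertexModel,
        metadata={'resourceName': model_resource_name},
    )
    ModelBatchPredictOp(
        project=project,
        location='us-central1',
        job_display_name='forecasting-batch-predict',  # placeholder name
        model=model.output,
        instances_format='jsonl',
        predictions_format='jsonl',
        gcs_source_uris=['gs://my-bucket/instances-*.jsonl'],  # wildcards allowed
        gcs_destination_output_uri_prefix='gs://my-bucket/predictions',
        machine_type='n1-standard-4',  # opts into DEDICATED_RESOURCES
        starting_replica_count=1,
        max_replica_count=5,
    )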
- isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. The machine type and the size of a single record should be - - considered when setting this parameter: a higher value speeds up the - - batch operation''s execution, but too high a value can result in a whole - - batch not fitting in a machine''s memory, in which case the whole operation will - - fail.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation may be scaled - - to. Only used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the parameters - may be specified via the Model's `parameters_schema_uri`. - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: 'The format in which Vertex AI gives the predictions. Must be one of the - - Model''s supportedOutputStorageFormats. - - For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' - isOptional: true - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - description: Project to create the BatchPredictionJob. Defaults to the project in which the PipelineJob is run. - isOptional: true - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides the starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction job.
- - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation-forecasting: - executorLabel: exec-model-evaluation-forecasting - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-forecasting-2: - executorLabel: exec-model-evaluation-forecasting-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: 
- defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'google.ClassificationMetrics artifact generated from - - the ModelEvaluationClassificationOp component.' - isOptional: true - embedding_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The embedding metrics artifact generated from the - - embedding retrieval metrics component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'google.ForecastingMetrics artifact generated from - - the ModelEvaluationForecastingOp component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.QuestionAnsweringMetrics.' - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'google.RegressionMetrics artifact generated from - - the ModelEvaluationRegressionOp component.'
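The forecasting-evaluation component above is mostly self-describing, but the interplay of `forecasting_type`, `forecasting_quantiles`, and `point_evaluation_quantile` is easiest to see in a call. A hedged sketch: the `v1.model_evaluation` import path is an assumption, and the target column and BigQuery URI are placeholders:

from kfp import dsl
from google_cloud_pipeline_components.v1.model_evaluation import ModelEvaluationForecastingOp


@dsl.pipeline(name='forecasting-eval-sketch')
def forecasting_eval_sketch(project: str, root_dir: str):
    ModelEvaluationForecastingOp(
        project=project,
        root_dir=root_dir,
        target_field_name='sales',             # placeholder target column
        forecasting_type='quantile',           # the default above is 'point'
        forecasting_quantiles=[0.1, 0.5, 0.9],
        point_evaluation_quantile=0.5,         # quantile scored as the point forecast
        predictions_format='bigquery',
        ground_truth_format='bigquery',
        ground_truth_bigquery_source='bq://my-project.my_dataset.ground_truth',
    )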
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.SummarizationMetrics.' - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.TextGenerationMetrics.' - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, `forecasting`, - - `text-generation`, `question-answering`, and `summarization` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - evaluation_resource_name: - parameterType: STRING - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'google.ClassificationMetrics artifact generated from - - the ModelEvaluationClassificationOp component.' - isOptional: true - embedding_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The embedding metrics artifact generated from the - - embedding retrieval metrics component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'google.ForecastingMetrics artifact generated from - - the ModelEvaluationForecastingOp component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.QuestionAnsweringMetrics.' - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'google.RegressionMetrics artifact generated from - - the ModelEvaluationRegressionOp component.'
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.SummarizationMetrics.' - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.TextGenerationMetrics.' - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, `forecasting`, - - `text-generation`, `question-answering`, and `summarization` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - evaluation_resource_name: - parameterType: STRING - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: 
true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - stats_gen_execution_engine: - description: Execution engine used for stats gen in FTE. - parameterType: STRING - transformations: - description: Forecasting transformations to append stats gen engine to. - parameterType: STRUCT - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - transformations: - parameterType: STRUCT - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized train split. - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked.
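`comp-string-not-empty` above declares only an interface (a STRING in, a STRING out). A plausible reconstruction of the lightweight component, hedged because its executor body sits outside this hunk, together with the way such a gate is typically consumed:

from kfp import dsl


@dsl.component
def string_not_empty(value: str) -> str:
    """Returns 'true' if `value` is non-empty, 'false' otherwise."""
    return 'true' if value else 'false'


@dsl.component
def announce(msg: str):
    print(msg)


@dsl.pipeline(name='gate-sketch')
def gate_sketch(model_display_name: str = ''):
    check = string_not_empty(value=model_display_name)
    # Branch on the string output; KFP conditions compare channel values.
    with dsl.Condition(check.output == 'true'):
        announce(msg='a model display name was provided')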
- parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Dataset stats generated by feature transform engine. - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Schema of input data to the tf_model at serving time. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: The names of the columns that are available at forecast time. - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: If probabilistic inference is enabled, the model will fit a - distribution that captures the uncertainty of a prediction. At inference - time, the predictive distribution is used to make a point prediction that - minimizes the optimization objective. For example, the mean of a predictive - distribution is the point prediction that minimizes RMSE loss. If quantiles - are specified, then the quantiles of the distribution are also returned. - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: Dict mapping auto and/or type-resolutions to feature columns. - The supported types are auto, categorical, numeric, text, and timestamp. - isOptional: true - parameterType: STRUCT - group_columns: - description: A list of time series attribute column names that define the - time series hierarchy. - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over both - the horizon and time series in the same hierarchy group. - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over time - series in the same group. 
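Stepping back to `comp-table-to-uri` above: it likewise declares only inputs and outputs. A reconstruction consistent with that interface (hedged; the `projectId`/`datasetId`/`tableId` metadata keys match the ones the feature-attribution payload reads later in this file):

from typing import NamedTuple

from kfp import dsl


@dsl.component
def table_to_uri(
    table: dsl.Input[dsl.Artifact],
    use_bq_prefix: bool = False,
) -> NamedTuple(
    'Outputs',
    [('project_id', str), ('dataset_id', str), ('table_id', str), ('uri', str)],
):
    """Splits a BigQuery table artifact into its ID parts and a URI."""
    import collections

    project_id = table.metadata['projectId']
    dataset_id = table.metadata['datasetId']
    table_id = table.metadata['tableId']
    uri = '.'.join([project_id, dataset_id, table_id])
    if use_bq_prefix:
        uri = 'bq://' + uri  # e.g. bq://my-project.my_dataset.my_table
    return collections.namedtuple(
        'Outputs', ['project_id', 'dataset_id', 'table_id', 'uri']
    )(project_id, dataset_id, table_id, uri)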
- isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: 'Objective function the model is optimizing towards. The training - process creates a model that maximizes/minimizes the value of the objective - function over the validation set. The supported optimization objectives - depend on the prediction type. If the field is not set, a default objective - function is used. classification: "maximize-au-roc" (default) - Maximize - the area under the receiver operating characteristic (ROC) curve. "minimize-log-loss" - - Minimize log loss. "maximize-au-prc" - Maximize the area under the precision-recall - curve. "maximize-precision-at-recall" - Maximize precision for a specified - recall value. "maximize-recall-at-precision" - Maximize recall for a specified - precision value. classification (multi-class): "minimize-log-loss" (default) - - Minimize log loss. regression: "minimize-rmse" (default) - Minimize - root-mean-squared error (RMSE). "minimize-mae" - Minimize mean-absolute - error (MAE). "minimize-rmsle" - Minimize root-mean-squared log error - (RMSLE).' - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: Required when optimization_objective is "maximize-recall-at-precision". - Must be between 0 and 1, inclusive. - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: Required when optimization_objective is "maximize-precision-at-recall". - Must be between 0 and 1, inclusive. - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: Model prediction type. One of "classification", "regression", - "time_series". - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model needs to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: Whether the distillation should be applied to the training. - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: Whether we are running evaluation in the training pipeline. - isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: JSON string of data split example counts for train, validate, - and test splits. - parameterType: STRING - stage_1_deadline_hours: - description: Stage 1 training budget in hours. - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: Stage 2 training budget in hours. - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over the - horizon for a single time series. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: The column that indicates the time. Used by forecasting only. - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: The column names of the time series attributes. - isOptional: true - parameterType: LIST - time_series_identifier_column: - description: '[Deprecated] The time series identifier column. Used by forecasting - only. Raises exception if used - use the "time_series_identifier_columns" - field instead.'
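The `enable_probabilistic_inference` description above makes a precise claim: the point forecast is the functional of the predictive distribution that minimizes the chosen objective (the mean for RMSE, the median for MAE), and any requested quantiles are read off the same distribution. A small self-contained illustration, unrelated to the pipeline code, with arbitrary numbers:

import numpy as np

rng = np.random.default_rng(seed=0)
# Stand-in for a model's predictive distribution: skewed, so mean != median.
samples = rng.lognormal(mean=0.0, sigma=0.75, size=100_000)

point_for_rmse = samples.mean()            # the mean minimizes expected squared error
point_for_mae = float(np.median(samples))  # the median minimizes expected absolute error
q10, q50, q90 = np.quantile(samples, [0.1, 0.5, 0.9])  # as with `quantiles=[0.1, 0.5, 0.9]`

print(point_for_rmse, point_for_mae, (q10, q50, q90))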
- isOptional: true - parameterType: STRING - time_series_identifier_columns: - defaultValue: [] - description: The list of time series identifier columns. Used by forecasting - only. - isOptional: true - parameterType: LIST - unavailable_at_forecast_columns: - defaultValue: [] - description: The names of the columns that are not available at forecast - time. - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. -deploymentSpec: - executors: - exec-automl-forecasting-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, - "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", - "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", - "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", - "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", - "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", - "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", - "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", - "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", - "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, - "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", - "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", - "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", - "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", - "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", - "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", - "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", - "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", - "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", 
"{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-2-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - 
args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20240214_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - - --dataflow_machine_type - - 
'{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_runner_mode - - '{{$.inputs.parameters[''force_runner_mode'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_runner_mode - - '{{$.inputs.parameters[''force_runner_mode'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": 
["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", - "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' - - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", 
"{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' - - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - 
- --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-finalize-eval-quantile-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = 
[]\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-finalize-eval-quantile-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-or-create-model-description: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n if original_description:\n\ - \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ - \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ - \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-or-create-model-description-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef 
get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n if original_description:\n\ - \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ - \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ - \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-prediction-image-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20240214_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20240214_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20240214_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20240214_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-prediction-image-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20240214_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20240214_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20240214_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20240214_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-predictions-column: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-predictions-column-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-model-batch-explanation: - container: - 
args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", 
"\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", 
"{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", 
"{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-evaluation-forecasting: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - 
exec-model-evaluation-forecasting-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", - "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --evaluation_resource_name - - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": 
"embedding_metrics", "Then": ["--embedding_metrics", - "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --evaluation_resource_name - - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", - "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.17 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", - "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' - command: - - python3 - - -u - - -m - - launcher - image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.17 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n stats_gen_execution_engine: str,\n transformations: dict,\n\ - ) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ('transformations', dict),\n ],\n):\n \"\"\"Get the\ - \ data source URI.\n\n Args:\n project: The GCP project that runs the\ - \ pipeline components.\n location: The GCP region that runs the pipeline\ - \ components.\n data_source_csv_filenames: The CSV GCS path when data\ - \ source is CSV.\n data_source_bigquery_table_path: The BigQuery table\ - \ when data source is BQ.\n vertex_dataset: The Vertex dataset when data\ - \ source is Vertex dataset.\n model_display_name: The uploaded model's\ - \ display name.\n stats_gen_execution_engine: Execution engine used for\ - \ stats gen in FTE.\n transformations: forecasting transformations to\ - \ append stats gen engine to.\n\n Returns:\n A named tuple of CSV or\ - \ BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n # TODO(b/261504514) Remove this handling when we use the FTE transform\ - \ config.\n transformations['stats_gen_execution_engine'] = stats_gen_execution_engine\n\ - \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ - \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ - \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ - \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ - \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ - \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ - \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ - \ return collections.namedtuple(\n 'Outputs',\n [\n \ - \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n 'transformations',\n ],\n\ - \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ - \ model_display_name,\n transformations,\n )\n\n" - image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path pattern to materialized_train_split.\n\ - \ materialized_eval_split: Path pattern to materialized_eval_split.\n\ - \ materialized_test_split: Path pattern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. 
-> 'true' if not empty, 'false' if empty.\ - \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - 
'{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": - ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' - - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 -pipelineInfo: - description: The AutoML Forecasting pipeline. 
- name: learn-to-learn-forecasting -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - artifacts: - pipelinechannel--parent_model: - componentInputArtifact: parent_model - parameters: - pipelinechannel--available_at_forecast_columns: - componentInputParameter: available_at_forecast_columns - pipelinechannel--context_window: - componentInputParameter: context_window - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - componentInputParameter: feature_transform_engine_dataflow_disk_size_gb - pipelinechannel--feature_transform_engine_dataflow_machine_type: - 
componentInputParameter: feature_transform_engine_dataflow_machine_type - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - componentInputParameter: feature_transform_engine_dataflow_max_num_workers - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--group_columns: - componentInputParameter: group_columns - pipelinechannel--group_temporal_total_weight: - componentInputParameter: group_temporal_total_weight - pipelinechannel--group_total_weight: - componentInputParameter: group_total_weight - pipelinechannel--holiday_regions: - componentInputParameter: holiday_regions - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--model_display_name: - componentInputParameter: model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: num_selected_trials - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--quantiles: - componentInputParameter: quantiles - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-transformations: - taskOutputParameter: - outputParameterKey: transformations - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: stage_2_trainer_worker_pool_specs_override - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--temporal_total_weight: - componentInputParameter: temporal_total_weight - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_attribute_columns: - componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_columns: - componentInputParameter: time_series_identifier_columns - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transformations: - componentInputParameter: transformations - 
pipelinechannel--unavailable_at_forecast_columns: - componentInputParameter: unavailable_at_forecast_columns - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_predefined_column: - componentInputParameter: window_predefined_column - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - stats_gen_execution_engine: - runtimeValue: - constant: bigquery - transformations: - componentInputParameter: transformations - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Vertex Model to upload this model as a version to. - isOptional: true - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - available_at_forecast_columns: - description: 'The columns that are available at the - - forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: 0.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: The full service account name. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: The dataflow subnetwork. - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: '`True` to enable dataflow public IPs.' - isOptional: true - parameterType: BOOLEAN - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is enabled, the - - model will fit a distribution that captures the uncertainty of a - - prediction. If quantiles are specified, then the quantiles of the - - distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluated_examples_bigquery_path: - defaultValue: '' - description: 'The bigquery dataset to write the - - predicted examples into for evaluation, in the format - - `bq://project.dataset`. Only necessary if evaluation is enabled.' 
- isOptional: true
- parameterType: STRING
- evaluation_batch_explain_machine_type:
- defaultValue: n1-highmem-8
- description: 'The prediction server machine type
-
- for batch explain components during evaluation.'
- isOptional: true
- parameterType: STRING
- evaluation_batch_explain_max_replica_count:
- defaultValue: 22.0
- description: 'The max number of prediction
-
- servers for batch explain components during evaluation.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- evaluation_batch_explain_starting_replica_count:
- defaultValue: 22.0
- description: 'The initial number of
-
- prediction servers for batch explain components during evaluation.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- evaluation_batch_predict_machine_type:
- defaultValue: n1-standard-16
- description: 'Machine type for the batch prediction
-
- job in evaluation, such as ''n1-standard-16''.'
- isOptional: true
- parameterType: STRING
- evaluation_batch_predict_max_replica_count:
- defaultValue: 25.0
- description: 'The maximum count of replicas
-
- the batch prediction job can scale to.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- evaluation_batch_predict_starting_replica_count:
- defaultValue: 25.0
- description: 'Number of replicas to use
-
- in the batch prediction cluster at startup time.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- evaluation_dataflow_disk_size_gb:
- defaultValue: 50.0
- description: The disk space in GB for dataflow.
- isOptional: true
- parameterType: NUMBER_INTEGER
- evaluation_dataflow_machine_type:
- defaultValue: n1-standard-16
- description: 'Machine type for the dataflow job in
-
- evaluation, such as ''n1-standard-16''.'
- isOptional: true
- parameterType: STRING
- evaluation_dataflow_max_num_workers:
- defaultValue: 25.0
- description: Maximum number of dataflow workers.
- isOptional: true
- parameterType: NUMBER_INTEGER
- evaluation_dataflow_starting_num_workers:
- defaultValue: 22.0
- description: 'The initial number of Dataflow
-
- workers for evaluation components.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- fast_testing:
- defaultValue: false
- description: Internal flag used for presubmit tests.
- isOptional: true
- parameterType: BOOLEAN
- feature_transform_engine_bigquery_staging_full_dataset_id:
- defaultValue: ''
- description: 'The full id of
-
- the feature transform engine staging dataset.'
- isOptional: true
- parameterType: STRING
- feature_transform_engine_dataflow_disk_size_gb:
- defaultValue: 40.0
- description: 'The disk size of the
-
- dataflow workers of the feature transform engine.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- feature_transform_engine_dataflow_machine_type:
- defaultValue: n1-standard-16
- description: 'The dataflow machine type of
-
- the feature transform engine.'
- isOptional: true
- parameterType: STRING
- feature_transform_engine_dataflow_max_num_workers:
- defaultValue: 10.0
- description: 'The max number of
-
- dataflow workers of the feature transform engine.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- forecast_horizon:
- defaultValue: 0.0
- description: The length of the horizon.
- isOptional: true
- parameterType: NUMBER_INTEGER
- group_columns:
- description: 'A list of time series attribute column names that define the
-
- time series hierarchy.'
- isOptional: true
- parameterType: LIST
- group_temporal_total_weight:
- defaultValue: 0.0
- description: 'The weight of the loss for predictions
-
- aggregated over both the horizon and time series in the same hierarchy
-
- group.'
- isOptional: true
- parameterType: NUMBER_DOUBLE
- group_total_weight:
- defaultValue: 0.0
- description: 'The weight of the loss for predictions aggregated over
-
- time series in the same group.'
- isOptional: true
- parameterType: NUMBER_DOUBLE
- holiday_regions:
- description: 'The geographical regions where the holiday effect is
-
- applied in modeling.'
- isOptional: true
- parameterType: LIST
- location:
- description: The GCP region that runs the pipeline components.
- parameterType: STRING
- model_description:
- defaultValue: ''
- description: Optional description.
- isOptional: true
- parameterType: STRING
- model_display_name:
- defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
- description: Optional display name for model.
- isOptional: true
- parameterType: STRING
- num_selected_trials:
- defaultValue: 10.0
- description: Number of selected trials.
- isOptional: true
- parameterType: NUMBER_INTEGER
- optimization_objective:
- description: '"minimize-rmse", "minimize-mae", "minimize-rmsle",
-
- "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or
-
- "minimize-quantile-loss".'
- parameterType: STRING
- predefined_split_key:
- defaultValue: ''
- description: The predefined_split column name.
- isOptional: true
- parameterType: STRING
- project:
- description: The GCP project that runs the pipeline components.
- parameterType: STRING
- quantiles:
- description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles
-
- are allowed, with values between 0 and 1, exclusive. Represents the quantiles
-
- to use for that objective. Quantiles must be unique.'
- isOptional: true
- parameterType: LIST
- root_dir:
- description: The root GCS directory for the pipeline components.
- parameterType: STRING
- run_evaluation:
- defaultValue: false
- description: '`True` to evaluate the ensembled model on the test split.'
- isOptional: true
- parameterType: BOOLEAN
- stage_1_num_parallel_trials:
- defaultValue: 35.0
- description: Number of parallel trials for stage 1.
- isOptional: true
- parameterType: NUMBER_INTEGER
- stage_1_tuner_worker_pool_specs_override:
- description: 'The dictionary for overriding
-
- stage 1 tuner worker pool spec.'
- isOptional: true
- parameterType: LIST
- stage_1_tuning_result_artifact_uri:
- defaultValue: ''
- description: 'The stage 1 tuning result artifact GCS
-
- URI.'
- isOptional: true
- parameterType: STRING
- stage_2_num_parallel_trials:
- defaultValue: 35.0
- description: Number of parallel trials for stage 2.
- isOptional: true
- parameterType: NUMBER_INTEGER
- stage_2_trainer_worker_pool_specs_override:
- description: 'The dictionary for overriding
-
- stage 2 trainer worker pool spec.'
- isOptional: true
- parameterType: LIST
- study_spec_parameters_override:
- description: The list for overriding study spec.
- isOptional: true
- parameterType: LIST
- target_column:
- description: The target column name.
- parameterType: STRING
- temporal_total_weight:
- defaultValue: 0.0
- description: 'The weight of the loss for predictions aggregated
-
- over the horizon for a single time series.'
- isOptional: true
- parameterType: NUMBER_DOUBLE
- test_fraction:
- defaultValue: -1.0
- description: The test fraction.
- isOptional: true
- parameterType: NUMBER_DOUBLE
- time_column:
- description: The column that indicates the time.
- parameterType: STRING
- time_series_attribute_columns:
- description: 'The columns that are invariant across the
-
- same time series.'
- isOptional: true
- parameterType: LIST
- time_series_identifier_columns:
- description: 'The columns that distinguish the different
-
- time series.'
- parameterType: LIST
- timestamp_split_key:
- defaultValue: ''
- description: The timestamp_split column name.
- isOptional: true
- parameterType: STRING
- train_budget_milli_node_hours:
- description: 'The train budget of creating this model,
-
- expressed in milli node hours, i.e. a value of 1,000 in this field means
-
- 1 node hour.'
- parameterType: NUMBER_DOUBLE
- training_fraction:
- defaultValue: -1.0
- description: The training fraction.
- isOptional: true
- parameterType: NUMBER_DOUBLE
- transformations:
- description: 'Dict mapping auto and/or type-resolutions to feature
-
- columns. The supported types are: auto, categorical, numeric, text, and
-
- timestamp.'
- parameterType: STRUCT
- unavailable_at_forecast_columns:
- description: 'The columns that are unavailable at the
-
- forecast time.'
- isOptional: true
- parameterType: LIST
- validation_fraction:
- defaultValue: -1.0
- description: The validation fraction.
- isOptional: true
- parameterType: NUMBER_DOUBLE
- weight_column:
- defaultValue: ''
- description: The weight column name.
- isOptional: true
- parameterType: STRING
- window_max_count:
- defaultValue: 0.0
- description: The maximum number of windows that will be generated.
- isOptional: true
- parameterType: NUMBER_INTEGER
- window_predefined_column:
- defaultValue: ''
- description: The column that indicates the start of each window.
- isOptional: true
- parameterType: STRING
- window_stride_length:
- defaultValue: 0.0
- description: The stride length to generate the window.
- isOptional: true
- parameterType: NUMBER_INTEGER
- outputDefinitions:
- artifacts:
- feature-attribution-2-feature_attributions:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- feature-attribution-feature_attributions:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
-schemaVersion: 2.1.0
-sdkVersion: kfp-2.0.0-rc.2
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml
deleted file mode 100644
index be422014b4d..00000000000
--- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml
+++ /dev/null
@@ -1,7545 +0,0 @@
-# PIPELINE DEFINITION
-# Name: sequence-to-sequence-forecasting
-# Description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline.
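For context, compiled templates like the one deleted above are not meant to be hand-edited; callers fetch the YAML path plus a matching parameter dict from the `get_*_pipeline_and_parameters` helpers and submit both to Vertex AI Pipelines. Below is a minimal sketch of that flow, assuming the `preview` namespace this patch keeps; the project, bucket, and column values are illustrative placeholders, and the helper accepts more optional arguments than shown.

# Sketch only: fetch the compiled Seq2Seq forecasting template and its
# parameter values, then run them as a Vertex AI pipeline job. All concrete
# values below are placeholders; argument names mirror the pipeline inputs
# listed in the template header.
from google.cloud import aiplatform
from google_cloud_pipeline_components.preview.automl.forecasting import (
    get_sequence_to_sequence_forecasting_pipeline_and_parameters,
)

template_path, parameter_values = (
    get_sequence_to_sequence_forecasting_pipeline_and_parameters(
        project='my-project',                                # placeholder
        location='us-central1',
        root_dir='gs://my-bucket/pipeline_root',             # placeholder GCS root
        target_column='sales',
        optimization_objective='minimize-rmse',
        transformations={'auto': ['date', 'sales', 'store_id']},
        train_budget_milli_node_hours=1000,                  # i.e. 1 node hour
        time_column='date',
        time_series_identifier_columns=['store_id'],
        data_source_bigquery_table_path='bq://my-project.my_dataset.sales',
    )
)

# Submit the compiled template together with the generated parameter values.
aiplatform.init(project='my-project', location='us-central1')
job = aiplatform.PipelineJob(
    display_name='seq2seq-forecasting',
    template_path=template_path,
    parameter_values=parameter_values,
    pipeline_root='gs://my-bucket/pipeline_root',
)
job.run()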
-# Inputs: -# available_at_forecast_columns: list -# context_window: int [Default: 0.0] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# encryption_spec_key_name: str [Default: ''] -# evaluated_examples_bigquery_path: str [Default: ''] -# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_explain_max_replica_count: int [Default: 22.0] -# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] -# evaluation_batch_predict_max_replica_count: int [Default: 25.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] -# evaluation_dataflow_max_num_workers: int [Default: 25.0] -# evaluation_dataflow_starting_num_workers: int [Default: 22.0] -# fast_testing: bool [Default: False] -# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] -# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] -# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] -# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] -# forecast_horizon: int [Default: 0.0] -# group_columns: list -# group_temporal_total_weight: float [Default: 0.0] -# group_total_weight: float [Default: 0.0] -# holiday_regions: list -# location: str -# model_description: str [Default: ''] -# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] -# num_selected_trials: int [Default: 10.0] -# optimization_objective: str -# parent_model: system.Artifact -# predefined_split_key: str [Default: ''] -# project: str -# root_dir: str -# run_evaluation: bool [Default: False] -# stage_1_num_parallel_trials: int [Default: 35.0] -# stage_1_tuner_worker_pool_specs_override: list -# stage_1_tuning_result_artifact_uri: str [Default: ''] -# stage_2_num_parallel_trials: int [Default: 35.0] -# stage_2_trainer_worker_pool_specs_override: list -# study_spec_parameters_override: list -# target_column: str -# temporal_total_weight: float [Default: 0.0] -# test_fraction: float [Default: -1.0] -# time_column: str -# time_series_attribute_columns: list -# time_series_identifier_columns: list -# timestamp_split_key: str [Default: ''] -# train_budget_milli_node_hours: float -# training_fraction: float [Default: -1.0] -# transformations: dict -# unavailable_at_forecast_columns: list -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# window_max_count: int [Default: 0.0] -# window_predefined_column: str [Default: ''] -# window_stride_length: int [Default: 0.0] -# Outputs: -# feature-attribution-2-feature_attributions: system.Metrics -# feature-attribution-feature_attributions: system.Metrics -components: - comp-automl-forecasting-ensemble: - executorLabel: exec-automl-forecasting-ensemble - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The instance baseline used to calculate explanations. 
- instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the instance schema, describing the input data - for the tf_model at serving time. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: URI of the Docker image to be used as the container for serving - predictions. This URI must identify an image in Artifact Registry or Container - Registry. - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - example_instance: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: An example instance which may be used as an input for predictions. - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-ensemble-2: - executorLabel: exec-automl-forecasting-ensemble-2 - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The instance baseline used to calculate explanations. - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the instance schema, describing the input data - for the tf_model at serving time. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. 
- isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: URI of the Docker image to be used as the container for serving - predictions. This URI must identify an image in Artifact Registry or Container - Registry. - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - example_instance: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: An example instance which may be used as an input for predictions. - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-stage-1-tuner: - executorLabel: exec-automl-forecasting-stage-1-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: Number of hours the hyperparameter tuning should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the hyperparameter tuning. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: Number of selected trials. The number of weak learners in the - final model is 5 * num_selected_trials. - parameterType: NUMBER_INTEGER - project: - description: Project to run hyperparameter tuning. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible values: "regular" (default), - "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. 
- parameterType: STRING
- single_run_max_secs:
- description: Max number of seconds each training trial runs.
- parameterType: NUMBER_INTEGER
- study_spec_parameters_override:
- defaultValue: []
- description: 'JSON study spec. E.g., [{"parameter_id": "activation","categorical_value_spec":
- {"values": ["tanh"]}}]'
- isOptional: true
- parameterType: LIST
- worker_pool_specs_override_json:
- defaultValue: []
- description: 'JSON worker pool specs. E.g., [{"machine_spec": {"machine_type":
- "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}]'
- isOptional: true
- parameterType: LIST
- outputDefinitions:
- artifacts:
- tuning_result_output:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: The trained model and architectures.
- parameters:
- gcp_resources:
- description: GCP resources created by this component. For more details,
- see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.
- parameterType: STRING
- comp-automl-forecasting-stage-2-tuner:
- executorLabel: exec-automl-forecasting-stage-2-tuner
- inputDefinitions:
- artifacts:
- materialized_eval_split:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: The materialized eval split.
- materialized_train_split:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: The materialized train split.
- metadata:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: The forecasting example gen metadata.
- transform_output:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: The transform output artifact.
- tuning_result_input_path:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: Path to the JSON of hyperparameter tuning results to use when
- evaluating models.
- parameters:
- deadline_hours:
- description: Number of hours the cross-validation trainer should run.
- parameterType: NUMBER_DOUBLE
- encryption_spec_key_name:
- defaultValue: ''
- description: Customer-managed encryption key.
- isOptional: true
- parameterType: STRING
- location:
- description: 'Cloud region for running the component, e.g., us-central1.'
- parameterType: STRING
- num_parallel_trials:
- description: Number of parallel training trials.
- parameterType: NUMBER_INTEGER
- num_selected_trials:
- description: Number of selected trials. The number of weak learners in the
- final model.
- parameterType: NUMBER_INTEGER
- project:
- description: Project to run stage 2 tuner.
- parameterType: STRING
- root_dir:
- description: The Cloud Storage location to store the output.
- parameterType: STRING
- single_run_max_secs:
- description: Max number of seconds each training trial runs.
- parameterType: NUMBER_INTEGER
- worker_pool_specs_override_json:
- defaultValue: []
- description: 'JSON worker pool specs. E.g., [{"machine_spec": {"machine_type":
- "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}]'
- isOptional: true
- parameterType: LIST
- outputDefinitions:
- artifacts:
- tuning_result_output:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: The trained (private) model artifact paths and their hyperparameters.
- parameters:
- gcp_resources:
- description: GCP resources created by this component. For more details,
- see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.
- parameterType: STRING
- comp-automl-tabular-finalizer:
- executorLabel: exec-automl-tabular-finalizer
- inputDefinitions:
- parameters:
- encryption_spec_key_name:
- defaultValue: ''
- description: Customer-managed encryption key.
- isOptional: true
- parameterType: STRING
- location:
- description: Location for running the Cross-validation trainer.
- parameterType: STRING
- project:
- description: Project to run Cross-validation trainer.
- parameterType: STRING
- root_dir:
- description: The Cloud Storage location to store the output.
- parameterType: STRING
- outputDefinitions:
- parameters:
- gcp_resources:
- description: GCP resources created by this component. For more details,
- see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.
- parameterType: STRING
- comp-calculate-training-parameters:
- executorLabel: exec-calculate-training-parameters
- inputDefinitions:
- parameters:
- fast_testing:
- defaultValue: false
- description: Internal flag used for presubmit tests.
- isOptional: true
- parameterType: BOOLEAN
- is_skip_architecture_search:
- defaultValue: false
- description: 'If the component is being called in the
-
- skip_architecture_search pipeline.'
- isOptional: true
- parameterType: BOOLEAN
- selected_trials:
- description: Number of trials that should be selected.
- parameterType: NUMBER_INTEGER
- stage_1_num_parallel_trials:
- description: Number of parallel trials for stage 1.
- parameterType: NUMBER_INTEGER
- stage_2_num_parallel_trials:
- description: Number of parallel trials for stage 2.
- parameterType: NUMBER_INTEGER
- train_budget_milli_node_hours:
- description: 'The train budget of creating this model,
-
- expressed in milli node hours, i.e. a value of 1,000 in this field means
-
- 1 node hour.'
- parameterType: NUMBER_DOUBLE
- outputDefinitions:
- parameters:
- stage_1_deadline_hours:
- parameterType: NUMBER_DOUBLE
- stage_1_single_run_max_secs:
- parameterType: NUMBER_INTEGER
- stage_2_deadline_hours:
- parameterType: NUMBER_DOUBLE
- stage_2_single_run_max_secs:
- parameterType: NUMBER_INTEGER
- comp-calculate-training-parameters-2:
- executorLabel: exec-calculate-training-parameters-2
- inputDefinitions:
- parameters:
- fast_testing:
- defaultValue: false
- description: Internal flag used for presubmit tests.
- isOptional: true
- parameterType: BOOLEAN
- is_skip_architecture_search:
- defaultValue: false
- description: 'If the component is being called in the
-
- skip_architecture_search pipeline.'
- isOptional: true
- parameterType: BOOLEAN
- selected_trials:
- description: Number of trials that should be selected.
- parameterType: NUMBER_INTEGER
- stage_1_num_parallel_trials:
- description: Number of parallel trials for stage 1.
- parameterType: NUMBER_INTEGER
- stage_2_num_parallel_trials:
- description: Number of parallel trials for stage 2.
- parameterType: NUMBER_INTEGER
- train_budget_milli_node_hours:
- description: 'The train budget of creating this model,
-
- expressed in milli node hours, i.e. a value of 1,000 in this field means
-
- 1 node hour.'
- parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - tasks: - automl-forecasting-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble - dependentTasks: - - automl-forecasting-stage-2-tuner - - get-prediction-image-uri - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-2-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble - automl-forecasting-stage-2-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-2-tuner - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input_path: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-2-tuner - calculate-training-parameters: - cachingOptions: - 
enableCache: true - componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: true - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-forecasting-ensemble - - model-upload - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description - get-prediction-image-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri - inputs: - parameters: - model_type: - runtimeValue: - constant: seq2seq - taskInfo: - name: get-prediction-image-uri - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: get-hyperparameter-tuning-results - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-forecasting-ensemble - - get-or-create-model-description - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - parent_model: - componentInputArtifact: pipelinechannel--parent_model - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - 
predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - force_runner_mode: - runtimeValue: - constant: Dataflow - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - runtimeValue: - constant: forecasting - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - finalize-eval-quantile-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters - inputs: - parameters: - quantiles: - runtimeValue: - constant: [] - taskInfo: - name: finalize-eval-quantile-parameters - get-predictions-column: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column - dependentTasks: - - finalize-eval-quantile-parameters - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: true - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: 
pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: false - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation-forecasting: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting - dependentTasks: - - finalize-eval-quantile-parameters - - get-predictions-column - - model-batch-predict - - table-to-uri - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: 
pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation-forecasting - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - model-batch-predict - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - use_bq_prefix: - runtimeValue: - constant: true - taskInfo: - name: table-to-uri - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: 
STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - tasks: - automl-forecasting-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble-2 - dependentTasks: - - automl-forecasting-stage-1-tuner - - get-prediction-image-uri-2 - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-1-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri-2 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble-2 - automl-forecasting-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - reduce_search_space_mode: - runtimeValue: - constant: full - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - worker_pool_specs_override_json: - componentInputParameter: 
pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-1-tuner - calculate-training-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: false - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-forecasting-ensemble-2 - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: 
pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description-2 - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description-2 - get-prediction-image-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri-2 - inputs: - parameters: - model_type: - runtimeValue: - constant: seq2seq - taskInfo: - name: get-prediction-image-uri-2 - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - dependentTasks: - - automl-forecasting-ensemble-2 - - get-or-create-model-description-2 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - parent_model: - componentInputArtifact: pipelinechannel--parent_model - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description-2 - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - 
predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - force_runner_mode: - runtimeValue: - constant: Dataflow - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - runtimeValue: - constant: forecasting - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - finalize-eval-quantile-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters-2 - inputs: - parameters: - quantiles: - runtimeValue: - constant: [] - taskInfo: - name: finalize-eval-quantile-parameters-2 - get-predictions-column-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: true - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - 
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: false - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-forecasting-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - - get-predictions-column-2 - - model-batch-predict-2 - - table-to-uri-2 - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters-2 - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column-2 - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: 
pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting-2 - model-evaluation-import-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-forecasting-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import-2 - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - use_bq_prefix: - runtimeValue: - constant: true - taskInfo: - name: table-to-uri-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - 
pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - tasks: - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - 
pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model - 
pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - 
componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - forecasting_context_window: - componentInputParameter: pipelinechannel--context_window - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_holiday_regions: - componentInputParameter: pipelinechannel--holiday_regions - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_predefined_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_columns: - componentInputParameter: pipelinechannel--time_series_identifier_columns - forecasting_unavailable_at_forecast_columns: - componentInputParameter: 
pipelinechannel--unavailable_at_forecast_columns - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - location: - componentInputParameter: pipelinechannel--location - model_type: - runtimeValue: - constant: seq2seq - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - stats_gen_execution_engine: - runtimeValue: - constant: bigquery - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--transformations - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: check-if-hyperparameter-tuning-results-are-supplied-by-user - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - context_window: - componentInputParameter: pipelinechannel--context_window - enable_probabilistic_inference: - runtimeValue: - constant: false - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_model_type: - runtimeValue: - constant: seq2seq - forecasting_transformations: - componentInputParameter: pipelinechannel--set-optional-inputs-transformations - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: 
pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - prediction_type: - runtimeValue: - constant: time_series - quantiles: - runtimeValue: - constant: [] - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_columns: - componentInputParameter: pipelinechannel--time_series_identifier_columns - unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - artifacts: - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--available_at_forecast_columns: - parameterType: LIST - pipelinechannel--context_window: - parameterType: NUMBER_INTEGER - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_transform_engine_dataflow_machine_type: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--group_columns: - parameterType: LIST - pipelinechannel--group_temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--group_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--holiday_regions: - parameterType: LIST - pipelinechannel--location: - parameterType: STRING - 
pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-transformations: - parameterType: STRUCT - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_attribute_columns: - parameterType: LIST - pipelinechannel--time_series_identifier_columns: - parameterType: LIST - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transformations: - parameterType: STRUCT - pipelinechannel--unavailable_at_forecast_columns: - parameterType: LIST - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_predefined_column: - parameterType: STRING - pipelinechannel--window_stride_length: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size_gb: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: 
true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - force_runner_mode: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size_gb: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - force_runner_mode: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: Dataset in "projectId.datasetId" format for storing intermediate-FTE - BigQuery tables. If the specified dataset does not exist in BigQuery, - FTE will create the dataset. 
If no bigquery_staging_full_dataset_id is - specified, all intermediate tables will be stored in a dataset named - "vertex_feature_transform_engine_staging_{location.replace('-', '_')}", - created under the provided project in the input data source's location - during FTE execution. All tables generated by FTE will have a 30-day TTL. - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: BigQuery input data source to run feature transform on. - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: CSV input data source to run feature transform on. - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: The disk size, in gigabytes, to use on each Dataflow worker - instance. If not set, defaults to 40. - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: The machine type used for Dataflow jobs. If not set, defaults - to n1-standard-16. - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: The maximum number of workers to run the Dataflow job. If not set, - defaults to 25. - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: Custom service account to run Dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name; when empty, the - default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: Specifies whether Dataflow workers use public IP addresses. - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: 'List of dataset-level custom transformation definitions. Custom, - bring-your-own dataset-level transform functions, where users can define - and import their own transform function and use it with FTE''s built-in - transformations. Using custom transformations is an experimental feature - and it is currently not supported during batch prediction. - - [ { "transformation": "ConcatCols", "module_path": "/path/to/custom_transform_fn_dlt.py", - "function_name": "concat_cols" } ] Using a custom transform function together - with FTE''s built-in transformations: .. code-block:: python [ { "transformation": - "Join", "right_table_uri": "bq://test-project.dataset_test.table", "join_keys": - [["join_key_col", "join_key_col"]] },{ "transformation": "ConcatCols", - "cols": ["feature_1", "feature_2"], "output_col": "feature_1_2" } ]' - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level transformations.\n[ { \"transformation\"\ - : \"Join\", \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - , \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }, ... ] Additional\ - \ information about FTE's currently supported built-in\n transformations:\n\ - \ Join: Joins features from right_table_uri. For each join key, the\ - \ left table keys will be included and the right table keys will be dropped.\n\ - \ Example: .. 
code-block:: python { \"transformation\": \"Join\"\ - , \"right_table_uri\": \"bq://test-project.dataset_test.table\", \"join_keys\"\ - : [[\"join_key_col\", \"join_key_col\"]] }\n Arguments:\n \ - \ right_table_uri: Right table BigQuery uri to join with input_full_table_id.\n\ - \ join_keys: Features to join on. For each nested list, the\ - \ first element is a left table column and the second is its corresponding\ - \ right table column.\n TimeAggregate: Creates a new feature composed\ - \ of values of an existing feature from a fixed time period ago or in\ - \ the future.\n Ex: A feature for sales by store 1 year ago.\n \ - \ Example: .. code-block:: python { \"transformation\": \"TimeAggregate\"\ - , \"time_difference\": 40, \"time_difference_units\": \"DAY\", \"time_series_identifier_columns\"\ - : [\"store_id\"], \"time_column\": \"time_col\", \"time_difference_target_column\"\ - : \"target_col\", \"output_column\": \"output_col\" }\n Arguments:\n\ - \ time_difference: Number of time_difference_units to look\ - \ back or into the future on our time_difference_target_column.\n \ - \ time_difference_units: Units of time_difference to look back\ - \ or into the future on our time_difference_target_column. Must be one\ - \ of * 'DAY' * 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER' * 'YEAR'\n\ - \ time_series_identifier_columns: Names of the time series\ - \ identifier columns.\n time_column: Name of the time column.\n\ - \ time_difference_target_column: Column we wish to get the\ - \ value of time_difference time_difference_units in the past or future.\n\ - \ output_column: Name of our new time aggregate feature.\n\ - \ is_future: Whether we wish to look forward in time. Defaults\ - \ to False. PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\ - \ Performs a partition by reduce operation (one of max, min, avg, or sum)\ - \ with a fixed historic time period. Ex: Getting avg sales (the reduce\ - \ column) for each store (partition_by_column) over the previous 5 days\ - \ (time_column, time_ago_units, and time_ago).\n Example: .. code-block::\ - \ python { \"transformation\": \"PartitionByMax\", \"reduce_column\"\ - : \"sell_price\", \"partition_by_columns\": [\"store_id\", \"state_id\"\ - ], \"time_column\": \"date\", \"time_ago\": 1, \"time_ago_units\": \"\ - WEEK\", \"output_column\": \"partition_by_reduce_max_output\" }\n \ - \ Arguments:\n reduce_column: Column to apply the reduce\ - \ operation on. Reduce operations include the\n following:\ - \ Max, Min, Avg, Sum.\n partition_by_columns: List of columns\ - \ to partition by.\n time_column: Time column for the partition\ - \ by operation's window function.\n time_ago: Number of time_ago_units\ - \ to look back on our target_column, starting from time_column (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on our target_column.\ - \ Must be one of * 'DAY' * 'WEEK'\n output_column: Name of\ - \ our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature selection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\". The algorithms available\ - \ are: AMI(Adjusted Mutual Information):\nReference: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\ - \ Arrays are not yet supported in this algorithm. 
CMIM(Conditional Mutual\ - \ Information Maximization): Reference paper: Mohamed Bennasar, Yulia\ - \ Hicks, Rossitza Setchi, \u201CFeature selection using Joint Mutual Information\ - \ Maximisation,\u201D Expert Systems with Applications, vol. 42, issue\ - \ 22, 1 December 2015, Pages 8520-8532. JMIM(Joint Mutual Information\ - \ Maximization):\nReference:\n paper: Mohamed Bennasar, Yulia Hicks, Rossitza\ - \ Setchi, \u201CFeature selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert Systems with Applications, vol. 42, issue 22, 1 December 2015,\ - \ Pages 8520-8532. MRMR(MIQ Minimum-redundancy Maximum-relevance): Reference\ - \ paper: Hanchuan Peng, Fuhui Long, and Chris Ding. \"Feature selection\ - \ based on mutual information criteria of max-dependency, max-relevance,\ - \ and min-redundancy.\" IEEE Transactions on pattern analysis and machine\ - \ intelligence 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - feature_selection_execution_engine: - defaultValue: dataflow - description: Execution engine to run feature selection; value can be dataflow - or bigquery. - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: Forecasting available at forecast columns. - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the holiday effect - is applied in modeling by adding holiday categorical array feature that - includes all holidays matching the date. This option is only allowed when - data granularity is day. By default, holiday effect modeling is disabled. - To turn it on, specify the holiday region using this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. 
- isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: Forecasting time series attribute columns. - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - description: '[Deprecated] A forecasting time series identifier column. - Raises an exception if used - use the "forecasting_time_series_identifier_columns" - field instead.' - isOptional: true - parameterType: STRING - forecasting_time_series_identifier_columns: - defaultValue: [] - description: The list of forecasting time series identifier columns. - isOptional: true - parameterType: LIST - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: Forecasting unavailable at forecast columns. - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: The format to use for the materialized examples. Should be - either 'tfrecords_gzip' (default) or 'parquet'. - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: Maximum number of features to select. If specified, the transform - config will be purged by only using the selected features that ranked - top in the feature ranking, which has the ranking value for all supported - features. If the number of input features is smaller than max_selected_features - specified, we will still run the feature selection process and generate - the feature ranking, but no features will be excluded. The value will be - set to 1000 by default if run_feature_selection is enabled. - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features for. Can be - one of: neural_network, boosted_trees, l2l, seq2seq, tft, or tide. Defaults - to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: List of multimodal image columns. Defaults to an empty list. - isOptional: true - parameterType: LIST - multimodal_tabular_columns: - defaultValue: [] - description: List of multimodal tabular columns. Defaults to an empty list. - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: List of multimodal text columns. Defaults to an empty list. - isOptional: true - parameterType: LIST - multimodal_timeseries_columns: - defaultValue: [] - description: List of multimodal timeseries columns. Defaults to an empty - list. - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: Model prediction type. One of "classification", "regression", - "time_series". 
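The dataset-level transformation parameters documented above can be assembled as a plain Python literal before being passed to the pipeline. A minimal sketch, using only the example values quoted in the parameter descriptions (the table URI and column names are illustrative, not real resources):

.. code-block:: python

    # Sketch: dataset-level transformations, mirroring the Join and
    # TimeAggregate examples from the descriptions above.
    dataset_level_transformations = [
        {
            "transformation": "Join",
            "right_table_uri": "bq://test-project.dataset_test.table",
            "join_keys": [["join_key_col", "join_key_col"]],
        },
        {
            "transformation": "TimeAggregate",
            "time_difference": 40,
            "time_difference_units": "DAY",
            "time_series_identifier_columns": ["store_id"],
            "time_column": "time_col",
            "time_difference_target_column": "target_col",
            "output_column": "output_col",
        },
    ]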
- isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: (deprecated) Whether the distillation should be applied to - the training. - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: Whether the feature selection should be applied to the dataset. - isOptional: true - parameterType: BOOLEAN - stats_gen_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform statistics generation. Can be - one of: "dataflow" (by default) or "bigquery". Using "bigquery" as the - execution engine is experimental.' - isOptional: true - parameterType: STRING - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to TF transform - features. FTE will automatically configure a set of built-in transformations - for each feature based on its data statistics. If users do not want auto - type resolution, but want the set of transformations for a given type - to be automatically generated, they may specify pre-resolved transformations - types. The following type hint dict keys are supported: * ''auto'' * ''categorical'' - * ''numeric'' * ''text'' * ''timestamp'' Example: `{ "auto": ["feature1"], - "categorical": ["feature2", "feature3"], }`. Note that the target and - weight column may not be included as an auto transformation unless users - are running forecasting.' - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: 'List of TensorFlow-based custom transformation definitions. Custom, - bring-your-own transform functions, where users can define and import - their own transform function and use it with FTE''s built-in transformations. - `[ { "transformation": "PlusOne", "module_path": "gs://bucket/custom_transform_fn.py", - "function_name": "plus_one_transform" }, { "transformation": "MultiplyTwo", - "module_path": "gs://bucket/custom_transform_fn.py", "function_name": - "multiply_two_transform" } ]` Using custom transform function together - with FTE''s built-in transformations: .. code-block:: python [ { "transformation": - "CastToFloat", "input_columns": ["feature_1"], "output_columns": ["feature_1"] - },{ "transformation": "PlusOne", "input_columns": ["feature_1"], "output_columns": - ["feature_1_plused_one"] },{ "transformation": "MultiplyTwo", "input_columns": - ["feature_1"], "output_columns": ["feature_1_multiplied_two"] } ]' - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform row-level TF transformations. - Can be one of: "dataflow" (by default) or "bigquery". Using "bigquery" - as the execution engine is experimental and is for allowlisted customers - only. 
In addition, executing on "bigquery" only supports auto transformations - (i.e., specified by tf_auto_transform_features) and will raise an error - when tf_custom_transformation_definitions or tf_transformations_path is - set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based transformation configuration. Path\ - \ to a JSON file used to specify FTE's TF transformation configurations.\ - \ In the following, we provide some sample transform configurations to\ - \ demonstrate FTE's capabilities. All transformations on input columns\ - \ are explicitly specified with FTE's built-in transformations. Chaining\ - \ of multiple transformations on a single column is also supported. For\ - \ example: .. code-block:: python [ { \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, { \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]`. Additional information about\ - \ FTE's currently supported built-in\ntransformations:\nDatetime: Extracts\ - \ datetime features from a column containing timestamp strings.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Datetime\", \"input_columns\"\ - : [\"feature_1\"], \"time_format\": \"%Y-%m-%d\" }\n Arguments:\n \ - \ input_columns: A list with a single column to perform the datetime\ - \ transformation on.\n output_columns: Names of output columns,\ - \ one for each datetime_features element.\n time_format: Datetime\ - \ format string. Time format is a combination of Date + Time Delimiter\ - \ (optional) + Time (optional) directives. Valid date directives are as\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' # 2018/11/30 * '%y-%m-%d'\ - \ # 18-11-30 * '%y/%m/%d' # 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y'\ - \ # 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' # 11/30/18 * '%d-%m-%Y'\ - \ # 30-11-2018 * '%d/%m/%Y' # 30/11/2018 * '%d-%B-%Y' # 30-November-2018\ - \ * '%d-%m-%y' # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' # 30-November-18\ - \ * '%d%m%Y' # 30112018 * '%m%d%Y' # 11302018 * '%Y%m%d' # 20181130\ - \ Valid time delimiters are as follows * 'T' * ' ' Valid time directives\ - \ are as follows * '%H:%M' # 23:59 * '%H:%M:%S' #\n \ - \ 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456] * '%H:%M:%S.%f%z'\ - \ # 23:59:58[.123456]+0000 * '%H:%M:%S%z', # 23:59:58+0000\n \ - \ datetime_features: List of datetime features to be extracted. Each entry\ - \ must be one of * 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK' * 'DAY_OF_YEAR'\ - \ * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR' * 'MINUTE' * 'SECOND' Defaults\ - \ to ['YEAR', 'MONTH', 'DAY', 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - Log: Performs the natural log on a numeric column.\n Example: .. code-block::\ - \ python { \"transformation\": \"Log\", \"input_columns\": [\"feature_1\"\ - ] }\n Arguments:\n input_columns: A list with a single column\ - \ to perform the log transformation on.\n output_columns: A list\ - \ with a single output column name, corresponding to the output of our\ - \ transformation.\nZScale: Performs Z-scale normalization on a numeric\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - : \"ZScale\", \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to perform the z-scale\ - \ transformation on.\n output_columns: A list with a single output\ - \ column name, corresponding to the output of our transformation.\nVocabulary:\ - \ Converts strings to integers, where each unique string gets a unique\ - \ integer representation.\n Example: .. code-block:: python { \"\ - transformation\": \"Vocabulary\", \"input_columns\": [\"feature_1\"] }\n\ - \ Arguments:\n input_columns: A list with a single column to\ - \ perform the vocabulary transformation on.\n output_columns: A\ - \ list with a single output column name, corresponding to the output of\ - \ our transformation.\n top_k: Number of the most frequent words\ - \ in the vocabulary to use for generating dictionary lookup indices. If\ - \ not specified, all words in the vocabulary will be used. Defaults to\ - \ None.\n frequency_threshold: Limit the vocabulary only to words\ - \ whose number of occurrences in the input exceeds frequency_threshold.\ - \ If not specified, all words in the vocabulary will be included. If both\ - \ top_k and frequency_threshold are specified, a word must satisfy both\ - \ conditions to be included. Defaults to None.\nCategorical: Transforms\ - \ categorical columns to integer columns.\n Example: .. code-block::\ - \ python { \"transformation\": \"Categorical\", \"input_columns\": [\"\ - feature_1\"], \"top_k\": 10 }\n Arguments:\n input_columns:\ - \ A list with a single column to perform the categorical transformation\ - \ on.\n output_columns: A list with a single output column name,\ - \ corresponding to the output of our transformation.\n top_k: Number\ - \ of the most frequent words in the vocabulary to use for generating dictionary\ - \ lookup indices. If not specified, all words in the vocabulary will be\ - \ used.\n frequency_threshold: Limit the vocabulary only to words\ - \ whose number of occurrences in the input exceeds frequency_threshold.\ - \ If not specified, all words in the vocabulary will be included. If both\ - \ top_k and frequency_threshold are specified, a word must satisfy both\ - \ conditions to be included.\nReduce: Given a column where each entry\ - \ is a numeric array, reduces arrays according to our reduce_mode.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Reduce\"\ - , \"input_columns\": [\"feature_1\"], \"reduce_mode\": \"MEAN\", \"output_columns\"\ - : [\"feature_1_mean\"] }\n Arguments:\n input_columns: A list\ - \ with a single column to perform the reduce transformation on.\n \ - \ output_columns: A list with a single output column name, corresponding\ - \ to the output of our transformation.\n reduce_mode: One of *\ - \ 'MAX' * 'MIN' * 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k:\ - \ The number of last k elements when 'LAST_K' reduce mode is used. Defaults\ - \ to 1.\nSplitString: Given a column of strings, splits strings into token\ - \ arrays.\n Example: .. code-block:: python { \"transformation\"\ - : \"SplitString\", \"input_columns\": [\"feature_1\"], \"separator\":\ - \ \"$\" }\n Arguments:\n input_columns: A list with a single\ - \ column to perform the split string transformation on.\n output_columns:\ - \ A list with a single output column name, corresponding to the output\ - \ of our transformation.\n separator: Separator to split input\ - \ string into tokens. 
Defaults to ' '.\n missing_token: Missing\ - \ token to use when no string is included. Defaults to ' _MISSING_ '.\n\ - NGram: Given a column of strings, splits strings into token arrays where\ - \ each token is an integer.\n Example: .. code-block:: python { \"\ - transformation\": \"NGram\", \"input_columns\": [\"feature_1\"], \"min_ngram_size\"\ - : 1, \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n \ - \ input_columns: A list with a single column to perform the n-gram\ - \ transformation on.\n output_columns: A list with a single output\ - \ column name, corresponding to the output of our transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must be a positive number\ - \ and <= max_ngram_size. Defaults to 1.\n max_ngram_size: Maximum\ - \ n-gram size. Must be a positive number and >= min_ngram_size. Defaults\ - \ to 2.\n top_k: Number of the most frequent words in the vocabulary\ - \ to use for generating dictionary lookup indices. If not specified, all\ - \ words in the vocabulary will be used. Defaults to None.\n frequency_threshold:\ - \ Limit the dictionary's vocabulary only to words whose number of occurrences\ - \ in the input exceeds frequency_threshold. If not specified, all words\ - \ in the vocabulary will be included. If both top_k and frequency_threshold\ - \ are specified, a word must satisfy both conditions to be included. Defaults\ - \ to None.\n separator: Separator to split input string into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use when no\ - \ string is included. Defaults to ' _MISSING_ '.\nClip: Given a numeric\ - \ column, clips elements such that elements < min_value are assigned min_value,\ - \ and elements > max_value are assigned max_value.\n Example: .. code-block::\ - \ python { \"transformation\": \"Clip\", \"input_columns\": [\"col1\"\ - ], \"output_columns\": [\"col1_clipped\"], \"min_value\": 1., \"max_value\"\ - : 10., }\n Arguments:\n input_columns: A list with a single\ - \ column to perform the clip transformation on.\n output_columns:\ - \ A list with a single output column name, corresponding to the output\ - \ of our transformation.\n min_value: Number where all values below\ - \ min_value are set to min_value. If no min_value is provided, min clipping\ - \ will not occur. Defaults to None.\n max_value: Number where all\ - \ values above max_value are set to max_value If no max_value is provided,\ - \ max clipping will not occur. Defaults to None.\nMultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical array column.\n Example: ..\ - \ code-block:: python { \"transformation\": \"MultiHotEncoding\", \"\ - input_columns\": [\"col1\"], } The number of classes is determined by\ - \ the largest number included in the input if it is numeric or the total\ - \ number of unique values of the input if it is type str. If the input\ - \ has type str and an element contains separator tokens, the input\ - \ will be split at separator indices, and each element of the split\ - \ list will be considered a separate class. For example,\n Input: \ - \ .. code-block:: python [ [\"foo bar\"], # Example 0 [\"foo\",\ - \ \"bar\"], # Example 1 [\"foo\"], # Example 2 [\"bar\"], \ - \ # Example 3 ] Output (with default separator=\" \"): .. code-block::\ - \ python [ [1, 1], # Example 0 [1, 1], # Example 1 [1,\ - \ 0], # Example 2 [0, 1], # Example 3 ]\n Arguments:\n\ - \ input_columns: A list with a single column to perform the multi-hot-encoding\ - \ on.\n output_columns: A list with a single output column name,\ - \ corresponding to the output of our transformation.\n top_k: Number\ - \ of the most frequent words in the vocabulary to use for generating dictionary\ - \ lookup indices. If not specified, all words in the vocabulary will be\ - \ used. Defaults to None.\n frequency_threshold: Limit the dictionary's\ - \ vocabulary only to words whose number of occurrences in the input exceeds\ - \ frequency_threshold. If not specified, all words in the vocabulary will\ - \ be included. If both top_k and frequency_threshold are specified, a\ - \ word must satisfy both conditions to be included. Defaults to None.\n\ - \ separator: Separator to split input string into tokens. Defaults\ - \ to ' '.\nMaxAbsScale: Performs maximum absolute scaling on a numeric\ - \ column.\n Example: .. code-block:: python { \"transformation\"\ - : \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\":\ - \ [\"col1_max_abs_scaled\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to perform max-abs-scale on.\n output_columns:\ - \ A list with a single output column name, corresponding to the output\ - \ of our transformation.\nCustom: Transformations defined in tf_custom_transformation_definitions\ - \ are included here in the TensorFlow-based transformation configuration.\ - \ For example, given the following tf_custom_transformation_definitions:\ - \ .. code-block:: python [ { \"transformation\": \"PlusX\", \"module_path\"\ - : \"gs://bucket/custom_transform_fn.py\", \"function_name\": \"plus_one_transform\"\ - \ } ] We can include the following transformation: .. code-block:: python\ - \ { \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"], \"\ - output_columns\": [\"col1_max_abs_scaled\"], \"x\": 5 } Note that input_columns\ - \ must still be included in our arguments and output_columns is optional.\ - \ All other arguments are those defined in custom_transform_fn.py, which\ - \ includes `\"x\"` in this case. See tf_custom_transformation_definitions\ - \ above. legacy_transformations_path (Optional[str]) Deprecated. Prefer\ - \ tf_auto_transform_features. Path to a GCS file containing JSON string\ - \ for legacy style transformations. Note that legacy_transformations_path\ - \ and tf_auto_transform_features cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The ranking of features; all features supported in the dataset - will be included. 
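Taken together, the row-level transformations above can be supplied through a JSON file referenced by tf_transformations_path. A minimal sketch of writing such a file, assuming a hypothetical custom PlusX transform has been registered via tf_custom_transformation_definitions (file name and column names are placeholders):

.. code-block:: python

    import json

    # Chain two built-in transformations on separate columns, then apply
    # the custom PlusX transform registered in
    # tf_custom_transformation_definitions.
    transformations = [
        {"transformation": "ZScale", "input_columns": ["feature_1"]},
        {"transformation": "Vocabulary", "input_columns": ["feature_2"]},
        {"transformation": "PlusX", "input_columns": ["col1"], "x": 5},
    ]
    with open("transform_config.json", "w") as f:
        json.dump(transformations, f)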
For "AMI" algorithm, array features won't be available - in the ranking as arrays are not supported yet. - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: BigQuery URI for the downsampled test split to pass to the - batch prediction component during batch explain. - parameterType: STRING - bigquery_test_split_uri: - description: BigQuery URI for the test split to pass to the batch prediction - component during evaluation. - parameterType: STRING - bigquery_train_split_uri: - description: BigQuery URI for the train split to pass to the batch prediction - component during distillation. - parameterType: STRING - bigquery_validation_split_uri: - description: BigQuery URI for the validation split to pass to the batch - prediction component during distillation. - parameterType: STRING - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - split_example_counts: - description: JSON string of data split example counts for train, validate, - and test splits. - parameterType: STRING - comp-finalize-eval-quantile-parameters: - executorLabel: exec-finalize-eval-quantile-parameters - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-finalize-eval-quantile-parameters-2: - executorLabel: exec-finalize-eval-quantile-parameters-2 - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-get-or-create-model-description: - executorLabel: exec-get-or-create-model-description - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-or-create-model-description-2: - executorLabel: exec-get-or-create-model-description-2 - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri: - executorLabel: exec-get-prediction-image-uri - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri-2: - executorLabel: exec-get-prediction-image-uri-2 - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column: - executorLabel: exec-get-predictions-column - inputDefinitions: - parameters: - forecasting_type: - parameterType: 
STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column-2: - executorLabel: exec-get-predictions-column-2 - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - 
defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - `unmanaged_container_model` must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. 
For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - `prediction_<model-display-name>_<job-create-time>` where <model-display-name> is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - `predictions`, and `errors`. If the Model has both `instance` - - and `prediction` schemata defined then the tables have columns as - - follows: The `predictions` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The `errors` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has [google.rpc.Status](Status) - - represented as a STRUCT, and containing only `code` and - - `message`. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - key_field is not specified. - - When `excluded_fields` is populated, `included_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' 
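The BigQuery input and output parameters above combine as follows. A minimal sketch with placeholder project, dataset, and table ids:

.. code-block:: python

    # Sketch: BigQuery-to-BigQuery batch prediction I/O configuration.
    # The output dataset prediction_<model-display-name>_<job-create-time>
    # (with `predictions` and `errors` tables) is created by the service.
    batch_predict_bq_io = {
        "instances_format": "bigquery",
        "predictions_format": "bigquery",
        "bigquery_source_input_uri": "bq://example-project.example_dataset.input_table",
        "bigquery_destination_output_uri": "bq://example-project",
    }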
- isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - `prediction-<model-display-name>-<job-create-time>`, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - `predictions_0001.<extension>`, `predictions_0002.<extension>`, - - ..., `predictions_N.<extension>` are created where `<extension>` - - depends on chosen `predictions_format`, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both `instance` and `prediction` schemata defined - - then each such file contains predictions as per the - - `predictions_format`. If prediction for any instance failed - - (partially or completely), then an additional - - `errors_0001.<extension>`, `errors_0002.<extension>`,..., - - `errors_N.<extension>` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional `error` field which as - - value has `google.rpc.Status` containing only `code` and - - `message` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: 'Google Cloud Storage URI(-s) to your instances to run batch - prediction - - on. They must match `instances_format`. May contain wildcards. For more - - information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). - - For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' 
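Equivalently for Cloud Storage, per the gcs_source_uris, gcs_destination_output_uri_prefix, and generate_explanation descriptions above; bucket names and paths are placeholders:

.. code-block:: python

    # Sketch: GCS-based batch prediction with explanations enabled.
    batch_predict_gcs_io = {
        "instances_format": "jsonl",
        "gcs_source_uris": ["gs://example-bucket/instances/*.jsonl"],  # wildcards allowed
        "gcs_destination_output_uri_prefix": "gs://example-bucket/predictions",
        # With jsonl output, each line gains an `explanation` entry; this
        # requires explanation metadata/parameters to be resolvable.
        "generate_explanation": True,
    }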
- isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If `instance_type` is `array`, the order of field names in - - `included_fields` also determines the order of the values in the array. - - When `included_fields` is populated, `excluded_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model\naccepts. Vertex\ - \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ - to the specified format. Supported values are:\n`object`: Each input is\ - \ converted to JSON object format.\n * For `bigquery`, each row is converted\ - \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ - \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ - \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ - \ * For `bigquery`, each row is converted to an array. The order\n \ - \ of columns is determined by the BigQuery column order, unless\n \ - \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ - \ is populated.\n `included_fields` must be populated for specifying\ - \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ - \ object,\n `included_fields` must be populated for specifying field\ - \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ - \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ - \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ - \ is the same as `array`. The\n order of columns is the same as defined\ - \ in the file or table, unless\n included_fields is populated.\n * For\ - \ `jsonl`, the prediction instance format is determined by\n each line\ - \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ - \ be converted to\n an object in the format of `{\"b64\": <value>}`,\ - \ where `<value>` is\n the Base64-encoded string of the content of the\ - \ record.\n * For `file-list`, each file in the list will be converted\ - \ to an\n object in the format of `{\"b64\": <value>}`, where `<value>`\ - \ is\n the Base64-encoded string of the content of the file." - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: 'The format in which instances are - - given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s - supportedInputStorageFormats. - - For more details about this input config, see - - [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field are not included in the\ntransformed instances\ - \ that are sent to the Model. 
This is similar to\nspecifying the name\ - \ of this field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ - \ In addition,\nthe batch prediction output will not include the instances.\ - \ Instead the\noutput will only include the value of the key field, in\ - \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ - \ output will have a `key` field\n instead of the `instance` field.\n\ - \ * For `csv`/`bigquery` output format, the output will have a `key`\n\ - \ column instead of the instance feature columns.\nThe input must be\ - \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: Location for creating the BatchPredictionJob. - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter; a higher value speeds up the - - batch operation''s execution, but too high a value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters may be specified via the Model's `parameters_schema_uri`. - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: 'The format in which Vertex AI gives the predictions. Must - be one of the - - Model''s supportedOutputStorageFormats. - - For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' - isOptional: true - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - description: Project to create the BatchPredictionJob. Defaults to the project - in which the PipelineJob is run. 
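To illustrate the key_field behavior described above, a minimal sketch with a hypothetical `user_id` field (field names and values are placeholders):

.. code-block:: python

    # Input instance (jsonl). With key_field="user_id", the value is
    # withheld from the instance sent to the Model.
    instance = {"user_id": "u123", "feature_1": 0.5, "feature_2": "a"}
    # Output line shape: a `key` field replaces the full `instance`.
    expected_output_shape = {"key": "u123", "prediction": "..."}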
- isOptional: true - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_destination_output_uri is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - `unmanaged_container_model` must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - `prediction_<model-display-name>_<job-create-time>` where <model-display-name> is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - `predictions`, and `errors`. If the Model has both `instance` - - and `prediction` schemata defined then the tables have columns as - - follows: The `predictions` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The `errors` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has [google.rpc.Status](Status) - - represented as a STRUCT, and containing only `code` and - - `message`. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - key_field is not specified. - - When `excluded_fields` is populated, `included_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
- isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - `prediction--`, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - `predictions_0001.`, `predictions_0002.`, - - ..., `predictions_N.` are created where `` - - depends on chosen `predictions_format`, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both `instance` and `prediction` schemata defined - - then each such file contains predictions as per the - - `predictions_format`. If prediction for any instance failed - - (partially or completely), then an additional - - `errors_0001.`, `errors_0002.`,..., - - `errors_N.` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional `error` field which as - - value has `google.rpc.Status` containing only `code` and - - `message` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: 'Google Cloud Storage URI(-s) to your instances to run batch - prediction - - on. They must match `instances_format`. May contain wildcards. For more - - information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). - - For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If `instance_type` is `array`, the order of field names in - - `included_fields` also determines the order of the values in the array. - - When `included_fields` is populated, `excluded_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model\naccepts. 
Vertex\ - \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ - to the specified format. Supported values are:\n`object`: Each input is\ - \ converted to JSON object format.\n * For `bigquery`, each row is converted\ - \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ - \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ - \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ - \ * For `bigquery`, each row is converted to an array. The order\n \ - \ of columns is determined by the BigQuery column order, unless\n \ - \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ - \ is populated.\n `included_fields` must be populated for specifying\ - \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ - \ object,\n `included_fields` must be populated for specifying field\ - \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ - \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ - \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ - \ is the same as `array`. The\n order of columns is the same as defined\ - \ in the file or table, unless\n included_fields is populated.\n * For\ - \ `jsonl`, the prediction instance format is determined by\n each line\ - \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ - \ be converted to\n an object in the format of `{\"b64\": <value>}`,\ - \ where `<value>` is\n the Base64-encoded string of the content of the\ - \ record.\n * For `file-list`, each file in the list will be converted\ - \ to an\n object in the format of `{\"b64\": <value>}`, where `<value>`\ - \ is\n the Base64-encoded string of the content of the file." - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: 'The format in which instances are - - given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s - supportedInputStorageFormats. - - For more details about this input config, see - - [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field are not included in the\ntransformed instances\ - \ that are sent to the Model. This is similar to\nspecifying the name\ - \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ - \ In addition,\nthe batch prediction output will not include the instances.\ - \ Instead the\noutput will only include the value of the key field, in\ - \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ - \ output will have a `key` field\n instead of the `instance` field.\n\ - \ * For `csv`/`bigquery` output format, the output will have a `key`\n\ - \ column instead of the instance feature columns.\nThe input must be\ - \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." 
- isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: Location for creating the BatchPredictionJob. - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - records (e.g. instances) of the operation given in each batch to a - - machine replica. The machine type and the size of a single record should be - - considered when setting this parameter: a higher value speeds up the - - batch operation''s execution, but too high a value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters may be specified via the Model's `parameters_schema_uri`. - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: 'The format in which Vertex AI gives the predictions. Must - be one of the - - Model''s supportedOutputStorageFormats. - - For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' - isOptional: true - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - description: Project to create the BatchPredictionJob. Defaults to the project - in which the PipelineJob is run. - isOptional: true - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides the starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' 
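- # Editorial note (an illustrative sketch, not emitted by the compiler): at
- # most one of the two artifacts below is populated per run. If the caller set
- # `bigquery_destination_output_uri`, the result lands in
- # `bigquery_output_table`, whose metadata carries the table coordinates,
- # shaped roughly like this (assumed example values):
- #   metadata:
- #     projectId: my-project
- #     datasetId: my_dataset
- #     tableId: predictions_2024_03_14
- # The `table-to-uri` component defined later in this file turns exactly this
- # metadata into a `bq://` URI for downstream steps. If
- # `gcs_destination_output_uri_prefix` was set instead, `gcs_output_directory`
- # is populated and the BigQuery artifact stays empty.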
- bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if a BigQuery destination - - (`bigquery_destination_output_uri`) is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation-forecasting: - executorLabel: exec-model-evaluation-forecasting - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-forecasting-2: - executorLabel: exec-model-evaluation-forecasting-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: 
- artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'google.ClassificationMetrics artifact generated from - - the ModelEvaluationClassificationOp component.' - isOptional: true - embedding_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The embedding metrics artifact generated from the - - embedding retrieval metrics component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'google.ForecastingMetrics artifact generated from - - the ModelEvaluationForecastingOp component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. 
- isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.QuestionAnsweringMetrics.' - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'google.RegressionMetrics artifact generated from - - the ModelEvaluationRegressionOp component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.SummarizationMetrics.' - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.TextGenerationMetrics.' - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, `forecasting`, - - `text-generation`, `question-answering`, and `summarization` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - evaluation_resource_name: - parameterType: STRING - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'google.ClassificationMetrics artifact generated from - - the ModelEvaluationClassificationOp component.' - isOptional: true - embedding_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The embedding metrics artifact generated from the - - embedding retrieval metrics component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'google.ForecastingMetrics artifact generated from - - the ModelEvaluationForecastingOp component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. 
- isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.QuestionAnsweringMetrics.' - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'google.RegressionMetrics artifact generated from - - the ModelEvaluationRegressionOp component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.SummarizationMetrics.' - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.TextGenerationMetrics.' - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, `forecasting`, - - `text-generation`, `question-answering`, and `summarization` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' 
- isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - evaluation_resource_name: - parameterType: STRING - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - stats_gen_execution_engine: - description: Execution engine used for stats gen in FTE. 
- parameterType: STRING - transformations: - description: Forecasting transformations to append the stats gen engine to. - parameterType: STRUCT - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - transformations: - parameterType: STRUCT - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized train split. - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Dataset stats generated by feature transform engine. - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Schema of input data to the tf_model at serving time. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: The names of the columns that are available at forecast time. - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: If probabilistic inference is enabled, the model will fit a - distribution that captures the uncertainty of a prediction. At inference - time, the predictive distribution is used to make a point prediction that - minimizes the optimization objective. 
For example, the mean of a predictive - distribution is the point prediction that minimizes RMSE loss. If quantiles - are specified, then the quantiles of the distribution are also returned. - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model type, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: Dict mapping auto and/or type-resolutions to feature columns. - The supported types are auto, categorical, numeric, text, and timestamp. - isOptional: true - parameterType: STRUCT - group_columns: - description: A list of time series attribute column names that define the - time series hierarchy. - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over both - the horizon and time series in the same hierarchy group. - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over time - series in the same group. - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: 'Objective function the model is optimizing towards. The training - process creates a model that maximizes/minimizes the value of the objective - function over the validation set. The supported optimization objectives - depend on the prediction type. If the field is not set, a default objective - function is used. classification: "maximize-au-roc" (default) - Maximize - the area under the receiver operating characteristic (ROC) curve. "minimize-log-loss" - - Minimize log loss. "maximize-au-prc" - Maximize the area under the precision-recall - curve. "maximize-precision-at-recall" - Maximize precision for a specified - recall value. "maximize-recall-at-precision" - Maximize recall for a specified - precision value. classification (multi-class): "minimize-log-loss" (default) - - Minimize log loss. regression: "minimize-rmse" (default) - Minimize - root-mean-squared error (RMSE). "minimize-mae" - Minimize mean-absolute - error (MAE). "minimize-rmsle" - Minimize root-mean-squared log error - (RMSLE).' - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: Required when optimization_objective is "maximize-recall-at-precision". - Must be between 0 and 1, inclusive. - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: Required when optimization_objective is "maximize-precision-at-recall". - Must be between 0 and 1, inclusive. - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: Model prediction type. One of "classification", "regression", - "time_series". - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model needs to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: Whether the distillation should be applied to the training. - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: Whether we are running evaluation in the training pipeline. 
- isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: JSON string of data split example counts for train, validate, - and test splits. - parameterType: STRING - stage_1_deadline_hours: - description: Stage 1 training budget in hours. - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: Stage 2 training budget in hours. - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over the - horizon for a single time series. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: The column that indicates the time. Used by forecasting only. - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: The column names of the time series attributes. - isOptional: true - parameterType: LIST - time_series_identifier_column: - description: '[Deprecated] The time series identifier column. Used by forecasting - only. Raises exception if used - use the "time_series_identifier_columns" - field instead.' - isOptional: true - parameterType: STRING - time_series_identifier_columns: - defaultValue: [] - description: The list of time series identifier columns. Used by forecasting - only. - isOptional: true - parameterType: LIST - unavailable_at_forecast_columns: - defaultValue: [] - description: The names of the columns that are not available at forecast - time. - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. 
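- # Illustrative sketch (assumed example values, not emitted by the compiler):
- # a probabilistic-inference forecasting run might feed the
- # training-configurator-and-validator component inputs shaped like this,
- # using only parameters defined above:
- #   prediction_type: time_series
- #   optimization_objective: minimize-rmse   # point prediction = distribution mean
- #   enable_probabilistic_inference: true
- #   quantiles: [0.1, 0.5, 0.9]              # quantiles of the fitted distribution
- #   forecast_horizon: 30
- #   context_window: 90
- #   time_column: date
- #   time_series_identifier_columns: [store_id]
- #   target_column: sales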
-deploymentSpec: - executors: - exec-automl-forecasting-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, - "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", - "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", - "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", - "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", - "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", - "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", - "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", - "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", - "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, - "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", - "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", - "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", - "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", - "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", - 
"--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", - "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", - "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", - "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - 
exec-automl-forecasting-stage-2-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20240214_1325", "\", - 
\"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 
'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trials for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours, i.e. a value of 1,000 in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trials for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number of seconds for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number of seconds for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ 
stage_2_single_run_max_secs,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_runner_mode - - '{{$.inputs.parameters[''force_runner_mode'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - 
--dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_runner_mode - - '{{$.inputs.parameters[''force_runner_mode'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", - "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' - - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", 
"{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' - - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", 
"{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-finalize-eval-quantile-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-finalize-eval-quantile-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-or-create-model-description: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import 
*\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n if original_description:\n\ - \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ - \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ - \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-or-create-model-description-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n if original_description:\n\ - \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ - \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ - \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-prediction-image-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20240214_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20240214_1325',\n\ - \ 'tft': 
'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20240214_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20240214_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-prediction-image-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20240214_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20240214_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20240214_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20240214_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-predictions-column: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-predictions-column-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-model-batch-explanation: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", 
"{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - 
container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", 
"{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-evaluation-forecasting: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": 
"predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-forecasting-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - 
'{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", - "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - 
'{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --evaluation_resource_name - - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", - "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --evaluation_resource_name - - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", 
\"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", - "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.17 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", - "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.17 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n stats_gen_execution_engine: str,\n transformations: dict,\n\ - ) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ('transformations', dict),\n ],\n):\n \"\"\"Get the\ - \ data source URI.\n\n Args:\n project: The GCP project that runs the\ - \ pipeline components.\n location: The GCP region that runs the pipeline\ - \ components.\n data_source_csv_filenames: The CSV GCS path when data\ - \ source is CSV.\n data_source_bigquery_table_path: The BigQuery table\ - \ when data source is BQ.\n vertex_dataset: The Vertex dataset when data\ - \ source is Vertex dataset.\n model_display_name: The uploaded model's\ - \ display name.\n stats_gen_execution_engine: 
Execution engine used for\ - \ stats gen in FTE.\n    transformations: forecasting transformations to\ - \ append stats gen engine to.\n\n  Returns:\n    A named tuple of CSV or\ - \ BQ URI.\n  \"\"\"\n  # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \  import collections\n  from google.cloud import aiplatform\n  from google.cloud\ - \ import aiplatform_v1beta1 as aip\n  # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n  # TODO(b/261504514) Remove this handling when we use the FTE transform\ - \ config.\n  transformations['stats_gen_execution_engine'] = stats_gen_execution_engine\n\ - \n  if not model_display_name:\n    model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ - \n  if vertex_dataset is not None:\n    # of format\n    # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \    dataset_name = vertex_dataset.metadata['resourceName']\n\n    aiplatform.init(project=project,\ - \ location=location)\n    client = aip.DatasetServiceClient(\n        client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n    )\n    dataset = client.get_dataset(name=dataset_name)\n\ - \    input_config = dataset.metadata['inputConfig']\n    if 'gcsSource'\ - \ in input_config:\n      data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ - \    elif 'bigquerySource' in input_config:\n      data_source_bigquery_table_path\ - \ = input_config['bigquerySource']['uri']\n  elif data_source_csv_filenames:\n\ - \    pass\n  elif data_source_bigquery_table_path:\n    pass\n  else:\n\ - \    raise ValueError(\n        'One of vertex_dataset, data_source_csv_filenames,'\n\ - \        ' data_source_bigquery_table_path must be specified'\n    )\n\n\ - \  return collections.namedtuple(\n      'Outputs',\n      [\n          \ - \ 'data_source_csv_filenames',\n          'data_source_bigquery_table_path',\n\ - \          'model_display_name',\n          'transformations',\n      ],\n\ - \  )(\n      data_source_csv_filenames,\n      data_source_bigquery_table_path,\n\ - \      model_display_name,\n      transformations,\n  )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n    materialized_data: Input[Dataset],\n\ - \    materialized_train_split: OutputPath('MaterializedSplit'),\n    materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n    materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \  \"\"\"Splits materialized_data into train, eval, and\ - \ test splits.\n\n  Necessary adapter between FTE pipeline and trainer.\n\ - \n  Args:\n    materialized_data: materialized_data dataset output by FTE.\n\ - \    materialized_train_split: Path pattern to materialized_train_split.\n\ - \    materialized_eval_split: Path pattern to materialized_eval_split.\n\ - \    materialized_test_split: Path pattern to materialized_test_split.\n\ - \  \"\"\"\n  # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \  import json\n  import tensorflow as tf\n  # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n  with 
tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. 
-> 'true' if not empty, 'false' if empty.\ - \ We need to use str\n    instead of bool due to a limitation in KFP compiler.\n\ - \  \"\"\"\n  return 'true' if value else 'false'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n    table: dsl.Input[dsl.Artifact],\n    use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n    'Outputs',\n    [\n        ('project_id',\ - \ str),\n        ('dataset_id', str),\n        ('table_id', str),\n      \ - \ ('uri', str),\n    ],\n):\n  \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n  # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \  import collections\n  # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n  outputs = [\n      table.metadata['projectId'],\n      table.metadata['datasetId'],\n\ - \      table.metadata['tableId'],\n  ]\n  bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n    bq_uri = 'bq://' + bq_uri\n  outputs.append(bq_uri)\n\ - \  return collections.namedtuple(\n      'Outputs',\n      ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n  )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n    table: dsl.Input[dsl.Artifact],\n    use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n    'Outputs',\n    [\n        ('project_id',\ - \ str),\n        ('dataset_id', str),\n        ('table_id', str),\n      \ - \ ('uri', str),\n    ],\n):\n  \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n  # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \  import collections\n  # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n  outputs = [\n      table.metadata['projectId'],\n      table.metadata['datasetId'],\n\ - \      table.metadata['tableId'],\n  ]\n  bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n    bq_uri = 'bq://' + bq_uri\n  outputs.append(bq_uri)\n\ - \  return collections.namedtuple(\n      'Outputs',\n      ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n  )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - 
'{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": - ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' - - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 -pipelineInfo: - description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. 
- name: sequence-to-sequence-forecasting -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - artifacts: - pipelinechannel--parent_model: - componentInputArtifact: parent_model - parameters: - pipelinechannel--available_at_forecast_columns: - componentInputParameter: available_at_forecast_columns - pipelinechannel--context_window: - componentInputParameter: context_window - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - componentInputParameter: feature_transform_engine_dataflow_disk_size_gb - pipelinechannel--feature_transform_engine_dataflow_machine_type: - componentInputParameter: feature_transform_engine_dataflow_machine_type - 
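Structurally, the root DAG above is KFP's standard exit-handler lowering: all training work runs inside exit-handler-1, and automl-tabular-finalizer fires once every upstream task has completed (ALL_UPSTREAM_TASKS_COMPLETED), so cleanup happens whether the run succeeds or fails. A minimal sketch of DSL that compiles to this shape, using hypothetical stand-in components:

from kfp import dsl

@dsl.component
def finalizer(project: str, location: str, root_dir: str):
    # Stand-in for automl-tabular-finalizer: clean up temporary resources.
    print(f'cleaning up {root_dir} in {project}/{location}')

@dsl.component
def train(project: str):
    # Stand-in for the training subgraph wrapped by exit-handler-1.
    print(f'training in {project}')

@dsl.pipeline(name='exit-handler-demo')
def pipeline(project: str, location: str, root_dir: str):
    exit_task = finalizer(project=project, location=location, root_dir=root_dir)
    # Everything inside the handler corresponds to exit-handler-1; the exit
    # task fires after the wrapped DAG finishes, regardless of outcome.
    with dsl.ExitHandler(exit_task):
        train(project=project)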
pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - componentInputParameter: feature_transform_engine_dataflow_max_num_workers - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--group_columns: - componentInputParameter: group_columns - pipelinechannel--group_temporal_total_weight: - componentInputParameter: group_temporal_total_weight - pipelinechannel--group_total_weight: - componentInputParameter: group_total_weight - pipelinechannel--holiday_regions: - componentInputParameter: holiday_regions - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--model_display_name: - componentInputParameter: model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: num_selected_trials - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-transformations: - taskOutputParameter: - outputParameterKey: transformations - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: stage_2_trainer_worker_pool_specs_override - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--temporal_total_weight: - componentInputParameter: temporal_total_weight - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_attribute_columns: - componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_columns: - componentInputParameter: time_series_identifier_columns - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transformations: - componentInputParameter: transformations - pipelinechannel--unavailable_at_forecast_columns: - componentInputParameter: unavailable_at_forecast_columns - pipelinechannel--validation_fraction: - componentInputParameter: 
validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_predefined_column: - componentInputParameter: window_predefined_column - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - stats_gen_execution_engine: - runtimeValue: - constant: bigquery - transformations: - componentInputParameter: transformations - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Vertex model to upload this model as a version to. - isOptional: true - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - available_at_forecast_columns: - description: 'The columns that are available at the - - forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: 0.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: The full service account name. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: The dataflow subnetwork. - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: '`True` to enable dataflow public IPs.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluated_examples_bigquery_path: - defaultValue: '' - description: 'The BigQuery dataset to write the - - predicted examples into for evaluation, in the format - - `bq://project.dataset`. Only necessary if evaluation is enabled.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 22.0 - description: 'The max number of prediction - - servers for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 22.0 - description: 'The initial number of - - prediction servers for batch explain components during evaluation.'
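data_source_bigquery_table_path and data_source_csv_filenames above are both optional strings with empty defaults, so a caller supplies one or the other (an assumption based on those defaults; the set-optional-inputs task resolves them). A sketch with placeholder project, dataset, and bucket names, following the documented formats:

# Exactly one of the two sources is expected to be non-empty.
use_bigquery = True

data_source_bigquery_table_path = (
    "bq://my-project.my_dataset.sales" if use_bigquery else ""
)
data_source_csv_filenames = (
    "" if use_bigquery else "gs://my-bucket/a.csv,gs://my-bucket/b.csv"
)

assert bool(data_source_bigquery_table_path) != bool(data_source_csv_filenames)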
- isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the batch prediction - - job in evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 25.0 - description: 'The maximum count of replicas - - the batch prediction job can scale to.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 25.0 - description: 'Number of replicas to use - - in the batch prediction cluster at startup time.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: The disk space in GB for dataflow. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the dataflow job in - - evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 25.0 - description: Maximum number of dataflow workers. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 22.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - feature_transform_engine_bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The full id of - - the feature transform engine staging dataset.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size of the - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_transform_engine_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type of - - the feature transform engine.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon: - defaultValue: 0.0 - description: The length of the horizon. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - description: 'A list of time series attribute column names that define the - - time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions - - aggregated over both the horizon and time series in the same hierarchy - - group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated over - - time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - holiday_regions: - description: 'The geographical regions where the holiday effect is - - applied in modeling.' - isOptional: true - parameterType: LIST - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_description: - defaultValue: '' - description: Optional description. 
- isOptional: true - parameterType: STRING - model_display_name: - defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - description: Optional display name for model. - isOptional: true - parameterType: STRING - num_selected_trials: - defaultValue: 10.0 - description: Number of selected trials. - isOptional: true - parameterType: NUMBER_INTEGER - optimization_objective: - description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", - - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - - "minimize-quantile-loss".' - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: '`True` to evaluate the ensembled model on the test split.' - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trials for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.' - isOptional: true - parameterType: LIST - stage_1_tuning_result_artifact_uri: - defaultValue: '' - description: 'The stage 1 tuning result artifact GCS - - URI.' - isOptional: true - parameterType: STRING - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trials for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 2 trainer worker pool spec.' - isOptional: true - parameterType: LIST - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated - - over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - description: The column that indicates the time. - parameterType: STRING - time_series_attribute_columns: - description: 'The columns that are invariant across the - - same time series.' - isOptional: true - parameterType: LIST - time_series_identifier_columns: - description: 'The columns that distinguish the different - - time series.' - parameterType: LIST - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. - isOptional: true - parameterType: STRING - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transformations: - description: 'Dict mapping auto and/or type-resolutions to feature - - columns. The supported types are: auto, categorical, numeric, text, and - - timestamp.'
- parameterType: STRUCT - unavailable_at_forecast_columns: - description: 'The columns that are unavailable at the - - forecast time.' - isOptional: true - parameterType: LIST - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - window_max_count: - defaultValue: 0.0 - description: The maximum number of windows that will be generated. - isOptional: true - parameterType: NUMBER_INTEGER - window_predefined_column: - defaultValue: '' - description: The column that indicates the start of each window. - isOptional: true - parameterType: STRING - window_stride_length: - defaultValue: 0.0 - description: The stride length to generate the window. - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml
deleted file mode 100644
index af3f611e6d7..00000000000
--- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml
+++ /dev/null
@@ -1,7531 +0,0 @@
-# PIPELINE DEFINITION
-# Name: temporal-fusion-transformer-forecasting
-# Description: The Temporal Fusion Transformer (TFT) Forecasting pipeline.
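Two of the scalar inputs in the sequence-to-sequence definition just deleted reward a closer look: optimization_objective enumerates its legal values in its description, and the window_* trio controls rolling-window generation. A small sketch of both; the window arithmetic is an assumption for intuition only, since the real generation happens server-side:

# Objectives copied from the optimization_objective description above.
ALLOWED_OBJECTIVES = {
    "minimize-rmse", "minimize-mae", "minimize-rmsle", "minimize-rmspe",
    "minimize-wape-mae", "minimize-mape", "minimize-quantile-loss",
}
assert "minimize-rmse" in ALLOWED_OBJECTIVES

# Illustrative only: assume one window may start every `stride_length` rows,
# capped at `max_count` (window_max_count).
def approx_window_count(num_rows: int, stride_length: int, max_count: int) -> int:
    if stride_length <= 0:
        return 0
    return min((num_rows + stride_length - 1) // stride_length, max_count)

print(approx_window_count(num_rows=1000, stride_length=10, max_count=50))  # 50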
-# Inputs: -# available_at_forecast_columns: list -# context_window: int [Default: 0.0] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# encryption_spec_key_name: str [Default: ''] -# evaluated_examples_bigquery_path: str [Default: ''] -# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_explain_max_replica_count: int [Default: 22.0] -# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] -# evaluation_batch_predict_max_replica_count: int [Default: 25.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] -# evaluation_dataflow_max_num_workers: int [Default: 25.0] -# evaluation_dataflow_starting_num_workers: int [Default: 22.0] -# fast_testing: bool [Default: False] -# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] -# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] -# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] -# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] -# forecast_horizon: int [Default: 0.0] -# group_columns: list -# group_temporal_total_weight: float [Default: 0.0] -# group_total_weight: float [Default: 0.0] -# holiday_regions: list -# location: str -# model_description: str [Default: ''] -# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] -# optimization_objective: str -# parent_model: system.Artifact -# predefined_split_key: str [Default: ''] -# project: str -# root_dir: str -# run_evaluation: bool [Default: False] -# stage_1_num_parallel_trials: int [Default: 35.0] -# stage_1_tuner_worker_pool_specs_override: list -# stage_1_tuning_result_artifact_uri: str [Default: ''] -# stage_2_num_parallel_trials: int [Default: 35.0] -# stage_2_trainer_worker_pool_specs_override: list -# study_spec_parameters_override: list -# target_column: str -# temporal_total_weight: float [Default: 0.0] -# test_fraction: float [Default: -1.0] -# time_column: str -# time_series_attribute_columns: list -# time_series_identifier_columns: list -# timestamp_split_key: str [Default: ''] -# train_budget_milli_node_hours: float -# training_fraction: float [Default: -1.0] -# transformations: dict -# unavailable_at_forecast_columns: list -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# window_max_count: int [Default: 0.0] -# window_predefined_column: str [Default: ''] -# window_stride_length: int [Default: 0.0] -# Outputs: -# feature-attribution-2-feature_attributions: system.Metrics -# feature-attribution-feature_attributions: system.Metrics -components: - comp-automl-forecasting-ensemble: - executorLabel: exec-automl-forecasting-ensemble - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The instance baseline used to calculate explanations. 
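Given the input list above, submitting the compiled pipeline looked roughly like the following Vertex AI SDK call; every name and path is a placeholder, and artifact inputs (vertex_dataset, parent_model) are omitted from this sketch:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")  # placeholders

job = aiplatform.PipelineJob(
    display_name="tft-forecasting",  # placeholder
    template_path="temporal_fusion_transformer_forecasting_pipeline.yaml",
    pipeline_root="gs://my-bucket/pipeline_root",  # placeholder
    parameter_values={
        # Required inputs from the header above; values are examples.
        "project": "my-project",
        "location": "us-central1",
        "root_dir": "gs://my-bucket/pipeline_root",
        "target_column": "sales",
        "time_column": "date",
        "time_series_identifier_columns": ["store_id"],
        "optimization_objective": "minimize-rmse",
        "train_budget_milli_node_hours": 1000.0,
        "transformations": {"auto": ["sales", "date", "store_id"]},
        "data_source_bigquery_table_path": "bq://my-project.my_dataset.sales",
    },
)
job.run()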
- instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the instance schema, describing the input data - for the tf_model at serving time. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: URI of the Docker image to be used as the container for serving - predictions. This URI must identify an image in Artifact Registry or Container - Registry. - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - example_instance: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: An example instance which may be used as an input for predictions. - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-ensemble-2: - executorLabel: exec-automl-forecasting-ensemble-2 - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The instance baseline used to calculate explanations. - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the instance schema, describing the input data - for the tf_model at serving time. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. 
- isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: URI of the Docker image to be used as the container for serving - predictions. This URI must identify an image in Artifact Registry or Container - Registry. - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - example_instance: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: An example instance which may be used as an input for predictions. - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-stage-1-tuner: - executorLabel: exec-automl-forecasting-stage-1-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: Number of hours the hyperparameter tuning should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the hyperparameter tuning. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: Number of selected trials. The number of weak learners in the - final model is 5 * num_selected_trials. - parameterType: NUMBER_INTEGER - project: - description: Project to run hyperparameter tuning. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible values: "regular" (default), - "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. 
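The stage-1 tuner's parameter list continues just below with two JSON-list overrides whose descriptions embed their own examples; together with the three documented values of reduce_search_space_mode above, they restate as Python literals like this (the pool-position comments are an assumption about the four-entry layout):

# Values copied from the reduce_search_space_mode description above.
REDUCE_SEARCH_SPACE_MODES = ("regular", "minimal", "full")

# Mirrors the study_spec_parameters_override example given below.
study_spec_parameters_override = [
    {"parameter_id": "activation",
     "categorical_value_spec": {"values": ["tanh"]}},
]

# Mirrors the worker_pool_specs_override_json example given below; the
# empty dicts leave the middle pools unchanged.
worker_pool_specs_override_json = [
    {"machine_spec": {"machine_type": "n1-standard-16"}},  # first pool
    {},
    {},
    {"machine_spec": {"machine_type": "n1-standard-16"}},  # last pool
]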
- parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., [{"parameter_id": "activation","categorical_value_spec": - {"values": ["tanh"]}}]' - isOptional: true - parameterType: LIST - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-stage-2-tuner: - executorLabel: exec-automl-forecasting-stage-2-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The forecasting example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path to the json of hyperparameter tuning results to use when - evaluating models. - parameters: - deadline_hours: - description: Number of hours the cross-validation trainer should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: 'Cloud region for running the component, e.g., us-central1.' - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: Number of selected trials. The number of weak learners in the - final model. - parameterType: NUMBER_INTEGER - project: - description: Project to run stage 2 tuner. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained (private) model artifact paths and their hyperparameters. - parameters: - gcp_resources: - description: GCP resources created by this component.
For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the cross-validation trainer. - parameterType: STRING - project: - description: Project to run the cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If the component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trials for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trials for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If the component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trials for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trials for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node - - hour.'
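The milli-node-hour unit used by both calculate-training-parameters variants reduces to a one-line conversion; how the component then derives stage deadlines and per-trial time limits is internal:

# Per the description above: a value of 1,000 means one node hour.
def to_node_hours(train_budget_milli_node_hours: float) -> float:
    return train_budget_milli_node_hours / 1000.0

print(to_node_hours(1000.0))  # 1.0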
- parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - tasks: - automl-forecasting-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble - dependentTasks: - - automl-forecasting-stage-2-tuner - - get-prediction-image-uri - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-2-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble - automl-forecasting-stage-2-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-2-tuner - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input_path: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-2-tuner - calculate-training-parameters: - cachingOptions: - enableCache: true - 
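The importer task referenced above (a dependent task of the stage-2 tuner, feeding its tuning_result_input_path) is how a pre-computed stage-1 tuning result enters the DAG; in the KFP DSL that is dsl.importer. A minimal sketch inside an illustrative pipeline:

from kfp import dsl

@dsl.pipeline(name="importer-sketch")
def pipeline(stage_1_tuning_result_artifact_uri: str):
    # Turns a user-supplied URI into an input artifact, as in the
    # get-hyperparameter-tuning-results task of this pipeline.
    tuning_result = dsl.importer(
        artifact_uri=stage_1_tuning_result_artifact_uri,
        artifact_class=dsl.Artifact,
        reimport=False,
    )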
componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: true - selected_trials: - runtimeValue: - constant: 1.0 - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-forecasting-ensemble - - model-upload - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description - get-prediction-image-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri - inputs: - parameters: - model_type: - runtimeValue: - constant: tft - taskInfo: - name: get-prediction-image-uri - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: get-hyperparameter-tuning-results - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-forecasting-ensemble - - get-or-create-model-description - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - parent_model: - componentInputArtifact: pipelinechannel--parent_model - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
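The should_run_model_evaluation triggerPolicy above, conditioned on pipelinechannel--run_evaluation == true, is the compiled form of a DSL condition block; a minimal sketch, assuming an illustrative evaluation component:

from kfp import dsl

@dsl.component
def evaluate_model():
    print("evaluation branch")

@dsl.pipeline(name="condition-sketch")
def pipeline(run_evaluation: bool = False):
    # Compiles to a triggerPolicy condition like the one above; the
    # explicit `== True` comparison is required by the KFP DSL.
    with dsl.Condition(run_evaluation == True, name="should_run_model_evaluation"):
        evaluate_model()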
pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: 
gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - force_runner_mode: - runtimeValue: - constant: Dataflow - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - runtimeValue: - constant: forecasting - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - finalize-eval-quantile-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters - inputs: - parameters: - quantiles: - runtimeValue: - constant: [] - taskInfo: - name: finalize-eval-quantile-parameters - get-predictions-column: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column - dependentTasks: - - finalize-eval-quantile-parameters - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: true - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: 
model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: false - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation-forecasting: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting - dependentTasks: - - finalize-eval-quantile-parameters - - get-predictions-column - - model-batch-predict - - table-to-uri - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: 
HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation-forecasting - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - model-batch-predict - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - use_bq_prefix: - runtimeValue: - constant: true - taskInfo: - name: table-to-uri - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - 
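The table-to-uri task above, with use_bq_prefix set to true, reduces the batch-predict output table to the bq:// URI format used elsewhere in these definitions. A one-line sketch; the helper is illustrative, not the component's actual code:

def table_to_uri(project: str, dataset: str, table: str) -> str:
    # Mirrors the documented format: bq://bq_project.bq_dataset.bq_table
    return f"bq://{project}.{dataset}.{table}"

print(table_to_uri("my-project", "my_dataset", "predictions"))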
pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - tasks: - automl-forecasting-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble-2 - dependentTasks: - - automl-forecasting-stage-1-tuner - - get-prediction-image-uri-2 - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-1-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri-2 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble-2 - automl-forecasting-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: pipelinechannel--project - reduce_search_space_mode: - runtimeValue: - constant: full - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: 
automl-forecasting-stage-1-tuner - calculate-training-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: false - selected_trials: - runtimeValue: - constant: 1.0 - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-forecasting-ensemble-2 - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description-2 - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description-2 - get-prediction-image-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri-2 - inputs: - parameters: - model_type: - runtimeValue: - constant: tft - taskInfo: - name: get-prediction-image-uri-2 - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - dependentTasks: - - automl-forecasting-ensemble-2 - - get-or-create-model-description-2 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - parent_model: - componentInputArtifact: pipelinechannel--parent_model - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description-2 - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size_gb: - componentInputParameter: 
pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - force_runner_mode: - runtimeValue: - constant: Dataflow - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - runtimeValue: - constant: forecasting - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - finalize-eval-quantile-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters-2 - inputs: - parameters: - quantiles: - runtimeValue: - constant: [] - taskInfo: - name: finalize-eval-quantile-parameters-2 - get-predictions-column-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: true - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: 
- name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: false - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-forecasting-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - - get-predictions-column-2 - - model-batch-predict-2 - - table-to-uri-2 - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters-2 - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column-2 - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: 
model-evaluation-forecasting-2 - model-evaluation-import-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-forecasting-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import-2 - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - use_bq_prefix: - runtimeValue: - constant: true - taskInfo: - name: table-to-uri-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - 
pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - tasks: - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - 
pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - 
pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - forecasting_context_window: - componentInputParameter: pipelinechannel--context_window - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_holiday_regions: - componentInputParameter: pipelinechannel--holiday_regions - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_predefined_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_columns: - componentInputParameter: pipelinechannel--time_series_identifier_columns - forecasting_unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: 
pipelinechannel--group_total_weight - location: - componentInputParameter: pipelinechannel--location - model_type: - runtimeValue: - constant: tft - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - stats_gen_execution_engine: - runtimeValue: - constant: bigquery - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--transformations - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: check-if-hyperparameter-tuning-results-are-supplied-by-user - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - context_window: - componentInputParameter: pipelinechannel--context_window - enable_probabilistic_inference: - runtimeValue: - constant: false - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_model_type: - runtimeValue: - constant: tft - forecasting_transformations: - componentInputParameter: pipelinechannel--set-optional-inputs-transformations - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - prediction_type: - runtimeValue: - constant: time_series - quantiles: - runtimeValue: - constant: [] - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - 
componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_columns: - componentInputParameter: pipelinechannel--time_series_identifier_columns - unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - artifacts: - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--available_at_forecast_columns: - parameterType: LIST - pipelinechannel--context_window: - parameterType: NUMBER_INTEGER - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_transform_engine_dataflow_machine_type: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--group_columns: - parameterType: LIST - pipelinechannel--group_temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--group_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--holiday_regions: - parameterType: LIST - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - 
parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-transformations: - parameterType: STRUCT - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_attribute_columns: - parameterType: LIST - pipelinechannel--time_series_identifier_columns: - parameterType: LIST - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transformations: - parameterType: STRUCT - pipelinechannel--unavailable_at_forecast_columns: - parameterType: LIST - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_predefined_column: - parameterType: STRING - pipelinechannel--window_stride_length: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size_gb: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - force_runner_mode: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: 
- feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size_gb: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - force_runner_mode: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: Dataset in "projectId.datasetId" format for storing intermediate-FTE - BigQuery tables. If the specified dataset does not exist in BigQuery, - FTE will create the dataset. If no bigquery_staging_full_dataset_id is - specified, all intermediate tables will be stored in a dataset created - under the provided project in the input data source's location during - FTE execution called "vertex_feature_transform_engine_staging_{location.replace('-', - '_')}". All tables generated by FTE will have a 30 day TTL. - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: BigQuery input data source to run feature transform on. - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: CSV input data source to run feature transform on. 
- isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: The disk size, in gigabytes, to use on each Dataflow worker - instance. If not set, defaults to 40. - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: The machine type used for Dataflow jobs. If not set, defaults - to n1-standard-16. - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: The maximum number of workers to run the Dataflow job. If not set, - defaults to 25. - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: Custom service account to run Dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty the - default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: Specifies whether Dataflow workers use public IP addresses. - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: 'List of dataset-level custom transformation definitions. Custom, - bring-your-own dataset-level transform functions, where users can define - and import their own transform function and use it with FTE''s built-in - transformations. Using custom transformations is an experimental feature - and it is currently not supported during batch prediction. - - [ { "transformation": "ConcatCols", "module_path": "/path/to/custom_transform_fn_dlt.py", - "function_name": "concat_cols" } ] Using custom transform function together - with FTE''s built-in transformations: .. code-block:: python [ { "transformation": - "Join", "right_table_uri": "bq://test-project.dataset_test.table", "join_keys": - [["join_key_col", "join_key_col"]] },{ "transformation": "ConcatCols", - "cols": ["feature_1", "feature_2"], "output_col": "feature_1_2" } ]' - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level transformations.\n[ { \"transformation\"\ - : \"Join\", \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - , \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }, ... ] Additional\ - \ information about FTE's currently supported built-in\n transformations:\n\ - \ Join: Joins features from right_table_uri. For each join key, the\ - \ left table keys will be included and the right table keys will be dropped.\n\ - \ Example: .. code-block:: python { \"transformation\": \"Join\"\ - , \"right_table_uri\": \"bq://test-project.dataset_test.table\", \"join_keys\"\ - : [[\"join_key_col\", \"join_key_col\"]] }\n Arguments:\n \ - \ right_table_uri: Right table BigQuery uri to join with input_full_table_id.\n\ - \ join_keys: Features to join on. For each nested list, the\ - \ first element is a left table column and the second is its corresponding\ - \ right table column.\n TimeAggregate: Creates a new feature composed\ - \ of values of an existing feature from a fixed time period ago or in\ - \ the future.\n Ex: A feature for sales by store 1 year ago.\n \ - \ Example: .. 
code-block:: python { \"transformation\": \"TimeAggregate\"\ - , \"time_difference\": 40, \"time_difference_units\": \"DAY\", \"time_series_identifier_columns\"\ - : [\"store_id\"], \"time_column\": \"time_col\", \"time_difference_target_column\"\ - : \"target_col\", \"output_column\": \"output_col\" }\n Arguments:\n\ - \ time_difference: Number of time_difference_units to look\ - \ back or into the future on our time_difference_target_column.\n \ - \ time_difference_units: Units of time_difference to look back\ - \ or into the future on our time_difference_target_column. Must be one\ - \ of * 'DAY' * 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER' * 'YEAR'\n\ - \ time_series_identifier_columns: Names of the time series\ - \ identifier columns.\n time_column: Name of the time column.\n\ - \ time_difference_target_column: Column we wish to get the\ - \ value of time_difference time_difference_units in the past or future.\n\ - \ output_column: Name of our new time aggregate feature.\n\ - \ is_future: Whether we wish to look forward in time. Defaults\ - \ to False. PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\ - \ Performs a partition by reduce operation (one of max, min, avg, or sum)\ - \ with a fixed historic time period. Ex: Getting avg sales (the reduce\ - \ column) for each store (partition_by_column) over the previous 5 days\ - \ (time_column, time_ago_units, and time_ago).\n Example: .. code-block::\ - \ python { \"transformation\": \"PartitionByMax\", \"reduce_column\"\ - : \"sell_price\", \"partition_by_columns\": [\"store_id\", \"state_id\"\ - ], \"time_column\": \"date\", \"time_ago\": 1, \"time_ago_units\": \"\ - WEEK\", \"output_column\": \"partition_by_reduce_max_output\" }\n \ - \ Arguments:\n reduce_column: Column to apply the reduce\ - \ operation on. Reduce operations include the\n following:\ - \ Max, Min, Avg, Sum.\n partition_by_columns: List of columns\ - \ to partition by.\n time_column: Time column for the partition\ - \ by operation's window function.\n time_ago: Number of time_ago_units\ - \ to look back on our target_column, starting from time_column (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on our target_column.\ - \ Must be one of * 'DAY' * 'WEEK'\n output_column: Name of\ - \ our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature selection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\". The algorithms available\ - \ are: AMI(Adjusted Mutual Information):\nReference: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional Mutual\ - \ Information Maximization): Reference paper: Mohamed Bennasar, Yulia\ - \ Hicks, Rossitza Setchi, \u201CFeature selection using Joint Mutual Information\ - \ Maximisation,\u201D Expert Systems with Applications, vol. 42, issue\ - \ 22, 1 December 2015, Pages 8520-8532. JMIM(Joint Mutual Information\ - \ Maximization\nReference:\n paper: Mohamed Bennasar, Yulia Hicks, Rossitza\ - \ Setchi, \u201CFeature selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert Systems with Applications, vol. 42, issue 22, 1 December 2015,\ - \ Pages 8520-8532. 
MRMR(MIQ Minimum-redundancy Maximum-relevance): Reference\ - \ paper: Hanchuan Peng, Fuhui Long, and Chris Ding. \"Feature selection\ - \ based on mutual information criteria of max-dependency, max-relevance,\ - \ and min-redundancy.\" IEEE Transactions on pattern analysis and machine\ - \ intelligence 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - feature_selection_execution_engine: - defaultValue: dataflow - description: Execution engine to run feature selection; value can be dataflow or - bigquery. - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply the window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: Forecasting available at forecast columns. - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the holiday effect - is applied in modeling by adding a holiday categorical array feature that - includes all holidays matching the date. This option is only allowed when - the data granularity is day. By default, holiday effect modeling is disabled. - To turn it on, specify the holiday region using this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: Forecasting time series attribute columns. - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - description: '[Deprecated] A forecasting time series identifier column. - Raises an exception if used - use the "time_series_identifier_column" - field instead.' - isOptional: true - parameterType: STRING - forecasting_time_series_identifier_columns: - defaultValue: [] - description: The list of forecasting time series identifier columns. 
- isOptional: true - parameterType: LIST - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: Forecasting unavailable at forecast columns. - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: The format to use for the materialized examples. Should be - either 'tfrecords_gzip' (default) or 'parquet'. - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: Maximum number of features to select. If specified, the transform - config will be purged by only using the selected features that ranked - top in the feature ranking, which has the ranking value for all supported - features. If the number of input features is smaller than max_selected_features - specified, we will still run the feature selection process and generate - the feature ranking, no features will be excluded. The value will be - set to 1000 by default if run_feature_selection is enabled. - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features for. Can be - one of: neural_network, boosted_trees, l2l, seq2seq, tft, or tide. Defaults - to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: List of multimodal image columns. Defaults to an empty list. - isOptional: true - parameterType: LIST - multimodal_tabular_columns: - defaultValue: [] - description: List of multimodal tabular columns. Defaults to an empty list - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: List of multimodal text columns. Defaults to an empty list - isOptional: true - parameterType: LIST - multimodal_timeseries_columns: - defaultValue: [] - description: List of multimodal timeseries columns. Defaults to an empty - list - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: Model prediction type. One of "classification", "regression", - "time_series". - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: (deprecated) Whether the distillation should be applied to - the training. - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: Whether the feature selection should be applied to the dataset. 
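As a rough illustration of how the forecasting parameters defined above fit together, the following is a hypothetical keyword bundle. The names match the parameter list; the values are illustrative only.

.. code-block:: python

    # Illustrative values for the forecasting parameters above; -1 keeps
    # a parameter's documented default sentinel.
    forecasting_params = {
        "forecasting_time_column": "date",
        "forecasting_time_series_identifier_columns": ["store_id"],
        "forecasting_context_window": 30,
        "forecasting_forecast_horizon": 7,
        "forecasting_holiday_regions": ["US"],  # day granularity only
        "forecasting_apply_windowing": True,
        "forecasting_window_max_count": -1,
        "forecasting_window_stride_length": 1,
    }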
-       isOptional: true
      -       parameterType: BOOLEAN
      -     stats_gen_execution_engine:
      -       defaultValue: dataflow
      -       description: 'Execution engine to perform statistics generation. Can be
      -         one of: "dataflow" (by default) or "bigquery". Using "bigquery" as the
      -         execution engine is experimental.'
      -       isOptional: true
      -       parameterType: STRING
      -     stratified_split_key:
      -       defaultValue: ''
      -       description: Stratified split key.
      -       isOptional: true
      -       parameterType: STRING
      -     target_column:
      -       defaultValue: ''
      -       description: Target column of input data.
      -       isOptional: true
      -       parameterType: STRING
      -     temporal_total_weight:
      -       defaultValue: 0.0
      -       isOptional: true
      -       parameterType: NUMBER_DOUBLE
      -     test_fraction:
      -       defaultValue: -1.0
      -       description: Fraction of input data for testing.
      -       isOptional: true
      -       parameterType: NUMBER_DOUBLE
      -     tf_auto_transform_features:
      -       defaultValue: {}
      -       description: 'Dict mapping auto and/or type-resolutions to TF transform
      -         features. FTE will automatically configure a set of built-in transformations
      -         for each feature based on its data statistics. If users do not want auto
      -         type resolution, but want the set of transformations for a given type
      -         to be automatically generated, they may specify pre-resolved transformations
      -         types. The following type hint dict keys are supported: * ''auto'' * ''categorical''
      -         * ''numeric'' * ''text'' * ''timestamp'' Example: `{ "auto": ["feature1"],
      -         "categorical": ["feature2", "feature3"], }`. Note that the target and
      -         weight column may not be included as an auto transformation unless users
      -         are running forecasting.'
      -       isOptional: true
      -       parameterType: STRUCT
      -     tf_custom_transformation_definitions:
      -       defaultValue: []
      -       description: 'List of TensorFlow-based custom transformation definitions. Custom,
      -         bring-your-own transform functions, where users can define and import
      -         their own transform function and use it with FTE''s built-in transformations.
      -         `[ { "transformation": "PlusOne", "module_path": "gs://bucket/custom_transform_fn.py",
      -         "function_name": "plus_one_transform" }, { "transformation": "MultiplyTwo",
      -         "module_path": "gs://bucket/custom_transform_fn.py", "function_name":
      -         "multiply_two_transform" } ]` Using custom transform function together
      -         with FTE''s built-in transformations: .. code-block:: python [ { "transformation":
      -         "CastToFloat", "input_columns": ["feature_1"], "output_columns": ["feature_1"]
      -         },{ "transformation": "PlusOne", "input_columns": ["feature_1"], "output_columns":
      -         ["feature_1_plused_one"] },{ "transformation": "MultiplyTwo", "input_columns":
      -         ["feature_1"], "output_columns": ["feature_1_multiplied_two"] } ]'
      -       isOptional: true
      -       parameterType: LIST
      -     tf_transform_execution_engine:
      -       defaultValue: dataflow
      -       description: 'Execution engine to perform row-level TF transformations.
      -         Can be one of: "dataflow" (by default) or "bigquery". Using "bigquery"
      -         as the execution engine is experimental and is for allowlisted customers
      -         only. In addition, executing on "bigquery" only supports auto transformations
      -         (i.e., specified by tf_auto_transform_features) and will raise an error
      -         when tf_custom_transformation_definitions or tf_transformations_path is
      -         set.'
      -       isOptional: true
      -       parameterType: STRING
      -     tf_transformations_path:
      -       defaultValue: ''
      -       description: "Path to TensorFlow-based transformation configuration. Path\
      -         \ to a JSON file used to specify FTE's TF transformation configurations.\
      -         \ In the following, we provide some sample transform configurations to\
      -         \ demonstrate FTE's capabilities. 
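The two examples embedded in the `tf_custom_transformation_definitions` description above omit a few commas; rendered as runnable Python, they read:

.. code-block:: python

    # Bring-your-own transform definitions, per the description above.
    tf_custom_transformation_definitions = [
        {"transformation": "PlusOne",
         "module_path": "gs://bucket/custom_transform_fn.py",
         "function_name": "plus_one_transform"},
        {"transformation": "MultiplyTwo",
         "module_path": "gs://bucket/custom_transform_fn.py",
         "function_name": "multiply_two_transform"},
    ]

    # Mixing custom transforms with a built-in one (missing commas restored).
    transformations = [
        {"transformation": "CastToFloat",
         "input_columns": ["feature_1"], "output_columns": ["feature_1"]},
        {"transformation": "PlusOne",
         "input_columns": ["feature_1"],
         "output_columns": ["feature_1_plused_one"]},
        {"transformation": "MultiplyTwo",
         "input_columns": ["feature_1"],
         "output_columns": ["feature_1_multiplied_two"]},
    ]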
All transformations on input columns\
      -         \ are explicitly specified with FTE's built-in transformations. Chaining\
      -         \ of multiple transformations on a single column is also supported. For\
      -         \ example: .. code-block:: python [ { \"transformation\": \"ZScale\"\
      -         , \"input_columns\": [\"feature_1\"] }, { \"transformation\": \"ZScale\"\
      -         , \"input_columns\": [\"feature_2\"] } ]. Additional information about\
      -         \ FTE's currently supported built-in\ntransformations:\nDatetime: Extracts\
      -         \ datetime features from a column containing timestamp strings.\n Example:\
      -         \ .. code-block:: python { \"transformation\": \"Datetime\", \"input_columns\"\
      -         : [\"feature_1\"], \"time_format\": \"%Y-%m-%d\" }\n Arguments:\n \
      -         \ input_columns: A list with a single column to perform the datetime\
      -         \ transformation on.\n output_columns: Names of output columns,\
      -         \ one for each datetime_features element.\n time_format: Datetime\
      -         \ format string. Time format is a combination of Date + Time Delimiter\
      -         \ (optional) + Time (optional) directives. Valid date directives are as\
      -         \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' # 2018/11/30 * '%y-%m-%d'\
      -         \ # 18-11-30 * '%y/%m/%d' # 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y'\
      -         \ # 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' # 11/30/18 * '%d-%m-%Y'\
      -         \ # 30-11-2018 * '%d/%m/%Y' # 30/11/2018 * '%d-%B-%Y' # 30-November-2018\
      -         \ * '%d-%m-%y' # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' # 30-November-18\
      -         \ * '%d%m%Y' # 30112018 * '%m%d%Y' # 11302018 * '%Y%m%d' # 20181130\
      -         \ Valid time delimiters are as follows * 'T' * ' ' Valid time directives\
      -         \ are as follows * '%H:%M' # 23:59 * '%H:%M:%S' #\n \
      -         \ 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456] * '%H:%M:%S.%f%z'\
      -         \ # 23:59:58[.123456]+0000 * '%H:%M:%S%z' # 23:59:58+0000\n \
      -         \ datetime_features: List of datetime features to be extracted. Each entry\
      -         \ must be one of * 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK' * 'DAY_OF_YEAR'\
      -         \ * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR' * 'MINUTE' * 'SECOND' Defaults\
      -         \ to ['YEAR', 'MONTH', 'DAY', 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\
      - Log: Performs the natural log on a numeric column.\n Example: .. code-block::\
      -         \ python { \"transformation\": \"Log\", \"input_columns\": [\"feature_1\"\
      -         ] }\n Arguments:\n input_columns: A list with a single column\
      -         \ to perform the log transformation on.\n output_columns: A list\
      -         \ with a single output column name, corresponding to the output of our\
      -         \ transformation.\nZScale: Performs Z-scale normalization on a numeric\
      -         \ column.\n Example: .. code-block:: python { \"transformation\"\
      -         : \"ZScale\", \"input_columns\": [\"feature_1\"] }\n Arguments:\n \
      -         \ input_columns: A list with a single column to perform the z-scale\
      -         \ transformation on.\n output_columns: A list with a single output\
      -         \ column name, corresponding to the output of our transformation.\nVocabulary:\
      -         \ Converts strings to integers, where each unique string gets a unique\
      -         \ integer representation.\n Example: .. code-block:: python { \"\
      -         transformation\": \"Vocabulary\", \"input_columns\": [\"feature_1\"] }\n\
      -         \ Arguments:\n input_columns: A list with a single column to\
      -         \ perform the vocabulary transformation on.\n output_columns: A\
      -         \ list with a single output column name, corresponding to the output of\
      -         \ our transformation.\n top_k: Number of the most frequent words\
      -         \ in the vocabulary to use for generating dictionary lookup indices. If\
      -         \ not specified, all words in the vocabulary will be used. 
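A compact sketch of the built-ins documented so far, chained in one configuration list; `feature_1` and `feature_2` are the docs' own placeholders, and the `top_k` value is hypothetical.

.. code-block:: python

    transformations = [
        {"transformation": "Datetime",
         "input_columns": ["feature_1"],
         "time_format": "%Y-%m-%d",
         # The documented default feature set, spelled out explicitly.
         "datetime_features": ["YEAR", "MONTH", "DAY", "DAY_OF_WEEK",
                               "DAY_OF_YEAR", "WEEK_OF_YEAR"]},
        {"transformation": "Log", "input_columns": ["feature_1"]},
        {"transformation": "ZScale", "input_columns": ["feature_2"]},
        {"transformation": "Vocabulary",
         "input_columns": ["feature_2"],
         "top_k": 10_000},  # hypothetical cap on vocabulary size
    ]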
Defaults to\ - \ None.\n frequency_threshold: Limit the vocabulary only to words\ - \ whose number of occurrences in the input exceeds frequency_threshold.\ - \ If not specified, all words in the vocabulary will be included. If both\ - \ top_k and frequency_threshold are specified, a word must satisfy both\ - \ conditions to be included. Defaults to None.\nCategorical: Transforms\ - \ categorical columns to integer columns.\n Example: .. code-block::\ - \ python { \"transformation\": \"Categorical\", \"input_columns\": [\"\ - feature_1\"], \"top_k\": 10 }\n Arguments:\n input_columns:\ - \ A list with a single column to perform the categorical transformation\ - \ on.\n output_columns: A list with a single output column name,\ - \ corresponding to the output of our transformation.\n top_k: Number\ - \ of the most frequent words in the vocabulary to use for generating dictionary\ - \ lookup indices. If not specified, all words in the vocabulary will be\ - \ used.\n frequency_threshold: Limit the vocabulary only to words\ - \ whose number of occurrences in the input exceeds frequency_threshold.\ - \ If not specified, all words in the vocabulary will be included. If both\ - \ top_k and frequency_threshold are specified, a word must satisfy both\ - \ conditions to be included.\nReduce: Given a column where each entry\ - \ is a numeric array, reduces arrays according to our reduce_mode.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Reduce\"\ - , \"input_columns\": [\"feature_1\"], \"reduce_mode\": \"MEAN\", \"output_columns\"\ - : [\"feature_1_mean\"] }\n Arguments:\n input_columns: A list\ - \ with a single column to perform the reduce transformation on.\n \ - \ output_columns: A list with a single output column name, corresponding\ - \ to the output of our transformation.\n reduce_mode: One of *\ - \ 'MAX' * 'MIN' * 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k:\ - \ The number of last k elements when 'LAST_K' reduce mode is used. Defaults\ - \ to 1.\nSplitString: Given a column of strings, splits strings into token\ - \ arrays.\n Example: .. code-block:: python { \"transformation\"\ - : \"SplitString\", \"input_columns\": [\"feature_1\"], \"separator\":\ - \ \"$\" }\n Arguments:\n input_columns: A list with a single\ - \ column to perform the split string transformation on.\n output_columns:\ - \ A list with a single output column name, corresponding to the output\ - \ of our transformation.\n separator: Separator to split input\ - \ string into tokens. Defaults to ' '.\n missing_token: Missing\ - \ token to use when no string is included. Defaults to ' _MISSING_ '.\n\ - NGram: Given a column of strings, splits strings into token arrays where\ - \ each token is an integer.\n Example: .. code-block:: python { \"\ - transformation\": \"NGram\", \"input_columns\": [\"feature_1\"], \"min_ngram_size\"\ - : 1, \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n \ - \ input_columns: A list with a single column to perform the n-gram\ - \ transformation on.\n output_columns: A list with a single output\ - \ column name, corresponding to the output of our transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must be a positive number\ - \ and <= max_ngram_size. Defaults to 1.\n max_ngram_size: Maximum\ - \ n-gram size. Must be a positive number and >= min_ngram_size. Defaults\ - \ to 2.\n top_k: Number of the most frequent words in the vocabulary\ - \ to use for generating dictionary lookup indices. If not specified, all\ - \ words in the vocabulary will be used. 
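Continuing in the same vein, a sketch of the array- and string-oriented built-ins described above (Reduce, SplitString, NGram); output column names are hypothetical.

.. code-block:: python

    transformations = [
        {"transformation": "Reduce", "input_columns": ["feature_1"],
         "reduce_mode": "LAST_K", "last_k": 3,      # keep the last 3 values
         "output_columns": ["feature_1_last_3"]},   # hypothetical name
        {"transformation": "SplitString",
         "input_columns": ["feature_2"], "separator": "$"},
        {"transformation": "NGram", "input_columns": ["feature_3"],
         "min_ngram_size": 1, "max_ngram_size": 2, "separator": " "},
    ]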
Defaults to None.\n frequency_threshold:\
      -         \ Limit the dictionary's vocabulary only to words whose number of occurrences\
      -         \ in the input exceeds frequency_threshold. If not specified, all words\
      -         \ in the vocabulary will be included. If both top_k and frequency_threshold\
      -         \ are specified, a word must satisfy both conditions to be included. Defaults\
      -         \ to None.\n separator: Separator to split input string into tokens.\
      -         \ Defaults to ' '.\n missing_token: Missing token to use when no\
      -         \ string is included. Defaults to ' _MISSING_ '.\nClip: Given a numeric\
      -         \ column, clips elements such that elements < min_value are assigned min_value,\
      -         \ and elements > max_value are assigned max_value.\n Example: .. code-block::\
      -         \ python { \"transformation\": \"Clip\", \"input_columns\": [\"col1\"\
      -         ], \"output_columns\": [\"col1_clipped\"], \"min_value\": 1., \"max_value\"\
      -         : 10., }\n Arguments:\n input_columns: A list with a single\
      -         \ column to perform the clip transformation on.\n output_columns:\
      -         \ A list with a single output column name, corresponding to the output\
      -         \ of our transformation.\n min_value: Number where all values below\
      -         \ min_value are set to min_value. If no min_value is provided, min clipping\
      -         \ will not occur. Defaults to None.\n max_value: Number where all\
      -         \ values above max_value are set to max_value. If no max_value is provided,\
      -         \ max clipping will not occur. Defaults to None.\nMultiHotEncoding: Performs\
      -         \ multi-hot encoding on a categorical array column.\n Example: ..\
      -         \ code-block:: python { \"transformation\": \"MultiHotEncoding\", \"\
      -         input_columns\": [\"col1\"], } The number of classes is determined by\
      -         \ the largest number included in the input if it is numeric or the total\
      -         \ number of unique values of the input if it is of type str. If the input\
      -         \ has type str and an element contains separator tokens, the input\
      -         \ will be split at separator indices, and each element of the split\
      -         \ list will be considered a separate class. For example,\n Input: \
      -         \ .. code-block:: python [ [\"foo bar\"], # Example 0 [\"foo\",\
      -         \ \"bar\"], # Example 1 [\"foo\"], # Example 2 [\"bar\"], \
      -         \ # Example 3 ] Output (with default separator=\" \"): .. code-block::\
      -         \ python [ [1, 1], # Example 0 [1, 1], # Example 1 [1,\
      -         \ 0], # Example 2 [0, 1], # Example 3 ]\n Arguments:\n\
      -         \ input_columns: A list with a single column to perform the multi-hot-encoding\
      -         \ on.\n output_columns: A list with a single output column name,\
      -         \ corresponding to the output of our transformation.\n top_k: Number\
      -         \ of the most frequent words in the vocabulary to use for generating dictionary\
      -         \ lookup indices. If not specified, all words in the vocabulary will be\
      -         \ used. Defaults to None.\n frequency_threshold: Limit the dictionary's\
      -         \ vocabulary only to words whose number of occurrences in the input exceeds\
      -         \ frequency_threshold. If not specified, all words in the vocabulary will\
      -         \ be included. If both top_k and frequency_threshold are specified, a\
      -         \ word must satisfy both conditions to be included. Defaults to None.\n\
      -         \ separator: Separator to split input string into tokens. Defaults\
      -         \ to ' '.\nMaxAbsScale: Performs maximum absolute scaling on a numeric\
      -         \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - : \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\":\ - \ [\"col1_max_abs_scaled\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to perform max-abs-scale on.\n output_columns:\ - \ A list with a single output column name, corresponding to the output\ - \ of our transformation.\nCustom: Transformations defined in tf_custom_transformation_definitions\ - \ are included here in the TensorFlow-based transformation configuration.\ - \ For example, given the following tf_custom_transformation_definitions:\ - \ .. code-block:: python [ { \"transformation\": \"PlusX\", \"module_path\"\ - : \"gs://bucket/custom_transform_fn.py\", \"function_name\": \"plus_one_transform\"\ - \ } ] We can include the following transformation: .. code-block:: python\ - \ { \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"], \"\ - output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note that input_columns\ - \ must still be included in our arguments and output_columns is optional.\ - \ All other arguments are those defined in custom_transform_fn.py, which\ - \ includes `\"x\"` in this case. See tf_custom_transformation_definitions\ - \ above. legacy_transformations_path (Optional[str]) Deprecated. Prefer\ - \ tf_auto_transform_features. Path to a GCS file containing JSON string\ - \ for legacy style transformations. Note that legacy_transformations_path\ - \ and tf_auto_transform_features cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The ranking of features, all features supported in the dataset - will be included. For "AMI" algorithm, array features won't be available - in the ranking as arrays are not supported yet. - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: BigQuery URI for the downsampled test split to pass to the - batch prediction component during batch explain. - parameterType: STRING - bigquery_test_split_uri: - description: BigQuery URI for the test split to pass to the batch prediction - component during evaluation. - parameterType: STRING - bigquery_train_split_uri: - description: BigQuery URI for the train split to pass to the batch prediction - component during distillation. 
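Putting the `Custom` section above into practice: once a `PlusX` definition is registered via `tf_custom_transformation_definitions`, it can be referenced like any built-in. `col1_plus_5` is a hypothetical output name; the doc's own example reuses `col1_max_abs_scaled`, which looks like a copy-paste slip.

.. code-block:: python

    tf_custom_transformation_definitions = [
        {"transformation": "PlusX",
         "module_path": "gs://bucket/custom_transform_fn.py",
         "function_name": "plus_one_transform"},
    ]
    transformations = [
        {"transformation": "MaxAbsScale", "input_columns": ["col1"],
         "output_columns": ["col1_max_abs_scaled"]},
        # Extra keys ("x" here) are passed to the custom transform fn.
        {"transformation": "PlusX", "input_columns": ["col1"],
         "output_columns": ["col1_plus_5"],  # hypothetical
         "x": 5},
    ]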
- parameterType: STRING - bigquery_validation_split_uri: - description: BigQuery URI for the validation split to pass to the batch - prediction component during distillation. - parameterType: STRING - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - split_example_counts: - description: JSON string of data split example counts for train, validate, - and test splits. - parameterType: STRING - comp-finalize-eval-quantile-parameters: - executorLabel: exec-finalize-eval-quantile-parameters - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-finalize-eval-quantile-parameters-2: - executorLabel: exec-finalize-eval-quantile-parameters-2 - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-get-or-create-model-description: - executorLabel: exec-get-or-create-model-description - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-or-create-model-description-2: - executorLabel: exec-get-or-create-model-description-2 - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri: - executorLabel: exec-get-prediction-image-uri - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri-2: - executorLabel: exec-get-prediction-image-uri-2 - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column: - executorLabel: exec-get-predictions-column - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column-2: - executorLabel: exec-get-predictions-column-2 - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - 
parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - 
parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - `unmanaged_container_model` must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - `prediction__` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - `predictions`, and `errors`. If the Model has both `instance` - - and `prediction` schemata defined then the tables have columns as - - follows: The `predictions` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. 
The `errors` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has [google.rpc.Status](Status) - - represented as a STRUCT, and containing only `code` and - - `message`. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - key_field is not specified. - - When `excluded_fields` is populated, `included_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - `prediction--`, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - - `predictions_0001.`, `predictions_0002.`, - - ..., `predictions_N.` are created where `` - - depends on chosen `predictions_format`, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both `instance` and `prediction` schemata defined - - then each such file contains predictions as per the - - `predictions_format`. If prediction for any instance failed - - (partially or completely), then an additional - - `errors_0001.`, `errors_0002.`,..., - - `errors_N.` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional `error` field which as - - value has `google.rpc.Status` containing only `code` and - - `message` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: 'Google Cloud Storage URI(-s) to your instances to run batch - prediction - - on. They must match `instances_format`. May contain wildcards. For more - - information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). - - For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If `instance_type` is `array`, the order of field names in - - `included_fields` also determines the order of the values in the array. - - When `included_fields` is populated, `excluded_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model\naccepts. Vertex\ - \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ - to the specified format. 
Supported values are:\n`object`: Each input is\ - \ converted to JSON object format.\n * For `bigquery`, each row is converted\ - \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ - \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ - \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ - \ * For `bigquery`, each row is converted to an array. The order\n \ - \ of columns is determined by the BigQuery column order, unless\n \ - \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ - \ is populated.\n `included_fields` must be populated for specifying\ - \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ - \ object,\n `included_fields` must be populated for specifying field\ - \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ - \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ - \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ - \ is the same as `array`. The\n order of columns is the same as defined\ - \ in the file or table, unless\n included_fields is populated.\n * For\ - \ `jsonl`, the prediction instance format is determined by\n each line\ - \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ - \ be converted to\n an object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ record.\n * For `file-list`, each file in the list will be converted\ - \ to an\n object in the format of `{\"b64\": }`, where ``\ - \ is\n the Base64-encoded string of the content of the file." - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: 'The format in which instances are - - given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s - supportedInputStorageFormats. - - For more details about this input config, see - - [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ - \ In addition,\nthe batch prediction output will not include the instances.\ - \ Instead the\noutput will only include the value of the key field, in\ - \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ - \ output will have a `key` field\n instead of the `instance` field.\n\ - \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ - \ column instead of the instance feature columns.\nThe input must be\ - \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. 
Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: Location for creating the BatchPredictionJob. - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: 'The format in which Vertex AI gives the predictions. Must - be one of the - - Model''s supportedOutputStorageFormats. - - For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' - isOptional: true - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - description: Project to create the BatchPredictionJob. Defaults to the project - in which the PipelineJob is run. - isOptional: true - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. 
This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - `unmanaged_container_model` must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - `prediction__` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - `predictions`, and `errors`. If the Model has both `instance` - - and `prediction` schemata defined then the tables have columns as - - follows: The `predictions` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The `errors` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has [google.rpc.Status](Status) - - represented as a STRUCT, and containing only `code` and - - `message`. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - key_field is not specified. - - When `excluded_fields` is populated, `included_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - `prediction--`, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - `predictions_0001.`, `predictions_0002.`, - - ..., `predictions_N.` are created where `` - - depends on chosen `predictions_format`, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both `instance` and `prediction` schemata defined - - then each such file contains predictions as per the - - `predictions_format`. If prediction for any instance failed - - (partially or completely), then an additional - - `errors_0001.`, `errors_0002.`,..., - - `errors_N.` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional `error` field which as - - value has `google.rpc.Status` containing only `code` and - - `message` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: 'Google Cloud Storage URI(-s) to your instances to run batch - prediction - - on. They must match `instances_format`. May contain wildcards. For more - - information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). - - For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If `instance_type` is `array`, the order of field names in - - `included_fields` also determines the order of the values in the array. - - When `included_fields` is populated, `excluded_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model\naccepts. Vertex\ - \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ - to the specified format. Supported values are:\n`object`: Each input is\ - \ converted to JSON object format.\n * For `bigquery`, each row is converted\ - \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ - \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ - \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ - \ * For `bigquery`, each row is converted to an array. 
The order\n \
      -         \ of columns is determined by the BigQuery column order, unless\n \
      -         \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\
      -         \ is populated.\n `included_fields` must be populated for specifying\
      -         \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\
      -         \ object,\n `included_fields` must be populated for specifying field\
      -         \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\
      -         \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\
      -         \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\
      -         \ is the same as `array`. The\n order of columns is the same as defined\
      -         \ in the file or table, unless\n included_fields is populated.\n * For\
      -         \ `jsonl`, the prediction instance format is determined by\n each line\
      -         \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\
      -         \ be converted to\n an object in the format of `{\"b64\": <value>}`,\
      -         \ where `<value>` is\n the Base64-encoded string of the content of the\
      -         \ record.\n * For `file-list`, each file in the list will be converted\
      -         \ to an\n object in the format of `{\"b64\": <value>}`, where `<value>`\
      -         \ is\n the Base64-encoded string of the content of the file."
      -       isOptional: true
      -       parameterType: STRING
      -     instances_format:
      -       defaultValue: jsonl
      -       description: 'The format in which instances are
      -
      -         given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s
      -         supportedInputStorageFormats.
      -
      -         For more details about this input config, see
      -
      -         [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)'
      -       isOptional: true
      -       parameterType: STRING
      -     job_display_name:
      -       description: The user-defined name of this BatchPredictionJob.
      -       parameterType: STRING
      -     key_field:
      -       defaultValue: ''
      -       description: "The name of the field that is considered as a key.\nThe values\
      -         \ identified by the key field are not included in the\ntransformed instances\
      -         \ that are sent to the Model. This is similar to\nspecifying this name\
      -         \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\
      -         \ In addition,\nthe batch prediction output will not include the instances.\
      -         \ Instead the\noutput will only include the value of the key field, in\
      -         \ a field named\n`key` in the output:\n * For `jsonl` output format, the\
      -         \ output will have a `key` field\n instead of the `instance` field.\n\
      -         \ * For `csv`/`bigquery` output format, the output will have a `key`\n\
      -         \ column instead of the instance feature columns.\nThe input must be\
      -         \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord."
      -       isOptional: true
      -       parameterType: STRING
      -     labels:
      -       defaultValue: {}
      -       description: 'The labels with user-defined metadata to
      -
      -         organize your BatchPredictionJobs. Label keys and values can be no
      -
      -         longer than 64 characters (Unicode codepoints), can only contain
      -
      -         lowercase letters, numeric characters, underscores and dashes.
      -
      -         International characters are allowed. See https://goo.gl/xmQnxf for
      -
      -         more information and examples of labels.'
      -       isOptional: true
      -       parameterType: STRUCT
      -     location:
      -       defaultValue: us-central1
      -       description: Location for creating the BatchPredictionJob. 
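A hypothetical bundle of the dedicated-resources knobs described above; the values are illustrative, and `n1-standard-4` is just an example machine type, not a recommendation.

.. code-block:: python

    batch_predict_resources = {
        "machine_type": "n1-standard-4",  # example only
        "starting_replica_count": 1,
        "max_replica_count": 5,
        # Records sent to each replica per batch; too high a value can
        # exhaust a replica's memory and fail the operation (see above).
        "manual_batch_tuning_parameters_batch_size": 64,
    }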
- isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter; a higher value speeds up the - - batch operation''s execution, but too high a value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters may be specified via the Model's `parameters_schema_uri`. - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: 'The format in which Vertex AI gives the predictions. Must - be one of the - - Model''s supportedOutputStorageFormats. - - For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' - isOptional: true - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - description: Project to create the BatchPredictionJob. Defaults to the project - in which the PipelineJob is run. - isOptional: true - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides the starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job.
- - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation-forecasting: - executorLabel: exec-model-evaluation-forecasting - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-forecasting-2: - executorLabel: exec-model-evaluation-forecasting-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: 
- defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'google.ClassificationMetrics artifact generated from - - the ModelEvaluationClassificationOp component.' - isOptional: true - embedding_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The embedding metrics artifact generated from the - - embedding retrieval metrics component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'google.ForecastingMetrics artifact generated from - - the ModelEvaluationForecastingOp component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.QuestionAnsweringMetrics.' - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'google.RegressionMetrics artifact generated from - - the ModelEvaluationRegressionOp component.'
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.SummarizationMetrics.' - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.TextGenerationMetrics.' - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, `forecasting`, - - `text-generation`, `question-answering`, and `summarization` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - evaluation_resource_name: - parameterType: STRING - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'google.ClassificationMetrics artifact generated from - - the ModelEvaluationClassificationOp component.' - isOptional: true - embedding_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The embedding metrics artifact generated from the - - embedding retrieval metrics component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'google.ForecastingMetrics artifact generated from - - the ModelEvaluationForecastingOp component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.QuestionAnsweringMetrics.' - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'google.RegressionMetrics artifact generated from - - the ModelEvaluationRegressionOp component.'
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.SummarizationMetrics.' - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.TextGenerationMetrics.' - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, `forecasting`, - - `text-generation`, `question-answering`, and `summarization` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - evaluation_resource_name: - parameterType: STRING - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: 
true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - stats_gen_execution_engine: - description: Execution engine used for stats gen in FTE. - parameterType: STRING - transformations: - description: Forecasting transformations to append stats gen engine to. - parameterType: STRUCT - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - transformations: - parameterType: STRUCT - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized train split. - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked.
- parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Dataset stats generated by feature transform engine. - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Schema of input data to the tf_model at serving time. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: The names of the columns that are available at forecast time. - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: If probabilistic inference is enabled, the model will fit a - distribution that captures the uncertainty of a prediction. At inference - time, the predictive distribution is used to make a point prediction that - minimizes the optimization objective. For example, the mean of a predictive - distribution is the point prediction that minimizes RMSE loss. If quantiles - are specified, then the quantiles of the distribution are also returned. - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: Dict mapping auto and/or type-resolutions to feature columns. - The supported types are auto, categorical, numeric, text, and timestamp. - isOptional: true - parameterType: STRUCT - group_columns: - description: A list of time series attribute column names that define the - time series hierarchy. - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over both - the horizon and time series in the same hierarchy group. - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over time - series in the same group. 
- isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: 'Objective function the model is optimizing towards. The training - process creates a model that maximizes/minimizes the value of the objective - function over the validation set. The supported optimization objectives - depend on the prediction type. If the field is not set, a default objective - function is used. classification: "maximize-au-roc" (default) - Maximize - the area under the receiver operating characteristic (ROC) curve. "minimize-log-loss" - - Minimize log loss. "maximize-au-prc" - Maximize the area under the precision-recall - curve. "maximize-precision-at-recall" - Maximize precision for a specified - recall value. "maximize-recall-at-precision" - Maximize recall for a specified - precision value. classification (multi-class): "minimize-log-loss" (default) - - Minimize log loss. regression: "minimize-rmse" (default) - Minimize - root-mean-squared error (RMSE). "minimize-mae" - Minimize mean-absolute - error (MAE). "minimize-rmsle" - Minimize root-mean-squared log error - (RMSLE).' - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: Required when optimization_objective is "maximize-recall-at-precision". - Must be between 0 and 1, inclusive. - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: Required when optimization_objective is "maximize-precision-at-recall". - Must be between 0 and 1, inclusive. - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: Model prediction type. One of "classification", "regression", - "time_series". - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model needs to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: Whether the distillation should be applied to the training. - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: Whether we are running evaluation in the training pipeline. - isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: JSON string of data split example counts for train, validate, - and test splits. - parameterType: STRING - stage_1_deadline_hours: - description: Stage 1 training budget in hours. - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: Stage 2 training budget in hours. - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over the - horizon for a single time series. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: The column that indicates the time. Used by forecasting only. - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: The column names of the time series attributes. - isOptional: true - parameterType: LIST - time_series_identifier_column: - description: '[Deprecated] The time series identifier column. Used by forecasting - only. Raises exception if used - use the "time_series_identifier_columns" - field instead.'
- isOptional: true - parameterType: STRING - time_series_identifier_columns: - defaultValue: [] - description: The list of time series identifier columns. Used by forecasting - only. - isOptional: true - parameterType: LIST - unavailable_at_forecast_columns: - defaultValue: [] - description: The names of the columns that are not available at forecast - time. - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. -deploymentSpec: - executors: - exec-automl-forecasting-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, - "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", - "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", - "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", - "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", - "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", - "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", - "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", - "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", - "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, - "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", - "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", - "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", - "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", - "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", - "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", - "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", - "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", - "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", 
"{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-2-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - 
args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20240214_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - - --dataflow_machine_type - - 
'{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_runner_mode - - '{{$.inputs.parameters[''force_runner_mode'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_runner_mode - - '{{$.inputs.parameters[''force_runner_mode'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": 
["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", - "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' - - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", 
"{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' - - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - 
- --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-finalize-eval-quantile-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = 
[]\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-finalize-eval-quantile-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-or-create-model-description: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n if original_description:\n\ - \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ - \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ - \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-or-create-model-description-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef 
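The finalize-eval-quantile-parameters component embedded above reduces to one small decision: no quantiles means point forecasting. A minimal standalone sketch of that logic, with the KFP NamedTuple plumbing dropped; the check against the literal string '[]' handles list parameters that arrive JSON-encoded:

    from typing import List, Optional, Tuple

    def finalize_eval_quantile_parameters(
        quantiles: Optional[List[float]] = None,
    ) -> Tuple[str, List[float]]:
        # Mirrors the component: empty or missing quantiles -> point forecast.
        if not quantiles or quantiles == '[]':
            return 'point', []
        return 'quantile', quantiles

    assert finalize_eval_quantile_parameters(None) == ('point', [])
    assert finalize_eval_quantile_parameters([0.1, 0.5, 0.9]) == (
        'quantile', [0.1, 0.5, 0.9])
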
get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n if original_description:\n\ - \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ - \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ - \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-prediction-image-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20240214_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20240214_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20240214_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20240214_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. 
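The get-or-create-model-description helper above only builds a console URL and appends it to any caller-supplied description. The same logic in plain Python; note that '{{$.pipeline_job_name}}' is deliberately left verbatim so the KFP runtime can substitute the real job name when the task executes:

    def get_or_create_model_description(
        location: str, project: str, original_description: str = ''
    ) -> str:
        # The runtime replaces {{$.pipeline_job_name}}; at compile time it
        # must remain a literal placeholder in the string.
        pipeline_url = (
            'https://console.cloud.google.com/vertex-ai/locations/'
            f'{location}/pipelines/runs/{{{{$.pipeline_job_name}}}}'
            f'?project={project}'
        )
        if original_description:
            return f'{original_description} From: {pipeline_url}'
        return f'Vertex forecasting model trained in the pipeline: {pipeline_url}'
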
Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-prediction-image-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20240214_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20240214_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20240214_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20240214_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-predictions-column: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-predictions-column-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-model-batch-explanation: - container: - 
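The two lookup helpers just shown (prediction image URI and predictions column) condense to plain Python as below. Note the real component hardcodes each full image URI on a single line so release tooling can string-replace the tag; the f-string here is a readability shortcut for this sketch only:

    def get_prediction_image_uri(model_type: str, tag: str = '20240214_1325') -> str:
        # Keys come from AutoMlTimeSeriesForecastingTrainSpec.
        base = ('us-docker.pkg.dev/vertex-ai/automl-tabular/'
                'forecasting-prediction-server')
        images = {m: f'{base}-{m}:{tag}' for m in ('l2l', 'seq2seq', 'tft', 'tide')}
        if model_type not in images:
            raise ValueError(
                f'Invalid forecasting model type: {model_type}. '
                f'Valid options are: {sorted(images)}.'
            )
        return images[model_type]

    def get_predictions_column(forecasting_type: str, target_column: str) -> str:
        # Batch prediction emits point values or quantile arrays per target.
        suffix = 'quantile_predictions' if forecasting_type == 'quantile' else 'value'
        return f'predicted_{target_column}.{suffix}'
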
args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", 
"\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", 
"{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", 
"{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-evaluation-forecasting: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - 
exec-model-evaluation-forecasting-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", - "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --evaluation_resource_name - - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": 
"embedding_metrics", "Then": ["--embedding_metrics", - "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --evaluation_resource_name - - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", - "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.17 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", - "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' - command: - - python3 - - -u - - -m - - launcher - image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.17 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n stats_gen_execution_engine: str,\n transformations: dict,\n\ - ) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ('transformations', dict),\n ],\n):\n \"\"\"Get the\ - \ data source URI.\n\n Args:\n project: The GCP project that runs the\ - \ pipeline components.\n location: The GCP region that runs the pipeline\ - \ components.\n data_source_csv_filenames: The CSV GCS path when data\ - \ source is CSV.\n data_source_bigquery_table_path: The BigQuery table\ - \ when data source is BQ.\n vertex_dataset: The Vertex dataset when data\ - \ source is Vertex dataset.\n model_display_name: The uploaded model's\ - \ display name.\n stats_gen_execution_engine: Execution engine used for\ - \ stats gen in FTE.\n transformations: forecasting transformations to\ - \ append stats gen engine to.\n\n Returns:\n A named tuple of CSV or\ - \ BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n # TODO(b/261504514) Remove this handling when we use the FTE transform\ - \ config.\n transformations['stats_gen_execution_engine'] = stats_gen_execution_engine\n\ - \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ - \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ - \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ - \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ - \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ - \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ - \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ - \ return collections.namedtuple(\n 'Outputs',\n [\n \ - \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n 'transformations',\n ],\n\ - \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ - \ model_display_name,\n transformations,\n )\n\n" - image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. 
-> 'true' if empty, 'false' if not empty.\ - \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - 
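table-to-uri above is a pure metadata reshuffle; here it is in plain Python, with a dict standing in for the google.BQTable artifact's .metadata. (_string_not_empty, also above, returns the strings 'true'/'false' rather than booleans because of the KFP compiler limitation its docstring notes.)

    import collections

    def table_to_uri(metadata: dict, use_bq_prefix: bool = False):
        # `metadata` stands in for table.metadata on the BQTable artifact.
        parts = [metadata['projectId'], metadata['datasetId'], metadata['tableId']]
        uri = '.'.join(parts)
        if use_bq_prefix:
            uri = 'bq://' + uri
        Outputs = collections.namedtuple(
            'Outputs', ['project_id', 'dataset_id', 'table_id', 'uri'])
        return Outputs(*parts, uri)

    assert table_to_uri(
        {'projectId': 'p', 'datasetId': 'd', 'tableId': 't'}, use_bq_prefix=True
    ).uri == 'bq://p.d.t'
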
'{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": - ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' - - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 -pipelineInfo: - description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. 
- name: temporal-fusion-transformer-forecasting -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - artifacts: - pipelinechannel--parent_model: - componentInputArtifact: parent_model - parameters: - pipelinechannel--available_at_forecast_columns: - componentInputParameter: available_at_forecast_columns - pipelinechannel--context_window: - componentInputParameter: context_window - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - componentInputParameter: feature_transform_engine_dataflow_disk_size_gb - pipelinechannel--feature_transform_engine_dataflow_machine_type: - componentInputParameter: feature_transform_engine_dataflow_machine_type - 
pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - componentInputParameter: feature_transform_engine_dataflow_max_num_workers - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--group_columns: - componentInputParameter: group_columns - pipelinechannel--group_temporal_total_weight: - componentInputParameter: group_temporal_total_weight - pipelinechannel--group_total_weight: - componentInputParameter: group_total_weight - pipelinechannel--holiday_regions: - componentInputParameter: holiday_regions - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--model_display_name: - componentInputParameter: model_display_name - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-transformations: - taskOutputParameter: - outputParameterKey: transformations - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: stage_2_trainer_worker_pool_specs_override - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--temporal_total_weight: - componentInputParameter: temporal_total_weight - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_attribute_columns: - componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_columns: - componentInputParameter: time_series_identifier_columns - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transformations: - componentInputParameter: transformations - pipelinechannel--unavailable_at_forecast_columns: - componentInputParameter: unavailable_at_forecast_columns - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - 
pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_predefined_column: - componentInputParameter: window_predefined_column - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - stats_gen_execution_engine: - runtimeValue: - constant: bigquery - transformations: - componentInputParameter: transformations - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Optional Vertex Model that this model is a version of. - isOptional: true - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - available_at_forecast_columns: - description: 'The columns that are available at the - - forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: 0.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: The full service account name. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: The dataflow subnetwork. - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: '`True` to enable dataflow public IPs.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluated_examples_bigquery_path: - defaultValue: '' - description: 'The BigQuery dataset to write the - - predicted examples into for evaluation, in the format - - `bq://project.dataset`. Only necessary if evaluation is enabled.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 22.0 - description: 'The max number of prediction - - servers for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 22.0 - description: 'The initial number of - - prediction servers for batch explain components during evaluation.'
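For orientation, the `exit-handler-1` / `automl-tabular-finalizer` wiring above is the compiled form of a KFP exit handler: the finalizer's ALL_UPSTREAM_TASKS_COMPLETED trigger policy makes it run even when the guarded tasks fail. A minimal authoring-side sketch (the component names here are hypothetical stand-ins, not this pipeline's real components):

    from kfp import dsl

    @dsl.component
    def cleanup_op(root_dir: str):
        # Stand-in for automl-tabular-finalizer: release temporary resources.
        print(f'cleaning up {root_dir}')

    @dsl.component
    def main_work():
        print('training steps run here')

    @dsl.pipeline(name='exit-handler-demo')
    def pipeline(root_dir: str = 'gs://my-bucket/tmp'):
        # The exit task compiles to a task with triggerPolicy strategy
        # ALL_UPSTREAM_TASKS_COMPLETED, like automl-tabular-finalizer above.
        with dsl.ExitHandler(exit_task=cleanup_op(root_dir=root_dir)):
            main_work()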
- isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the batch prediction - - job in evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 25.0 - description: 'The maximum count of replicas - - the batch prediction job can scale to.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 25.0 - description: 'Number of replicas to use - - in the batch prediction cluster at startup time.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: The disk space in GB for dataflow. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the dataflow job in - - evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 25.0 - description: Maximum number of dataflow workers. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 22.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - feature_transform_engine_bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The full id of - - the feature transform engine staging dataset.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size of the - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_transform_engine_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type of - - the feature transform engine.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon: - defaultValue: 0.0 - description: The length of the horizon. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - description: 'A list of time series attribute column names that define the - - time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions - - aggregated over both the horizon and time series in the same hierarchy - - group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated over - - time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - holiday_regions: - description: 'The geographical regions where the holiday effect is - - applied in modeling.' - isOptional: true - parameterType: LIST - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_description: - defaultValue: '' - description: Optional description. 
- isOptional: true - parameterType: STRING - model_display_name: - defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - description: Optional display name for model. - isOptional: true - parameterType: STRING - optimization_objective: - description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", - - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - - "minimize-quantile-loss".' - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: '`True` to evaluate the ensembled model on the test split.' - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trials for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.' - isOptional: true - parameterType: LIST - stage_1_tuning_result_artifact_uri: - defaultValue: '' - description: 'The stage 1 tuning result artifact GCS - - URI.' - isOptional: true - parameterType: STRING - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trials for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 2 trainer worker pool spec.' - isOptional: true - parameterType: LIST - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated - - over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - description: The column that indicates the time. - parameterType: STRING - time_series_attribute_columns: - description: 'The columns that are invariant across the - - same time series.' - isOptional: true - parameterType: LIST - time_series_identifier_columns: - description: 'The columns that distinguish the different - - time series.' - parameterType: LIST - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. - isOptional: true - parameterType: STRING - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours, i.e. a value of 1,000 in this field means - - 1 node hour.' - parameterType: NUMBER_DOUBLE - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transformations: - description: 'Dict mapping auto and/or type-resolutions to feature - - columns. The supported types are: auto, categorical, numeric, text, and - - timestamp.' - parameterType: STRUCT - unavailable_at_forecast_columns: - description: 'The columns that are unavailable at the - - forecast time.'
- isOptional: true - parameterType: LIST - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - window_max_count: - defaultValue: 0.0 - description: The maximum number of windows that will be generated. - isOptional: true - parameterType: NUMBER_INTEGER - window_predefined_column: - defaultValue: '' - description: The column that indicates the start of each window. - isOptional: true - parameterType: STRING - window_stride_length: - defaultValue: 0.0 - description: The stride length to generate the window. - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml deleted file mode 100644 index c39b006295f..00000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +++ /dev/null @@ -1,7586 +0,0 @@ -# PIPELINE DEFINITION -# Name: time-series-dense-encoder-forecasting -# Description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. -# Inputs: -# available_at_forecast_columns: list -# context_window: int [Default: 0.0] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# enable_probabilistic_inference: bool [Default: False] -# encryption_spec_key_name: str [Default: ''] -# evaluated_examples_bigquery_path: str [Default: ''] -# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_explain_max_replica_count: int [Default: 22.0] -# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] -# evaluation_batch_predict_max_replica_count: int [Default: 25.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] -# evaluation_dataflow_max_num_workers: int [Default: 25.0] -# evaluation_dataflow_starting_num_workers: int [Default: 22.0] -# fast_testing: bool [Default: False] -# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] -# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] -# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] -# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] -# forecast_horizon: int [Default: 0.0] -# group_columns: list -# group_temporal_total_weight: float [Default: 0.0] -# group_total_weight: float [Default: 0.0] -# holiday_regions: list -# location: str -# model_description: str [Default: ''] -# model_display_name: str [Default: 
'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] -# num_selected_trials: int [Default: 10.0] -# optimization_objective: str -# parent_model: system.Artifact -# predefined_split_key: str [Default: ''] -# project: str -# quantiles: list -# root_dir: str -# run_evaluation: bool [Default: False] -# stage_1_num_parallel_trials: int [Default: 35.0] -# stage_1_tuner_worker_pool_specs_override: list -# stage_1_tuning_result_artifact_uri: str [Default: ''] -# stage_2_num_parallel_trials: int [Default: 35.0] -# stage_2_trainer_worker_pool_specs_override: list -# study_spec_parameters_override: list -# target_column: str -# temporal_total_weight: float [Default: 0.0] -# test_fraction: float [Default: -1.0] -# time_column: str -# time_series_attribute_columns: list -# time_series_identifier_columns: list -# timestamp_split_key: str [Default: ''] -# train_budget_milli_node_hours: float -# training_fraction: float [Default: -1.0] -# transformations: dict -# unavailable_at_forecast_columns: list -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# window_max_count: int [Default: 0.0] -# window_predefined_column: str [Default: ''] -# window_stride_length: int [Default: 0.0] -# Outputs: -# feature-attribution-2-feature_attributions: system.Metrics -# feature-attribution-feature_attributions: system.Metrics -components: - comp-automl-forecasting-ensemble: - executorLabel: exec-automl-forecasting-ensemble - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The instance baseline used to calculate explanations. - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the instance schema, describing the input data - for the tf_model at serving time. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: URI of the Docker image to be used as the container for serving - predictions. This URI must identify an image in Artifact Registry or Container - Registry. - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - example_instance: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: An example instance which may be used as an input for predictions. - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. 
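The PIPELINE DEFINITION header above doubles as the contract for runtime parameters. A hedged sketch of submitting this template with the Vertex AI SDK (the project, bucket, and column names are made up; only parameters listed in the header are valid keys):

    from google.cloud import aiplatform

    aiplatform.init(project='my-project', location='us-central1')

    job = aiplatform.PipelineJob(
        display_name='tide-forecasting-demo',
        template_path='time_series_dense_encoder_forecasting_pipeline.yaml',
        pipeline_root='gs://my-bucket/pipeline_root',
        parameter_values={
            'project': 'my-project',
            'location': 'us-central1',
            'root_dir': 'gs://my-bucket/pipeline_root',
            'target_column': 'sales',
            'time_column': 'date',
            'time_series_identifier_columns': ['store_id'],
            'transformations': {'auto': ['sales', 'date', 'store_id']},
            'optimization_objective': 'minimize-rmse',
            'train_budget_milli_node_hours': 1000.0,
            'data_source_bigquery_table_path': 'bq://my-project.my_dataset.sales',
        },
    )
    job.submit()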
- unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-ensemble-2: - executorLabel: exec-automl-forecasting-ensemble-2 - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The instance baseline used to calculate explanations. - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the instance schema, describing the input data - for the tf_model at serving time. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: URI of the Docker image to be used as the container for serving - predictions. This URI must identify an image in Artifact Registry or Container - Registry. - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - example_instance: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: An example instance which may be used as an input for predictions. - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
- parameterType: STRING - comp-automl-forecasting-stage-1-tuner: - executorLabel: exec-automl-forecasting-stage-1-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: Number of hours the hyperparameter tuning should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the hyperparameter tuning. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: Number of selected trials. The number of weak learners in the - final model is 5 * num_selected_trials. - parameterType: NUMBER_INTEGER - project: - description: Project to run hyperparameter tuning. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible values: "regular" (default), - "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., [{"parameter_id": "activation","categorical_value_spec": - {"values": ["tanh"]}}]' - isOptional: true - parameterType: LIST - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-forecasting-stage-2-tuner: - executorLabel: exec-automl-forecasting-stage-2-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The forecasting example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. 
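The stage-1 tuner's `study_spec_parameters_override` expects Vizier-style parameter specs. The first entry below is the exact example from the component description above; the second is a hypothetical numeric override added only to show the shape:

    study_spec_parameters_override = [
        # Example taken verbatim from the component description.
        {'parameter_id': 'activation',
         'categorical_value_spec': {'values': ['tanh']}},
        # Hypothetical: constrain a numeric hyperparameter's search range.
        {'parameter_id': 'learning_rate',
         'double_value_spec': {'min_value': 1e-4, 'max_value': 1e-2}},
    ]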
- tuning_result_input_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path to the json of hyperparameter tuning results to use when - evaluating models. - parameters: - deadline_hours: - description: Number of hours the cross-validation trainer should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: 'Cloud region for running the component (e.g., us-central1).' - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: Number of selected trials. The number of weak learners in the - final model. - parameterType: NUMBER_INTEGER - project: - description: Project to run stage 2 tuner. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained (private) model artifact paths and their hyperparameters. - parameters: - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trials for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trials for stage 2.
parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours, i.e. a value of 1,000 in this field means - - 1 node hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trials for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trials for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours, i.e. a value of 1,000 in this field means - - 1 node hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - tasks: - automl-forecasting-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble - dependentTasks: - - automl-forecasting-stage-2-tuner - - get-prediction-image-uri - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-2-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble - automl-forecasting-stage-2-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-2-tuner - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_eval_split: -
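As a sanity check on the budget units handled by the two calculate-training-parameters components (the actual deadline math lives in the component implementation; this only illustrates the unit conversion stated in the description):

    train_budget_milli_node_hours = 1000.0
    # Per the description: 1,000 milli node hours == 1 node hour.
    node_hours = train_budget_milli_node_hours / 1000.0
    # Illustrative only: an even split across the default 35 parallel
    # stage-1 trials would give each trial this share of the budget.
    per_trial_node_hours = node_hours / 35
    print(node_hours, per_trial_node_hours)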
componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input_path: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-2-tuner - calculate-training-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: true - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-forecasting-ensemble - - model-upload - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - 
componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description - get-prediction-image-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri - inputs: - parameters: - model_type: - runtimeValue: - constant: tide - taskInfo: - name: get-prediction-image-uri - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: get-hyperparameter-tuning-results - 
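The `should_run_model_evaluation` task above, with its triggerPolicy on `pipelinechannel--run_evaluation`, is what KFP emits for a conditional sub-DAG. A sketch of the authoring side (the component name is hypothetical, and `dsl.If` is the newer spelling of `dsl.Condition`, so the exact construct depends on the KFP version in use):

    from kfp import dsl

    @dsl.component
    def evaluate_model():
        print('evaluation steps run here')

    @dsl.pipeline(name='conditional-eval-demo')
    def pipeline(run_evaluation: bool = False):
        # Compiles to a sub-DAG guarded by a condition like
        # inputs.parameter_values['pipelinechannel--run_evaluation'] == true
        with dsl.If(run_evaluation == True):
            evaluate_model()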
model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-forecasting-ensemble - - get-or-create-model-description - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - parent_model: - componentInputArtifact: pipelinechannel--parent_model - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - 
pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - force_runner_mode: - runtimeValue: - constant: Dataflow - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - runtimeValue: - constant: forecasting - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - finalize-eval-quantile-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters - inputs: - parameters: - quantiles: - componentInputParameter: pipelinechannel--quantiles - taskInfo: - name: finalize-eval-quantile-parameters - get-predictions-column: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column - dependentTasks: - - finalize-eval-quantile-parameters - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - target_column: - 
componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: true - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: false - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation-forecasting: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting - dependentTasks: - - finalize-eval-quantile-parameters - - get-predictions-column - - model-batch-predict - - table-to-uri - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: 
model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation-forecasting - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - model-batch-predict - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - use_bq_prefix: - runtimeValue: - constant: true - taskInfo: - name: table-to-uri - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - 
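The `model-batch-predict` task above maps closely onto the Vertex AI SDK's batch prediction entry point. A hedged sketch with made-up resource names (the pipeline itself feeds an unmanaged container model artifact rather than an uploaded Model, so this is an analogy, not the task's literal call):

    from google.cloud import aiplatform

    aiplatform.init(project='my-project', location='us-central1')

    batch_job = aiplatform.BatchPredictionJob.create(
        job_display_name='batch-predict-forecasting-evaluation-demo',
        model_name='projects/my-project/locations/us-central1/models/123',
        instances_format='bigquery',
        predictions_format='bigquery',
        bigquery_source='bq://my-project.my_dataset.test_split',
        bigquery_destination_prefix='bq://my-project.eval_dataset',
        machine_type='n1-standard-16',
        starting_replica_count=25,
        max_replica_count=25,
        generate_explanation=False,
    )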
parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - tasks: - automl-forecasting-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble-2 - dependentTasks: - - automl-forecasting-stage-1-tuner - - get-prediction-image-uri-2 - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-1-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri-2 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - 
taskInfo: - name: automl-forecasting-ensemble-2 - automl-forecasting-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - reduce_search_space_mode: - runtimeValue: - constant: full - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-1-tuner - calculate-training-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: false - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-forecasting-ensemble-2 - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - 
pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description-2 - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description-2 - get-prediction-image-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: 
comp-get-prediction-image-uri-2 - inputs: - parameters: - model_type: - runtimeValue: - constant: tide - taskInfo: - name: get-prediction-image-uri-2 - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - dependentTasks: - - automl-forecasting-ensemble-2 - - get-or-create-model-description-2 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - parent_model: - componentInputArtifact: pipelinechannel--parent_model - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description-2 - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER 
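
The `triggerPolicy` blocks above (for example, `should_run_model_evaluation`, which fires only when `run_evaluation == true`) are what the KFP compiler emits for condition contexts. As a rough illustration only, a guard like that is typically authored as follows before compilation; both component bodies here are hypothetical stand-ins, not the pipeline's real steps.

```python
# Illustrative sketch: authoring a guarded sub-DAG in KFP v2.
from kfp import dsl


@dsl.component
def train_ensemble() -> str:
    # Placeholder training step; returns a model URI.
    return 'gs://my-bucket/model'


@dsl.component
def evaluate_model(model_uri: str):
    # Placeholder evaluation step.
    print(f'evaluating {model_uri}')


@dsl.pipeline(name='condition-sketch')
def condition_sketch(run_evaluation: bool = True):
    train_task = train_ensemble()
    # The compiler turns this block into a sub-DAG with a triggerPolicy
    # condition such as:
    #   inputs.parameter_values['pipelinechannel--run_evaluation'] == true
    with dsl.Condition(run_evaluation == True,  # noqa: E712 (KFP idiom)
                       name='should-run-model-evaluation'):
        evaluate_model(model_uri=train_task.output)
```
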
- pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - force_runner_mode: - runtimeValue: - constant: Dataflow - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - runtimeValue: - constant: forecasting - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - finalize-eval-quantile-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters-2 - inputs: - parameters: - quantiles: - componentInputParameter: pipelinechannel--quantiles - taskInfo: - name: finalize-eval-quantile-parameters-2 - get-predictions-column-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - inputs: - 
parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: true - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: false - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-forecasting-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - - 
get-predictions-column-2 - - model-batch-predict-2 - - table-to-uri-2 - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters-2 - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column-2 - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting-2 - model-evaluation-import-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-forecasting-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import-2 - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - use_bq_prefix: - runtimeValue: - constant: true - taskInfo: - name: table-to-uri-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
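
For orientation: in the evaluation sub-DAG above, `model-batch-predict-2` writes predictions to BigQuery, `table-to-uri-2` (with `use_bq_prefix: true`) converts the output table into a `bq://` URI consumed as `ground_truth_bigquery_source`, and the metric target is `HORIZON__<target_column>`. A minimal sketch of what such a table-to-URI step conceptually computes (not the component's actual source):

```python
# Minimal sketch, assuming only the use_bq_prefix behavior shown above.
def table_to_uri(project_id: str, dataset_id: str, table_id: str,
                 use_bq_prefix: bool = True) -> str:
    """Formats a BigQuery table reference as a single URI string."""
    uri = f'{project_id}.{dataset_id}.{table_id}'
    return f'bq://{uri}' if use_bq_prefix else uri


assert table_to_uri('my-project', 'eval_ds', 'predictions') == (
    'bq://my-project.eval_ds.predictions')
```
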
pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - tasks: - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: 
split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - 
pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - 
pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - 
bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - forecasting_context_window: - componentInputParameter: pipelinechannel--context_window - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_holiday_regions: - componentInputParameter: pipelinechannel--holiday_regions - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_predefined_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_columns: - componentInputParameter: pipelinechannel--time_series_identifier_columns - forecasting_unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - location: - componentInputParameter: pipelinechannel--location - model_type: - runtimeValue: - constant: tide - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - stats_gen_execution_engine: - runtimeValue: - constant: bigquery - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--transformations - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: 
pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: check-if-hyperparameter-tuning-results-are-supplied-by-user - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - context_window: - componentInputParameter: pipelinechannel--context_window - enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_model_type: - runtimeValue: - constant: tide - forecasting_transformations: - componentInputParameter: pipelinechannel--set-optional-inputs-transformations - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - prediction_type: - runtimeValue: - constant: time_series - quantiles: - componentInputParameter: pipelinechannel--quantiles - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_columns: - componentInputParameter: pipelinechannel--time_series_identifier_columns - unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - artifacts: - pipelinechannel--parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--available_at_forecast_columns: - parameterType: LIST - pipelinechannel--context_window: - parameterType: NUMBER_INTEGER - 
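
The exit-handler DAG above wires `feature-transform-engine` into `split-materialized-data` and `training-configurator-and-validator`, with `string-not-empty` routing execution between the two condition branches depending on whether `stage_1_tuning_result_artifact_uri` was supplied. A hedged usage sketch for submitting a compiled pipeline like this one with the Vertex AI SDK; the project, bucket, template filename, and data values are placeholders, and the parameter names follow the input definitions above:

```python
# Usage sketch only; all identifiers marked below are placeholders.
from google.cloud import aiplatform

aiplatform.init(project='my-project', location='us-central1')

job = aiplatform.PipelineJob(
    display_name='tide-forecasting',
    # Placeholder path to the compiled pipeline spec.
    template_path='time_series_dense_encoder_forecasting_pipeline.yaml',
    pipeline_root='gs://my-bucket/pipeline_root',
    parameter_values={
        'project': 'my-project',
        'location': 'us-central1',
        'root_dir': 'gs://my-bucket/pipeline_root',
        'target_column': 'sales',
        'time_column': 'date',
        'time_series_identifier_columns': ['store_id'],
        'forecast_horizon': 30,
        'context_window': 30,
        'run_evaluation': True,
        # An empty string makes string-not-empty return 'false', routing
        # execution to the stage_1_tuning_result_artifact_uri_empty branch.
        'stage_1_tuning_result_artifact_uri': '',
    },
)
job.submit()
```
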
pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--enable_probabilistic_inference: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_transform_engine_dataflow_machine_type: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--group_columns: - parameterType: LIST - pipelinechannel--group_temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--group_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--holiday_regions: - parameterType: LIST - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-transformations: - parameterType: STRUCT - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--temporal_total_weight: - 
parameterType: NUMBER_DOUBLE - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_attribute_columns: - parameterType: LIST - pipelinechannel--time_series_identifier_columns: - parameterType: LIST - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transformations: - parameterType: STRUCT - pipelinechannel--unavailable_at_forecast_columns: - parameterType: LIST - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_predefined_column: - parameterType: STRING - pipelinechannel--window_stride_length: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size_gb: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - force_runner_mode: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
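
The `gcp_resources` output described above is a serialized `GcpResources` proto. Assuming the proto shipped with the `google-cloud-pipeline-components` package (see the linked README), it can be decoded roughly like this; the JSON payload shown is invented for illustration:

```python
# Hedged sketch: decoding a serialized gcp_resources parameter.
from google.protobuf import json_format
from google_cloud_pipeline_components.proto import gcp_resources_pb2

payload = (
    '{"resources": [{"resourceType": "DataflowJob", "resourceUri": '
    '"https://dataflow.googleapis.com/v1b3/projects/p/locations/'
    'us-central1/jobs/123"}]}'
)
resources = json_format.Parse(payload, gcp_resources_pb2.GcpResources())
for resource in resources.resources:
    print(resource.resource_type, resource.resource_uri)
```
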
- parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size_gb: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - force_runner_mode: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: Dataset in "projectId.datasetId" format for storing intermediate-FTE - BigQuery tables. If the specified dataset does not exist in BigQuery, - FTE will create the dataset. If no bigquery_staging_full_dataset_id is - specified, all intermediate tables will be stored in a dataset created - under the provided project in the input data source's location during - FTE execution called "vertex_feature_transform_engine_staging_{location.replace('-', - '_')}". All tables generated by FTE will have a 30 day TTL. - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: BigQuery input data source to run feature transform on. - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: CSV input data source to run feature transform on. - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: The disk size, in gigabytes, to use on each Dataflow worker - instance. If not set, default to 40. - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: The machine type used for dataflow jobs. If not set, default - to n1-standard-16. 
- isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: The number of workers to run the dataflow job. If not set, - default to 25. - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: Custom service account to run Dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty the - default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: Specifies whether Dataflow workers use public IP addresses. - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: 'List of dataset-level custom transformation definitions. Custom, - bring-your-own dataset-level transform functions, where users can define - and import their own transform function and use it with FTE''s built-in - transformations. Using custom transformations is an experimental feature - and it is currently not supported during batch prediction. - - [ { "transformation": "ConcatCols", "module_path": "/path/to/custom_transform_fn_dlt.py", - "function_name": "concat_cols" } ] Using custom transform function together - with FTE''s built-in transformations: .. code-block:: python [ { "transformation": - "Join", "right_table_uri": "bq://test-project.dataset_test.table", "join_keys": - [["join_key_col", "join_key_col"]] },{ "transformation": "ConcatCols", - "cols": ["feature_1", "feature_2"], "output_col": "feature_1_2" } ]' - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level transformations.\n[ { \"transformation\"\ - : \"Join\", \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - , \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }, ... ] Additional\ - \ information about FTE's currently supported built-in\n transformations:\n\ - \ Join: Joins features from right_table_uri. For each join key, the\ - \ left table keys will be included and the right table keys will be dropped.\n\ - \ Example: .. code-block:: python { \"transformation\": \"Join\"\ - , \"right_table_uri\": \"bq://test-project.dataset_test.table\", \"join_keys\"\ - : [[\"join_key_col\", \"join_key_col\"]] }\n Arguments:\n \ - \ right_table_uri: Right table BigQuery uri to join with input_full_table_id.\n\ - \ join_keys: Features to join on. For each nested list, the\ - \ first element is a left table column and the second is its corresponding\ - \ right table column.\n TimeAggregate: Creates a new feature composed\ - \ of values of an existing feature from a fixed time period ago or in\ - \ the future.\n Ex: A feature for sales by store 1 year ago.\n \ - \ Example: .. 
code-block:: python { \"transformation\": \"TimeAggregate\"\ - , \"time_difference\": 40, \"time_difference_units\": \"DAY\", \"time_series_identifier_columns\"\ - : [\"store_id\"], \"time_column\": \"time_col\", \"time_difference_target_column\"\ - : \"target_col\", \"output_column\": \"output_col\" }\n Arguments:\n\ - \ time_difference: Number of time_difference_units to look\ - \ back or into the future on our time_difference_target_column.\n \ - \ time_difference_units: Units of time_difference to look back\ - \ or into the future on our time_difference_target_column. Must be one\ - \ of * 'DAY' * 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER' * 'YEAR'\n\ - \ time_series_identifier_columns: Names of the time series\ - \ identifier columns.\n time_column: Name of the time column.\n\ - \ time_difference_target_column: Column we wish to get the\ - \ value of time_difference time_difference_units in the past or future.\n\ - \ output_column: Name of our new time aggregate feature.\n\ - \ is_future: Whether we wish to look forward in time. Defaults\ - \ to False. PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\ - \ Performs a partition by reduce operation (one of max, min, avg, or sum)\ - \ with a fixed historic time period. Ex: Getting avg sales (the reduce\ - \ column) for each store (partition_by_column) over the previous 5 days\ - \ (time_column, time_ago_units, and time_ago).\n Example: .. code-block::\ - \ python { \"transformation\": \"PartitionByMax\", \"reduce_column\"\ - : \"sell_price\", \"partition_by_columns\": [\"store_id\", \"state_id\"\ - ], \"time_column\": \"date\", \"time_ago\": 1, \"time_ago_units\": \"\ - WEEK\", \"output_column\": \"partition_by_reduce_max_output\" }\n \ - \ Arguments:\n reduce_column: Column to apply the reduce\ - \ operation on. Reduce operations include the\n following:\ - \ Max, Min, Avg, Sum.\n partition_by_columns: List of columns\ - \ to partition by.\n time_column: Time column for the partition\ - \ by operation's window function.\n time_ago: Number of time_ago_units\ - \ to look back on our target_column, starting from time_column (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on our target_column.\ - \ Must be one of * 'DAY' * 'WEEK'\n output_column: Name of\ - \ our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature selection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\". The algorithms available\ - \ are: AMI(Adjusted Mutual Information):\nReference: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional Mutual\ - \ Information Maximization): Reference paper: Mohamed Bennasar, Yulia\ - \ Hicks, Rossitza Setchi, \u201CFeature selection using Joint Mutual Information\ - \ Maximisation,\u201D Expert Systems with Applications, vol. 42, issue\ - \ 22, 1 December 2015, Pages 8520-8532. JMIM(Joint Mutual Information\ - \ Maximization\nReference:\n paper: Mohamed Bennasar, Yulia Hicks, Rossitza\ - \ Setchi, \u201CFeature selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert Systems with Applications, vol. 42, issue 22, 1 December 2015,\ - \ Pages 8520-8532. 
MRMR(MIQ Minimum-redundancy Maximum-relevance): Reference\ - \ paper: Hanchuan Peng, Fuhui Long, and Chris Ding. \"Feature selection\ - \ based on mutual information criteria of max-dependency, max-relevance,\ - \ and min-redundancy.\" IEEE Transactions on pattern analysis and machine\ - \ intelligence 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - feature_selection_execution_engine: - defaultValue: dataflow - description: Execution engine to run feature selection; value can be dataflow - or bigquery. - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: Forecasting available at forecast columns. - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the holiday effect - is applied in modeling by adding a holiday categorical array feature that - includes all holidays matching the date. This option is only allowed when - the data granularity is day. By default, holiday effect modeling is disabled. - To turn it on, specify the holiday region using this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: Forecasting time series attribute columns. - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - description: '[Deprecated] A forecasting time series identifier column. - Raises an exception if used - use the "time_series_identifier_columns" - field instead.' - isOptional: true - parameterType: STRING - forecasting_time_series_identifier_columns: - defaultValue: [] - description: The list of forecasting time series identifier columns. 
- isOptional: true - parameterType: LIST - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: Forecasting unavailable at forecast columns. - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: The format to use for the materialized examples. Should be - either 'tfrecords_gzip' (default) or 'parquet'. - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: Maximum number of features to select. If specified, the transform - config will be purged by only using the selected features that ranked - top in the feature ranking, which has the ranking value for all supported - features. If the number of input features is smaller than max_selected_features - specified, we will still run the feature selection process and generate - the feature ranking, no features will be excluded. The value will be - set to 1000 by default if run_feature_selection is enabled. - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features for. Can be - one of: neural_network, boosted_trees, l2l, seq2seq, tft, or tide. Defaults - to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: List of multimodal image columns. Defaults to an empty list. - isOptional: true - parameterType: LIST - multimodal_tabular_columns: - defaultValue: [] - description: List of multimodal tabular columns. Defaults to an empty list - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: List of multimodal text columns. Defaults to an empty list - isOptional: true - parameterType: LIST - multimodal_timeseries_columns: - defaultValue: [] - description: List of multimodal timeseries columns. Defaults to an empty - list - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: Model prediction type. One of "classification", "regression", - "time_series". - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: (deprecated) Whether the distillation should be applied to - the training. - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: Whether the feature selection should be applied to the dataset. 
- isOptional: true - parameterType: BOOLEAN - stats_gen_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform statistics generation. Can be - one of: "dataflow" (by default) or "bigquery". Using "bigquery" as the - execution engine is experimental.' - isOptional: true - parameterType: STRING - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to TF transform - features. FTE will automatically configure a set of built-in transformations - for each feature based on its data statistics. If users do not want auto - type resolution, but want the set of transformations for a given type - to be automatically generated, they may specify pre-resolved transformations - types. The following type hint dict keys are supported: * ''auto'' * ''categorical'' - * ''numeric'' * ''text'' * ''timestamp'' Example: `{ "auto": ["feature1"], - "categorical": ["feature2", "feature3"], }`. Note that the target and - weight column may not be included as an auto transformation unless users - are running forecasting.' - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: 'List of TensorFlow-based custom transformation definitions. Custom, - bring-your-own transform functions, where users can define and import - their own transform function and use it with FTE''s built-in transformations. - `[ { "transformation": "PlusOne", "module_path": "gs://bucket/custom_transform_fn.py", - "function_name": "plus_one_transform" }, { "transformation": "MultiplyTwo", - "module_path": "gs://bucket/custom_transform_fn.py", "function_name": - "multiply_two_transform" } ] Using custom transform function together - with FTE''s built-in transformations: .. code-block:: python [ { "transformation": - "CastToFloat", "input_columns": ["feature_1"], "output_columns": ["feature_1"] - },{ "transformation": "PlusOne", "input_columns": ["feature_1"] "output_columns": - ["feature_1_plused_one"] },{ "transformation": "MultiplyTwo", "input_columns": - ["feature_1"] "output_columns": ["feature_1_multiplied_two"] } ]' - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform row-level TF transformations. - Can be one of: "dataflow" (by default) or "bigquery". Using "bigquery" - as the execution engine is experimental and is for allowlisted customers - only. In addition, executing on "bigquery" only supports auto transformations - (i.e., specified by tf_auto_transform_features) and will raise an error - when tf_custom_transformation_definitions or tf_transformations_path is - set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based transformation configuration. Path\ - \ to a JSON file used to specified FTE's TF transformation configurations.\ - \ In the following, we provide some sample transform configurations to\ - \ demonstrate FTE's capabilities. 
All transformations on input columns\
-          \ are explicitly specified with FTE's built-in transformations. Chaining\
-          \ of multiple transformations on a single column is also supported. For\
-          \ example: .. code-block:: python [ { \"transformation\": \"ZScale\"\
-          , \"input_columns\": [\"feature_1\"] }, { \"transformation\": \"ZScale\"\
-          , \"input_columns\": [\"feature_2\"] } ]. Additional information about\
-          \ FTE's currently supported built-in\ntransformations:\nDatetime: Extracts\
-          \ datetime features from a column containing timestamp strings.\n  Example:\
-          \ .. code-block:: python { \"transformation\": \"Datetime\", \"input_columns\"\
-          : [\"feature_1\"], \"time_format\": \"%Y-%m-%d\" }\n  Arguments:\n  \
-          \  input_columns: A list with a single column to perform the datetime\
-          \ transformation on.\n    output_columns: Names of output columns,\
-          \ one for each datetime_features element.\n    time_format: Datetime\
-          \ format string. Time format is a combination of Date + Time Delimiter\
-          \ (optional) + Time (optional) directives. Valid date directives are as\
-          \ follows * '%Y-%m-%d'  # 2018-11-30 * '%Y/%m/%d'  # 2018/11/30 * '%y-%m-%d'\
-          \  # 18-11-30 * '%y/%m/%d'  # 18/11/30 * '%m-%d-%Y'  # 11-30-2018 * '%m/%d/%Y'\
-          \  # 11/30/2018 * '%m-%d-%y'  # 11-30-18 * '%m/%d/%y'  # 11/30/18 * '%d-%m-%Y'\
-          \  # 30-11-2018 * '%d/%m/%Y'  # 30/11/2018 * '%d-%B-%Y'  # 30-November-2018\
-          \ * '%d-%m-%y'  # 30-11-18 * '%d/%m/%y'  # 30/11/18 * '%d-%B-%y'  # 30-November-18\
-          \ * '%d%m%Y'  # 30112018 * '%m%d%Y'  # 11302018 * '%Y%m%d'  # 20181130\
-          \  Valid time delimiters are as follows * 'T' * ' ' Valid time directives\
-          \ are as follows * '%H:%M'          # 23:59 * '%H:%M:%S'          #\n\
-          \  23:59:58 * '%H:%M:%S.%f'     # 23:59:58[.123456] * '%H:%M:%S.%f%z'\
-          \  # 23:59:58[.123456]+0000 * '%H:%M:%S%z',      # 23:59:58+0000\n \
-          \   datetime_features: List of datetime features to be extracted. Each entry\
-          \ must be one of * 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK' * 'DAY_OF_YEAR'\
-          \ * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR' * 'MINUTE' * 'SECOND' Defaults\
-          \ to ['YEAR', 'MONTH', 'DAY', 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\
-          Log: Performs the natural log on a numeric column.\n  Example: .. code-block::\
-          \ python { \"transformation\": \"Log\", \"input_columns\": [\"feature_1\"\
-          ] }\n  Arguments:\n    input_columns: A list with a single column\
-          \ to perform the log transformation on.\n    output_columns: A list\
-          \ with a single output column name, corresponding to the output of our\
-          \ transformation.\nZScale: Performs Z-scale normalization on a numeric\
-          \ column.\n  Example: .. code-block:: python { \"transformation\"\
-          : \"ZScale\", \"input_columns\": [\"feature_1\"] }\n  Arguments:\n  \
-          \  input_columns: A list with a single column to perform the z-scale\
-          \ transformation on.\n    output_columns: A list with a single output\
-          \ column name, corresponding to the output of our transformation.\nVocabulary:\
-          \ Converts strings to integers, where each unique string gets a unique\
-          \ integer representation.\n  Example: .. code-block:: python { \"\
-          transformation\": \"Vocabulary\", \"input_columns\": [\"feature_1\"] }\n\
-          \  Arguments:\n    input_columns: A list with a single column to\
-          \ perform the vocabulary transformation on.\n    output_columns: A\
-          \ list with a single output column name, corresponding to the output of\
-          \ our transformation.\n    top_k: Number of the most frequent words\
-          \ in the vocabulary to use for generating dictionary lookup indices. If\
-          \ not specified, all words in the vocabulary will be used. 
Defaults to\ - \ None.\n frequency_threshold: Limit the vocabulary only to words\ - \ whose number of occurrences in the input exceeds frequency_threshold.\ - \ If not specified, all words in the vocabulary will be included. If both\ - \ top_k and frequency_threshold are specified, a word must satisfy both\ - \ conditions to be included. Defaults to None.\nCategorical: Transforms\ - \ categorical columns to integer columns.\n Example: .. code-block::\ - \ python { \"transformation\": \"Categorical\", \"input_columns\": [\"\ - feature_1\"], \"top_k\": 10 }\n Arguments:\n input_columns:\ - \ A list with a single column to perform the categorical transformation\ - \ on.\n output_columns: A list with a single output column name,\ - \ corresponding to the output of our transformation.\n top_k: Number\ - \ of the most frequent words in the vocabulary to use for generating dictionary\ - \ lookup indices. If not specified, all words in the vocabulary will be\ - \ used.\n frequency_threshold: Limit the vocabulary only to words\ - \ whose number of occurrences in the input exceeds frequency_threshold.\ - \ If not specified, all words in the vocabulary will be included. If both\ - \ top_k and frequency_threshold are specified, a word must satisfy both\ - \ conditions to be included.\nReduce: Given a column where each entry\ - \ is a numeric array, reduces arrays according to our reduce_mode.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Reduce\"\ - , \"input_columns\": [\"feature_1\"], \"reduce_mode\": \"MEAN\", \"output_columns\"\ - : [\"feature_1_mean\"] }\n Arguments:\n input_columns: A list\ - \ with a single column to perform the reduce transformation on.\n \ - \ output_columns: A list with a single output column name, corresponding\ - \ to the output of our transformation.\n reduce_mode: One of *\ - \ 'MAX' * 'MIN' * 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k:\ - \ The number of last k elements when 'LAST_K' reduce mode is used. Defaults\ - \ to 1.\nSplitString: Given a column of strings, splits strings into token\ - \ arrays.\n Example: .. code-block:: python { \"transformation\"\ - : \"SplitString\", \"input_columns\": [\"feature_1\"], \"separator\":\ - \ \"$\" }\n Arguments:\n input_columns: A list with a single\ - \ column to perform the split string transformation on.\n output_columns:\ - \ A list with a single output column name, corresponding to the output\ - \ of our transformation.\n separator: Separator to split input\ - \ string into tokens. Defaults to ' '.\n missing_token: Missing\ - \ token to use when no string is included. Defaults to ' _MISSING_ '.\n\ - NGram: Given a column of strings, splits strings into token arrays where\ - \ each token is an integer.\n Example: .. code-block:: python { \"\ - transformation\": \"NGram\", \"input_columns\": [\"feature_1\"], \"min_ngram_size\"\ - : 1, \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n \ - \ input_columns: A list with a single column to perform the n-gram\ - \ transformation on.\n output_columns: A list with a single output\ - \ column name, corresponding to the output of our transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must be a positive number\ - \ and <= max_ngram_size. Defaults to 1.\n max_ngram_size: Maximum\ - \ n-gram size. Must be a positive number and >= min_ngram_size. Defaults\ - \ to 2.\n top_k: Number of the most frequent words in the vocabulary\ - \ to use for generating dictionary lookup indices. If not specified, all\ - \ words in the vocabulary will be used. 
Defaults to None.\n    frequency_threshold:\
-          \ Limit the dictionary's vocabulary only to words whose number of occurrences\
-          \ in the input exceeds frequency_threshold. If not specified, all words\
-          \ in the vocabulary will be included. If both top_k and frequency_threshold\
-          \ are specified, a word must satisfy both conditions to be included. Defaults\
-          \ to None.\n    separator: Separator to split input string into tokens.\
-          \ Defaults to ' '.\n    missing_token: Missing token to use when no\
-          \ string is included. Defaults to ' _MISSING_ '.\nClip: Given a numeric\
-          \ column, clips elements such that elements < min_value are assigned min_value,\
-          \ and elements > max_value are assigned max_value.\n  Example: .. code-block::\
-          \ python { \"transformation\": \"Clip\", \"input_columns\": [\"col1\"\
-          ], \"output_columns\": [\"col1_clipped\"], \"min_value\": 1., \"max_value\"\
-          : 10., }\n  Arguments:\n    input_columns: A list with a single\
-          \ column to perform the clip transformation on.\n    output_columns:\
-          \ A list with a single output column name, corresponding to the output\
-          \ of our transformation.\n    min_value: Number where all values below\
-          \ min_value are set to min_value. If no min_value is provided, min clipping\
-          \ will not occur. Defaults to None.\n    max_value: Number where all\
-          \ values above max_value are set to max_value. If no max_value is provided,\
-          \ max clipping will not occur. Defaults to None.\nMultiHotEncoding: Performs\
-          \ multi-hot encoding on a categorical array column.\n  Example: ..\
-          \ code-block:: python { \"transformation\": \"MultiHotEncoding\", \"\
-          input_columns\": [\"col1\"], } The number of classes is determined by\
-          \ the largest number included in the input if it is numeric or the total\
-          \ number of unique values of the input if it is type str. If the input\
-          \ has type str and an element contains separator tokens, the input\
-          \ will be split at separator indices, and each element of the split\
-          \ list will be considered a separate class. For example,\n Input:  \
-          \ .. code-block:: python [ [\"foo bar\"],   # Example 0 [\"foo\",\
-          \ \"bar\"], # Example 1 [\"foo\"],       # Example 2 [\"bar\"],     \
-          \  # Example 3 ] Output (with default separator=\" \"): .. code-block::\
-          \ python [ [1, 1],  # Example 0 [1, 1],  # Example 1 [1,\
-          \ 0],  # Example 2 [0, 1],  # Example 3 ]\n  Arguments:\n\
-          \    input_columns: A list with a single column to perform the multi-hot-encoding\
-          \ on.\n    output_columns: A list with a single output column name,\
-          \ corresponding to the output of our transformation.\n    top_k: Number\
-          \ of the most frequent words in the vocabulary to use for generating dictionary\
-          \ lookup indices. If not specified, all words in the vocabulary will be\
-          \ used. Defaults to None.\n    frequency_threshold: Limit the dictionary's\
-          \ vocabulary only to words whose number of occurrences in the input exceeds\
-          \ frequency_threshold. If not specified, all words in the vocabulary will\
-          \ be included. If both top_k and frequency_threshold are specified, a\
-          \ word must satisfy both conditions to be included. Defaults to None.\n\
-          \    separator: Separator to split input string into tokens. Defaults\
-          \ to ' '.\nMaxAbsScale: Performs maximum absolute scaling on a numeric\
-          \ column.\n  Example: ..
code-block:: python { \"transformation\"\ - : \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\":\ - \ [\"col1_max_abs_scaled\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to perform max-abs-scale on.\n output_columns:\ - \ A list with a single output column name, corresponding to the output\ - \ of our transformation.\nCustom: Transformations defined in tf_custom_transformation_definitions\ - \ are included here in the TensorFlow-based transformation configuration.\ - \ For example, given the following tf_custom_transformation_definitions:\ - \ .. code-block:: python [ { \"transformation\": \"PlusX\", \"module_path\"\ - : \"gs://bucket/custom_transform_fn.py\", \"function_name\": \"plus_one_transform\"\ - \ } ] We can include the following transformation: .. code-block:: python\ - \ { \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"], \"\ - output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note that input_columns\ - \ must still be included in our arguments and output_columns is optional.\ - \ All other arguments are those defined in custom_transform_fn.py, which\ - \ includes `\"x\"` in this case. See tf_custom_transformation_definitions\ - \ above. legacy_transformations_path (Optional[str]) Deprecated. Prefer\ - \ tf_auto_transform_features. Path to a GCS file containing JSON string\ - \ for legacy style transformations. Note that legacy_transformations_path\ - \ and tf_auto_transform_features cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The ranking of features, all features supported in the dataset - will be included. For "AMI" algorithm, array features won't be available - in the ranking as arrays are not supported yet. - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: BigQuery URI for the downsampled test split to pass to the - batch prediction component during batch explain. - parameterType: STRING - bigquery_test_split_uri: - description: BigQuery URI for the test split to pass to the batch prediction - component during evaluation. - parameterType: STRING - bigquery_train_split_uri: - description: BigQuery URI for the train split to pass to the batch prediction - component during distillation. 
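As a companion to the tf_custom_transformation_definitions and tf_transformations_path descriptions above, here is a minimal sketch of how the documented PlusOne custom transform could be chained with a built-in ZScale and written out as the JSON configuration file that tf_transformations_path points to; the local filename stands in for the GCS path a real run would use:

.. code-block:: python

    import json

    # Custom transform definition, verbatim from the documented example.
    tf_custom_transformation_definitions = [
        {
            "transformation": "PlusOne",
            "module_path": "gs://bucket/custom_transform_fn.py",
            "function_name": "plus_one_transform",
        },
    ]

    # Built-in ZScale chained with the custom PlusOne, per the documented
    # grammar for combining built-in and custom transformations.
    tf_transformations = [
        {"transformation": "ZScale", "input_columns": ["feature_1"]},
        {
            "transformation": "PlusOne",
            "input_columns": ["feature_1"],
            "output_columns": ["feature_1_plused_one"],
        },
    ]

    # tf_transformations_path expects a path to a JSON file; a local file
    # is used here purely for illustration.
    with open("transform_config.json", "w") as f:
        json.dump(tf_transformations, f, indent=2)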
- parameterType: STRING - bigquery_validation_split_uri: - description: BigQuery URI for the validation split to pass to the batch - prediction component during distillation. - parameterType: STRING - gcp_resources: - description: GCP resources created by this component. For more details, - see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - parameterType: STRING - split_example_counts: - description: JSON string of data split example counts for train, validate, - and test splits. - parameterType: STRING - comp-finalize-eval-quantile-parameters: - executorLabel: exec-finalize-eval-quantile-parameters - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-finalize-eval-quantile-parameters-2: - executorLabel: exec-finalize-eval-quantile-parameters-2 - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-get-or-create-model-description: - executorLabel: exec-get-or-create-model-description - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-or-create-model-description-2: - executorLabel: exec-get-or-create-model-description-2 - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri: - executorLabel: exec-get-prediction-image-uri - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri-2: - executorLabel: exec-get-prediction-image-uri-2 - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column: - executorLabel: exec-get-predictions-column - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column-2: - executorLabel: exec-get-predictions-column-2 - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - 
parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - 
parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - `unmanaged_container_model` must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - `prediction__` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - `predictions`, and `errors`. If the Model has both `instance` - - and `prediction` schemata defined then the tables have columns as - - follows: The `predictions` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. 
The `errors` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has [google.rpc.Status](Status) - - represented as a STRUCT, and containing only `code` and - - `message`. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - key_field is not specified. - - When `excluded_fields` is populated, `included_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - `prediction--`, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - - `predictions_0001.`, `predictions_0002.`, - - ..., `predictions_N.` are created where `` - - depends on chosen `predictions_format`, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both `instance` and `prediction` schemata defined - - then each such file contains predictions as per the - - `predictions_format`. If prediction for any instance failed - - (partially or completely), then an additional - - `errors_0001.`, `errors_0002.`,..., - - `errors_N.` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional `error` field which as - - value has `google.rpc.Status` containing only `code` and - - `message` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: 'Google Cloud Storage URI(-s) to your instances to run batch - prediction - - on. They must match `instances_format`. May contain wildcards. For more - - information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). - - For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If `instance_type` is `array`, the order of field names in - - `included_fields` also determines the order of the values in the array. - - When `included_fields` is populated, `excluded_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model\naccepts. Vertex\ - \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ - to the specified format. 
Supported values are:\n`object`: Each input is\
-          \ converted to JSON object format.\n * For `bigquery`, each row is converted\
-          \ to an object.\n * For `jsonl`, each line of the JSONL input must be\
-          \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\
-          \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\
-          \ * For `bigquery`, each row is converted to an array. The order\n \
-          \   of columns is determined by the BigQuery column order, unless\n  \
-          \  [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\
-          \ is populated.\n    `included_fields` must be populated for specifying\
-          \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\
-          \ object,\n    `included_fields` must be populated for specifying field\
-          \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\
-          \    `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\
-          \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\
-          \ is the same as `array`. The\n    order of columns is the same as defined\
-          \ in the file or table, unless\n    included_fields is populated.\n * For\
-          \ `jsonl`, the prediction instance format is determined by\n    each line\
-          \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\
-          \ be converted to\n    an object in the format of `{\"b64\": <value>}`,\
-          \ where `<value>` is\n    the Base64-encoded string of the content of the\
-          \ record.\n * For `file-list`, each file in the list will be converted\
-          \ to an\n    object in the format of `{\"b64\": <value>}`, where `<value>`\
-          \ is\n    the Base64-encoded string of the content of the file."
-        isOptional: true
-        parameterType: STRING
-      instances_format:
-        defaultValue: jsonl
-        description: 'The format in which instances are
-
-          given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s
-          supportedInputStorageFormats.
-
-          For more details about this input config, see
-
-          [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)'
-        isOptional: true
-        parameterType: STRING
-      job_display_name:
-        description: The user-defined name of this BatchPredictionJob.
-        parameterType: STRING
-      key_field:
-        defaultValue: ''
-        description: "The name of the field that is considered as a key.\nThe values\
-          \ identified by the key field are not included in the\ntransformed instances\
-          \ that are sent to the Model. This is similar to\nspecifying the name\
-          \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\
-          \ In addition,\nthe batch prediction output will not include the instances.\
-          \ Instead the\noutput will only include the value of the key field, in\
-          \ a field named\n`key` in the output:\n * For `jsonl` output format, the\
-          \ output will have a `key` field\n   instead of the `instance` field.\n\
-          \ * For `csv`/`bigquery` output format, the output will have a `key`\n\
-          \   column instead of the instance feature columns.\nThe input must be\
-          \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord."
-        isOptional: true
-        parameterType: STRING
-      labels:
-        defaultValue: {}
-        description: 'The labels with user-defined metadata to
-
-          organize your BatchPredictionJobs. 
Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: Location for creating the BatchPredictionJob. - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: 'The format in which Vertex AI gives the predictions. Must - be one of the - - Model''s supportedOutputStorageFormats. - - For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' - isOptional: true - parameterType: STRING - project: - defaultValue: '{{$.pipeline_google_cloud_project_id}}' - description: Project to create the BatchPredictionJob. Defaults to the project - in which the PipelineJob is run. - isOptional: true - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. 
This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - `unmanaged_container_model` must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - `prediction__` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - `predictions`, and `errors`. If the Model has both `instance` - - and `prediction` schemata defined then the tables have columns as - - follows: The `predictions` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The `errors` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has [google.rpc.Status](Status) - - represented as a STRUCT, and containing only `code` and - - `message`. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - key_field is not specified. - - When `excluded_fields` is populated, `included_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - `prediction--`, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - `predictions_0001.`, `predictions_0002.`, - - ..., `predictions_N.` are created where `` - - depends on chosen `predictions_format`, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both `instance` and `prediction` schemata defined - - then each such file contains predictions as per the - - `predictions_format`. If prediction for any instance failed - - (partially or completely), then an additional - - `errors_0001.`, `errors_0002.`,..., - - `errors_N.` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional `error` field which as - - value has `google.rpc.Status` containing only `code` and - - `message` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: 'Google Cloud Storage URI(-s) to your instances to run batch - prediction - - on. They must match `instances_format`. May contain wildcards. For more - - information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). - - For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If `instance_type` is `array`, the order of field names in - - `included_fields` also determines the order of the values in the array. - - When `included_fields` is populated, `excluded_fields` must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model\naccepts. Vertex\ - \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ - to the specified format. Supported values are:\n`object`: Each input is\ - \ converted to JSON object format.\n * For `bigquery`, each row is converted\ - \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ - \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ - \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ - \ * For `bigquery`, each row is converted to an array. 
The order\n \
-          \   of columns is determined by the BigQuery column order, unless\n  \
-          \  [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\
-          \ is populated.\n    `included_fields` must be populated for specifying\
-          \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\
-          \ object,\n    `included_fields` must be populated for specifying field\
-          \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\
-          \    `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\
-          \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\
-          \ is the same as `array`. The\n    order of columns is the same as defined\
-          \ in the file or table, unless\n    included_fields is populated.\n * For\
-          \ `jsonl`, the prediction instance format is determined by\n    each line\
-          \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\
-          \ be converted to\n    an object in the format of `{\"b64\": <value>}`,\
-          \ where `<value>` is\n    the Base64-encoded string of the content of the\
-          \ record.\n * For `file-list`, each file in the list will be converted\
-          \ to an\n    object in the format of `{\"b64\": <value>}`, where `<value>`\
-          \ is\n    the Base64-encoded string of the content of the file."
-        isOptional: true
-        parameterType: STRING
-      instances_format:
-        defaultValue: jsonl
-        description: 'The format in which instances are
-
-          given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s
-          supportedInputStorageFormats.
-
-          For more details about this input config, see
-
-          [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)'
-        isOptional: true
-        parameterType: STRING
-      job_display_name:
-        description: The user-defined name of this BatchPredictionJob.
-        parameterType: STRING
-      key_field:
-        defaultValue: ''
-        description: "The name of the field that is considered as a key.\nThe values\
-          \ identified by the key field are not included in the\ntransformed instances\
-          \ that are sent to the Model. This is similar to\nspecifying the name\
-          \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\
-          \ In addition,\nthe batch prediction output will not include the instances.\
-          \ Instead the\noutput will only include the value of the key field, in\
-          \ a field named\n`key` in the output:\n * For `jsonl` output format, the\
-          \ output will have a `key` field\n   instead of the `instance` field.\n\
-          \ * For `csv`/`bigquery` output format, the output will have a `key`\n\
-          \   column instead of the instance feature columns.\nThe input must be\
-          \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord."
-        isOptional: true
-        parameterType: STRING
-      labels:
-        defaultValue: {}
-        description: 'The labels with user-defined metadata to
-
-          organize your BatchPredictionJobs. Label keys and values can be no
-
-          longer than 64 characters (Unicode codepoints), can only contain
-
-          lowercase letters, numeric characters, underscores and dashes.
-
-          International characters are allowed. See https://goo.gl/xmQnxf for
-
-          more information and examples of labels.'
-        isOptional: true
-        parameterType: STRUCT
-      location:
-        defaultValue: us-central1
-        description: Location for creating the BatchPredictionJob. 
- location:
- defaultValue: us-central1
- description: Location for creating the BatchPredictionJob.
- isOptional: true
- parameterType: STRING
- machine_type:
- defaultValue: ''
- description: 'The type of machine for running batch
-
- prediction on dedicated resources. If the Model supports
-
- DEDICATED_RESOURCES this config may be provided (and the job will use
-
- these resources). If the Model doesn''t support AUTOMATIC_RESOURCES,
-
- this config must be provided. For more details about the
-
- BatchDedicatedResources, see
-
- https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources.
-
- For more details about the machine spec, see
-
- https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
- isOptional: true
- parameterType: STRING
- manual_batch_tuning_parameters_batch_size:
- defaultValue: 0.0
- description: 'The number of
-
- records (e.g. instances) of the operation given in each batch to a
-
- machine replica. Machine type and size of a single record should be
-
- considered when setting this parameter; a higher value speeds up the
-
- batch operation''s execution, but too high a value will result in a whole
-
- batch not fitting in a machine''s memory, and the whole operation will
-
- fail.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- max_replica_count:
- defaultValue: 0.0
- description: 'The maximum number of machine replicas the batch operation
- may be scaled
-
- to. Only used if `machine_type` is set.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- model_parameters:
- defaultValue: {}
- description: The parameters that govern the predictions. The schema of the
- parameters may be specified via the Model's `parameters_schema_uri`.
- isOptional: true
- parameterType: STRUCT
- predictions_format:
- defaultValue: jsonl
- description: 'The format in which Vertex AI gives the predictions. Must
- be one of the
-
- Model''s supportedOutputStorageFormats.
-
- For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).'
- isOptional: true
- parameterType: STRING
- project:
- defaultValue: '{{$.pipeline_google_cloud_project_id}}'
- description: Project to create the BatchPredictionJob. Defaults to the project
- in which the PipelineJob is run.
- isOptional: true
- parameterType: STRING
- starting_replica_count:
- defaultValue: 0.0
- description: 'The number of machine replicas
-
- used at the start of the batch operation. If not set, Vertex AI
-
- decides the starting number, not greater than `max_replica_count`. Only
-
- used if `machine_type` is set.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- outputDefinitions:
- artifacts:
- batchpredictionjob:
- artifactType:
- schemaTitle: google.VertexBatchPredictionJob
- schemaVersion: 0.0.1
- description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table
-
- instead.**] Artifact
-
- representation of the created batch prediction job.'
- bigquery_output_table:
- artifactType:
- schemaTitle: google.BQTable
- schemaVersion: 0.0.1
- description: 'Artifact tracking the batch prediction job output. This is
- only
-
- available if
-
- bigquery_output_table is specified.'
- gcs_output_directory:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: 'Artifact tracking the batch prediction job output. This is
- only
-
- available if
-
- gcs_destination_output_uri_prefix is specified.'
- parameters:
- gcp_resources:
- description: 'Serialized gcp_resources proto tracking the batch prediction
- job.
- - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation-forecasting: - executorLabel: exec-model-evaluation-forecasting - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-forecasting-2: - executorLabel: exec-model-evaluation-forecasting-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: 
- defaultValue: true
- isOptional: true
- parameterType: BOOLEAN
- dataflow_workers_num:
- defaultValue: 1.0
- isOptional: true
- parameterType: NUMBER_INTEGER
- encryption_spec_key_name:
- defaultValue: ''
- isOptional: true
- parameterType: STRING
- example_weight_column:
- defaultValue: ''
- isOptional: true
- parameterType: STRING
- forecasting_quantiles:
- defaultValue:
- - 0.5
- isOptional: true
- parameterType: LIST
- forecasting_type:
- defaultValue: point
- isOptional: true
- parameterType: STRING
- ground_truth_bigquery_source:
- defaultValue: ''
- isOptional: true
- parameterType: STRING
- ground_truth_format:
- defaultValue: jsonl
- isOptional: true
- parameterType: STRING
- ground_truth_gcs_source:
- defaultValue: []
- isOptional: true
- parameterType: LIST
- location:
- defaultValue: us-central1
- isOptional: true
- parameterType: STRING
- point_evaluation_quantile:
- defaultValue: 0.5
- isOptional: true
- parameterType: NUMBER_DOUBLE
- prediction_score_column:
- defaultValue: ''
- isOptional: true
- parameterType: STRING
- predictions_format:
- defaultValue: jsonl
- isOptional: true
- parameterType: STRING
- project:
- parameterType: STRING
- root_dir:
- parameterType: STRING
- target_field_name:
- parameterType: STRING
- outputDefinitions:
- artifacts:
- evaluation_metrics:
- artifactType:
- schemaTitle: google.ForecastingMetrics
- schemaVersion: 0.0.1
- parameters:
- gcp_resources:
- parameterType: STRING
- comp-model-evaluation-import:
- executorLabel: exec-model-evaluation-import
- inputDefinitions:
- artifacts:
- classification_metrics:
- artifactType:
- schemaTitle: google.ClassificationMetrics
- schemaVersion: 0.0.1
- description: 'google.ClassificationMetrics artifact generated from
-
- the ModelEvaluationClassificationOp component.'
- isOptional: true
- embedding_metrics:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'The embedding metrics artifact generated from the
-
- embedding retrieval metrics component.'
- isOptional: true
- explanation:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'Path for model explanation metrics generated from an evaluation
-
- component.'
- isOptional: true
- feature_attributions:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'The feature attributions metrics artifact generated
-
- from the feature attribution component.'
- isOptional: true
- forecasting_metrics:
- artifactType:
- schemaTitle: google.ForecastingMetrics
- schemaVersion: 0.0.1
- description: 'google.ForecastingMetrics artifact generated from
-
- the ModelEvaluationForecastingOp component.'
- isOptional: true
- metrics:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: Path of metrics generated from an evaluation component.
- isOptional: true
- model:
- artifactType:
- schemaTitle: google.VertexModel
- schemaVersion: 0.0.1
- description: 'Vertex model resource that will be the parent resource of
- the
-
- uploaded evaluation.'
- question_answering_metrics:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'system.Metrics artifact generated from
-
- the LLMEvaluationTextGenerationOp component. Subject to change to
-
- google.QuestionAnsweringMetrics.'
- isOptional: true
- regression_metrics:
- artifactType:
- schemaTitle: google.RegressionMetrics
- schemaVersion: 0.0.1
- description: 'google.RegressionMetrics artifact generated from
-
- the ModelEvaluationRegressionOp component.'
- isOptional: true
- summarization_metrics:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'system.Metrics artifact generated from
-
- the LLMEvaluationTextGenerationOp component. Subject to change to
-
- google.SummarizationMetrics.'
- isOptional: true
- text_generation_metrics:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'system.Metrics artifact generated from
-
- the LLMEvaluationTextGenerationOp component. Subject to change to
-
- google.TextGenerationMetrics.'
- isOptional: true
- parameters:
- dataset_path:
- defaultValue: ''
- isOptional: true
- parameterType: STRING
- dataset_paths:
- defaultValue: []
- isOptional: true
- parameterType: LIST
- dataset_type:
- defaultValue: ''
- isOptional: true
- parameterType: STRING
- display_name:
- defaultValue: ''
- description: The display name for the uploaded model evaluation resource.
- isOptional: true
- parameterType: STRING
- problem_type:
- description: 'The problem type of the metrics being imported to the
-
- VertexModel. `classification`, `regression`, `forecasting`,
-
- `text-generation`, `question-answering`, and `summarization` are the
-
- currently supported problem types. Must be provided when `metrics` is
-
- provided.'
- isOptional: true
- parameterType: STRING
- outputDefinitions:
- parameters:
- evaluation_resource_name:
- parameterType: STRING
- gcp_resources:
- parameterType: STRING
- comp-model-evaluation-import-2:
- executorLabel: exec-model-evaluation-import-2
- inputDefinitions:
- artifacts:
- classification_metrics:
- artifactType:
- schemaTitle: google.ClassificationMetrics
- schemaVersion: 0.0.1
- description: 'google.ClassificationMetrics artifact generated from
-
- the ModelEvaluationClassificationOp component.'
- isOptional: true
- embedding_metrics:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'The embedding metrics artifact generated from the
-
- embedding retrieval metrics component.'
- isOptional: true
- explanation:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'Path for model explanation metrics generated from an evaluation
-
- component.'
- isOptional: true
- feature_attributions:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'The feature attributions metrics artifact generated
-
- from the feature attribution component.'
- isOptional: true
- forecasting_metrics:
- artifactType:
- schemaTitle: google.ForecastingMetrics
- schemaVersion: 0.0.1
- description: 'google.ForecastingMetrics artifact generated from
-
- the ModelEvaluationForecastingOp component.'
- isOptional: true
- metrics:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: Path of metrics generated from an evaluation component.
- isOptional: true
- model:
- artifactType:
- schemaTitle: google.VertexModel
- schemaVersion: 0.0.1
- description: 'Vertex model resource that will be the parent resource of
- the
-
- uploaded evaluation.'
- question_answering_metrics:
- artifactType:
- schemaTitle: system.Metrics
- schemaVersion: 0.0.1
- description: 'system.Metrics artifact generated from
-
- the LLMEvaluationTextGenerationOp component. Subject to change to
-
- google.QuestionAnsweringMetrics.'
- isOptional: true
- regression_metrics:
- artifactType:
- schemaTitle: google.RegressionMetrics
- schemaVersion: 0.0.1
- description: 'google.RegressionMetrics artifact generated from
-
- the ModelEvaluationRegressionOp component.'
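Each import accepts the parent `model` plus at most one problem-specific metrics artifact; `problem_type` only needs to be set when the generic `metrics` input is used. A minimal sketch of the wiring, assuming `ModelImportEvaluationOp` is importable from the `_implementation.model_evaluation` package (an internal path that can move between releases) and using `dsl.importer` to stand in for the upstream evaluation and upload steps:

    from kfp import dsl
    from google_cloud_pipeline_components.types import artifact_types
    # Internal path as of this release; not a stable public surface.
    from google_cloud_pipeline_components._implementation.model_evaluation import (
        ModelImportEvaluationOp,
    )

    @dsl.pipeline(name='import-forecasting-eval')
    def import_forecasting_eval(metrics_uri: str, model_resource_name: str):
        # Stand-in for a ModelEvaluationForecastingOp output (google.ForecastingMetrics).
        metrics = dsl.importer(
            artifact_uri=metrics_uri,
            artifact_class=artifact_types.ForecastingMetrics,
        )
        # Stand-in for a ModelUploadOp output (google.VertexModel).
        model = dsl.importer(
            artifact_uri=model_resource_name,
            artifact_class=artifact_types.VertexModel,
            metadata={'resourceName': model_resource_name},
        )
        ModelImportEvaluationOp(
            model=model.output,
            forecasting_metrics=metrics.output,
            display_name='forecasting-evaluation',
        )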
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.SummarizationMetrics.' - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'system.Metrics artifact generated from - - the LLMEvaluationTextGenerationOp component. Subject to change to - - google.TextGenerationMetrics.' - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, `forecasting`, - - `text-generation`, `question-answering`, and `summarization` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - evaluation_resource_name: - parameterType: STRING - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: 
true
- parameterType: STRUCT
- location:
- defaultValue: us-central1
- isOptional: true
- parameterType: STRING
- project:
- parameterType: STRING
- outputDefinitions:
- artifacts:
- model:
- artifactType:
- schemaTitle: google.VertexModel
- schemaVersion: 0.0.1
- parameters:
- gcp_resources:
- parameterType: STRING
- comp-set-optional-inputs:
- executorLabel: exec-set-optional-inputs
- inputDefinitions:
- artifacts:
- vertex_dataset:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: The Vertex dataset when data source is Vertex dataset.
- parameters:
- data_source_bigquery_table_path:
- description: The BigQuery table when data source is BQ.
- parameterType: STRING
- data_source_csv_filenames:
- description: The CSV GCS path when data source is CSV.
- parameterType: STRING
- location:
- description: The GCP region that runs the pipeline components.
- parameterType: STRING
- model_display_name:
- description: The uploaded model's display name.
- parameterType: STRING
- project:
- description: The GCP project that runs the pipeline components.
- parameterType: STRING
- stats_gen_execution_engine:
- description: Execution engine used for stats gen in FTE.
- parameterType: STRING
- transformations:
- description: Forecasting transformations to append the stats gen engine to.
- parameterType: STRUCT
- outputDefinitions:
- parameters:
- data_source_bigquery_table_path:
- parameterType: STRING
- data_source_csv_filenames:
- parameterType: STRING
- model_display_name:
- parameterType: STRING
- transformations:
- parameterType: STRUCT
- comp-split-materialized-data:
- executorLabel: exec-split-materialized-data
- inputDefinitions:
- artifacts:
- materialized_data:
- artifactType:
- schemaTitle: system.Dataset
- schemaVersion: 0.0.1
- description: 'Materialized dataset output by the Feature
-
- Transform Engine.'
- outputDefinitions:
- artifacts:
- materialized_eval_split:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: Path pattern to materialized eval split.
- materialized_test_split:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: Path pattern to materialized test split.
- materialized_train_split:
- artifactType:
- schemaTitle: system.Artifact
- schemaVersion: 0.0.1
- description: Path pattern to materialized train split.
- comp-string-not-empty:
- executorLabel: exec-string-not-empty
- inputDefinitions:
- parameters:
- value:
- description: String value to be checked.
- parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Dataset stats generated by feature transform engine. - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Schema of input data to the tf_model at serving time. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: The names of the columns that are available at forecast time. - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: If probabilistic inference is enabled, the model will fit a - distribution that captures the uncertainty of a prediction. At inference - time, the predictive distribution is used to make a point prediction that - minimizes the optimization objective. For example, the mean of a predictive - distribution is the point prediction that minimizes RMSE loss. If quantiles - are specified, then the quantiles of the distribution are also returned. - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: Dict mapping auto and/or type-resolutions to feature columns. - The supported types are auto, categorical, numeric, text, and timestamp. - isOptional: true - parameterType: STRUCT - group_columns: - description: A list of time series attribute column names that define the - time series hierarchy. - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over both - the horizon and time series in the same hierarchy group. - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: The weight of the loss for predictions aggregated over time - series in the same group. 
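The `enable_probabilistic_inference` description above encodes a standard decision-theory fact: the point forecast is whichever summary of the predictive distribution minimizes the chosen objective. A toy numpy illustration (the lognormal is an arbitrary stand-in for the fitted distribution):

    import numpy as np

    # Samples standing in for the model's predictive distribution at one step.
    rng = np.random.default_rng(0)
    samples = rng.lognormal(mean=3.0, sigma=0.5, size=100_000)

    point_for_rmse = samples.mean()            # the mean minimizes expected squared error
    point_for_mae = float(np.median(samples))  # the median minimizes expected absolute error
    # When `quantiles` is set, the distribution's quantiles are returned as well.
    q10, q50, q90 = np.quantile(samples, [0.1, 0.5, 0.9])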
- isOptional: true
- parameterType: NUMBER_DOUBLE
- optimization_objective:
- defaultValue: ''
- description: 'Objective function the model is optimizing towards. The training
- process creates a model that maximizes/minimizes the value of the objective
- function over the validation set. The supported optimization objectives
- depend on the prediction type. If the field is not set, a default objective
- function is used. classification: "maximize-au-roc" (default) - Maximize
- the area under the receiver operating characteristic (ROC) curve. "minimize-log-loss"
- - Minimize log loss. "maximize-au-prc" - Maximize the area under the precision-recall
- curve. "maximize-precision-at-recall" - Maximize precision for a specified
- recall value. "maximize-recall-at-precision" - Maximize recall for a specified
- precision value. classification (multi-class): "minimize-log-loss" (default)
- - Minimize log loss. regression: "minimize-rmse" (default) - Minimize
- root-mean-squared error (RMSE). "minimize-mae" - Minimize mean-absolute
- error (MAE). "minimize-rmsle" - Minimize root-mean-squared log error
- (RMSLE).'
- isOptional: true
- parameterType: STRING
- optimization_objective_precision_value:
- defaultValue: -1.0
- description: Required when optimization_objective is "maximize-recall-at-precision".
- Must be between 0 and 1, inclusive.
- isOptional: true
- parameterType: NUMBER_DOUBLE
- optimization_objective_recall_value:
- defaultValue: -1.0
- description: Required when optimization_objective is "maximize-precision-at-recall".
- Must be between 0 and 1, inclusive.
- isOptional: true
- parameterType: NUMBER_DOUBLE
- prediction_type:
- defaultValue: ''
- description: Model prediction type. One of "classification", "regression",
- "time_series".
- isOptional: true
- parameterType: STRING
- quantiles:
- defaultValue: []
- description: All quantiles that the model needs to predict.
- isOptional: true
- parameterType: LIST
- run_distill:
- defaultValue: false
- description: Whether the distillation should be applied to the training.
- isOptional: true
- parameterType: BOOLEAN
- run_evaluation:
- defaultValue: false
- description: Whether we are running evaluation in the training pipeline.
- isOptional: true
- parameterType: BOOLEAN
- split_example_counts:
- description: JSON string of data split example counts for train, validate,
- and test splits.
- parameterType: STRING
- stage_1_deadline_hours:
- description: Stage 1 training budget in hours.
- isOptional: true
- parameterType: NUMBER_DOUBLE
- stage_2_deadline_hours:
- description: Stage 2 training budget in hours.
- isOptional: true
- parameterType: NUMBER_DOUBLE
- target_column:
- defaultValue: ''
- description: Target column of input data.
- isOptional: true
- parameterType: STRING
- temporal_total_weight:
- defaultValue: 0.0
- description: The weight of the loss for predictions aggregated over the
- horizon for a single time series.
- isOptional: true
- parameterType: NUMBER_DOUBLE
- time_column:
- defaultValue: ''
- description: The column that indicates the time. Used by forecasting only.
- isOptional: true
- parameterType: STRING
- time_series_attribute_columns:
- defaultValue: []
- description: The column names of the time series attributes.
- isOptional: true
- parameterType: LIST
- time_series_identifier_column:
- description: '[Deprecated] The time series identifier column. Used by forecasting
- only. Raises exception if used - use the "time_series_identifier_columns"
- field instead.'
- isOptional: true - parameterType: STRING - time_series_identifier_columns: - defaultValue: [] - description: The list of time series identifier columns. Used by forecasting - only. - isOptional: true - parameterType: LIST - unavailable_at_forecast_columns: - defaultValue: [] - description: The names of the columns that are not available at forecast - time. - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. -deploymentSpec: - executors: - exec-automl-forecasting-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, - "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", - "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", - "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", - "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", - "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", - "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", - "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", - "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", - "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, - "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", - "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", - "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", - "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", - "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", - "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", - "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", - "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", - "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", 
"{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-2-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - 
args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20240214_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - - --dataflow_machine_type - - 
'{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_runner_mode - - '{{$.inputs.parameters[''force_runner_mode'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_runner_mode - - '{{$.inputs.parameters[''force_runner_mode'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": 
["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", - "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' - - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", 
"{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' - - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - 
- --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-finalize-eval-quantile-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = 
[]\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-finalize-eval-quantile-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-or-create-model-description: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n if original_description:\n\ - \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ - \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ - \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-or-create-model-description-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef 
get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n if original_description:\n\ - \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ - \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ - \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-prediction-image-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20240214_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20240214_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20240214_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20240214_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-prediction-image-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20240214_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20240214_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20240214_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20240214_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-predictions-column: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-get-predictions-column-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-model-batch-explanation: - container: - 
args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", 
"\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", 
"{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", 
"{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-evaluation-forecasting: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - 
exec-model-evaluation-forecasting-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", - "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --evaluation_resource_name - - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": 
"embedding_metrics", "Then": ["--embedding_metrics", - "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --evaluation_resource_name - - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", - "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.17 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", - "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' - command: - - python3 - - -u - - -m - - launcher - image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.17 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n stats_gen_execution_engine: str,\n transformations: dict,\n\ - ) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ('transformations', dict),\n ],\n):\n \"\"\"Get the\ - \ data source URI.\n\n Args:\n project: The GCP project that runs the\ - \ pipeline components.\n location: The GCP region that runs the pipeline\ - \ components.\n data_source_csv_filenames: The CSV GCS path when data\ - \ source is CSV.\n data_source_bigquery_table_path: The BigQuery table\ - \ when data source is BQ.\n vertex_dataset: The Vertex dataset when data\ - \ source is Vertex dataset.\n model_display_name: The uploaded model's\ - \ display name.\n stats_gen_execution_engine: Execution engine used for\ - \ stats gen in FTE.\n transformations: forecasting transformations to\ - \ append stats gen engine to.\n\n Returns:\n A named tuple of CSV or\ - \ BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n # TODO(b/261504514) Remove this handling when we use the FTE transform\ - \ config.\n transformations['stats_gen_execution_engine'] = stats_gen_execution_engine\n\ - \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ - \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ - \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ - \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ - \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ - \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ - \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ - \ return collections.namedtuple(\n 'Outputs',\n [\n \ - \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n 'transformations',\n ],\n\ - \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ - \ model_display_name,\n transformations,\n )\n\n" - image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path pattern to materialized_train_split.\n\ - \ materialized_eval_split: Path pattern to materialized_eval_split.\n\ - \ materialized_test_split: Path pattern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. 
'true' if the value is not empty, 'false' if empty.\ - \ We need to use str\n instead of bool due to a limitation in the KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - 
'{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": - ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' - - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325 -pipelineInfo: - description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. 
- name: time-series-dense-encoder-forecasting -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - artifacts: - pipelinechannel--parent_model: - componentInputArtifact: parent_model - parameters: - pipelinechannel--available_at_forecast_columns: - componentInputParameter: available_at_forecast_columns - pipelinechannel--context_window: - componentInputParameter: context_window - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - componentInputParameter: feature_transform_engine_dataflow_disk_size_gb - 
pipelinechannel--feature_transform_engine_dataflow_machine_type: - componentInputParameter: feature_transform_engine_dataflow_machine_type - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - componentInputParameter: feature_transform_engine_dataflow_max_num_workers - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--group_columns: - componentInputParameter: group_columns - pipelinechannel--group_temporal_total_weight: - componentInputParameter: group_temporal_total_weight - pipelinechannel--group_total_weight: - componentInputParameter: group_total_weight - pipelinechannel--holiday_regions: - componentInputParameter: holiday_regions - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--model_display_name: - componentInputParameter: model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: num_selected_trials - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--quantiles: - componentInputParameter: quantiles - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-transformations: - taskOutputParameter: - outputParameterKey: transformations - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: stage_2_trainer_worker_pool_specs_override - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--temporal_total_weight: - componentInputParameter: temporal_total_weight - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_attribute_columns: - componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_columns: - componentInputParameter: time_series_identifier_columns - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transformations: - 
componentInputParameter: transformations - pipelinechannel--unavailable_at_forecast_columns: - componentInputParameter: unavailable_at_forecast_columns - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_predefined_column: - componentInputParameter: window_predefined_column - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - stats_gen_execution_engine: - runtimeValue: - constant: bigquery - transformations: - componentInputParameter: transformations - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Vertex model to upload the model as a version to. - isOptional: true - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - available_at_forecast_columns: - description: 'The columns that are available at the - - forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: 0.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: The full service account name. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: The dataflow subnetwork. - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: '`True` to enable dataflow public IPs.' - isOptional: true - parameterType: BOOLEAN - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is enabled, the - - model will fit a distribution that captures the uncertainty of a - - prediction. If quantiles are specified, then the quantiles of the - - distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluated_examples_bigquery_path: - defaultValue: '' - description: 'The BigQuery dataset to write the - - predicted examples into for evaluation, in the format - - `bq://project.dataset`. Only necessary if evaluation is enabled.'
- isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 22.0 - description: 'The max number of prediction - - servers for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 22.0 - description: 'The initial number of - - prediction servers for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the batch prediction - - job in evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 25.0 - description: 'The maximum count of replicas - - the batch prediction job can scale to.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 25.0 - description: 'Number of replicas to use - - in the batch prediction cluster at startup time.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: The disk space in GB for dataflow. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the dataflow job in - - evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 25.0 - description: Maximum number of dataflow workers. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 22.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - feature_transform_engine_bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The full id of - - the feature transform engine staging dataset.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size of the - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_transform_engine_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type of - - the feature transform engine.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon: - defaultValue: 0.0 - description: The length of the horizon. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - description: 'A list of time series attribute column names that define the - - time series hierarchy.'
- isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions - - aggregated over both the horizon and time series in the same hierarchy - - group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated over - - time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - holiday_regions: - description: 'The geographical regions where the holiday effect is - - applied in modeling.' - isOptional: true - parameterType: LIST - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_description: - defaultValue: '' - description: Optional description. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - description: Optional display name for model. - isOptional: true - parameterType: STRING - num_selected_trials: - defaultValue: 10.0 - description: Number of selected trials. - isOptional: true - parameterType: NUMBER_INTEGER - optimization_objective: - description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", - - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - - "minimize-quantile-loss".' - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - quantiles: - description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles - - are allowed of values between 0 and 1, exclusive. Represents the quantiles - - to use for that objective. Quantiles must be unique.' - isOptional: true - parameterType: LIST - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: '`True` to evaluate the ensembled model on the test split.' - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trials for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.' - isOptional: true - parameterType: LIST - stage_1_tuning_result_artifact_uri: - defaultValue: '' - description: 'The stage 1 tuning result artifact GCS - - URI.' - isOptional: true - parameterType: STRING - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trials for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 2 trainer worker pool spec.' - isOptional: true - parameterType: LIST - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated - - over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: The test fraction.
- isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - description: The column that indicates the time. - parameterType: STRING - time_series_attribute_columns: - description: 'The columns that are invariant across the - - same time series.' - isOptional: true - parameterType: LIST - time_series_identifier_columns: - description: 'The columns that distinguish the different - - time series.' - parameterType: LIST - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. - isOptional: true - parameterType: STRING - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours, i.e. a value of 1,000 in this field means - - 1 node hour.' - parameterType: NUMBER_DOUBLE - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transformations: - description: 'Dict mapping auto and/or type-resolutions to feature - - columns. The supported types are: auto, categorical, numeric, text, and - - timestamp.' - parameterType: STRUCT - unavailable_at_forecast_columns: - description: 'The columns that are unavailable at the - - forecast time.' - isOptional: true - parameterType: LIST - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - window_max_count: - defaultValue: 0.0 - description: The maximum number of windows that will be generated. - isOptional: true - parameterType: NUMBER_INTEGER - window_predefined_column: - defaultValue: '' - description: The column that indicates the start of each window. - isOptional: true - parameterType: STRING - window_stride_length: - defaultValue: 0.0 - description: The stride length to generate the window.
- isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py index 553d4f7f134..31610deb9bd 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py @@ -1,929 +1,11 @@ """Util functions for Vertex Forecasting pipelines.""" -import logging import os import pathlib -from typing import Any, Dict, FrozenSet, List, Optional, Tuple +from typing import Any, Dict, Tuple _GCPC_FORECASTING_PATH = pathlib.Path(__file__).parent.resolve() -_RETAIL_MODEL_DISABLED_OPTIONS = frozenset([ - 'quantiles', - 'enable_probabilistic_inference', -]) - - -def _get_base_forecasting_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: str, - optimization_objective: str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_columns: List[str], - time_series_identifier_column: Optional[str] = None, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: 
Optional[Dict[str, Any]] = None, - enable_probabilistic_inference: bool = False, - quantiles: Optional[List[float]] = None, - encryption_spec_key_name: Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = True, - group_columns: Optional[List[str]] = None, - group_total_weight: float = 0.0, - temporal_total_weight: float = 0.0, - group_temporal_total_weight: float = 0.0, - fields_to_exclude: FrozenSet[str] = frozenset(), -) -> Dict[str, Any]: - """Formats a set of parameters common across Vertex forecasting pipelines.""" - if not study_spec_parameters_override: - study_spec_parameters_override = [] - if not stage_1_tuner_worker_pool_specs_override: - stage_1_tuner_worker_pool_specs_override = [] - if not stage_2_trainer_worker_pool_specs_override: - stage_2_trainer_worker_pool_specs_override = [] - - if time_series_identifier_column: - logging.warning( - 'Deprecation warning: `time_series_identifier_column` will soon be' - ' deprecated in favor of `time_series_identifier_columns`. Please' - ' migrate workloads to use the new field.' - ) - time_series_identifier_columns = [time_series_identifier_column] - - parameter_values = {} - parameters = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'dataflow_service_account': dataflow_service_account, - 'evaluated_examples_bigquery_path': evaluated_examples_bigquery_path, - 'target_column': target_column, - 'optimization_objective': optimization_objective, - 'transformations': transformations, - 'train_budget_milli_node_hours': train_budget_milli_node_hours, - 'time_column': time_column, - 'time_series_identifier_columns': time_series_identifier_columns, - 'time_series_attribute_columns': time_series_attribute_columns, - 'available_at_forecast_columns': available_at_forecast_columns, - 'unavailable_at_forecast_columns': unavailable_at_forecast_columns, - 'forecast_horizon': forecast_horizon, - 'context_window': context_window, - 'window_predefined_column': window_predefined_column, - 'window_stride_length': window_stride_length, - 'window_max_count': window_max_count, - 'holiday_regions': holiday_regions, - 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, - 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, - 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, - 'num_selected_trials': num_selected_trials, - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'predefined_split_key': predefined_split_key, - 'timestamp_split_key': timestamp_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'weight_column': weight_column, - 'dataflow_subnetwork': dataflow_subnetwork, - 'feature_transform_engine_dataflow_machine_type': ( - feature_transform_engine_dataflow_machine_type - ), - 'feature_transform_engine_dataflow_max_num_workers': ( - feature_transform_engine_dataflow_max_num_workers - ), - 'feature_transform_engine_dataflow_disk_size_gb': ( - feature_transform_engine_dataflow_disk_size_gb - ), - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'feature_transform_engine_bigquery_staging_full_dataset_id': ( - feature_transform_engine_bigquery_staging_full_dataset_id - ), - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - 
evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'study_spec_parameters_override': study_spec_parameters_override, - 'stage_1_tuner_worker_pool_specs_override': ( - stage_1_tuner_worker_pool_specs_override - ), - 'stage_2_trainer_worker_pool_specs_override': ( - stage_2_trainer_worker_pool_specs_override - ), - 'quantiles': quantiles, - 'encryption_spec_key_name': encryption_spec_key_name, - 'enable_probabilistic_inference': enable_probabilistic_inference, - 'model_display_name': model_display_name, - 'model_description': model_description, - 'run_evaluation': run_evaluation, - 'group_columns': group_columns, - 'group_total_weight': group_total_weight, - 'temporal_total_weight': temporal_total_weight, - 'group_temporal_total_weight': group_temporal_total_weight, - } - - # Filter out empty values and those excluded from the particular pipeline. - # (example: TFT and Seq2Seq don't support `quantiles`.) - parameter_values.update({ - param: value - for param, value in parameters.items() - if value is not None and param not in fields_to_exclude - }) - return parameter_values - - -def get_learn_to_learn_forecasting_pipeline_and_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: str, - optimization_objective: str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_columns: List[str], - time_series_identifier_column: Optional[str] = None, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - 
evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - enable_probabilistic_inference: bool = False, - quantiles: Optional[List[float]] = None, - encryption_spec_key_name: Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = True, - group_columns: Optional[List[str]] = None, - group_total_weight: float = 0.0, - temporal_total_weight: float = 0.0, - group_temporal_total_weight: float = 0.0, -) -> Tuple[str, Dict[str, Any]]: - # fmt: off - """Returns l2l_forecasting pipeline and formatted parameters. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or "minimize-quantile-loss". - transformations: Dict mapping auto and/or type-resolutions to feature columns. The supported types are: auto, categorical, numeric, text, and timestamp. - train_budget_milli_node_hours: The train budget of creating this model, expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node hour. - time_column: The column that indicates the time. - time_series_identifier_columns: The columns which distinguish different time series. - time_series_identifier_column: [Deprecated] The column which distinguishes different time series. - time_series_attribute_columns: The columns that are invariant across the same time series. - available_at_forecast_columns: The columns that are available at the forecast time. - unavailable_at_forecast_columns: The columns that are unavailable at the forecast time. - forecast_horizon: The length of the horizon. - context_window: The length of the context window. - evaluated_examples_bigquery_path: The BigQuery dataset to write the predicted examples into for evaluation, in the format `bq://project.dataset`. - window_predefined_column: The column that indicates the start of each window. - window_stride_length: The stride length to generate the window. - window_max_count: The maximum number of windows that will be generated. - holiday_regions: The geographical regions where the holiday effect is applied in modeling. - stage_1_num_parallel_trials: Number of parallel trials for stage 1. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS URI. - stage_2_num_parallel_trials: Number of parallel trials for stage 2. - num_selected_trials: Number of selected trials. - data_source_csv_filenames: A string that represents a list of comma separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format bq://bq_project.bq_dataset.bq_table - predefined_split_key: The predefined_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: The test fraction. - weight_column: The weight column name. - dataflow_service_account: The full service account name. - dataflow_subnetwork: The dataflow subnetwork. - dataflow_use_public_ips: `True` to enable dataflow public IPs.
- feature_transform_engine_bigquery_staging_full_dataset_id: The full id of the feature transform engine staging dataset. - feature_transform_engine_dataflow_machine_type: The dataflow machine type of the feature transform engine. - feature_transform_engine_dataflow_max_num_workers: The max number of dataflow workers of the feature transform engine. - feature_transform_engine_dataflow_disk_size_gb: The disk size of the dataflow workers of the feature transform engine. - evaluation_batch_predict_machine_type: Machine type for the batch prediction job in evaluation, such as 'n1-standard-16'. - evaluation_batch_predict_starting_replica_count: Number of replicas to use in the batch prediction cluster at startup time. - evaluation_batch_predict_max_replica_count: The maximum count of replicas the batch prediction job can scale to. - evaluation_dataflow_machine_type: Machine type for the dataflow job in evaluation, such as 'n1-standard-16'. - evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. - evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. - study_spec_parameters_override: The list for overriding study spec. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding stage 1 tuner worker pool spec. - stage_2_trainer_worker_pool_specs_override: The dictionary for overriding stage 2 trainer worker pool spec. - enable_probabilistic_inference: If probabilistic inference is enabled, the model will fit a distribution that captures the uncertainty of a prediction. If quantiles are specified, then the quantiles of the distribution are also returned. - quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles are allowed of values between 0 and 1, exclusive. Represents the quantiles to use for that objective. Quantiles must be unique. - encryption_spec_key_name: The KMS key name. - model_display_name: Optional display name for model. - model_description: Optional description. - run_evaluation: `True` to evaluate the ensembled model on the test split. - group_columns: A list of time series attribute column names that define the time series hierarchy. - group_total_weight: The weight of the loss for predictions aggregated over time series in the same group. - temporal_total_weight: The weight of the loss for predictions aggregated over the horizon for a single time series. - group_temporal_total_weight: The weight of the loss for predictions aggregated over both the horizon and time series in the same hierarchy group. - - Returns: - Tuple of pipeline_definition_path and parameter_values. 
- """ - # fmt: on - parameter_values = _get_base_forecasting_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - time_column=time_column, - dataflow_service_account=dataflow_service_account, - time_series_identifier_columns=time_series_identifier_columns, - time_series_identifier_column=time_series_identifier_column, - time_series_attribute_columns=time_series_attribute_columns, - available_at_forecast_columns=available_at_forecast_columns, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - forecast_horizon=forecast_horizon, - context_window=context_window, - window_predefined_column=window_predefined_column, - window_stride_length=window_stride_length, - window_max_count=window_max_count, - holiday_regions=holiday_regions, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - num_selected_trials=num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - dataflow_use_public_ips=dataflow_use_public_ips, - dataflow_subnetwork=dataflow_subnetwork, - feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, - feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, - feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, - feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - study_spec_parameters_override=study_spec_parameters_override, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, - quantiles=quantiles, - encryption_spec_key_name=encryption_spec_key_name, - enable_probabilistic_inference=enable_probabilistic_inference, - model_display_name=model_display_name, - model_description=model_description, - run_evaluation=run_evaluation, - group_columns=group_columns, - group_total_weight=group_total_weight, - temporal_total_weight=temporal_total_weight, - group_temporal_total_weight=group_temporal_total_weight, - ) - - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, - 'learn_to_learn_forecasting_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: 
str, - optimization_objective: str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_columns: List[str], - time_series_identifier_column: Optional[str] = None, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - enable_probabilistic_inference: bool = False, - quantiles: Optional[List[float]] = None, - encryption_spec_key_name: Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = True, - group_columns: Optional[List[str]] = None, - group_total_weight: float = 0.0, - temporal_total_weight: float = 0.0, - group_temporal_total_weight: float = 0.0, -) -> Tuple[str, Dict[str, Any]]: - # fmt: off - """Returns timeseries_dense_encoder_forecasting pipeline and parameters. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or "minimize-quantile-loss". - transformations: Dict mapping auto and/or type-resolutions to feature columns. The supported types are: auto, categorical, numeric, text, and timestamp. - train_budget_milli_node_hours: The train budget of creating this model, expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node hour.
- time_column: The column that indicates the time. - time_series_identifier_columns: The columns which distinguish different time series. - time_series_identifier_column: [Deprecated] The column which distinguishes different time series. - time_series_attribute_columns: The columns that are invariant across the same time series. - available_at_forecast_columns: The columns that are available at the forecast time. - unavailable_at_forecast_columns: The columns that are unavailable at the forecast time. - forecast_horizon: The length of the horizon. - context_window: The length of the context window. - evaluated_examples_bigquery_path: The BigQuery dataset to write the predicted examples into for evaluation, in the format `bq://project.dataset`. - window_predefined_column: The column that indicates the start of each window. - window_stride_length: The stride length to generate the window. - window_max_count: The maximum number of windows that will be generated. - holiday_regions: The geographical regions where the holiday effect is applied in modeling. - stage_1_num_parallel_trials: Number of parallel trials for stage 1. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS URI. - stage_2_num_parallel_trials: Number of parallel trials for stage 2. - num_selected_trials: Number of selected trials. - data_source_csv_filenames: A string that represents a list of comma separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format bq://bq_project.bq_dataset.bq_table - predefined_split_key: The predefined_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: The test fraction. - weight_column: The weight column name. - dataflow_service_account: The full service account name. - dataflow_subnetwork: The dataflow subnetwork. - dataflow_use_public_ips: `True` to enable dataflow public IPs. - feature_transform_engine_bigquery_staging_full_dataset_id: The full id of the feature transform engine staging dataset. - feature_transform_engine_dataflow_machine_type: The dataflow machine type of the feature transform engine. - feature_transform_engine_dataflow_max_num_workers: The max number of dataflow workers of the feature transform engine. - feature_transform_engine_dataflow_disk_size_gb: The disk size of the dataflow workers of the feature transform engine. - evaluation_batch_predict_machine_type: Machine type for the batch prediction job in evaluation, such as 'n1-standard-16'. - evaluation_batch_predict_starting_replica_count: Number of replicas to use in the batch prediction cluster at startup time. - evaluation_batch_predict_max_replica_count: The maximum count of replicas the batch prediction job can scale to. - evaluation_dataflow_machine_type: Machine type for the dataflow job in evaluation, such as 'n1-standard-16'. - evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. - evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. - study_spec_parameters_override: The list for overriding study spec. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding stage 1 tuner worker pool spec. - stage_2_trainer_worker_pool_specs_override: The dictionary for overriding stage 2 trainer worker pool spec. - enable_probabilistic_inference: If probabilistic inference is enabled, the model will fit a distribution that captures the uncertainty of a prediction.
If quantiles are specified, then the quantiles of the distribution are also returned. - quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles are allowed of values between 0 and 1, exclusive. Represents the quantiles to use for that objective. Quantiles must be unique. - encryption_spec_key_name: The KMS key name. - model_display_name: Optional display name for model. - model_description: Optional description. - run_evaluation: `True` to evaluate the ensembled model on the test split. - group_columns: A list of time series attribute column names that define the time series hierarchy. - group_total_weight: The weight of the loss for predictions aggregated over time series in the same group. - temporal_total_weight: The weight of the loss for predictions aggregated over the horizon for a single time series. - group_temporal_total_weight: The weight of the loss for predictions aggregated over both the horizon and time series in the same hierarchy group. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - # fmt: on - parameter_values = _get_base_forecasting_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - time_column=time_column, - dataflow_service_account=dataflow_service_account, - time_series_identifier_columns=time_series_identifier_columns, - time_series_identifier_column=time_series_identifier_column, - time_series_attribute_columns=time_series_attribute_columns, - available_at_forecast_columns=available_at_forecast_columns, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - forecast_horizon=forecast_horizon, - context_window=context_window, - window_predefined_column=window_predefined_column, - window_stride_length=window_stride_length, - window_max_count=window_max_count, - holiday_regions=holiday_regions, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - num_selected_trials=num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - dataflow_use_public_ips=dataflow_use_public_ips, - dataflow_subnetwork=dataflow_subnetwork, - feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, - feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, - feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, - feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - 
evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - study_spec_parameters_override=study_spec_parameters_override, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, - quantiles=quantiles, - encryption_spec_key_name=encryption_spec_key_name, - enable_probabilistic_inference=enable_probabilistic_inference, - model_display_name=model_display_name, - model_description=model_description, - run_evaluation=run_evaluation, - group_columns=group_columns, - group_total_weight=group_total_weight, - temporal_total_weight=temporal_total_weight, - group_temporal_total_weight=group_temporal_total_weight, - ) - - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, - 'time_series_dense_encoder_forecasting_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: str, - optimization_objective: str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_columns: List[str], - time_series_identifier_column: Optional[str] = None, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - encryption_spec_key_name: Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = True, -): - # fmt: off - """Returns tft_forecasting pipeline and formatted parameters. 
- - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or "minimize-quantile-loss". - transformations: Dict mapping auto and/or type-resolutions to feature columns. The supported types are: auto, categorical, numeric, text, and timestamp. - train_budget_milli_node_hours: The train budget of creating this model, expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node hour. - time_column: The column that indicates the time. - time_series_identifier_columns: The columns which distinguish different time series. - time_series_identifier_column: [Deprecated] The column which distinguishes different time series. - time_series_attribute_columns: The columns that are invariant across the same time series. - available_at_forecast_columns: The columns that are available at the forecast time. - unavailable_at_forecast_columns: The columns that are unavailable at the forecast time. - forecast_horizon: The length of the horizon. - context_window: The length of the context window. - evaluated_examples_bigquery_path: The BigQuery dataset to write the predicted examples into for evaluation, in the format `bq://project.dataset`. - window_predefined_column: The column that indicates the start of each window. - window_stride_length: The stride length to generate the window. - window_max_count: The maximum number of windows that will be generated. - holiday_regions: The geographical regions where the holiday effect is applied in modeling. - stage_1_num_parallel_trials: Number of parallel trials for stage 1. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS URI. - stage_2_num_parallel_trials: Number of parallel trials for stage 2. - data_source_csv_filenames: A string that represents a list of comma separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format bq://bq_project.bq_dataset.bq_table - predefined_split_key: The predefined_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: The test fraction. - weight_column: The weight column name. - dataflow_service_account: The full service account name. - dataflow_subnetwork: The dataflow subnetwork. - dataflow_use_public_ips: `True` to enable dataflow public IPs. - feature_transform_engine_bigquery_staging_full_dataset_id: The full id of the feature transform engine staging dataset. - feature_transform_engine_dataflow_machine_type: The dataflow machine type of the feature transform engine. - feature_transform_engine_dataflow_max_num_workers: The max number of dataflow workers of the feature transform engine. - feature_transform_engine_dataflow_disk_size_gb: The disk size of the dataflow workers of the feature transform engine. - evaluation_batch_predict_machine_type: Machine type for the batch prediction job in evaluation, such as 'n1-standard-16'. - evaluation_batch_predict_starting_replica_count: Number of replicas to use in the batch prediction cluster at startup time. - evaluation_batch_predict_max_replica_count: The maximum count of replicas the batch prediction job can scale to. - evaluation_dataflow_machine_type: Machine type for the dataflow job in evaluation, such as 'n1-standard-16'.
- evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. - evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. - study_spec_parameters_override: The list for overriding study spec. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding stage 1 tuner worker pool spec. - stage_2_trainer_worker_pool_specs_override: The dictionary for overriding stage 2 trainer worker pool spec. - encryption_spec_key_name: The KMS key name. - model_display_name: Optional display name for model. - model_description: Optional description. - run_evaluation: `True` to evaluate the ensembled model on the test split. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - # fmt: on - # TFT should only have 1 selected trial to freeze the ensemble size at 1. - excluded_parameters = _RETAIL_MODEL_DISABLED_OPTIONS.union({ - 'num_selected_trials', - }) - parameter_values = _get_base_forecasting_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - time_column=time_column, - dataflow_service_account=dataflow_service_account, - time_series_identifier_columns=time_series_identifier_columns, - time_series_identifier_column=time_series_identifier_column, - time_series_attribute_columns=time_series_attribute_columns, - available_at_forecast_columns=available_at_forecast_columns, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - forecast_horizon=forecast_horizon, - context_window=context_window, - window_predefined_column=window_predefined_column, - window_stride_length=window_stride_length, - window_max_count=window_max_count, - holiday_regions=holiday_regions, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - dataflow_use_public_ips=dataflow_use_public_ips, - dataflow_subnetwork=dataflow_subnetwork, - feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, - feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, - feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, - feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - study_spec_parameters_override=study_spec_parameters_override, - 
stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, - encryption_spec_key_name=encryption_spec_key_name, - model_display_name=model_display_name, - model_description=model_description, - run_evaluation=run_evaluation, - fields_to_exclude=excluded_parameters, - ) - - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, - 'temporal_fusion_transformer_forecasting_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_sequence_to_sequence_forecasting_pipeline_and_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: str, - optimization_objective: str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_columns: List[str], - time_series_identifier_column: Optional[str] = None, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - encryption_spec_key_name: Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = True, -): - # fmt: off - """Returns seq2seq forecasting pipeline and formatted parameters. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. 
-    optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or "minimize-quantile-loss".
-    transformations: Dict mapping auto and/or type-resolutions to feature columns. The supported types are: auto, categorical, numeric, text, and timestamp.
-    train_budget_milli_node_hours: The train budget of creating this model, expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node hour.
-    time_column: The column that indicates the time.
-    time_series_identifier_columns: The columns which distinguish different time series.
-    time_series_identifier_column: [Deprecated] The column which distinguishes different time series.
-    time_series_attribute_columns: The columns that are invariant across the same time series.
-    available_at_forecast_columns: The columns that are available at the forecast time.
-    unavailable_at_forecast_columns: The columns that are unavailable at the forecast time.
-    forecast_horizon: The length of the horizon.
-    context_window: The length of the context window.
-    evaluated_examples_bigquery_path: The BigQuery dataset to write the predicted examples into for evaluation, in the format `bq://project.dataset`.
-    window_predefined_column: The column that indicates the start of each window.
-    window_stride_length: The stride length used to generate the window.
-    window_max_count: The maximum number of windows that will be generated.
-    holiday_regions: The geographical regions where the holiday effect is applied in modeling.
-    stage_1_num_parallel_trials: Number of parallel trials for stage 1.
-    stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS URI.
-    stage_2_num_parallel_trials: Number of parallel trials for stage 2.
-    num_selected_trials: Number of selected trials.
-    data_source_csv_filenames: A string that represents a list of comma-separated CSV filenames.
-    data_source_bigquery_table_path: The BigQuery table path in the format `bq://bq_project.bq_dataset.bq_table`.
-    predefined_split_key: The predefined_split column name.
-    training_fraction: The training fraction.
-    validation_fraction: The validation fraction.
-    test_fraction: The test fraction.
-    weight_column: The weight column name.
-    dataflow_service_account: The full service account name.
-    dataflow_subnetwork: The dataflow subnetwork.
-    dataflow_use_public_ips: `True` to enable dataflow public IPs.
-    feature_transform_engine_bigquery_staging_full_dataset_id: The full id of the feature transform engine staging dataset.
-    feature_transform_engine_dataflow_machine_type: The dataflow machine type of the feature transform engine.
-    feature_transform_engine_dataflow_max_num_workers: The max number of dataflow workers of the feature transform engine.
-    feature_transform_engine_dataflow_disk_size_gb: The disk size of the dataflow workers of the feature transform engine.
-    evaluation_batch_predict_machine_type: Machine type for the batch prediction job in evaluation, such as 'n1-standard-16'.
-    evaluation_batch_predict_starting_replica_count: Number of replicas to use in the batch prediction cluster at startup time.
-    evaluation_batch_predict_max_replica_count: The maximum count of replicas the batch prediction job can scale to.
-    evaluation_dataflow_machine_type: Machine type for the dataflow job in evaluation, such as 'n1-standard-16'.
-    evaluation_dataflow_max_num_workers: Maximum number of dataflow workers.
-    evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow.
-    study_spec_parameters_override: The list for overriding study spec.
-    stage_1_tuner_worker_pool_specs_override: The dictionary for overriding stage 1 tuner worker pool spec.
-    stage_2_trainer_worker_pool_specs_override: The dictionary for overriding stage 2 trainer worker pool spec.
-    encryption_spec_key_name: The KMS key name.
-    model_display_name: Optional display name for model.
-    model_description: Optional description.
-    run_evaluation: `True` to evaluate the ensembled model on the test split.
-
-  Returns:
-    Tuple of pipeline_definition_path and parameter_values.
-  """
-  # fmt: on
-  parameter_values = _get_base_forecasting_parameters(
-      project=project,
-      location=location,
-      root_dir=root_dir,
-      target_column=target_column,
-      evaluated_examples_bigquery_path=evaluated_examples_bigquery_path,
-      optimization_objective=optimization_objective,
-      transformations=transformations,
-      train_budget_milli_node_hours=train_budget_milli_node_hours,
-      time_column=time_column,
-      dataflow_service_account=dataflow_service_account,
-      time_series_identifier_columns=time_series_identifier_columns,
-      time_series_identifier_column=time_series_identifier_column,
-      time_series_attribute_columns=time_series_attribute_columns,
-      available_at_forecast_columns=available_at_forecast_columns,
-      unavailable_at_forecast_columns=unavailable_at_forecast_columns,
-      forecast_horizon=forecast_horizon,
-      context_window=context_window,
-      window_predefined_column=window_predefined_column,
-      window_stride_length=window_stride_length,
-      window_max_count=window_max_count,
-      holiday_regions=holiday_regions,
-      stage_1_num_parallel_trials=stage_1_num_parallel_trials,
-      stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri,
-      stage_2_num_parallel_trials=stage_2_num_parallel_trials,
-      num_selected_trials=num_selected_trials,
-      data_source_csv_filenames=data_source_csv_filenames,
-      data_source_bigquery_table_path=data_source_bigquery_table_path,
-      predefined_split_key=predefined_split_key,
-      training_fraction=training_fraction,
-      validation_fraction=validation_fraction,
-      test_fraction=test_fraction,
-      weight_column=weight_column,
-      dataflow_use_public_ips=dataflow_use_public_ips,
-      dataflow_subnetwork=dataflow_subnetwork,
-      feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id,
-      feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type,
-      feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers,
-      feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb,
-      evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type,
-      evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count,
-      evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count,
-      evaluation_dataflow_machine_type=evaluation_dataflow_machine_type,
-      evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers,
-      evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb,
-      study_spec_parameters_override=study_spec_parameters_override,
-      stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override,
-      stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override,
-      encryption_spec_key_name=encryption_spec_key_name,
-      model_display_name=model_display_name,
-      model_description=model_description,
-      run_evaluation=run_evaluation,
-      fields_to_exclude=_RETAIL_MODEL_DISABLED_OPTIONS,
-  )
-
-  pipeline_definition_path = os.path.join(
-      _GCPC_FORECASTING_PATH,
-      'sequence_to_sequence_forecasting_pipeline.yaml',
-  )
-
-  return pipeline_definition_path, parameter_values
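The `fields_to_exclude` argument is what differentiates the deleted model variants: the TFT helper additionally drops `num_selected_trials` (to freeze its ensemble size at 1), while the seq2seq helper drops only the retail-model options. The shared `_get_base_forecasting_parameters` helper is not shown in this hunk; below is a simplified, hypothetical sketch of the filtering it performs. Only `fields_to_exclude` and the helper's name come from this patch; the body is illustrative, not the real implementation.

from typing import Any, Dict, FrozenSet


def _get_base_forecasting_parameters_sketch(
    *,
    fields_to_exclude: FrozenSet[str] = frozenset(),
    **kwargs: Any,
) -> Dict[str, Any]:
  # Keep only the parameters the caller actually set...
  parameter_values = {k: v for k, v in kwargs.items() if v is not None}
  # ...then drop any fields the chosen model variant does not support.
  return {
      k: v for k, v in parameter_values.items() if k not in fields_to_exclude
  }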
-
 def get_bqml_arima_train_pipeline_and_parameters(
     project: str,