460 | 460 |   " name=\"Merge Taxi Data\",\n",
461 | 461 |   " script_name=\"merge.py\", \n",
462 | 462 |   " arguments=[\"--output_merge\", merged_data],\n",
463 |     | - " inputs=[cleansed_green_data.parse_parquet_files(file_extension=None),\n",
464 |     | - "         cleansed_yellow_data.parse_parquet_files(file_extension=None)],\n",
    | 463 | + " inputs=[cleansed_green_data.parse_parquet_files(),\n",
    | 464 | + "         cleansed_yellow_data.parse_parquet_files()],\n",
465 | 465 |   " outputs=[merged_data],\n",
466 | 466 |   " compute_target=aml_compute,\n",
467 | 467 |   " runconfig=aml_run_config,\n",

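For context, here is the edited "Merge Taxi Data" cell as plain Python rather than escaped notebook JSON. This is a minimal sketch: the variable name `merge_step` is illustrative, it assumes `cleansed_green_data`, `cleansed_yellow_data`, `merged_data`, `aml_compute`, and `aml_run_config` are defined in earlier cells, and it omits any keyword arguments (such as `source_directory`) that fall outside this hunk.

```python
from azureml.pipeline.steps import PythonScriptStep

# Merge step after the change: parse_parquet_files() is now called
# without the file_extension=None argument.
merge_step = PythonScriptStep(
    name="Merge Taxi Data",
    script_name="merge.py",
    arguments=["--output_merge", merged_data],
    inputs=[cleansed_green_data.parse_parquet_files(),
            cleansed_yellow_data.parse_parquet_files()],
    outputs=[merged_data],
    compute_target=aml_compute,
    runconfig=aml_run_config,
)
```
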
497 | 497 |   " name=\"Filter Taxi Data\",\n",
498 | 498 |   " script_name=\"filter.py\", \n",
499 | 499 |   " arguments=[\"--output_filter\", filtered_data],\n",
500 |     | - " inputs=[merged_data.parse_parquet_files(file_extension=None)],\n",
    | 500 | + " inputs=[merged_data.parse_parquet_files()],\n",
501 | 501 |   " outputs=[filtered_data],\n",
502 | 502 |   " compute_target=aml_compute,\n",
503 | 503 |   " runconfig = aml_run_config,\n",

533 | 533 |   " name=\"Normalize Taxi Data\",\n",
534 | 534 |   " script_name=\"normalize.py\", \n",
535 | 535 |   " arguments=[\"--output_normalize\", normalized_data],\n",
536 |     | - " inputs=[filtered_data.parse_parquet_files(file_extension=None)],\n",
    | 536 | + " inputs=[filtered_data.parse_parquet_files()],\n",
537 | 537 |   " outputs=[normalized_data],\n",
538 | 538 |   " compute_target=aml_compute,\n",
539 | 539 |   " runconfig = aml_run_config,\n",

574 | 574 |   " name=\"Transform Taxi Data\",\n",
575 | 575 |   " script_name=\"transform.py\", \n",
576 | 576 |   " arguments=[\"--output_transform\", transformed_data],\n",
577 |     | - " inputs=[normalized_data.parse_parquet_files(file_extension=None)],\n",
    | 577 | + " inputs=[normalized_data.parse_parquet_files()],\n",
578 | 578 |   " outputs=[transformed_data],\n",
579 | 579 |   " compute_target=aml_compute,\n",
580 | 580 |   " runconfig = aml_run_config,\n",

614 | 614 |   " script_name=\"train_test_split.py\", \n",
615 | 615 |   " arguments=[\"--output_split_train\", output_split_train,\n",
616 | 616 |   "            \"--output_split_test\", output_split_test],\n",
617 |     | - " inputs=[transformed_data.parse_parquet_files(file_extension=None)],\n",
    | 617 | + " inputs=[transformed_data.parse_parquet_files()],\n",
618 | 618 |   " outputs=[output_split_train, output_split_test],\n",
619 | 619 |   " compute_target=aml_compute,\n",
620 | 620 |   " runconfig = aml_run_config,\n",

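The filter, normalize, transform, and train/test split hunks apply the same one-argument change. As one example, the split step would look roughly like this after the edit; the sketch assumes `transformed_data`, `output_split_train`, and `output_split_test` are pipeline dataset objects defined earlier in the notebook, and it leaves out the step's `name` argument, which is not shown in this hunk.

```python
from azureml.pipeline.steps import PythonScriptStep

# Train/test split step after dropping file_extension=None; it produces the
# two outputs that the AutoML configuration below consumes.
split_step = PythonScriptStep(
    script_name="train_test_split.py",
    arguments=["--output_split_train", output_split_train,
               "--output_split_test", output_split_test],
    inputs=[transformed_data.parse_parquet_files()],
    outputs=[output_split_train, output_split_test],
    compute_target=aml_compute,
    runconfig=aml_run_config,
)
```
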
690 | 690 |   "    \"n_cross_validations\": 5\n",
691 | 691 |   "}\n",
692 | 692 |   "\n",
693 |     | - "training_dataset = output_split_train.parse_parquet_files(file_extension=None).keep_columns(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor', 'cost'])\n",
    | 693 | + "training_dataset = output_split_train.parse_parquet_files().keep_columns(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor', 'cost'])\n",
694 | 694 |   "\n",
695 | 695 |   "automl_config = AutoMLConfig(task = 'regression',\n",
696 | 696 |   "                             debug_log = 'automated_ml_errors.log',\n",
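The final hunk makes the same change where the training dataset is built for the AutoML step. A hedged sketch of that cell follows: the name `automl_settings` for the settings dict (whose closing lines appear in the hunk above), the `training_data`/`label_column_name` arguments, and `'cost'` as the label column are assumptions inferred from the kept columns, not shown verbatim in this diff.

```python
from azureml.train.automl import AutoMLConfig

# automl_settings is assumed to be the dict whose last entry,
# "n_cross_validations": 5, appears in the hunk above.
training_dataset = output_split_train.parse_parquet_files().keep_columns(
    ['pickup_weekday', 'pickup_hour', 'distance', 'passengers', 'vendor', 'cost'])

automl_config = AutoMLConfig(task='regression',
                             debug_log='automated_ml_errors.log',
                             training_data=training_dataset,
                             label_column_name='cost',  # assumption: 'cost' is the target
                             compute_target=aml_compute,
                             **automl_settings)
```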