[AUTO-MERGE] Merge main into develop via merge-main-to-develop #3280

Merged: 7 commits merged into develop from merge-main-to-develop on Nov 8, 2023
17 changes: 7 additions & 10 deletions .pre-commit-config.yaml
@@ -5,13 +5,16 @@ default_stages: [commit, manual]

 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    # Ruff version.
-    rev: v0.0.277
+    rev: v0.1.3
     hooks:
       - id: ruff
         name: "ruff on kedro/, tests/ and docs/"
         args: ["--fix", "--exit-non-zero-on-fix"]
         exclude: "^kedro/templates/|^features/steps/test_starter/"
+      - id: ruff-format
+        name: "ruff format on kedro/, features/ and tests/"
+        files: "^kedro/|^features/|^tests/"
+        exclude: "^features/steps/test_starter|^kedro/templates/"

   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v2.2.3
@@ -54,18 +57,12 @@ repos:
     rev: v1.12.1
     hooks:
       - id: blacken-docs
-        additional_dependencies: [black~=22.0]
+        additional_dependencies:
+          - black~=23.0
         entry: blacken-docs --skip-errors

   - repo: local
     hooks:
-      - id: black
-        name: "Black"
-        language: system
-        files: ^kedro/|^features/|^tests/
-        types: [file, python]
-        exclude: ^features/steps/test_starter|^kedro/templates/
-        entry: black
       - id: imports
         name: "Import Linter"
         language: system
14 changes: 6 additions & 8 deletions docs/source/conf.py
@@ -441,13 +441,13 @@ def log_suggestions(lines: list[str], name: str):
                 "["
                 + str(i)
                 + "] "
-                + re.sub(existing, r"{}".format(style(obj, fg="magenta")), lines[i])
+                + re.sub(existing, rf"{style(obj, fg='magenta')}", lines[i])
             )
             print(
                 "["
                 + str(i)
                 + "] "
-                + re.sub(existing, r"``{}``".format(style(obj, fg="green")), lines[i])
+                + re.sub(existing, rf"``{style(obj, fg='green')}``", lines[i])
             )

         if title_printed:
@@ -472,8 +472,8 @@ def autodoc_process_docstring(app, what, name, obj, options, lines):
         print(
             style(
                 "Failed to check for class name mentions that can be "
-                "converted to reStructuredText links in docstring of {}. "
-                "Error is: \n{}".format(name, str(e)),
+                f"converted to reStructuredText links in docstring of {name}. "
+                f"Error is: \n{str(e)}",
                 fg="red",
             )
         )
@@ -499,9 +499,7 @@ def _add_jinja_filters(app):

     # LaTeXBuilder is used in the PDF docs build,
     # and it doesn't have attribute 'templates'
-    if not (
-        isinstance(app.builder, (LaTeXBuilder,CheckExternalLinksBuilder))
-    ):
+    if not (isinstance(app.builder, (LaTeXBuilder, CheckExternalLinksBuilder))):
         app.builder.templates.environment.filters["env_override"] = env_override

@@ -537,7 +535,7 @@ def setup(app):
             style(
                 "Failed to create list of (regex, reStructuredText link "
                 "replacement) for class names and method names in docstrings. "
-                "Error is: \n{}".format(str(e)),
+                f"Error is: \n{str(e)}",
                 fg="red",
             )
         )
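All of the `conf.py` edits above apply one pattern: `str.format()` calls become f-strings. A minimal runnable sketch of the rewrite, with stand-in values for the variables used in `conf.py`:

```python
# Stand-ins for the values used in conf.py.
name = "DataCatalog"
e = ValueError("boom")

# Before: placeholders filled by a trailing .format() call; the adjacent
# string literals are concatenated first, then .format() applies to the whole.
msg_old = (
    "Failed to check class name mentions in docstring of {}. "
    "Error is: \n{}".format(name, str(e))
)

# After: f-strings interpolate in place, so each value sits where it is used.
msg_new = (
    f"Failed to check class name mentions in docstring of {name}. "
    f"Error is: \n{e}"
)

assert msg_old == msg_new
```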
2 changes: 1 addition & 1 deletion docs/source/deployment/aws_batch.md
@@ -297,7 +297,7 @@ def _track_batch_job(job_id: str, client: Any) -> None:

 #### Set up Batch-related configuration

-You'll need to set the Batch-related configuration that the runner will use. Add a `parameters.yml` file inside the `conf/aws_batch/` directory created as part of the prerequistes with the following keys:
+You'll need to set the Batch-related configuration that the runner will use. Add a `parameters.yml` file inside the `conf/aws_batch/` directory created as part of the prerequisites with the following keys:

 ```yaml
 aws_batch:
2 changes: 1 addition & 1 deletion docs/source/experiment_tracking/index.md
@@ -23,7 +23,7 @@ Kedro has always supported parameter versioning

 Kedro-Viz version 4.1.1 introduced metadata capture, visualisation, discovery and comparison, enabling you to access, edit and [compare your experiments](#access-run-data-and-compare-runs) and additionally [track how your metrics change over time](#view-and-compare-metrics-data).

-Kedro-Viz version 5.0 also supports the [display and comparison of plots, such as Plotly and Matplotlib](../visualisation/visualise_charts_with_plotly.md). Support for metric plots (timeseries and parellel coords) was added to Kedro-Viz version 5.2.1.
+Kedro-Viz version 5.0 also supports the [display and comparison of plots, such as Plotly and Matplotlib](../visualisation/visualise_charts_with_plotly.md). Support for metric plots (timeseries and parallel coords) was added to Kedro-Viz version 5.2.1.

 Kedro-Viz version 6.2 includes support for collaborative experiment tracking using a cloud storage solution. This means that multiple users can store their experiment data in a centralized remote storage, such as AWS S3, and access it through Kedro-Viz.
2 changes: 2 additions & 0 deletions docs/source/get_started/kedro_concepts.md
@@ -21,13 +21,15 @@ Here are two simple nodes as an example:
 ```python
 from kedro.pipeline import node

+
 # First node
 def return_greeting():
     return "Hello"


 return_greeting_node = node(func=return_greeting, inputs=None, outputs="my_salutation")

+
 # Second node
 def join_statements(greeting):
     return f"{greeting} Kedro!"
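The hunk ends before the second function is wrapped in a node, so for context here is a hedged sketch of the wrapper that typically follows; `my_message` is an assumed output name, not taken from the diff:

```python
from kedro.pipeline import node


def join_statements(greeting: str) -> str:
    return f"{greeting} Kedro!"


# Hypothetical wrapper mirroring return_greeting_node above: it consumes the
# first node's output "my_salutation" and emits an assumed dataset name.
join_statements_node = node(
    func=join_statements, inputs="my_salutation", outputs="my_message"
)
```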
2 changes: 0 additions & 2 deletions docs/source/hooks/examples.md
@@ -116,7 +116,6 @@ import great_expectations as ge


 class DataValidationHooks:
-
     # Map expectation to dataset
     DATASET_EXPECTATION_MAPPING = {
         "companies": "raw_companies_dataset_expectation",
@@ -207,7 +206,6 @@ import great_expectations as ge


 class DataValidationHooks:
-
     # Map checkpoint to dataset
     DATASET_CHECKPOINT_MAPPING = {
         "companies": "raw_companies_dataset_checkpoint",
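Both hunks show only the class attribute; a hedged sketch of how such a mapping is typically consumed, using Kedro's `after_dataset_loaded` hook spec (the validation body is illustrative, not from the diff):

```python
from kedro.framework.hooks import hook_impl


class DataValidationHooks:
    # Map dataset name to the expectation suite that validates it.
    DATASET_EXPECTATION_MAPPING = {
        "companies": "raw_companies_dataset_expectation",
    }

    @hook_impl
    def after_dataset_loaded(self, dataset_name: str, data) -> None:
        # Illustrative: look up the suite and validate the freshly loaded data.
        suite = self.DATASET_EXPECTATION_MAPPING.get(dataset_name)
        if suite is not None:
            print(f"Validating {dataset_name} against {suite}")
```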
(file name not captured in this excerpt)
@@ -391,7 +391,6 @@ random_state = conf_params["model_options"]["random_state"]

 ```python
 def big_function():
-
     ####################
     # Data processing #
     ####################
@@ -469,14 +468,12 @@ def _parse_money(x: pd.Series) -> pd.Series:


 def preprocess_companies(companies: pd.DataFrame) -> pd.DataFrame:
-
     companies["iata_approved"] = _is_true(companies["iata_approved"])
     companies["company_rating"] = _parse_percentage(companies["company_rating"])
     return companies


 def preprocess_shuttles(shuttles: pd.DataFrame) -> pd.DataFrame:
-
     shuttles["d_check_complete"] = _is_true(shuttles["d_check_complete"])
     shuttles["moon_clearance_complete"] = _is_true(shuttles["moon_clearance_complete"])
     shuttles["price"] = _parse_money(shuttles["price"])
@@ -486,7 +483,6 @@ def preprocess_shuttles(shuttles: pd.DataFrame) -> pd.DataFrame:
 def create_model_input_table(
     shuttles: pd.DataFrame, companies: pd.DataFrame, reviews: pd.DataFrame
 ) -> pd.DataFrame:
-
     rated_shuttles = shuttles.merge(reviews, left_on="id", right_on="shuttle_id")
     model_input_table = rated_shuttles.merge(
         companies, left_on="company_id", right_on="id"
@@ -590,14 +586,12 @@ def _parse_money(x: pd.Series) -> pd.Series:


 def preprocess_companies(companies: pd.DataFrame) -> pd.DataFrame:
-
     companies["iata_approved"] = _is_true(companies["iata_approved"])
     companies["company_rating"] = _parse_percentage(companies["company_rating"])
     return companies


 def preprocess_shuttles(shuttles: pd.DataFrame) -> pd.DataFrame:
-
     shuttles["d_check_complete"] = _is_true(shuttles["d_check_complete"])
     shuttles["moon_clearance_complete"] = _is_true(shuttles["moon_clearance_complete"])
     shuttles["price"] = _parse_money(shuttles["price"])
@@ -607,7 +601,6 @@ def preprocess_shuttles(shuttles: pd.DataFrame) -> pd.DataFrame:
 def create_model_input_table(
     shuttles: pd.DataFrame, companies: pd.DataFrame, reviews: pd.DataFrame
 ) -> pd.DataFrame:
-
     rated_shuttles = shuttles.merge(reviews, left_on="id", right_on="shuttle_id")
     model_input_table = rated_shuttles.merge(
         companies, left_on="company_id", right_on="id"
6 changes: 6 additions & 0 deletions docs/source/tutorial/create_a_pipeline.md
@@ -152,6 +152,12 @@ You can run the `preprocess_shuttles` node similarly. To test both nodes together:
 kedro run
 ```

+You can also run both nodes by naming each in turn, as follows:
+
+```bash
+kedro run --nodes=preprocess_companies_node,preprocess_shuttles_node
+```
+
 You should see output similar to the following:

 <details>
(file name not captured in this excerpt)
@@ -85,6 +85,7 @@ import plotly.express as px
 import plotly.graph_objs as go
 import pandas as pd

+
 # This function uses plotly.express
 def compare_passenger_capacity_exp(preprocessed_shuttles: pd.DataFrame):
     return (
@@ -96,7 +97,6 @@ def compare_passenger_capacity_exp(preprocessed_shuttles: pd.DataFrame):

 # This function uses plotly.graph_objects
 def compare_passenger_capacity_go(preprocessed_shuttles: pd.DataFrame):
-
     data_frame = (
         preprocessed_shuttles.groupby(["shuttle_type"])
         .mean(numeric_only=True)
2 changes: 1 addition & 1 deletion features/steps/sh_run.py
@@ -36,7 +36,7 @@ def run(
     """
     if isinstance(cmd, str) and split:
         cmd = shlex.split(cmd)
-    result = subprocess.run(cmd, input="", capture_output=True, **kwargs)
+    result = subprocess.run(cmd, input="", capture_output=True, **kwargs)  # noqa: PLW1510
     result.stdout = result.stdout.decode("utf-8")
     result.stderr = result.stderr.decode("utf-8")
     if print_output:
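The new suppression targets Ruff's PLW1510, which flags `subprocess.run` calls that omit an explicit `check` argument. A small sketch of what the rule guards against; the command is illustrative only:

```python
import subprocess

# Without check, a failing command only sets returncode; no exception is
# raised, which is easy to miss. PLW1510 asks for this choice to be explicit.
result = subprocess.run(["python", "-c", "raise SystemExit(1)"], capture_output=True)
print(result.returncode)  # 1

# With check=True, the same failure raises CalledProcessError instead.
try:
    subprocess.run(["python", "-c", "raise SystemExit(1)"], check=True)
except subprocess.CalledProcessError as exc:
    print(f"command failed with exit code {exc.returncode}")
```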
4 changes: 2 additions & 2 deletions kedro/config/omegaconf_config.py
@@ -73,7 +73,7 @@ class OmegaConfigLoader(AbstractConfigLoader):

     """

-    def __init__(  # noqa: too-many-arguments
+    def __init__(  # noqa: PLR0913
         self,
         conf_source: str,
         env: str = None,
@@ -251,7 +251,7 @@ def __repr__(self):  # pragma: no cover
             f"config_patterns={self.config_patterns})"
         )

-    def load_and_merge_dir_config(  # noqa: too-many-arguments
+    def load_and_merge_dir_config(  # noqa: PLR0913
         self,
         conf_path: str,
         patterns: Iterable[str],
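This PR repeatedly swaps pylint-style names in `# noqa` comments for Ruff rule codes: Ruff resolves suppressions by code, so the old names no longer silence anything. A minimal sketch of the pattern, with illustrative functions:

```python
# Legacy pylint-style name: Ruff matches noqa directives by rule code,
# so this spelling no longer suppresses the too-many-arguments check.
def legacy(a, b, c, d, e, f):  # noqa: too-many-arguments
    return a


# Migrated form: PLR0913 is Ruff's code for pylint's too-many-arguments rule
# (triggered here because the function exceeds the default limit of 5 args).
def migrated(a, b, c, d, e, f):  # noqa: PLR0913
    return a
```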
30 changes: 15 additions & 15 deletions kedro/framework/cli/micropkg.py
@@ -11,7 +11,7 @@
 import toml
 from importlib import import_module
 from pathlib import Path
-from typing import Any, Iterable, Iterator, List, Tuple, Union
+from typing import Any, Iterable, Iterator

 import click
 from omegaconf import OmegaConf
@@ -156,7 +156,7 @@ def micropkg():
     help="Location of a configuration file for the fsspec filesystem used to pull the package.",
 )
 @click.pass_obj  # this will pass the metadata as first argument
-def pull_package(  # noqa: unused-argument, too-many-arguments
+def pull_package(  # noqa: PLR0913
     metadata: ProjectMetadata,
     package_path,
     env,
@@ -191,7 +191,7 @@ def pull_package(
     click.secho(message, fg="green")


-def _pull_package(  # noqa: too-many-arguments
+def _pull_package(  # noqa: PLR0913
     package_path: str,
     metadata: ProjectMetadata,
     env: str = None,
@@ -281,7 +281,6 @@ def _pull_packages_from_manifest(metadata: ProjectMetadata) -> None:


 def _package_micropkgs_from_manifest(metadata: ProjectMetadata) -> None:
-
     config_dict = toml.load(metadata.config_file)
     config_dict = config_dict["tool"]["kedro"]
     build_specs = config_dict.get("micropkg", {}).get("package")
@@ -328,7 +327,7 @@ def _package_micropkgs_from_manifest(metadata: ProjectMetadata) -> None:
 )
 @click.argument("module_path", nargs=1, required=False, callback=_check_module_path)
 @click.pass_obj  # this will pass the metadata as first argument
-def package_micropkg(  # noqa: too-many-arguments
+def package_micropkg(  # noqa: PLR0913
     metadata: ProjectMetadata,
     module_path,
     env,
@@ -439,7 +438,7 @@ def _rename_files(conf_source: Path, old_name: str, new_name: str):
     config_file.rename(config_file.parent / new_config_name)


-def _refactor_code_for_unpacking(  # noqa: too-many-arguments
+def _refactor_code_for_unpacking(  # noqa: PLR0913
     project: Project,
     package_path: Path,
     tests_path: Path,
@@ -520,7 +519,7 @@ def _move_package_with_conflicting_name(
     return refactored_package_path, refactored_tests_path


-def _install_files(  # noqa: too-many-arguments, too-many-locals
+def _install_files(  # noqa: PLR0913, too-many-locals
     project_metadata: ProjectMetadata,
     package_name: str,
     source_path: Path,
@@ -820,13 +819,10 @@ def _move_package_with_conflicting_name(target: Path, conflicting_name: str):
     _move_package_with_conflicting_name(tests_target, "tests")


-_SourcePathType = Union[Path, List[Tuple[Path, str]]]
-
-
-def _generate_sdist_file(  # noqa: too-many-arguments,too-many-locals
+def _generate_sdist_file(  # noqa: PLR0913,too-many-locals
     micropkg_name: str,
     destination: Path,
-    source_paths: tuple[_SourcePathType, ...],
+    source_paths: tuple[Path, Path, list[tuple[Path, str]]],
     version: str,
     metadata: ProjectMetadata,
     alias: str = None,
@@ -839,20 +835,24 @@

     project = Project(temp_dir_path)  # project where to do refactoring
     _refactor_code_for_package(
-        project, package_source, tests_source, alias, metadata  # type: ignore
+        project,
+        package_source,
+        tests_source,
+        alias,
+        metadata,
     )
     project.close()

     # Copy & "refactor" config
     _, _, conf_target = _get_package_artifacts(temp_dir_path, package_name)
-    _sync_path_list(conf_source, conf_target)  # type: ignore
+    _sync_path_list(conf_source, conf_target)
     if conf_target.is_dir() and alias:
         _rename_files(conf_target, micropkg_name, alias)

     # Build a pyproject.toml on the fly
     try:
         install_requires = _make_install_requires(
-            package_source / "requirements.txt"  # type: ignore
+            package_source / "requirements.txt"
         )
     except Exception as exc:
         click.secho("FAILED", fg="red")
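One change in this file is not a noqa swap: the `_SourcePathType` alias is dropped in favour of a positional annotation. A hedged sketch of the before and after, with simplified bodies; only the alias and the two annotations are taken from the diff:

```python
from __future__ import annotations  # permits builtin generics in annotations

from pathlib import Path
from typing import List, Tuple, Union

# Before: each tuple element could be either shape, so a type checker cannot
# tell which position holds the list of (Path, str) pairs.
_SourcePathType = Union[Path, List[Tuple[Path, str]]]


def generate_old(source_paths: tuple[_SourcePathType, ...]) -> None:
    ...


# After: the positions are explicit, matching the new _generate_sdist_file
# signature: two Paths followed by a list of (Path, str) pairs.
def generate_new(source_paths: tuple[Path, Path, list[tuple[Path, str]]]) -> None:
    ...
```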
4 changes: 1 addition & 3 deletions kedro/framework/cli/pipeline.py
@@ -140,9 +140,7 @@ def create_pipeline(
     "-y", "--yes", is_flag=True, help="Confirm deletion of pipeline non-interactively."
 )
 @click.pass_obj  # this will pass the metadata as first argument
-def delete_pipeline(
-    metadata: ProjectMetadata, name, env, yes, **kwargs
-):  # noqa: unused-argument
+def delete_pipeline(metadata: ProjectMetadata, name, env, yes, **kwargs):  # noqa: unused-argument
     """Delete a modular pipeline by providing a name."""
     package_dir = metadata.source_dir / metadata.package_name
     conf_source = settings.CONF_SOURCE
2 changes: 1 addition & 1 deletion kedro/framework/cli/project.py
@@ -220,7 +220,7 @@ def package(metadata: ProjectMetadata):
     help=PARAMS_ARG_HELP,
     callback=_split_params,
 )
-def run(  # noqa: too-many-arguments,unused-argument,too-many-locals
+def run(  # noqa: PLR0913,unused-argument,too-many-locals
     tag,
     tags,
     env,
4 changes: 1 addition & 3 deletions kedro/framework/cli/registry.py
@@ -27,9 +27,7 @@ def list_registered_pipelines():
 @command_with_verbosity(registry, "describe")
 @click.argument("name", nargs=1, default="__default__")
 @click.pass_obj
-def describe_registered_pipeline(
-    metadata: ProjectMetadata, name, **kwargs
-):  # noqa: unused-argument, protected-access
+def describe_registered_pipeline(metadata: ProjectMetadata, name, **kwargs):  # noqa: unused-argument, protected-access
     """Describe a registered pipeline by providing a pipeline name.
     Defaults to the `__default__` pipeline.
     """
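Several of these commands rely on `@click.pass_obj`, which the inline comments describe as passing the metadata as the first argument. A minimal sketch of that mechanism, with hypothetical command and object names:

```python
import click


@click.group()
@click.pass_context
def cli(ctx):
    # Whatever is stored on ctx.obj here is what pass_obj later injects.
    ctx.obj = {"package_name": "demo_project"}


@cli.command()
@click.pass_obj
def describe(obj):
    # obj is ctx.obj from the group above, injected as the first positional
    # argument, much as ProjectMetadata is injected into the Kedro commands.
    click.echo(f"package: {obj['package_name']}")


if __name__ == "__main__":
    cli()
```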