From b0465d04013378d5ce0a655818619637e939770f Mon Sep 17 00:00:00 2001
From: Robbe Sneyders
Date: Mon, 8 May 2023 11:29:03 +0200
Subject: [PATCH] Extend Ruff to tests and add extra rules (#83)

This PR makes Ruff run on tests as well and adds extra rules:
- More Pylint rules not yet covered by the default checks
- Isort for import order
- Pydocstyle rules
  - I ignored quite a few rules here which we might want to activate
    (D100 - D107), but we can do that in separate PRs as they require
    manual effort.

I only had to fix one issue and add one local ignore. All other issues were
auto-fixed, so these rules shouldn't lead to additional overhead.
---
 .pre-commit-config.yaml                            |  8 ++---
 .../components/image_filtering/src/main.py         |  1 -
 .../simple_pipeline/config/general_config.py       |  2 +-
 .../simple_pipeline/simple_pipeline.py             |  2 +-
 fondant/component.py                               | 27 +++++++++--------
 fondant/component_spec.py                          | 25 ++++++++--------
 fondant/dataset.py                                 | 15 ++++------
 fondant/import_utils.py                            | 19 +++++-------
 fondant/logger.py                                  |  4 +--
 fondant/manifest.py                                | 29 ++++++++++---------
 fondant/pipeline_utils.py                          |  7 +++--
 fondant/schema.py                                  |  3 +-
 pyproject.toml                                     | 15 ++++++++++
 tests/example_data/raw/split.py                    |  1 +
 tests/test_component.py                            |  7 ++---
 tests/test_component_specs.py                      | 15 +++++-----
 tests/test_dataset.py                              | 17 ++++++-----
 tests/test_import_utils.py                         |  8 ++---
 tests/test_logger.py                               |  7 ++---
 tests/test_manifest.py                             | 10 +++----
 tests/test_manifest_evolution.py                   |  3 +-
 21 files changed, 112 insertions(+), 113 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 38051f6ca..68130d0d8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,17 +3,13 @@ ci:
   autoupdate_schedule: monthly
 repos:
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: 'v0.0.254'
+    rev: 'v0.0.264'
    hooks:
      - id: ruff
        files: |
          (?x)^(
            fondant/.*|
-           examples/pipelines/hf_dataset_pipeline|
-           examples/pipelines/finetune_stable_diffusion/components/load_from_hub_component|
-           examples/pipelines/finetune_stable_diffusion/components/image_filter_component|
-           examples/pipelines/finetune_stable_diffusion/components/embedding_component|
-           examples/pipelines/finetune_stable_diffusion/dataset_creation_pipeline.py|
+           tests/.*|
          )$
        args: [--fix, --exit-non-zero-on-fix]

diff --git a/examples/pipelines/simple_pipeline/components/image_filtering/src/main.py b/examples/pipelines/simple_pipeline/components/image_filtering/src/main.py
index f328e8091..44b8f7b2d 100644
--- a/examples/pipelines/simple_pipeline/components/image_filtering/src/main.py
+++ b/examples/pipelines/simple_pipeline/components/image_filtering/src/main.py
@@ -2,7 +2,6 @@
 This component filters images of the dataset based on image size (minimum height and width).
""" import logging -from typing import Dict import dask.dataframe as dd diff --git a/examples/pipelines/simple_pipeline/config/general_config.py b/examples/pipelines/simple_pipeline/config/general_config.py index 078fa2d8b..6a34bd8ac 100644 --- a/examples/pipelines/simple_pipeline/config/general_config.py +++ b/examples/pipelines/simple_pipeline/config/general_config.py @@ -29,7 +29,7 @@ class KubeflowConfig(GeneralConfig): HOST (str): kfp host url """ - BASE_PATH = f"gcs://soy-audio-379412_kfp-artifacts/custom_artifact" + BASE_PATH = "gcs://soy-audio-379412_kfp-artifacts/custom_artifact" CLUSTER_NAME = "kfp-fondant" CLUSTER_ZONE = "europe-west4-a" HOST = "https://52074149b1563463-dot-europe-west1.pipelines.googleusercontent.com" diff --git a/examples/pipelines/simple_pipeline/simple_pipeline.py b/examples/pipelines/simple_pipeline/simple_pipeline.py index 1a494b99f..66f1c04a8 100644 --- a/examples/pipelines/simple_pipeline/simple_pipeline.py +++ b/examples/pipelines/simple_pipeline/simple_pipeline.py @@ -47,7 +47,7 @@ def simple_pipeline( ).set_display_name("Load initial images") # Component 2 - image_filter_task = image_filtering_op( + image_filtering_op( input_manifest_path=load_from_hub_task.outputs["output_manifest_path"], min_width=image_filter_min_width, min_height=image_filter_min_height, diff --git a/fondant/component.py b/fondant/component.py index b3f71a4ac..50e4b2c57 100644 --- a/fondant/component.py +++ b/fondant/component.py @@ -8,13 +8,13 @@ import argparse import json import logging -from pathlib import Path import typing as t from abc import ABC, abstractmethod +from pathlib import Path import dask.dataframe as dd -from fondant.component_spec import FondantComponentSpec, kubeflow2python_type, Argument +from fondant.component_spec import Argument, FondantComponentSpec, kubeflow2python_type from fondant.dataset import FondantDataset from fondant.manifest import Manifest @@ -22,7 +22,7 @@ class FondantComponent(ABC): - """Abstract base class for a Fondant component""" + """Abstract base class for a Fondant component.""" def __init__(self, spec: FondantComponentSpec) -> None: self.spec = spec @@ -32,7 +32,7 @@ def __init__(self, spec: FondantComponentSpec) -> None: def from_file( cls, path: t.Union[str, Path] = "../fondant_component.yaml" ) -> "FondantComponent": - """Create a component from a component spec file + """Create a component from a component spec file. Args: path: Path to the component spec file @@ -55,7 +55,7 @@ def _get_component_arguments(self) -> t.Dict[str, Argument]: @abstractmethod def _add_and_parse_args(self) -> argparse.Namespace: - """Abstract method to add and parse the component arguments""" + """Abstract method to add and parse the component arguments.""" @property def user_arguments(self) -> t.Dict[str, t.Any]: @@ -68,17 +68,16 @@ def user_arguments(self) -> t.Dict[str, t.Any]: @abstractmethod def _load_or_create_manifest(self) -> Manifest: - """Abstract method that returns the dataset manifest""" + """Abstract method that returns the dataset manifest.""" @abstractmethod def _process_dataset(self, dataset: FondantDataset) -> dd.DataFrame: """Abstract method that processes the input dataframe of the `FondantDataset` and - returns another dataframe""" + returns another dataframe. + """ def run(self): - """ - Runs the component. 
- """ + """Runs the component.""" input_manifest = self._load_or_create_manifest() input_dataset = FondantDataset(input_manifest) @@ -100,7 +99,7 @@ def upload_manifest(self, manifest: Manifest, save_path: str): class FondantLoadComponent(FondantComponent): - """Base class for a Fondant load component""" + """Base class for a Fondant load component.""" def _add_and_parse_args(self): parser = argparse.ArgumentParser() @@ -145,7 +144,7 @@ def _load_or_create_manifest(self) -> Manifest: @abstractmethod def load(self, **kwargs) -> dd.DataFrame: - """Abstract method that loads the initial dataframe""" + """Abstract method that loads the initial dataframe.""" def _process_dataset(self, dataset: FondantDataset) -> dd.DataFrame: """This function loads the initial dataframe sing the user-provided `load` method. @@ -160,7 +159,7 @@ def _process_dataset(self, dataset: FondantDataset) -> dd.DataFrame: class FondantTransformComponent(FondantComponent): - """Base class for a Fondant transform component""" + """Base class for a Fondant transform component.""" def _add_and_parse_args(self): parser = argparse.ArgumentParser() @@ -181,7 +180,7 @@ def _load_or_create_manifest(self) -> Manifest: @abstractmethod def transform(self, dataframe: dd.DataFrame, **kwargs) -> dd.DataFrame: - """Abstract method for applying data transformations to the input dataframe""" + """Abstract method for applying data transformations to the input dataframe.""" def _process_dataset(self, dataset: FondantDataset) -> dd.DataFrame: """ diff --git a/fondant/component_spec.py b/fondant/component_spec.py index afc259cbb..51e89bf39 100644 --- a/fondant/component_spec.py +++ b/fondant/component_spec.py @@ -1,19 +1,19 @@ """This module defines classes to represent an Fondant component specification.""" import copy import json -import yaml import pkgutil import types import typing as t -from pathlib import Path from dataclasses import dataclass +from pathlib import Path import jsonschema.exceptions +import yaml from jsonschema import Draft4Validator from jsonschema.validators import RefResolver from fondant.exceptions import InvalidComponentSpec -from fondant.schema import Field, Type, KubeflowCommandArguments +from fondant.schema import Field, KubeflowCommandArguments, Type # TODO: Change after upgrading to kfp v2 # :https://www.kubeflow.org/docs/components/pipelines/v2/data-types/parameters/ @@ -40,7 +40,7 @@ @dataclass class Argument: """ - Kubeflow component argument + Kubeflow component argument. Args: name: name of the argument @@ -94,11 +94,10 @@ def __init__(self, specification: t.Dict[str, t.Any]) -> None: self._validate_spec() def _validate_spec(self) -> None: - """Validate a component specification against the component schema + """Validate a component specification against the component schema. Raises: InvalidComponent when the component specification is not valid. 
""" - spec_data = pkgutil.get_data("fondant", "schemas/component_spec.json") if spec_data is None: @@ -118,13 +117,13 @@ def _validate_spec(self) -> None: @classmethod def from_file(cls, path: t.Union[str, Path]) -> "FondantComponentSpec": - """Load the component spec from the file specified by the provided path""" + """Load the component spec from the file specified by the provided path.""" with open(path, encoding="utf-8") as file_: specification = yaml.safe_load(file_) return cls(specification) def to_file(self, path) -> None: - """Dump the component spec to the file specified by the provided path""" + """Dump the component spec to the file specified by the provided path.""" with open(path, "w", encoding="utf-8") as file_: yaml.dump(self._specification, file_) @@ -142,7 +141,7 @@ def image(self): @property def input_subsets(self) -> t.Mapping[str, ComponentSubset]: - """The input subsets of the component as an immutable mapping""" + """The input subsets of the component as an immutable mapping.""" return types.MappingProxyType( { name: ComponentSubset(subset) @@ -153,7 +152,7 @@ def input_subsets(self) -> t.Mapping[str, ComponentSubset]: @property def output_subsets(self) -> t.Mapping[str, ComponentSubset]: - """The output subsets of the component as an immutable mapping""" + """The output subsets of the component as an immutable mapping.""" return types.MappingProxyType( { name: ComponentSubset(subset) @@ -267,7 +266,7 @@ def _dump_args(args: t.Iterable[Argument]) -> KubeflowCommandArguments: return dumped_args def to_file(self, path: t.Union[str, Path]) -> None: - """Dump the component specification to the file specified by the provided path""" + """Dump the component specification to the file specified by the provided path.""" with open(path, "w", encoding="utf-8") as file_: yaml.dump( self._specification, @@ -279,7 +278,7 @@ def to_file(self, path: t.Union[str, Path]) -> None: @property def input_arguments(self) -> t.Mapping[str, Argument]: - """The input arguments of the component as an immutable mapping""" + """The input arguments of the component as an immutable mapping.""" return types.MappingProxyType( { info["name"]: Argument( @@ -293,7 +292,7 @@ def input_arguments(self) -> t.Mapping[str, Argument]: @property def output_arguments(self) -> t.Mapping[str, Argument]: - """The output arguments of the component as an immutable mapping""" + """The output arguments of the component as an immutable mapping.""" return types.MappingProxyType( { info["name"]: Argument( diff --git a/fondant/dataset.py b/fondant/dataset.py index 574e10110..d176a2187 100644 --- a/fondant/dataset.py +++ b/fondant/dataset.py @@ -6,7 +6,7 @@ import dask.dataframe as dd from fondant.component_spec import FondantComponentSpec -from fondant.manifest import Manifest, Field +from fondant.manifest import Field, Manifest logger = logging.getLogger(__name__) @@ -37,7 +37,6 @@ def _load_subset(self, subset_name: str, fields: t.List[str]) -> dd.DataFrame: Returns: The subset as a dask dataframe """ - subset = self.manifest.subsets[subset_name] remote_path = subset.location index_fields = list(self.manifest.index.fields.keys()) @@ -78,7 +77,7 @@ def _load_index(self) -> dd.DataFrame: def load_dataframe(self, spec: FondantComponentSpec) -> dd.DataFrame: """ Function that loads the subsets defined in the component spec as a single Dask dataframe for - the user + the user. 

         Args:
             spec: the fondant component spec

@@ -95,7 +94,7 @@ def load_dataframe(self, spec: FondantComponentSpec) -> dd.DataFrame:
             # left joins -> filter on index
             df = dd.merge(df, subset_df, on=["id", "source"], how="left")

-        logging.info("Columns of dataframe:", list(df.columns))
+        logging.info(f"Columns of dataframe: {list(df.columns)}")

         return df

@@ -116,7 +115,6 @@ def _create_write_dataframe_task(
             A delayed Dask task that uploads the DataFrame to the remote storage location when
             executed.
         """
-
         # Define task to upload index to remote storage
         write_task = dd.to_parquet(
             df, remote_path, schema=schema, overwrite=False, compute=False
@@ -126,6 +124,7 @@ def _create_write_dataframe_task(
     def write_index(self, df: dd.DataFrame):
         """
         Write the index dataframe to a remote location.
+
         Args:
             df: The output Dask dataframe returned by the user.
         """
@@ -163,7 +162,7 @@ def verify_subset_columns(
     ) -> t.List[str]:
         """
         Verify that all the fields defined in the output subset are present in
-        the output dataframe
+        the output dataframe.
         """
         # TODO: add logic for `additional fields`
         subset_columns = [f"{subset_name}_{field}" for field in subset_fields]
@@ -181,9 +180,7 @@ def verify_subset_columns(
     def create_subset_dataframe(
         subset_name: str, subset_columns: t.List[str], df: dd.DataFrame
     ):
-        """
-        Create subset dataframe to save with the original field name as the column name
-        """
+        """Create subset dataframe to save with the original field name as the column name."""
         # Create a new dataframe with only the columns needed for the output subset
         subset_df = df[subset_columns]

diff --git a/fondant/import_utils.py b/fondant/import_utils.py
index c75660391..92868e11d 100644
--- a/fondant/import_utils.py
+++ b/fondant/import_utils.py
@@ -1,11 +1,9 @@
-"""
-Import utils
-"""
-import logging
-import importlib.util
+"""Import utils."""
 import importlib.metadata
-from pathlib import Path
+import importlib.util
+import logging
 import sys
+from pathlib import Path

 logger = logging.getLogger(__name__)

@@ -36,9 +34,8 @@ def is_package_available(package_name: str, import_error_msg: str) -> bool:
         package_name (str): the name of the package
         import_error_msg (str): the error message to return if the package is not found
     Returns:
-        bool: check if package is available
+        bool: check if package is available.
""" - package_available = importlib.util.find_spec(package_name) is not None try: @@ -54,15 +51,15 @@ def is_package_available(package_name: str, import_error_msg: str) -> bool: def is_datasets_available(): - """Check if 'datasets' is available""" + """Check if 'datasets' is available.""" return is_package_available("datasets", DATASETS_IMPORT_ERROR) def is_pandas_available(): - """Check if 'pandas' is available""" + """Check if 'pandas' is available.""" return is_package_available("pandas", PANDAS_IMPORT_ERROR) def is_kfp_available(): - """Check if 'pandas' is available""" + """Check if 'pandas' is available.""" return is_package_available("kfp", KFP_IMPORT_ERROR) diff --git a/fondant/logger.py b/fondant/logger.py index 7f3721324..08933eca2 100644 --- a/fondant/logger.py +++ b/fondant/logger.py @@ -1,14 +1,14 @@ """Utils file with useful methods.""" import logging -import sys import os +import sys LOG_LEVEL = os.environ.get("LOG_LEVEL", default="INFO") def configure_logging(log_level=LOG_LEVEL) -> None: - """Configure the root logger based on config settings""" + """Configure the root logger based on config settings.""" logger = logging.getLogger() # set loglevel diff --git a/fondant/manifest.py b/fondant/manifest.py index 1340eb546..790bae9f7 100644 --- a/fondant/manifest.py +++ b/fondant/manifest.py @@ -12,7 +12,7 @@ from fondant.component_spec import FondantComponentSpec from fondant.exceptions import InvalidManifest -from fondant.schema import Type, Field +from fondant.schema import Field, Type class Subset: @@ -30,7 +30,7 @@ def __init__(self, specification: dict, *, base_path: str) -> None: @property def location(self) -> str: - """The absolute location of the subset""" + """The absolute location of the subset.""" return self._base_path + self._specification["location"] @property @@ -57,7 +57,7 @@ def __repr__(self) -> str: class Index(Subset): - """Special case of a subset for the index, which has fixed fields""" + """Special case of a subset for the index, which has fixed fields.""" @property def fields(self) -> t.Dict[str, Field]: @@ -69,7 +69,7 @@ def fields(self) -> t.Dict[str, Field]: class Manifest: """ - Class representing an Fondant manifest + Class representing an Fondant manifest. Args: specification: The manifest specification as a Python dict @@ -80,11 +80,10 @@ def __init__(self, specification: t.Dict[str, t.Any]) -> None: self._validate_spec() def _validate_spec(self) -> None: - """Validate a manifest specification against the manifest schema + """Validate a manifest specification against the manifest schema. Raises: InvalidManifest when the manifest is not valid. """ - spec_data = pkgutil.get_data("fondant", "schemas/manifest.json") if spec_data is None: @@ -104,7 +103,7 @@ def _validate_spec(self) -> None: @classmethod def create(cls, *, base_path: str, run_id: str, component_id: str) -> "Manifest": - """Create an empty manifest + """Create an empty manifest. 

         Args:
             base_path: The base path of the manifest
@@ -124,18 +123,18 @@ def create(cls, *, base_path: str, run_id: str, component_id: str) -> "Manifest"

     @classmethod
     def from_file(cls, path: str) -> "Manifest":
-        """Load the manifest from the file specified by the provided path"""
+        """Load the manifest from the file specified by the provided path."""
         with open(path, encoding="utf-8") as file_:
             specification = json.load(file_)
             return cls(specification)

     def to_file(self, path) -> None:
-        """Dump the manifest to the file specified by the provided path"""
+        """Dump the manifest to the file specified by the provided path."""
         with open(path, "w", encoding="utf-8") as file_:
             json.dump(self._specification, file_)

     def copy(self) -> "Manifest":
-        """Return a deep copy of itself"""
+        """Return a deep copy of itself."""
         return self.__class__(copy.deepcopy(self._specification))

     @property
@@ -163,7 +162,7 @@ def index(self) -> Index:

     @property
     def subsets(self) -> t.Mapping[str, Subset]:
-        """The subsets of the manifest as an immutable mapping"""
+        """The subsets of the manifest as an immutable mapping."""
         return types.MappingProxyType(
             {
                 name: Subset(subset, base_path=self.base_path)
@@ -188,11 +187,13 @@ def remove_subset(self, name: str) -> None:

         del self._specification["subsets"][name]

-    def evolve(self, component_spec: FondantComponentSpec) -> "Manifest":
+    def evolve(  # noqa: PLR0912 (too many branches)
+        self, component_spec: FondantComponentSpec
+    ) -> "Manifest":
         """Evolve the manifest based on the component spec. The resulting manifest is the expected
         result if the current manifest is provided
-        to the component defined by the component spec."""
-
+        to the component defined by the component spec.
+        """
         evolved_manifest = self.copy()

         # Update `component_id` of the metadata
diff --git a/fondant/pipeline_utils.py b/fondant/pipeline_utils.py
index 1c6cf8c12..4c8654063 100644
--- a/fondant/pipeline_utils.py
+++ b/fondant/pipeline_utils.py
@@ -1,7 +1,7 @@
-"""General pipeline utils"""
+"""General pipeline utils."""

-import os
 import logging
+import os
 from typing import Callable, Optional

 from fondant.import_utils import is_kfp_available
@@ -17,12 +17,13 @@ def compile_and_upload_pipeline(
     pipeline: Callable[[], None], host: str, env: str, pipeline_id: Optional[str] = None
 ) -> None:
     """Upload pipeline to kubeflow.
+
     Args:
         pipeline: function that contains the pipeline definition
         host: the url host for kfp
         env: the project run environment (sbx, dev, prd)
         pipeline_id: pipeline id of existing component under the same name. Pass it on when you
-            want to delete current existing pipelines
+            want to delete current existing pipelines.
     """
     client = kfp.Client(host=host)

diff --git a/fondant/schema.py b/fondant/schema.py
index 35f0f740c..832a120f5 100644
--- a/fondant/schema.py
+++ b/fondant/schema.py
@@ -1,5 +1,6 @@
 """This module defines common schemas and datatypes used to define Fondant manifests, components
-and pipelines."""
+and pipelines.
+""" import enum import typing as t diff --git a/pyproject.toml b/pyproject.toml index cd48f07c9..35988373b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,4 +65,19 @@ requires = ["poetry-core>=1.2.0"] build-backend = "poetry.core.masonry.api" [tool.ruff] +select = ["D", "E", "F", "I", "PL", "RUF"] +ignore = [ + "D100", # Missing docstring in public module + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D104", # Missing docstring in public package + "D105", # Missing docstring in magic method + "D107", # Missing docstring in __init__ + "D205", # 1 blank line required between summary line and description (autofix not working) + "D212", # Multi-line docstring summary should start at first line (we choose 2nd) +] line-length = 100 # Only for comments, as code is handled by black at length 88 + +[tool.ruff.pydocstyle] +convention = "google" diff --git a/tests/example_data/raw/split.py b/tests/example_data/raw/split.py index c2e7a092f..f2241d65b 100644 --- a/tests/example_data/raw/split.py +++ b/tests/example_data/raw/split.py @@ -9,6 +9,7 @@ """ from pathlib import Path + import dask.dataframe as dd data_path = Path(__file__).parent diff --git a/tests/test_component.py b/tests/test_component.py index 9ce6faad5..99d6f076a 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -1,8 +1,7 @@ -"""Fondant component tests""" +"""Fondant component tests.""" import argparse import json -import tempfile import sys from pathlib import Path from unittest import mock @@ -11,10 +10,8 @@ import pytest from fondant.component import FondantLoadComponent, FondantTransformComponent -from fondant.component_spec import FondantComponentSpec from fondant.dataset import FondantDataset - components_path = Path(__file__).parent / "example_specs/components" component_specs_path = Path(__file__).parent / "example_specs/component_specs" @@ -78,7 +75,7 @@ def test_transform_kwargs(monkeypatch): """Test that arguments are passed correctly to `Component.transform` method.""" class EarlyStopException(Exception): - """Used to stop execution early instead of mocking all later functionality""" + """Used to stop execution early instead of mocking all later functionality.""" # Mock `Dataset.load_dataframe` so no actual data is loaded def mocked_load_dataframe(self, spec): diff --git a/tests/test_component_specs.py b/tests/test_component_specs.py index 7bd314208..b3e522a85 100644 --- a/tests/test_component_specs.py +++ b/tests/test_component_specs.py @@ -1,10 +1,11 @@ -"""Fondant component specs test""" +"""Fondant component specs test.""" +from pathlib import Path + import pytest import yaml -from pathlib import Path -from fondant.exceptions import InvalidComponentSpec from fondant.component_spec import FondantComponentSpec +from fondant.exceptions import InvalidComponentSpec from fondant.schema import Type component_specs_path = Path(__file__).parent / "example_specs/component_specs" @@ -29,7 +30,7 @@ def invalid_fondant_schema() -> dict: def test_component_spec_validation(valid_fondant_schema, invalid_fondant_schema): - """Test that the manifest is validated correctly on instantiation""" + """Test that the manifest is validated correctly on instantiation.""" FondantComponentSpec(valid_fondant_schema) with pytest.raises(InvalidComponentSpec): FondantComponentSpec(invalid_fondant_schema) @@ -39,7 +40,7 @@ def test_attribute_access(valid_fondant_schema): """ Test that attributes can be accessed as expected: - 
     - Fixed properties should be accessible as an attribute
-    - Dynamic properties should be accessible by lookup
+    - Dynamic properties should be accessible by lookup.
     """
     fondant_component = FondantComponentSpec(valid_fondant_schema)

@@ -49,9 +50,7 @@ def test_attribute_access(valid_fondant_schema):


 def test_kfp_component_creation(valid_fondant_schema, valid_kubeflow_schema):
-    """
-    Test that the created kubeflow component matches the expected kubeflow component
-    """
+    """Test that the created kubeflow component matches the expected kubeflow component."""
     fondant_component = FondantComponentSpec(valid_fondant_schema)
     kubeflow_component = fondant_component.kubeflow_specification
     assert kubeflow_component._specification == valid_kubeflow_schema
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 5c931854e..f78d35d41 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -1,16 +1,19 @@
 import tempfile
-import pytest
-import dask.dataframe as dd
 from pathlib import Path

-from fondant.manifest import Manifest
-from fondant.dataset import FondantDataset
+import dask.dataframe as dd
+import pytest
+
 from fondant.component_spec import FondantComponentSpec
+from fondant.dataset import FondantDataset
+from fondant.manifest import Manifest

 input_manifest_path = Path(__file__).parent / "example_data/input_manifest.json"
 output_manifest_path = Path(__file__).parent / "example_data/output_manifest.json"
 component_spec_path = Path(__file__).parent / "example_data/components/1.yaml"

+DATASET_SIZE = 151
+

 @pytest.fixture
 def input_manifest():
@@ -29,13 +32,13 @@ def component_spec():

 def test_load_index(input_manifest):
     fds = FondantDataset(input_manifest)
-    assert len(fds._load_index()) == 151
+    assert len(fds._load_index()) == DATASET_SIZE


 def test_merge_subsets(input_manifest, component_spec):
     fds = FondantDataset(manifest=input_manifest)
     df = fds.load_dataframe(spec=component_spec)
-    assert len(df) == 151
+    assert len(df) == DATASET_SIZE
     assert list(df.columns) == [
         "id",
         "source",
@@ -70,5 +73,5 @@ def test_write_subsets(input_manifest, output_manifest, component_spec):
     for subset, subset_columns in subset_columns_dict.items():
         subset_path = Path(tmp_dir_path / subset)
         df = dd.read_parquet(subset_path)
-        assert len(df) == 151
+        assert len(df) == DATASET_SIZE
         assert list(df.columns) == subset_columns
diff --git a/tests/test_import_utils.py b/tests/test_import_utils.py
index c5ca46c36..3f3f78b06 100644
--- a/tests/test_import_utils.py
+++ b/tests/test_import_utils.py
@@ -1,6 +1,4 @@
-"""
-Test scripts for import module functionality
-"""
+"""Test scripts for import module functionality."""
 import pytest

 from fondant.import_utils import is_package_available
@@ -18,9 +16,7 @@
     ],
 )
 def test_is_package_available(package_name, import_error_msg, expected_result):
-    """
-    Test function for is_package_available().
- """ + """Test function for is_package_available().""" if isinstance(expected_result, bool): assert is_package_available(package_name, import_error_msg) == expected_result else: diff --git a/tests/test_logger.py b/tests/test_logger.py index 80b5e33ca..487c6b914 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -1,7 +1,6 @@ -""" -Test scripts for logger functionalities -""" +"""Test scripts for logger functionalities.""" import logging + import pytest from fondant.logger import configure_logging @@ -18,7 +17,7 @@ ], ) def test_configure_logging(log_level, expected_level): - """Function to test""" + """Function to test.""" configure_logging(log_level) logger = logging.getLogger(__name__) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index c3f4ee65d..49ed4094d 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -3,10 +3,10 @@ from pathlib import Path import pytest + from fondant.exceptions import InvalidManifest from fondant.manifest import Subset, Manifest, Type - manifest_path = Path(__file__).parent / "example_specs/manifests" @@ -23,7 +23,7 @@ def invalid_manifest(): def test_manifest_validation(valid_manifest, invalid_manifest): - """Test that the manifest is validated correctly on instantiation""" + """Test that the manifest is validated correctly on instantiation.""" Manifest(valid_manifest) with pytest.raises(InvalidManifest): Manifest(invalid_manifest) @@ -87,7 +87,7 @@ def test_set_base_path(valid_manifest): def test_from_to_file(valid_manifest): - """Test reading from and writing to file""" + """Test reading from and writing to file.""" tmp_path = "/tmp/manifest.json" with open(tmp_path, "w", encoding="utf-8") as f: json.dump(valid_manifest, f) @@ -104,7 +104,7 @@ def test_attribute_access(valid_manifest): """ Test that attributes can be accessed as expected: - Fixed properties should be accessible as an attribute - - Dynamic properties should be accessible by lookup + - Dynamic properties should be accessible by lookup. """ manifest = Manifest(valid_manifest) @@ -115,7 +115,7 @@ def test_attribute_access(valid_manifest): def test_manifest_creation(): - """Test the stepwise creation of a manifest via the Manifest class""" + """Test the stepwise creation of a manifest via the Manifest class.""" base_path = "gs://bucket" run_id = "run_id" component_id = "component_id" diff --git a/tests/test_manifest_evolution.py b/tests/test_manifest_evolution.py index 6cacd941d..09caedd12 100644 --- a/tests/test_manifest_evolution.py +++ b/tests/test_manifest_evolution.py @@ -7,7 +7,6 @@ from fondant.component_spec import FondantComponentSpec from fondant.manifest import Manifest - examples_path = Path(__file__).parent / "example_specs/evolution_examples" @@ -18,7 +17,7 @@ def input_manifest(): def examples(): - """Returns examples as tuples of component and expected output_manifest""" + """Returns examples as tuples of component and expected output_manifest.""" for directory in (f for f in examples_path.iterdir() if f.is_dir()): with open(directory / "component.yaml") as c, open( directory / "output_manifest.json"