Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add manifest tests #80

Merged
merged 6 commits into from
May 8, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions fondant/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def remove_field(self, name: str) -> None:
del self._specification["fields"][name]

def __repr__(self) -> str:
return f"{self.__class__.__name__}({self._specification!r}"
return f"{self.__class__.__name__}({self._specification!r})"


class Index(Subset):
Expand Down Expand Up @@ -149,6 +149,10 @@ def add_metadata(self, key: str, value: t.Any) -> None:
def base_path(self) -> str:
return self.metadata["base_path"]

@base_path.setter
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we add a setter specifically for base_path instead of leveraging the add_metadata() method, we probably should do this for the run_id and component_id as well?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used the add_metadata() in my first version but decided against it since I was updating metadata and not adding it. But I guess we could use it instead of the setter.

def base_path(self, path: str) -> None:
self.metadata["base_path"] = path

@property
def run_id(self) -> str:
return self.metadata["run_id"]
Expand Down Expand Up @@ -255,4 +259,4 @@ def evolve(self, component_spec: FondantComponentSpec) -> "Manifest":
return evolved_manifest

def __repr__(self) -> str:
return f"{self.__class__.__name__}({self._specification!r}"
return f"{self.__class__.__name__}({self._specification!r})"
RobbeSneyders marked this conversation as resolved.
Show resolved Hide resolved
8 changes: 5 additions & 3 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,11 @@ def test_merge_subsets(input_manifest, component_spec):

def test_write_subsets(input_manifest, output_manifest, component_spec):
# Dictionary specifying the expected subsets to write and their column names
subset_columns_dict = {"index": ['id', 'source'],
"properties": ['Name', 'HP', 'id', 'source'],
"types": ['Type 1', 'Type 2', 'id', 'source']}
subset_columns_dict = {
"index": ["id", "source"],
"properties": ["Name", "HP", "id", "source"],
"types": ["Type 1", "Type 2", "id", "source"],
}

# Load dataframe from input manifest
input_fds = FondantDataset(manifest=input_manifest)
Expand Down
110 changes: 102 additions & 8 deletions tests/test_manifest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import pkgutil
from pathlib import Path

import pytest
from fondant.exceptions import InvalidManifest
from fondant.manifest import Manifest, Type
from fondant.manifest import Subset, Manifest, Type


manifest_path = Path(__file__).parent / "example_specs/manifests"
Expand All @@ -28,6 +29,63 @@ def test_manifest_validation(valid_manifest, invalid_manifest):
Manifest(invalid_manifest)


def test_subset_init():
"""Test initializing a subset"""
subset_spec = {
"location": "/ABC/123/images",
"fields": {
"data": {
"type": "binary",
},
},
}
subset = Subset(specification=subset_spec, base_path="/tmp")
assert subset.location == "/tmp/ABC/123/images"
assert (
subset.__repr__()
== "Subset({'location': '/ABC/123/images', 'fields': {'data': {'type': 'binary'}}})"
)


def test_subset_fields():
"""Test manipulating subset fields"""
subset_spec = {
"location": "/ABC/123/images",
"fields": {
"data": {
"type": "binary",
},
},
}
subset = Subset(specification=subset_spec, base_path="/tmp")

# add a field
subset.add_field(name="data2", type_=Type.binary)
assert "data2" in subset.fields

# add a duplicate field
with pytest.raises(ValueError):
subset.add_field(name="data2", type_=Type.binary)

# add a duplicate field but overwrite
subset.add_field(name="data2", type_=Type.utf8, overwrite=True)
assert subset.fields["data2"].type == "utf8"

# remove a field
subset.remove_field(name="data2")
assert "data2" not in subset.fields


def test_set_base_path(valid_manifest):
"""Test altering the base path in the manifest"""
manifest = Manifest(valid_manifest)
tmp_path = "/tmp/base_path"
manifest.base_path = tmp_path

assert manifest.base_path == tmp_path
assert manifest._specification["metadata"]["base_path"] == tmp_path


def test_from_to_file(valid_manifest):
"""Test reading from and writing to file"""
tmp_path = "/tmp/manifest.json"
Expand Down Expand Up @@ -62,7 +120,9 @@ def test_manifest_creation():
run_id = "run_id"
component_id = "component_id"

manifest = Manifest.create(base_path=base_path, run_id=run_id, component_id=component_id)
manifest = Manifest.create(
base_path=base_path, run_id=run_id, component_id=component_id
)
manifest.add_subset("images", [("width", Type.int32), ("height", Type.int32)])
manifest.subsets["images"].add_field("data", Type.binary)

Expand All @@ -72,9 +132,7 @@ def test_manifest_creation():
"run_id": run_id,
"component_id": component_id,
},
"index": {
"location": f"/{run_id}/{component_id}/index"
},
"index": {"location": f"/{run_id}/{component_id}/index"},
"subsets": {
"images": {
"location": f"/{run_id}/{component_id}/images",
Expand All @@ -87,17 +145,53 @@ def test_manifest_creation():
},
"data": {
"type": "binary",
}
}
},
},
}
}
},
}


def test_manifest_repr():
manifest = Manifest.create(base_path="/", run_id="A", component_id="1")
print(manifest)
RobbeSneyders marked this conversation as resolved.
Show resolved Hide resolved
assert (
manifest.__repr__()
== "Manifest({'metadata': {'base_path': '/', 'run_id': 'A', 'component_id': '1'}, 'index': {'location': '/A/1/index'}, 'subsets': {}})"
)


def test_manifest_alteration(valid_manifest):
"""Test alteration functionalities of a manifest via the Manifest class"""
manifest = Manifest(valid_manifest)

# test adding a subset
manifest.add_subset("images2", [("width", Type.int32), ("height", Type.int32)])
assert "images2" in manifest.subsets

# test adding a duplicate subset
with pytest.raises(ValueError):
manifest.add_subset("images2", [("width", Type.int32), ("height", Type.int32)])

# test removing a subset
manifest.remove_subset("images2")
assert "images2" not in manifest.subsets

# test removing a nonexistant subset
with pytest.raises(ValueError):
manifest.remove_subset("pictures")


def test_manifest_copy_and_adapt(valid_manifest):
"""Test that a manifest can be copied and adapted without changing the original."""
manifest = Manifest(valid_manifest)
new_manifest = manifest.copy()
new_manifest.remove_subset("images")
assert manifest._specification == valid_manifest
assert new_manifest._specification != valid_manifest


def test_no_validate_schema(monkeypatch, valid_manifest):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really understand this test. When would this behavior occur?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably never but we needed to cover this branch in the logic for coverage.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The message being raised mentions component_spec instead of manifest, so you might want to check that 😛

monkeypatch.setattr(pkgutil, "get_data", lambda package, resource: None)
with pytest.raises(FileNotFoundError):
manifest = Manifest(valid_manifest)