Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dict-like methods of Dataset. #153

Merged
merged 17 commits into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 146 additions & 0 deletions src/scitacean/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,152 @@ def validate(self) -> None:
"""
self.make_upload_model()

def keys(self) -> Iterable[str]:
    """Dict-like keys (names of fields) method.

    Names are produced in a reproducible order: first the fields that
    belong to ``self.type``, in the order ``self.fields`` yields them,
    then every other field whose current value is not ``None``.

    Returns
    -------
    :
        Iterable of names of all fields corresponding to ``self.type``
        and other fields that are not ``None``.


    .. versionadded:: RELEASE_PLACEHOLDER
    """
    # The ``versionadded`` directive is kept at the very end of the
    # docstring, after two empty lines: reStructuredText needs them to
    # 'return' from the preceding section.
    from itertools import chain

    # ``dict.fromkeys`` removes duplicates like a set would, but keeps
    # insertion order, so the result does not depend on string hashing.
    own_names = dict.fromkeys(
        field.name for field in self.fields(dataset_type=self.type)
    )
    foreign_names = dict.fromkeys(
        field.name for field in self.fields() if field.name not in own_names
    )
    # Evaluated lazily: a foreign field is reported only if it holds a
    # value at the time the caller iterates.
    assigned_foreign_names = (
        name for name in foreign_names if getattr(self, name) is not None
    )

    return chain(own_names, assigned_foreign_names)

def values(self) -> Iterable[Any]:
    """Dict-like values (values of fields) method.

    The values are looked up by attribute access for every name
    produced by ``self.keys()``, in the same order.

    Returns
    -------
    :
        Generator of values of all fields corresponding to ``self.type``
        and other fields that are not ``None``.


    .. versionadded:: RELEASE_PLACEHOLDER
    """
    names = self.keys()
    return (getattr(self, name) for name in names)

def items(self) -> Iterable[tuple[str, Any]]:
    """Dict-like items (name and value pairs of fields) method.

    Each pair combines a name produced by ``self.keys()`` with the
    value of the attribute of that name.

    Returns
    -------
    :
        Generator of (Name, Value) pairs of all fields
        corresponding to ``self.type``
        and other fields that are not ``None``.


    .. versionadded:: RELEASE_PLACEHOLDER
    """
    names = self.keys()
    return ((name, getattr(self, name)) for name in names)

@classmethod
def _validate_field_name(cls, field_name: str) -> None:
    """Check that ``field_name`` names a known field.

    A name is accepted if it equals the ``name`` of one of the
    :class:`DatasetBase.Field` objects in ``cls.fields()``.

    Parameters
    ----------
    field_name:
        Name of the field to validate.

    Raises
    ------
    KeyError
        If ``field_name`` is not the name of any field.
    """
    known_names = (field.name for field in cls.fields())
    if field_name in known_names:
        return
    raise KeyError(f"{field_name} is not a valid field name.")

def __getitem__(self, field_name: str) -> Any:
    """Dict-like get-item method.

    The name is validated first; the value is then read through
    regular attribute access.

    Parameters
    ----------
    field_name:
        Name of the field to retrieve.

    Returns
    -------
    :
        Value of the field with the name ``field_name``.

    Raises
    ------
    KeyError
        If ``field_name`` does not match any names of fields.


    .. versionadded:: RELEASE_PLACEHOLDER
    """
    self._validate_field_name(field_name)
    value = getattr(self, field_name)
    return value

def __setitem__(self, field_name: str, field_value: Any) -> None:
    """Dict-like set-item method.

    Assign ``field_value`` to the field with name ``field_name``.
    The name is validated first; the value is then stored through
    regular attribute assignment.

    Parameters
    ----------
    field_name:
        Name of the field to set.

    field_value:
        Value of the field to set.

    Raises
    ------
    KeyError
        If ``field_name`` does not match any names of fields.


    .. versionadded:: RELEASE_PLACEHOLDER
    """
    self._validate_field_name(field_name)
    setattr(self, field_name, field_value)

def setdefault(self, field_name: str, default_value: Any) -> Any:
    """Dict-like setdefault method.

    Assign ``default_value`` to the field with name ``field_name``
    only if that field currently holds ``None``; a field that already
    has a value is left untouched.

    Parameters
    ----------
    field_name:
        Name of the field to retrieve or set a default value if needed.

    default_value:
        Value of the field to set if not set (is ``None``).

    Returns
    -------
    :
        Value of the field with name ``field_name``.

    Raises
    ------
    KeyError
        If ``field_name`` does not match any names of fields.


    .. versionadded:: RELEASE_PLACEHOLDER
    """
    self._validate_field_name(field_name)
    is_unset = getattr(self, field_name) is None
    if is_unset:
        setattr(self, field_name, default_value)
    # Read the attribute back instead of returning ``default_value`` so
    # the caller gets whatever the field actually holds now.
    return getattr(self, field_name)


@dataclasses.dataclass
class DatablockUploadModels:
Expand Down
133 changes: 133 additions & 0 deletions tests/dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,3 +765,136 @@ def test_derive_removes_attachments(initial, attachments):
initial.attachments = attachments
derived = initial.derive()
assert derived.attachments == []


@pytest.fixture(params=[DatasetType.RAW, DatasetType.DERIVED])
def my_type(request):
    """Dataset type under test, parametrized over ``RAW`` and ``DERIVED``."""
    dataset_type = request.param
    return dataset_type


@pytest.fixture
def invalid_field_example(my_type):
    """Return a ``(field name, value)`` example for the given dataset type.

    The returned field is meant to be one that does not belong to
    ``my_type``; the value is an arbitrary non-``None`` string.

    Raises
    ------
    ValueError
        If ``my_type`` is neither ``DatasetType.DERIVED`` nor
        ``DatasetType.RAW``.
    """
    if my_type == DatasetType.DERIVED:
        return "data_format", "sth_not_None"
    if my_type == DatasetType.RAW:
        return "job_log_data", "sth_not_None"
    # Build one readable message; passing the pieces as separate
    # arguments would make the exception print as a tuple.
    raise ValueError(f"{my_type} is not a valid DatasetType.")


def test_dataset_dict_like_keys_per_type(my_type):
    """``keys()`` of a dataset with only ``type`` set matches the type's fields."""
    expected_names = {
        spec.name for spec in Dataset._FIELD_SPEC if spec.used_by(my_type)
    }
    dataset = Dataset(type=my_type)
    assert set(dataset.keys()) == expected_names


def test_dataset_dict_like_keys_including_invalid_field(my_type, invalid_field_example):
    """``keys()`` also lists a field outside the type once it holds a value."""
    extra_name, extra_value = invalid_field_example

    expected_names = {
        spec.name for spec in Dataset._FIELD_SPEC if spec.used_by(my_type)
    }
    # Guard: the example field must really be outside the type's fields.
    assert extra_name not in expected_names
    expected_names.add(extra_name)

    dataset = Dataset(type=my_type)
    setattr(dataset, extra_name, extra_value)

    assert set(dataset.keys()) == expected_names


def test_dataset_dict_like_values(my_type):
    """Every value from ``values()`` equals the attribute named by the paired key."""
    dataset = Dataset(type=my_type, comment="This is an example.")
    paired = zip(dataset.keys(), dataset.values())
    for name, value in paired:
        assert value == getattr(dataset, name)


def test_dataset_dict_like_values_with_invalid_field(my_type, invalid_field_example):
    """``values()`` stays aligned with ``keys()`` when an extra field is set."""
    extra_name, extra_value = invalid_field_example
    dataset = Dataset(type=my_type, comment="This is an example.")
    setattr(dataset, extra_name, extra_value)
    paired = zip(dataset.keys(), dataset.values())
    for name, value in paired:
        assert value == getattr(dataset, name)


def test_dataset_dict_like_items_with_invalid_field(my_type, invalid_field_example):
    """Each ``(name, value)`` pair from ``items()`` matches attribute access."""
    extra_name, extra_value = invalid_field_example
    dataset = Dataset(type=my_type, comment="This is an example.")
    setattr(dataset, extra_name, extra_value)
    for name, value in dataset.items():
        assert value == getattr(dataset, name)


def test_dataset_dict_like_getitem(my_type):
    """Subscript access returns the given ``type`` and ``None`` for an unset comment."""
    dataset = Dataset(type=my_type)
    assert dataset["type"] == my_type
    assert dataset["comment"] is None


@pytest.mark.parametrize(
    ("is_attr", "wrong_field"), ((True, "size"), (False, "OBVIOUSLYWRONGNAME"))
)
def test_dataset_dict_like_getitem_wrong_field_raises(is_attr, wrong_field):
    """Subscript access with a name that is not a field raises ``KeyError``."""
    # 'size' should be included in the field later.
    # It is now excluded because it is ``manual`` field. See issue#151.
    dataset = Dataset(type="raw")
    expected_message = f"{wrong_field} is not a valid field name."
    assert hasattr(dataset, wrong_field) == is_attr
    with pytest.raises(KeyError, match=expected_message):
        dataset[wrong_field]


def test_dataset_dict_like_setdefault(my_type):
    """``setdefault`` stores and returns the default when the field is ``None``."""
    default_comment = "This is an example."
    dataset = Dataset(type=my_type)
    assert dataset["comment"] is None
    returned = dataset.setdefault("comment", default_comment)
    assert returned == default_comment
    assert dataset["comment"] == default_comment


def test_dataset_dict_like_setdefault_existing_key():
    """``setdefault`` returns the existing value when the field is already set."""
    existing_comment = "This is the original comment."
    fallback_comment = "This is an example."
    dataset = Dataset(type="raw", comment=existing_comment)
    assert dataset["comment"] == existing_comment
    returned = dataset.setdefault("comment", fallback_comment)
    assert returned == existing_comment


def test_dataset_dict_like_setdefault_object():
    """``setdefault`` stores and returns the very same object, not a copy."""
    dataset = Dataset(type="raw")
    assert dataset["shared_with"] is None
    fallback = []
    returned = dataset.setdefault("shared_with", fallback)
    # Identity checks, not equality: an equal but distinct list would fail.
    assert fallback is returned
    assert dataset["shared_with"] is fallback


def test_dataset_dict_like_setitem(my_type):
    """A value assigned by subscript can be read back by subscript."""
    new_comment = "This is an example."
    dataset = Dataset(type=my_type)
    assert dataset["comment"] is None
    dataset["comment"] = new_comment
    assert dataset["comment"] == new_comment


def test_dataset_dict_like_setitem_invalid_field(my_type, invalid_field_example):
    """Subscript assignment works for a field outside the dataset's type."""
    # ``__setitem__`` doesn't check if the item is invalid for the current type or not.
    extra_name, extra_value = invalid_field_example
    dataset = Dataset(type=my_type)
    assert dataset[extra_name] is None
    dataset[extra_name] = extra_value
    assert dataset[extra_name] == extra_value


@pytest.mark.parametrize(
    ("is_attr", "wrong_field", "wrong_value"),
    ((True, "size", 10), (False, "OBVIOUSLYWRONGNAME", "OBVIOUSLYWRONGVALUE")),
)
def test_dataset_dict_like_setitem_wrong_field_raises(
    is_attr, wrong_field, wrong_value
):
    """Subscript assignment with a name that is not a field raises ``KeyError``."""
    # ``manual`` fields such as ``size`` should raise with ``__setitem__``.
    # However, it may need more specific error message.
    dataset = Dataset(type="raw")
    expected_message = f"{wrong_field} is not a valid field name."
    assert hasattr(dataset, wrong_field) == is_attr
    with pytest.raises(KeyError, match=expected_message):
        dataset[wrong_field] = wrong_value