Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forbid empty list and None values in attribute_filters #2202

Merged
merged 15 commits into from
Jan 22, 2025
Merged
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

* API:
* Impose compatibility between `WorkflowTaskV2.type_filters` and `TaskV2.input_types` (\#2196).
* Schemas:
* Forbid `None` or `[]` as `attribute_filters` (\#2200).

# 2.11.0a4

Expand Down
12 changes: 4 additions & 8 deletions fractal_server/app/schemas/_filter_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,10 @@ def validate_attribute_filters(

attribute_filters = valdict_keys("attribute_filters")(attribute_filters)
for key, values in attribute_filters.items():
if values is None:
# values=None corresponds to not applying any filter for
# attribute `key`
pass
elif values == []:
# WARNING: in this case, no image can match with the current
# filter. In the future we may deprecate this possibility.
pass
if values == []:
raise ValueError(
f"attribute_filters[{key}] cannot be an empty list."
)
else:
# values is a non-empty list, and its items must all be of the
# same scalar non-None type
Expand Down
69 changes: 58 additions & 11 deletions fractal_server/data_migrations/2_11_0.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Union

from sqlalchemy.orm.attributes import flag_modified
from sqlmodel import select
Expand All @@ -7,61 +8,107 @@
from fractal_server.app.models import DatasetV2
from fractal_server.app.models import JobV2
from fractal_server.app.models import WorkflowTaskV2
from fractal_server.app.schemas.v2 import DatasetReadV2
from fractal_server.app.schemas.v2 import JobReadV2
from fractal_server.app.schemas.v2 import ProjectReadV2
from fractal_server.app.schemas.v2 import TaskReadV2
from fractal_server.app.schemas.v2 import WorkflowTaskReadV2

logger = logging.getLogger("fix_db")
logger.setLevel(logging.INFO)


def fix_db():
def dict_values_to_list(
input_dict: dict[str, Union[int, float, bool, str, None]],
identifier: str,
) -> dict[str, list[Union[int, float, bool, str]]]:
for k, v in input_dict.items():
if not isinstance(v, (int, float, bool, str, type(None))):
error_msg = (
f"Attribute '{k}' from '{identifier}' "
f"has invalid type '{type(v)}'."
)
logger.error(error_msg)
raise RuntimeError(error_msg)
elif v is None:
logger.warning(
f"Attribute '{k}' from '{identifier}' is None and it "
"will be removed."
)
else:
input_dict[k] = [v]
return input_dict


def fix_db():
logger.info("START execution of fix_db function")

with next(get_sync_db()) as db:

# DatasetV2.filters
# DatasetV2.history[].workflowtask.input_filters
stm = select(DatasetV2).order_by(DatasetV2.id)
datasets = db.execute(stm).scalars().all()
for ds in datasets:
ds.attribute_filters = ds.filters["attributes"]
logger.info(f"DatasetV2[{ds.id}] START")
ds.attribute_filters = dict_values_to_list(
ds.filters["attributes"],
f"Dataset[{ds.id}].filters.attributes",
)
ds.type_filters = ds.filters["types"]
ds.filters = None
for i, h in enumerate(ds.history):
ds.history[i]["workflowtask"]["type_filters"] = h[
"workflowtask"
]["input_filters"]["types"]
flag_modified(ds, "history")
DatasetReadV2(
**ds.model_dump(),
project=ProjectReadV2(**ds.project.model_dump()),
)
db.add(ds)
logger.info(f"Fixed filters in DatasetV2[{ds.id}]")
logger.info(f"DatasetV2[{ds.id}] END - fixed filters")

logger.info("------ switch from dataset to workflowtasks ------")

# WorkflowTaskV2.input_filters
stm = select(WorkflowTaskV2).order_by(WorkflowTaskV2.id)
wftasks = db.execute(stm).scalars().all()
for wft in wftasks:
logger.info(f"WorkflowTaskV2[{wft.id}] START")
wft.type_filters = wft.input_filters["types"]
if wft.input_filters["attributes"]:
logger.warning(
f"Removing WorkflowTaskV2[{wft.id}].input_filters"
f"['attributes'] = {wft.input_filters['attributes']}"
"Removing input_filters['attributes']. "
f"(previous value: {wft.input_filters['attributes']})"
)
wft.input_filters = None
flag_modified(wft, "input_filters")
WorkflowTaskReadV2(
**wft.model_dump(),
task=TaskReadV2(**wft.task.model_dump()),
)
db.add(wft)
logger.info(f"Fixed filters in WorkflowTaskV2[{wft.id}]")
logger.info(f"WorkflowTaskV2[{wft.id}] END - fixed filters")

logger.info("------ switch from workflowtasks to jobs ------")

# JOBS V2
stm = select(JobV2).order_by(JobV2.id)
jobs = db.execute(stm).scalars().all()
for job in jobs:
logger.info(f"JobV2[{job.id}] START")
job.dataset_dump["type_filters"] = job.dataset_dump["filters"][
"types"
]
job.dataset_dump["attribute_filters"] = job.dataset_dump[
"filters"
]["attributes"]
job.dataset_dump["attribute_filters"] = dict_values_to_list(
job.dataset_dump["filters"]["attributes"],
f"JobV2[{job.id}].dataset_dump.filters.attributes",
)
job.dataset_dump.pop("filters")
flag_modified(job, "dataset_dump")
logger.info(f"Fixed filters in JobV2[{job.id}].datasetdump")
JobReadV2(**job.model_dump())
db.add(job)
logger.info(f"JobV2[{job.id}] END - fixed filters")

db.commit()
logger.info("Changes committed.")
Expand Down
2 changes: 1 addition & 1 deletion fractal_server/images/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from fractal_server.app.schemas._validators import valdict_keys
from fractal_server.urls import normalize_url

AttributeFiltersType = dict[str, Optional[list[Any]]]
AttributeFiltersType = dict[str, list[Any]]


class _SingleImageBase(BaseModel):
Expand Down
2 changes: 0 additions & 2 deletions fractal_server/images/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ def match_filter(

# Verify match with attributes (only for not-None filters)
for key, values in attribute_filters.items():
if values is None:
continue
if image["attributes"].get(key) not in values:
return False

Expand Down
4 changes: 2 additions & 2 deletions tests/v2/01_schemas/test_schemas_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@

VALID_ATTRIBUTE_FILTERS = (
{},
{"key1": []},
{"key1": ["A"]},
{"key1": ["A", "B"]},
{"key1": [1, 2]},
{"key1": [True, False]},
{"key1": [1.5, -1.2]},
{"key1": None},
{"key1": [1, 2], "key2": ["A", "B"]},
)

INVALID_ATTRIBUTE_FILTERS = (
{"key1": []},
{"key1": None},
{True: ["value"]}, # non-string key
{1: ["value"]}, # non-string key
{"key1": 1}, # not a list
Expand Down
2 changes: 1 addition & 1 deletion tests/v2/05_images/test_benchmark_helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_filter_image_list_with_filters(
):
new_list = filter_image_list(
images=images,
attribute_filters=dict(a1=[0], a2=["a2"], a3=None),
attribute_filters=dict(a1=[0], a2=["a2"]),
type_filters=dict(t1=True, t2=False),
)
debug(len(images), len(new_list))
Expand Down
2 changes: 0 additions & 2 deletions tests/v2/05_images/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,6 @@ def test_singleimage_attributes_validation():
({}, {"missing_key": False}, 6),
# Key is part of attribute keys, but value is missing
({"plate": ["missing_plate.zarr"]}, {}, 0),
# Meaning of None for attributes: skip a given filter
({"plate": None}, {}, 6),
# Single type filter
({}, {"3D": True}, 4),
# Single type filter
Expand Down
7 changes: 0 additions & 7 deletions tests/v2/05_images/test_unit_image_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,6 @@ def test_match_filter():
assert not match_filter(
image=image, type_filters={}, attribute_filters={"a": [0], "b": [1, 2]}
)
assert match_filter(
image=image,
type_filters={},
attribute_filters={"a": None, "b": [1, 2]},
)

# both
assert match_filter(
Expand Down Expand Up @@ -141,8 +136,6 @@ def test_filter_image_list():
assert len(res) == 3
res = filter_image_list(images, attribute_filters={"a": [1, 2]})
assert len(res) == 4
res = filter_image_list(images, attribute_filters={"a": None, "b": None})
assert len(res) == 5

# both
res = filter_image_list(
Expand Down
Loading