Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to add remote mags and remote layer to an existing dataset #1187

Merged
merged 31 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
1c3f63c
WIP: Add method to add remote mag
MichaelBuessemeyer Aug 29, 2024
73f4233
Add first version working to add remote layers and mags to an existin…
MichaelBuessemeyer Aug 30, 2024
22bda77
WIP: make remote path support more consistent & fix linting and so on
MichaelBuessemeyer Sep 2, 2024
fa5ba3d
format & fix linting
MichaelBuessemeyer Sep 3, 2024
acccb7f
fruther improve add remote mag / layer
MichaelBuessemeyer Sep 4, 2024
6f9da1d
add tests for add remote mag / layer
MichaelBuessemeyer Sep 4, 2024
78a2fcd
rename is_remote to is_remote_mag for more consistency
MichaelBuessemeyer Sep 4, 2024
a1969c4
maybe fix tests
MichaelBuessemeyer Sep 5, 2024
a35c716
remove debug print
MichaelBuessemeyer Sep 5, 2024
ad497db
Merge branch 'master' of github.com:scalableminds/webknossos-libs int…
MichaelBuessemeyer Sep 5, 2024
319881e
fix some tests & use proper strip_trailing_slash on upaths and no cus…
MichaelBuessemeyer Sep 5, 2024
91fd7d7
fix formatting
MichaelBuessemeyer Sep 10, 2024
bb63abb
dont write ome metadata for remote layers
MichaelBuessemeyer Sep 10, 2024
411d3b8
rename remote to foreign in parts of this pr
MichaelBuessemeyer Sep 10, 2024
caf8fbf
fix ci and remove prints & format code
MichaelBuessemeyer Sep 10, 2024
f694e10
format
MichaelBuessemeyer Sep 11, 2024
7449b0a
refactor code
MichaelBuessemeyer Sep 11, 2024
4286f3a
Merge branch 'master' into add-remote-layer-and-mag
MichaelBuessemeyer Sep 11, 2024
e1cc619
Merge branch 'master' of github.com:scalableminds/webknossos-libs int…
MichaelBuessemeyer Sep 16, 2024
10b39fc
apply feedback
MichaelBuessemeyer Sep 16, 2024
f89788e
change back to remote naming scheme of newly added methods
Sep 20, 2024
229fb26
fix layer.path property method
Sep 20, 2024
d29aec0
Merge branch 'master' of github.com:scalableminds/webknossos-libs int…
Sep 24, 2024
4e470e9
remove unused `is_remote_dataset` property from dataset
Sep 24, 2024
845b0ce
remove commented out tests from test_dataset_add_remote_mag_and_layer…
Sep 25, 2024
768fdf6
define is_remote_path as opposite of is_fs_path
Sep 25, 2024
79c9821
disable flaky test
Sep 25, 2024
bf5c239
re-add flaky add remote layer/mag tests
Sep 25, 2024
a5417a7
Update webknossos/tests/dataset/test_dataset_add_remote_mag_and_layer.py
daniel-wer Sep 26, 2024
9da9a1e
Update webknossos/tests/dataset/test_dataset_add_remote_mag_and_layer.py
daniel-wer Sep 26, 2024
4927880
Merge branch 'master' into add-remote-layer-and-mag
daniel-wer Sep 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion webknossos/tests/dataset/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,9 @@ def test_create_dataset_with_layer_and_mag(
assure_exported_properties(ds)


@pytest.mark.skip("This test fails currently, maybe due to the issue with vcr-py.")
@pytest.mark.skip(
reason="The test is flaky as sometimes fetching the file https://ngff.openmicroscopy.org/0.4/schemas/image.schema does fail. Disable it for now."
)
@pytest.mark.parametrize("output_path", [TESTOUTPUT_DIR, REMOTE_TESTOUTPUT_DIR])
def test_ome_ngff_metadata(output_path: Path) -> None:
ds_path = prepare_dataset_path(DataFormat.Zarr, output_path)
Expand Down
125 changes: 125 additions & 0 deletions webknossos/tests/dataset/test_dataset_add_remote_mag_and_layer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Iterator

import pytest
from upath import UPath

import webknossos as wk
from webknossos import Dataset, MagView
from webknossos.utils import is_remote_path

pytestmark = [pytest.mark.with_vcr]


@pytest.fixture(scope="module")
def sample_bbox() -> wk.BoundingBox:
return wk.BoundingBox((2807, 4352, 1794), (10, 10, 10))


@pytest.fixture(scope="module")
def sample_remote_dataset(sample_bbox: wk.BoundingBox) -> Iterator[wk.Dataset]:
url = "https://webknossos.org/datasets/scalable_minds/l4_sample_dev"
with TemporaryDirectory() as temp_dir:
yield wk.Dataset.download(url, path=Path(temp_dir) / "ds", bbox=sample_bbox)


@pytest.fixture(scope="module")
def sample_remote_mags() -> list[wk.MagView]:
mag_urls = [
"https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/color/1/",
"https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/color/2-2-1/",
"https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/color/4-4-2/",
"https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/segmentation/1/",
"https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/segmentation/2-2-1/",
"https://data-humerus.webknossos.org/data/zarr/scalable_minds/l4_sample_dev/segmentation/4-4-2/",
]
mags = [MagView._ensure_mag_view(url) for url in mag_urls]
return mags


@pytest.fixture(scope="module")
def sample_remote_layer() -> list[wk.Layer]:
remote_dataset_url = "https://webknossos.org/datasets/scalable_minds/l4_sample_dev"
remote_dataset = Dataset.open_remote(remote_dataset_url)
return list(remote_dataset.layers.values())


def test_add_remote_mags_from_mag_view(
sample_remote_mags: list[wk.MagView], sample_remote_dataset: wk.Dataset
) -> None:
for remote_mag in sample_remote_mags:
mag_path = remote_mag.path
layer_type = remote_mag.layer.category
assert is_remote_path(mag_path), "Remote mag does not have remote path."
layer_name = f"test_remote_layer_{mag_path.parent.name}_{mag_path.name}_object"
new_layer = sample_remote_dataset.add_layer(
layer_name,
layer_type,
data_format=remote_mag.info.data_format,
dtype_per_channel=remote_mag.get_dtype(),
)
new_layer.add_remote_mag(remote_mag)
added_mag = sample_remote_dataset.layers[layer_name].mags[remote_mag.mag]
# checking whether the added_mag.path matches the mag_url with or without a trailing slash.
assert (
added_mag.path == mag_path or added_mag.path == mag_path.parent
), "Added remote mag's path does not match remote path of mag added."


@pytest.mark.skip(
reason="The test is flaky when trying to fetch the required datasource-properties.json from data-humerus.webknossos.org. Disable it for now."
)
def test_add_remote_mags_from_path(
sample_remote_mags: list[wk.MagView],
sample_remote_dataset: wk.Dataset,
) -> None:
for remote_mag in sample_remote_mags:
mag_path = remote_mag.path
layer_type = remote_mag.layer.category
assert is_remote_path(mag_path), "Remote mag does not have remote path."
# Additional .parent calls are needed as the first .parent only removes the trailing slash.
layer_name = f"test_remote_layer_{mag_path.parent.name}_{mag_path.name}_path"
new_layer = sample_remote_dataset.add_layer(
layer_name,
layer_type,
data_format=remote_mag.info.data_format,
dtype_per_channel=remote_mag.get_dtype(),
)
new_layer.add_remote_mag(str(remote_mag.path))
added_mag = sample_remote_dataset.layers[layer_name].mags[remote_mag.mag]
# checking whether the added_mag.path matches the mag_url with or without a trailing slash.
assert (
added_mag.path == mag_path or added_mag.path == mag_path.parent
), "Added remote mag's path does not match remote path of mag added."


def test_add_remote_layer_from_object(
sample_remote_layer: list[wk.Layer], sample_remote_dataset: wk.Dataset
) -> None:
for layer in sample_remote_layer:
assert is_remote_path(layer.path), "Remote mag does not have remote path."
layer_name = f"test_remote_layer_{layer.category}_object"
sample_remote_dataset.add_remote_layer(layer, layer_name)
new_layer = sample_remote_dataset.layers[layer_name]
assert (
is_remote_path(new_layer.path)
and layer.path.as_uri() == new_layer.path.as_uri()
), "Added layer should have a remote path matching the remote layer added."


@pytest.mark.skip(
reason="The test is flaky when trying to fetch the required datasource-properties.json from data-humerus.webknossos.org. Disable it for now."
)
def test_add_remote_layer_from_path(
sample_remote_layer: list[wk.Layer],
sample_remote_dataset: wk.Dataset,
) -> None:
for layer in sample_remote_layer:
assert is_remote_path(layer.path), "Remote mag does not have remote path."
layer_name = f"test_remote_layer_{layer.category}_path"
sample_remote_dataset.add_remote_layer(UPath(layer.path), layer_name)
new_layer = sample_remote_dataset.layers[layer_name]
assert (
is_remote_path(new_layer.path) and new_layer.path == layer.path
), "Added layer should have a remote path matching the remote layer added."
53 changes: 53 additions & 0 deletions webknossos/webknossos/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
get_executor_for_args,
infer_metadata_type,
is_fs_path,
is_remote_path,
named_partial,
rmtree,
strip_trailing_slash,
Expand Down Expand Up @@ -352,6 +353,15 @@ def __init__(

layer = self._initialize_layer_from_properties(layer_properties)
self._layers[layer_properties.name] = layer
if layer.is_remote_to_dataset:
# The mags of remote layers need to have their path properly set.
for mag in layer.mags:
mag_prop = next(
mag_prop
for mag_prop in layer_properties.mags
if mag_prop.mag == mag
)
mag_prop.path = str(layer.mags[mag].path)

if dataset_existed_already:
if voxel_size_with_unit is None:
Expand Down Expand Up @@ -1690,6 +1700,49 @@ def add_symlink_layer(
self._export_as_json()
return self.layers[new_layer_name]

def add_remote_layer(
self,
foreign_layer: Union[str, UPath, Layer],
new_layer_name: Optional[str] = None,
) -> Layer:
"""
Adds a layer of another dataset to this dataset.
The relevant information from the `datasource-properties.json` of the other dataset is copied to this dataset.
Note: If the other dataset modifies its bounding box afterwards, the change does not affect this properties
(or vice versa).
If new_layer_name is None, the name of the foreign layer is used.
"""
self._ensure_writable()
foreign_layer = Layer._ensure_layer(foreign_layer)

if new_layer_name is None:
new_layer_name = foreign_layer.name

if new_layer_name in self.layers.keys():
raise IndexError(
f"Cannot add foreign layer {foreign_layer}. This dataset already has a layer called {new_layer_name}."
)
assert (
foreign_layer.dataset.path != self.path
), "Cannot add layer with the same origin dataset as foreign layer"
foreign_layer_path = foreign_layer.path

assert is_remote_path(
foreign_layer_path
), f"Cannot add foreign layer {foreign_layer_path} as it is not remote. Try using dataset.add_copy_layer instead."

layer_properties = copy.deepcopy(foreign_layer._properties)
for mag in layer_properties.mags:
mag.path = str(foreign_layer.mags[mag.mag].path)
layer_properties.name = new_layer_name
self._properties.data_layers += [layer_properties]
self._layers[new_layer_name] = self._initialize_layer_from_properties(
layer_properties
)

self._export_as_json()
return self.layers[new_layer_name]

def add_fs_copy_layer(
self,
foreign_layer: Union[str, Path, Layer],
Expand Down
Loading
Loading